Skip to content

Latest commit

 

History

History
193 lines (119 loc) · 4.59 KB

File metadata and controls

193 lines (119 loc) · 4.59 KB

toFloat32

Convert a half-precision floating-point number to the nearest single-precision floating-point number.

Usage

var float16ToFloat32 = require( '@stdlib/number/float16/base/to-float32' );

float16ToFloat32( x )

Convert a half-precision floating-point number to the nearest single-precision floating-point number.

var float64ToFloat16 = require( '@stdlib/number/float64/base/to-float16' );

var y = float16ToFloat32( float64ToFloat16( 1.337 ) );
// returns 1.3369140625

Examples

var float64ToFloat16 = require( '@stdlib/number/float64/base/to-float16' );
var uniform = require( '@stdlib/random/array/uniform' );
var map = require( '@stdlib/array/base/map' );
var naryFunction = require( '@stdlib/utils/nary-function' );
var pickArguments = require( '@stdlib/utils/pick-arguments' );
var logEachMap = require( '@stdlib/console/log-each-map' );
var float16ToFloat32 = require( '@stdlib/number/float16/base/to-float32' );

// Draw 100 random double-precision values (bounds: 0.0 and 100.0):
var doubles = uniform( 100, 0.0, 100.0 );

// Wrap the float64 => float16 converter so that only one argument is forwarded to it:
var toHalf = naryFunction( float64ToFloat16, 1 );

// Narrow every double-precision value to half precision:
var halves = map( doubles, toHalf );

// Wrap the float16 => float32 converter so that it receives only the argument at index 1 (i.e., the half-precision value):
var toSingle = pickArguments( float16ToFloat32, [ 1 ] );

// Log each double, its half-precision counterpart, and the nearest single-precision number:
logEachMap( 'float64: %f => float16: %f => float32: %f', doubles, halves, toSingle );

C APIs

Usage

#include "stdlib/number/float16/base/to_float32.h"

stdlib_base_float16_to_float32( x )

Convert a half-precision floating-point number to the nearest single-precision floating-point number.

#include "stdlib/number/float16/ctor.h"

stdlib_float16_t v = stdlib_float16_from_bits( 51648 ); // => -11.5
float x = stdlib_base_float16_to_float32( v );

The function accepts the following arguments:

  • x: [in] stdlib_float16_t half-precision floating-point number.
float stdlib_base_float16_to_float32( const stdlib_float16_t x );

Examples

#include "stdlib/number/float16/base/to_float32.h"
#include "stdlib/number/float16/ctor.h"
#include <stdint.h>
#include <stdio.h>

int main( void ) {
    // Half-precision inputs, each built from its raw bit pattern:
    const stdlib_float16_t values[] = {
        stdlib_float16_from_bits( 51648 ), // -11.5
        stdlib_float16_from_bits( 18880 )  // 11.5
    };
    float f32;
    int idx = 0;

    // Convert each input to single precision and print it next to its bit pattern:
    while ( idx < 2 ) {
        f32 = stdlib_base_float16_to_float32( values[ idx ] );
        printf( "float16 bits: %u => float32: %f\n", stdlib_float16_to_bits( values[ idx ] ), f32 );
        idx++;
    }
}