Convert a half-precision floating-point number to the nearest single-precision floating-point number.
var float16ToFloat32 = require( '@stdlib/number/float16/base/to-float32' );

Convert a half-precision floating-point number to the nearest single-precision floating-point number.

var float64ToFloat16 = require( '@stdlib/number/float64/base/to-float16' );
var y = float16ToFloat32( float64ToFloat16( 1.337 ) );
// returns 1.3369140625

var float64ToFloat16 = require( '@stdlib/number/float64/base/to-float16' );
var uniform = require( '@stdlib/random/array/uniform' );
var map = require( '@stdlib/array/base/map' );
var naryFunction = require( '@stdlib/utils/nary-function' );
var pickArguments = require( '@stdlib/utils/pick-arguments' );
var logEachMap = require( '@stdlib/console/log-each-map' );
var float16ToFloat32 = require( '@stdlib/number/float16/base/to-float32' );
// Generate an array of random numbers (presumably 100 values drawn between 0.0 and 100.0 — confirm the argument order against `@stdlib/random/array/uniform`):
var f64 = uniform( 100, 0.0, 100.0 );
// Convert each value to a half-precision floating-point number. The converter is wrapped with `naryFunction( ..., 1 )`, presumably so that it only receives the element value and none of the extra arguments `map` may supply — confirm against `@stdlib/utils/nary-function`:
var f16 = map( f64, naryFunction( float64ToFloat16, 1 ) );
// Convert each half-precision floating-point number to the nearest single-precision floating-point number:
logEachMap( 'float64: %f => float16: %f => float32: %f', f64, f16, pickArguments( float16ToFloat32, [ 1 ] ) );

#include "stdlib/number/float16/base/to_float32.h"

Convert a half-precision floating-point number to the nearest single-precision floating-point number.

#include "stdlib/number/float16/ctor.h"
stdlib_float16_t v = stdlib_float16_from_bits( 51648 ); // => -11.5
float x = stdlib_base_float16_to_float32( v );

The function accepts the following arguments:

- x: [in] stdlib_float16_t half-precision floating-point number.

float stdlib_base_float16_to_float32( const stdlib_float16_t x );

#include "stdlib/number/float16/base/to_float32.h"
#include "stdlib/number/float16/ctor.h"
#include <stdint.h>
#include <stdio.h>
/**
* Example entry point: converts two half-precision values to single-precision and prints each conversion.
*
* @return    exit status (no explicit return statement; relies on the implicit return of `main`)
*/
int main( void ) {
	// Half-precision inputs constructed from their bit patterns:
	const stdlib_float16_t values[] = {
		stdlib_float16_from_bits( 51648 ), // -11.5
		stdlib_float16_from_bits( 18880 ) // 11.5
	};
	int idx = 0;

	// Walk both inputs, converting first and then printing the source bits next to the result:
	while ( idx < 2 ) {
		const float converted = stdlib_base_float16_to_float32( values[ idx ] );
		printf( "float16 bits: %u => float32: %f\n", stdlib_float16_to_bits( values[ idx ] ), converted );
		idx += 1;
	}
}