/// efficient double to text conversion using the GRISU-1 algorithm // - as a complement to SynCommons, which tended to increase too much // - licensed under a MPL/GPL/LGPL tri-license; version 1.18 { Implement 64-bit floating point (double) to ASCII conversion using the GRISU-1 efficient algorithm. Original Code in flt_core.inc flt_conv.inc flt_pack.inc from FPC RTL. Copyright (C) 2013 by Max Nazhalov Licenced with LGPL 2 with the linking exception. If you don't agree with these License terms, disable this feature by undefining DOUBLETOSHORT_USEGRISU in Synopse.inc GRISU Original Algorithm Copyright (c) 2009 Florian Loitsch We extracted a double-to-ascii only cut-down version of those files, and made a huge refactoring to reach the best performance, especially tuning the Intel target with some dedicated asm and code rewrite. With Delphi 10.3 on Win32: (no benefit) 100000 FloatToText in 38.11ms i.e. 2,623,570/s, aver. 0us, 47.5 MB/s 100000 str in 43.19ms i.e. 2,315,082/s, aver. 0us, 50.7 MB/s 100000 DoubleToShort in 45.50ms i.e. 2,197,367/s, aver. 0us, 43.8 MB/s 100000 DoubleToAscii in 42.44ms i.e. 2,356,045/s, aver. 0us, 47.8 MB/s With Delphi 10.3 on Win64: 100000 FloatToText in 61.83ms i.e. 1,617,233/s, aver. 0us, 29.3 MB/s 100000 str in 53.20ms i.e. 1,879,663/s, aver. 0us, 41.2 MB/s 100000 DoubleToShort in 18.45ms i.e. 5,417,998/s, aver. 0us, 108 MB/s 100000 DoubleToAscii in 18.19ms i.e. 5,496,921/s, aver. 0us, 111.5 MB/s With FPC on Win32: 100000 FloatToText in 115.62ms i.e. 864,842/s, aver. 1us, 15.6 MB/s 100000 str in 57.30ms i.e. 1,745,109/s, aver. 0us, 39.9 MB/s 100000 DoubleToShort in 23.88ms i.e. 4,187,078/s, aver. 0us, 83.5 MB/s 100000 DoubleToAscii in 23.34ms i.e. 4,284,490/s, aver. 0us, 86.9 MB/s With FPC on Win64: 100000 FloatToText in 76.92ms i.e. 1,300,052/s, aver. 0us, 23.5 MB/s 100000 str in 27.70ms i.e. 3,609,456/s, aver. 0us, 82.6 MB/s 100000 DoubleToShort in 14.73ms i.e. 6,787,944/s, aver. 0us, 135.4 MB/s 100000 DoubleToAscii in 13.78ms i.e. 7,253,735/s, aver. 0us, 147.2 MB/s With FPC on Linux x86_64: 100000 FloatToText in 81.48ms i.e. 1,227,249/s, aver. 0us, 22.2 MB/s 100000 str in 36.98ms i.e. 2,703,871/s, aver. 0us, 61.8 MB/s 100000 DoubleToShort in 13.11ms i.e. 7,626,601/s, aver. 0us, 152.1 MB/s 100000 DoubleToAscii in 12.59ms i.e. 7,942,180/s, aver. 0us, 161.2 MB/s - Our rewrite is twice faster than original flt_conv.inc from FPC RTL (str) - Delphi Win32 has trouble making 64-bit computation - no benefit since it has good optimized i87 asm (but slower than our code with FPC/Win32) - FPC is more efficient when compiling integer arithmetic; we avoided slow division by calling our Div100(), but Delphi Win64 is still far behind - Delphi Win64 has very slow FloatToText and str() } // Controls printing of NaN-sign. // Undefine to print NaN sign during float->ASCII conversion. // IEEE does not interpret the sign of a NaN, so leave it defined. {$define GRISU1_F2A_NAN_SIGNLESS} // Controls rounding of generated digits when formatting with narrowed // width (either fixed or exponential notation). // Traditionally, FPC and BP7/Delphi use "roundTiesToAway" mode. // Undefine to use "roundTiesToEven" approach. {$define GRISU1_F2A_HALF_ROUNDUP} // This one is a hack against Grusu sub-optimality. // It may be used only strictly together with GRISU1_F2A_HALF_ROUNDUP. // It does not violate most general rules due to the fact that it is // applicable only when formatting with narrowed width, where the fine // view is more desirable, and the precision is already lost, so it can // be used in general-purpose applications. // Refer to its implementation. {$define GRISU1_F2A_AGRESSIVE_ROUNDUP} // Defining this fixes several tests. // Undefine to enable SNaN support. // Note: IEEE [754-2008, page 31] requires (1) to recognize "SNaN" during // ASCII->float, and (2) to generate the "invalid FP operation" exception // either when SNaN is printed as "NaN", or "SNaN" is evaluated to QNaN, // so it would be preferable to undefine these settings, // but the FPC RTL is not ready for this right now.. {$define GRISU1_F2A_NO_SNAN} /// If Value=0 would just store '0', whatever frac_digits is supplied. {$define GRISU1_F2A_ZERONOFRACT} {$ifndef FPC} // those functions are intrinsics with FPC :) function BSRdword(c: cardinal): cardinal; asm {$ifdef CPU64} .noframe mov eax, c {$endif} bsr eax, eax end; // in our code below, we are sure that c<>0 function BSRqword(const q: qword): cardinal; asm {$ifdef CPU32} bsr eax, [esp + 8] jz @1 add eax, 32 ret @1: bsr eax, [esp + 4] @2: {$else} .noframe mov rax, q bsr rax, rax {$endif} end; // in our code below, we are sure that q<>0 {$endif FPC} const // TFloatFormatProfile for double nDig_mantissa = 17; nDig_exp10 = 3; type // "Do-It-Yourself Floating Point" structures TDIY_FP = record f: qword; e: integer; end; TDIY_FP_Power_of_10 = record c: TDIY_FP; e10: integer; end; PDIY_FP_Power_of_10 = ^TDIY_FP_Power_of_10; const ROUNDER = $80000000; {$ifdef CPUINTEL} // our faster version using 128-bit x86_64 multiplication procedure d2a_diy_fp_multiply(var x, y: TDIY_FP; normalize: boolean; out result: TDIY_FP); {$ifdef HASINLINE} inline; {$endif} var p: THash128Rec; begin mul64x64(x.f, y.f, p); // fast x86_64 / i386 asm if (p.c1 and ROUNDER) <> 0 then inc(p.h); result.f := p.h; result.e := PtrInt(x.e) + PtrInt(y.e) + 64; if normalize then if (PQWordRec(@result.f)^.h and ROUNDER) = 0 then begin result.f := result.f * 2; dec(result.e); end; end; {$else} // regular Grisu method - optimized for 32-bit CPUs procedure d2a_diy_fp_multiply(var x, y: TDIY_FP; normalize: boolean; out result: TDIY_FP); var _x: TQWordRec absolute x; _y: TQWordRec absolute y; r: TQWordRec absolute result; ac, bc, ad, bd, t1: TQWordRec; begin ac.v := qword(_x.h) * _y.h; bc.v := qword(_x.l) * _y.h; ad.v := qword(_x.h) * _y.l; bd.v := qword(_x.l) * _y.l; t1.v := qword(ROUNDER) + bd.h + bc.l + ad.l; result.f := ac.v + ad.h + bc.h + t1.h; result.e := x.e + y.e + 64; if normalize then if (r.h and ROUNDER) = 0 then begin inc(result.f, result.f); dec(result.e); end; end; {$endif CPUINTEL} const // alpha =-61; gamma = 0 // full cache: 1E-450 .. 1E+432, step = 1E+18 // sparse = 1/10 C_PWR10_DELTA = 18; C_PWR10_COUNT = 50; type TDIY_FP_Cached_Power10 = record base: array [ 0 .. 9 ] of TDIY_FP_Power_of_10; factor_plus: array [ 0 .. 1 ] of TDIY_FP_Power_of_10; factor_minus: array [ 0 .. 1 ] of TDIY_FP_Power_of_10; // extra mantissa correction [ulp; signed] corrector: array [ 0 .. C_PWR10_COUNT - 1 ] of shortint; end; const CACHED_POWER10: TDIY_FP_Cached_Power10 = ( base: ( ( c: ( f: qword($825ECC24C8737830); e: -362 ); e10: -90 ), ( c: ( f: qword($E2280B6C20DD5232); e: -303 ); e10: -72 ), ( c: ( f: qword($C428D05AA4751E4D); e: -243 ); e10: -54 ), ( c: ( f: qword($AA242499697392D3); e: -183 ); e10: -36 ), ( c: ( f: qword($9392EE8E921D5D07); e: -123 ); e10: -18 ), ( c: ( f: qword($8000000000000000); e: -63 ); e10: 0 ), ( c: ( f: qword($DE0B6B3A76400000); e: -4 ); e10: 18 ), ( c: ( f: qword($C097CE7BC90715B3); e: 56 ); e10: 36 ), ( c: ( f: qword($A70C3C40A64E6C52); e: 116 ); e10: 54 ), ( c: ( f: qword($90E40FBEEA1D3A4B); e: 176 ); e10: 72 ) ); factor_plus: ( ( c: ( f: qword($F6C69A72A3989F5C); e: 534 ); e10: 180 ), ( c: ( f: qword($EDE24AE798EC8284); e: 1132 ); e10: 360 ) ); factor_minus: ( ( c: ( f: qword($84C8D4DFD2C63F3B); e: -661 ); e10: -180 ), ( c: ( f: qword($89BF722840327F82); e: -1259 ); e10: -360 ) ); corrector: ( 0, 0, 0, 0, 1, 0, 0, 0, 1, -1, 0, 1, 1, 1, -1, 0, 0, 1, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, -1, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 1, 0, 0, 0, -1, 0 )); CACHED_POWER10_MIN10 = -90 -360; // = ref.base[low(ref.base)].e10 + ref.factor_minus[high(ref.factor_minus)].e10 // return normalized correctly rounded approximation of the power of 10 // scaling factor, intended to shift a binary exponent of the original number // into selected [ alpha .. gamma ] range procedure d2a_diy_fp_cached_power10(exp10: integer; out factor: TDIY_FP_Power_of_10); var i, xmul: integer; A, B: PDIY_FP_Power_of_10; cx: PtrInt; ref: ^TDIY_FP_Cached_Power10; begin ref := @CACHED_POWER10; // much better code generation on PIC/x86_64 // find non-sparse index if exp10 <= CACHED_POWER10_MIN10 then i := 0 else begin i := (exp10 - CACHED_POWER10_MIN10) div C_PWR10_DELTA; if i * C_PWR10_DELTA + CACHED_POWER10_MIN10 <> exp10 then inc(i); // round-up if i > C_PWR10_COUNT - 1 then i := C_PWR10_COUNT - 1; end; // generate result xmul := i div length(ref.base); A := @ref.base[i - (xmul * length(ref.base))]; // fast mod dec(xmul, length(ref.factor_minus)); if xmul = 0 then begin // base factor := A^; exit; end; // surrogate if xmul > 0 then begin dec(xmul); B := @ref.factor_plus[xmul]; end else begin xmul := -(xmul + 1); B := @ref.factor_minus[xmul]; end; factor.e10 := A.e10 + B.e10; if A.e10 <> 0 then begin d2a_diy_fp_multiply(A.c, B.c, true, factor.c); // adjust mantissa cx := ref.corrector[i]; if cx <> 0 then inc(int64(factor.c.f), int64(cx)); end else // exact factor.c := B^.c; end; procedure d2a_unpack_float(const f: double; out minus: boolean; out result: TDIY_FP); {$ifdef HASINLINE} inline;{$endif} type TSplitFloat = packed record case byte of 0: (f: double); 1: (b: array[0..7] of byte); 2: (w: array[0..3] of word); 3: (d: array[0..1] of cardinal); 4: (l: qword); end; var doublebits: TSplitFloat; begin {$ifdef FPC_DOUBLE_HILO_SWAPPED} // high and low cardinal are swapped when using the arm fpa doublebits.d[0] := TSplitFloat(f).d[1]; doublebits.d[1] := TSplitFloat(f).d[0]; {$else not FPC_DOUBLE_HILO_SWAPPED} doublebits.f := f; {$endif FPC_DOUBLE_HILO_SWAPPED} {$ifdef endian_big} minus := (doublebits.b[0] and $80 <> 0); result.e := (doublebits.w[0] shr 4) and $7FF; {$else endian_little} minus := (doublebits.b[7] and $80 <> 0); result.e := (doublebits.w[3] shr 4) and $7FF; {$endif endian} result.f := doublebits.l and $000FFFFFFFFFFFFF; end; const C_FRAC2_BITS = 52; C_EXP2_BIAS = 1023; C_DIY_FP_Q = 64; C_GRISU_ALPHA = -61; C_GRISU_GAMMA = 0; C_EXP2_SPECIAL = C_EXP2_BIAS * 2 + 1; C_MANT2_INTEGER = qword(1) shl C_FRAC2_BITS; type TAsciiDigits = array[0..47] of byte; PAsciiDigits = ^TAsciiDigits; // convert unsigned integers into decimal digits {$ifdef FPC_64} // leverage efficient FPC 64-bit division as mul reciprocal function d2a_gen_digits_64(buf: PAsciiDigits; x: qword): PtrInt; var tab: PWordArray; P: PAnsiChar; c100: qword; begin tab := @TwoDigitByteLookupW; // 0..99 value -> two byte digits (0..9) P := PAnsiChar(@buf[24]); // append backwards repeat if x >= 100 then begin dec(P, 2); c100 := x div 100; dec(x, c100 * 100); PWord(P)^ := tab[x]; if c100 = 0 then break; x := c100; continue; end; if x < 10 then begin dec(P); P^ := AnsiChar(x); break; end; dec(P, 2); PWord(P)^ := tab[x]; break; until false; PQWordArray(buf)[0] := PQWordArray(P)[0]; // faster than MoveSmall(P,buf,result) PQWordArray(buf)[1] := PQWordArray(P)[1]; PQWordArray(buf)[2] := PQWordArray(P)[2]; result := PAnsiChar(@buf[24]) - P; end; {$else not FPC_64} // use three 32-bit groups of digit function d2a_gen_digits_32(buf: PAsciiDigits; x: dword; pad_9zero: boolean): PtrInt; const digits: array[0..9] of cardinal = ( 0, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000); var n: PtrInt; m: cardinal; {$ifdef FPC} z: cardinal; {$else} d100: TDiv100Rec; {$endif FPC} tab: PWordArray; begin // Calculate amount of digits if x = 0 then n := 0 // emit nothing if padding is not required else begin n := integer((BSRdword(x) + 1) * 1233) shr 12; if x >= digits[n] then inc(n); end; if pad_9zero and (n < 9) then n := 9; result := n; if n = 0 then exit; // Emit digits dec(PByte(buf)); tab := @TwoDigitByteLookupW; m := x; while (n >= 2) and (m <> 0) do begin dec(n); {$ifdef FPC} // FPC will use fast mul reciprocal z := m div 100; // compute two 0..9 digits PWord(@buf[n])^ := tab^[m - z * 100]; m := z; {$else} Div100(m, d100); // our asm is faster than Delphi div operation PWord(@buf[n])^ := tab^[d100.M]; m := d100.D; {$endif FPC} dec(n); end; if n = 0 then exit; if m <> 0 then begin if m > 9 then m := m mod 10; // compute last 0..9 digit buf[n] := m; dec(n); if n = 0 then exit; end; repeat buf[n] := 0; // padding with 0 dec(n); until n = 0; end; function d2a_gen_digits_64(buf: PAsciiDigits; const x: qword): PtrInt; var n_digits: PtrInt; temp: qword; splitl, splitm, splith: cardinal; begin // Split X into 3 unsigned 32-bit integers; lower two should be < 10 digits long n_digits := 0; if x < 1000000000 then splitl := x else begin temp := x div 1000000000; splitl := x - temp * 1000000000; if temp < 1000000000 then splitm := temp else begin splith := temp div 1000000000; splitm := cardinal(temp) - splith * 1000000000; n_digits := d2a_gen_digits_32(buf, splith, false); // Generate hi digits end; inc(n_digits, d2a_gen_digits_32(@buf[n_digits], splitm, n_digits <> 0)); end; // Generate digits inc(n_digits, d2a_gen_digits_32(@buf[n_digits], splitl, n_digits <> 0)); result := n_digits; end; {$endif FPC_64} // Performs digit sequence rounding, returns decimal point correction function d2a_round_digits(var buf: TAsciiDigits; var n_current: integer; n_max: PtrInt; half_round_to_even: boolean = true): PtrInt; var n: PtrInt; dig_round, dig_sticky: byte; {$ifdef GRISU1_F2A_AGRESSIVE_ROUNDUP} i: PtrInt; {$endif} begin result := 0; n := n_current; n_current := n_max; // Get round digit dig_round := buf[n_max]; {$ifdef GRISU1_F2A_AGRESSIVE_ROUNDUP} // Detect if rounding-up the second last digit turns the "dig_round" // into "5"; also make sure we have at least 1 digit between "dig_round" // and the second last. if not half_round_to_even then if (dig_round = 4) and (n_max < n - 3) then if buf[n - 2] >= 8 then // somewhat arbitrary... begin // check for only "9" are in between i := n - 2; repeat dec(i); until (i = n_max) or (buf[i] <> 9); if i = n_max then // force round-up dig_round := 9; // any value ">=5" end; {$endif GRISU1_F2A_AGRESSIVE_ROUNDUP} if dig_round < 5 then exit; // Handle "round half to even" case if (dig_round = 5) and half_round_to_even and ((n_max = 0) or (buf[n_max - 1] and 1 = 0)) then begin // even and a half: check if exactly the half dig_sticky := 0; while (n > n_max + 1) and (dig_sticky = 0) do begin dec(n); dig_sticky := buf[n]; end; if dig_sticky = 0 then exit; // exactly a half -> no rounding is required end; // Round-up while n_max > 0 do begin dec(n_max); inc(buf[n_max]); if buf[n_max] < 10 then begin // no more overflow: stop now n_current := n_max + 1; exit; end; // continue rounding end; // Overflow out of the 1st digit, all n_max digits became 0 buf[0] := 1; n_current := 1; result := 1; end; // format the number in the fixed-point representation procedure d2a_return_fixed(str: PAnsiChar; minus: boolean; var digits: TAsciiDigits; n_digits_have, fixed_dot_pos, frac_digits: integer); var p: PAnsiChar; d: PByte; cut_digits_at, n_before_dot, n_before_dot_pad0, n_after_dot_pad0, n_after_dot, n_tail_pad0: integer; begin // Round digits if necessary cut_digits_at := fixed_dot_pos + frac_digits; if cut_digits_at < 0 then // zero n_digits_have := 0 else if cut_digits_at < n_digits_have then // round digits inc(fixed_dot_pos, d2a_round_digits(digits, n_digits_have, cut_digits_at {$ifdef GRISU1_F2A_HALF_ROUNDUP}, false {$endif} )); // Before dot: digits, pad0 if (fixed_dot_pos <= 0) or (n_digits_have = 0) then begin n_before_dot := 0; n_before_dot_pad0 := 1; end else if fixed_dot_pos > n_digits_have then begin n_before_dot := n_digits_have; n_before_dot_pad0 := fixed_dot_pos - n_digits_have; end else begin n_before_dot := fixed_dot_pos; n_before_dot_pad0 := 0; end; // After dot: pad0, digits, pad0 if fixed_dot_pos < 0 then n_after_dot_pad0 := -fixed_dot_pos else n_after_dot_pad0 := 0; if n_after_dot_pad0 > frac_digits then n_after_dot_pad0 := frac_digits; n_after_dot := n_digits_have - n_before_dot; n_tail_pad0 := frac_digits - n_after_dot - n_after_dot_pad0; p := str + 1; // Sign if minus then begin p^ := '-'; inc(p); end; // Integer significant digits d := @digits; if n_before_dot > 0 then repeat p^ := AnsiChar(d^ + ord('0')); inc(p); inc(d); dec(n_before_dot); until n_before_dot = 0; // Integer 0-padding if n_before_dot_pad0 > 0 then repeat p^ := '0'; inc(p); dec(n_before_dot_pad0); until n_before_dot_pad0 = 0; // if frac_digits <> 0 then begin // Dot p^ := '.'; inc(p); // Pre-fraction 0-padding if n_after_dot_pad0 > 0 then repeat p^ := '0'; inc(p); dec(n_after_dot_pad0); until n_after_dot_pad0 = 0; // Fraction significant digits if n_after_dot > 0 then repeat p^ := AnsiChar(d^ + ord('0')); inc(p); inc(d); dec(n_after_dot); until n_after_dot = 0; // Tail 0-padding if n_tail_pad0 > 0 then repeat p^ := '0'; inc(p); dec(n_tail_pad0); until n_tail_pad0 = 0; end; // Store length str[0] := AnsiChar(p - str - 1); end; // formats the number as exponential representation procedure d2a_return_exponential(str: PAnsiChar; minus: boolean; digits: PByte; n_digits_have, n_digits_req, d_exp: PtrInt); var p, exp: PAnsiChar; begin p := str + 1; // Sign if minus then begin p^ := '-'; inc(p); end; // Integer part if n_digits_have > 0 then begin p^ := AnsiChar(digits^ + ord('0')); dec(n_digits_have); end else p^ := '0'; inc(p); // Dot if n_digits_req > 1 then begin p^ := '.'; inc(p); end; // Fraction significant digits if n_digits_req < n_digits_have then n_digits_have := n_digits_req; if n_digits_have > 0 then begin repeat inc(digits); p^ := AnsiChar(digits^ + ord('0')); inc(p); dec(n_digits_have); until n_digits_have = 0; while p[-1] = '0' do dec(p); // trim #.###00000 -> #.### if p[-1] = '.' then dec(p); // #.0 -> # end; // Exponent designator p^ := 'E'; inc(p); // Exponent sign (+ is not stored, as in Delphi) if d_exp < 0 then begin p^ := '-'; d_exp := -d_exp; inc(p); end; // Exponent digits exp := pointer(SmallUInt32UTF8[d_exp]); // 0..999 range is fine PCardinal(p)^ := PCardinal(exp)^; inc(p, PStrLen(exp - _STRLEN)^); // Store length str[0] := AnsiChar(p - str - 1); end; /// set one of special results with proper sign procedure d2a_return_special(str: PAnsiChar; sign: integer; const spec: shortstring); begin // Compute length str[0] := spec[0]; if sign <> 0 then inc(str[0]); inc(str); // Sign if sign <> 0 then begin if sign > 0 then str^ := '+' else str^ := '-'; inc(str); end; // Special text (3 chars) PCardinal(str)^ := PCardinal(@spec[1])^; end; // Calculates the exp10 of a factor required to bring the binary exponent // of the original number into selected [ alpha .. gamma ] range: // result := ceiling[ ( alpha - e ) * log10(2) ] function d2a_k_comp(e, alpha{, gamma}: integer): integer; var dexp: double; const D_LOG10_2: double = 0.301029995663981195213738894724493027; // log10(2) var x, n: integer; begin x := alpha - e; dexp := x * D_LOG10_2; // ceil( dexp ) n := trunc(dexp); if x > 0 then if dexp <> n then inc(n); // round-up result := n; end; /// raw function to convert a 64-bit double into a shortstring, stored in str // - implements Fabian Loitsch's Grisu algorithm dedicated to double values // - currently, SynCommnons only set min_width=0 (for DoubleToShortNoExp to avoid // any scientific notation ) or min_width=C_NO_MIN_WIDTH (for DoubleToShort to // force the scientific notation when the double cannot be represented as // a simple fractinal number) procedure DoubleToAscii(min_width, frac_digits: integer; const v: double; str: PAnsiChar); var w, D: TDIY_FP; c_mk: TDIY_FP_Power_of_10; n, mk, dot_pos, n_digits_need, n_digits_have: integer; n_digits_req, n_digits_sci: integer; minus: boolean; fl, one_maskl: qword; one_e: integer; {$ifdef CPU32} one_mask, f: cardinal; // run a 2nd loop with 32-bit range {$endif CPU32} buf: TAsciiDigits; begin // Limit parameters if frac_digits > 216 then frac_digits := 216; // Delphi compatible if min_width <= C_NO_MIN_WIDTH then min_width := -1 // no minimal width else if min_width < 0 then min_width := 0; // minimal width is as short as possible // Format profile: select "n_digits_need" (and "n_digits_exp") n_digits_req := nDig_mantissa; // number of digits to be calculated by Grisu n_digits_need := nDig_mantissa; if n_digits_req < n_digits_need then n_digits_need := n_digits_req; // number of mantissa digits to be printed in exponential notation if min_width < 0 then n_digits_sci := n_digits_req else begin n_digits_sci := min_width -1 {sign} -1 {dot} -1 {E} -1 {E-sign} - nDig_exp10; if n_digits_sci < 2 then n_digits_sci := 2; // at least 2 digits if n_digits_sci > n_digits_req then n_digits_sci := n_digits_req; // at most requested by real_type end; // Float -> DIY_FP d2a_unpack_float(v, minus, w); // Handle Zero if (w.e = 0) and (w.f = 0) then begin {$ifdef GRISU1_F2A_ZERONOFRACT} PWord(str)^ := 1 + ord('0') shl 8; // just return '0' {$else} if frac_digits >= 0 then d2a_return_fixed(str, minus, buf, 0, 1, frac_digits) else d2a_return_exponential(str, minus, @buf, 0, n_digits_sci, 0); {$endif GRISU1_F2A_ZERONOFRACT} exit; end; // Handle specials if w.e = C_EXP2_SPECIAL then begin n := 1 - ord(minus) * 2; // default special sign [-1|+1] if w.f = 0 then d2a_return_special(str, n, C_STR_INF) else begin // NaN [also pseudo-NaN, pseudo-Inf, non-normal for floatx80] {$ifdef GRISU1_F2A_NAN_SIGNLESS} n := 0; {$endif} {$ifndef GRISU1_F2A_NO_SNAN} if (w.f and (C_MANT2_INTEGER shr 1)) = 0 then return_special(str, n, C_STR_SNAN) else {$endif GRISU1_F2A_NO_SNAN} d2a_return_special(str, n, C_STR_QNAN); end; exit; end; // Handle denormals if w.e <> 0 then begin // normal w.f := w.f or C_MANT2_INTEGER; n := C_DIY_FP_Q - C_FRAC2_BITS - 1; end else begin // denormal (w.e=0) n := 63 - BSRqword(w.f); // we are sure that w.f<>0 - see Handle Zero above inc(w.e); end; // Final normalization w.f := w.f shl n; dec(w.e, C_EXP2_BIAS + n + C_FRAC2_BITS); // 1. Find the normalized "c_mk = f_c * 2^e_c" such that // "alpha <= e_c + e_w + q <= gamma" // 2. Define "V = D * 10^k": multiply the input number by "c_mk", do not // normalize to land into [ alpha .. gamma ] // 3. Generate digits ( n_digits_need + "round" ) if (C_GRISU_ALPHA <= w.e) and (w.e <= C_GRISU_GAMMA) then begin // no scaling required D := w; c_mk.e10 := 0; end else begin mk := d2a_k_comp(w.e, C_GRISU_ALPHA{, C_GRISU_GAMMA} ); d2a_diy_fp_cached_power10(mk, c_mk); // Let "D = f_D * 2^e_D := w (*) c_mk" if c_mk.e10 = 0 then D := w else d2a_diy_fp_multiply(w, c_mk.c, false, D); end; // Generate digits: integer part n_digits_have := d2a_gen_digits_64(@buf, D.f shr (-D.e)); dot_pos := n_digits_have; // Generate digits: fractional part {$ifdef CPU32} f := 0; // "sticky" digit {$endif CPU32} if D.e < 0 then repeat // MOD by ONE one_e := D.e; one_maskl := qword(1) shl (-D.e) - 1; fl := D.f and one_maskl; // 64-bit loop (very efficient on x86_64, slower on i386) while {$ifdef CPU32} (one_e < -29) and {$endif} (n_digits_have < n_digits_need + 1) and (fl <> 0) do begin // f := f * 5; inc(fl, fl shl 2); // one := one / 2 one_maskl := one_maskl shr 1; inc(one_e); // DIV by one buf[n_digits_have] := fl shr (-one_e); // MOD by one fl := fl and one_maskl; // next inc(n_digits_have); end; {$ifdef CPU32} if n_digits_have >= n_digits_need + 1 then begin // only "sticky" digit remains f := ord(fl <> 0); break; end; one_mask := cardinal(one_maskl); f := cardinal(fl); // 32-bit loop while (n_digits_have < n_digits_need + 1) and (f <> 0) do begin // f := f * 5; inc(f, f shl 2); // one := one / 2 one_mask := one_mask shr 1; inc(one_e); // DIV by one buf[n_digits_have] := f shr (-one_e); // MOD by one f := f and one_mask; // next inc(n_digits_have); end; {$endif CPU32} until true; {$ifdef CPU32} // Append "sticky" digit if any if (f <> 0) and (n_digits_have >= n_digits_need + 1) then begin // single "<>0" digit is enough n_digits_have := n_digits_need + 2; buf[n_digits_need + 1] := 1; end; {$endif CPU32} // Round to n_digits_need using "roundTiesToEven" if n_digits_have > n_digits_need then inc(dot_pos, d2a_round_digits(buf, n_digits_have, n_digits_need)); // Generate output if frac_digits >= 0 then begin d2a_return_fixed(str, minus, buf, n_digits_have, dot_pos - c_mk.e10, frac_digits); exit; end; if n_digits_have > n_digits_sci then inc(dot_pos, d2a_round_digits(buf, n_digits_have, n_digits_sci {$ifdef GRISU1_F2A_HALF_ROUNDUP}, false {$endif} )); d2a_return_exponential(str, minus, @buf, n_digits_have, n_digits_sci, dot_pos - c_mk.e10 - 1); end;