All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
diyfp.h
1 // Tencent is pleased to support the open source community by making RapidJSON available.
2 //
3 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
4 //
5 // Licensed under the MIT License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // http://opensource.org/licenses/MIT
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14 
15 // This is a C++ header-only implementation of Grisu2 algorithm from the publication:
16 // Loitsch, Florian. "Printing floating-point numbers quickly and accurately with
17 // integers." ACM Sigplan Notices 45.6 (2010): 233-243.
18 
19 #ifndef RAPIDJSON_DIYFP_H_
20 #define RAPIDJSON_DIYFP_H_
21 
22 #include "../rapidjson.h"
23 
24 #if defined(_MSC_VER) && defined(_M_AMD64)
25 #include <intrin.h>
26 #pragma intrinsic(_BitScanReverse64)
27 #pragma intrinsic(_umul128)
28 #endif
29 
30 RAPIDJSON_NAMESPACE_BEGIN
31 namespace internal {
32 
33 #ifdef __GNUC__
34 RAPIDJSON_DIAG_PUSH
35 RAPIDJSON_DIAG_OFF(effc++)
36 #endif
37 
38 struct DiyFp {
39  DiyFp() {}
40 
41  DiyFp(uint64_t fp, int exp) : f(fp), e(exp) {}
42 
43  explicit DiyFp(double d) {
44  union {
45  double d;
46  uint64_t u64;
47  } u = { d };
48 
49  int biased_e = static_cast<int>((u.u64 & kDpExponentMask) >> kDpSignificandSize);
50  uint64_t significand = (u.u64 & kDpSignificandMask);
51  if (biased_e != 0) {
52  f = significand + kDpHiddenBit;
53  e = biased_e - kDpExponentBias;
54  }
55  else {
56  f = significand;
57  e = kDpMinExponent + 1;
58  }
59  }
60 
61  DiyFp operator-(const DiyFp& rhs) const {
62  return DiyFp(f - rhs.f, e);
63  }
64 
65  DiyFp operator*(const DiyFp& rhs) const {
66 #if defined(_MSC_VER) && defined(_M_AMD64)
67  uint64_t h;
68  uint64_t l = _umul128(f, rhs.f, &h);
69  if (l & (uint64_t(1) << 63)) // rounding
70  h++;
71  return DiyFp(h, e + rhs.e + 64);
72 #elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__)
73  __extension__ typedef unsigned __int128 uint128;
74  uint128 p = static_cast<uint128>(f) * static_cast<uint128>(rhs.f);
75  uint64_t h = static_cast<uint64_t>(p >> 64);
76  uint64_t l = static_cast<uint64_t>(p);
77  if (l & (uint64_t(1) << 63)) // rounding
78  h++;
79  return DiyFp(h, e + rhs.e + 64);
80 #else
81  const uint64_t M32 = 0xFFFFFFFF;
82  const uint64_t a = f >> 32;
83  const uint64_t b = f & M32;
84  const uint64_t c = rhs.f >> 32;
85  const uint64_t d = rhs.f & M32;
86  const uint64_t ac = a * c;
87  const uint64_t bc = b * c;
88  const uint64_t ad = a * d;
89  const uint64_t bd = b * d;
90  uint64_t tmp = (bd >> 32) + (ad & M32) + (bc & M32);
91  tmp += 1U << 31; /// mult_round
92  return DiyFp(ac + (ad >> 32) + (bc >> 32) + (tmp >> 32), e + rhs.e + 64);
93 #endif
94  }
95 
96  DiyFp Normalize() const {
97 #if defined(_MSC_VER) && defined(_M_AMD64)
98  unsigned long index;
99  _BitScanReverse64(&index, f);
100  return DiyFp(f << (63 - index), e - (63 - index));
101 #elif defined(__GNUC__) && __GNUC__ >= 4
102  int s = __builtin_clzll(f);
103  return DiyFp(f << s, e - s);
104 #else
105  DiyFp res = *this;
106  while (!(res.f & (static_cast<uint64_t>(1) << 63))) {
107  res.f <<= 1;
108  res.e--;
109  }
110  return res;
111 #endif
112  }
113 
114  DiyFp NormalizeBoundary() const {
115  DiyFp res = *this;
116  while (!(res.f & (kDpHiddenBit << 1))) {
117  res.f <<= 1;
118  res.e--;
119  }
120  res.f <<= (kDiySignificandSize - kDpSignificandSize - 2);
121  res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 2);
122  return res;
123  }
124 
125  void NormalizedBoundaries(DiyFp* minus, DiyFp* plus) const {
126  DiyFp pl = DiyFp((f << 1) + 1, e - 1).NormalizeBoundary();
127  DiyFp mi = (f == kDpHiddenBit) ? DiyFp((f << 2) - 1, e - 2) : DiyFp((f << 1) - 1, e - 1);
128  mi.f <<= mi.e - pl.e;
129  mi.e = pl.e;
130  *plus = pl;
131  *minus = mi;
132  }
133 
134  double ToDouble() const {
135  union {
136  double d;
137  uint64_t u64;
138  }u;
139  const uint64_t be = (e == kDpDenormalExponent && (f & kDpHiddenBit) == 0) ? 0 :
140  static_cast<uint64_t>(e + kDpExponentBias);
141  u.u64 = (f & kDpSignificandMask) | (be << kDpSignificandSize);
142  return u.d;
143  }
144 
145  static const int kDiySignificandSize = 64;
146  static const int kDpSignificandSize = 52;
147  static const int kDpExponentBias = 0x3FF + kDpSignificandSize;
148  static const int kDpMaxExponent = 0x7FF - kDpExponentBias;
149  static const int kDpMinExponent = -kDpExponentBias;
150  static const int kDpDenormalExponent = -kDpExponentBias + 1;
151  static const uint64_t kDpExponentMask = RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000);
152  static const uint64_t kDpSignificandMask = RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF);
153  static const uint64_t kDpHiddenBit = RAPIDJSON_UINT64_C2(0x00100000, 0x00000000);
154 
155  uint64_t f;
156  int e;
157 };
158 
159 inline DiyFp GetCachedPowerByIndex(size_t index) {
160  // 10^-348, 10^-340, ..., 10^340
161  static const uint64_t kCachedPowers_F[] = {
162  RAPIDJSON_UINT64_C2(0xfa8fd5a0, 0x081c0288), RAPIDJSON_UINT64_C2(0xbaaee17f, 0xa23ebf76),
163  RAPIDJSON_UINT64_C2(0x8b16fb20, 0x3055ac76), RAPIDJSON_UINT64_C2(0xcf42894a, 0x5dce35ea),
164  RAPIDJSON_UINT64_C2(0x9a6bb0aa, 0x55653b2d), RAPIDJSON_UINT64_C2(0xe61acf03, 0x3d1a45df),
165  RAPIDJSON_UINT64_C2(0xab70fe17, 0xc79ac6ca), RAPIDJSON_UINT64_C2(0xff77b1fc, 0xbebcdc4f),
166  RAPIDJSON_UINT64_C2(0xbe5691ef, 0x416bd60c), RAPIDJSON_UINT64_C2(0x8dd01fad, 0x907ffc3c),
167  RAPIDJSON_UINT64_C2(0xd3515c28, 0x31559a83), RAPIDJSON_UINT64_C2(0x9d71ac8f, 0xada6c9b5),
168  RAPIDJSON_UINT64_C2(0xea9c2277, 0x23ee8bcb), RAPIDJSON_UINT64_C2(0xaecc4991, 0x4078536d),
169  RAPIDJSON_UINT64_C2(0x823c1279, 0x5db6ce57), RAPIDJSON_UINT64_C2(0xc2109436, 0x4dfb5637),
170  RAPIDJSON_UINT64_C2(0x9096ea6f, 0x3848984f), RAPIDJSON_UINT64_C2(0xd77485cb, 0x25823ac7),
171  RAPIDJSON_UINT64_C2(0xa086cfcd, 0x97bf97f4), RAPIDJSON_UINT64_C2(0xef340a98, 0x172aace5),
172  RAPIDJSON_UINT64_C2(0xb23867fb, 0x2a35b28e), RAPIDJSON_UINT64_C2(0x84c8d4df, 0xd2c63f3b),
173  RAPIDJSON_UINT64_C2(0xc5dd4427, 0x1ad3cdba), RAPIDJSON_UINT64_C2(0x936b9fce, 0xbb25c996),
174  RAPIDJSON_UINT64_C2(0xdbac6c24, 0x7d62a584), RAPIDJSON_UINT64_C2(0xa3ab6658, 0x0d5fdaf6),
175  RAPIDJSON_UINT64_C2(0xf3e2f893, 0xdec3f126), RAPIDJSON_UINT64_C2(0xb5b5ada8, 0xaaff80b8),
176  RAPIDJSON_UINT64_C2(0x87625f05, 0x6c7c4a8b), RAPIDJSON_UINT64_C2(0xc9bcff60, 0x34c13053),
177  RAPIDJSON_UINT64_C2(0x964e858c, 0x91ba2655), RAPIDJSON_UINT64_C2(0xdff97724, 0x70297ebd),
178  RAPIDJSON_UINT64_C2(0xa6dfbd9f, 0xb8e5b88f), RAPIDJSON_UINT64_C2(0xf8a95fcf, 0x88747d94),
179  RAPIDJSON_UINT64_C2(0xb9447093, 0x8fa89bcf), RAPIDJSON_UINT64_C2(0x8a08f0f8, 0xbf0f156b),
180  RAPIDJSON_UINT64_C2(0xcdb02555, 0x653131b6), RAPIDJSON_UINT64_C2(0x993fe2c6, 0xd07b7fac),
181  RAPIDJSON_UINT64_C2(0xe45c10c4, 0x2a2b3b06), RAPIDJSON_UINT64_C2(0xaa242499, 0x697392d3),
182  RAPIDJSON_UINT64_C2(0xfd87b5f2, 0x8300ca0e), RAPIDJSON_UINT64_C2(0xbce50864, 0x92111aeb),
183  RAPIDJSON_UINT64_C2(0x8cbccc09, 0x6f5088cc), RAPIDJSON_UINT64_C2(0xd1b71758, 0xe219652c),
184  RAPIDJSON_UINT64_C2(0x9c400000, 0x00000000), RAPIDJSON_UINT64_C2(0xe8d4a510, 0x00000000),
185  RAPIDJSON_UINT64_C2(0xad78ebc5, 0xac620000), RAPIDJSON_UINT64_C2(0x813f3978, 0xf8940984),
186  RAPIDJSON_UINT64_C2(0xc097ce7b, 0xc90715b3), RAPIDJSON_UINT64_C2(0x8f7e32ce, 0x7bea5c70),
187  RAPIDJSON_UINT64_C2(0xd5d238a4, 0xabe98068), RAPIDJSON_UINT64_C2(0x9f4f2726, 0x179a2245),
188  RAPIDJSON_UINT64_C2(0xed63a231, 0xd4c4fb27), RAPIDJSON_UINT64_C2(0xb0de6538, 0x8cc8ada8),
189  RAPIDJSON_UINT64_C2(0x83c7088e, 0x1aab65db), RAPIDJSON_UINT64_C2(0xc45d1df9, 0x42711d9a),
190  RAPIDJSON_UINT64_C2(0x924d692c, 0xa61be758), RAPIDJSON_UINT64_C2(0xda01ee64, 0x1a708dea),
191  RAPIDJSON_UINT64_C2(0xa26da399, 0x9aef774a), RAPIDJSON_UINT64_C2(0xf209787b, 0xb47d6b85),
192  RAPIDJSON_UINT64_C2(0xb454e4a1, 0x79dd1877), RAPIDJSON_UINT64_C2(0x865b8692, 0x5b9bc5c2),
193  RAPIDJSON_UINT64_C2(0xc83553c5, 0xc8965d3d), RAPIDJSON_UINT64_C2(0x952ab45c, 0xfa97a0b3),
194  RAPIDJSON_UINT64_C2(0xde469fbd, 0x99a05fe3), RAPIDJSON_UINT64_C2(0xa59bc234, 0xdb398c25),
195  RAPIDJSON_UINT64_C2(0xf6c69a72, 0xa3989f5c), RAPIDJSON_UINT64_C2(0xb7dcbf53, 0x54e9bece),
196  RAPIDJSON_UINT64_C2(0x88fcf317, 0xf22241e2), RAPIDJSON_UINT64_C2(0xcc20ce9b, 0xd35c78a5),
197  RAPIDJSON_UINT64_C2(0x98165af3, 0x7b2153df), RAPIDJSON_UINT64_C2(0xe2a0b5dc, 0x971f303a),
198  RAPIDJSON_UINT64_C2(0xa8d9d153, 0x5ce3b396), RAPIDJSON_UINT64_C2(0xfb9b7cd9, 0xa4a7443c),
199  RAPIDJSON_UINT64_C2(0xbb764c4c, 0xa7a44410), RAPIDJSON_UINT64_C2(0x8bab8eef, 0xb6409c1a),
200  RAPIDJSON_UINT64_C2(0xd01fef10, 0xa657842c), RAPIDJSON_UINT64_C2(0x9b10a4e5, 0xe9913129),
201  RAPIDJSON_UINT64_C2(0xe7109bfb, 0xa19c0c9d), RAPIDJSON_UINT64_C2(0xac2820d9, 0x623bf429),
202  RAPIDJSON_UINT64_C2(0x80444b5e, 0x7aa7cf85), RAPIDJSON_UINT64_C2(0xbf21e440, 0x03acdd2d),
203  RAPIDJSON_UINT64_C2(0x8e679c2f, 0x5e44ff8f), RAPIDJSON_UINT64_C2(0xd433179d, 0x9c8cb841),
204  RAPIDJSON_UINT64_C2(0x9e19db92, 0xb4e31ba9), RAPIDJSON_UINT64_C2(0xeb96bf6e, 0xbadf77d9),
205  RAPIDJSON_UINT64_C2(0xaf87023b, 0x9bf0ee6b)
206  };
207  static const int16_t kCachedPowers_E[] = {
208  -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980,
209  -954, -927, -901, -874, -847, -821, -794, -768, -741, -715,
210  -688, -661, -635, -608, -582, -555, -529, -502, -475, -449,
211  -422, -396, -369, -343, -316, -289, -263, -236, -210, -183,
212  -157, -130, -103, -77, -50, -24, 3, 30, 56, 83,
213  109, 136, 162, 189, 216, 242, 269, 295, 322, 348,
214  375, 402, 428, 455, 481, 508, 534, 561, 588, 614,
215  641, 667, 694, 720, 747, 774, 800, 827, 853, 880,
216  907, 933, 960, 986, 1013, 1039, 1066
217  };
218  return DiyFp(kCachedPowers_F[index], kCachedPowers_E[index]);
219 }
220 
221 inline DiyFp GetCachedPower(int e, int* K) {
222 
223  //int k = static_cast<int>(ceil((-61 - e) * 0.30102999566398114)) + 374;
224  double dk = (-61 - e) * 0.30102999566398114 + 347; // dk must be positive, so can do ceiling in positive
225  int k = static_cast<int>(dk);
226  if (dk - k > 0.0)
227  k++;
228 
229  unsigned index = static_cast<unsigned>((k >> 3) + 1);
230  *K = -(-348 + static_cast<int>(index << 3)); // decimal exponent no need lookup table
231 
232  return GetCachedPowerByIndex(index);
233 }
234 
235 inline DiyFp GetCachedPower10(int exp, int *outExp) {
236  unsigned index = (static_cast<unsigned>(exp) + 348u) / 8u;
237  *outExp = -348 + static_cast<int>(index) * 8;
238  return GetCachedPowerByIndex(index);
239  }
240 
241 #ifdef __GNUC__
242 RAPIDJSON_DIAG_POP
243 #endif
244 
245 } // namespace internal
246 RAPIDJSON_NAMESPACE_END
247 
248 #endif // RAPIDJSON_DIYFP_H_
#define RAPIDJSON_UINT64_C2(high32, low32)
Construct a 64-bit literal by a pair of 32-bit integer.
Definition: rapidjson.h:261