All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
encodings.h
1 // Tencent is pleased to support the open source community by making RapidJSON available.
2 //
3 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
4 //
5 // Licensed under the MIT License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // http://opensource.org/licenses/MIT
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14 
15 #ifndef RAPIDJSON_ENCODINGS_H_
16 #define RAPIDJSON_ENCODINGS_H_
17 
18 #include "rapidjson.h"
19 
20 #ifdef _MSC_VER
21 RAPIDJSON_DIAG_PUSH
22 RAPIDJSON_DIAG_OFF(4244) // conversion from 'type1' to 'type2', possible loss of data
23 RAPIDJSON_DIAG_OFF(4702) // unreachable code
24 #elif defined(__GNUC__)
25 RAPIDJSON_DIAG_PUSH
26 RAPIDJSON_DIAG_OFF(effc++)
27 RAPIDJSON_DIAG_OFF(overflow)
28 #endif
29 
30 RAPIDJSON_NAMESPACE_BEGIN
31 
32 ///////////////////////////////////////////////////////////////////////////////
33 // Encoding
34 
35 /*! \class rapidjson::Encoding
36  \brief Concept for encoding of Unicode characters.
37 
38 \code
39 concept Encoding {
40  typename Ch; //! Type of character. A "character" is actually a code unit in unicode's definition.
41 
42  enum { supportUnicode = 1 }; // or 0 if not supporting unicode
43 
44  //! \brief Encode a Unicode codepoint to an output stream.
45  //! \param os Output stream.
46  //! \param codepoint A Unicode codepoint, ranging from 0x0 to 0x10FFFF inclusive.
47  template<typename OutputStream>
48  static void Encode(OutputStream& os, unsigned codepoint);
49 
50  //! \brief Decode a Unicode codepoint from an input stream.
51  //! \param is Input stream.
52  //! \param codepoint Output of the unicode codepoint.
53  //! \return true if a valid codepoint can be decoded from the stream.
54  template <typename InputStream>
55  static bool Decode(InputStream& is, unsigned* codepoint);
56 
57  //! \brief Validate one Unicode codepoint from an encoded stream.
58  //! \param is Input stream to obtain codepoint.
59  //! \param os Output for copying one codepoint.
60  //! \return true if it is valid.
61  //! \note This function only validates and copies the codepoint, without actually decoding it.
62  template <typename InputStream, typename OutputStream>
63  static bool Validate(InputStream& is, OutputStream& os);
64 
65  // The following functions deal with byte streams.
66 
67  //! Take a character from input byte stream, skipping the BOM if it exists.
68  template <typename InputByteStream>
69  static Ch TakeBOM(InputByteStream& is);
70 
71  //! Take a character from input byte stream.
72  template <typename InputByteStream>
73  static Ch Take(InputByteStream& is);
74 
75  //! Put BOM to output byte stream.
76  template <typename OutputByteStream>
77  static void PutBOM(OutputByteStream& os);
78 
79  //! Put a character to output byte stream.
80  template <typename OutputByteStream>
81  static void Put(OutputByteStream& os, Ch c);
82 };
83 \endcode
84 */
85 
86 ///////////////////////////////////////////////////////////////////////////////
87 // UTF8
88 
89 //! UTF-8 encoding.
90 /*! http://en.wikipedia.org/wiki/UTF-8
91  http://tools.ietf.org/html/rfc3629
92  \tparam CharType Code unit for storing 8-bit UTF-8 data. Default is char.
93  \note implements Encoding concept
94 */
95 template<typename CharType = char>
96 struct UTF8 {
97  typedef CharType Ch;
98 
99  enum { supportUnicode = 1 };
100 
101  template<typename OutputStream>
102  static void Encode(OutputStream& os, unsigned codepoint) {
103  if (codepoint <= 0x7F)
104  os.Put(static_cast<Ch>(codepoint & 0xFF));
105  else if (codepoint <= 0x7FF) {
106  os.Put(static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF)));
107  os.Put(static_cast<Ch>(0x80 | ((codepoint & 0x3F))));
108  }
109  else if (codepoint <= 0xFFFF) {
110  os.Put(static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF)));
111  os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
112  os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F)));
113  }
114  else {
115  RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
116  os.Put(static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF)));
117  os.Put(static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F)));
118  os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
119  os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F)));
120  }
121  }
122 
123  template <typename InputStream>
124  static bool Decode(InputStream& is, unsigned* codepoint) {
125 #define COPY() c = is.Take(); *codepoint = (*codepoint << 6) | ((unsigned char)c & 0x3Fu)
126 #define TRANS(mask) result &= ((GetRange((unsigned char)c) & mask) != 0)
127 #define TAIL() COPY(); TRANS(0x70)
128  Ch c = is.Take();
129  if (!(c & 0x80)) {
130  *codepoint = (unsigned char)c;
131  return true;
132  }
133 
134  unsigned char type = GetRange((unsigned char)c);
135  *codepoint = (0xFF >> type) & (unsigned char)c;
136  bool result = true;
137  switch (type) {
138  case 2: TAIL(); return result;
139  case 3: TAIL(); TAIL(); return result;
140  case 4: COPY(); TRANS(0x50); TAIL(); return result;
141  case 5: COPY(); TRANS(0x10); TAIL(); TAIL(); return result;
142  case 6: TAIL(); TAIL(); TAIL(); return result;
143  case 10: COPY(); TRANS(0x20); TAIL(); return result;
144  case 11: COPY(); TRANS(0x60); TAIL(); TAIL(); return result;
145  default: return false;
146  }
147 #undef COPY
148 #undef TRANS
149 #undef TAIL
150  }
151 
152  template <typename InputStream, typename OutputStream>
153  static bool Validate(InputStream& is, OutputStream& os) {
154 #define COPY() os.Put(c = is.Take())
155 #define TRANS(mask) result &= ((GetRange((unsigned char)c) & mask) != 0)
156 #define TAIL() COPY(); TRANS(0x70)
157  Ch c;
158  COPY();
159  if (!(c & 0x80))
160  return true;
161 
162  bool result = true;
163  switch (GetRange((unsigned char)c)) {
164  case 2: TAIL(); return result;
165  case 3: TAIL(); TAIL(); return result;
166  case 4: COPY(); TRANS(0x50); TAIL(); return result;
167  case 5: COPY(); TRANS(0x10); TAIL(); TAIL(); return result;
168  case 6: TAIL(); TAIL(); TAIL(); return result;
169  case 10: COPY(); TRANS(0x20); TAIL(); return result;
170  case 11: COPY(); TRANS(0x60); TAIL(); TAIL(); return result;
171  default: return false;
172  }
173 #undef COPY
174 #undef TRANS
175 #undef TAIL
176  }
177 
178  static unsigned char GetRange(unsigned char c) {
179  // Referring to DFA of http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
180  // With new mapping 1 -> 0x10, 7 -> 0x20, 9 -> 0x40, such that AND operation can test multiple types.
181  static const unsigned char type[] = {
182  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
183  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
184  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
185  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
186  0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
187  0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
188  0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
189  0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
190  8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
191  10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
192  };
193  return type[c];
194  }
195 
196  template <typename InputByteStream>
197  static CharType TakeBOM(InputByteStream& is) {
198  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
199  Ch c = Take(is);
200  if ((unsigned char)c != 0xEFu) return c;
201  c = is.Take();
202  if ((unsigned char)c != 0xBBu) return c;
203  c = is.Take();
204  if ((unsigned char)c != 0xBFu) return c;
205  c = is.Take();
206  return c;
207  }
208 
209  template <typename InputByteStream>
210  static Ch Take(InputByteStream& is) {
211  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
212  return is.Take();
213  }
214 
215  template <typename OutputByteStream>
216  static void PutBOM(OutputByteStream& os) {
217  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
218  os.Put(0xEFu); os.Put(0xBBu); os.Put(0xBFu);
219  }
220 
221  template <typename OutputByteStream>
222  static void Put(OutputByteStream& os, Ch c) {
223  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
224  os.Put(static_cast<typename OutputByteStream::Ch>(c));
225  }
226 };
227 
228 ///////////////////////////////////////////////////////////////////////////////
229 // UTF16
230 
231 //! UTF-16 encoding.
232 /*! http://en.wikipedia.org/wiki/UTF-16
233  http://tools.ietf.org/html/rfc2781
234  \tparam CharType Type for storing 16-bit UTF-16 data. Default is wchar_t. C++11 may use char16_t instead.
235  \note implements Encoding concept
236 
237  \note For in-memory access, there is no need to be concerned about endianness: code units and code points are represented in the CPU's endianness.
238  For streaming, use UTF16LE and UTF16BE, which handle endianness.
239 */
240 template<typename CharType = wchar_t>
241 struct UTF16 {
242  typedef CharType Ch;
243  RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 2);
244 
245  enum { supportUnicode = 1 };
246 
247  template<typename OutputStream>
248  static void Encode(OutputStream& os, unsigned codepoint) {
249  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2);
250  if (codepoint <= 0xFFFF) {
251  RAPIDJSON_ASSERT(codepoint < 0xD800 || codepoint > 0xDFFF); // Code point itself cannot be surrogate pair
252  os.Put(static_cast<typename OutputStream::Ch>(codepoint));
253  }
254  else {
255  RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
256  unsigned v = codepoint - 0x10000;
257  os.Put(static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800));
258  os.Put((v & 0x3FF) | 0xDC00);
259  }
260  }
261 
262  template <typename InputStream>
263  static bool Decode(InputStream& is, unsigned* codepoint) {
264  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2);
265  Ch c = is.Take();
266  if (c < 0xD800 || c > 0xDFFF) {
267  *codepoint = c;
268  return true;
269  }
270  else if (c <= 0xDBFF) {
271  *codepoint = (c & 0x3FF) << 10;
272  c = is.Take();
273  *codepoint |= (c & 0x3FF);
274  *codepoint += 0x10000;
275  return c >= 0xDC00 && c <= 0xDFFF;
276  }
277  return false;
278  }
279 
280  template <typename InputStream, typename OutputStream>
281  static bool Validate(InputStream& is, OutputStream& os) {
282  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2);
283  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2);
284  Ch c;
285  os.Put(c = is.Take());
286  if (c < 0xD800 || c > 0xDFFF)
287  return true;
288  else if (c <= 0xDBFF) {
289  os.Put(c = is.Take());
290  return c >= 0xDC00 && c <= 0xDFFF;
291  }
292  return false;
293  }
294 };
295 
296 //! UTF-16 little endian encoding.
297 template<typename CharType = wchar_t>
298 struct UTF16LE : UTF16<CharType> {
299  template <typename InputByteStream>
300  static CharType TakeBOM(InputByteStream& is) {
301  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
302  CharType c = Take(is);
303  return (unsigned short)c == 0xFEFFu ? Take(is) : c;
304  }
305 
306  template <typename InputByteStream>
307  static CharType Take(InputByteStream& is) {
308  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
309  CharType c = (unsigned char)is.Take();
310  c |= (unsigned char)is.Take() << 8;
311  return c;
312  }
313 
314  template <typename OutputByteStream>
315  static void PutBOM(OutputByteStream& os) {
316  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
317  os.Put(0xFFu); os.Put(0xFEu);
318  }
319 
320  template <typename OutputByteStream>
321  static void Put(OutputByteStream& os, CharType c) {
322  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
323  os.Put(c & 0xFFu);
324  os.Put((c >> 8) & 0xFFu);
325  }
326 };
327 
328 //! UTF-16 big endian encoding.
329 template<typename CharType = wchar_t>
330 struct UTF16BE : UTF16<CharType> {
331  template <typename InputByteStream>
332  static CharType TakeBOM(InputByteStream& is) {
333  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
334  CharType c = Take(is);
335  return (unsigned short)c == 0xFEFFu ? Take(is) : c;
336  }
337 
338  template <typename InputByteStream>
339  static CharType Take(InputByteStream& is) {
340  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
341  CharType c = (unsigned char)is.Take() << 8;
342  c |= (unsigned char)is.Take();
343  return c;
344  }
345 
346  template <typename OutputByteStream>
347  static void PutBOM(OutputByteStream& os) {
348  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
349  os.Put(0xFEu); os.Put(0xFFu);
350  }
351 
352  template <typename OutputByteStream>
353  static void Put(OutputByteStream& os, CharType c) {
354  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
355  os.Put((c >> 8) & 0xFFu);
356  os.Put(c & 0xFFu);
357  }
358 };
359 
360 ///////////////////////////////////////////////////////////////////////////////
361 // UTF32
362 
363 //! UTF-32 encoding.
364 /*! http://en.wikipedia.org/wiki/UTF-32
365  \tparam CharType Type for storing 32-bit UTF-32 data. Default is unsigned. C++11 may use char32_t instead.
366  \note implements Encoding concept
367 
368  \note For in-memory access, there is no need to be concerned about endianness: code units and code points are represented in the CPU's endianness.
369  For streaming, use UTF32LE and UTF32BE, which handle endianness.
370 */
371 template<typename CharType = unsigned>
372 struct UTF32 {
373  typedef CharType Ch;
374  RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 4);
375 
376  enum { supportUnicode = 1 };
377 
378  template<typename OutputStream>
379  static void Encode(OutputStream& os, unsigned codepoint) {
380  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4);
381  RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
382  os.Put(codepoint);
383  }
384 
385  template <typename InputStream>
386  static bool Decode(InputStream& is, unsigned* codepoint) {
387  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4);
388  Ch c = is.Take();
389  *codepoint = c;
390  return c <= 0x10FFFF;
391  }
392 
393  template <typename InputStream, typename OutputStream>
394  static bool Validate(InputStream& is, OutputStream& os) {
395  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4);
396  Ch c;
397  os.Put(c = is.Take());
398  return c <= 0x10FFFF;
399  }
400 };
401 
402 //! UTF-32 little endian encoding.
403 template<typename CharType = unsigned>
404 struct UTF32LE : UTF32<CharType> {
405  template <typename InputByteStream>
406  static CharType TakeBOM(InputByteStream& is) {
407  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
408  CharType c = Take(is);
409  return (unsigned)c == 0x0000FEFFu ? Take(is) : c;
410  }
411 
412  template <typename InputByteStream>
413  static CharType Take(InputByteStream& is) {
414  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
415  CharType c = (unsigned char)is.Take();
416  c |= (unsigned char)is.Take() << 8;
417  c |= (unsigned char)is.Take() << 16;
418  c |= (unsigned char)is.Take() << 24;
419  return c;
420  }
421 
422  template <typename OutputByteStream>
423  static void PutBOM(OutputByteStream& os) {
424  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
425  os.Put(0xFFu); os.Put(0xFEu); os.Put(0x00u); os.Put(0x00u);
426  }
427 
428  template <typename OutputByteStream>
429  static void Put(OutputByteStream& os, CharType c) {
430  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
431  os.Put(c & 0xFFu);
432  os.Put((c >> 8) & 0xFFu);
433  os.Put((c >> 16) & 0xFFu);
434  os.Put((c >> 24) & 0xFFu);
435  }
436 };
437 
438 //! UTF-32 big endian encoding.
439 template<typename CharType = unsigned>
440 struct UTF32BE : UTF32<CharType> {
441  template <typename InputByteStream>
442  static CharType TakeBOM(InputByteStream& is) {
443  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
444  CharType c = Take(is);
445  return (unsigned)c == 0x0000FEFFu ? Take(is) : c;
446  }
447 
448  template <typename InputByteStream>
449  static CharType Take(InputByteStream& is) {
450  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
451  CharType c = (unsigned char)is.Take() << 24;
452  c |= (unsigned char)is.Take() << 16;
453  c |= (unsigned char)is.Take() << 8;
454  c |= (unsigned char)is.Take();
455  return c;
456  }
457 
458  template <typename OutputByteStream>
459  static void PutBOM(OutputByteStream& os) {
460  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
461  os.Put(0x00u); os.Put(0x00u); os.Put(0xFEu); os.Put(0xFFu);
462  }
463 
464  template <typename OutputByteStream>
465  static void Put(OutputByteStream& os, CharType c) {
466  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
467  os.Put((c >> 24) & 0xFFu);
468  os.Put((c >> 16) & 0xFFu);
469  os.Put((c >> 8) & 0xFFu);
470  os.Put(c & 0xFFu);
471  }
472 };
473 
474 ///////////////////////////////////////////////////////////////////////////////
475 // ASCII
476 
477 //! ASCII encoding.
478 /*! http://en.wikipedia.org/wiki/ASCII
479  \tparam CharType Code unit for storing 7-bit ASCII data. Default is char.
480  \note implements Encoding concept
481 */
482 template<typename CharType = char>
483 struct ASCII {
484  typedef CharType Ch;
485 
486  enum { supportUnicode = 0 };
487 
488  template<typename OutputStream>
489  static void Encode(OutputStream& os, unsigned codepoint) {
490  RAPIDJSON_ASSERT(codepoint <= 0x7F);
491  os.Put(static_cast<Ch>(codepoint & 0xFF));
492  }
493 
494  template <typename InputStream>
495  static bool Decode(InputStream& is, unsigned* codepoint) {
496  unsigned char c = static_cast<unsigned char>(is.Take());
497  *codepoint = c;
498  return c <= 0X7F;
499  }
500 
501  template <typename InputStream, typename OutputStream>
502  static bool Validate(InputStream& is, OutputStream& os) {
503  unsigned char c = is.Take();
504  os.Put(c);
505  return c <= 0x7F;
506  }
507 
508  template <typename InputByteStream>
509  static CharType TakeBOM(InputByteStream& is) {
510  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
511  Ch c = Take(is);
512  return c;
513  }
514 
515  template <typename InputByteStream>
516  static Ch Take(InputByteStream& is) {
517  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
518  return is.Take();
519  }
520 
521  template <typename OutputByteStream>
522  static void PutBOM(OutputByteStream& os) {
523  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
524  (void)os;
525  }
526 
527  template <typename OutputByteStream>
528  static void Put(OutputByteStream& os, Ch c) {
529  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
530  os.Put(static_cast<typename OutputByteStream::Ch>(c));
531  }
532 };
533 
534 ///////////////////////////////////////////////////////////////////////////////
535 // AutoUTF
536 
537 //! Runtime-specified UTF encoding type of a stream.
enum UTFType {
    //! UTF-8.
    kUTF8 = 0,
    //! UTF-16 little endian.
    kUTF16LE = 1,
    //! UTF-16 big endian.
    kUTF16BE = 2,
    //! UTF-32 little endian.
    kUTF32LE = 3,
    //! UTF-32 big endian.
    kUTF32BE = 4
};
545 
546 //! Dynamically select encoding according to stream's runtime-specified UTF encoding type.
547 /*! \note This class can be used with AutoUTFInputStream and AutoUTFOutputStream, which provide GetType().
548 */
549 template<typename CharType>
550 struct AutoUTF {
551  typedef CharType Ch;
552 
553  enum { supportUnicode = 1 };
554 
555 #define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
556 
557  template<typename OutputStream>
558  RAPIDJSON_FORCEINLINE static void Encode(OutputStream& os, unsigned codepoint) {
559  typedef void (*EncodeFunc)(OutputStream&, unsigned);
560  static const EncodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Encode) };
561  (*f[os.GetType()])(os, codepoint);
562  }
563 
564  template <typename InputStream>
565  RAPIDJSON_FORCEINLINE static bool Decode(InputStream& is, unsigned* codepoint) {
566  typedef bool (*DecodeFunc)(InputStream&, unsigned*);
567  static const DecodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Decode) };
568  return (*f[is.GetType()])(is, codepoint);
569  }
570 
571  template <typename InputStream, typename OutputStream>
572  RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) {
573  typedef bool (*ValidateFunc)(InputStream&, OutputStream&);
574  static const ValidateFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Validate) };
575  return (*f[is.GetType()])(is, os);
576  }
577 
578 #undef RAPIDJSON_ENCODINGS_FUNC
579 };
580 
581 ///////////////////////////////////////////////////////////////////////////////
582 // Transcoder
583 
584 //! Encoding conversion.
585 template<typename SourceEncoding, typename TargetEncoding>
586 struct Transcoder {
587  //! Take one Unicode codepoint from source encoding, convert it to target encoding and put it to the output stream.
588  template<typename InputStream, typename OutputStream>
589  RAPIDJSON_FORCEINLINE static bool Transcode(InputStream& is, OutputStream& os) {
590  unsigned codepoint;
591  if (!SourceEncoding::Decode(is, &codepoint))
592  return false;
593  TargetEncoding::Encode(os, codepoint);
594  return true;
595  }
596 
597  //! Validate one Unicode codepoint from an encoded stream.
598  template<typename InputStream, typename OutputStream>
599  RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) {
600  return Transcode(is, os); // Since source/target encoding is different, must transcode.
601  }
602 };
603 
604 //! Specialization of Transcoder with same source and target encoding.
605 template<typename Encoding>
607  template<typename InputStream, typename OutputStream>
608  RAPIDJSON_FORCEINLINE static bool Transcode(InputStream& is, OutputStream& os) {
609  os.Put(is.Take()); // Just copy one code unit. This semantic is different from primary template class.
610  return true;
611  }
612 
613  template<typename InputStream, typename OutputStream>
614  RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) {
615  return Encoding::Validate(is, os); // source/target encoding are the same
616  }
617 };
618 
619 RAPIDJSON_NAMESPACE_END
620 
621 #if defined(__GNUC__) || defined(_MSV_VER)
622 RAPIDJSON_DIAG_POP
623 #endif
624 
625 #endif // RAPIDJSON_ENCODINGS_H_
Encoding conversion.
Definition: encodings.h:586
UTF-16 little endian encoding.
Definition: encodings.h:298
UTF-16 little endian.
Definition: encodings.h:540
UTF-16 big endian encoding.
Definition: encodings.h:330
UTF-32 little endian.
Definition: encodings.h:542
#define RAPIDJSON_STATIC_ASSERT(x)
(Internal) macro to check for conditions at compile-time
Definition: rapidjson.h:375
ASCII encoding.
Definition: encodings.h:483
static RAPIDJSON_FORCEINLINE bool Validate(InputStream &is, OutputStream &os)
Validate one Unicode codepoint from an encoded stream.
Definition: encodings.h:599
Concept for encoding of Unicode characters.
UTF-32 encoding.
Definition: encodings.h:372
UTF-8.
Definition: encodings.h:539
Dynamically select encoding according to stream's runtime-specified UTF encoding type.
Definition: encodings.h:550
UTF-16 big endian.
Definition: encodings.h:541
common definitions and configuration
UTF-8 encoding.
Definition: encodings.h:96
UTF-32 big endian.
Definition: encodings.h:543
UTF-32 little endian encoding.
Definition: encodings.h:404
static RAPIDJSON_FORCEINLINE bool Transcode(InputStream &is, OutputStream &os)
Take one Unicode codepoint from source encoding, convert it to target encoding and put it to the outp...
Definition: encodings.h:589
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:344
UTFType
Runtime-specified UTF encoding type of a stream.
Definition: encodings.h:538
UTF-16 encoding.
Definition: encodings.h:241
UTF-32 big endian encoding.
Definition: encodings.h:440