717 lines
		
	
	
		
			28 KiB
		
	
	
	
		
			C
		
	
	
	
		
		
			
		
	
	
			717 lines
		
	
	
		
			28 KiB
		
	
	
	
		
			C
		
	
	
	
|  | // Tencent is pleased to support the open source community by making RapidJSON available.
 | ||
|  | // 
 | ||
|  | // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
 | ||
|  | //
 | ||
|  | // Licensed under the MIT License (the "License"); you may not use this file except
 | ||
|  | // in compliance with the License. You may obtain a copy of the License at
 | ||
|  | //
 | ||
|  | // http://opensource.org/licenses/MIT
 | ||
|  | //
 | ||
|  | // Unless required by applicable law or agreed to in writing, software distributed 
 | ||
|  | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
 | ||
|  | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 
 | ||
|  | // specific language governing permissions and limitations under the License.
 | ||
|  | 
 | ||
|  | #ifndef RAPIDJSON_ENCODINGS_H_
 | ||
|  | #define RAPIDJSON_ENCODINGS_H_
 | ||
|  | 
 | ||
|  | #include "rapidjson.h"
 | ||
|  | 
 | ||
|  | #ifdef _MSC_VER
 | ||
|  | RAPIDJSON_DIAG_PUSH | ||
|  | RAPIDJSON_DIAG_OFF(4244) // conversion from 'type1' to 'type2', possible loss of data
 | ||
|  | RAPIDJSON_DIAG_OFF(4702)  // unreachable code
 | ||
|  | #elif defined(__GNUC__)
 | ||
|  | RAPIDJSON_DIAG_PUSH | ||
|  | RAPIDJSON_DIAG_OFF(effc++) | ||
|  | RAPIDJSON_DIAG_OFF(overflow) | ||
|  | #endif
 | ||
|  | 
 | ||
|  | RAPIDJSON_NAMESPACE_BEGIN | ||
|  | 
 | ||
|  | ///////////////////////////////////////////////////////////////////////////////
 | ||
|  | // Encoding
 | ||
|  | 
 | ||
|  | /*! \class rapidjson::Encoding
 | ||
|  |     \brief Concept for encoding of Unicode characters. | ||
|  | 
 | ||
|  | \code | ||
|  | concept Encoding { | ||
|  |     typename Ch;    //! Type of character. A "character" is actually a code unit in unicode's definition.
 | ||
|  | 
 | ||
|  |     enum { supportUnicode = 1 }; // or 0 if not supporting unicode
 | ||
|  | 
 | ||
|  |     //! \brief Encode a Unicode codepoint to an output stream.
 | ||
|  |     //! \param os Output stream.
 | ||
|  |     //! \param codepoint A Unicode codepoint, ranging from 0x0 to 0x10FFFF inclusive.
 | ||
|  |     template<typename OutputStream> | ||
|  |     static void Encode(OutputStream& os, unsigned codepoint); | ||
|  | 
 | ||
|  |     //! \brief Decode a Unicode codepoint from an input stream.
 | ||
|  |     //! \param is Input stream.
 | ||
|  |     //! \param codepoint Output of the unicode codepoint.
 | ||
|  |     //! \return true if a valid codepoint can be decoded from the stream.
 | ||
|  |     template <typename InputStream> | ||
|  |     static bool Decode(InputStream& is, unsigned* codepoint); | ||
|  | 
 | ||
|  |     //! \brief Validate one Unicode codepoint from an encoded stream.
 | ||
|  |     //! \param is Input stream to obtain codepoint.
 | ||
|  |     //! \param os Output for copying one codepoint.
 | ||
|  |     //! \return true if it is valid.
 | ||
|  |     //! \note This function only validates and copies the codepoint; it does not actually decode it.
 | ||
|  |     template <typename InputStream, typename OutputStream> | ||
|  |     static bool Validate(InputStream& is, OutputStream& os); | ||
|  | 
 | ||
|  |     // The following functions deal with byte streams.
 | ||
|  | 
 | ||
|  |     //! Take a character from input byte stream, skip BOM if exist.
 | ||
|  |     template <typename InputByteStream> | ||
|  |     static Ch TakeBOM(InputByteStream& is); | ||
|  | 
 | ||
|  |     //! Take a character from input byte stream.
 | ||
|  |     template <typename InputByteStream> | ||
|  |     static Ch Take(InputByteStream& is); | ||
|  | 
 | ||
|  |     //! Put BOM to output byte stream.
 | ||
|  |     template <typename OutputByteStream> | ||
|  |     static void PutBOM(OutputByteStream& os); | ||
|  | 
 | ||
|  |     //! Put a character to output byte stream.
 | ||
|  |     template <typename OutputByteStream> | ||
|  |     static void Put(OutputByteStream& os, Ch c); | ||
|  | }; | ||
|  | \endcode | ||
|  | */ | ||
|  | 
 | ||
|  | ///////////////////////////////////////////////////////////////////////////////
 | ||
|  | // UTF8
 | ||
|  | 
 | ||
|  | //! UTF-8 encoding.
 | ||
|  | /*! http://en.wikipedia.org/wiki/UTF-8
 | ||
|  |     http://tools.ietf.org/html/rfc3629
 | ||
|  |     \tparam CharType Code unit for storing 8-bit UTF-8 data. Default is char. | ||
|  |     \note implements Encoding concept | ||
|  | */ | ||
|  | template<typename CharType = char> | ||
|  | struct UTF8 { | ||
|  |     typedef CharType Ch; | ||
|  | 
 | ||
|  |     enum { supportUnicode = 1 }; | ||
|  | 
 | ||
|  |     template<typename OutputStream> | ||
|  |     static void Encode(OutputStream& os, unsigned codepoint) { | ||
|  |         if (codepoint <= 0x7F)  | ||
|  |             os.Put(static_cast<Ch>(codepoint & 0xFF)); | ||
|  |         else if (codepoint <= 0x7FF) { | ||
|  |             os.Put(static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF))); | ||
|  |             os.Put(static_cast<Ch>(0x80 | ((codepoint & 0x3F)))); | ||
|  |         } | ||
|  |         else if (codepoint <= 0xFFFF) { | ||
|  |             os.Put(static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF))); | ||
|  |             os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F))); | ||
|  |             os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F))); | ||
|  |         } | ||
|  |         else { | ||
|  |             RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); | ||
|  |             os.Put(static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF))); | ||
|  |             os.Put(static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F))); | ||
|  |             os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F))); | ||
|  |             os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F))); | ||
|  |         } | ||
|  |     } | ||
|  | 
 | ||
|  |     template<typename OutputStream> | ||
|  |     static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { | ||
|  |         if (codepoint <= 0x7F)  | ||
|  |             PutUnsafe(os, static_cast<Ch>(codepoint & 0xFF)); | ||
|  |         else if (codepoint <= 0x7FF) { | ||
|  |             PutUnsafe(os, static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF))); | ||
|  |             PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint & 0x3F)))); | ||
|  |         } | ||
|  |         else if (codepoint <= 0xFFFF) { | ||
|  |             PutUnsafe(os, static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF))); | ||
|  |             PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F))); | ||
|  |             PutUnsafe(os, static_cast<Ch>(0x80 | (codepoint & 0x3F))); | ||
|  |         } | ||
|  |         else { | ||
|  |             RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); | ||
|  |             PutUnsafe(os, static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF))); | ||
|  |             PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F))); | ||
|  |             PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F))); | ||
|  |             PutUnsafe(os, static_cast<Ch>(0x80 | (codepoint & 0x3F))); | ||
|  |         } | ||
|  |     } | ||
|  | 
 | ||
|  |     template <typename InputStream> | ||
|  |     static bool Decode(InputStream& is, unsigned* codepoint) { | ||
|  | #define COPY() c = is.Take(); *codepoint = (*codepoint << 6) | (static_cast<unsigned char>(c) & 0x3Fu)
 | ||
|  | #define TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0)
 | ||
|  | #define TAIL() COPY(); TRANS(0x70)
 | ||
|  |         typename InputStream::Ch c = is.Take(); | ||
|  |         if (!(c & 0x80)) { | ||
|  |             *codepoint = static_cast<unsigned char>(c); | ||
|  |             return true; | ||
|  |         } | ||
|  | 
 | ||
|  |         unsigned char type = GetRange(static_cast<unsigned char>(c)); | ||
|  |         if (type >= 32) { | ||
|  |             *codepoint = 0; | ||
|  |         } else { | ||
|  |             *codepoint = (0xFF >> type) & static_cast<unsigned char>(c); | ||
|  |         } | ||
|  |         bool result = true; | ||
|  |         switch (type) { | ||
|  |         case 2: TAIL(); return result; | ||
|  |         case 3: TAIL(); TAIL(); return result; | ||
|  |         case 4: COPY(); TRANS(0x50); TAIL(); return result; | ||
|  |         case 5: COPY(); TRANS(0x10); TAIL(); TAIL(); return result; | ||
|  |         case 6: TAIL(); TAIL(); TAIL(); return result; | ||
|  |         case 10: COPY(); TRANS(0x20); TAIL(); return result; | ||
|  |         case 11: COPY(); TRANS(0x60); TAIL(); TAIL(); return result; | ||
|  |         default: return false; | ||
|  |         } | ||
|  | #undef COPY
 | ||
|  | #undef TRANS
 | ||
|  | #undef TAIL
 | ||
|  |     } | ||
|  | 
 | ||
|  |     template <typename InputStream, typename OutputStream> | ||
|  |     static bool Validate(InputStream& is, OutputStream& os) { | ||
|  | #define COPY() os.Put(c = is.Take())
 | ||
|  | #define TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0)
 | ||
|  | #define TAIL() COPY(); TRANS(0x70)
 | ||
|  |         Ch c; | ||
|  |         COPY(); | ||
|  |         if (!(c & 0x80)) | ||
|  |             return true; | ||
|  | 
 | ||
|  |         bool result = true; | ||
|  |         switch (GetRange(static_cast<unsigned char>(c))) { | ||
|  |         case 2: TAIL(); return result; | ||
|  |         case 3: TAIL(); TAIL(); return result; | ||
|  |         case 4: COPY(); TRANS(0x50); TAIL(); return result; | ||
|  |         case 5: COPY(); TRANS(0x10); TAIL(); TAIL(); return result; | ||
|  |         case 6: TAIL(); TAIL(); TAIL(); return result; | ||
|  |         case 10: COPY(); TRANS(0x20); TAIL(); return result; | ||
|  |         case 11: COPY(); TRANS(0x60); TAIL(); TAIL(); return result; | ||
|  |         default: return false; | ||
|  |         } | ||
|  | #undef COPY
 | ||
|  | #undef TRANS
 | ||
|  | #undef TAIL
 | ||
|  |     } | ||
|  | 
 | ||
|  |     static unsigned char GetRange(unsigned char c) { | ||
|  |         // Referring to DFA of http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
 | ||
|  |         // With new mapping 1 -> 0x10, 7 -> 0x20, 9 -> 0x40, such that AND operation can test multiple types.
 | ||
|  |         static const unsigned char type[] = { | ||
|  |             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | ||
|  |             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | ||
|  |             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | ||
|  |             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | ||
|  |             0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10, | ||
|  |             0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, | ||
|  |             0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20, | ||
|  |             0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20, | ||
|  |             8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, | ||
|  |             10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, | ||
|  |         }; | ||
|  |         return type[c]; | ||
|  |     } | ||
|  | 
 | ||
|  |     template <typename InputByteStream> | ||
|  |     static CharType TakeBOM(InputByteStream& is) { | ||
|  |         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); | ||
|  |         typename InputByteStream::Ch c = Take(is); | ||
|  |         if (static_cast<unsigned char>(c) != 0xEFu) return c; | ||
|  |         c = is.Take(); | ||
|  |         if (static_cast<unsigned char>(c) != 0xBBu) return c; | ||
|  |         c = is.Take(); | ||
|  |         if (static_cast<unsigned char>(c) != 0xBFu) return c; | ||
|  |         c = is.Take(); | ||
|  |         return c; | ||
|  |     } | ||
|  | 
 | ||
|  |     template <typename InputByteStream> | ||
|  |     static Ch Take(InputByteStream& is) { | ||
|  |         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); | ||
|  |         return static_cast<Ch>(is.Take()); | ||
|  |     } | ||
|  | 
 | ||
|  |     template <typename OutputByteStream> | ||
|  |     static void PutBOM(OutputByteStream& os) { | ||
|  |         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); | ||
|  |         os.Put(static_cast<typename OutputByteStream::Ch>(0xEFu)); | ||
|  |         os.Put(static_cast<typename OutputByteStream::Ch>(0xBBu)); | ||
|  |         os.Put(static_cast<typename OutputByteStream::Ch>(0xBFu)); | ||
|  |     } | ||
|  | 
 | ||
|  |     template <typename OutputByteStream> | ||
|  |     static void Put(OutputByteStream& os, Ch c) { | ||
|  |         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); | ||
|  |         os.Put(static_cast<typename OutputByteStream::Ch>(c)); | ||
|  |     } | ||
|  | }; | ||
|  | 
 | ||
|  | ///////////////////////////////////////////////////////////////////////////////
 | ||
|  | // UTF16
 | ||
|  | 
 | ||
|  | //! UTF-16 encoding.
 | ||
|  | /*! http://en.wikipedia.org/wiki/UTF-16
 | ||
|  |     http://tools.ietf.org/html/rfc2781
 | ||
|  |     \tparam CharType Type for storing 16-bit UTF-16 data. Default is wchar_t. C++11 may use char16_t instead. | ||
|  |     \note implements Encoding concept | ||
|  | 
 | ||
|  |     \note For in-memory access, no need to concern endianness. The code units and code points are represented by CPU's endianness. | ||
|  |     For streaming, use UTF16LE and UTF16BE, which handle endianness. | ||
|  | */ | ||
|  | template<typename CharType = wchar_t> | ||
|  | struct UTF16 { | ||
|  |     typedef CharType Ch; | ||
|  |     RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 2); | ||
|  | 
 | ||
|  |     enum { supportUnicode = 1 }; | ||
|  | 
 | ||
|  |     template<typename OutputStream> | ||
|  |     static void Encode(OutputStream& os, unsigned codepoint) { | ||
|  |         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2); | ||
|  |         if (codepoint <= 0xFFFF) { | ||
|  |             RAPIDJSON_ASSERT(codepoint < 0xD800 || codepoint > 0xDFFF); // Code point itself cannot be surrogate pair 
 | ||
|  |             os.Put(static_cast<typename OutputStream::Ch>(codepoint)); | ||
|  |         } | ||
|  |         else { | ||
|  |             RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); | ||
|  |             unsigned v = codepoint - 0x10000; | ||
|  |             os.Put(static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800)); | ||
|  |             os.Put((v & 0x3FF) | 0xDC00); | ||
|  |         } | ||
|  |     } | ||
|  | 
 | ||
|  | 
 | ||
|  |     template<typename OutputStream> | ||
|  |     static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { | ||
|  |         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2); | ||
|  |         if (codepoint <= 0xFFFF) { | ||
|  |             RAPIDJSON_ASSERT(codepoint < 0xD800 || codepoint > 0xDFFF); // Code point itself cannot be surrogate pair 
 | ||
|  |             PutUnsafe(os, static_cast<typename OutputStream::Ch>(codepoint)); | ||
|  |         } | ||
|  |         else { | ||
|  |             RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); | ||
|  |             unsigned v = codepoint - 0x10000; | ||
|  |             PutUnsafe(os, static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800)); | ||
|  |             PutUnsafe(os, (v & 0x3FF) | 0xDC00); | ||
|  |         } | ||
|  |     } | ||
|  | 
 | ||
|  |     template <typename InputStream> | ||
|  |     static bool Decode(InputStream& is, unsigned* codepoint) { | ||
|  |         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2); | ||
|  |         typename InputStream::Ch c = is.Take(); | ||
|  |         if (c < 0xD800 || c > 0xDFFF) { | ||
|  |             *codepoint = static_cast<unsigned>(c); | ||
|  |             return true; | ||
|  |         } | ||
|  |         else if (c <= 0xDBFF) { | ||
|  |             *codepoint = (static_cast<unsigned>(c) & 0x3FF) << 10; | ||
|  |             c = is.Take(); | ||
|  |             *codepoint |= (static_cast<unsigned>(c) & 0x3FF); | ||
|  |             *codepoint += 0x10000; | ||
|  |             return c >= 0xDC00 && c <= 0xDFFF; | ||
|  |         } | ||
|  |         return false; | ||
|  |     } | ||
|  | 
 | ||
|  |     template <typename InputStream, typename OutputStream> | ||
|  |     static bool Validate(InputStream& is, OutputStream& os) { | ||
|  |         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2); | ||
|  |         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2); | ||
|  |         typename InputStream::Ch c; | ||
|  |         os.Put(static_cast<typename OutputStream::Ch>(c = is.Take())); | ||
|  |         if (c < 0xD800 || c > 0xDFFF) | ||
|  |             return true; | ||
|  |         else if (c <= 0xDBFF) { | ||
|  |             os.Put(c = is.Take()); | ||
|  |             return c >= 0xDC00 && c <= 0xDFFF; | ||
|  |         } | ||
|  |         return false; | ||
|  |     } | ||
|  | }; | ||
|  | 
 | ||
|  | //! UTF-16 little endian encoding.
 | ||
|  | template<typename CharType = wchar_t> | ||
|  | struct UTF16LE : UTF16<CharType> { | ||
|  |     template <typename InputByteStream> | ||
|  |     static CharType TakeBOM(InputByteStream& is) { | ||
|  |         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); | ||
|  |         CharType c = Take(is); | ||
|  |         return static_cast<uint16_t>(c) == 0xFEFFu ? Take(is) : c; | ||
|  |     } | ||
|  | 
 | ||
|  |     template <typename InputByteStream> | ||
|  |     static CharType Take(InputByteStream& is) { | ||
|  |         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); | ||
|  |         unsigned c = static_cast<uint8_t>(is.Take()); | ||
|  |         c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8; | ||
|  |         return static_cast<CharType>(c); | ||
|  |     } | ||
|  | 
 | ||
|  |     template <typename OutputByteStream> | ||
|  |     static void PutBOM(OutputByteStream& os) { | ||
|  |         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); | ||
|  |         os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu)); | ||
|  |         os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu)); | ||
|  |     } | ||
|  | 
 | ||
|  |     template <typename OutputByteStream> | ||
|  |     static void Put(OutputByteStream& os, CharType c) { | ||
|  |         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); | ||
|  |         os.Put(static_cast<typename OutputByteStream::Ch>(static_cast<unsigned>(c) & 0xFFu)); | ||
|  |         os.Put(static_cast<typename OutputByteStream::Ch>((static_cast<unsigned>(c) >> 8) & 0xFFu)); | ||
|  |     } | ||
|  | }; | ||
|  | 
 | ||
|  | //! UTF-16 big endian encoding.
 | ||
|  | template<typename CharType = wchar_t> | ||
|  | struct UTF16BE : UTF16<CharType> { | ||
|  |     template <typename InputByteStream> | ||
|  |     static CharType TakeBOM(InputByteStream& is) { | ||
|  |         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); | ||
|  |         CharType c = Take(is); | ||
|  |         return static_cast<uint16_t>(c) == 0xFEFFu ? Take(is) : c; | ||
|  |     } | ||
|  | 
 | ||
|  |     template <typename InputByteStream> | ||
|  |     static CharType Take(InputByteStream& is) { | ||
|  |         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); | ||
|  |         unsigned c = static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8; | ||
|  |         c |= static_cast<uint8_t>(is.Take()); | ||
|  |         return static_cast<CharType>(c); | ||
|  |     } | ||
|  | 
 | ||
|  |     template <typename OutputByteStream> | ||
|  |     static void PutBOM(OutputByteStream& os) { | ||
|  |         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); | ||
|  |         os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu)); | ||
|  |         os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu)); | ||
|  |     } | ||
|  | 
 | ||
|  |     template <typename OutputByteStream> | ||
|  |     static void Put(OutputByteStream& os, CharType c) { | ||
|  |         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); | ||
|  |         os.Put(static_cast<typename OutputByteStream::Ch>((static_cast<unsigned>(c) >> 8) & 0xFFu)); | ||
|  |         os.Put(static_cast<typename OutputByteStream::Ch>(static_cast<unsigned>(c) & 0xFFu)); | ||
|  |     } | ||
|  | }; | ||
|  | 
 | ||
|  | ///////////////////////////////////////////////////////////////////////////////
 | ||
|  | // UTF32
 | ||
|  | 
 | ||
|  | //! UTF-32 encoding. 
 | ||
|  | /*! http://en.wikipedia.org/wiki/UTF-32
 | ||
|  |     \tparam CharType Type for storing 32-bit UTF-32 data. Default is unsigned. C++11 may use char32_t instead. | ||
|  |     \note implements Encoding concept | ||
|  | 
 | ||
|  |     \note For in-memory access, no need to concern endianness. The code units and code points are represented by CPU's endianness. | ||
|  |     For streaming, use UTF32LE and UTF32BE, which handle endianness. | ||
|  | */ | ||
|  | template<typename CharType = unsigned> | ||
|  | struct UTF32 { | ||
|  |     typedef CharType Ch; | ||
|  |     RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 4); | ||
|  | 
 | ||
|  |     enum { supportUnicode = 1 }; | ||
|  | 
 | ||
|  |     template<typename OutputStream> | ||
|  |     static void Encode(OutputStream& os, unsigned codepoint) { | ||
|  |         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4); | ||
|  |         RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); | ||
|  |         os.Put(codepoint); | ||
|  |     } | ||
|  | 
 | ||
|  |     template<typename OutputStream> | ||
|  |     static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { | ||
|  |         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4); | ||
|  |         RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); | ||
|  |         PutUnsafe(os, codepoint); | ||
|  |     } | ||
|  | 
 | ||
|  |     template <typename InputStream> | ||
|  |     static bool Decode(InputStream& is, unsigned* codepoint) { | ||
|  |         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4); | ||
|  |         Ch c = is.Take(); | ||
|  |         *codepoint = c; | ||
|  |         return c <= 0x10FFFF; | ||
|  |     } | ||
|  | 
 | ||
|  |     template <typename InputStream, typename OutputStream> | ||
|  |     static bool Validate(InputStream& is, OutputStream& os) { | ||
|  |         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4); | ||
|  |         Ch c; | ||
|  |         os.Put(c = is.Take()); | ||
|  |         return c <= 0x10FFFF; | ||
|  |     } | ||
|  | }; | ||
|  | 
 | ||
|  | //! UTF-32 little endian enocoding.
 | ||
|  | template<typename CharType = unsigned> | ||
|  | struct UTF32LE : UTF32<CharType> { | ||
|  |     template <typename InputByteStream> | ||
|  |     static CharType TakeBOM(InputByteStream& is) { | ||
|  |         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); | ||
|  |         CharType c = Take(is); | ||
|  |         return static_cast<uint32_t>(c) == 0x0000FEFFu ? Take(is) : c; | ||
|  |     } | ||
|  | 
 | ||
|  |     template <typename InputByteStream> | ||
|  |     static CharType Take(InputByteStream& is) { | ||
|  |         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); | ||
|  |         unsigned c = static_cast<uint8_t>(is.Take()); | ||
|  |         c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8; | ||
|  |         c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 16; | ||
|  |         c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 24; | ||
|  |         return static_cast<CharType>(c); | ||
|  |     } | ||
|  | 
 | ||
|  |     template <typename OutputByteStream> | ||
|  |     static void PutBOM(OutputByteStream& os) { | ||
|  |         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); | ||
|  |         os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu)); | ||
|  |         os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu)); | ||
|  |         os.Put(static_cast<typename OutputByteStream::Ch>(0x00u)); | ||
|  |         os.Put(static_cast<typename OutputByteStream::Ch>(0x00u)); | ||
|  |     } | ||
|  | 
 | ||
|  |     template <typename OutputByteStream> | ||
|  |     static void Put(OutputByteStream& os, CharType c) { | ||
|  |         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); | ||
|  |         os.Put(static_cast<typename OutputByteStream::Ch>(c & 0xFFu)); | ||
|  |         os.Put(static_cast<typename OutputByteStream::Ch>((c >> 8) & 0xFFu)); | ||
|  |         os.Put(static_cast<typename OutputByteStream::Ch>((c >> 16) & 0xFFu)); | ||
|  |         os.Put(static_cast<typename OutputByteStream::Ch>((c >> 24) & 0xFFu)); | ||
|  |     } | ||
|  | }; | ||
|  | 
 | ||
|  | //! UTF-32 big endian encoding.
 | ||
|  | template<typename CharType = unsigned> | ||
|  | struct UTF32BE : UTF32<CharType> { | ||
|  |     template <typename InputByteStream> | ||
|  |     static CharType TakeBOM(InputByteStream& is) { | ||
|  |         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); | ||
|  |         CharType c = Take(is); | ||
|  |         return static_cast<uint32_t>(c) == 0x0000FEFFu ? Take(is) : c;  | ||
|  |     } | ||
|  | 
 | ||
|  |     template <typename InputByteStream> | ||
|  |     static CharType Take(InputByteStream& is) { | ||
|  |         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); | ||
|  |         unsigned c = static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 24; | ||
|  |         c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 16; | ||
|  |         c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8; | ||
|  |         c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())); | ||
|  |         return static_cast<CharType>(c); | ||
|  |     } | ||
|  | 
 | ||
|  |     template <typename OutputByteStream> | ||
|  |     static void PutBOM(OutputByteStream& os) { | ||
|  |         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); | ||
|  |         os.Put(static_cast<typename OutputByteStream::Ch>(0x00u)); | ||
|  |         os.Put(static_cast<typename OutputByteStream::Ch>(0x00u)); | ||
|  |         os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu)); | ||
|  |         os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu)); | ||
|  |     } | ||
|  | 
 | ||
|  |     template <typename OutputByteStream> | ||
|  |     static void Put(OutputByteStream& os, CharType c) { | ||
|  |         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); | ||
|  |         os.Put(static_cast<typename OutputByteStream::Ch>((c >> 24) & 0xFFu)); | ||
|  |         os.Put(static_cast<typename OutputByteStream::Ch>((c >> 16) & 0xFFu)); | ||
|  |         os.Put(static_cast<typename OutputByteStream::Ch>((c >> 8) & 0xFFu)); | ||
|  |         os.Put(static_cast<typename OutputByteStream::Ch>(c & 0xFFu)); | ||
|  |     } | ||
|  | }; | ||
|  | 
 | ||
|  | ///////////////////////////////////////////////////////////////////////////////
 | ||
|  | // ASCII
 | ||
|  | 
 | ||
|  | //! ASCII encoding.
 | ||
|  | /*! http://en.wikipedia.org/wiki/ASCII
 | ||
|  |     \tparam CharType Code unit for storing 7-bit ASCII data. Default is char. | ||
|  |     \note implements Encoding concept | ||
|  | */ | ||
|  | template<typename CharType = char> | ||
|  | struct ASCII { | ||
|  |     typedef CharType Ch; | ||
|  | 
 | ||
|  |     enum { supportUnicode = 0 }; | ||
|  | 
 | ||
|  |     template<typename OutputStream> | ||
|  |     static void Encode(OutputStream& os, unsigned codepoint) { | ||
|  |         RAPIDJSON_ASSERT(codepoint <= 0x7F); | ||
|  |         os.Put(static_cast<Ch>(codepoint & 0xFF)); | ||
|  |     } | ||
|  | 
 | ||
|  |     template<typename OutputStream> | ||
|  |     static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { | ||
|  |         RAPIDJSON_ASSERT(codepoint <= 0x7F); | ||
|  |         PutUnsafe(os, static_cast<Ch>(codepoint & 0xFF)); | ||
|  |     } | ||
|  | 
 | ||
|  |     template <typename InputStream> | ||
|  |     static bool Decode(InputStream& is, unsigned* codepoint) { | ||
|  |         uint8_t c = static_cast<uint8_t>(is.Take()); | ||
|  |         *codepoint = c; | ||
|  |         return c <= 0X7F; | ||
|  |     } | ||
|  | 
 | ||
|  |     template <typename InputStream, typename OutputStream> | ||
|  |     static bool Validate(InputStream& is, OutputStream& os) { | ||
|  |         uint8_t c = static_cast<uint8_t>(is.Take()); | ||
|  |         os.Put(static_cast<typename OutputStream::Ch>(c)); | ||
|  |         return c <= 0x7F; | ||
|  |     } | ||
|  | 
 | ||
|  |     template <typename InputByteStream> | ||
|  |     static CharType TakeBOM(InputByteStream& is) { | ||
|  |         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); | ||
|  |         uint8_t c = static_cast<uint8_t>(Take(is)); | ||
|  |         return static_cast<Ch>(c); | ||
|  |     } | ||
|  | 
 | ||
|  |     template <typename InputByteStream> | ||
|  |     static Ch Take(InputByteStream& is) { | ||
|  |         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); | ||
|  |         return static_cast<Ch>(is.Take()); | ||
|  |     } | ||
|  | 
 | ||
|  |     template <typename OutputByteStream> | ||
|  |     static void PutBOM(OutputByteStream& os) { | ||
|  |         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); | ||
|  |         (void)os; | ||
|  |     } | ||
|  | 
 | ||
|  |     template <typename OutputByteStream> | ||
|  |     static void Put(OutputByteStream& os, Ch c) { | ||
|  |         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); | ||
|  |         os.Put(static_cast<typename OutputByteStream::Ch>(c)); | ||
|  |     } | ||
|  | }; | ||
|  | 
 | ||
|  | ///////////////////////////////////////////////////////////////////////////////
 | ||
|  | // AutoUTF
 | ||
|  | 
 | ||
//! UTF encoding of a stream, selected at runtime.
/*! \note AutoUTF indexes its function tables with a stream's GetType() result; those tables
          list the encodings in this same order, so presumably GetType() yields one of
          these values and the numbering must not change.
*/
enum UTFType {
    kUTF8    = 0,   //!< UTF-8.
    kUTF16LE = 1,   //!< UTF-16, little endian byte order.
    kUTF16BE = 2,   //!< UTF-16, big endian byte order.
    kUTF32LE = 3,   //!< UTF-32, little endian byte order.
    kUTF32BE = 4    //!< UTF-32, big endian byte order.
};
|  | 
 | ||
|  | //! Dynamically select encoding according to stream's runtime-specified UTF encoding type.
 | ||
|  | /*! \note This class can be used with AutoUTFInputtStream and AutoUTFOutputStream, which provides GetType().
 | ||
|  | */ | ||
|  | template<typename CharType> | ||
|  | struct AutoUTF { | ||
|  |     typedef CharType Ch; | ||
|  | 
 | ||
|  |     enum { supportUnicode = 1 }; | ||
|  | 
 | ||
|  | #define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
 | ||
|  | 
 | ||
|  |     template<typename OutputStream> | ||
|  |     RAPIDJSON_FORCEINLINE static void Encode(OutputStream& os, unsigned codepoint) { | ||
|  |         typedef void (*EncodeFunc)(OutputStream&, unsigned); | ||
|  |         static const EncodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Encode) }; | ||
|  |         (*f[os.GetType()])(os, codepoint); | ||
|  |     } | ||
|  | 
 | ||
|  |     template<typename OutputStream> | ||
|  |     RAPIDJSON_FORCEINLINE static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { | ||
|  |         typedef void (*EncodeFunc)(OutputStream&, unsigned); | ||
|  |         static const EncodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(EncodeUnsafe) }; | ||
|  |         (*f[os.GetType()])(os, codepoint); | ||
|  |     } | ||
|  | 
 | ||
|  |     template <typename InputStream> | ||
|  |     RAPIDJSON_FORCEINLINE static bool Decode(InputStream& is, unsigned* codepoint) { | ||
|  |         typedef bool (*DecodeFunc)(InputStream&, unsigned*); | ||
|  |         static const DecodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Decode) }; | ||
|  |         return (*f[is.GetType()])(is, codepoint); | ||
|  |     } | ||
|  | 
 | ||
|  |     template <typename InputStream, typename OutputStream> | ||
|  |     RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) { | ||
|  |         typedef bool (*ValidateFunc)(InputStream&, OutputStream&); | ||
|  |         static const ValidateFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Validate) }; | ||
|  |         return (*f[is.GetType()])(is, os); | ||
|  |     } | ||
|  | 
 | ||
|  | #undef RAPIDJSON_ENCODINGS_FUNC
 | ||
|  | }; | ||
|  | 
 | ||
|  | ///////////////////////////////////////////////////////////////////////////////
 | ||
|  | // Transcoder
 | ||
|  | 
 | ||
|  | //! Encoding conversion.
 | ||
|  | template<typename SourceEncoding, typename TargetEncoding> | ||
|  | struct Transcoder { | ||
|  |     //! Take one Unicode codepoint from source encoding, convert it to target encoding and put it to the output stream.
 | ||
|  |     template<typename InputStream, typename OutputStream> | ||
|  |     RAPIDJSON_FORCEINLINE static bool Transcode(InputStream& is, OutputStream& os) { | ||
|  |         unsigned codepoint; | ||
|  |         if (!SourceEncoding::Decode(is, &codepoint)) | ||
|  |             return false; | ||
|  |         TargetEncoding::Encode(os, codepoint); | ||
|  |         return true; | ||
|  |     } | ||
|  | 
 | ||
|  |     template<typename InputStream, typename OutputStream> | ||
|  |     RAPIDJSON_FORCEINLINE static bool TranscodeUnsafe(InputStream& is, OutputStream& os) { | ||
|  |         unsigned codepoint; | ||
|  |         if (!SourceEncoding::Decode(is, &codepoint)) | ||
|  |             return false; | ||
|  |         TargetEncoding::EncodeUnsafe(os, codepoint); | ||
|  |         return true; | ||
|  |     } | ||
|  | 
 | ||
|  |     //! Validate one Unicode codepoint from an encoded stream.
 | ||
|  |     template<typename InputStream, typename OutputStream> | ||
|  |     RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) { | ||
|  |         return Transcode(is, os);   // Since source/target encoding is different, must transcode.
 | ||
|  |     } | ||
|  | }; | ||
|  | 
 | ||
// Forward declaration.
// PutUnsafe() is called by Transcoder<Encoding, Encoding>::TranscodeUnsafe() below,
// so the name must be declared before that template is defined. The definition is
// provided elsewhere (not visible in this part of the file).
template<typename Stream>
inline void PutUnsafe(Stream& stream, typename Stream::Ch c);
|  | 
 | ||
//! Specialization of Transcoder with same source and target encoding.
/*! Nothing is decoded or re-encoded here: Transcode() and TranscodeUnsafe() forward a
    single code unit (not a whole code point), and Validate() defers to
    Encoding::Validate().
*/
template<typename Encoding>
struct Transcoder<Encoding, Encoding> {
    //! Move one code unit from \c is to \c os via \c os.Put(). Always returns true.
    template<typename InputStream, typename OutputStream>
    RAPIDJSON_FORCEINLINE static bool Transcode(InputStream& is, OutputStream& os) {
        os.Put(is.Take());  // Just copy one code unit. This semantic is different from primary template class.
        return true;
    }
    
    //! Move one code unit from \c is to \c os via the free function PutUnsafe(). Always returns true.
    template<typename InputStream, typename OutputStream>
    RAPIDJSON_FORCEINLINE static bool TranscodeUnsafe(InputStream& is, OutputStream& os) {
        PutUnsafe(os, is.Take());  // Just copy one code unit. This semantic is different from primary template class.
        return true;
    }
    
    //! Validate using the shared encoding's own Validate(); returns its result.
    template<typename InputStream, typename OutputStream>
    RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) {
        return Encoding::Validate(is, os);  // source/target encoding are the same
    }
};
|  | 
 | ||
|  | RAPIDJSON_NAMESPACE_END | ||
|  | 
 | ||
|  | #if defined(__GNUC__) || defined(_MSC_VER)
 | ||
|  | RAPIDJSON_DIAG_POP | ||
|  | #endif
 | ||
|  | 
 | ||
|  | #endif // RAPIDJSON_ENCODINGS_H_
 |