300 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			C
		
	
	
	
		
		
			
		
	
	
			300 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			C
		
	
	
	
|  | // Tencent is pleased to support the open source community by making RapidJSON available.
 | ||
|  | // 
 | ||
|  | // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
 | ||
|  | //
 | ||
|  | // Licensed under the MIT License (the "License"); you may not use this file except
 | ||
|  | // in compliance with the License. You may obtain a copy of the License at
 | ||
|  | //
 | ||
|  | // http://opensource.org/licenses/MIT
 | ||
|  | //
 | ||
|  | // Unless required by applicable law or agreed to in writing, software distributed 
 | ||
|  | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
 | ||
|  | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 
 | ||
|  | // specific language governing permissions and limitations under the License.
 | ||
|  | 
 | ||
|  | #ifndef RAPIDJSON_ENCODEDSTREAM_H_
 | ||
|  | #define RAPIDJSON_ENCODEDSTREAM_H_
 | ||
|  | 
 | ||
|  | #include "stream.h"
 | ||
|  | #include "memorystream.h"
 | ||
|  | 
 | ||
|  | #ifdef __GNUC__
 | ||
|  | RAPIDJSON_DIAG_PUSH | ||
|  | RAPIDJSON_DIAG_OFF(effc++) | ||
|  | #endif
 | ||
|  | 
 | ||
|  | #ifdef __clang__
 | ||
|  | RAPIDJSON_DIAG_PUSH | ||
|  | RAPIDJSON_DIAG_OFF(padded) | ||
|  | #endif
 | ||
|  | 
 | ||
|  | RAPIDJSON_NAMESPACE_BEGIN | ||
|  | 
 | ||
|  | //! Input byte stream wrapper with a statically bound encoding.
 | ||
|  | /*!
 | ||
|  |     \tparam Encoding The interpretation of encoding of the stream. Either UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE. | ||
|  |     \tparam InputByteStream Type of input byte stream. For example, FileReadStream. | ||
|  | */ | ||
|  | template <typename Encoding, typename InputByteStream> | ||
|  | class EncodedInputStream { | ||
|  |     RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); | ||
|  | public: | ||
|  |     typedef typename Encoding::Ch Ch; | ||
|  | 
 | ||
|  |     EncodedInputStream(InputByteStream& is) : is_(is) {  | ||
|  |         current_ = Encoding::TakeBOM(is_); | ||
|  |     } | ||
|  | 
 | ||
|  |     Ch Peek() const { return current_; } | ||
|  |     Ch Take() { Ch c = current_; current_ = Encoding::Take(is_); return c; } | ||
|  |     size_t Tell() const { return is_.Tell(); } | ||
|  | 
 | ||
|  |     // Not implemented
 | ||
|  |     void Put(Ch) { RAPIDJSON_ASSERT(false); } | ||
|  |     void Flush() { RAPIDJSON_ASSERT(false); }  | ||
|  |     Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } | ||
|  |     size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } | ||
|  | 
 | ||
|  | private: | ||
|  |     EncodedInputStream(const EncodedInputStream&); | ||
|  |     EncodedInputStream& operator=(const EncodedInputStream&); | ||
|  | 
 | ||
|  |     InputByteStream& is_; | ||
|  |     Ch current_; | ||
|  | }; | ||
|  | 
 | ||
|  | //! Specialized for UTF8 MemoryStream.
 | ||
|  | template <> | ||
|  | class EncodedInputStream<UTF8<>, MemoryStream> { | ||
|  | public: | ||
|  |     typedef UTF8<>::Ch Ch; | ||
|  | 
 | ||
|  |     EncodedInputStream(MemoryStream& is) : is_(is) { | ||
|  |         if (static_cast<unsigned char>(is_.Peek()) == 0xEFu) is_.Take(); | ||
|  |         if (static_cast<unsigned char>(is_.Peek()) == 0xBBu) is_.Take(); | ||
|  |         if (static_cast<unsigned char>(is_.Peek()) == 0xBFu) is_.Take(); | ||
|  |     } | ||
|  |     Ch Peek() const { return is_.Peek(); } | ||
|  |     Ch Take() { return is_.Take(); } | ||
|  |     size_t Tell() const { return is_.Tell(); } | ||
|  | 
 | ||
|  |     // Not implemented
 | ||
|  |     void Put(Ch) {} | ||
|  |     void Flush() {}  | ||
|  |     Ch* PutBegin() { return 0; } | ||
|  |     size_t PutEnd(Ch*) { return 0; } | ||
|  | 
 | ||
|  |     MemoryStream& is_; | ||
|  | 
 | ||
|  | private: | ||
|  |     EncodedInputStream(const EncodedInputStream&); | ||
|  |     EncodedInputStream& operator=(const EncodedInputStream&); | ||
|  | }; | ||
|  | 
 | ||
|  | //! Output byte stream wrapper with statically bound encoding.
 | ||
|  | /*!
 | ||
|  |     \tparam Encoding The interpretation of encoding of the stream. Either UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE. | ||
|  |     \tparam OutputByteStream Type of input byte stream. For example, FileWriteStream. | ||
|  | */ | ||
|  | template <typename Encoding, typename OutputByteStream> | ||
|  | class EncodedOutputStream { | ||
|  |     RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); | ||
|  | public: | ||
|  |     typedef typename Encoding::Ch Ch; | ||
|  | 
 | ||
|  |     EncodedOutputStream(OutputByteStream& os, bool putBOM = true) : os_(os) {  | ||
|  |         if (putBOM) | ||
|  |             Encoding::PutBOM(os_); | ||
|  |     } | ||
|  | 
 | ||
|  |     void Put(Ch c) { Encoding::Put(os_, c);  } | ||
|  |     void Flush() { os_.Flush(); } | ||
|  | 
 | ||
|  |     // Not implemented
 | ||
|  |     Ch Peek() const { RAPIDJSON_ASSERT(false); return 0;} | ||
|  |     Ch Take() { RAPIDJSON_ASSERT(false); return 0;} | ||
|  |     size_t Tell() const { RAPIDJSON_ASSERT(false);  return 0; } | ||
|  |     Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } | ||
|  |     size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } | ||
|  | 
 | ||
|  | private: | ||
|  |     EncodedOutputStream(const EncodedOutputStream&); | ||
|  |     EncodedOutputStream& operator=(const EncodedOutputStream&); | ||
|  | 
 | ||
|  |     OutputByteStream& os_; | ||
|  | }; | ||
|  | 
 | ||
// Expands to the static member x of every supported UTF encoding, instantiated with
// the Ch type of the enclosing class. The resulting tables are indexed by a UTFType
// value, so the order here has to agree with that enumeration (declared elsewhere).
#define RAPIDJSON_ENCODINGS_FUNC(x) \
    UTF8<Ch>::x, \
    UTF16LE<Ch>::x, \
    UTF16BE<Ch>::x, \
    UTF32LE<Ch>::x, \
    UTF32BE<Ch>::x
 | ||
|  | 
 | ||
|  | //! Input stream wrapper with dynamically bound encoding and automatic encoding detection.
 | ||
|  | /*!
 | ||
|  |     \tparam CharType Type of character for reading. | ||
|  |     \tparam InputByteStream type of input byte stream to be wrapped. | ||
|  | */ | ||
|  | template <typename CharType, typename InputByteStream> | ||
|  | class AutoUTFInputStream { | ||
|  |     RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); | ||
|  | public: | ||
|  |     typedef CharType Ch; | ||
|  | 
 | ||
|  |     //! Constructor.
 | ||
|  |     /*!
 | ||
|  |         \param is input stream to be wrapped. | ||
|  |         \param type UTF encoding type if it is not detected from the stream. | ||
|  |     */ | ||
|  |     AutoUTFInputStream(InputByteStream& is, UTFType type = kUTF8) : is_(&is), type_(type), hasBOM_(false) { | ||
|  |         RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE);         | ||
|  |         DetectType(); | ||
|  |         static const TakeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Take) }; | ||
|  |         takeFunc_ = f[type_]; | ||
|  |         current_ = takeFunc_(*is_); | ||
|  |     } | ||
|  | 
 | ||
|  |     UTFType GetType() const { return type_; } | ||
|  |     bool HasBOM() const { return hasBOM_; } | ||
|  | 
 | ||
|  |     Ch Peek() const { return current_; } | ||
|  |     Ch Take() { Ch c = current_; current_ = takeFunc_(*is_); return c; } | ||
|  |     size_t Tell() const { return is_->Tell(); } | ||
|  | 
 | ||
|  |     // Not implemented
 | ||
|  |     void Put(Ch) { RAPIDJSON_ASSERT(false); } | ||
|  |     void Flush() { RAPIDJSON_ASSERT(false); }  | ||
|  |     Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } | ||
|  |     size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } | ||
|  | 
 | ||
|  | private: | ||
|  |     AutoUTFInputStream(const AutoUTFInputStream&); | ||
|  |     AutoUTFInputStream& operator=(const AutoUTFInputStream&); | ||
|  | 
 | ||
|  |     // Detect encoding type with BOM or RFC 4627
 | ||
|  |     void DetectType() { | ||
|  |         // BOM (Byte Order Mark):
 | ||
|  |         // 00 00 FE FF  UTF-32BE
 | ||
|  |         // FF FE 00 00  UTF-32LE
 | ||
|  |         // FE FF        UTF-16BE
 | ||
|  |         // FF FE        UTF-16LE
 | ||
|  |         // EF BB BF     UTF-8
 | ||
|  | 
 | ||
|  |         const unsigned char* c = reinterpret_cast<const unsigned char *>(is_->Peek4()); | ||
|  |         if (!c) | ||
|  |             return; | ||
|  | 
 | ||
|  |         unsigned bom = static_cast<unsigned>(c[0] | (c[1] << 8) | (c[2] << 16) | (c[3] << 24)); | ||
|  |         hasBOM_ = false; | ||
|  |         if (bom == 0xFFFE0000)                  { type_ = kUTF32BE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); } | ||
|  |         else if (bom == 0x0000FEFF)             { type_ = kUTF32LE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); } | ||
|  |         else if ((bom & 0xFFFF) == 0xFFFE)      { type_ = kUTF16BE; hasBOM_ = true; is_->Take(); is_->Take();                           } | ||
|  |         else if ((bom & 0xFFFF) == 0xFEFF)      { type_ = kUTF16LE; hasBOM_ = true; is_->Take(); is_->Take();                           } | ||
|  |         else if ((bom & 0xFFFFFF) == 0xBFBBEF)  { type_ = kUTF8;    hasBOM_ = true; is_->Take(); is_->Take(); is_->Take();              } | ||
|  | 
 | ||
|  |         // RFC 4627: Section 3
 | ||
|  |         // "Since the first two characters of a JSON text will always be ASCII
 | ||
|  |         // characters [RFC0020], it is possible to determine whether an octet
 | ||
|  |         // stream is UTF-8, UTF-16 (BE or LE), or UTF-32 (BE or LE) by looking
 | ||
|  |         // at the pattern of nulls in the first four octets."
 | ||
|  |         // 00 00 00 xx  UTF-32BE
 | ||
|  |         // 00 xx 00 xx  UTF-16BE
 | ||
|  |         // xx 00 00 00  UTF-32LE
 | ||
|  |         // xx 00 xx 00  UTF-16LE
 | ||
|  |         // xx xx xx xx  UTF-8
 | ||
|  | 
 | ||
|  |         if (!hasBOM_) { | ||
|  |             unsigned pattern = (c[0] ? 1 : 0) | (c[1] ? 2 : 0) | (c[2] ? 4 : 0) | (c[3] ? 8 : 0); | ||
|  |             switch (pattern) { | ||
|  |             case 0x08: type_ = kUTF32BE; break; | ||
|  |             case 0x0A: type_ = kUTF16BE; break; | ||
|  |             case 0x01: type_ = kUTF32LE; break; | ||
|  |             case 0x05: type_ = kUTF16LE; break; | ||
|  |             case 0x0F: type_ = kUTF8;    break; | ||
|  |             default: break; // Use type defined by user.
 | ||
|  |             } | ||
|  |         } | ||
|  | 
 | ||
|  |         // Runtime check whether the size of character type is sufficient. It only perform checks with assertion.
 | ||
|  |         if (type_ == kUTF16LE || type_ == kUTF16BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 2); | ||
|  |         if (type_ == kUTF32LE || type_ == kUTF32BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 4); | ||
|  |     } | ||
|  | 
 | ||
|  |     typedef Ch (*TakeFunc)(InputByteStream& is); | ||
|  |     InputByteStream* is_; | ||
|  |     UTFType type_; | ||
|  |     Ch current_; | ||
|  |     TakeFunc takeFunc_; | ||
|  |     bool hasBOM_; | ||
|  | }; | ||
|  | 
 | ||
|  | //! Output stream wrapper with dynamically bound encoding and automatic encoding detection.
 | ||
|  | /*!
 | ||
|  |     \tparam CharType Type of character for writing. | ||
|  |     \tparam OutputByteStream type of output byte stream to be wrapped. | ||
|  | */ | ||
|  | template <typename CharType, typename OutputByteStream> | ||
|  | class AutoUTFOutputStream { | ||
|  |     RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); | ||
|  | public: | ||
|  |     typedef CharType Ch; | ||
|  | 
 | ||
|  |     //! Constructor.
 | ||
|  |     /*!
 | ||
|  |         \param os output stream to be wrapped. | ||
|  |         \param type UTF encoding type. | ||
|  |         \param putBOM Whether to write BOM at the beginning of the stream. | ||
|  |     */ | ||
|  |     AutoUTFOutputStream(OutputByteStream& os, UTFType type, bool putBOM) : os_(&os), type_(type) { | ||
|  |         RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE); | ||
|  | 
 | ||
|  |         // Runtime check whether the size of character type is sufficient. It only perform checks with assertion.
 | ||
|  |         if (type_ == kUTF16LE || type_ == kUTF16BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 2); | ||
|  |         if (type_ == kUTF32LE || type_ == kUTF32BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 4); | ||
|  | 
 | ||
|  |         static const PutFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Put) }; | ||
|  |         putFunc_ = f[type_]; | ||
|  | 
 | ||
|  |         if (putBOM) | ||
|  |             PutBOM(); | ||
|  |     } | ||
|  | 
 | ||
|  |     UTFType GetType() const { return type_; } | ||
|  | 
 | ||
|  |     void Put(Ch c) { putFunc_(*os_, c); } | ||
|  |     void Flush() { os_->Flush(); }  | ||
|  | 
 | ||
|  |     // Not implemented
 | ||
|  |     Ch Peek() const { RAPIDJSON_ASSERT(false); return 0;} | ||
|  |     Ch Take() { RAPIDJSON_ASSERT(false); return 0;} | ||
|  |     size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; } | ||
|  |     Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } | ||
|  |     size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } | ||
|  | 
 | ||
|  | private: | ||
|  |     AutoUTFOutputStream(const AutoUTFOutputStream&); | ||
|  |     AutoUTFOutputStream& operator=(const AutoUTFOutputStream&); | ||
|  | 
 | ||
|  |     void PutBOM() {  | ||
|  |         typedef void (*PutBOMFunc)(OutputByteStream&); | ||
|  |         static const PutBOMFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(PutBOM) }; | ||
|  |         f[type_](*os_); | ||
|  |     } | ||
|  | 
 | ||
|  |     typedef void (*PutFunc)(OutputByteStream&, Ch); | ||
|  | 
 | ||
|  |     OutputByteStream* os_; | ||
|  |     UTFType type_; | ||
|  |     PutFunc putFunc_; | ||
|  | }; | ||
|  | 
 | ||
|  | #undef RAPIDJSON_ENCODINGS_FUNC
 | ||
|  | 
 | ||
|  | RAPIDJSON_NAMESPACE_END | ||
|  | 
 | ||
|  | #ifdef __clang__
 | ||
|  | RAPIDJSON_DIAG_POP | ||
|  | #endif
 | ||
|  | 
 | ||
|  | #ifdef __GNUC__
 | ||
|  | RAPIDJSON_DIAG_POP | ||
|  | #endif
 | ||
|  | 
 | ||
#endif // RAPIDJSON_ENCODEDSTREAM_H_
 |