Extra2D/include/core/string.h

210 lines
6.5 KiB
C
Raw Normal View History

2026-02-11 19:40:26 +08:00
#pragma once
#include <string>
namespace extra2d {
// ============================================================================
// 字符串编码转换工具函数
// 统一使用 std::string (UTF-8) 作为项目标准字符串类型
// ============================================================================
// UTF-8 ↔ UTF-16 转换
std::u16string utf8ToUtf16(const std::string& utf8);
std::string utf16ToUtf8(const std::u16string& utf16);
// UTF-8 ↔ UTF-32 转换
std::u32string utf8ToUtf32(const std::string& utf8);
std::string utf32ToUtf8(const std::u32string& utf32);
// UTF-8 ↔ Wide String 转换
std::wstring utf8ToWide(const std::string& utf8);
std::string wideToUtf8(const std::wstring& wide);
// UTF-8 ↔ GBK/GB2312 转换Windows 中文系统常用)
std::string utf8ToGbk(const std::string& utf8);
std::string gbkToUtf8(const std::string& gbk);
// ============================================================================
// 内联实现
// ============================================================================
inline std::u16string utf8ToUtf16(const std::string& utf8) {
if (utf8.empty()) return std::u16string();
// UTF-8 → UTF-32 → UTF-16 (with surrogate pairs)
std::u32string u32 = utf8ToUtf32(utf8);
std::u16string result;
result.reserve(u32.size());
for (char32_t ch : u32) {
if (ch <= 0xFFFF) {
result.push_back(static_cast<char16_t>(ch));
} else if (ch <= 0x10FFFF) {
// Surrogate pair
ch -= 0x10000;
result.push_back(static_cast<char16_t>(0xD800 | (ch >> 10)));
result.push_back(static_cast<char16_t>(0xDC00 | (ch & 0x3FF)));
}
}
return result;
}
inline std::string utf16ToUtf8(const std::u16string& utf16) {
if (utf16.empty()) return std::string();
// UTF-16 → UTF-32 → UTF-8
std::u32string u32;
u32.reserve(utf16.size());
for (size_t i = 0; i < utf16.size(); ++i) {
char16_t cu = utf16[i];
char32_t ch;
if (cu >= 0xD800 && cu <= 0xDBFF && i + 1 < utf16.size()) {
// High surrogate
char16_t cl = utf16[i + 1];
if (cl >= 0xDC00 && cl <= 0xDFFF) {
ch = 0x10000 + ((static_cast<char32_t>(cu - 0xD800) << 10) |
(cl - 0xDC00));
++i;
} else {
ch = cu; // Invalid, pass through
}
} else {
ch = cu;
}
u32.push_back(ch);
}
return utf32ToUtf8(u32);
}
inline std::u32string utf8ToUtf32(const std::string& utf8) {
std::u32string result;
result.reserve(utf8.size());
const char* ptr = utf8.c_str();
const char* end = ptr + utf8.size();
while (ptr < end) {
char32_t ch = 0;
unsigned char byte = static_cast<unsigned char>(*ptr);
if ((byte & 0x80) == 0) {
// 1-byte sequence
ch = byte;
ptr += 1;
} else if ((byte & 0xE0) == 0xC0) {
// 2-byte sequence
ch = (byte & 0x1F) << 6;
ch |= (static_cast<unsigned char>(ptr[1]) & 0x3F);
ptr += 2;
} else if ((byte & 0xF0) == 0xE0) {
// 3-byte sequence
ch = (byte & 0x0F) << 12;
ch |= (static_cast<unsigned char>(ptr[1]) & 0x3F) << 6;
ch |= (static_cast<unsigned char>(ptr[2]) & 0x3F);
ptr += 3;
} else if ((byte & 0xF8) == 0xF0) {
// 4-byte sequence
ch = (byte & 0x07) << 18;
ch |= (static_cast<unsigned char>(ptr[1]) & 0x3F) << 12;
ch |= (static_cast<unsigned char>(ptr[2]) & 0x3F) << 6;
ch |= (static_cast<unsigned char>(ptr[3]) & 0x3F);
ptr += 4;
} else {
// Invalid UTF-8, skip
ptr += 1;
continue;
}
result.push_back(ch);
}
return result;
}
inline std::string utf32ToUtf8(const std::u32string& utf32) {
std::string result;
for (char32_t ch : utf32) {
if (ch <= 0x7F) {
// 1-byte
result.push_back(static_cast<char>(ch));
} else if (ch <= 0x7FF) {
// 2-byte
result.push_back(static_cast<char>(0xC0 | ((ch >> 6) & 0x1F)));
result.push_back(static_cast<char>(0x80 | (ch & 0x3F)));
} else if (ch <= 0xFFFF) {
// 3-byte
result.push_back(static_cast<char>(0xE0 | ((ch >> 12) & 0x0F)));
result.push_back(static_cast<char>(0x80 | ((ch >> 6) & 0x3F)));
result.push_back(static_cast<char>(0x80 | (ch & 0x3F)));
} else if (ch <= 0x10FFFF) {
// 4-byte
result.push_back(static_cast<char>(0xF0 | ((ch >> 18) & 0x07)));
result.push_back(static_cast<char>(0x80 | ((ch >> 12) & 0x3F)));
result.push_back(static_cast<char>(0x80 | ((ch >> 6) & 0x3F)));
result.push_back(static_cast<char>(0x80 | (ch & 0x3F)));
}
}
return result;
}
inline std::wstring utf8ToWide(const std::string& utf8) {
if (utf8.empty()) return std::wstring();
if constexpr (sizeof(wchar_t) == 4) {
// wchar_t is 32-bit (Linux/Switch): same as UTF-32
std::u32string u32 = utf8ToUtf32(utf8);
return std::wstring(u32.begin(), u32.end());
} else {
// wchar_t is 16-bit (Windows): same as UTF-16
std::u16string u16 = utf8ToUtf16(utf8);
return std::wstring(u16.begin(), u16.end());
}
}
inline std::string wideToUtf8(const std::wstring& wide) {
if (wide.empty()) return std::string();
if constexpr (sizeof(wchar_t) == 4) {
std::u32string u32(wide.begin(), wide.end());
return utf32ToUtf8(u32);
} else {
std::u16string u16(wide.begin(), wide.end());
return utf16ToUtf8(u16);
}
}
// GBK/GB2312 转换Windows 平台实现)
// 注意Windows 实现在 .cpp 文件中,避免头文件包含 windows.h 导致冲突
#ifdef _WIN32
// 前向声明,实现在 .cpp 文件中
std::string utf8ToGbkImpl(const std::string& utf8);
std::string gbkToUtf8Impl(const std::string& gbk);
inline std::string utf8ToGbk(const std::string& utf8) {
return utf8ToGbkImpl(utf8);
}
inline std::string gbkToUtf8(const std::string& gbk) {
return gbkToUtf8Impl(gbk);
}
#else
// 非 Windows 平台GBK 转换使用 iconv 或返回原字符串
inline std::string utf8ToGbk(const std::string& utf8) {
// TODO: 使用 iconv 实现
return utf8;
}
inline std::string gbkToUtf8(const std::string& gbk) {
// TODO: 使用 iconv 实现
return gbk;
}
#endif
} // namespace extra2d