Extra2D/Easy2D/include/easy2d/core/string.h

508 lines
16 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#pragma once
#include <easy2d/core/types.h>
#include <string>
#include <vector>
#include <cstring>
namespace easy2d {
// ============================================================================
// String 类 - 跨平台字符串,内部统一使用 UTF-8 存储
// ============================================================================
class String {
public:
// ------------------------------------------------------------------------
// 构造函数
// ------------------------------------------------------------------------
String() = default;
String(const char* utf8) : data_(utf8 ? utf8 : "") {}
String(const std::string& utf8) : data_(utf8) {}
explicit String(const wchar_t* wide);
explicit String(const std::wstring& wide);
explicit String(const char16_t* utf16);
explicit String(const std::u16string& utf16);
explicit String(const char32_t* utf32);
explicit String(const std::u32string& utf32);
// ------------------------------------------------------------------------
// 静态工厂方法
// ------------------------------------------------------------------------
static String fromUtf8(const char* utf8);
static String fromUtf8(const std::string& utf8);
static String fromWide(const wchar_t* wide);
static String fromWide(const std::wstring& wide);
static String fromUtf16(const char16_t* utf16);
static String fromUtf16(const std::u16string& utf16);
static String fromUtf32(const char32_t* utf32);
static String fromUtf32(const std::u32string& utf32);
/// 从 GBK/GB2312 编码构造Windows 中文系统常用)
static String fromGBK(const char* gbk);
static String fromGBK(const std::string& gbk);
// ------------------------------------------------------------------------
// 编码转换
// ------------------------------------------------------------------------
const std::string& toUtf8() const { return data_; }
std::string toUtf8String() const { return data_; }
std::wstring toWide() const;
std::u16string toUtf16() const;
std::u32string toUtf32() const;
/// 转换为 GBK/GB2312 编码Windows 中文系统常用)
std::string toGBK() const;
// ------------------------------------------------------------------------
// 基础操作
// ------------------------------------------------------------------------
/// 返回 Unicode 字符数(不是字节数)
size_t length() const;
/// 返回 UTF-8 字节数
size_t byteSize() const { return data_.size(); }
bool empty() const { return data_.empty(); }
const char* c_str() const { return data_.c_str(); }
const std::string& str() const { return data_; }
std::string& str() { return data_; }
// ------------------------------------------------------------------------
// 字符串操作
// ------------------------------------------------------------------------
void clear() { data_.clear(); }
String& append(const String& other);
String& append(const char* utf8);
String substring(size_t start, size_t len = npos) const;
/// 查找子串,返回 Unicode 字符索引,未找到返回 npos
size_t find(const String& substr, size_t start = 0) const;
/// 是否以指定字符串开头
bool startsWith(const String& prefix) const;
/// 是否以指定字符串结尾
bool endsWith(const String& suffix) const;
/// 去除首尾空白字符
String trim() const;
/// 分割字符串
std::vector<String> split(const String& delimiter) const;
/// 替换所有匹配子串
String replaceAll(const String& from, const String& to) const;
// ------------------------------------------------------------------------
// 运算符重载
// ------------------------------------------------------------------------
String operator+(const String& other) const;
String& operator+=(const String& other);
bool operator==(const String& other) const { return data_ == other.data_; }
bool operator!=(const String& other) const { return data_ != other.data_; }
bool operator<(const String& other) const { return data_ < other.data_; }
bool operator>(const String& other) const { return data_ > other.data_; }
char operator[](size_t index) const { return data_[index]; }
// ------------------------------------------------------------------------
// 迭代器支持
// ------------------------------------------------------------------------
auto begin() { return data_.begin(); }
auto end() { return data_.end(); }
auto begin() const { return data_.begin(); }
auto end() const { return data_.end(); }
auto cbegin() const { return data_.cbegin(); }
auto cend() const { return data_.cend(); }
// ------------------------------------------------------------------------
// 静态常量
// ------------------------------------------------------------------------
static constexpr size_t npos = static_cast<size_t>(-1);
// ------------------------------------------------------------------------
// 格式化字符串(类似 sprintf
// ------------------------------------------------------------------------
template<typename... Args>
static String format(const char* fmt, Args&&... args);
private:
std::string data_; // 内部使用 UTF-8 存储
// UTF-8 辅助函数
static size_t utf8Length(const std::string& str);
static std::string utf8Substring(const std::string& str, size_t start, size_t len);
static size_t utf8CharIndexToByteIndex(const std::string& str, size_t charIndex);
};
// ============================================================================
// 内联实现
// ============================================================================
inline String::String(const wchar_t* wide) {
if (wide) {
std::wstring wstr(wide);
*this = fromWide(wstr);
}
}
inline String::String(const std::wstring& wide) {
*this = fromWide(wide);
}
inline String::String(const char16_t* utf16) {
if (utf16) {
std::u16string u16str(utf16);
*this = fromUtf16(u16str);
}
}
inline String::String(const std::u16string& utf16) {
*this = fromUtf16(utf16);
}
inline String::String(const char32_t* utf32) {
if (utf32) {
std::u32string u32str(utf32);
*this = fromUtf32(u32str);
}
}
inline String::String(const std::u32string& utf32) {
*this = fromUtf32(utf32);
}
// 静态工厂方法
inline String String::fromUtf8(const char* utf8) {
return String(utf8 ? utf8 : "");
}
inline String String::fromUtf8(const std::string& utf8) {
return String(utf8);
}
// 编码转换实现
inline std::wstring String::toWide() const {
if (data_.empty()) return std::wstring();
if constexpr (sizeof(wchar_t) == 4) {
// wchar_t is 32-bit (Linux/Switch): same as UTF-32
std::u32string u32 = toUtf32();
return std::wstring(u32.begin(), u32.end());
} else {
// wchar_t is 16-bit (Windows): same as UTF-16
std::u16string u16 = toUtf16();
return std::wstring(u16.begin(), u16.end());
}
}
inline String String::fromWide(const std::wstring& wide) {
if (wide.empty()) return String();
if constexpr (sizeof(wchar_t) == 4) {
std::u32string u32(wide.begin(), wide.end());
return fromUtf32(u32);
} else {
std::u16string u16(wide.begin(), wide.end());
return fromUtf16(u16);
}
}
inline String String::fromWide(const wchar_t* wide) {
return wide ? fromWide(std::wstring(wide)) : String();
}
inline std::u16string String::toUtf16() const {
if (data_.empty()) return std::u16string();
// UTF-8 → UTF-32 → UTF-16 (with surrogate pairs)
std::u32string u32 = toUtf32();
std::u16string result;
result.reserve(u32.size());
for (char32_t ch : u32) {
if (ch <= 0xFFFF) {
result.push_back(static_cast<char16_t>(ch));
} else if (ch <= 0x10FFFF) {
// Surrogate pair
ch -= 0x10000;
result.push_back(static_cast<char16_t>(0xD800 | (ch >> 10)));
result.push_back(static_cast<char16_t>(0xDC00 | (ch & 0x3FF)));
}
}
return result;
}
inline String String::fromUtf16(const std::u16string& utf16) {
if (utf16.empty()) return String();
// UTF-16 → UTF-32 → UTF-8
std::u32string u32;
u32.reserve(utf16.size());
for (size_t i = 0; i < utf16.size(); ++i) {
char16_t cu = utf16[i];
char32_t ch;
if (cu >= 0xD800 && cu <= 0xDBFF && i + 1 < utf16.size()) {
// High surrogate
char16_t cl = utf16[i + 1];
if (cl >= 0xDC00 && cl <= 0xDFFF) {
ch = 0x10000 + ((static_cast<char32_t>(cu - 0xD800) << 10) |
(cl - 0xDC00));
++i;
} else {
ch = cu; // Invalid, pass through
}
} else {
ch = cu;
}
u32.push_back(ch);
}
return fromUtf32(u32);
}
inline String String::fromUtf16(const char16_t* utf16) {
return utf16 ? fromUtf16(std::u16string(utf16)) : String();
}
inline std::u32string String::toUtf32() const {
std::u32string result;
result.reserve(length());
const char* ptr = data_.c_str();
const char* end = ptr + data_.size();
while (ptr < end) {
char32_t ch = 0;
unsigned char byte = static_cast<unsigned char>(*ptr);
if ((byte & 0x80) == 0) {
// 1-byte sequence
ch = byte;
ptr += 1;
} else if ((byte & 0xE0) == 0xC0) {
// 2-byte sequence
ch = (byte & 0x1F) << 6;
ch |= (static_cast<unsigned char>(ptr[1]) & 0x3F);
ptr += 2;
} else if ((byte & 0xF0) == 0xE0) {
// 3-byte sequence
ch = (byte & 0x0F) << 12;
ch |= (static_cast<unsigned char>(ptr[1]) & 0x3F) << 6;
ch |= (static_cast<unsigned char>(ptr[2]) & 0x3F);
ptr += 3;
} else if ((byte & 0xF8) == 0xF0) {
// 4-byte sequence
ch = (byte & 0x07) << 18;
ch |= (static_cast<unsigned char>(ptr[1]) & 0x3F) << 12;
ch |= (static_cast<unsigned char>(ptr[2]) & 0x3F) << 6;
ch |= (static_cast<unsigned char>(ptr[3]) & 0x3F);
ptr += 4;
} else {
// Invalid UTF-8, skip
ptr += 1;
continue;
}
result.push_back(ch);
}
return result;
}
inline String String::fromUtf32(const std::u32string& utf32) {
std::string result;
for (char32_t ch : utf32) {
if (ch <= 0x7F) {
// 1-byte
result.push_back(static_cast<char>(ch));
} else if (ch <= 0x7FF) {
// 2-byte
result.push_back(static_cast<char>(0xC0 | ((ch >> 6) & 0x1F)));
result.push_back(static_cast<char>(0x80 | (ch & 0x3F)));
} else if (ch <= 0xFFFF) {
// 3-byte
result.push_back(static_cast<char>(0xE0 | ((ch >> 12) & 0x0F)));
result.push_back(static_cast<char>(0x80 | ((ch >> 6) & 0x3F)));
result.push_back(static_cast<char>(0x80 | (ch & 0x3F)));
} else if (ch <= 0x10FFFF) {
// 4-byte
result.push_back(static_cast<char>(0xF0 | ((ch >> 18) & 0x07)));
result.push_back(static_cast<char>(0x80 | ((ch >> 12) & 0x3F)));
result.push_back(static_cast<char>(0x80 | ((ch >> 6) & 0x3F)));
result.push_back(static_cast<char>(0x80 | (ch & 0x3F)));
}
}
return String(result);
}
inline String String::fromUtf32(const char32_t* utf32) {
return utf32 ? fromUtf32(std::u32string(utf32)) : String();
}
// UTF-8 辅助函数
inline size_t String::utf8Length(const std::string& str) {
size_t len = 0;
for (unsigned char c : str) {
if ((c & 0xC0) != 0x80) {
++len;
}
}
return len;
}
inline size_t String::length() const {
return utf8Length(data_);
}
inline size_t String::utf8CharIndexToByteIndex(const std::string& str, size_t charIndex) {
size_t charCount = 0;
size_t byteIndex = 0;
while (byteIndex < str.size() && charCount < charIndex) {
unsigned char c = static_cast<unsigned char>(str[byteIndex]);
if ((c & 0xC0) != 0x80) {
++charCount;
}
++byteIndex;
}
return byteIndex;
}
inline std::string String::utf8Substring(const std::string& str, size_t start, size_t len) {
size_t startByte = utf8CharIndexToByteIndex(str, start);
size_t endByte = (len == npos) ? str.size() : utf8CharIndexToByteIndex(str, start + len);
return str.substr(startByte, endByte - startByte);
}
inline String String::substring(size_t start, size_t len) const {
return String(utf8Substring(data_, start, len));
}
// 字符串操作
inline String& String::append(const String& other) {
data_.append(other.data_);
return *this;
}
inline String& String::append(const char* utf8) {
if (utf8) data_.append(utf8);
return *this;
}
inline size_t String::find(const String& substr, size_t start) const {
if (substr.empty() || start >= length()) return npos;
size_t startByte = utf8CharIndexToByteIndex(data_, start);
size_t pos = data_.find(substr.data_, startByte);
if (pos == std::string::npos) return npos;
// 转换字节索引到字符索引
return utf8Length(data_.substr(0, pos));
}
inline bool String::startsWith(const String& prefix) const {
if (prefix.data_.size() > data_.size()) return false;
return data_.compare(0, prefix.data_.size(), prefix.data_) == 0;
}
inline bool String::endsWith(const String& suffix) const {
if (suffix.data_.size() > data_.size()) return false;
return data_.compare(data_.size() - suffix.data_.size(), suffix.data_.size(), suffix.data_) == 0;
}
inline String String::trim() const {
size_t start = 0;
while (start < data_.size() && std::isspace(static_cast<unsigned char>(data_[start]))) {
++start;
}
size_t end = data_.size();
while (end > start && std::isspace(static_cast<unsigned char>(data_[end - 1]))) {
--end;
}
return String(data_.substr(start, end - start));
}
inline std::vector<String> String::split(const String& delimiter) const {
std::vector<String> result;
if (delimiter.empty()) {
result.push_back(*this);
return result;
}
size_t start = 0;
size_t end = data_.find(delimiter.data_, start);
while (end != std::string::npos) {
result.push_back(String(data_.substr(start, end - start)));
start = end + delimiter.data_.size();
end = data_.find(delimiter.data_, start);
}
result.push_back(String(data_.substr(start)));
return result;
}
inline String String::replaceAll(const String& from, const String& to) const {
if (from.empty()) return *this;
std::string result = data_;
size_t pos = 0;
while ((pos = result.find(from.data_, pos)) != std::string::npos) {
result.replace(pos, from.data_.size(), to.data_);
pos += to.data_.size();
}
return String(result);
}
// 运算符重载
inline String String::operator+(const String& other) const {
String result(*this);
result.append(other);
return result;
}
inline String& String::operator+=(const String& other) {
return append(other);
}
// 格式化字符串
#include <cstdio>
template<typename... Args>
String String::format(const char* fmt, Args&&... args) {
int size = std::snprintf(nullptr, 0, fmt, std::forward<Args>(args)...);
if (size <= 0) return String();
std::string result(size, '\0');
std::snprintf(&result[0], size + 1, fmt, std::forward<Args>(args)...);
return String(result);
}
// 全局运算符
inline String operator+(const char* lhs, const String& rhs) {
return String(lhs) + rhs;
}
} // namespace easy2d