diff --git a/Extra2D/include/extra2d/app/application.h b/Extra2D/include/extra2d/app/application.h index 088ca22..52774e5 100644 --- a/Extra2D/include/extra2d/app/application.h +++ b/Extra2D/include/extra2d/app/application.h @@ -1,80 +1,113 @@ #pragma once -#include +#include +#include +#include #include -#include -#include +#include namespace extra2d { -class Renderer; -class Input; -class EventQueue; -class EventDispatcher; - -struct AppConfig { - std::string title = "Extra2D Application"; - int width = 1280; - int height = 720; - bool fullscreen = true; - bool vsync = true; - int fpsLimit = 60; -}; +class GLFWWindow; +class WindowModule; +/** + * @brief 应用程序类 + */ class Application { public: - static Application &instance(); + static Application &get(); Application(const Application &) = delete; Application &operator=(const Application &) = delete; - bool init(const AppConfig &config); + /** + * @brief 应用信息 + */ + std::string appName = "Extra2D App"; + std::string appVersion = "1.0.0"; + std::string organization = ""; + + /** + * @brief 注册模块 + * @tparam T 模块类型 + * @tparam Args 构造函数参数 + * @return 模块指针 + */ + template T *use(Args &&...args) { + return Registry::instance().use(std::forward(args)...); + } + + /** + * @brief 获取模块 + * @tparam T 模块类型 + * @return 模块指针 + */ + template T *get() const { return Registry::instance().get(); } + + /** + * @brief 初始化 + * @return 初始化成功返回 true + */ + bool init(); + + /** + * @brief 关闭 + */ void shutdown(); + + /** + * @brief 运行主循环 + */ void run(); + + /** + * @brief 请求退出 + */ void quit(); + /** + * @brief 暂停 + */ void pause(); + + /** + * @brief 恢复 + */ void resume(); - bool isPaused() const { return paused_; } - bool isRunning() const { return running_; } - Window &window() { return *window_; } - Renderer &renderer() { return *renderer_; } - Input &input(); - EventQueue &eventQueue(); - EventDispatcher &eventDispatcher(); + bool paused() const { return paused_; } + bool running() const { return running_; } - float deltaTime() 
const { return deltaTime_; } - float totalTime() const { return totalTime_; } - int fps() const { return currentFps_; } + /** + * @brief 获取窗口 + * @return 窗口指针 + */ + GLFWWindow *window(); - const AppConfig &getConfig() const { return config_; } + f32 dt() const { return dt_; } + f32 totalTime() const { return totalTime_; } + int fps() const { return fps_; } private: - Application() = default; + Application(); ~Application(); void mainLoop(); void update(); void render(); - AppConfig config_; - - UniquePtr window_; - UniquePtr renderer_; - UniquePtr eventQueue_; - UniquePtr eventDispatcher_; - bool initialized_ = false; bool running_ = false; bool paused_ = false; + bool shouldQuit_ = false; - float deltaTime_ = 0.0f; - float totalTime_ = 0.0f; - double lastFrameTime_ = 0.0; + f32 dt_ = 0.0f; + f32 totalTime_ = 0.0f; + f64 lastFrameTime_ = 0.0; int frameCount_ = 0; - float fpsTimer_ = 0.0f; - int currentFps_ = 0; + f32 fpsTimer_ = 0.0f; + int fps_ = 0; }; } // namespace extra2d diff --git a/Extra2D/include/extra2d/asset/asset.h b/Extra2D/include/extra2d/asset/asset.h new file mode 100644 index 0000000..b6e929e --- /dev/null +++ b/Extra2D/include/extra2d/asset/asset.h @@ -0,0 +1,477 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace extra2d { + +// --------------------------------------------------------------------------- +// Asset - 资源基类 +// --------------------------------------------------------------------------- + +/** + * @brief 资源基类 + * + * 所有资源类型的基类,继承自 enable_shared_from_this 支持自动引用计数。 + * 提供资源的基本属性和生命周期管理接口。 + */ +class Asset : public std::enable_shared_from_this { +public: + virtual ~Asset() = default; + + /** + * @brief 获取资源类型 + * @return 资源类型枚举值 + */ + virtual AssetType type() const = 0; + + /** + * @brief 检查资源是否已加载 + * @return 已加载返回 true + */ + virtual bool loaded() const = 0; + + /** + * @brief 获取资源内存占用大小 + * @return 内存占用字节数 + */ + virtual size_t memSize() const = 0; + + /** + * @brief 获取资源ID + * 
@return 资源ID + */ + const AssetID &id() const { return id_; } + + /** + * @brief 获取资源路径 + * @return 资源路径 + */ + const std::string &path() const { return path_; } + + /** + * @brief 获取资源状态 + * @return 资源状态 + */ + AssetState state() const { return state_; } + + /** + * @brief 获取当前引用计数 + * @return 引用计数(用于调试和监控) + */ + long refs() const { return shared_from_this().use_count(); } + +protected: + AssetID id_; + std::string path_; + std::atomic state_{AssetState::Unloaded}; + + /** + * @brief 设置资源状态 + * @param state 新状态 + */ + void setState(AssetState state) { + state_.store(state, std::memory_order_release); + } + + /** + * @brief 设置资源ID + * @param id 资源ID + */ + void setId(const AssetID &id) { id_ = id; } + + /** + * @brief 设置资源路径 + * @param path 资源路径 + */ + void setPath(const std::string &path) { path_ = path; } + + friend class AssetCache; + friend class AssetService; +}; + +// --------------------------------------------------------------------------- +// TextureAsset - 纹理资源 +// --------------------------------------------------------------------------- + +/** + * @brief 纹理资源类 + * + * 存储纹理图像数据,支持多种像素格式。 + */ +class TextureAsset : public Asset { +public: + AssetType type() const override { return AssetType::Texture; } + + bool loaded() const override { + return state_.load(std::memory_order_acquire) == AssetState::Loaded && + data_ != nullptr; + } + + size_t memSize() const override { + return static_cast(width_) * height_ * channels_; + } + + /** + * @brief 获取纹理宽度 + * @return 宽度(像素) + */ + int width() const { return width_; } + + /** + * @brief 获取纹理高度 + * @return 高度(像素) + */ + int height() const { return height_; } + + /** + * @brief 获取通道数 + * @return 通道数(1-4) + */ + int channels() const { return channels_; } + + /** + * @brief 获取像素数据 + * @return 像素数据指针 + */ + const u8 *data() const { return data_.get(); } + + /** + * @brief 获取像素数据大小 + * @return 数据大小(字节) + */ + size_t dataSize() const { return memSize(); } + + /** + * @brief 设置纹理数据 + * @param width 宽度 + * @param 
height 高度 + * @param channels 通道数 + * @param data 像素数据(转移所有权) + */ + void setData(int width, int height, int channels, Unique data) { + width_ = width; + height_ = height; + channels_ = channels; + data_ = std::move(data); + setState(AssetState::Loaded); + } + + /** + * @brief 释放纹理数据 + */ + void release() { + data_.reset(); + width_ = 0; + height_ = 0; + channels_ = 0; + setState(AssetState::Unloaded); + } + +private: + int width_ = 0; + int height_ = 0; + int channels_ = 0; + Unique data_; +}; + +// --------------------------------------------------------------------------- +// FontAsset - 字体资源 +// --------------------------------------------------------------------------- + +/** + * @brief 字体资源类 + * + * 存储TrueType字体数据,支持字形渲染。 + * 使用 Pimpl 模式隐藏 stbtt_fontinfo 实现细节。 + */ +class FontAsset : public Asset { +public: + FontAsset(); + ~FontAsset() override; + + AssetType type() const override { return AssetType::Font; } + + bool loaded() const override; + + size_t memSize() const override { return data_.size(); } + + /** + * @brief 获取指定像素高度的缩放因子 + * @param pixels 像素高度 + * @return 缩放因子 + */ + float scaleForPixelHeight(float pixels) const; + + /** + * @brief 获取字体数据 + * @return 字体数据指针 + */ + const u8 *data() const { return data_.data(); } + + /** + * @brief 获取字体数据大小 + * @return 数据大小(字节) + */ + size_t dataSize() const { return data_.size(); } + + /** + * @brief 设置字体数据 + * @param data 字体数据 + * @return 成功返回 true + */ + bool setData(std::vector data); + + /** + * @brief 释放字体数据 + */ + void release(); + +private: + std::vector data_; + class Impl; + Unique impl_; +}; + +// --------------------------------------------------------------------------- +// ShaderAsset - 着色器资源 +// --------------------------------------------------------------------------- + +/** + * @brief 着色器资源类 + * + * 存储顶点和片段着色器源代码。 + */ +class ShaderAsset : public Asset { +public: + AssetType type() const override { return AssetType::Shader; } + + bool loaded() const override { + return 
state_.load(std::memory_order_acquire) == AssetState::Loaded; + } + + size_t memSize() const override { + return vertexSrc_.size() + fragmentSrc_.size(); + } + + /** + * @brief 获取顶点着色器源码 + * @return 顶点着色器源码 + */ + const std::string &vertexSource() const { return vertexSrc_; } + + /** + * @brief 获取片段着色器源码 + * @return 片段着色器源码 + */ + const std::string &fragmentSource() const { return fragmentSrc_; } + + /** + * @brief 设置着色器源码 + * @param vertex 顶点着色器源码 + * @param fragment 片段着色器源码 + */ + void setSource(std::string vertex, std::string fragment) { + vertexSrc_ = std::move(vertex); + fragmentSrc_ = std::move(fragment); + setState(AssetState::Loaded); + } + + /** + * @brief 释放着色器源码 + */ + void release() { + vertexSrc_.clear(); + fragmentSrc_.clear(); + setState(AssetState::Unloaded); + } + +private: + std::string vertexSrc_; + std::string fragmentSrc_; +}; + +// --------------------------------------------------------------------------- +// AudioAsset - 音频资源 +// --------------------------------------------------------------------------- + +/** + * @brief 音频格式枚举 + */ +enum class AudioFormat : u8 { PCM = 0, MP3 = 1, OGG = 2, WAV = 3 }; + +/** + * @brief 音频资源类 + * + * 存储音频数据,支持多种格式。 + */ +class AudioAsset : public Asset { +public: + AssetType type() const override { return AssetType::Audio; } + + bool loaded() const override { + return state_.load(std::memory_order_acquire) == AssetState::Loaded && + !data_.empty(); + } + + size_t memSize() const override { return data_.size(); } + + /** + * @brief 获取音频格式 + * @return 音频格式 + */ + AudioFormat format() const { return format_; } + + /** + * @brief 获取声道数 + * @return 声道数 + */ + int channels() const { return channels_; } + + /** + * @brief 获取采样率 + * @return 采样率 + */ + int sampleRate() const { return sampleRate_; } + + /** + * @brief 获取每样本位数 + * @return 每样本位数 + */ + int bitsPerSample() const { return bitsPerSample_; } + + /** + * @brief 获取时长(秒) + * @return 时长 + */ + float duration() const { return duration_; } + + /** + * @brief 
获取音频数据 + * @return 音频数据指针 + */ + const u8 *data() const { return data_.data(); } + + /** + * @brief 获取音频数据大小 + * @return 数据大小(字节) + */ + size_t dataSize() const { return data_.size(); } + + /** + * @brief 是否为流式音频 + * @return 流式音频返回 true + */ + bool streaming() const { return streaming_; } + + /** + * @brief 设置音频数据 + * @param format 音频格式 + * @param channels 声道数 + * @param sampleRate 采样率 + * @param bitsPerSample 每样本位数 + * @param data 音频数据 + */ + void setData(AudioFormat format, int channels, int sampleRate, + int bitsPerSample, std::vector data) { + format_ = format; + channels_ = channels; + sampleRate_ = sampleRate; + bitsPerSample_ = bitsPerSample; + data_ = std::move(data); + + if (sampleRate > 0 && channels > 0 && bitsPerSample > 0) { + size_t bytesPerSecond = + static_cast(sampleRate) * channels * (bitsPerSample / 8); + if (bytesPerSecond > 0) { + duration_ = static_cast(data_.size()) / + static_cast(bytesPerSecond); + } + } + + streaming_ = duration_ > 5.0f; + setState(AssetState::Loaded); + } + + /** + * @brief 释放音频数据 + */ + void release() { + data_.clear(); + format_ = AudioFormat::PCM; + channels_ = 0; + sampleRate_ = 0; + bitsPerSample_ = 0; + duration_ = 0.0f; + streaming_ = false; + setState(AssetState::Unloaded); + } + +private: + AudioFormat format_ = AudioFormat::PCM; + int channels_ = 0; + int sampleRate_ = 0; + int bitsPerSample_ = 0; + float duration_ = 0.0f; + std::vector data_; + bool streaming_ = false; +}; + +// --------------------------------------------------------------------------- +// DataAsset - 通用数据资源 +// --------------------------------------------------------------------------- + +/** + * @brief 通用数据资源类 + * + * 存储任意二进制数据。 + */ +class DataAsset : public Asset { +public: + AssetType type() const override { return AssetType::Data; } + + bool loaded() const override { + return state_.load(std::memory_order_acquire) == AssetState::Loaded; + } + + size_t memSize() const override { return data_.size(); } + + /** + * @brief 获取数据 + * @return 
数据指针 + */ + const u8 *data() const { return data_.data(); } + + /** + * @brief 获取数据大小 + * @return 数据大小(字节) + */ + size_t size() const { return data_.size(); } + + /** + * @brief 设置数据 + * @param data 数据 + */ + void setData(std::vector data) { + data_ = std::move(data); + setState(AssetState::Loaded); + } + + /** + * @brief 释放数据 + */ + void release() { + data_.clear(); + setState(AssetState::Unloaded); + } + +private: + std::vector data_; +}; + +} // namespace extra2d diff --git a/Extra2D/include/extra2d/asset/asset_cache.h b/Extra2D/include/extra2d/asset/asset_cache.h new file mode 100644 index 0000000..4fb5d14 --- /dev/null +++ b/Extra2D/include/extra2d/asset/asset_cache.h @@ -0,0 +1,263 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace extra2d { + +// --------------------------------------------------------------------------- +// CacheEntry - 缓存条目 +// --------------------------------------------------------------------------- + +/** + * @brief 缓存条目结构 + * + * 存储资源引用和访问信息,用于LRU缓存管理。 + */ +struct CacheEntry { + Ref asset; + std::chrono::steady_clock::time_point lastAccess; + size_t accessCount = 0; + + /** + * @brief 构造缓存条目 + * @param a 资源引用 + */ + explicit CacheEntry(Ref a) + : asset(std::move(a)), + lastAccess(std::chrono::steady_clock::now()), + accessCount(1) {} + + /** + * @brief 更新访问信息 + */ + void touch() { + lastAccess = std::chrono::steady_clock::now(); + ++accessCount; + } +}; + +// --------------------------------------------------------------------------- +// AssetCache - 资源缓存 +// --------------------------------------------------------------------------- + +/** + * @brief 资源缓存类 + * + * 实现享元模式,提供资源共享和缓存管理功能。 + * + * 特性: + * - LRU缓存淘汰策略 + * - 线程安全(读写锁) + * - 引用计数自动回收 + * - 缓存统计和监控 + * - 内存上限管理 + */ +class AssetCache { +public: + /** + * @brief 构造函数 + * @param limit 缓存内存上限(字节),0表示无限制 + */ + explicit AssetCache(size_t limit = 0); + + 
~AssetCache() = default; + + AssetCache(const AssetCache&) = delete; + AssetCache& operator=(const AssetCache&) = delete; + + // ------------------------------------------------------------------------- + // 资源管理 + // ------------------------------------------------------------------------- + + /** + * @brief 添加资源到缓存 + * @tparam T 资源类型 + * @param asset 资源引用 + * @return 资源句柄 + */ + template + AssetHandle add(Ref asset) { + static_assert(std::is_base_of_v, + "T must derive from Asset"); + + if (!asset) { + return AssetHandle(); + } + + std::unique_lock lock(mutex_); + + AssetID id = asset->id(); + + size_t memSize = asset->memSize(); + bytes_ += memSize; + + auto it = lruList_.insert(lruList_.end(), id); + + entries_[id] = CacheEntryData{ + ptr::makeUnique(std::static_pointer_cast(asset)), + it + }; + + ++stats_.count; + + if (limit_ > 0 && bytes_ > limit_) { + evict(); + } + + return AssetHandle(id, Weak(asset)); + } + + /** + * @brief 从缓存获取资源 + * @tparam T 资源类型 + * @param id 资源ID + * @return 资源句柄 + */ + template + AssetHandle get(const AssetID& id) { + static_assert(std::is_base_of_v, + "T must derive from Asset"); + + std::unique_lock lock(mutex_); + + auto it = entries_.find(id); + if (it == entries_.end()) { + ++stats_.misses; + return AssetHandle(); + } + + it->second.entry->touch(); + + lruList_.erase(it->second.lruIterator); + it->second.lruIterator = lruList_.insert(lruList_.end(), id); + + ++stats_.hits; + + auto typedAsset = std::static_pointer_cast(it->second.entry->asset); + return AssetHandle(id, Weak(typedAsset)); + } + + /** + * @brief 检查缓存是否包含资源 + * @param id 资源ID + * @return 包含返回 true + */ + bool has(const AssetID& id) const; + + /** + * @brief 从缓存移除资源 + * @param id 资源ID + * @return 移除成功返回 true + */ + bool remove(const AssetID& id); + + // ------------------------------------------------------------------------- + // 缓存管理 + // ------------------------------------------------------------------------- + + /** + * @brief 设置缓存内存上限 + * @param limit 
上限(字节),0表示无限制 + */ + void setLimit(size_t limit); + + /** + * @brief 获取缓存内存上限 + * @return 上限(字节) + */ + size_t limit() const { return limit_; } + + /** + * @brief 获取当前缓存内存使用量 + * @return 使用量(字节) + */ + size_t bytes() const { return bytes_; } + + /** + * @brief 获取缓存条目数量 + * @return 条目数量 + */ + size_t count() const; + + /** + * @brief 获取当前缓存内存使用量(别名) + * @return 使用量(字节) + */ + size_t size() const { return bytes_; } + + /** + * @brief 记录缓存命中 + */ + void hit() { ++stats_.hits; } + + /** + * @brief 记录缓存未命中 + */ + void miss() { ++stats_.misses; } + + /** + * @brief 清理无外部引用的资源 + * @return 清理的资源数量 + */ + size_t purge(); + + /** + * @brief 清空所有缓存 + */ + void clear(); + + /** + * @brief 获取缓存统计信息 + * @return 统计信息 + */ + CacheStats stats() const; + + /** + * @brief 重置统计信息 + */ + void resetStats(); + +private: + /** + * @brief 缓存条目数据(包含LRU迭代器) + */ + struct CacheEntryData { + Ref entry; + std::list::iterator lruIterator; + }; + + /** + * @brief 淘汰资源(LRU策略) + */ + void evict(); + + /** + * @brief 检查资源是否可以被淘汰 + * @param entry 缓存条目 + * @return 可淘汰返回 true + */ + bool canEvict(const CacheEntry& entry) const; + + mutable std::shared_mutex mutex_; + std::unordered_map entries_; + std::list lruList_; + + size_t limit_ = 0; + size_t bytes_ = 0; + mutable CacheStats stats_; +}; + +} diff --git a/Extra2D/include/extra2d/asset/asset_handle.h b/Extra2D/include/extra2d/asset/asset_handle.h new file mode 100644 index 0000000..7bdce63 --- /dev/null +++ b/Extra2D/include/extra2d/asset/asset_handle.h @@ -0,0 +1,304 @@ +#pragma once + +#include +#include +#include + +namespace extra2d { + +// --------------------------------------------------------------------------- +// AssetHandleBase - 资源句柄非模板基类 +// --------------------------------------------------------------------------- + +/** + * @brief 资源句柄非模板基类 + * + * 用于类型擦除,允许在容器中存储不同类型的句柄。 + */ +class AssetHandleBase { +public: + AssetHandleBase() = default; + + /** + * @brief 从资源ID和弱引用构造 + * @param id 资源ID + * @param ref 资源弱引用 + */ + 
AssetHandleBase(const AssetID& id, Weak ref) + : id_(id), cacheRef_(std::move(ref)) {} + + /** + * @brief 从强引用构造 + * @param ptr 资源强引用 + */ + explicit AssetHandleBase(Ref ptr) + : id_(ptr ? ptr->id() : AssetID()), + cacheRef_(ptr) {} + + /** + * @brief 检查句柄是否有效 + * @return 有效返回 true + */ + bool valid() const { + return !cacheRef_.expired(); + } + + /** + * @brief 获取资源强引用 + * @return 资源强引用 + */ + Ref get() const { + return cacheRef_.lock(); + } + + /** + * @brief 获取资源ID + * @return 资源ID + */ + const AssetID& id() const { return id_; } + + /** + * @brief 获取资源路径 + * @return 资源路径 + */ + const std::string& path() const { return id_.path; } + + /** + * @brief 隐式转换为bool + */ + explicit operator bool() const { return valid(); } + + /** + * @brief 重置句柄 + */ + void reset() { + id_ = AssetID(); + cacheRef_.reset(); + } + +protected: + AssetID id_; + Weak cacheRef_; +}; + +// --------------------------------------------------------------------------- +// AssetHandle - 资源句柄 +// --------------------------------------------------------------------------- + +/** + * @brief 资源句柄模板类 + * + * 使用强类型句柄替代裸指针,提供类型安全和自动生命周期管理。 + * 内部使用 weak_ptr 弱引用,不阻止资源回收。 + * + * 特性: + * - 类型安全:编译期检查资源类型 + * - 自动生命周期:资源无引用时自动回收 + * - 弱引用:不阻止缓存清理 + * - 空安全:使用前检查 valid() + * + * @tparam T 资源类型,必须继承自 Asset + */ +template +class AssetHandle : public AssetHandleBase { + static_assert(std::is_base_of_v, + "T must derive from Asset"); + +public: + /** + * @brief 默认构造函数 + */ + AssetHandle() = default; + + /** + * @brief 从基类句柄构造 + * @param base 基类句柄 + */ + explicit AssetHandle(const AssetHandleBase& base) + : AssetHandleBase(base) {} + + /** + * @brief 从资源ID和弱引用构造 + * @param id 资源ID + * @param ref 资源弱引用 + */ + AssetHandle(const AssetID& id, Weak ref) + : AssetHandleBase(id, std::move(ref)) {} + + /** + * @brief 从强引用构造 + * @param ptr 资源强引用 + */ + explicit AssetHandle(Ref ptr) + : AssetHandleBase(ptr) {} + + /** + * @brief 获取资源强引用 + * @return 资源强引用,如果资源已被回收返回 nullptr + */ + Ref get() const { + return 
std::static_pointer_cast(cacheRef_.lock()); + } + + /** + * @brief 解引用操作符 + * @return 资源指针 + * @note 使用前应检查 valid() + */ + T* operator->() const { + auto ptr = get(); + return ptr ? ptr.get() : nullptr; + } + + /** + * @brief 解引用操作符 + * @return 资源引用 + * @note 使用前应检查 valid() + */ + T& operator*() const { + auto ptr = get(); + return *ptr; + } + + /** + * @brief 相等比较 + * @param other 其他句柄 + * @return 相等返回 true + */ + bool operator==(const AssetHandle& other) const { + return id_ == other.id_; + } + + /** + * @brief 不等比较 + * @param other 其他句柄 + * @return 不等返回 true + */ + bool operator!=(const AssetHandle& other) const { + return id_ != other.id_; + } + + /** + * @brief 小于比较(用于有序容器) + * @param other 其他句柄 + * @return 小于返回 true + */ + bool operator<(const AssetHandle& other) const { + return id_ < other.id_; + } + + /** + * @brief 重置句柄 + */ + void reset() { + id_ = AssetID(); + cacheRef_.reset(); + } + + /** + * @brief 检查资源是否已加载 + * @return 已加载返回 true + */ + bool loaded() const { + auto ptr = get(); + return ptr && ptr->loaded(); + } + + /** + * @brief 获取资源状态 + * @return 资源状态 + */ + AssetState state() const { + auto ptr = get(); + return ptr ? 
ptr->state() : AssetState::Unloaded; + } +}; + +// --------------------------------------------------------------------------- +// AssetLoadResult - 资源加载结果 +// --------------------------------------------------------------------------- + +/** + * @brief 资源加载结果 + * + * 封装资源加载的结果状态,支持成功、失败、加载中等状态。 + * + * @tparam T 资源类型 + */ +template +struct AssetLoadResult { + AssetHandle handle; + AssetState state = AssetState::Unloaded; + std::string error; + + /** + * @brief 检查是否成功 + * @return 成功返回 true + */ + bool success() const { + return state == AssetState::Loaded && handle.valid(); + } + + /** + * @brief 检查是否失败 + * @return 失败返回 true + */ + bool failed() const { + return state == AssetState::Failed; + } + + /** + * @brief 检查是否正在加载 + * @return 正在加载返回 true + */ + bool loading() const { + return state == AssetState::Loading; + } + + /** + * @brief 创建成功结果 + * @param handle 资源句柄 + * @return 加载结果 + */ + static AssetLoadResult ok(AssetHandle handle) { + return { std::move(handle), AssetState::Loaded, "" }; + } + + /** + * @brief 创建失败结果 + * @param error 错误信息 + * @return 加载结果 + */ + static AssetLoadResult err(const std::string& error) { + return { AssetHandle(), AssetState::Failed, error }; + } + + /** + * @brief 创建加载中结果 + * @param handle 资源句柄(可能为空) + * @return 加载结果 + */ + static AssetLoadResult pending(AssetHandle handle = {}) { + return { std::move(handle), AssetState::Loading, "" }; + } +}; + +// --------------------------------------------------------------------------- +// AssetLoadCallback - 资源加载回调 +// --------------------------------------------------------------------------- + +/** + * @brief 资源加载回调类型 + * @tparam T 资源类型 + */ +template +using AssetLoadCallback = Fn)>; + +/** + * @brief 资源加载结果回调类型 + * @tparam T 资源类型 + */ +template +using AssetLoadResultCallback = Fn)>; + +} diff --git a/Extra2D/include/extra2d/asset/asset_loader.h b/Extra2D/include/extra2d/asset/asset_loader.h new file mode 100644 index 0000000..158a5ee --- /dev/null +++ 
b/Extra2D/include/extra2d/asset/asset_loader.h @@ -0,0 +1,312 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace extra2d { + +// --------------------------------------------------------------------------- +// AssetLoaderBase - 资源加载器非模板基类 +// --------------------------------------------------------------------------- + +/** + * @brief 资源加载器非模板基类 + * + * 用于类型擦除,允许在容器中存储不同类型的加载器。 + */ +class AssetLoaderBase { +public: + virtual ~AssetLoaderBase() = default; + + /** + * @brief 从文件加载资源(返回 Asset 基类指针) + * @param path 文件路径 + * @return 资源实例 + */ + virtual Ref loadBase(const std::string& path) = 0; + + /** + * @brief 从内存数据加载资源 + * @param data 数据指针 + * @param size 数据大小 + * @return 资源实例 + */ + virtual Ref loadFromMemoryBase(const u8* data, size_t size) = 0; + + /** + * @brief 检查是否能加载指定路径的资源 + * @param path 文件路径 + * @return 能加载返回 true + */ + virtual bool canLoad(const std::string& path) const = 0; + + /** + * @brief 获取资源类型 + * @return 资源类型枚举值 + */ + virtual AssetType type() const = 0; + + /** + * @brief 获取支持的文件扩展名列表 + * @return 扩展名列表 + */ + virtual std::vector extensions() const = 0; +}; + +// --------------------------------------------------------------------------- +// AssetLoader - 资源加载器接口 +// --------------------------------------------------------------------------- + +/** + * @brief 资源加载器接口模板 + * + * 使用策略模式支持不同资源类型的加载。 + * 每种资源类型可以实现自己的加载器。 + * + * @tparam T 资源类型 + */ +template +class AssetLoader : public AssetLoaderBase { + static_assert(std::is_base_of_v, + "T must derive from Asset"); + +public: + virtual ~AssetLoader() = default; + + /** + * @brief 从文件加载资源 + * @param path 文件路径 + * @return 资源实例,失败返回 nullptr + */ + virtual Ref load(const std::string& path) = 0; + + /** + * @brief 从内存数据加载资源 + * @param data 数据指针 + * @param size 数据大小 + * @return 资源实例,失败返回 nullptr + */ + virtual Ref loadFromMemory(const u8* data, size_t size) = 0; + + Ref loadBase(const std::string& path) override { + return load(path); + } + + Ref 
loadFromMemoryBase(const u8* data, size_t size) override { + return loadFromMemory(data, size); + } +}; + +// --------------------------------------------------------------------------- +// TextureLoader - 纹理加载器 +// --------------------------------------------------------------------------- + +/** + * @brief 纹理加载器 + * + * 使用 stb_image 加载各种格式的图片文件。 + * 支持 PNG, JPG, BMP, TGA, GIF, PSD, HDR, PIC 等格式。 + */ +class TextureLoader : public AssetLoader { +public: + TextureLoader(); + ~TextureLoader() override; + + Ref load(const std::string& path) override; + Ref loadFromMemory(const u8* data, size_t size) override; + bool canLoad(const std::string& path) const override; + AssetType type() const override { return AssetType::Texture; } + std::vector extensions() const override; + + /** + * @brief 设置期望的通道数 + * @param channels 通道数(1-4),0表示自动 + */ + void setDesiredChannels(int channels); + + /** + * @brief 获取期望的通道数 + * @return 通道数 + */ + int desiredChannels() const { return desiredChannels_; } + +private: + int desiredChannels_ = 4; +}; + +// --------------------------------------------------------------------------- +// FontLoader - 字体加载器 +// --------------------------------------------------------------------------- + +/** + * @brief 字体加载器 + * + * 加载 TrueType 字体文件(.ttf, .otf)。 + */ +class FontLoader : public AssetLoader { +public: + Ref load(const std::string& path) override; + Ref loadFromMemory(const u8* data, size_t size) override; + bool canLoad(const std::string& path) const override; + AssetType type() const override { return AssetType::Font; } + std::vector extensions() const override; +}; + +// --------------------------------------------------------------------------- +// ShaderLoader - 着色器加载器 +// --------------------------------------------------------------------------- + +/** + * @brief 着色器加载器 + * + * 加载着色器源文件,支持以下格式: + * - .vert/.frag: 分离的顶点/片段着色器 + * - .glsl: 合并的着色器文件(使用标记分隔) + */ +class ShaderLoader : public AssetLoader { +public: + Ref load(const std::string& 
path) override; + Ref loadFromMemory(const u8* data, size_t size) override; + bool canLoad(const std::string& path) const override; + AssetType type() const override { return AssetType::Shader; } + std::vector extensions() const override; + + /** + * @brief 设置顶点着色器标记 + * @param marker 标记字符串(默认 "[VERTEX]") + */ + void setVertexMarker(const std::string& marker) { vertexMarker_ = marker; } + + /** + * @brief 设置片段着色器标记 + * @param marker 标记字符串(默认 "[FRAGMENT]") + */ + void setFragmentMarker(const std::string& marker) { fragmentMarker_ = marker; } + +private: + std::string vertexMarker_ = "[VERTEX]"; + std::string fragmentMarker_ = "[FRAGMENT]"; + + /** + * @brief 解析合并的着色器文件 + * @param content 文件内容 + * @param vertex 输出顶点着色器源码 + * @param fragment 输出片段着色器源码 + * @return 成功返回 true + */ + bool parseCombined(const std::string& content, + std::string& vertex, + std::string& fragment); +}; + +// --------------------------------------------------------------------------- +// AudioLoader - 音频加载器 +// --------------------------------------------------------------------------- + +/** + * @brief 音频加载器 + * + * 加载音频文件,支持 WAV 格式。 + * 可扩展支持 MP3, OGG 等格式。 + */ +class AudioLoader : public AssetLoader { +public: + Ref load(const std::string& path) override; + Ref loadFromMemory(const u8* data, size_t size) override; + bool canLoad(const std::string& path) const override; + AssetType type() const override { return AssetType::Audio; } + std::vector extensions() const override; + +private: + /** + * @brief 加载 WAV 格式音频 + * @param data 数据指针 + * @param size 数据大小 + * @return 音频资源 + */ + Ref loadWav(const u8* data, size_t size); +}; + +// --------------------------------------------------------------------------- +// DataLoader - 通用数据加载器 +// --------------------------------------------------------------------------- + +/** + * @brief 通用数据加载器 + * + * 加载任意二进制数据文件。 + */ +class DataLoader : public AssetLoader { +public: + Ref load(const std::string& path) override; + Ref loadFromMemory(const u8* data, 
size_t size) override; + bool canLoad(const std::string& path) const override; + AssetType type() const override { return AssetType::Data; } + std::vector extensions() const override; +}; + +// --------------------------------------------------------------------------- +// AssetLoaderFactory - 加载器工厂 +// --------------------------------------------------------------------------- + +/** + * @brief 加载器工厂 + * + * 根据资源类型或文件扩展名创建对应的加载器。 + * 使用模板方法返回具体类型的加载器。 + */ +class AssetLoaderFactory { +public: + /** + * @brief 根据资源类型创建纹理加载器 + * @return 纹理加载器实例 + */ + static Unique createTextureLoader() { + return ptr::makeUnique(); + } + + /** + * @brief 根据资源类型创建字体加载器 + * @return 字体加载器实例 + */ + static Unique createFontLoader() { + return ptr::makeUnique(); + } + + /** + * @brief 根据资源类型创建着色器加载器 + * @return 着色器加载器实例 + */ + static Unique createShaderLoader() { + return ptr::makeUnique(); + } + + /** + * @brief 根据资源类型创建音频加载器 + * @return 音频加载器实例 + */ + static Unique createAudioLoader() { + return ptr::makeUnique(); + } + + /** + * @brief 根据资源类型创建数据加载器 + * @return 数据加载器实例 + */ + static Unique createDataLoader() { + return ptr::makeUnique(); + } + + /** + * @brief 根据文件扩展名获取资源类型 + * @param extension 文件扩展名(包含点,如 ".png") + * @return 资源类型,无法识别返回 AssetType::Unknown + */ + static AssetType getTypeByExtension(const std::string& extension); +}; + +} diff --git a/Extra2D/include/extra2d/asset/asset_pack.h b/Extra2D/include/extra2d/asset/asset_pack.h new file mode 100644 index 0000000..a2801bb --- /dev/null +++ b/Extra2D/include/extra2d/asset/asset_pack.h @@ -0,0 +1,399 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace extra2d { + +// --------------------------------------------------------------------------- +// AssetPack - 资源包 +// --------------------------------------------------------------------------- + +/** + * @brief 资源包类 + * + * 支持从打包文件中加载资源。 + * 资源包格式: + * - 头部:AssetPackageHeader + * - 索引表:条目数量 + AssetPackageEntry 数组 
+ * - 数据区:按索引顺序存储的资源数据 + */ +class AssetPack { +public: + AssetPack() = default; + + /** + * @brief 移动构造函数 + * @param other 其他资源包 + */ + AssetPack(AssetPack&& other) noexcept; + + /** + * @brief 移动赋值操作符 + * @param other 其他资源包 + * @return 当前资源包引用 + */ + AssetPack& operator=(AssetPack&& other) noexcept; + + /** + * @brief 析构函数,自动关闭文件 + */ + ~AssetPack(); + + /** + * @brief 打开资源包 + * @param path 资源包文件路径 + * @return 成功返回 true + */ + bool open(const std::string& path); + + /** + * @brief 关闭资源包 + */ + void close(); + + /** + * @brief 检查资源包是否已打开 + * @return 已打开返回 true + */ + bool isOpen() const { return file_.is_open(); } + + /** + * @brief 检查资源是否存在 + * @param id 资源ID + * @return 存在返回 true + */ + bool has(const AssetID& id) const; + + /** + * @brief 检查资源是否存在 + * @param path 资源路径 + * @return 存在返回 true + */ + bool has(const std::string& path) const; + + /** + * @brief 读取资源数据(自动解压/解密) + * @param id 资源ID + * @return 资源数据,失败返回空 + */ + std::vector read(const AssetID& id); + + /** + * @brief 读取资源数据(自动解压/解密) + * @param path 资源路径 + * @return 资源数据,失败返回空 + */ + std::vector read(const std::string& path); + + /** + * @brief 读取原始资源数据(不解压/不解密) + * @param id 资源ID + * @return 原始资源数据 + */ + std::vector readRaw(const AssetID& id); + + /** + * @brief 获取所有资源ID + * @return 资源ID列表 + */ + std::vector assets() const; + + /** + * @brief 获取资源包路径 + * @return 资源包路径 + */ + const std::string& path() const { return path_; } + + /** + * @brief 获取资源包头部 + * @return 头部信息 + */ + const AssetPackageHeader& header() const { return header_; } + + /** + * @brief 获取资源条目数量 + * @return 条目数量 + */ + size_t count() const { return entries_.size(); } + + /** + * @brief 设置数据处理管道 + * @param pipe 处理管道 + */ + void setPipe(DataPipe pipe) { pipe_ = std::move(pipe); } + + /** + * @brief 获取资源条目信息 + * @param id 资源ID + * @return 条目指针,不存在返回 nullptr + */ + const AssetPackageEntry* getEntry(const AssetID& id) const; + +private: + std::string path_; + mutable std::ifstream file_; + AssetPackageHeader header_; + std::unordered_map 
entries_; + DataPipe pipe_; + + /** + * @brief 读取并解析头部 + * @return 成功返回 true + */ + bool readHeader(); + + /** + * @brief 读取并解析索引表 + * @return 成功返回 true + */ + bool readIndex(); + + /** + * @brief 读取条目数据 + * @param entry 条目信息 + * @return 原始数据 + */ + std::vector readEntryData(const AssetPackageEntry& entry); + + AssetPack(const AssetPack&) = delete; + AssetPack& operator=(const AssetPack&) = delete; +}; + +// --------------------------------------------------------------------------- +// PackManager - 资源包管理器 +// --------------------------------------------------------------------------- + +/** + * @brief 资源包管理器 + * + * 管理多个资源包,支持资源查找和加载。 + * 支持挂载/卸载资源包。 + */ +class PackManager { +public: + PackManager() = default; + + /** + * @brief 析构函数,自动卸载所有资源包 + */ + ~PackManager() = default; + + /** + * @brief 挂载资源包 + * @param path 资源包路径 + * @return 成功返回 true + */ + bool mount(const std::string& path); + + /** + * @brief 卸载资源包 + * @param path 资源包路径 + */ + void unmount(const std::string& path); + + /** + * @brief 卸载所有资源包 + */ + void unmountAll(); + + /** + * @brief 查找资源所在的包 + * @param id 资源ID + * @return 资源包指针,未找到返回 nullptr + */ + AssetPack* find(const AssetID& id); + + /** + * @brief 查找资源所在的包 + * @param path 资源路径 + * @return 资源包指针,未找到返回 nullptr + */ + AssetPack* find(const std::string& path); + + /** + * @brief 检查资源是否存在 + * @param id 资源ID + * @return 存在返回 true + */ + bool has(const AssetID& id) const; + + /** + * @brief 检查资源是否存在 + * @param path 资源路径 + * @return 存在返回 true + */ + bool has(const std::string& path) const; + + /** + * @brief 读取资源数据 + * @param id 资源ID + * @return 资源数据 + */ + std::vector read(const AssetID& id); + + /** + * @brief 读取资源数据 + * @param path 资源路径 + * @return 资源数据 + */ + std::vector read(const std::string& path); + + /** + * @brief 设置默认处理管道 + * @param pipe 处理管道 + */ + void setPipe(DataPipe pipe) { defaultPipe_ = std::move(pipe); } + + /** + * @brief 获取默认处理管道 + * @return 处理管道引用 + */ + const DataPipe& pipe() const { return defaultPipe_; } + + /** + * @brief 
获取已挂载的资源包数量 + * @return 资源包数量 + */ + size_t count() const { return packs_.size(); } + + /** + * @brief 获取所有资源ID + * @return 资源ID列表 + */ + std::vector allAssets() const; + + /** + * @brief 获取已挂载的资源包路径列表 + * @return 路径列表 + */ + std::vector mountedPacks() const; + +private: + std::vector> packs_; + DataPipe defaultPipe_; + + PackManager(const PackManager&) = delete; + PackManager& operator=(const PackManager&) = delete; +}; + +// --------------------------------------------------------------------------- +// AssetPackBuilder - 资源包构建器(用于打包工具) +// --------------------------------------------------------------------------- + +/** + * @brief 资源包构建器 + * + * 用于创建资源包文件。 + * 支持添加资源、压缩、加密等操作。 + */ +class AssetPackBuilder { +public: + /** + * @brief 构造函数 + * @param compression 压缩算法 + * @param level 压缩级别 + */ + explicit AssetPackBuilder(Compression compression = Compression::None, int level = 3); + + /** + * @brief 添加资源 + * @param path 资源路径(包内路径) + * @param data 资源数据 + */ + void add(const std::string& path, const std::vector& data); + + /** + * @brief 添加资源(移动语义) + * @param path 资源路径 + * @param data 资源数据 + */ + void add(const std::string& path, std::vector&& data); + + /** + * @brief 添加文件 + * @param filePath 文件路径 + * @param packPath 包内路径(可选,默认使用文件名) + * @return 成功返回 true + */ + bool addFile(const std::string& filePath, const std::string& packPath = ""); + + /** + * @brief 添加目录 + * @param dirPath 目录路径 + * @param prefix 包内路径前缀(可选) + * @return 添加的文件数量 + */ + size_t addDirectory(const std::string& dirPath, const std::string& prefix = ""); + + /** + * @brief 设置加密密钥 + * @param key 加密密钥 + * @param type 加密类型 + */ + void setEncryption(const std::string& key, Decryptor::Type type = Decryptor::Type::XOR); + + /** + * @brief 构建资源包 + * @param outputPath 输出文件路径 + * @return 成功返回 true + */ + bool build(const std::string& outputPath); + + /** + * @brief 清空所有资源 + */ + void clear(); + + /** + * @brief 获取资源数量 + * @return 资源数量 + */ + size_t count() const { return entries_.size(); } + + /** + * @brief 
获取总原始大小 + * @return 总原始大小 + */ + size_t totalOriginalSize() const { return totalOriginalSize_; } + + /** + * @brief 获取总压缩大小 + * @return 总压缩大小 + */ + size_t totalCompressedSize() const { return totalCompressedSize_; } + +private: + struct BuilderEntry { + AssetID id; + std::vector data; + std::vector compressedData; + u64 offset; + u32 compression; + u32 flags; + }; + + Compression compression_; + int level_; + std::string encryptKey_; + Decryptor::Type encryptType_ = Decryptor::Type::None; + std::vector entries_; + size_t totalOriginalSize_ = 0; + size_t totalCompressedSize_ = 0; + + /** + * @brief 处理数据(压缩/加密) + * @param data 原始数据 + * @return 处理后的数据 + */ + std::vector processData(const std::vector& data); +}; + +} diff --git a/Extra2D/include/extra2d/asset/asset_types.h b/Extra2D/include/extra2d/asset/asset_types.h new file mode 100644 index 0000000..b621b08 --- /dev/null +++ b/Extra2D/include/extra2d/asset/asset_types.h @@ -0,0 +1,270 @@ +#pragma once + +#include +#include +#include +#include + +namespace extra2d { + +// --------------------------------------------------------------------------- +// AssetID - 强类型资源标识符 +// --------------------------------------------------------------------------- + +/** + * @brief 强类型资源ID + * + * 使用类型安全的ID替代裸字符串,支持: + * - 哈希计算(用于快速比较和查找) + * - 原始路径存储(用于调试和日志) + * - 隐式转换和比较操作 + */ +struct AssetID { + u64 hash = 0; + std::string path; + + AssetID() = default; + + /** + * @brief 从路径构造资源ID + * @param p 资源路径 + */ + explicit AssetID(const std::string& p) + : hash(hashPath(p)), path(p) {} + + /** + * @brief 从路径构造资源ID(移动语义) + * @param p 资源路径 + */ + explicit AssetID(std::string&& p) + : hash(hashPath(p)), path(std::move(p)) {} + + /** + * @brief 计算路径哈希值 + * @param p 路径字符串 + * @return 64位哈希值 + */ + static u64 hashPath(const std::string& p) { + u64 result = 14695981039346656037ULL; + for (char c : p) { + result ^= static_cast(static_cast(c)); + result *= 1099511628211ULL; + } + return result; + } + + /** + * @brief 检查ID是否有效 + * @return 有效返回 
true + */ + bool valid() const { return hash != 0 || !path.empty(); } + + /** + * @brief 布尔转换操作符 + */ + explicit operator bool() const { return valid(); } + + /** + * @brief 相等比较 + */ + bool operator==(const AssetID& other) const { return hash == other.hash; } + + /** + * @brief 不等比较 + */ + bool operator!=(const AssetID& other) const { return hash != other.hash; } + + /** + * @brief 小于比较(用于有序容器) + */ + bool operator<(const AssetID& other) const { return hash < other.hash; } +}; + +// --------------------------------------------------------------------------- +// AssetType - 资源类型枚举 +// --------------------------------------------------------------------------- + +/** + * @brief 资源类型枚举 + * 定义支持的资源类型,用于类型安全的资源加载和管理 + */ +enum class AssetType : u8 { + Unknown = 0, + Texture = 1, + Font = 2, + Shader = 3, + Audio = 4, + Data = 5, + Custom = 255 +}; + +/** + * @brief 获取资源类型名称 + * @param type 资源类型 + * @return 类型名称字符串 + */ +inline const char* assetTypeName(AssetType type) { + switch (type) { + case AssetType::Texture: return "Texture"; + case AssetType::Font: return "Font"; + case AssetType::Shader: return "Shader"; + case AssetType::Audio: return "Audio"; + case AssetType::Data: return "Data"; + case AssetType::Custom: return "Custom"; + default: return "Unknown"; + } +} + +// --------------------------------------------------------------------------- +// AssetState - 资源状态枚举 +// --------------------------------------------------------------------------- + +/** + * @brief 资源状态枚举 + * 定义资源的生命周期状态 + */ +enum class AssetState : u8 { + Unloaded = 0, + Loading = 1, + Loaded = 2, + Failed = 3, + Unloading = 4 +}; + +/** + * @brief 获取资源状态名称 + * @param state 资源状态 + * @return 状态名称字符串 + */ +inline const char* assetStateName(AssetState state) { + switch (state) { + case AssetState::Unloaded: return "Unloaded"; + case AssetState::Loading: return "Loading"; + case AssetState::Loaded: return "Loaded"; + case AssetState::Failed: return "Failed"; + case AssetState::Unloading: return 
"Unloading"; + default: return "Unknown"; + } +} + +// --------------------------------------------------------------------------- +// Compression - 压缩算法枚举 +// --------------------------------------------------------------------------- + +/** + * @brief 压缩算法枚举 + * 定义支持的压缩算法类型 + */ +enum class Compression : u8 { + None = 0, + Zstd = 1, + LZ4 = 2, + Zlib = 3 +}; + +/** + * @brief 获取压缩算法名称 + * @param comp 压缩算法 + * @return 算法名称字符串 + */ +inline const char* compressionName(Compression comp) { + switch (comp) { + case Compression::Zstd: return "Zstd"; + case Compression::LZ4: return "LZ4"; + case Compression::Zlib: return "Zlib"; + default: return "None"; + } +} + +// --------------------------------------------------------------------------- +// CacheStats - 缓存统计结构 +// --------------------------------------------------------------------------- + +/** + * @brief 缓存统计信息 + * 用于监控资源缓存的使用情况和性能 + */ +struct CacheStats { + size_t bytes = 0; + size_t limit = 0; + size_t count = 0; + size_t hits = 0; + size_t misses = 0; + + /** + * @brief 计算缓存命中率 + * @return 命中率(0.0 - 1.0) + */ + float hitRate() const { + size_t total = hits + misses; + return total > 0 ? static_cast(hits) / static_cast(total) : 0.0f; + } + + /** + * @brief 计算缓存使用率 + * @return 使用率(0.0 - 1.0) + */ + float usage() const { + return limit > 0 ? 
static_cast(bytes) / static_cast(limit) : 0.0f; + } +}; + +// --------------------------------------------------------------------------- +// AssetPackageHeader - 资源包头部信息 +// --------------------------------------------------------------------------- + +/** + * @brief 资源包头部信息 + * 用于识别压缩和加密类型 + */ +struct AssetPackageHeader { + u32 magic = 0; + u32 version = 0; + u32 compressionType = 0; + u32 encryptionType = 0; + u64 originalSize = 0; + u64 compressedSize = 0; + u8 checksum[32] = {}; + + static constexpr u32 MAGIC = 0x4B325045; // 'E2PK' + + /** + * @brief 检查头部是否有效 + * @return 有效返回 true + */ + bool valid() const { return magic == MAGIC; } +}; + +// --------------------------------------------------------------------------- +// AssetPackageEntry - 资源包索引项 +// --------------------------------------------------------------------------- + +/** + * @brief 资源包索引项 + * 描述资源在包中的位置和属性 + */ +struct AssetPackageEntry { + AssetID id; + u64 offset = 0; + u64 size = 0; + u64 originalSize = 0; + u32 compression = 0; + u32 flags = 0; +}; + +} + +// --------------------------------------------------------------------------- +// std::hash 特化 - 支持在 unordered_map/unordered_set 中使用 AssetID +// --------------------------------------------------------------------------- + +namespace std { + +template<> +struct hash { + size_t operator()(const extra2d::AssetID& id) const noexcept { + return static_cast(id.hash); + } +}; + +} diff --git a/Extra2D/include/extra2d/asset/data_processor.h b/Extra2D/include/extra2d/asset/data_processor.h new file mode 100644 index 0000000..5311572 --- /dev/null +++ b/Extra2D/include/extra2d/asset/data_processor.h @@ -0,0 +1,397 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace extra2d { + +// --------------------------------------------------------------------------- +// DataProcessor - 数据处理器接口(装饰器模式) +// --------------------------------------------------------------------------- + +/** + * @brief 数据处理器接口 + * + * 使用装饰器模式支持链式处理数据流。 
+ * 支持压缩、解压、加密、解密等操作的组合。 + */ +class DataProcessor { +public: + virtual ~DataProcessor() = default; + + /** + * @brief 处理输入数据 + * @param input 输入数据 + * @return 处理后的数据 + */ + virtual std::vector process(const std::vector& input) = 0; + + /** + * @brief 设置下一个处理器 + * @param next 下一个处理器 + */ + void setNext(Unique next) { next_ = std::move(next); } + + /** + * @brief 获取下一个处理器 + * @return 下一个处理器的指针 + */ + DataProcessor* next() const { return next_.get(); } + +protected: + /** + * @brief 调用下一个处理器处理数据 + * @param input 输入数据 + * @return 处理后的数据,如果没有下一个处理器则返回原数据 + */ + std::vector processNext(const std::vector& input) { + return next_ ? next_->process(input) : input; + } + + Unique next_; +}; + +// --------------------------------------------------------------------------- +// Decryptor - 解密器 +// --------------------------------------------------------------------------- + +/** + * @brief 解密处理器 + * + * 使用 XOR 或 AES 算法解密数据。 + * 支持简单的 XOR 加密和 AES-256 加密。 + */ +class Decryptor : public DataProcessor { +public: + /** + * @brief 加密类型枚举 + */ + enum class Type : u8 { + None = 0, + XOR = 1, + AES256 = 2 + }; + + /** + * @brief 构造解密器 + * @param key 解密密钥 + * @param type 加密类型 + */ + explicit Decryptor(const std::string& key, Type type = Type::XOR); + + /** + * @brief 处理(解密)输入数据 + * @param input 加密的输入数据 + * @return 解密后的数据 + */ + std::vector process(const std::vector& input) override; + + /** + * @brief 获取加密类型 + * @return 加密类型 + */ + Type type() const { return type_; } + +private: + std::string key_; + Type type_; + + /** + * @brief XOR 解密 + * @param input 输入数据 + * @return 解密后的数据 + */ + std::vector decryptXOR(const std::vector& input); + + /** + * @brief AES-256 解密 + * @param input 输入数据 + * @return 解密后的数据 + */ + std::vector decryptAES256(const std::vector& input); +}; + +// --------------------------------------------------------------------------- +// Decompressor - 解压器 +// --------------------------------------------------------------------------- + +/** + * @brief 解压处理器 + * + * 
支持多种压缩算法的解压操作。 + * 目前支持 Zstd、LZ4 和 Zlib。 + */ +class Decompressor : public DataProcessor { +public: + /** + * @brief 构造解压器 + * @param algo 压缩算法 + */ + explicit Decompressor(Compression algo = Compression::Zstd); + + /** + * @brief 处理(解压)输入数据 + * @param input 压缩的输入数据 + * @return 解压后的数据 + */ + std::vector process(const std::vector& input) override; + + /** + * @brief 获取压缩算法 + * @return 压缩算法 + */ + Compression algorithm() const { return algo_; } + +private: + Compression algo_; + + /** + * @brief Zstd 解压 + * @param input 压缩数据 + * @return 解压后的数据 + */ + std::vector decompressZstd(const std::vector& input); + + /** + * @brief LZ4 解压 + * @param input 压缩数据 + * @return 解压后的数据 + */ + std::vector decompressLZ4(const std::vector& input); + + /** + * @brief Zlib 解压 + * @param input 压缩数据 + * @return 解压后的数据 + */ + std::vector decompressZlib(const std::vector& input); +}; + +// --------------------------------------------------------------------------- +// Encryptor - 加密器 +// --------------------------------------------------------------------------- + +/** + * @brief 加密处理器 + * + * 使用 XOR 或 AES 算法加密数据。 + */ +class Encryptor : public DataProcessor { +public: + /** + * @brief 构造加密器 + * @param key 加密密钥 + * @param type 加密类型 + */ + explicit Encryptor(const std::string& key, Decryptor::Type type = Decryptor::Type::XOR); + + /** + * @brief 处理(加密)输入数据 + * @param input 原始输入数据 + * @return 加密后的数据 + */ + std::vector process(const std::vector& input) override; + + /** + * @brief 获取加密类型 + * @return 加密类型 + */ + Decryptor::Type type() const { return type_; } + +private: + std::string key_; + Decryptor::Type type_; + + /** + * @brief XOR 加密 + * @param input 输入数据 + * @return 加密后的数据 + */ + std::vector encryptXOR(const std::vector& input); + + /** + * @brief AES-256 加密 + * @param input 输入数据 + * @return 加密后的数据 + */ + std::vector encryptAES256(const std::vector& input); +}; + +// --------------------------------------------------------------------------- +// Compressor - 压缩器 +// 
--------------------------------------------------------------------------- + +/** + * @brief 压缩处理器 + * + * 支持多种压缩算法的压缩操作。 + */ +class Compressor : public DataProcessor { +public: + /** + * @brief 构造压缩器 + * @param algo 压缩算法 + * @param level 压缩级别(1-22,仅对某些算法有效) + */ + explicit Compressor(Compression algo = Compression::Zstd, int level = 3); + + /** + * @brief 处理(压缩)输入数据 + * @param input 原始输入数据 + * @return 压缩后的数据 + */ + std::vector process(const std::vector& input) override; + + /** + * @brief 获取压缩算法 + * @return 压缩算法 + */ + Compression algorithm() const { return algo_; } + +private: + Compression algo_; + int level_; + + /** + * @brief Zstd 压缩 + * @param input 原始数据 + * @return 压缩后的数据 + */ + std::vector compressZstd(const std::vector& input); + + /** + * @brief LZ4 压缩 + * @param input 原始数据 + * @return 压缩后的数据 + */ + std::vector compressLZ4(const std::vector& input); + + /** + * @brief Zlib 压缩 + * @param input 原始数据 + * @return 压缩后的数据 + */ + std::vector compressZlib(const std::vector& input); +}; + +// --------------------------------------------------------------------------- +// DataPipe - 数据处理管道 +// --------------------------------------------------------------------------- + +/** + * @brief 数据处理管道 + * + * 使用流式 API 构建数据处理链。 + * 支持链式调用添加多个处理器。 + * + * @example + * DataPipe pipe; + * pipe.decrypt("secret-key").decompress(Compression::Zstd); + * auto result = pipe.process(data); + */ +class DataPipe { +public: + DataPipe() = default; + + /** + * @brief 移动构造函数 + * @param other 其他管道 + */ + DataPipe(DataPipe&& other) noexcept = default; + + /** + * @brief 移动赋值操作符 + * @param other 其他管道 + * @return 当前管道引用 + */ + DataPipe& operator=(DataPipe&& other) noexcept = default; + + /** + * @brief 添加解密处理器 + * @param key 解密密钥 + * @param type 加密类型 + * @return 当前管道引用(支持链式调用) + */ + DataPipe& decrypt(const std::string& key, Decryptor::Type type = Decryptor::Type::XOR); + + /** + * @brief 添加解压处理器 + * @param algo 压缩算法 + * @return 当前管道引用(支持链式调用) + */ + DataPipe& decompress(Compression algo); 
+ + /** + * @brief 添加加密处理器 + * @param key 加密密钥 + * @param type 加密类型 + * @return 当前管道引用(支持链式调用) + */ + DataPipe& encrypt(const std::string& key, Decryptor::Type type = Decryptor::Type::XOR); + + /** + * @brief 添加压缩处理器 + * @param algo 压缩算法 + * @param level 压缩级别 + * @return 当前管道引用(支持链式调用) + */ + DataPipe& compress(Compression algo, int level = 3); + + /** + * @brief 添加自定义处理器 + * @param processor 处理器 + * @return 当前管道引用(支持链式调用) + */ + DataPipe& add(Unique processor); + + /** + * @brief 处理数据 + * @param input 输入数据 + * @return 处理后的数据 + */ + std::vector process(const std::vector& input); + + /** + * @brief 清空所有处理器 + */ + void clear(); + + /** + * @brief 检查管道是否为空 + * @return 为空返回 true + */ + bool empty() const { return processors_.empty(); } + + /** + * @brief 获取处理器数量 + * @return 处理器数量 + */ + size_t size() const { return processors_.size(); } + +private: + std::vector> processors_; +}; + +// --------------------------------------------------------------------------- +// 工具函数 +// --------------------------------------------------------------------------- + +/** + * @brief 计算数据的 SHA-256 校验和 + * @param data 输入数据 + * @return 32字节的校验和 + */ +std::vector computeChecksum(const std::vector& data); + +/** + * @brief 验证数据的 SHA-256 校验和 + * @param data 输入数据 + * @param checksum 预期的校验和 + * @return 匹配返回 true + */ +bool verifyChecksum(const std::vector& data, const std::vector& checksum); + +} diff --git a/Extra2D/include/extra2d/core/color.h b/Extra2D/include/extra2d/core/color.h index 6797137..8729586 100644 --- a/Extra2D/include/extra2d/core/color.h +++ b/Extra2D/include/extra2d/core/color.h @@ -155,17 +155,4 @@ inline constexpr Color Coral{1.0f, 0.498f, 0.314f, 1.0f}; inline constexpr Color Transparent{0.0f, 0.0f, 0.0f, 0.0f}; } // namespace Colors -// 为了向后兼容,在 Color 结构体内提供静态引用 -struct ColorConstants { - static const Color &White; - static const Color &Black; - static const Color &Red; - static const Color &Green; - static const Color &Blue; - static const Color &Yellow; - static const 
Color &Cyan; - static const Color &Magenta; - static const Color &Transparent; -}; - } // namespace extra2d diff --git a/Extra2D/include/extra2d/core/math_types.h b/Extra2D/include/extra2d/core/math_types.h index 2c4d68e..ef63602 100644 --- a/Extra2D/include/extra2d/core/math_types.h +++ b/Extra2D/include/extra2d/core/math_types.h @@ -330,6 +330,221 @@ inline float degrees(float radians) { return radians * RAD_TO_DEG; } inline float radians(float degrees) { return degrees * DEG_TO_RAD; } +// --------------------------------------------------------------------------- +// 角度工具函数 +// --------------------------------------------------------------------------- + +/** + * @brief 规范化角度到 [0, 360) 范围 + * @param degrees 输入角度(度数) + * @return 规范化后的角度,范围 [0, 360) + */ +inline float normalizeAngle360(float degrees) { + degrees = std::fmod(degrees, 360.0f); + if (degrees < 0.0f) { + degrees += 360.0f; + } + return degrees; +} + +/** + * @brief 规范化角度到 [-180, 180) 范围 + * @param degrees 输入角度(度数) + * @return 规范化后的角度,范围 [-180, 180) + */ +inline float normalizeAngle180(float degrees) { + degrees = std::fmod(degrees + 180.0f, 360.0f); + if (degrees < 0.0f) { + degrees += 360.0f; + } + return degrees - 180.0f; +} + +/** + * @brief 计算两个角度之间的最短差值 + * @param from 起始角度(度数) + * @param to 目标角度(度数) + * @return 从 from 到 to 的最短角度差,范围 [-180, 180] + */ +inline float angleDifference(float from, float to) { + float diff = normalizeAngle360(to - from); + if (diff > 180.0f) { + diff -= 360.0f; + } + return diff; +} + +/** + * @brief 线性插值角度 + * @param from 起始角度(度数) + * @param to 目标角度(度数) + * @param t 插值因子 [0, 1] + * @return 插值后的角度 + */ +inline float lerpAngle(float from, float to, float t) { + return from + angleDifference(from, to) * t; +} + +// --------------------------------------------------------------------------- +// 向量工具函数 +// --------------------------------------------------------------------------- + +/** + * @brief 计算方向向量(从 from 指向 to 的单位向量) + * @param from 起始点 + * @param to 目标点 + * @return 
归一化的方向向量 + */ +inline Vec2 direction(const Vec2 &from, const Vec2 &to) { + return (to - from).normalized(); +} + +/** + * @brief 计算两点之间的角度 + * @param from 起始点 + * @param to 目标点 + * @return 角度(度数),范围 [-180, 180] + */ +inline float angleBetween(const Vec2 &from, const Vec2 &to) { + Vec2 dir = to - from; + return std::atan2(dir.y, dir.x) * RAD_TO_DEG; +} + +/** + * @brief 根据角度创建方向向量 + * @param degrees 角度(度数),0度指向右方,逆时针为正 + * @return 单位方向向量 + */ +inline Vec2 angleToVector(float degrees) { + float rad = degrees * DEG_TO_RAD; + return {std::cos(rad), std::sin(rad)}; +} + +/** + * @brief 将向量旋转指定角度 + * @param v 原始向量 + * @param degrees 旋转角度(度数),正值为逆时针旋转 + * @return 旋转后的向量 + */ +inline Vec2 rotateVector(const Vec2 &v, float degrees) { + float rad = degrees * DEG_TO_RAD; + float cosA = std::cos(rad); + float sinA = std::sin(rad); + return {v.x * cosA - v.y * sinA, v.x * sinA + v.y * cosA}; +} + +// --------------------------------------------------------------------------- +// 坐标系转换工具 +// --------------------------------------------------------------------------- + +/** + * @brief Y轴向上坐标转Y轴向下坐标 + * @param pos Y轴向上坐标系中的位置 + * @param height 画布/屏幕高度 + * @return Y轴向下坐标系中的位置 + */ +inline Vec2 flipY(const Vec2 &pos, float height) { + return {pos.x, height - pos.y}; +} + +/** + * @brief Y轴向下坐标转Y轴向上坐标 + * @param pos Y轴向下坐标系中的位置 + * @param height 画布/屏幕高度 + * @return Y轴向上坐标系中的位置 + */ +inline Vec2 unflipY(const Vec2 &pos, float height) { + return {pos.x, height - pos.y}; +} + +// --------------------------------------------------------------------------- +// 矩阵工具函数 +// --------------------------------------------------------------------------- + +/** + * @brief 从变换矩阵提取位置 + * @param matrix 4x4变换矩阵 + * @return 提取的位置向量 + */ +inline Vec2 extractPosition(const glm::mat4 &matrix) { + return {matrix[3][0], matrix[3][1]}; +} + +/** + * @brief 从变换矩阵提取缩放 + * @param matrix 4x4变换矩阵 + * @return 提取的缩放向量 + */ +inline Vec2 extractScale(const glm::mat4 &matrix) { + float scaleX = std::sqrt(matrix[0][0] 
* matrix[0][0] + matrix[0][1] * matrix[0][1]); + float scaleY = std::sqrt(matrix[1][0] * matrix[1][0] + matrix[1][1] * matrix[1][1]); + return {scaleX, scaleY}; +} + +/** + * @brief 从变换矩阵提取旋转角度 + * @param matrix 4x4变换矩阵 + * @return 提取的旋转角度(度数) + */ +inline float extractRotation(const glm::mat4 &matrix) { + return std::atan2(matrix[0][1], matrix[0][0]) * RAD_TO_DEG; +} + +// --------------------------------------------------------------------------- +// 碰撞检测工具 +// --------------------------------------------------------------------------- + +/** + * @brief 判断点是否在矩形内 + * @param point 要检测的点 + * @param rect 矩形区域 + * @return 如果点在矩形内返回 true,否则返回 false + */ +inline bool pointInRect(const Vec2 &point, const Rect &rect) { + return point.x >= rect.left() && point.x <= rect.right() && + point.y >= rect.top() && point.y <= rect.bottom(); +} + +/** + * @brief 判断点是否在圆内 + * @param point 要检测的点 + * @param center 圆心 + * @param radius 圆的半径 + * @return 如果点在圆内返回 true,否则返回 false + */ +inline bool pointInCircle(const Vec2 &point, const Vec2 ¢er, float radius) { + float dx = point.x - center.x; + float dy = point.y - center.y; + return (dx * dx + dy * dy) <= (radius * radius); +} + +/** + * @brief 判断两个矩形是否相交 + * @param a 第一个矩形 + * @param b 第二个矩形 + * @return 如果矩形相交返回 true,否则返回 false + */ +inline bool rectsIntersect(const Rect &a, const Rect &b) { + return a.intersects(b); +} + +/** + * @brief 判断两个圆是否相交 + * @param center1 第一个圆的圆心 + * @param radius1 第一个圆的半径 + * @param center2 第二个圆的圆心 + * @param radius2 第二个圆的半径 + * @return 如果圆相交返回 true,否则返回 false + */ +inline bool circlesIntersect(const Vec2 ¢er1, float radius1, + const Vec2 ¢er2, float radius2) { + float dx = center2.x - center1.x; + float dy = center2.y - center1.y; + float distSq = dx * dx + dy * dy; + float radiusSum = radius1 + radius2; + return distSq <= (radiusSum * radiusSum); +} + } // namespace math } // namespace extra2d diff --git a/Extra2D/include/extra2d/core/module.h b/Extra2D/include/extra2d/core/module.h new file mode 100644 
index 0000000..8eb5b17 --- /dev/null +++ b/Extra2D/include/extra2d/core/module.h @@ -0,0 +1,82 @@ +#pragma once + +#include +#include +#include +#include + +namespace extra2d { + +class Application; + +/** + * @brief 模块基类 + * 所有模块必须继承此类 + */ +class Module { +public: + virtual ~Module() = default; + + /** + * @brief 初始化模块 + * @return 初始化成功返回 true + */ + virtual bool init() = 0; + + /** + * @brief 关闭模块 + */ + virtual void shutdown() = 0; + + /** + * @brief 检查模块是否已初始化 + * @return 已初始化返回 true + */ + virtual bool ok() const = 0; + + /** + * @brief 获取模块名称 + * @return 模块名称 + */ + virtual const char* name() const = 0; + + /** + * @brief 获取模块优先级(数值越小越优先) + * @return 优先级值 + */ + virtual int priority() const { return 100; } + + /** + * @brief 获取模块依赖列表 + * @return 依赖模块类型列表 + */ + virtual std::vector deps() const { return {}; } + + /** + * @brief 检查模块是否支持并行初始化 + * @return 支持并行初始化返回 true + */ + virtual bool parallel() const { return true; } + + /** + * @brief 设置所属Application + * @param app Application指针 + */ + void setApp(class Application* app) { app_ = app; } + + /** + * @brief 获取Application + * @return Application指针 + */ + class Application* app() const { return app_; } + +protected: + class Application* app_ = nullptr; +}; + +/** + * @brief 模块工厂函数类型 + */ +using ModuleFactory = std::function()>; + +} // namespace extra2d diff --git a/Extra2D/include/extra2d/core/object_pool.h b/Extra2D/include/extra2d/core/object_pool.h new file mode 100644 index 0000000..ea6305f --- /dev/null +++ b/Extra2D/include/extra2d/core/object_pool.h @@ -0,0 +1,63 @@ +#pragma once + +#include +#include +#include + +namespace extra2d { + +/** + * @brief 固定大小对象池 + * @tparam T 对象类型 + * @tparam Size 池大小 + */ +template class ObjectPool { +public: + ObjectPool() { + for (size_t i = 0; i < Size; ++i) { + available_.push(&pool_[i]); + } + } + + /** + * @brief 获取对象 + * @return 对象指针,池耗尽返回nullptr + */ + T *acquire() { + if (available_.empty()) { + return nullptr; + } + T *obj = available_.front(); + 
available_.pop(); + return obj; + } + + /** + * @brief 释放对象回池 + * @param obj 对象指针 + */ + void release(T *obj) { + if (obj >= pool_ && obj < pool_ + Size) { + obj->~T(); + new (obj) T(); + available_.push(obj); + } + } + + /** + * @brief 获取可用对象数量 + */ + size_t available() const { return available_.size(); } + + /** + * @brief 获取池总大小 + */ + static constexpr size_t capacity() { return Size; } + +private: + alignas(alignof(T)) std::array storage_; + T *pool_ = reinterpret_cast(storage_.data()); + std::queue available_; +}; + +} // namespace extra2d diff --git a/Extra2D/include/extra2d/core/registry.h b/Extra2D/include/extra2d/core/registry.h new file mode 100644 index 0000000..dd19253 --- /dev/null +++ b/Extra2D/include/extra2d/core/registry.h @@ -0,0 +1,170 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace extra2d { + +class Application; + +/** + * @brief 编译期类型ID生成器 + */ +using TypeId = size_t; + +namespace detail { + inline TypeId nextTypeId() { + static TypeId id = 0; + return ++id; + } + + template + inline TypeId getTypeId() { + static TypeId id = nextTypeId(); + return id; + } +} + +/** + * @brief 模块注册表 + * 管理模块的注册、拓扑排序和生命周期 + */ +class Registry { +public: + static constexpr size_t MAX_MODULES = 64; + + static Registry& instance(); + + Registry(const Registry&) = delete; + Registry& operator=(const Registry&) = delete; + + /** + * @brief 注册模块 + * @tparam T 模块类型 + * @tparam Args 构造函数参数类型 + * @param args 构造函数参数 + * @return 模块指针 + */ + template + T* use(Args&&... 
args) { + static_assert(std::is_base_of_v, "T must derive from Module"); + + TypeId typeId = detail::getTypeId(); + + // 数组查找,O(n) 但 n 很小,缓存友好 + for (size_t i = 0; i < moduleCount_; ++i) { + if (modules_[i].id == typeId) { + return static_cast(modules_[i].module.get()); + } + } + + // 添加新模块 + if (moduleCount_ >= MAX_MODULES) { + return nullptr; // 模块数量超过上限 + } + + auto module = ptr::makeUnique(std::forward(args)...); + T* ptr = module.get(); + module->setApp(app_); + + modules_[moduleCount_].id = typeId; + modules_[moduleCount_].module = std::move(module); + modules_[moduleCount_].valid = true; + ++moduleCount_; + + return ptr; + } + + /** + * @brief 获取模块 + * @tparam T 模块类型 + * @return 模块指针,不存在返回 nullptr + */ + template + T* get() const { + TypeId typeId = detail::getTypeId(); + + for (size_t i = 0; i < moduleCount_; ++i) { + if (modules_[i].id == typeId && modules_[i].valid) { + return static_cast(modules_[i].module.get()); + } + } + return nullptr; + } + + /** + * @brief 获取模块(基类版本) + * @param typeIdx 类型索引 + * @return 模块指针 + */ + Module* get(std::type_index typeIdx) const { + // 这里仍然使用type_index作为后备方案 + for (size_t i = 0; i < moduleCount_; ++i) { + if (modules_[i].valid && + std::type_index(typeid(*modules_[i].module)) == typeIdx) { + return modules_[i].module.get(); + } + } + return nullptr; + } + + /** + * @brief 设置Application + */ + void setApp(Application* app) { app_ = app; } + + /** + * @brief 初始化所有模块(按优先级拓扑排序,支持并行初始化) + * @return 初始化成功返回 true + */ + bool init(); + + /** + * @brief 关闭所有模块 + */ + void shutdown(); + + /** + * @brief 清空所有模块 + */ + void clear(); + + /** + * @brief 获取模块数量 + */ + size_t size() const { return moduleCount_; } + +private: + Registry() = default; + ~Registry() = default; + + struct ModuleEntry { + TypeId id = 0; + Unique module; + bool valid = false; + }; + + /** + * @brief 拓扑排序模块 + * @return 排序后的模块列表 + */ + std::vector sort(); + + /** + * @brief 按层级对模块进行分组 + * 同一层级的模块没有相互依赖,可以并行初始化 + * @return 按层级分组的模块列表 + */ + std::vector> group(); 
+ + std::array modules_; + size_t moduleCount_ = 0; + Application* app_ = nullptr; +}; + +} // namespace extra2d diff --git a/Extra2D/include/extra2d/core/result.h b/Extra2D/include/extra2d/core/result.h new file mode 100644 index 0000000..44385d6 --- /dev/null +++ b/Extra2D/include/extra2d/core/result.h @@ -0,0 +1,398 @@ +#pragma once + +#include +#include +#include + +namespace extra2d { + +/** + * @brief 错误码枚举 + */ +enum class ErrorCode { + None = 0, + Unknown = 1, + InvalidArgument = 2, + OutOfMemory = 3, + FileNotFound = 4, + PermissionDenied = 5, + NotImplemented = 6, + AlreadyExists = 7, + NotInitialized = 8, + AlreadyInitialized = 9, + OperationFailed = 10, + Timeout = 11, + Cancelled = 12, + InvalidState = 13, + ResourceExhausted = 14, + Unavailable = 15, + DataLoss = 16, + Unauthenticated = 17, + PermissionDenied2 = 18, + ResourceNotFound = 19, + Aborted = 20, + OutOfRange = 21, + Unimplemented = 22, + Internal = 23, + DataCorrupted = 24, + RequestTooLarge = 25, + ResourceBusy = 26, + QuotaExceeded = 27, + DeadlineExceeded = 28, + LoadBalancing = 29, + NetworkError = 30, + ProtocolError = 31, + ServiceUnavailable = 32, + GatewayError = 33, + RateLimited = 34, + BadRequest = 35, + Unauthorized = 36, + Forbidden = 37, + NotFound = 38, + MethodNotAllowed = 39, + Conflict = 40, + Gone = 41, + LengthRequired = 42, + PreconditionFailed = 43, + PayloadTooLarge = 44, + UriTooLong = 45, + UnsupportedMediaType = 46, + RangeNotSatisfiable = 47, + ExpectationFailed = 48, + ImATeapot = 49, + MisdirectedRequest = 50, + UnprocessableEntity = 51, + Locked = 52, + FailedDependency = 53, + TooEarly = 54, + UpgradeRequired = 55, + PreconditionRequired = 56, + TooManyRequests = 57, + RequestHeaderFieldsTooLarge = 58, + UnavailableForLegalReasons = 59 +}; + +/** + * @brief 错误信息结构 + */ +struct Error { + ErrorCode code = ErrorCode::None; + std::string message; + std::string file; + int line = 0; + + Error() = default; + Error(ErrorCode c, const std::string& msg) : code(c), 
message(msg) {} + Error(ErrorCode c, const std::string& msg, const std::string& f, int l) + : code(c), message(msg), file(f), line(l) {} + + bool ok() const { return code == ErrorCode::None; } + + static Error none() { return Error(); } + static Error unknown(const std::string& msg) { return Error(ErrorCode::Unknown, msg); } + static Error invalidArgument(const std::string& msg) { return Error(ErrorCode::InvalidArgument, msg); } + static Error outOfMemory(const std::string& msg) { return Error(ErrorCode::OutOfMemory, msg); } + static Error fileNotFound(const std::string& msg) { return Error(ErrorCode::FileNotFound, msg); } + static Error permissionDenied(const std::string& msg) { return Error(ErrorCode::PermissionDenied, msg); } + static Error notImplemented(const std::string& msg) { return Error(ErrorCode::NotImplemented, msg); } + static Error alreadyExists(const std::string& msg) { return Error(ErrorCode::AlreadyExists, msg); } + static Error notInitialized(const std::string& msg) { return Error(ErrorCode::NotInitialized, msg); } + static Error alreadyInitialized(const std::string& msg) { return Error(ErrorCode::AlreadyInitialized, msg); } + static Error operationFailed(const std::string& msg) { return Error(ErrorCode::OperationFailed, msg); } + static Error timeout(const std::string& msg) { return Error(ErrorCode::Timeout, msg); } + static Error cancelled(const std::string& msg) { return Error(ErrorCode::Cancelled, msg); } + static Error invalidState(const std::string& msg) { return Error(ErrorCode::InvalidState, msg); } + static Error resourceExhausted(const std::string& msg) { return Error(ErrorCode::ResourceExhausted, msg); } + static Error unavailable(const std::string& msg) { return Error(ErrorCode::Unavailable, msg); } + static Error dataLoss(const std::string& msg) { return Error(ErrorCode::DataLoss, msg); } + static Error unauthenticated(const std::string& msg) { return Error(ErrorCode::Unauthenticated, msg); } + static Error permissionDenied2(const 
std::string& msg) { return Error(ErrorCode::PermissionDenied2, msg); } + static Error resourceNotFound(const std::string& msg) { return Error(ErrorCode::ResourceNotFound, msg); } + static Error aborted(const std::string& msg) { return Error(ErrorCode::Aborted, msg); } + static Error outOfRange(const std::string& msg) { return Error(ErrorCode::OutOfRange, msg); } + static Error unimplemented(const std::string& msg) { return Error(ErrorCode::Unimplemented, msg); } + static Error internal(const std::string& msg) { return Error(ErrorCode::Internal, msg); } + static Error dataCorrupted(const std::string& msg) { return Error(ErrorCode::DataCorrupted, msg); } + static Error requestTooLarge(const std::string& msg) { return Error(ErrorCode::RequestTooLarge, msg); } + static Error resourceBusy(const std::string& msg) { return Error(ErrorCode::ResourceBusy, msg); } + static Error quotaExceeded(const std::string& msg) { return Error(ErrorCode::QuotaExceeded, msg); } + static Error deadlineExceeded(const std::string& msg) { return Error(ErrorCode::DeadlineExceeded, msg); } + static Error loadBalancing(const std::string& msg) { return Error(ErrorCode::LoadBalancing, msg); } + static Error networkError(const std::string& msg) { return Error(ErrorCode::NetworkError, msg); } + static Error protocolError(const std::string& msg) { return Error(ErrorCode::ProtocolError, msg); } + static Error serviceUnavailable(const std::string& msg) { return Error(ErrorCode::ServiceUnavailable, msg); } + static Error gatewayError(const std::string& msg) { return Error(ErrorCode::GatewayError, msg); } + static Error rateLimited(const std::string& msg) { return Error(ErrorCode::RateLimited, msg); } + static Error badRequest(const std::string& msg) { return Error(ErrorCode::BadRequest, msg); } + static Error unauthorized(const std::string& msg) { return Error(ErrorCode::Unauthorized, msg); } + static Error forbidden(const std::string& msg) { return Error(ErrorCode::Forbidden, msg); } + static Error 
notFound(const std::string& msg) { return Error(ErrorCode::NotFound, msg); } + static Error methodNotAllowed(const std::string& msg) { return Error(ErrorCode::MethodNotAllowed, msg); } + static Error conflict(const std::string& msg) { return Error(ErrorCode::Conflict, msg); } + static Error gone(const std::string& msg) { return Error(ErrorCode::Gone, msg); } + static Error lengthRequired(const std::string& msg) { return Error(ErrorCode::LengthRequired, msg); } + static Error preconditionFailed(const std::string& msg) { return Error(ErrorCode::PreconditionFailed, msg); } + static Error payloadTooLarge(const std::string& msg) { return Error(ErrorCode::PayloadTooLarge, msg); } + static Error uriTooLong(const std::string& msg) { return Error(ErrorCode::UriTooLong, msg); } + static Error unsupportedMediaType(const std::string& msg) { return Error(ErrorCode::UnsupportedMediaType, msg); } + static Error rangeNotSatisfiable(const std::string& msg) { return Error(ErrorCode::RangeNotSatisfiable, msg); } + static Error expectationFailed(const std::string& msg) { return Error(ErrorCode::ExpectationFailed, msg); } + static Error imATeapot(const std::string& msg) { return Error(ErrorCode::ImATeapot, msg); } + static Error misdirectedRequest(const std::string& msg) { return Error(ErrorCode::MisdirectedRequest, msg); } + static Error unprocessableEntity(const std::string& msg) { return Error(ErrorCode::UnprocessableEntity, msg); } + static Error locked(const std::string& msg) { return Error(ErrorCode::Locked, msg); } + static Error failedDependency(const std::string& msg) { return Error(ErrorCode::FailedDependency, msg); } + static Error tooEarly(const std::string& msg) { return Error(ErrorCode::TooEarly, msg); } + static Error upgradeRequired(const std::string& msg) { return Error(ErrorCode::UpgradeRequired, msg); } + static Error preconditionRequired(const std::string& msg) { return Error(ErrorCode::PreconditionRequired, msg); } + static Error tooManyRequests(const std::string& 
msg) { return Error(ErrorCode::TooManyRequests, msg); } + static Error requestHeaderFieldsTooLarge(const std::string& msg) { return Error(ErrorCode::RequestHeaderFieldsTooLarge, msg); } + static Error unavailableForLegalReasons(const std::string& msg) { return Error(ErrorCode::UnavailableForLegalReasons, msg); } +}; + +/** + * @brief Result类型,用于错误处理 + * @tparam T 成功时的值类型 + * @tparam E 错误类型,默认为Error + */ +template +class Result { +public: + Result() : hasValue_(false) { + new (&storage_.error) E(); + } + + ~Result() { + if (hasValue_) { + storage_.value.~T(); + } else { + storage_.error.~E(); + } + } + + Result(const Result& other) : hasValue_(other.hasValue_) { + if (hasValue_) { + new (&storage_.value) T(other.storage_.value); + } else { + new (&storage_.error) E(other.storage_.error); + } + } + + Result(Result&& other) noexcept : hasValue_(other.hasValue_) { + if (hasValue_) { + new (&storage_.value) T(std::move(other.storage_.value)); + } else { + new (&storage_.error) E(std::move(other.storage_.error)); + } + } + + Result& operator=(const Result& other) { + if (this != &other) { + this->~Result(); + hasValue_ = other.hasValue_; + if (hasValue_) { + new (&storage_.value) T(other.storage_.value); + } else { + new (&storage_.error) E(other.storage_.error); + } + } + return *this; + } + + Result& operator=(Result&& other) noexcept { + if (this != &other) { + this->~Result(); + hasValue_ = other.hasValue_; + if (hasValue_) { + new (&storage_.value) T(std::move(other.storage_.value)); + } else { + new (&storage_.error) E(std::move(other.storage_.error)); + } + } + return *this; + } + + static Result ok(T value) { + Result result; + result.hasValue_ = true; + new (&result.storage_.value) T(std::move(value)); + return result; + } + + static Result err(E error) { + Result result; + result.hasValue_ = false; + new (&result.storage_.error) E(std::move(error)); + return result; + } + + bool ok() const { return hasValue_; } + bool isOk() const { return hasValue_; } + bool 
isErr() const { return !hasValue_; } + + T& value() & { + return storage_.value; + } + + const T& value() const & { + return storage_.value; + } + + T&& value() && { + return std::move(storage_.value); + } + + E& error() & { + return storage_.error; + } + + const E& error() const & { + return storage_.error; + } + + E&& error() && { + return std::move(storage_.error); + } + + T valueOr(T defaultValue) const { + return hasValue_ ? storage_.value : std::move(defaultValue); + } + + template + Result map(F&& f) { + if (hasValue_) { + return Result::ok(f(storage_.value)); + } + return *this; + } + + template + Result mapErr(F&& f) { + if (!hasValue_) { + return Result::err(f(storage_.error)); + } + return *this; + } + + template + auto andThen(F&& f) -> decltype(f(std::declval())) { + if (hasValue_) { + return f(storage_.value); + } + return Result()))::ValueType, E>::err(storage_.error); + } + + template + Result orElse(F&& f) { + if (!hasValue_) { + return f(storage_.error); + } + return *this; + } + +private: + union Storage { + T value; + E error; + + Storage() {} + ~Storage() {} + } storage_; + + bool hasValue_; +}; + +// 特化void类型 +template +class Result { +public: + Result() : hasValue_(true) {} + + ~Result() { + if (!hasValue_) { + storage_.error.~E(); + } + } + + Result(const Result& other) : hasValue_(other.hasValue_) { + if (!hasValue_) { + new (&storage_.error) E(other.storage_.error); + } + } + + Result(Result&& other) noexcept : hasValue_(other.hasValue_) { + if (!hasValue_) { + new (&storage_.error) E(std::move(other.storage_.error)); + } + } + + Result& operator=(const Result& other) { + if (this != &other) { + this->~Result(); + hasValue_ = other.hasValue_; + if (!hasValue_) { + new (&storage_.error) E(other.storage_.error); + } + } + return *this; + } + + Result& operator=(Result&& other) noexcept { + if (this != &other) { + this->~Result(); + hasValue_ = other.hasValue_; + if (!hasValue_) { + new (&storage_.error) E(std::move(other.storage_.error)); + 
} + } + return *this; + } + + static Result ok() { + return Result(); + } + + static Result err(E error) { + Result result; + result.hasValue_ = false; + new (&result.storage_.error) E(std::move(error)); + return result; + } + + bool ok() const { return hasValue_; } + bool isOk() const { return hasValue_; } + bool isErr() const { return !hasValue_; } + + E& error() & { + return storage_.error; + } + + const E& error() const & { + return storage_.error; + } + + E&& error() && { + return std::move(storage_.error); + } + +private: + union Storage { + E error; + + Storage() {} + ~Storage() {} + } storage_; + + bool hasValue_; +}; + +// 便捷宏 +#define E2D_TRY(result) \ + do { \ + auto _res = (result); \ + if (!_res.ok()) { \ + return Result::err(_res.error()); \ + } \ + } while(0) + +} // namespace extra2d diff --git a/Extra2D/include/extra2d/core/ring_buffer.h b/Extra2D/include/extra2d/core/ring_buffer.h new file mode 100644 index 0000000..15ede7a --- /dev/null +++ b/Extra2D/include/extra2d/core/ring_buffer.h @@ -0,0 +1,103 @@ +#pragma once + +#include +#include +#include + +namespace extra2d { + +/** + * @brief 无锁环形缓冲区(单生产者单消费者) + * @tparam T 元素类型 + * @tparam Size 缓冲区大小(必须是2的幂) + */ +template +class RingBuffer { + static_assert((Size & (Size - 1)) == 0, "Size must be a power of 2"); + +public: + RingBuffer() = default; + + /** + * @brief 入队 + * @param item 元素 + * @return 成功返回true,缓冲区满返回false + */ + bool push(const T& item) { + const size_t currentHead = head_.load(std::memory_order_relaxed); + const size_t currentTail = tail_.load(std::memory_order_acquire); + + if ((currentHead - currentTail) >= Size) { + return false; // 缓冲区满 + } + + buffer_[currentHead & mask_] = item; + head_.store(currentHead + 1, std::memory_order_release); + return true; + } + + /** + * @brief 入队(移动语义) + * @param item 元素 + * @return 成功返回true,缓冲区满返回false + */ + bool push(T&& item) { + const size_t currentHead = head_.load(std::memory_order_relaxed); + const size_t currentTail = 
tail_.load(std::memory_order_acquire); + + if ((currentHead - currentTail) >= Size) { + return false; // 缓冲区满 + } + + buffer_[currentHead & mask_] = std::move(item); + head_.store(currentHead + 1, std::memory_order_release); + return true; + } + + /** + * @brief 出队 + * @param item 输出元素 + * @return 成功返回true,缓冲区空返回false + */ + bool pop(T& item) { + const size_t currentTail = tail_.load(std::memory_order_relaxed); + const size_t currentHead = head_.load(std::memory_order_acquire); + + if (currentTail == currentHead) { + return false; // 缓冲区空 + } + + item = std::move(buffer_[currentTail & mask_]); + tail_.store(currentTail + 1, std::memory_order_release); + return true; + } + + /** + * @brief 检查是否为空 + */ + bool empty() const { + return head_.load(std::memory_order_acquire) == + tail_.load(std::memory_order_acquire); + } + + /** + * @brief 获取当前大小 + */ + size_t size() const { + return head_.load(std::memory_order_acquire) - + tail_.load(std::memory_order_acquire); + } + + /** + * @brief 获取容量 + */ + static constexpr size_t capacity() { return Size; } + +private: + static constexpr size_t mask_ = Size - 1; + alignas(64) std::array buffer_; + alignas(64) std::atomic head_{0}; + alignas(64) std::atomic tail_{0}; +}; + +} // namespace extra2d diff --git a/Extra2D/include/extra2d/core/service_interface.h b/Extra2D/include/extra2d/core/service_interface.h new file mode 100644 index 0000000..dcc3416 --- /dev/null +++ b/Extra2D/include/extra2d/core/service_interface.h @@ -0,0 +1,144 @@ +#pragma once + +#include +#include + +namespace extra2d { + +/** + * @brief 服务优先级枚举 + * 定义服务的初始化顺序,数值越小越先初始化 + */ +enum class ServicePriority : i32 { + Core = 0, + Event = 100, + Timer = 200, + Scene = 300, + Camera = 400, + Resource = 500, + Audio = 600, + User = 1000 +}; + +/** + * @brief 服务状态枚举 + */ +enum class ServiceState { + Uninitialized, + Initializing, + Running, + Paused, + Stopping, + Stopped +}; + +/** + * @brief 服务信息结构体 + */ +struct ServiceInfo { + std::string name; + ServicePriority 
priority = ServicePriority::User; + ServiceState state = ServiceState::Uninitialized; + bool enabled = true; +}; + +/** + * @brief 服务接口基类 + * 所有服务必须实现此接口,支持依赖注入和生命周期管理 + */ +class IService { + friend class ServiceLocator; + +public: + virtual ~IService() = default; + + /** + * @brief 获取服务信息 + * @return 服务信息结构体 + */ + virtual ServiceInfo info() const = 0; + + /** + * @brief 初始化服务 + * @return 初始化成功返回 true + */ + virtual bool init() = 0; + + /** + * @brief 关闭服务 + */ + virtual void shutdown() = 0; + + /** + * @brief 暂停服务 + */ + virtual void pause() { + info_.state = ServiceState::Paused; + } + + /** + * @brief 恢复服务 + */ + virtual void resume() { + if (info_.state == ServiceState::Paused) { + info_.state = ServiceState::Running; + } + } + + /** + * @brief 更新服务 + * @param dt 帧间隔时间 + */ + virtual void update(f32 dt) { } + + /** + * @brief 检查服务是否已初始化 + * @return 已初始化返回 true + */ + virtual bool initialized() const { + return info_.state == ServiceState::Running || + info_.state == ServiceState::Paused; + } + + /** + * @brief 获取服务状态 + * @return 当前服务状态 + */ + ServiceState state() const { return info_.state; } + + /** + * @brief 获取服务名称 + * @return 服务名称 + */ + const std::string& name() const { return info_.name; } + +protected: + ServiceInfo info_; + + /** + * @brief 设置服务状态 + * @param state 新状态 + */ + void setState(ServiceState state) { info_.state = state; } +}; + +/** + * @brief 类型ID生成器 + * 用于为每种服务类型生成唯一ID + */ +using ServiceTypeId = size_t; + +namespace detail { + inline ServiceTypeId nextServiceTypeId() { + static ServiceTypeId id = 0; + return ++id; + } + + template + ServiceTypeId getServiceTypeId() { + static ServiceTypeId id = nextServiceTypeId(); + return id; + } +} + +} diff --git a/Extra2D/include/extra2d/core/service_locator.h b/Extra2D/include/extra2d/core/service_locator.h new file mode 100644 index 0000000..565627e --- /dev/null +++ b/Extra2D/include/extra2d/core/service_locator.h @@ -0,0 +1,307 @@ +#pragma once + +#include +#include +#include +#include +#include 
+#include +#include +#include +#include +#include + +namespace extra2d { + +/** + * @brief 服务工厂函数类型 + */ +template using ServiceFactory = Fn()>; + +/** + * @brief 服务定位器 + * 实现依赖注入和服务发现模式,解耦模块间依赖 + * + * 特性: + * - 类型安全的服务注册和获取 + * - 支持服务工厂延迟创建 + * - 支持服务依赖声明 + * - 线程安全(读写锁) + * - 支持 Mock 测试 + */ +class ServiceLocator { +public: + /** + * @brief 获取单例实例 + * @return 服务定位器实例引用 + */ + static ServiceLocator &instance(); + + ServiceLocator(const ServiceLocator &) = delete; + ServiceLocator &operator=(const ServiceLocator &) = delete; + + /** + * @brief 注册服务实例 + * @tparam T 服务接口类型 + * @param svc 服务实例 + */ + template void add(Ref svc) { + static_assert(std::is_base_of_v, + "T must derive from IService"); + + std::unique_lock lock(mutex_); + auto typeId = std::type_index(typeid(T)); + services_[typeId] = std::static_pointer_cast(svc); + orderedServices_.push_back(svc); + sort(); + } + + /** + * @brief 注册服务工厂 + * @tparam T 服务接口类型 + * @param fn 服务工厂函数 + */ + template void setFactory(ServiceFactory fn) { + static_assert(std::is_base_of_v, + "T must derive from IService"); + + std::unique_lock lock(mutex_); + auto typeId = std::type_index(typeid(T)); + factories_[typeId] = [fn]() -> Ref { + return std::static_pointer_cast(fn()); + }; + + // 立即创建服务实例并添加到有序列表 + auto svc = factories_[typeId](); + services_[typeId] = svc; + orderedServices_.push_back(svc); + sort(); + } + + /** + * @brief 获取服务实例 + * @tparam T 服务接口类型 + * @return 服务实例,不存在返回 nullptr + */ + template Ref get() const { + static_assert(std::is_base_of_v, + "T must derive from IService"); + + auto typeId = std::type_index(typeid(T)); + + // 读锁查询 + std::shared_lock lock(mutex_); + + auto it = services_.find(typeId); + if (it != services_.end()) { + return std::static_pointer_cast(it->second); + } + + auto factoryIt = factories_.find(typeId); + if (factoryIt != factories_.end()) { + auto svc = factoryIt->second(); + services_[typeId] = svc; + return std::static_pointer_cast(svc); + } + + return nullptr; + } + + /** + * @brief 
尝试获取服务实例(不创建) + * @tparam T 服务接口类型 + * @return 服务实例,不存在返回 nullptr + */ + template Ref tryGet() const { + static_assert(std::is_base_of_v, + "T must derive from IService"); + + auto typeId = std::type_index(typeid(T)); + + // 读锁查询 + std::shared_lock lock(mutex_); + auto it = services_.find(typeId); + if (it != services_.end()) { + return std::static_pointer_cast(it->second); + } + return nullptr; + } + + /** + * @brief 检查服务是否已注册 + * @tparam T 服务接口类型 + * @return 已注册返回 true + */ + template bool has() const { + std::shared_lock lock(mutex_); + auto typeId = std::type_index(typeid(T)); + return services_.find(typeId) != services_.end() || + factories_.find(typeId) != factories_.end(); + } + + /** + * @brief 注销服务 + * @tparam T 服务接口类型 + */ + template void remove() { + std::unique_lock lock(mutex_); + auto typeId = std::type_index(typeid(T)); + + auto it = services_.find(typeId); + if (it != services_.end()) { + auto svc = it->second; + services_.erase(it); + + auto orderIt = + std::find(orderedServices_.begin(), orderedServices_.end(), svc); + if (orderIt != orderedServices_.end()) { + orderedServices_.erase(orderIt); + } + } + + factories_.erase(typeId); + } + + /** + * @brief 初始化所有已注册的服务 + * @return 所有服务初始化成功返回 true + */ + bool init(); + + /** + * @brief 关闭所有服务 + */ + void shutdown(); + + /** + * @brief 更新所有服务 + * @param dt 帧间隔时间 + */ + void update(f32 dt); + + /** + * @brief 暂停所有服务 + */ + void pause(); + + /** + * @brief 恢复所有服务 + */ + void resume(); + + /** + * @brief 获取所有服务(按优先级排序) + * @return 服务列表 + */ + std::vector> all() const; + + /** + * @brief 清空所有服务和工厂 + */ + void clear(); + + /** + * @brief 获取已注册服务数量 + * @return 服务数量 + */ + size_t size() const { return services_.size(); } + +private: + ServiceLocator() = default; + ~ServiceLocator() = default; + + /** + * @brief 按优先级排序服务 + */ + void sort(); + + mutable std::unordered_map> services_; + std::unordered_map()>> + factories_; + std::vector> orderedServices_; + mutable std::shared_mutex mutex_; +}; + +/** + * @brief 
服务注册器 + * 用于静态注册服务 + */ +template class ServiceRegistrar { +public: + explicit ServiceRegistrar(ServiceFactory fn = nullptr) { + if (fn) { + ServiceLocator::instance().setFactory(fn); + } else { + ServiceLocator::instance().setFactory( + []() -> Ref { + return ptr::make(); + }); + } + } +}; + +/** + * @brief 服务注册元数据模板 + * 使用模板元编程实现编译期服务注册 + * 通过静态成员变量的初始化触发注册 + */ +template struct ServiceAutoReg { + /** + * @brief 注册标记,访问此变量时触发服务注册 + */ + static const bool registered; + + /** + * @brief 执行实际的服务注册 + * @return true 表示注册成功 + */ + static bool doRegister() { + ::extra2d::ServiceLocator::instance().setFactory( + []() -> ::extra2d::Ref { + return ::extra2d::ptr::make(); + }); + return true; + } +}; + +// 静态成员定义,在此处触发注册 +template +const bool ServiceAutoReg::registered = + ServiceAutoReg::doRegister(); + +/** + * @brief 服务注册元数据(带自定义工厂) + */ +template struct ServiceAutoRegFactory { + template struct Impl { + static const bool registered; + + static bool doRegister(Factory fn) { + ::extra2d::ServiceLocator::instance().setFactory(fn); + return true; + } + }; +}; + +template +template +const bool ServiceAutoRegFactory::Impl::registered = + ServiceAutoRegFactory::Impl::doRegister(Factory{}); + +/** + * @brief 自动注册服务宏(元数据驱动) + * 在服务实现类中使用,通过模板元编程实现自动注册 + * 比静态对象更可靠,不易被编译器优化 + */ +#define E2D_AUTO_REGISTER_SERVICE(Interface, Implementation) \ + static inline const bool E2D_CONCAT(_service_reg_, __LINE__) = \ + ServiceAutoReg::registered + +/** + * @brief 带自定义工厂的自动注册服务宏(元数据驱动) + */ +#define E2D_AUTO_REGISTER_SERVICE_FACTORY(Interface, Factory) \ + static inline const bool E2D_CONCAT(_service_factory_reg_, __LINE__) = \ + ServiceAutoRegFactory::Impl::registered + +} // namespace extra2d diff --git a/Extra2D/include/extra2d/core/service_registry.h b/Extra2D/include/extra2d/core/service_registry.h new file mode 100644 index 0000000..3a855f2 --- /dev/null +++ b/Extra2D/include/extra2d/core/service_registry.h @@ -0,0 +1,137 @@ +#pragma once + +#include +#include +#include +#include 
+#include + +namespace extra2d { + +/** + * @brief 服务注册信息 + */ +struct ServiceRegistration { + std::string name; + ServicePriority priority; + std::function()> factory; + bool enabled = true; +}; + +/** + * @brief 服务注册表 + * 管理服务的注册信息,支持延迟创建和配置 + */ +class ServiceRegistry { +public: + /** + * @brief 获取单例实例 + * @return 服务注册表实例引用 + */ + static ServiceRegistry& instance(); + + ServiceRegistry(const ServiceRegistry&) = delete; + ServiceRegistry& operator=(const ServiceRegistry&) = delete; + + /** + * @brief 注册服务 + * @tparam T 服务接口类型 + * @tparam Impl 服务实现类型 + * @param name 服务名称 + * @param priority 服务优先级 + */ + template + void add(const std::string& name, ServicePriority priority) { + static_assert(std::is_base_of_v, + "T must derive from IService"); + static_assert(std::is_base_of_v, + "Impl must derive from T"); + + ServiceRegistration reg; + reg.name = name; + reg.priority = priority; + reg.factory = []() -> Ref { + return std::static_pointer_cast(ptr::make()); + }; + registrations_.push_back(reg); + } + + /** + * @brief 注册服务(带工厂函数) + * @tparam T 服务接口类型 + * @param name 服务名称 + * @param priority 服务优先级 + * @param factory 工厂函数 + */ + template + void addWithFactory( + const std::string& name, + ServicePriority priority, + std::function()> factory) { + static_assert(std::is_base_of_v, + "T must derive from IService"); + + ServiceRegistration reg; + reg.name = name; + reg.priority = priority; + reg.factory = [factory]() -> Ref { + return std::static_pointer_cast(factory()); + }; + registrations_.push_back(reg); + } + + /** + * @brief 启用/禁用服务 + * @param name 服务名称 + * @param enabled 是否启用 + */ + void setEnabled(const std::string& name, bool enabled); + + /** + * @brief 创建所有已注册的服务 + * 并注册到 ServiceLocator + */ + void createAll(); + + /** + * @brief 获取所有注册信息 + * @return 注册信息列表 + */ + const std::vector& all() const { + return registrations_; + } + + /** + * @brief 清空所有注册 + */ + void clear() { + registrations_.clear(); + } + +private: + ServiceRegistry() = default; + 
~ServiceRegistry() = default; + + std::vector registrations_; +}; + +/** + * @brief 自动服务注册器 + * 在全局作用域使用,自动注册服务 + */ +template +class AutoServiceRegistrar { +public: + AutoServiceRegistrar(const std::string& name, ServicePriority priority) { + ServiceRegistry::instance().add( + name, priority); + } +}; + +} + +#define E2D_REGISTER_SERVICE_AUTO(Interface, Implementation, Name, Priority) \ + namespace { \ + static ::extra2d::AutoServiceRegistrar \ + E2D_CONCAT(auto_service_registrar_, __LINE__)(Name, Priority); \ + } diff --git a/Extra2D/include/extra2d/core/string.h b/Extra2D/include/extra2d/core/string.h deleted file mode 100644 index ddd51f5..0000000 --- a/Extra2D/include/extra2d/core/string.h +++ /dev/null @@ -1,209 +0,0 @@ -#pragma once - -#include - -namespace extra2d { - -// ============================================================================ -// 字符串编码转换工具函数 -// 统一使用 std::string (UTF-8) 作为项目标准字符串类型 -// ============================================================================ - -// UTF-8 ↔ UTF-16 转换 -std::u16string utf8ToUtf16(const std::string& utf8); -std::string utf16ToUtf8(const std::u16string& utf16); - -// UTF-8 ↔ UTF-32 转换 -std::u32string utf8ToUtf32(const std::string& utf8); -std::string utf32ToUtf8(const std::u32string& utf32); - -// UTF-8 ↔ Wide String 转换 -std::wstring utf8ToWide(const std::string& utf8); -std::string wideToUtf8(const std::wstring& wide); - -// UTF-8 ↔ GBK/GB2312 转换(Windows 中文系统常用) -std::string utf8ToGbk(const std::string& utf8); -std::string gbkToUtf8(const std::string& gbk); - -// ============================================================================ -// 内联实现 -// ============================================================================ - -inline std::u16string utf8ToUtf16(const std::string& utf8) { - if (utf8.empty()) return std::u16string(); - - // UTF-8 → UTF-32 → UTF-16 (with surrogate pairs) - std::u32string u32 = utf8ToUtf32(utf8); - std::u16string result; - result.reserve(u32.size()); - - for (char32_t ch : 
u32) { - if (ch <= 0xFFFF) { - result.push_back(static_cast(ch)); - } else if (ch <= 0x10FFFF) { - // Surrogate pair - ch -= 0x10000; - result.push_back(static_cast(0xD800 | (ch >> 10))); - result.push_back(static_cast(0xDC00 | (ch & 0x3FF))); - } - } - - return result; -} - -inline std::string utf16ToUtf8(const std::u16string& utf16) { - if (utf16.empty()) return std::string(); - - // UTF-16 → UTF-32 → UTF-8 - std::u32string u32; - u32.reserve(utf16.size()); - - for (size_t i = 0; i < utf16.size(); ++i) { - char16_t cu = utf16[i]; - char32_t ch; - if (cu >= 0xD800 && cu <= 0xDBFF && i + 1 < utf16.size()) { - // High surrogate - char16_t cl = utf16[i + 1]; - if (cl >= 0xDC00 && cl <= 0xDFFF) { - ch = 0x10000 + ((static_cast(cu - 0xD800) << 10) | - (cl - 0xDC00)); - ++i; - } else { - ch = cu; // Invalid, pass through - } - } else { - ch = cu; - } - u32.push_back(ch); - } - - return utf32ToUtf8(u32); -} - -inline std::u32string utf8ToUtf32(const std::string& utf8) { - std::u32string result; - result.reserve(utf8.size()); - - const char* ptr = utf8.c_str(); - const char* end = ptr + utf8.size(); - - while (ptr < end) { - char32_t ch = 0; - unsigned char byte = static_cast(*ptr); - - if ((byte & 0x80) == 0) { - // 1-byte sequence - ch = byte; - ptr += 1; - } else if ((byte & 0xE0) == 0xC0) { - // 2-byte sequence - ch = (byte & 0x1F) << 6; - ch |= (static_cast(ptr[1]) & 0x3F); - ptr += 2; - } else if ((byte & 0xF0) == 0xE0) { - // 3-byte sequence - ch = (byte & 0x0F) << 12; - ch |= (static_cast(ptr[1]) & 0x3F) << 6; - ch |= (static_cast(ptr[2]) & 0x3F); - ptr += 3; - } else if ((byte & 0xF8) == 0xF0) { - // 4-byte sequence - ch = (byte & 0x07) << 18; - ch |= (static_cast(ptr[1]) & 0x3F) << 12; - ch |= (static_cast(ptr[2]) & 0x3F) << 6; - ch |= (static_cast(ptr[3]) & 0x3F); - ptr += 4; - } else { - // Invalid UTF-8, skip - ptr += 1; - continue; - } - - result.push_back(ch); - } - - return result; -} - -inline std::string utf32ToUtf8(const std::u32string& utf32) { - 
std::string result; - - for (char32_t ch : utf32) { - if (ch <= 0x7F) { - // 1-byte - result.push_back(static_cast(ch)); - } else if (ch <= 0x7FF) { - // 2-byte - result.push_back(static_cast(0xC0 | ((ch >> 6) & 0x1F))); - result.push_back(static_cast(0x80 | (ch & 0x3F))); - } else if (ch <= 0xFFFF) { - // 3-byte - result.push_back(static_cast(0xE0 | ((ch >> 12) & 0x0F))); - result.push_back(static_cast(0x80 | ((ch >> 6) & 0x3F))); - result.push_back(static_cast(0x80 | (ch & 0x3F))); - } else if (ch <= 0x10FFFF) { - // 4-byte - result.push_back(static_cast(0xF0 | ((ch >> 18) & 0x07))); - result.push_back(static_cast(0x80 | ((ch >> 12) & 0x3F))); - result.push_back(static_cast(0x80 | ((ch >> 6) & 0x3F))); - result.push_back(static_cast(0x80 | (ch & 0x3F))); - } - } - - return result; -} - -inline std::wstring utf8ToWide(const std::string& utf8) { - if (utf8.empty()) return std::wstring(); - - if constexpr (sizeof(wchar_t) == 4) { - // wchar_t is 32-bit (Linux/Switch): same as UTF-32 - std::u32string u32 = utf8ToUtf32(utf8); - return std::wstring(u32.begin(), u32.end()); - } else { - // wchar_t is 16-bit (Windows): same as UTF-16 - std::u16string u16 = utf8ToUtf16(utf8); - return std::wstring(u16.begin(), u16.end()); - } -} - -inline std::string wideToUtf8(const std::wstring& wide) { - if (wide.empty()) return std::string(); - - if constexpr (sizeof(wchar_t) == 4) { - std::u32string u32(wide.begin(), wide.end()); - return utf32ToUtf8(u32); - } else { - std::u16string u16(wide.begin(), wide.end()); - return utf16ToUtf8(u16); - } -} - -// GBK/GB2312 转换(Windows 平台实现) -// 注意:Windows 实现在 .cpp 文件中,避免头文件包含 windows.h 导致冲突 -#ifdef _WIN32 -// 前向声明,实现在 .cpp 文件中 -std::string utf8ToGbkImpl(const std::string& utf8); -std::string gbkToUtf8Impl(const std::string& gbk); - -inline std::string utf8ToGbk(const std::string& utf8) { - return utf8ToGbkImpl(utf8); -} - -inline std::string gbkToUtf8(const std::string& gbk) { - return gbkToUtf8Impl(gbk); -} -#else -// 非 Windows 平台,GBK 转换使用 
iconv 或返回原字符串 -inline std::string utf8ToGbk(const std::string& utf8) { - // TODO: 使用 iconv 实现 - return utf8; -} - -inline std::string gbkToUtf8(const std::string& gbk) { - // TODO: 使用 iconv 实现 - return gbk; -} -#endif - -} // namespace extra2d diff --git a/Extra2D/include/extra2d/core/types.h b/Extra2D/include/extra2d/core/types.h index fb75de7..1afc74f 100644 --- a/Extra2D/include/extra2d/core/types.h +++ b/Extra2D/include/extra2d/core/types.h @@ -7,41 +7,55 @@ namespace extra2d { +// --------------------------------------------------------------------------- +// 宏定义 +// --------------------------------------------------------------------------- +#define E2D_CONCAT_IMPL(a, b) a##b +#define E2D_CONCAT(a, b) E2D_CONCAT_IMPL(a, b) + // --------------------------------------------------------------------------- // 智能指针别名 // --------------------------------------------------------------------------- -template using Ptr = std::shared_ptr; +template using Ref = std::shared_ptr; +template using Unique = std::unique_ptr; +template using Weak = std::weak_ptr; -template using UniquePtr = std::unique_ptr; +/// 智能指针工厂函数命名空间 +namespace ptr { + /// 创建 Ref 的便捷函数 + template inline Ref make(Args &&...args) { + return std::make_shared(std::forward(args)...); + } -template using WeakPtr = std::weak_ptr; - -/// 创建 shared_ptr 的便捷函数 -template inline Ptr makePtr(Args &&...args) { - return std::make_shared(std::forward(args)...); -} - -/// 创建 unique_ptr 的便捷函数 -template -inline UniquePtr makeUnique(Args &&...args) { - return std::make_unique(std::forward(args)...); + /// 创建 Unique 的便捷函数 + template + inline Unique makeUnique(Args &&...args) { + return std::make_unique(std::forward(args)...); + } } // --------------------------------------------------------------------------- // 函数别名 // --------------------------------------------------------------------------- -template using Function = std::function; +template using Fn = std::function; // 
--------------------------------------------------------------------------- // 基础类型别名 // --------------------------------------------------------------------------- -using int8 = std::int8_t; -using int16 = std::int16_t; -using int32 = std::int32_t; -using int64 = std::int64_t; -using uint8 = std::uint8_t; -using uint16 = std::uint16_t; -using uint32 = std::uint32_t; -using uint64 = std::uint64_t; +using i8 = int8_t; +using i16 = int16_t; +using i32 = int32_t; +using i64 = int64_t; +using u8 = uint8_t; +using u16 = uint16_t; +using u32 = uint32_t; +using u64 = uint64_t; +using f32 = float; +using f64 = double; + +// --------------------------------------------------------------------------- +// ID 类型别名 +// --------------------------------------------------------------------------- +using ID = u32; } // namespace extra2d diff --git a/Extra2D/include/extra2d/event/event.h b/Extra2D/include/extra2d/event/event.h index 45eb403..92e00a6 100644 --- a/Extra2D/include/extra2d/event/event.h +++ b/Extra2D/include/extra2d/event/event.h @@ -59,70 +59,70 @@ enum class EventType { // 键盘事件数据 // ============================================================================ struct KeyEvent { - int keyCode; - int scancode; - int mods; // 修饰键 (Shift, Ctrl, Alt, etc.) + i32 key; + i32 scancode; + i32 mods; // 修饰键 (Shift, Ctrl, Alt, etc.) 
}; // ============================================================================ // 鼠标事件数据 // ============================================================================ struct MouseButtonEvent { - int button; - int mods; - Vec2 position; + i32 button; + i32 mods; + Vec2 pos; }; struct MouseMoveEvent { - Vec2 position; + Vec2 pos; Vec2 delta; }; struct MouseScrollEvent { Vec2 offset; - Vec2 position; + Vec2 pos; }; // ============================================================================ // 窗口事件数据 // ============================================================================ struct WindowResizeEvent { - int width; - int height; + i32 w; + i32 h; }; struct WindowMoveEvent { - int x; - int y; + i32 x; + i32 y; }; // ============================================================================ // 游戏手柄事件数据 // ============================================================================ struct GamepadButtonEvent { - int gamepadId; - int button; + i32 gamepadId; + i32 button; }; struct GamepadAxisEvent { - int gamepadId; - int axis; - float value; + i32 gamepadId; + i32 axis; + f32 value; }; // ============================================================================ // 触摸事件数据 // ============================================================================ struct TouchEvent { - int touchId; - Vec2 position; + i32 touchId; + Vec2 pos; }; // ============================================================================ // 自定义事件数据 // ============================================================================ struct CustomEvent { - uint32_t id; + u32 id; void *data; }; @@ -131,7 +131,7 @@ struct CustomEvent { // ============================================================================ struct Event { EventType type = EventType::None; - double timestamp = 0.0; + f64 timestamp = 0.0; bool handled = false; // 事件数据联合体 @@ -141,32 +141,32 @@ struct Event { data; // 便捷访问方法 - bool isWindowEvent() const { + bool window() const { return type == EventType::WindowClose || 
type == EventType::WindowResize || type == EventType::WindowFocus || type == EventType::WindowLostFocus || type == EventType::WindowMoved; } - bool isKeyboardEvent() const { + bool keyboard() const { return type == EventType::KeyPressed || type == EventType::KeyReleased || type == EventType::KeyRepeat; } - bool isMouseEvent() const { + bool mouse() const { return type == EventType::MouseButtonPressed || type == EventType::MouseButtonReleased || type == EventType::MouseMoved || type == EventType::MouseScrolled; } // 静态工厂方法 - static Event createWindowResize(int width, int height); - static Event createWindowClose(); - static Event createKeyPress(int keyCode, int scancode, int mods); - static Event createKeyRelease(int keyCode, int scancode, int mods); - static Event createMouseButtonPress(int button, int mods, const Vec2 &pos); - static Event createMouseButtonRelease(int button, int mods, const Vec2 &pos); - static Event createMouseMove(const Vec2 &pos, const Vec2 &delta); - static Event createMouseScroll(const Vec2 &offset, const Vec2 &pos); + static Event windowResize(i32 w, i32 h); + static Event windowClose(); + static Event keyPress(i32 key, i32 scancode, i32 mods); + static Event keyRelease(i32 key, i32 scancode, i32 mods); + static Event mousePress(i32 btn, i32 mods, Vec2 pos); + static Event mouseRelease(i32 btn, i32 mods, Vec2 pos); + static Event mouseMove(Vec2 pos, Vec2 delta); + static Event mouseScroll(Vec2 offset, Vec2 pos); }; } // namespace extra2d diff --git a/Extra2D/include/extra2d/event/event_dispatcher.h b/Extra2D/include/extra2d/event/event_dispatcher.h index 1ddff8a..4d01f25 100644 --- a/Extra2D/include/extra2d/event/event_dispatcher.h +++ b/Extra2D/include/extra2d/event/event_dispatcher.h @@ -11,46 +11,46 @@ namespace extra2d { // ============================================================================ // 事件监听器 ID // ============================================================================ -using ListenerId = uint64_t; +using ListenerID 
= u64; // ============================================================================ // 事件分发器 // ============================================================================ class EventDispatcher { public: - using EventCallback = std::function; + using EventFn = Fn; EventDispatcher(); ~EventDispatcher() = default; // 添加监听器 - ListenerId addListener(EventType type, EventCallback callback); + ListenerID on(EventType type, EventFn fn); // 移除监听器 - void removeListener(ListenerId id); - void removeAllListeners(EventType type); - void removeAllListeners(); + void off(ListenerID id); + void offAll(EventType type); + void offAll(); // 分发事件 void dispatch(Event &event); void dispatch(const Event &event); // 处理事件队列 - void processQueue(class EventQueue &queue); + void process(class EventQueue &queue); // 统计 - size_t getListenerCount(EventType type) const; - size_t getTotalListenerCount() const; + size_t listenerCount(EventType type) const; + size_t totalListeners() const; private: struct Listener { - ListenerId id; + ListenerID id; EventType type; - EventCallback callback; + EventFn fn; }; std::unordered_map> listeners_; - ListenerId nextId_; + ListenerID nextId_; }; } // namespace extra2d diff --git a/Extra2D/include/extra2d/event/event_queue.h b/Extra2D/include/extra2d/event/event_queue.h index 5ef03bd..2c01fab 100644 --- a/Extra2D/include/extra2d/event/event_queue.h +++ b/Extra2D/include/extra2d/event/event_queue.h @@ -1,9 +1,9 @@ #pragma once +#include #include #include #include -#include namespace extra2d { @@ -12,29 +12,32 @@ namespace extra2d { // ============================================================================ class EventQueue { public: - EventQueue(); - ~EventQueue() = default; + static constexpr size_t DEFAULT_CAPACITY = 1024; - // 添加事件到队列 - void push(const Event &event); - void push(Event &&event); + EventQueue(); + ~EventQueue() = default; - // 从队列取出事件 - bool poll(Event &event); + // 添加事件到队列 + bool push(const Event &event); + bool push(Event &&event); - 
// 查看队列头部事件(不移除) - bool peek(Event &event) const; + // 从队列取出事件 + bool poll(Event &event); - // 清空队列 - void clear(); + // 查看队列头部事件(不移除) + bool peek(Event &event) const; - // 队列状态 - bool empty() const; - size_t size() const; + // 清空队列 + void clear(); + + // 队列状态 + bool empty() const; + size_t size() const; + size_t capacity() const { return buffer_.capacity(); } private: - std::queue queue_; - mutable std::mutex mutex_; + RingBuffer buffer_; + mutable std::mutex mutex_; // 用于peek和clear的互斥 }; } // namespace extra2d diff --git a/Extra2D/include/extra2d/extra2d.h b/Extra2D/include/extra2d/extra2d.h index dfc2679..f384383 100644 --- a/Extra2D/include/extra2d/extra2d.h +++ b/Extra2D/include/extra2d/extra2d.h @@ -1,56 +1,43 @@ #pragma once -// Easy2D v3.0 - 统一入口头文件 +// Extra2D - 统一入口头文件 // 包含所有公共 API // Core -#include -#include #include #include +#include +#include +#include -// Window - SDL2 + OpenGL -#include - -// Graphics -#include -#include -#include -#include -#include -#include - -// Scene -#include -#include -#include -#include -#include +// Platform +#include +#include +#include // Event #include -#include #include -#include - -// Audio -#include -#include - -// Resource -#include +#include // Utils -#include -#include -#include #include +#include -// Spatial -#include -#include -#include -#include +// Services +#include +#include +#include +#include + +// Asset +#include +#include +#include +#include +#include +#include +#include // Application #include diff --git a/Extra2D/include/extra2d/services/asset_service.h b/Extra2D/include/extra2d/services/asset_service.h new file mode 100644 index 0000000..e4e4982 --- /dev/null +++ b/Extra2D/include/extra2d/services/asset_service.h @@ -0,0 +1,334 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace extra2d { + +// 
--------------------------------------------------------------------------- +// IAssetService - 资源服务接口 +// --------------------------------------------------------------------------- + +/** + * @brief 资源服务接口 + * + * 提供资源加载、缓存、异步加载等功能。 + * 使用模板方法支持类型安全的资源加载。 + */ +class IAssetService : public IService { +public: + virtual ~IAssetService() = default; + + /** + * @brief 同步加载资源 + * @tparam T 资源类型 + * @param path 资源路径 + * @return 资源句柄 + */ + template AssetHandle load(const std::string &path) { + static_assert(std::is_base_of_v, "T must derive from Asset"); + return AssetHandle(loadImpl(AssetID(path), typeid(T))); + } + + /** + * @brief 异步加载资源 + * @tparam T 资源类型 + * @param path 资源路径 + * @param callback 加载完成回调 + */ + template + void loadAsync(const std::string &path, AssetLoadCallback callback) { + static_assert(std::is_base_of_v, "T must derive from Asset"); + loadAsyncImpl(AssetID(path), typeid(T), + [cb = std::move(callback)](AssetHandleBase handle) { + cb(AssetHandle(handle)); + }); + } + + /** + * @brief 获取已缓存的资源 + * @tparam T 资源类型 + * @param path 资源路径 + * @return 资源句柄,不存在返回空句柄 + */ + template AssetHandle get(const std::string &path) { + static_assert(std::is_base_of_v, "T must derive from Asset"); + return AssetHandle(getImpl(AssetID(path), typeid(T))); + } + + /** + * @brief 预加载资源(后台加载,不返回句柄) + * @tparam T 资源类型 + * @param path 资源路径 + */ + template void preload(const std::string &path) { + static_assert(std::is_base_of_v, "T must derive from Asset"); + preloadImpl(AssetID(path), typeid(T)); + } + + /** + * @brief 检查资源是否已加载 + * @param path 资源路径 + * @return 已加载返回 true + */ + virtual bool isLoaded(const std::string &path) const = 0; + + /** + * @brief 检查资源是否正在加载 + * @param path 资源路径 + * @return 正在加载返回 true + */ + virtual bool isLoading(const std::string &path) const = 0; + + /** + * @brief 卸载资源 + * @param path 资源路径 + */ + virtual void unload(const std::string &path) = 0; + + /** + * @brief 设置缓存上限 + * @param maxBytes 最大字节数 + */ + virtual void setLimit(size_t maxBytes) = 
0; + + /** + * @brief 获取当前缓存大小 + * @return 缓存字节数 + */ + virtual size_t size() const = 0; + + /** + * @brief 清理无引用资源 + */ + virtual void purge() = 0; + + /** + * @brief 清空所有缓存 + */ + virtual void clear() = 0; + + /** + * @brief 获取缓存统计信息 + * @return 缓存统计 + */ + virtual CacheStats stats() const = 0; + + /** + * @brief 注册加载器 + * @tparam T 资源类型 + * @param loader 加载器实例 + */ + template void registerLoader(Unique> loader) { + static_assert(std::is_base_of_v, "T must derive from Asset"); + registerLoaderImpl(typeid(T), std::move(loader)); + } + + /** + * @brief 挂载资源包 + * @param path 资源包路径 + * @return 成功返回 true + */ + virtual bool mount(const std::string &path) = 0; + + /** + * @brief 卸载资源包 + * @param path 资源包路径 + */ + virtual void unmount(const std::string &path) = 0; + + /** + * @brief 设置数据处理管道 + * @param pipe 处理管道 + */ + virtual void setPipe(DataPipe pipe) = 0; + + /** + * @brief 设置资源根目录 + * @param path 根目录路径 + */ + virtual void setRoot(const std::string &path) = 0; + + /** + * @brief 获取资源根目录 + * @return 根目录路径 + */ + virtual std::string root() const = 0; + + /** + * @brief 处理异步加载完成回调(在主线程调用) + */ + virtual void process() = 0; + +protected: + /** + * @brief 同步加载实现 + */ + virtual AssetHandleBase loadImpl(const AssetID &id, std::type_index type) = 0; + + /** + * @brief 异步加载实现 + */ + virtual void loadAsyncImpl(const AssetID &id, std::type_index type, + std::function callback) = 0; + + /** + * @brief 获取资源实现 + */ + virtual AssetHandleBase getImpl(const AssetID &id, std::type_index type) = 0; + + /** + * @brief 预加载实现 + */ + virtual void preloadImpl(const AssetID &id, std::type_index type) = 0; + + /** + * @brief 注册加载器实现 + */ + virtual void registerLoaderImpl(std::type_index type, + Unique loader) = 0; +}; + +// --------------------------------------------------------------------------- +// AssetService - 资源服务实现 +// --------------------------------------------------------------------------- + +/** + * @brief 资源服务实现 + * + * 实现资源加载、缓存、异步加载等功能。 + * 使用线程池处理异步加载任务。 + */ +class 
AssetService : public IAssetService { +public: + AssetService(); + ~AssetService() override; + + ServiceInfo info() const override { + ServiceInfo i; + i.name = "AssetService"; + i.priority = ServicePriority::Resource; + i.enabled = true; + return i; + } + + bool init() override; + void shutdown() override; + + bool isLoaded(const std::string &path) const override; + bool isLoading(const std::string &path) const override; + void unload(const std::string &path) override; + + void setLimit(size_t maxBytes) override; + size_t size() const override; + void purge() override; + void clear() override; + CacheStats stats() const override; + + bool mount(const std::string &path) override; + void unmount(const std::string &path) override; + void setPipe(DataPipe pipe) override; + void setRoot(const std::string &path) override; + std::string root() const override; + + void process() override; + +protected: + AssetHandleBase loadImpl(const AssetID &id, std::type_index type) override; + void loadAsyncImpl(const AssetID &id, std::type_index type, + std::function callback) override; + AssetHandleBase getImpl(const AssetID &id, std::type_index type) override; + void preloadImpl(const AssetID &id, std::type_index type) override; + void registerLoaderImpl(std::type_index type, + Unique loader) override; + +private: + struct LoadTask { + AssetID id; + std::type_index type = typeid(void); + std::function callback; + }; + + struct LoadedAsset { + Ref asset; + std::type_index type = typeid(void); + }; + + std::string root_; + Unique cache_; + PackManager packManager_; + DataPipe pipe_; + + mutable std::shared_mutex mutex_; + std::unordered_map assets_; + std::unordered_map states_; + std::unordered_map> loaders_; + + std::thread workerThread_; + std::queue taskQueue_; + std::mutex taskMutex_; + std::condition_variable taskCv_; + std::atomic running_{false}; + + std::queue> callbackQueue_; + std::mutex callbackMutex_; + + /** + * @brief 工作线程函数 + */ + void workerFunc(); + + /** + * @brief 
从文件系统加载资源 + * @param id 资源ID + * @param type 资源类型 + * @return 加载的资源 + */ + Ref loadFromFile(const AssetID &id, std::type_index type); + + /** + * @brief 从资源包加载资源 + * @param id 资源ID + * @param type 资源类型 + * @return 加载的资源 + */ + Ref loadFromPack(const AssetID &id, std::type_index type); + + /** + * @brief 获取加载器 + * @param type 资源类型 + * @return 加载器指针 + */ + AssetLoaderBase *getLoader(std::type_index type); + + /** + * @brief 根据路径推断资源类型 + * @param path 资源路径 + * @return 资源类型索引 + */ + std::type_index inferType(const std::string &path); + + E2D_AUTO_REGISTER_SERVICE(IAssetService, AssetService); +}; + +} // namespace extra2d diff --git a/Extra2D/include/extra2d/services/event_service.h b/Extra2D/include/extra2d/services/event_service.h new file mode 100644 index 0000000..ae60d64 --- /dev/null +++ b/Extra2D/include/extra2d/services/event_service.h @@ -0,0 +1,77 @@ +#pragma once + +#include +#include +#include +#include + +namespace extra2d { + +/** + * @brief 事件服务接口 + */ +class IEventService : public IService { +public: + virtual ~IEventService() = default; + + virtual void push(const Event& event) = 0; + virtual void push(Event&& event) = 0; + virtual bool poll(Event& event) = 0; + + virtual ListenerID on(EventType type, EventDispatcher::EventFn fn) = 0; + virtual void off(ListenerID id) = 0; + virtual void offAll(EventType type) = 0; + virtual void offAll() = 0; + + virtual void dispatch(Event& event) = 0; + virtual void process() = 0; + + virtual size_t listenerCount(EventType type) const = 0; + virtual size_t totalListeners() const = 0; + virtual size_t queueSize() const = 0; +}; + +/** + * @brief 事件服务实现 + */ +class EventService : public IEventService { +public: + EventService(); + ~EventService() override = default; + + ServiceInfo info() const override; + + bool init() override; + void shutdown() override; + void update(f32 dt) override; + + void push(const Event& event) override; + void push(Event&& event) override; + bool poll(Event& event) override; + + ListenerID 
on(EventType type, EventDispatcher::EventFn fn) override; + void off(ListenerID id) override; + void offAll(EventType type) override; + void offAll() override; + + void dispatch(Event& event) override; + void process() override; + + size_t listenerCount(EventType type) const override; + size_t totalListeners() const override; + size_t queueSize() const override; + + EventQueue& queue() { return queue_; } + const EventQueue& queue() const { return queue_; } + EventDispatcher& dispatcher() { return dispatcher_; } + const EventDispatcher& dispatcher() const { return dispatcher_; } + +private: + EventQueue queue_; + EventDispatcher dispatcher_; + + // 服务注册元数据 + E2D_AUTO_REGISTER_SERVICE(IEventService, EventService); +}; + +} diff --git a/Extra2D/include/extra2d/services/logger_service.h b/Extra2D/include/extra2d/services/logger_service.h new file mode 100644 index 0000000..a3333ad --- /dev/null +++ b/Extra2D/include/extra2d/services/logger_service.h @@ -0,0 +1,310 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace extra2d { + +/** + * @brief 日志颜色结构 + */ +struct LogColor { + u8 r, g, b; + + constexpr LogColor() : r(255), g(255), b(255) {} + constexpr LogColor(u8 r, u8 g, u8 b) : r(r), g(g), b(b) {} + + static constexpr LogColor White() { return LogColor(255, 255, 255); } + static constexpr LogColor Gray() { return LogColor(128, 128, 128); } + static constexpr LogColor Red() { return LogColor(255, 85, 85); } + static constexpr LogColor Green() { return LogColor(85, 255, 85); } + static constexpr LogColor Yellow() { return LogColor(255, 255, 85); } + static constexpr LogColor Blue() { return LogColor(85, 85, 255); } + static constexpr LogColor Magenta() { return LogColor(255, 85, 255); } + static constexpr LogColor Cyan() { return LogColor(85, 255, 255); } + static constexpr LogColor Orange() { return LogColor(255, 165, 0); } + + static constexpr LogColor Slate() { return LogColor(100, 116, 139); } + static constexpr LogColor 
SlateLight() { return LogColor(148, 163, 184); } + static constexpr LogColor Sky() { return LogColor(14, 165, 233); } + static constexpr LogColor SkyLight() { return LogColor(125, 211, 252); } + static constexpr LogColor Emerald() { return LogColor(16, 185, 129); } + static constexpr LogColor EmeraldLight() { return LogColor(110, 231, 183); } + static constexpr LogColor Amber() { return LogColor(245, 158, 11); } + static constexpr LogColor AmberLight() { return LogColor(252, 211, 77); } + static constexpr LogColor Rose() { return LogColor(244, 63, 94); } + static constexpr LogColor RoseLight() { return LogColor(253, 164, 175); } + static constexpr LogColor Violet() { return LogColor(139, 92, 246); } + static constexpr LogColor VioletLight() { return LogColor(196, 181, 253); } + static constexpr LogColor Indigo() { return LogColor(99, 102, 241); } + static constexpr LogColor IndigoLight() { return LogColor(165, 180, 252); } +}; + +/** + * @brief 日志级别枚举 + */ +enum class LogLevel { + Trace = 0, + Debug = 1, + Info = 2, + Registry = 3, + Warn = 4, + Error = 5, + Fatal = 6, + Off = 7 +}; + +/** + * @brief 日志服务接口 + */ +class ILogger : public IService { +public: + virtual ~ILogger() = default; + + /** + * @brief 设置日志级别 + */ + virtual void level(LogLevel lvl) = 0; + + /** + * @brief 获取日志级别 + */ + virtual LogLevel level() const = 0; + + /** + * @brief 检查日志级别是否启用 + */ + virtual bool enabled(LogLevel lvl) const = 0; + + /** + * @brief 记录日志(格式化) + */ + virtual void log(LogLevel lvl, const char *fmt, ...) = 0; + + /** + * @brief 记录日志(字符串) + */ + virtual void log(LogLevel lvl, const std::string &msg) = 0; + + /** + * @brief Trace级别日志 + */ + virtual void trace(const char *fmt, ...) = 0; + + /** + * @brief Debug级别日志 + */ + virtual void debug(const char *fmt, ...) = 0; + + /** + * @brief Info级别日志 + */ + virtual void info(const char *fmt, ...) = 0; + + /** + * @brief Registry级别日志(用于模块/服务注册显示) + */ + virtual void registry(const char *fmt, ...) 
= 0; + + /** + * @brief Warn级别日志 + */ + virtual void warn(const char *fmt, ...) = 0; + + /** + * @brief Error级别日志 + */ + virtual void error(const char *fmt, ...) = 0; + + /** + * @brief Fatal级别日志 + */ + virtual void fatal(const char *fmt, ...) = 0; + + /** + * @brief 设置日志级别颜色 + * @param lvl 日志级别 + * @param c 颜色 + */ + virtual void levelColor(LogLevel lvl, const LogColor &c) = 0; + + /** + * @brief 获取日志级别颜色 + * @param lvl 日志级别 + * @return 颜色 + */ + virtual LogColor levelColor(LogLevel lvl) const = 0; + + /** + * @brief 启用/禁用颜色输出 + * @param on 是否启用 + */ + virtual void colors(bool on) = 0; + + /** + * @brief 是否启用颜色输出 + */ + virtual bool colors() const = 0; + + ServiceInfo info() const override { + ServiceInfo i; + i.name = "Logger"; + i.priority = ServicePriority::Core; + i.enabled = true; + return i; + } +}; + +/** + * @brief 控制台日志服务实现 + */ +class ConsoleLogger : public ILogger { +public: + ConsoleLogger(); + ~ConsoleLogger() override; + + bool init() override; + void shutdown() override; + + void level(LogLevel lvl) override; + LogLevel level() const override; + bool enabled(LogLevel lvl) const override; + + void log(LogLevel lvl, const char *fmt, ...) override; + void log(LogLevel lvl, const std::string &msg) override; + + void trace(const char *fmt, ...) override; + void debug(const char *fmt, ...) override; + void info(const char *fmt, ...) override; + void registry(const char *fmt, ...) override; + void warn(const char *fmt, ...) override; + void error(const char *fmt, ...) override; + void fatal(const char *fmt, ...) 
override; + + void levelColor(LogLevel lvl, const LogColor &c) override; + LogColor levelColor(LogLevel lvl) const override; + void colors(bool on) override; + bool colors() const override; + +private: + void output(LogLevel lvl, const char *msg); + const char *levelString(LogLevel lvl); + std::string ansiColor(LogLevel lvl); + + LogLevel level_; + bool colors_; + LogColor levelColors_[7]; + class Impl; + Unique impl_; + + // 服务注册元数据 + E2D_AUTO_REGISTER_SERVICE(ILogger, ConsoleLogger); +}; + +} // namespace extra2d + +// 格式化辅助函数 - 将参数转换为字符串 +namespace extra2d { +namespace detail { +template std::string to_string(T &&value) { + using Decayed = std::decay_t; + if constexpr (std::is_same_v) { + return value; + } else if constexpr (std::is_same_v) { + return value ? value : "(null)"; + } else if constexpr (std::is_arithmetic_v) { + if constexpr (std::is_same_v) { + return value ? "true" : "false"; + } else if constexpr (std::is_floating_point_v) { + return std::to_string(value); + } else { + return std::to_string(value); + } + } else { + return ""; + } +} + +inline void format_impl(std::string &result, const char *fmt) { result += fmt; } + +template +void format_impl(std::string &result, const char *fmt, T &&value, + Args &&...args) { + const char *p = fmt; + while (*p) { + if (*p == '{' && *(p + 1) == '}') { + result += to_string(std::forward(value)); + format_impl(result, p + 2, std::forward(args)...); + return; + } + result += *p++; + } + result += " "; + result += to_string(std::forward(value)); + format_impl(result, p, std::forward(args)...); +} +} // namespace detail + +template +std::string format_str(const char *fmt, Args &&...args) { + if constexpr (sizeof...(args) == 0) { + return std::string(fmt); + } else { + std::string result; + detail::format_impl(result, fmt, std::forward(args)...); + return result; + } +} +} // namespace extra2d + +// 便捷宏 - 自动获取日志服务 +#define E2D_LOG(lvl, ...) 
\ + do { \ + if (auto logService = ::extra2d::ServiceLocator::instance() \ + .tryGet<::extra2d::ILogger>()) { \ + if (logService->enabled(lvl)) { \ + logService->log(lvl, ::extra2d::format_str(__VA_ARGS__)); \ + } \ + } \ + } while (0) + +#define E2D_TRACE(...) E2D_LOG(::extra2d::LogLevel::Trace, __VA_ARGS__) +#define E2D_DEBUG(...) E2D_LOG(::extra2d::LogLevel::Debug, __VA_ARGS__) +#define E2D_INFO(...) E2D_LOG(::extra2d::LogLevel::Info, __VA_ARGS__) +#define E2D_REGISTRY(...) E2D_LOG(::extra2d::LogLevel::Registry, __VA_ARGS__) +#define E2D_WARN(...) E2D_LOG(::extra2d::LogLevel::Warn, __VA_ARGS__) +#define E2D_ERROR(...) E2D_LOG(::extra2d::LogLevel::Error, __VA_ARGS__) +#define E2D_FATAL(...) E2D_LOG(::extra2d::LogLevel::Fatal, __VA_ARGS__) + +// 带颜色参数的日志宏 +#define E2D_LOG_COLOR(lvl, c, ...) \ + do { \ + if (auto logService = ::extra2d::ServiceLocator::instance() \ + .tryGet<::extra2d::ILogger>()) { \ + if (logService->enabled(lvl)) { \ + auto prevColor = logService->levelColor(lvl); \ + logService->levelColor(lvl, c); \ + logService->log(lvl, ::extra2d::format_str(__VA_ARGS__)); \ + logService->levelColor(lvl, prevColor); \ + } \ + } \ + } while (0) + +#define E2D_TRACE_COLOR(c, ...) \ + E2D_LOG_COLOR(::extra2d::LogLevel::Trace, c, __VA_ARGS__) +#define E2D_DEBUG_COLOR(c, ...) \ + E2D_LOG_COLOR(::extra2d::LogLevel::Debug, c, __VA_ARGS__) +#define E2D_INFO_COLOR(c, ...) \ + E2D_LOG_COLOR(::extra2d::LogLevel::Info, c, __VA_ARGS__) +#define E2D_REGISTRY_COLOR(c, ...) \ + E2D_LOG_COLOR(::extra2d::LogLevel::Registry, c, __VA_ARGS__) +#define E2D_WARN_COLOR(c, ...) \ + E2D_LOG_COLOR(::extra2d::LogLevel::Warn, c, __VA_ARGS__) +#define E2D_ERROR_COLOR(c, ...) \ + E2D_LOG_COLOR(::extra2d::LogLevel::Error, c, __VA_ARGS__) +#define E2D_FATAL_COLOR(c, ...) 
\ + E2D_LOG_COLOR(::extra2d::LogLevel::Fatal, c, __VA_ARGS__) diff --git a/Extra2D/include/extra2d/services/timer_service.h b/Extra2D/include/extra2d/services/timer_service.h new file mode 100644 index 0000000..c7b112b --- /dev/null +++ b/Extra2D/include/extra2d/services/timer_service.h @@ -0,0 +1,57 @@ +#pragma once + +#include +#include +#include + +namespace extra2d { + +/** + * @brief 计时器服务接口 + */ +class ITimerService : public IService { +public: + virtual ~ITimerService() = default; + + virtual u32 add(f32 delay, Timer::Fn fn) = 0; + virtual u32 addRepeat(f32 interval, Timer::Fn fn) = 0; + virtual void cancel(u32 timerId) = 0; + virtual void pauseTimer(u32 timerId) = 0; + virtual void resumeTimer(u32 timerId) = 0; + virtual void clear() = 0; + virtual size_t count() const = 0; +}; + +/** + * @brief 计时器服务实现 + */ +class TimerService : public ITimerService { +public: + TimerService(); + ~TimerService() override = default; + + ServiceInfo info() const override; + + bool init() override; + void shutdown() override; + void update(f32 dt) override; + + u32 add(f32 delay, Timer::Fn fn) override; + u32 addRepeat(f32 interval, Timer::Fn fn) override; + void cancel(u32 timerId) override; + void pauseTimer(u32 timerId) override; + void resumeTimer(u32 timerId) override; + void clear() override; + size_t count() const override; + + TimerManager& mgr() { return mgr_; } + const TimerManager& mgr() const { return mgr_; } + +private: + TimerManager mgr_; + + // 服务注册元数据 + E2D_AUTO_REGISTER_SERVICE(ITimerService, TimerService); +}; + +} diff --git a/Extra2D/include/extra2d/utils/random.h b/Extra2D/include/extra2d/utils/random.h index 3909059..3c5e768 100644 --- a/Extra2D/include/extra2d/utils/random.h +++ b/Extra2D/include/extra2d/utils/random.h @@ -11,37 +11,37 @@ namespace extra2d { class Random { public: /// 获取单例实例 - static Random &getInstance(); + static Random &get(); /// 设置随机种子 - void setSeed(uint32 seed); + void seed(u32 s); /// 使用当前时间作为种子 void randomize(); /// 获取 [0, 1) 
范围内的随机浮点数 - float getFloat(); + f32 randomF32(); /// 获取 [min, max] 范围内的随机浮点数 - float getFloat(float min, float max); + f32 randomF32(f32 min, f32 max); /// 获取 [0, max] 范围内的随机整数 - int getInt(int max); + i32 randomI32(i32 max); /// 获取 [min, max] 范围内的随机整数 - int getInt(int min, int max); + i32 randomI32(i32 min, i32 max); /// 获取随机布尔值 - bool getBool(); + bool boolean(); /// 获取随机布尔值(带概率) - bool getBool(float probability); + bool boolean(f32 probability); /// 获取指定范围内的随机角度(弧度) - float getAngle(); + f32 angle(); /// 获取 [-1, 1] 范围内的随机数(用于方向) - float getSigned(); + f32 signedF32(); private: Random(); @@ -59,27 +59,27 @@ private: // ============================================================================ /// 获取 [0, 1) 范围内的随机浮点数 -inline float randomFloat() { return Random::getInstance().getFloat(); } +inline f32 randF32() { return Random::get().randomF32(); } /// 获取 [min, max] 范围内的随机浮点数 -inline float randomFloat(float min, float max) { - return Random::getInstance().getFloat(min, max); +inline f32 randF32(f32 min, f32 max) { + return Random::get().randomF32(min, max); } /// 获取 [0, max] 范围内的随机整数 -inline int randomInt(int max) { return Random::getInstance().getInt(max); } +inline i32 randI32(i32 max) { return Random::get().randomI32(max); } /// 获取 [min, max] 范围内的随机整数 -inline int randomInt(int min, int max) { - return Random::getInstance().getInt(min, max); +inline i32 randI32(i32 min, i32 max) { + return Random::get().randomI32(min, max); } /// 获取随机布尔值 -inline bool randomBool() { return Random::getInstance().getBool(); } +inline bool randBool() { return Random::get().boolean(); } /// 获取随机布尔值(带概率) -inline bool randomBool(float probability) { - return Random::getInstance().getBool(probability); +inline bool randBool(f32 probability) { + return Random::get().boolean(probability); } } // namespace extra2d diff --git a/Extra2D/include/extra2d/utils/timer.h b/Extra2D/include/extra2d/utils/timer.h index e220a8b..2bb0514 100644 --- a/Extra2D/include/extra2d/utils/timer.h +++ 
b/Extra2D/include/extra2d/utils/timer.h @@ -16,12 +16,12 @@ public: using Clock = std::chrono::steady_clock; using TimePoint = Clock::time_point; using Duration = Clock::duration; - using Callback = Function; + using Fn = std::function; - Timer(float interval, bool repeat, Callback callback); + Timer(f32 interval, bool repeat, Fn fn); /// 更新计时器,返回 true 如果触发了回调 - bool update(float deltaTime); + bool update(f32 dt); /// 重置计时器 void reset(); @@ -36,27 +36,27 @@ public: void cancel(); /// 是否有效 - bool isValid() const { return valid_; } + bool valid() const { return valid_; } /// 是否暂停 - bool isPaused() const { return paused_; } + bool paused() const { return paused_; } /// 获取剩余时间(秒) - float getRemaining() const; + f32 remaining() const; /// 获取唯一ID - uint32 getId() const { return id_; } + u32 id() const { return id_; } private: - uint32 id_; - float interval_; - float elapsed_; + u32 id_; + f32 interval_; + f32 elapsed_; bool repeat_; bool paused_; bool valid_; - Callback callback_; + Fn fn_; - static uint32 nextId_; + static u32 nextId_; }; // ============================================================================ @@ -68,32 +68,32 @@ public: ~TimerManager() = default; /// 创建单次计时器,返回计时器ID - uint32 addTimer(float delay, Timer::Callback callback); + u32 add(f32 delay, Timer::Fn fn); /// 创建重复计时器,返回计时器ID - uint32 addRepeatingTimer(float interval, Timer::Callback callback); + u32 addRepeat(f32 interval, Timer::Fn fn); /// 取消指定ID的计时器 - void cancelTimer(uint32 timerId); + void cancel(u32 timerId); /// 暂停指定ID的计时器 - void pauseTimer(uint32 timerId); + void pause(u32 timerId); /// 恢复指定ID的计时器 - void resumeTimer(uint32 timerId); + void resume(u32 timerId); /// 更新所有计时器(每帧调用) - void update(float deltaTime); + void update(f32 dt); /// 清除所有计时器 void clear(); /// 获取计时器数量 - size_t getTimerCount() const { return timers_.size(); } + size_t count() const { return timers_.size(); } private: - std::map> timers_; - std::vector timersToRemove_; + std::map> timers_; + std::vector 
timersToRemove_; }; } // namespace extra2d diff --git a/Extra2D/include/extra2d/window/window.h b/Extra2D/include/extra2d/window/window.h index 2e13314..30a45cf 100644 --- a/Extra2D/include/extra2d/window/window.h +++ b/Extra2D/include/extra2d/window/window.h @@ -1,8 +1,8 @@ #pragma once #include -#include #include +#include #include #include diff --git a/Extra2D/include/simdutf/simdutf.h b/Extra2D/include/simdutf/simdutf.h new file mode 100644 index 0000000..79081a1 --- /dev/null +++ b/Extra2D/include/simdutf/simdutf.h @@ -0,0 +1,13573 @@ +/* auto-generated on 2026-01-13 09:03:21 +0100. Do not edit! */ +/* begin file include/simdutf.h */ +#ifndef SIMDUTF_H +#define SIMDUTF_H +#include + +/* begin file include/simdutf/compiler_check.h */ +#ifndef SIMDUTF_COMPILER_CHECK_H +#define SIMDUTF_COMPILER_CHECK_H + +#ifndef __cplusplus + #error simdutf requires a C++ compiler +#endif + +#ifndef SIMDUTF_CPLUSPLUS + #if defined(_MSVC_LANG) && !defined(__clang__) + #define SIMDUTF_CPLUSPLUS (_MSC_VER == 1900 ? 
201103L : _MSVC_LANG) + #else + #define SIMDUTF_CPLUSPLUS __cplusplus + #endif +#endif + +// C++ 26 +#if !defined(SIMDUTF_CPLUSPLUS26) && (SIMDUTF_CPLUSPLUS >= 202602L) + #define SIMDUTF_CPLUSPLUS26 1 +#endif + +// C++ 23 +#if !defined(SIMDUTF_CPLUSPLUS23) && (SIMDUTF_CPLUSPLUS >= 202302L) + #define SIMDUTF_CPLUSPLUS23 1 +#endif + +// C++ 20 +#if !defined(SIMDUTF_CPLUSPLUS20) && (SIMDUTF_CPLUSPLUS >= 202002L) + #define SIMDUTF_CPLUSPLUS20 1 +#endif + +// C++ 17 +#if !defined(SIMDUTF_CPLUSPLUS17) && (SIMDUTF_CPLUSPLUS >= 201703L) + #define SIMDUTF_CPLUSPLUS17 1 +#endif + +// C++ 14 +#if !defined(SIMDUTF_CPLUSPLUS14) && (SIMDUTF_CPLUSPLUS >= 201402L) + #define SIMDUTF_CPLUSPLUS14 1 +#endif + +// C++ 11 +#if !defined(SIMDUTF_CPLUSPLUS11) && (SIMDUTF_CPLUSPLUS >= 201103L) + #define SIMDUTF_CPLUSPLUS11 1 +#endif + +#ifndef SIMDUTF_CPLUSPLUS11 + #error simdutf requires a compiler compliant with the C++11 standard +#endif + +#endif // SIMDUTF_COMPILER_CHECK_H +/* end file include/simdutf/compiler_check.h */ +/* begin file include/simdutf/common_defs.h */ +#ifndef SIMDUTF_COMMON_DEFS_H +#define SIMDUTF_COMMON_DEFS_H + +/* begin file include/simdutf/portability.h */ +#ifndef SIMDUTF_PORTABILITY_H +#define SIMDUTF_PORTABILITY_H + + +#include +#include +#include +#include +#ifndef _WIN32 + // strcasecmp, strncasecmp + #include +#endif + +#if defined(__apple_build_version__) + #if __apple_build_version__ < 14000000 + #define SIMDUTF_SPAN_DISABLED \ + 1 // apple-clang/13 doesn't support std::convertible_to + #endif +#endif + +#if SIMDUTF_CPLUSPLUS20 + #include + #if __cpp_concepts >= 201907L && __cpp_lib_span >= 202002L && \ + !defined(SIMDUTF_SPAN_DISABLED) + #define SIMDUTF_SPAN 1 + #endif // __cpp_concepts >= 201907L && __cpp_lib_span >= 202002L + #if __cpp_lib_atomic_ref >= 201806L + #define SIMDUTF_ATOMIC_REF 1 + #endif // __cpp_lib_atomic_ref + #if __has_cpp_attribute(maybe_unused) >= 201603L + #define SIMDUTF_MAYBE_UNUSED_AVAILABLE 1 + #endif // 
__has_cpp_attribute(maybe_unused) >= 201603L +#endif + +/** + * We want to check that it is actually a little endian system at + * compile-time. + */ + +#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) + #define SIMDUTF_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +#elif defined(_WIN32) + #define SIMDUTF_IS_BIG_ENDIAN 0 +#else + #if defined(__APPLE__) || \ + defined(__FreeBSD__) // defined __BYTE_ORDER__ && defined + // __ORDER_BIG_ENDIAN__ + #include + #elif defined(sun) || \ + defined(__sun) // defined(__APPLE__) || defined(__FreeBSD__) + #include + #else // defined(__APPLE__) || defined(__FreeBSD__) + + #ifdef __has_include + #if __has_include() + #include + #endif //__has_include() + #endif //__has_include + + #endif // defined(__APPLE__) || defined(__FreeBSD__) + + #if !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__) + #define SIMDUTF_IS_BIG_ENDIAN 0 + #endif // byte-order macros unavailable: assume little endian + + #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + #define SIMDUTF_IS_BIG_ENDIAN 0 + #else // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + #define SIMDUTF_IS_BIG_ENDIAN 1 + #endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + +#endif // defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__ + +/** + * At this point in time, SIMDUTF_IS_BIG_ENDIAN is defined. + */ + +#ifdef _MSC_VER + #define SIMDUTF_VISUAL_STUDIO 1 + /** + * We want to differentiate carefully between + * clang under visual studio and regular visual + * studio. + * + * Under clang for Windows, we enable: + * * target pragmas so that part and only part of the + * code gets compiled for advanced instructions. 
+ * + */ + #ifdef __clang__ + // clang under visual studio + #define SIMDUTF_CLANG_VISUAL_STUDIO 1 + #else + // just regular visual studio (best guess) + #define SIMDUTF_REGULAR_VISUAL_STUDIO 1 + #endif // __clang__ +#endif // _MSC_VER + +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + // https://en.wikipedia.org/wiki/C_alternative_tokens + // This header should have no effect, except maybe + // under Visual Studio. + #include +#endif + +#if (defined(__x86_64__) || defined(_M_AMD64)) && !defined(_M_ARM64EC) + #define SIMDUTF_IS_X86_64 1 +#elif defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) + #define SIMDUTF_IS_ARM64 1 +#elif defined(__PPC64__) || defined(_M_PPC64) + #if defined(__VEC__) && defined(__ALTIVEC__) + #define SIMDUTF_IS_PPC64 1 + #endif +#elif defined(__s390__) +// s390 IBM system. Big endian. +#elif (defined(__riscv) || defined(__riscv__)) && __riscv_xlen == 64 + // RISC-V 64-bit + #define SIMDUTF_IS_RISCV64 1 + + // #if __riscv_v_intrinsic >= 1000000 + // #define SIMDUTF_HAS_RVV_INTRINSICS 1 + // #define SIMDUTF_HAS_RVV_TARGET_REGION 1 + // #elif ... 
+ // Check for special compiler versions that implement pre v1.0 intrinsics + #if __riscv_v_intrinsic >= 11000 + #define SIMDUTF_HAS_RVV_INTRINSICS 1 + #endif + + #define SIMDUTF_HAS_ZVBB_INTRINSICS \ + 0 // there is currently no way to detect this + + #if SIMDUTF_HAS_RVV_INTRINSICS && __riscv_vector && \ + __riscv_v_min_vlen >= 128 && __riscv_v_elen >= 64 + // RISC-V V extension + #define SIMDUTF_IS_RVV 1 + #if SIMDUTF_HAS_ZVBB_INTRINSICS && __riscv_zvbb >= 1000000 + // RISC-V Vector Basic Bit-manipulation + #define SIMDUTF_IS_ZVBB 1 + #endif + #endif + +#elif defined(__loongarch_lp64) + #if defined(__loongarch_sx) && defined(__loongarch_asx) + #define SIMDUTF_IS_LSX 1 + #define SIMDUTF_IS_LASX 1 // We can always run both + #elif defined(__loongarch_sx) + #define SIMDUTF_IS_LSX 1 + #endif +#else + // The simdutf library is designed + // for 64-bit processors and it seems that you are not + // compiling for a known 64-bit platform. Please + // use a 64-bit target such as x64 or 64-bit ARM for best performance. + #define SIMDUTF_IS_32BITS 1 + + // We do not support 32-bit platforms, but it can be + // handy to identify them. + #if defined(_M_IX86) || defined(__i386__) + #define SIMDUTF_IS_X86_32BITS 1 + #elif defined(__arm__) || defined(_M_ARM) + #define SIMDUTF_IS_ARM_32BITS 1 + #elif defined(__PPC__) || defined(_M_PPC) + #define SIMDUTF_IS_PPC_32BITS 1 + #endif + +#endif // defined(__x86_64__) || defined(_M_AMD64) + +#ifdef SIMDUTF_IS_32BITS + #ifndef SIMDUTF_NO_PORTABILITY_WARNING + // In the future, we may want to warn users of 32-bit systems that + // the simdutf does not support accelerated kernels for such systems. + #endif // SIMDUTF_NO_PORTABILITY_WARNING +#endif // SIMDUTF_IS_32BITS + +// this is almost standard? +#define SIMDUTF_STRINGIFY_IMPLEMENTATION_(a) #a +#define SIMDUTF_STRINGIFY(a) SIMDUTF_STRINGIFY_IMPLEMENTATION_(a) + +// Our fast kernels require 64-bit systems. +// +// On 32-bit x86, we lack 64-bit popcnt, lzcnt, blsr instructions. 
+// Furthermore, the number of SIMD registers is reduced. +// +// On 32-bit ARM, we would have smaller registers. +// +// The simdutf users should still have the fallback kernel. It is +// slower, but it should run everywhere. + +// +// Enable valid runtime implementations, and select +// SIMDUTF_BUILTIN_IMPLEMENTATION +// + +// We are going to use runtime dispatch. +#if defined(SIMDUTF_IS_X86_64) || defined(SIMDUTF_IS_LSX) + #ifdef __clang__ + // clang does not have GCC push pop + // warning: clang attribute push can't be used within a namespace in clang + // up til 8.0 so SIMDUTF_TARGET_REGION and SIMDUTF_UNTARGET_REGION must be + // *outside* of a namespace. + #define SIMDUTF_TARGET_REGION(T) \ + _Pragma(SIMDUTF_STRINGIFY(clang attribute push( \ + __attribute__((target(T))), apply_to = function))) + #define SIMDUTF_UNTARGET_REGION _Pragma("clang attribute pop") + #elif defined(__GNUC__) + // GCC is easier + #define SIMDUTF_TARGET_REGION(T) \ + _Pragma("GCC push_options") _Pragma(SIMDUTF_STRINGIFY(GCC target(T))) + #define SIMDUTF_UNTARGET_REGION _Pragma("GCC pop_options") + #endif // clang then gcc + +#endif // defined(SIMDUTF_IS_X86_64) || defined(SIMDUTF_IS_LSX) + +// Default target region macros don't do anything. +#ifndef SIMDUTF_TARGET_REGION + #define SIMDUTF_TARGET_REGION(T) + #define SIMDUTF_UNTARGET_REGION +#endif + +// Is threading enabled? +#if defined(_REENTRANT) || defined(_MT) + #ifndef SIMDUTF_THREADS_ENABLED + #define SIMDUTF_THREADS_ENABLED + #endif +#endif + +// workaround for large stack sizes under -O0. +// https://github.com/simdutf/simdutf/issues/691 +#ifdef __APPLE__ + #ifndef __OPTIMIZE__ + // Apple systems have small stack sizes in secondary threads. + // Lack of compiler optimization may generate high stack usage. + // Users may want to disable threads for safety, but only when + // in debug mode which we detect by the fact that the __OPTIMIZE__ + // macro is not defined. 
+ #undef SIMDUTF_THREADS_ENABLED + #endif +#endif + +#ifdef SIMDUTF_VISUAL_STUDIO + // This is one case where we do not distinguish between + // regular visual studio and clang under visual studio. + // clang under Windows has _stricmp (like visual studio) but not strcasecmp + // (as clang normally has) + #define simdutf_strcasecmp _stricmp + #define simdutf_strncasecmp _strnicmp +#else + // The strcasecmp, strncasecmp, and strcasestr functions do not work with + // multibyte strings (e.g. UTF-8). So they are only useful for ASCII in our + // context. + // https://www.gnu.org/software/libunistring/manual/libunistring.html#char-_002a-strings + #define simdutf_strcasecmp strcasecmp + #define simdutf_strncasecmp strncasecmp +#endif + +#if defined(__GNUC__) && !defined(__clang__) + #if __GNUC__ >= 11 + #define SIMDUTF_GCC11ORMORE 1 + #endif // __GNUC__ >= 11 + #if __GNUC__ == 10 + #define SIMDUTF_GCC10 1 + #endif // __GNUC__ == 10 + #if __GNUC__ < 10 + #define SIMDUTF_GCC9OROLDER 1 + #endif // __GNUC__ < 10 +#endif // defined(__GNUC__) && !defined(__clang__) + +#endif // SIMDUTF_PORTABILITY_H +/* end file include/simdutf/portability.h */ +/* begin file include/simdutf/avx512.h */ +#ifndef SIMDUTF_AVX512_H_ +#define SIMDUTF_AVX512_H_ + +/* + It's possible to override AVX512 settings with cmake -DCMAKE_CXX_FLAGS. + + All preprocessor directives have the form `SIMDUTF_HAS_AVX512{feature}`, + where a feature is a code name for extensions. + + Please see the listing below to find which are supported. 
+*/ + +#ifndef SIMDUTF_HAS_AVX512F + #if defined(__AVX512F__) && __AVX512F__ == 1 + #define SIMDUTF_HAS_AVX512F 1 + #endif +#endif + +#ifndef SIMDUTF_HAS_AVX512DQ + #if defined(__AVX512DQ__) && __AVX512DQ__ == 1 + #define SIMDUTF_HAS_AVX512DQ 1 + #endif +#endif + +#ifndef SIMDUTF_HAS_AVX512IFMA + #if defined(__AVX512IFMA__) && __AVX512IFMA__ == 1 + #define SIMDUTF_HAS_AVX512IFMA 1 + #endif +#endif + +#ifndef SIMDUTF_HAS_AVX512CD + #if defined(__AVX512CD__) && __AVX512CD__ == 1 + #define SIMDUTF_HAS_AVX512CD 1 + #endif +#endif + +#ifndef SIMDUTF_HAS_AVX512BW + #if defined(__AVX512BW__) && __AVX512BW__ == 1 + #define SIMDUTF_HAS_AVX512BW 1 + #endif +#endif + +#ifndef SIMDUTF_HAS_AVX512VL + #if defined(__AVX512VL__) && __AVX512VL__ == 1 + #define SIMDUTF_HAS_AVX512VL 1 + #endif +#endif + +#ifndef SIMDUTF_HAS_AVX512VBMI + #if defined(__AVX512VBMI__) && __AVX512VBMI__ == 1 + #define SIMDUTF_HAS_AVX512VBMI 1 + #endif +#endif + +#ifndef SIMDUTF_HAS_AVX512VBMI2 + #if defined(__AVX512VBMI2__) && __AVX512VBMI2__ == 1 + #define SIMDUTF_HAS_AVX512VBMI2 1 + #endif +#endif + +#ifndef SIMDUTF_HAS_AVX512VNNI + #if defined(__AVX512VNNI__) && __AVX512VNNI__ == 1 + #define SIMDUTF_HAS_AVX512VNNI 1 + #endif +#endif + +#ifndef SIMDUTF_HAS_AVX512BITALG + #if defined(__AVX512BITALG__) && __AVX512BITALG__ == 1 + #define SIMDUTF_HAS_AVX512BITALG 1 + #endif +#endif + +#ifndef SIMDUTF_HAS_AVX512VPOPCNTDQ + #if defined(__AVX512VPOPCNTDQ__) && __AVX512VPOPCNTDQ__ == 1 + #define SIMDUTF_HAS_AVX512VPOPCNTDQ 1 + #endif +#endif + +#endif // SIMDUTF_AVX512_H_ +/* end file include/simdutf/avx512.h */ + +// Sometimes logging is useful, but we want it disabled by default +// and free of any logging code in release builds. 
+#ifdef SIMDUTF_LOGGING + #include + #define simdutf_log(msg) \ + std::cout << "[" << __FUNCTION__ << "]: " << msg << std::endl \ + << "\t" << __FILE__ << ":" << __LINE__ << std::endl; + #define simdutf_log_assert(cond, msg) \ + do { \ + if (!(cond)) { \ + std::cerr << "[" << __FUNCTION__ << "]: " << msg << std::endl \ + << "\t" << __FILE__ << ":" << __LINE__ << std::endl; \ + std::abort(); \ + } \ + } while (0) +#else + #define simdutf_log(msg) + #define simdutf_log_assert(cond, msg) +#endif + +#if defined(SIMDUTF_REGULAR_VISUAL_STUDIO) + #define SIMDUTF_DEPRECATED __declspec(deprecated) + + #define simdutf_really_inline __forceinline // really inline in release mode + #define simdutf_always_inline __forceinline // always inline, no matter what + #define simdutf_never_inline __declspec(noinline) + + #define simdutf_unused + #define simdutf_warn_unused + + #ifndef simdutf_likely + #define simdutf_likely(x) x + #endif + #ifndef simdutf_unlikely + #define simdutf_unlikely(x) x + #endif + + #define SIMDUTF_PUSH_DISABLE_WARNINGS __pragma(warning(push)) + #define SIMDUTF_PUSH_DISABLE_ALL_WARNINGS __pragma(warning(push, 0)) + #define SIMDUTF_DISABLE_VS_WARNING(WARNING_NUMBER) \ + __pragma(warning(disable : WARNING_NUMBER)) + // Get rid of Intellisense-only warnings (Code Analysis) + // Though __has_include is C++17, it is supported in Visual Studio 2017 or + // better (_MSC_VER>=1910). 
+ #ifdef __has_include + #if __has_include() + #include + #define SIMDUTF_DISABLE_UNDESIRED_WARNINGS \ + SIMDUTF_DISABLE_VS_WARNING(ALL_CPPCORECHECK_WARNINGS) + #endif + #endif + + #ifndef SIMDUTF_DISABLE_UNDESIRED_WARNINGS + #define SIMDUTF_DISABLE_UNDESIRED_WARNINGS + #endif + + #define SIMDUTF_DISABLE_DEPRECATED_WARNING SIMDUTF_DISABLE_VS_WARNING(4996) + #define SIMDUTF_DISABLE_STRICT_OVERFLOW_WARNING + #define SIMDUTF_POP_DISABLE_WARNINGS __pragma(warning(pop)) + #define SIMDUTF_DISABLE_UNUSED_WARNING +#else // SIMDUTF_REGULAR_VISUAL_STUDIO + #if defined(__OPTIMIZE__) || defined(NDEBUG) + #define simdutf_really_inline inline __attribute__((always_inline)) + #else + #define simdutf_really_inline inline + #endif + #define simdutf_always_inline \ + inline __attribute__((always_inline)) // always inline, no matter what + #define SIMDUTF_DEPRECATED __attribute__((deprecated)) + #define simdutf_never_inline inline __attribute__((noinline)) + + #define simdutf_unused __attribute__((unused)) + #define simdutf_warn_unused __attribute__((warn_unused_result)) + + #ifndef simdutf_likely + #define simdutf_likely(x) __builtin_expect(!!(x), 1) + #endif + #ifndef simdutf_unlikely + #define simdutf_unlikely(x) __builtin_expect(!!(x), 0) + #endif + // clang-format off + #define SIMDUTF_PUSH_DISABLE_WARNINGS _Pragma("GCC diagnostic push") + // gcc doesn't seem to disable all warnings with all and extra, add warnings + // here as necessary + #define SIMDUTF_PUSH_DISABLE_ALL_WARNINGS \ + SIMDUTF_PUSH_DISABLE_WARNINGS \ + SIMDUTF_DISABLE_GCC_WARNING(-Weffc++) \ + SIMDUTF_DISABLE_GCC_WARNING(-Wall) \ + SIMDUTF_DISABLE_GCC_WARNING(-Wconversion) \ + SIMDUTF_DISABLE_GCC_WARNING(-Wextra) \ + SIMDUTF_DISABLE_GCC_WARNING(-Wattributes) \ + SIMDUTF_DISABLE_GCC_WARNING(-Wimplicit-fallthrough) \ + SIMDUTF_DISABLE_GCC_WARNING(-Wnon-virtual-dtor) \ + SIMDUTF_DISABLE_GCC_WARNING(-Wreturn-type) \ + SIMDUTF_DISABLE_GCC_WARNING(-Wshadow) \ + SIMDUTF_DISABLE_GCC_WARNING(-Wunused-parameter) \ + 
SIMDUTF_DISABLE_GCC_WARNING(-Wunused-variable) + #define SIMDUTF_PRAGMA(P) _Pragma(#P) + #define SIMDUTF_DISABLE_GCC_WARNING(WARNING) \ + SIMDUTF_PRAGMA(GCC diagnostic ignored #WARNING) + #if defined(SIMDUTF_CLANG_VISUAL_STUDIO) + #define SIMDUTF_DISABLE_UNDESIRED_WARNINGS \ + SIMDUTF_DISABLE_GCC_WARNING(-Wmicrosoft-include) + #else + #define SIMDUTF_DISABLE_UNDESIRED_WARNINGS + #endif + #define SIMDUTF_DISABLE_DEPRECATED_WARNING \ + SIMDUTF_DISABLE_GCC_WARNING(-Wdeprecated-declarations) + #define SIMDUTF_DISABLE_STRICT_OVERFLOW_WARNING \ + SIMDUTF_DISABLE_GCC_WARNING(-Wstrict-overflow) + #define SIMDUTF_POP_DISABLE_WARNINGS _Pragma("GCC diagnostic pop") + #define SIMDUTF_DISABLE_UNUSED_WARNING \ + SIMDUTF_PUSH_DISABLE_WARNINGS \ + SIMDUTF_DISABLE_GCC_WARNING(-Wunused-function) \ + SIMDUTF_DISABLE_GCC_WARNING(-Wunused-const-variable) + // clang-format on + +#endif // SIMDUTF_REGULAR_VISUAL_STUDIO + +// Conditional constexpr macro: expands to constexpr for C++17+, empty otherwise +#if SIMDUTF_CPLUSPLUS17 + #define simdutf_constexpr constexpr +#else + #define simdutf_constexpr +#endif + +// Will evaluate to constexpr in C++23 or later. This makes it possible to mark +// functions constexpr if the "if consteval" feature is available to use. +#if SIMDUTF_CPLUSPLUS23 + #define simdutf_constexpr23 constexpr +#else + #define simdutf_constexpr23 +#endif + +#ifndef SIMDUTF_DLLIMPORTEXPORT + #if defined(SIMDUTF_VISUAL_STUDIO) // Visual Studio + /** + * Windows users need to do some extra work when building + * or using a dynamic library (DLL). When building, we need + * to set SIMDUTF_DLLIMPORTEXPORT to __declspec(dllexport). + * When *using* the DLL, the user needs to set + * SIMDUTF_DLLIMPORTEXPORT to __declspec(dllimport). + * + * Static libraries do not require such work. + * + * It does not matter here whether you are using + * the regular visual studio or clang under visual + * studio, you still need to handle these issues. + * + * Non-Windows systems do not have this complexity. 
+ */ + #if SIMDUTF_BUILDING_WINDOWS_DYNAMIC_LIBRARY + + // We set SIMDUTF_BUILDING_WINDOWS_DYNAMIC_LIBRARY when we build a DLL + // under Windows. It should never happen that both + // SIMDUTF_BUILDING_WINDOWS_DYNAMIC_LIBRARY and + // SIMDUTF_USING_WINDOWS_DYNAMIC_LIBRARY are set. + #define SIMDUTF_DLLIMPORTEXPORT __declspec(dllexport) + #elif SIMDUTF_USING_WINDOWS_DYNAMIC_LIBRARY + // Windows user who call a dynamic library should set + // SIMDUTF_USING_WINDOWS_DYNAMIC_LIBRARY to 1. + + #define SIMDUTF_DLLIMPORTEXPORT __declspec(dllimport) + #else + // We assume by default static linkage + #define SIMDUTF_DLLIMPORTEXPORT + #endif + #else // defined(SIMDUTF_VISUAL_STUDIO) + // Non-Windows systems do not have this complexity. + #define SIMDUTF_DLLIMPORTEXPORT + #endif // defined(SIMDUTF_VISUAL_STUDIO) +#endif + +#if SIMDUTF_MAYBE_UNUSED_AVAILABLE + #define simdutf_maybe_unused [[maybe_unused]] +#else + #define simdutf_maybe_unused +#endif + +#endif // SIMDUTF_COMMON_DEFS_H +/* end file include/simdutf/common_defs.h */ +/* begin file include/simdutf/encoding_types.h */ +#ifndef SIMDUTF_ENCODING_TYPES_H +#define SIMDUTF_ENCODING_TYPES_H +#include + +#if !defined(SIMDUTF_NO_STD_TEXT_ENCODING) && \ + defined(__cpp_lib_text_encoding) && __cpp_lib_text_encoding >= 202306L + #define SIMDUTF_HAS_STD_TEXT_ENCODING 1 + #include +#endif + +namespace simdutf { + +enum encoding_type { + UTF8 = 1, // BOM 0xef 0xbb 0xbf + UTF16_LE = 2, // BOM 0xff 0xfe + UTF16_BE = 4, // BOM 0xfe 0xff + UTF32_LE = 8, // BOM 0xff 0xfe 0x00 0x00 + UTF32_BE = 16, // BOM 0x00 0x00 0xfe 0xff + Latin1 = 32, + + unspecified = 0 +}; + +#ifndef SIMDUTF_IS_BIG_ENDIAN + #error "SIMDUTF_IS_BIG_ENDIAN needs to be defined." 
+#endif + +enum endianness { + LITTLE = 0, + BIG = 1, + NATIVE = +#if SIMDUTF_IS_BIG_ENDIAN + BIG +#else + LITTLE +#endif +}; + +simdutf_warn_unused simdutf_really_inline constexpr bool +match_system(endianness e) { + return e == endianness::NATIVE; +} + +simdutf_warn_unused std::string to_string(encoding_type bom); + +// Note that BOM for UTF8 is discouraged. +namespace BOM { + +/** + * Checks for a BOM. If not, returns unspecified + * @param input the string to process + * @param length the length of the string in code units + * @return the corresponding encoding + */ + +simdutf_warn_unused encoding_type check_bom(const uint8_t *byte, size_t length); +simdutf_warn_unused encoding_type check_bom(const char *byte, size_t length); +/** + * Returns the size, in bytes, of the BOM for a given encoding type. + * Note that UTF8 BOM are discouraged. + * @param bom the encoding type + * @return the size in bytes of the corresponding BOM + */ +simdutf_warn_unused size_t bom_byte_size(encoding_type bom); + +} // namespace BOM + +#ifdef SIMDUTF_HAS_STD_TEXT_ENCODING +/** + * Convert a simdutf encoding type to a std::text_encoding. 
+ * + * @param enc the simdutf encoding type + * @return the corresponding std::text_encoding, or + * std::text_encoding::id::unknown for unspecified/unsupported + */ +simdutf_warn_unused constexpr std::text_encoding +to_std_encoding(encoding_type enc) noexcept { + switch (enc) { + case UTF8: + return std::text_encoding(std::text_encoding::id::UTF8); + case UTF16_LE: + return std::text_encoding(std::text_encoding::id::UTF16LE); + case UTF16_BE: + return std::text_encoding(std::text_encoding::id::UTF16BE); + case UTF32_LE: + return std::text_encoding(std::text_encoding::id::UTF32LE); + case UTF32_BE: + return std::text_encoding(std::text_encoding::id::UTF32BE); + case Latin1: + return std::text_encoding(std::text_encoding::id::ISOLatin1); + case unspecified: + default: + return std::text_encoding(std::text_encoding::id::unknown); + } +} + +/** + * Convert a std::text_encoding to a simdutf encoding type. + * + * @param enc the std::text_encoding + * @return the corresponding simdutf encoding type, or + * encoding_type::unspecified if the encoding is not supported + */ +simdutf_warn_unused constexpr encoding_type +from_std_encoding(const std::text_encoding &enc) noexcept { + switch (enc.mib()) { + case std::text_encoding::id::UTF8: + return UTF8; + case std::text_encoding::id::UTF16LE: + return UTF16_LE; + case std::text_encoding::id::UTF16BE: + return UTF16_BE; + case std::text_encoding::id::UTF32LE: + return UTF32_LE; + case std::text_encoding::id::UTF32BE: + return UTF32_BE; + case std::text_encoding::id::ISOLatin1: + return Latin1; + default: + return unspecified; + } +} + +/** + * Get the native-endian UTF-16 encoding type for this system. + * + * @return UTF16_LE on little-endian systems, UTF16_BE on big-endian systems + */ +simdutf_warn_unused constexpr encoding_type native_utf16_encoding() noexcept { + #if SIMDUTF_IS_BIG_ENDIAN + return UTF16_BE; + #else + return UTF16_LE; + #endif +} + +/** + * Get the native-endian UTF-32 encoding type for this system. 
+ * + * @return UTF32_LE on little-endian systems, UTF32_BE on big-endian systems + */ +simdutf_warn_unused constexpr encoding_type native_utf32_encoding() noexcept { + #if SIMDUTF_IS_BIG_ENDIAN + return UTF32_BE; + #else + return UTF32_LE; + #endif +} + +/** + * Convert a std::text_encoding to a simdutf encoding type, + * using native endianness for UTF-16/UTF-32 without explicit endianness. + * + * When the input is std::text_encoding::id::UTF16 or UTF32 (without LE/BE + * suffix), this returns the native-endian simdutf variant. + * + * @param enc the std::text_encoding + * @return the corresponding simdutf encoding type, or + * encoding_type::unspecified if the encoding is not supported + */ +simdutf_warn_unused constexpr encoding_type +from_std_encoding_native(const std::text_encoding &enc) noexcept { + switch (enc.mib()) { + case std::text_encoding::id::UTF8: + return UTF8; + case std::text_encoding::id::UTF16: + return native_utf16_encoding(); + case std::text_encoding::id::UTF16LE: + return UTF16_LE; + case std::text_encoding::id::UTF16BE: + return UTF16_BE; + case std::text_encoding::id::UTF32: + return native_utf32_encoding(); + case std::text_encoding::id::UTF32LE: + return UTF32_LE; + case std::text_encoding::id::UTF32BE: + return UTF32_BE; + case std::text_encoding::id::ISOLatin1: + return Latin1; + default: + return unspecified; + } +} +#endif // SIMDUTF_HAS_STD_TEXT_ENCODING + +} // namespace simdutf +#endif +/* end file include/simdutf/encoding_types.h */ +/* begin file include/simdutf/error.h */ +#ifndef SIMDUTF_ERROR_H +#define SIMDUTF_ERROR_H +namespace simdutf { + +enum error_code { + SUCCESS = 0, + HEADER_BITS, // Any byte must have fewer than 5 header bits. + TOO_SHORT, // The leading byte must be followed by N-1 continuation bytes, + // where N is the UTF-8 character length This is also the error + // when the input is truncated. 
+ TOO_LONG, // We either have too many consecutive continuation bytes or the + // string starts with a continuation byte. + OVERLONG, // The decoded character must be above U+7F for two-byte characters, + // U+7FF for three-byte characters, and U+FFFF for four-byte + // characters. + TOO_LARGE, // The decoded character must be less than or equal to + // U+10FFFF, less than or equal to U+7F for ASCII, OR less than or + // equal to U+FF for Latin1 + SURROGATE, // The decoded character must not be in U+D800...DFFF (UTF-8 or + // UTF-32) + // OR + // a high surrogate must be followed by a low surrogate + // and a low surrogate must be preceded by a high surrogate + // (UTF-16) + // OR + // there must be no surrogate at all and one is + // found (Latin1 functions) + // OR + // *specifically* for the function + // utf8_length_from_utf16_with_replacement, a surrogate (whether + // in error or not) has been found (i.e., whether we are in the + // Basic Multilingual Plane or not). + INVALID_BASE64_CHARACTER, // Found a character that cannot be part of a valid + // base64 string. This may include a misplaced + // padding character ('='). + BASE64_INPUT_REMAINDER, // The base64 input terminates with a single + // character, excluding padding (=). It is also used + // in strict mode when padding is not adequate. + BASE64_EXTRA_BITS, // The base64 input terminates with non-zero + // padding bits. + OUTPUT_BUFFER_TOO_SMALL, // The provided buffer is too small. + OTHER // Not related to validation/transcoding. 
+}; +#if SIMDUTF_CPLUSPLUS17 +inline std::string_view error_to_string(error_code code) noexcept { + switch (code) { + case SUCCESS: + return "SUCCESS"; + case HEADER_BITS: + return "HEADER_BITS"; + case TOO_SHORT: + return "TOO_SHORT"; + case TOO_LONG: + return "TOO_LONG"; + case OVERLONG: + return "OVERLONG"; + case TOO_LARGE: + return "TOO_LARGE"; + case SURROGATE: + return "SURROGATE"; + case INVALID_BASE64_CHARACTER: + return "INVALID_BASE64_CHARACTER"; + case BASE64_INPUT_REMAINDER: + return "BASE64_INPUT_REMAINDER"; + case BASE64_EXTRA_BITS: + return "BASE64_EXTRA_BITS"; + case OUTPUT_BUFFER_TOO_SMALL: + return "OUTPUT_BUFFER_TOO_SMALL"; + default: + return "OTHER"; + } +} +#endif + +struct result { + error_code error; + size_t count; // In case of error, indicates the position of the error. In + // case of success, indicates the number of code units + // validated/written. + + simdutf_really_inline simdutf_constexpr23 result() noexcept + : error{error_code::SUCCESS}, count{0} {} + + simdutf_really_inline simdutf_constexpr23 result(error_code err, + size_t pos) noexcept + : error{err}, count{pos} {} + + simdutf_really_inline simdutf_constexpr23 bool is_ok() const noexcept { + return error == error_code::SUCCESS; + } + + simdutf_really_inline simdutf_constexpr23 bool is_err() const noexcept { + return error != error_code::SUCCESS; + } +}; + +struct full_result { + error_code error; + size_t input_count; + size_t output_count; + bool padding_error = false; // true if the error is due to padding, only + // meaningful when error is not SUCCESS + + simdutf_really_inline simdutf_constexpr23 full_result() noexcept + : error{error_code::SUCCESS}, input_count{0}, output_count{0} {} + + simdutf_really_inline simdutf_constexpr23 full_result(error_code err, + size_t pos_in, + size_t pos_out) noexcept + : error{err}, input_count{pos_in}, output_count{pos_out} {} + simdutf_really_inline simdutf_constexpr23 full_result( + error_code err, size_t pos_in, size_t pos_out, bool 
padding_err) noexcept + : error{err}, input_count{pos_in}, output_count{pos_out}, + padding_error{padding_err} {} + + simdutf_really_inline simdutf_constexpr23 operator result() const noexcept { + if (error == error_code::SUCCESS) { + return result{error, output_count}; + } else { + return result{error, input_count}; + } + } +}; + +} // namespace simdutf +#endif +/* end file include/simdutf/error.h */ + +SIMDUTF_PUSH_DISABLE_WARNINGS +SIMDUTF_DISABLE_UNDESIRED_WARNINGS + +// Public API +/* begin file include/simdutf/simdutf_version.h */ +// /include/simdutf/simdutf_version.h automatically generated by release.py, +// do not change by hand +#ifndef SIMDUTF_SIMDUTF_VERSION_H +#define SIMDUTF_SIMDUTF_VERSION_H + +/** The version of simdutf being used (major.minor.revision) */ +#define SIMDUTF_VERSION "8.0.0" + +namespace simdutf { +enum { + /** + * The major version (MAJOR.minor.revision) of simdutf being used. + */ + SIMDUTF_VERSION_MAJOR = 8, + /** + * The minor version (major.MINOR.revision) of simdutf being used. + */ + SIMDUTF_VERSION_MINOR = 0, + /** + * The revision (major.minor.REVISION) of simdutf being used. + */ + SIMDUTF_VERSION_REVISION = 0 +}; +} // namespace simdutf + +#endif // SIMDUTF_SIMDUTF_VERSION_H +/* end file include/simdutf/simdutf_version.h */ +/* begin file include/simdutf/implementation.h */ +#ifndef SIMDUTF_IMPLEMENTATION_H +#define SIMDUTF_IMPLEMENTATION_H +#if !defined(SIMDUTF_NO_THREADS) + #include +#endif +#include +#ifdef SIMDUTF_INTERNAL_TESTS + #include +#endif +/* begin file include/simdutf/internal/isadetection.h */ +/* From +https://github.com/endorno/pytorch/blob/master/torch/lib/TH/generic/simd/simd.h +Highly modified. 
+ +Copyright (c) 2016- Facebook, Inc (Adam Paszke) +Copyright (c) 2014- Facebook, Inc (Soumith Chintala) +Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) +Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu) +Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) +Copyright (c) 2011-2013 NYU (Clement Farabet) +Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, +Iain Melvin, Jason Weston) Copyright (c) 2006 Idiap Research Institute +(Samy Bengio) Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, +Samy Bengio, Johnny Mariethoz) + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories +America and IDIAP Research Institute nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef SIMDutf_INTERNAL_ISADETECTION_H +#define SIMDutf_INTERNAL_ISADETECTION_H + +#include +#include +#if defined(_MSC_VER) + #include +#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID) + #include +#endif + + +// RISC-V ISA detection utilities +#if SIMDUTF_IS_RISCV64 && defined(__linux__) + #include // for syscall +// We define these ourselves, for backwards compatibility +struct simdutf_riscv_hwprobe { + int64_t key; + uint64_t value; +}; + #define simdutf_riscv_hwprobe(...) 
syscall(258, __VA_ARGS__) + #define SIMDUTF_RISCV_HWPROBE_KEY_IMA_EXT_0 4 + #define SIMDUTF_RISCV_HWPROBE_IMA_V (1 << 2) + #define SIMDUTF_RISCV_HWPROBE_EXT_ZVBB (1 << 17) +#endif // SIMDUTF_IS_RISCV64 && defined(__linux__) + +#if defined(__loongarch__) && defined(__linux__) + #include +// bits/hwcap.h +// #define HWCAP_LOONGARCH_LSX (1 << 4) +// #define HWCAP_LOONGARCH_LASX (1 << 5) +#endif + +namespace simdutf { +namespace internal { + +enum instruction_set { + DEFAULT = 0x0, + NEON = 0x1, + AVX2 = 0x4, + SSE42 = 0x8, + PCLMULQDQ = 0x10, + BMI1 = 0x20, + BMI2 = 0x40, + ALTIVEC = 0x80, + AVX512F = 0x100, + AVX512DQ = 0x200, + AVX512IFMA = 0x400, + AVX512PF = 0x800, + AVX512ER = 0x1000, + AVX512CD = 0x2000, + AVX512BW = 0x4000, + AVX512VL = 0x8000, + AVX512VBMI2 = 0x10000, + AVX512VPOPCNTDQ = 0x20000, // own bit: 0x2000 would alias AVX512CD + RVV = 0x4000, + ZVBB = 0x8000, + LSX = 0x40000, + LASX = 0x80000, +}; + +#if defined(__PPC64__) + +static inline uint32_t detect_supported_architectures() { + return instruction_set::ALTIVEC; +} + +#elif SIMDUTF_IS_RISCV64 + +static inline uint32_t detect_supported_architectures() { + uint32_t host_isa = instruction_set::DEFAULT; + #if SIMDUTF_IS_RVV + host_isa |= instruction_set::RVV; + #endif + #if SIMDUTF_IS_ZVBB + host_isa |= instruction_set::ZVBB; + #endif + #if defined(__linux__) + simdutf_riscv_hwprobe probes[] = {{SIMDUTF_RISCV_HWPROBE_KEY_IMA_EXT_0, 0}}; + long ret = simdutf_riscv_hwprobe(&probes, sizeof probes / sizeof *probes, 0, + nullptr, 0); + if (ret == 0) { + uint64_t extensions = probes[0].value; + if (extensions & SIMDUTF_RISCV_HWPROBE_IMA_V) + host_isa |= instruction_set::RVV; + if (extensions & SIMDUTF_RISCV_HWPROBE_EXT_ZVBB) + host_isa |= instruction_set::ZVBB; + } + #endif + #if defined(RUN_IN_SPIKE_SIMULATOR) + // Proxy Kernel does not implement yet hwprobe syscall + host_isa |= instruction_set::RVV; + #endif + return host_isa; +} + +#elif defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) + +static inline uint32_t 
detect_supported_architectures() { + return instruction_set::NEON; +} + +#elif defined(__x86_64__) || defined(_M_AMD64) // x64 + +namespace { +namespace cpuid_bit { +// Can be found on Intel ISA Reference for CPUID + +// EAX = 0x01 +constexpr uint32_t pclmulqdq = uint32_t(1) + << 1; ///< @private bit 1 of ECX for EAX=0x1 +constexpr uint32_t sse42 = uint32_t(1) + << 20; ///< @private bit 20 of ECX for EAX=0x1 +constexpr uint32_t osxsave = + (uint32_t(1) << 26) | + (uint32_t(1) << 27); ///< @private bits 26+27 of ECX for EAX=0x1 + +// EAX = 0x07 (Structured Extended Feature Flags), ECX = 0x00 (Sub-leaf) +// See: "Table 3-8. Information Returned by CPUID Instruction" +namespace ebx { +constexpr uint32_t bmi1 = uint32_t(1) << 3; +constexpr uint32_t avx2 = uint32_t(1) << 5; +constexpr uint32_t bmi2 = uint32_t(1) << 8; +constexpr uint32_t avx512f = uint32_t(1) << 16; +constexpr uint32_t avx512dq = uint32_t(1) << 17; +constexpr uint32_t avx512ifma = uint32_t(1) << 21; +constexpr uint32_t avx512cd = uint32_t(1) << 28; +constexpr uint32_t avx512bw = uint32_t(1) << 30; +constexpr uint32_t avx512vl = uint32_t(1) << 31; +} // namespace ebx + +namespace ecx { +constexpr uint32_t avx512vbmi = uint32_t(1) << 1; +constexpr uint32_t avx512vbmi2 = uint32_t(1) << 6; +constexpr uint32_t avx512vnni = uint32_t(1) << 11; +constexpr uint32_t avx512bitalg = uint32_t(1) << 12; +constexpr uint32_t avx512vpopcnt = uint32_t(1) << 14; +} // namespace ecx +namespace edx { +constexpr uint32_t avx512vp2intersect = uint32_t(1) << 8; +} // namespace edx +namespace xcr0_bit { +constexpr uint64_t avx256_saved = uint64_t(1) << 2; ///< @private bit 2 = AVX +constexpr uint64_t avx512_saved = + uint64_t(7) << 5; ///< @private bits 5,6,7 = opmask, ZMM_hi256, hi16_ZMM +} // namespace xcr0_bit +} // namespace cpuid_bit +} // namespace + +static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, + uint32_t *edx) { + #if defined(_MSC_VER) + int cpu_info[4]; + __cpuidex(cpu_info, *eax, *ecx); + *eax = cpu_info[0]; + 
*ebx = cpu_info[1]; + *ecx = cpu_info[2]; + *edx = cpu_info[3]; + #elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID) + uint32_t level = *eax; + __get_cpuid(level, eax, ebx, ecx, edx); + #else + uint32_t a = *eax, b, c = *ecx, d; + asm volatile("cpuid\n\t" : "+a"(a), "=b"(b), "+c"(c), "=d"(d)); + *eax = a; + *ebx = b; + *ecx = c; + *edx = d; + #endif +} + +static inline uint64_t xgetbv() { + #if defined(_MSC_VER) + return _xgetbv(0); + #else + uint32_t xcr0_lo, xcr0_hi; + asm volatile("xgetbv\n\t" : "=a"(xcr0_lo), "=d"(xcr0_hi) : "c"(0)); + return xcr0_lo | ((uint64_t)xcr0_hi << 32); + #endif +} + +static inline uint32_t detect_supported_architectures() { + uint32_t eax; + uint32_t ebx = 0; + uint32_t ecx = 0; + uint32_t edx = 0; + uint32_t host_isa = 0x0; + + // EBX for EAX=0x1 + eax = 0x1; + cpuid(&eax, &ebx, &ecx, &edx); + + if (ecx & cpuid_bit::sse42) { + host_isa |= instruction_set::SSE42; + } + + if (ecx & cpuid_bit::pclmulqdq) { + host_isa |= instruction_set::PCLMULQDQ; + } + + if ((ecx & cpuid_bit::osxsave) != cpuid_bit::osxsave) { + return host_isa; + } + + // xgetbv for checking if the OS saves registers + uint64_t xcr0 = xgetbv(); + + if ((xcr0 & cpuid_bit::xcr0_bit::avx256_saved) == 0) { + return host_isa; + } + // ECX for EAX=0x7 + eax = 0x7; + ecx = 0x0; // Sub-leaf = 0 + cpuid(&eax, &ebx, &ecx, &edx); + if (ebx & cpuid_bit::ebx::avx2) { + host_isa |= instruction_set::AVX2; + } + if (ebx & cpuid_bit::ebx::bmi1) { + host_isa |= instruction_set::BMI1; + } + if (ebx & cpuid_bit::ebx::bmi2) { + host_isa |= instruction_set::BMI2; + } + if (!((xcr0 & cpuid_bit::xcr0_bit::avx512_saved) == + cpuid_bit::xcr0_bit::avx512_saved)) { + return host_isa; + } + if (ebx & cpuid_bit::ebx::avx512f) { + host_isa |= instruction_set::AVX512F; + } + if (ebx & cpuid_bit::ebx::avx512bw) { + host_isa |= instruction_set::AVX512BW; + } + if (ebx & cpuid_bit::ebx::avx512cd) { + host_isa |= instruction_set::AVX512CD; + } + if (ebx & cpuid_bit::ebx::avx512dq) { + 
host_isa |= instruction_set::AVX512DQ; + } + if (ebx & cpuid_bit::ebx::avx512vl) { + host_isa |= instruction_set::AVX512VL; + } + if (ecx & cpuid_bit::ecx::avx512vbmi2) { + host_isa |= instruction_set::AVX512VBMI2; + } + if (ecx & cpuid_bit::ecx::avx512vpopcnt) { + host_isa |= instruction_set::AVX512VPOPCNTDQ; + } + return host_isa; +} +#elif defined(__loongarch__) + +static inline uint32_t detect_supported_architectures() { + uint32_t host_isa = instruction_set::DEFAULT; + #if defined(__linux__) + uint64_t hwcap = 0; + hwcap = getauxval(AT_HWCAP); + if (hwcap & HWCAP_LOONGARCH_LSX) { + host_isa |= instruction_set::LSX; + } + if (hwcap & HWCAP_LOONGARCH_LASX) { + host_isa |= instruction_set::LASX; + } + #endif + return host_isa; +} +#else // fallback + +// includes 32-bit ARM. +static inline uint32_t detect_supported_architectures() { + return instruction_set::DEFAULT; +} + +#endif // end SIMD extension detection code + +} // namespace internal +} // namespace simdutf + +#endif // SIMDutf_INTERNAL_ISADETECTION_H +/* end file include/simdutf/internal/isadetection.h */ + +#if SIMDUTF_SPAN + #include + #include + #include + #include +#endif +#if SIMDUTF_CPLUSPLUS17 + #include +#endif +// The following defines are conditionally enabled/disabled during amalgamation. +// By default all features are enabled, regular code shouldn't check them. 
Only +// when user code really relies of a selected subset, it's good to verify these +// flags, like: +// +// #if !SIMDUTF_FEATURE_UTF16 +// # error("Please amalgamate simdutf with UTF-16 support") +// #endif +// +#define SIMDUTF_FEATURE_DETECT_ENCODING 1 +#define SIMDUTF_FEATURE_ASCII 1 +#define SIMDUTF_FEATURE_LATIN1 1 +#define SIMDUTF_FEATURE_UTF8 1 +#define SIMDUTF_FEATURE_UTF16 1 +#define SIMDUTF_FEATURE_UTF32 1 +#define SIMDUTF_FEATURE_BASE64 1 + +#if SIMDUTF_CPLUSPLUS23 +/* begin file include/simdutf/constexpr_ptr.h */ +#ifndef SIMDUTF_CONSTEXPR_PTR_H +#define SIMDUTF_CONSTEXPR_PTR_H + +#include + +namespace simdutf { +namespace detail { +/** + * The constexpr_ptr class is a workaround for reinterpret_cast not being + * allowed during constant evaluation. + */ +template + requires(sizeof(to) == sizeof(from)) +struct constexpr_ptr { + const from *p; + + constexpr explicit constexpr_ptr(const from *ptr) noexcept : p(ptr) {} + + constexpr to operator*() const noexcept { return static_cast(*p); } + + constexpr constexpr_ptr &operator++() noexcept { + ++p; + return *this; + } + + constexpr constexpr_ptr operator++(int) noexcept { + auto old = *this; + ++p; + return old; + } + + constexpr constexpr_ptr &operator--() noexcept { + --p; + return *this; + } + + constexpr constexpr_ptr operator--(int) noexcept { + auto old = *this; + --p; + return old; + } + + constexpr constexpr_ptr &operator+=(std::ptrdiff_t n) noexcept { + p += n; + return *this; + } + + constexpr constexpr_ptr &operator-=(std::ptrdiff_t n) noexcept { + p -= n; + return *this; + } + + constexpr constexpr_ptr operator+(std::ptrdiff_t n) const noexcept { + return constexpr_ptr{p + n}; + } + + constexpr constexpr_ptr operator-(std::ptrdiff_t n) const noexcept { + return constexpr_ptr{p - n}; + } + + constexpr std::ptrdiff_t operator-(const constexpr_ptr &o) const noexcept { + return p - o.p; + } + + constexpr to operator[](std::ptrdiff_t n) const noexcept { + return static_cast(*(p + n)); + } + + // 
to prevent compilation errors for memcpy, even if it is never + // called during constant evaluation + constexpr operator const void *() const noexcept { return p; } +}; + +template +constexpr constexpr_ptr constexpr_cast_ptr(from *p) noexcept { + return constexpr_ptr{p}; +} + +/** + * helper type for constexpr_writeptr, so it is possible to + * do "*ptr = val;" + */ +template +struct constexpr_write_ptr_proxy { + + constexpr explicit constexpr_write_ptr_proxy(TargetType *raw) : p(raw) {} + + constexpr constexpr_write_ptr_proxy &operator=(SrcType v) { + *p = static_cast(v); + return *this; + } + + TargetType *p; +}; + +/** + * helper for working around reinterpret_cast not being allowed during constexpr + * evaluation. will try to act as a SrcType* but actually write to the pointer + * given in the constructor, which is of another type TargetType + */ +template struct constexpr_write_ptr { + constexpr explicit constexpr_write_ptr(TargetType *raw) : p(raw) {} + + constexpr constexpr_write_ptr_proxy operator*() const { + return constexpr_write_ptr_proxy{p}; + } + + constexpr constexpr_write_ptr_proxy + operator[](std::ptrdiff_t n) const { + return constexpr_write_ptr_proxy{p + n}; + } + + constexpr constexpr_write_ptr &operator++() { + ++p; + return *this; + } + + constexpr constexpr_write_ptr operator++(int) { + constexpr_write_ptr old = *this; + ++p; + return old; + } + + constexpr std::ptrdiff_t operator-(const constexpr_write_ptr &other) const { + return p - other.p; + } + + TargetType *p; +}; + +template +constexpr auto constexpr_cast_writeptr(TargetType *raw) { + return constexpr_write_ptr{raw}; +} + +} // namespace detail +} // namespace simdutf +#endif +/* end file include/simdutf/constexpr_ptr.h */ +#endif + +#if SIMDUTF_SPAN +/// helpers placed in namespace detail are not a part of the public API +namespace simdutf { +namespace detail { +/** + * matches a byte, in the many ways C++ allows. note that these + * are all distinct types. 
+ */ +template +concept byte_like = std::is_same_v || // + std::is_same_v || // + std::is_same_v || // + std::is_same_v || // + std::is_same_v; + +template +concept is_byte_like = byte_like>; + +template +concept is_pointer = std::is_pointer_v; + +/** + * matches anything that behaves like std::span and points to character-like + * data such as: std::byte, char, unsigned char, signed char, std::int8_t, + * std::uint8_t + */ +template +concept input_span_of_byte_like = requires(const T &t) { + { t.size() } noexcept -> std::convertible_to; + { t.data() } noexcept -> is_pointer; + { *t.data() } noexcept -> is_byte_like; +}; + +template +concept is_mutable = !std::is_const_v>; + +/** + * like span_of_byte_like, but for an output span (intended to be written to) + */ +template +concept output_span_of_byte_like = requires(T &t) { + { t.size() } noexcept -> std::convertible_to; + { t.data() } noexcept -> is_pointer; + { *t.data() } noexcept -> is_byte_like; + { *t.data() } noexcept -> is_mutable; +}; + +/** + * a pointer like object, when indexed, results in a byte like result. + * valid examples: char*, const char*, std::array + * invalid examples: int*, std::array + */ +template +concept indexes_into_byte_like = requires(InputPtr p) { + { std::decay_t{} } -> simdutf::detail::byte_like; +}; +template +concept indexes_into_utf16 = requires(InputPtr p) { + { std::decay_t{} } -> std::same_as; +}; +template +concept indexes_into_utf32 = requires(InputPtr p) { + { std::decay_t{} } -> std::same_as; +}; + +template +concept index_assignable_from_char = requires(InputPtr p, char s) { + { p[0] = s }; +}; + +/** + * a pointer like object that results in a uint32_t when indexed. + * valid examples: uint32_t* + */ +template +concept indexes_into_uint32 = requires(InputPtr p) { + { std::decay_t{} } -> std::same_as; +}; +} // namespace detail +} // namespace simdutf +#endif // SIMDUTF_SPAN + +// these includes are needed for constexpr support. they are +// not part of the public api. 
+/* begin file include/simdutf/scalar/swap_bytes.h */ +#ifndef SIMDUTF_SWAP_BYTES_H +#define SIMDUTF_SWAP_BYTES_H + +namespace simdutf { +namespace scalar { + +constexpr inline simdutf_warn_unused uint16_t +u16_swap_bytes(const uint16_t word) { + return uint16_t((word >> 8) | (word << 8)); +} + +constexpr inline simdutf_warn_unused uint32_t +u32_swap_bytes(const uint32_t word) { + return ((word >> 24) & 0xff) | // move byte 3 to byte 0 + ((word << 8) & 0xff0000) | // move byte 1 to byte 2 + ((word >> 8) & 0xff00) | // move byte 2 to byte 1 + ((word << 24) & 0xff000000); // byte 0 to byte 3 +} + +namespace utf32 { +template constexpr uint32_t swap_if_needed(uint32_t c) { + return !match_system(big_endian) ? scalar::u32_swap_bytes(c) : c; +} +} // namespace utf32 + +namespace utf16 { +template constexpr uint16_t swap_if_needed(uint16_t c) { + return !match_system(big_endian) ? scalar::u16_swap_bytes(c) : c; +} +} // namespace utf16 + +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/swap_bytes.h */ +/* begin file include/simdutf/scalar/ascii.h */ +#ifndef SIMDUTF_ASCII_H +#define SIMDUTF_ASCII_H + +namespace simdutf { +namespace scalar { +namespace { +namespace ascii { + +template +#if SIMDUTF_CPLUSPLUS20 + requires simdutf::detail::indexes_into_byte_like +#endif +simdutf_warn_unused simdutf_constexpr23 bool validate(InputPtr data, + size_t len) noexcept { + uint64_t pos = 0; + +#if SIMDUTF_CPLUSPLUS23 + // avoid memcpy during constant evaluation + if !consteval +#endif + // process in blocks of 16 bytes when possible + { + for (; pos + 16 <= len; pos += 16) { + uint64_t v1; + std::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | v2}; + if ((v & 0x8080808080808080) != 0) { + return false; + } + } + } + + // process the tail byte-by-byte + for (; pos < len; pos++) { + if (static_cast(data[pos]) >= 0b10000000) { + return false; + } 
+ } + return true; +} +template +#if SIMDUTF_CPLUSPLUS20 + requires simdutf::detail::indexes_into_byte_like +#endif +simdutf_warn_unused simdutf_constexpr23 result +validate_with_errors(InputPtr data, size_t len) noexcept { + size_t pos = 0; +#if SIMDUTF_CPLUSPLUS23 + // avoid memcpy during constant evaluation + if !consteval +#endif + { + // process in blocks of 16 bytes when possible + for (; pos + 16 <= len; pos += 16) { + uint64_t v1; + std::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | v2}; + if ((v & 0x8080808080808080) != 0) { + for (; pos < len; pos++) { + if (static_cast(data[pos]) >= 0b10000000) { + return result(error_code::TOO_LARGE, pos); + } + } + } + } + } + + // process the tail byte-by-byte + for (; pos < len; pos++) { + if (static_cast(data[pos]) >= 0b10000000) { + return result(error_code::TOO_LARGE, pos); + } + } + return result(error_code::SUCCESS, pos); +} + +} // namespace ascii +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/ascii.h */ +/* begin file include/simdutf/scalar/atomic_util.h */ +#ifndef SIMDUTF_ATOMIC_UTIL_H +#define SIMDUTF_ATOMIC_UTIL_H +#if SIMDUTF_ATOMIC_REF + #include +namespace simdutf { +namespace scalar { + +// This function is a memcpy that uses atomic operations to read from the +// source. +inline void memcpy_atomic_read(char *dst, const char *src, size_t len) { + static_assert(std::atomic_ref::required_alignment == sizeof(char), + "std::atomic_ref requires the same alignment as char_type"); + // We expect all 64-bit systems to be able to read 64-bit words from an + // aligned memory region atomically. You might be able to do better on + // specific systems, e.g., x64 systems can read 128-bit words atomically. 
+ constexpr size_t alignment = sizeof(uint64_t); + + // Lambda for atomic byte-by-byte copy + auto bbb_memcpy_atomic_read = [](char *bytedst, const char *bytesrc, + size_t bytelen) noexcept { + char *mutable_src = const_cast(bytesrc); + for (size_t j = 0; j < bytelen; ++j) { + bytedst[j] = + std::atomic_ref(mutable_src[j]).load(std::memory_order_relaxed); + } + }; + + // Handle unaligned start + size_t offset = reinterpret_cast(src) % alignment; + if (offset) { + size_t to_align = std::min(len, alignment - offset); + bbb_memcpy_atomic_read(dst, src, to_align); + src += to_align; + dst += to_align; + len -= to_align; + } + + // Process aligned 64-bit chunks + while (len >= alignment) { + auto *src_aligned = reinterpret_cast(const_cast(src)); + const auto dst_value = + std::atomic_ref(*src_aligned).load(std::memory_order_relaxed); + std::memcpy(dst, &dst_value, sizeof(uint64_t)); + src += alignment; + dst += alignment; + len -= alignment; + } + + // Handle remaining bytes + if (len) { + bbb_memcpy_atomic_read(dst, src, len); + } +} + +// This function is a memcpy that uses atomic operations to write to the +// destination. +inline void memcpy_atomic_write(char *dst, const char *src, size_t len) { + static_assert(std::atomic_ref::required_alignment == sizeof(char), + "std::atomic_ref requires the same alignment as char"); + // We expect all 64-bit systems to be able to write 64-bit words to an aligned + // memory region atomically. + // You might be able to do better on specific systems, e.g., x64 systems can + // write 128-bit words atomically. 
+ constexpr size_t alignment = sizeof(uint64_t); + + // Lambda for atomic byte-by-byte write + auto bbb_memcpy_atomic_write = [](char *bytedst, const char *bytesrc, + size_t bytelen) noexcept { + for (size_t j = 0; j < bytelen; ++j) { + std::atomic_ref(bytedst[j]) + .store(bytesrc[j], std::memory_order_relaxed); + } + }; + + // Handle unaligned start + size_t offset = reinterpret_cast(dst) % alignment; + if (offset) { + size_t to_align = std::min(len, alignment - offset); + bbb_memcpy_atomic_write(dst, src, to_align); + dst += to_align; + src += to_align; + len -= to_align; + } + + // Process aligned 64-bit chunks + while (len >= alignment) { + auto *dst_aligned = reinterpret_cast(dst); + uint64_t src_val; + std::memcpy(&src_val, src, sizeof(uint64_t)); // Non-atomic read from src + std::atomic_ref(*dst_aligned) + .store(src_val, std::memory_order_relaxed); + dst += alignment; + src += alignment; + len -= alignment; + } + + // Handle remaining bytes + if (len) { + bbb_memcpy_atomic_write(dst, src, len); + } +} +} // namespace scalar +} // namespace simdutf +#endif // SIMDUTF_ATOMIC_REF +#endif // SIMDUTF_ATOMIC_UTIL_H +/* end file include/simdutf/scalar/atomic_util.h */ +/* begin file include/simdutf/scalar/latin1.h */ +#ifndef SIMDUTF_LATIN1_H +#define SIMDUTF_LATIN1_H + +namespace simdutf { +namespace scalar { +namespace { +namespace latin1 { + +simdutf_really_inline size_t utf8_length_from_latin1(const char *buf, + size_t len) { + const uint8_t *c = reinterpret_cast(buf); + size_t answer = 0; + for (size_t i = 0; i < len; i++) { + if ((c[i] >> 7)) { + answer++; + } + } + return answer + len; +} + +} // namespace latin1 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/latin1.h */ +/* begin file include/simdutf/scalar/latin1_to_utf16/latin1_to_utf16.h */ +#ifndef SIMDUTF_LATIN1_TO_UTF16_H +#define SIMDUTF_LATIN1_TO_UTF16_H + +namespace simdutf { +namespace scalar { +namespace { +namespace 
latin1_to_utf16 { + +template +#if SIMDUTF_CPLUSPLUS20 + requires simdutf::detail::indexes_into_byte_like +#endif +simdutf_constexpr23 size_t convert(InputPtr data, size_t len, + char16_t *utf16_output) { + size_t pos = 0; + char16_t *start{utf16_output}; + + while (pos < len) { + uint16_t word = + uint8_t(data[pos]); // extend Latin-1 char to 16-bit Unicode code point + *utf16_output++ = + char16_t(match_system(big_endian) ? word : u16_swap_bytes(word)); + pos++; + } + + return utf16_output - start; +} + +template +inline result convert_with_errors(const char *buf, size_t len, + char16_t *utf16_output) { + const uint8_t *data = reinterpret_cast(buf); + size_t pos = 0; + char16_t *start{utf16_output}; + + while (pos < len) { + uint16_t word = + uint16_t(data[pos]); // extend Latin-1 char to 16-bit Unicode code point + *utf16_output++ = + char16_t(match_system(big_endian) ? word : u16_swap_bytes(word)); + pos++; + } + + return result(error_code::SUCCESS, utf16_output - start); +} + +} // namespace latin1_to_utf16 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/latin1_to_utf16/latin1_to_utf16.h */ +/* begin file include/simdutf/scalar/latin1_to_utf32/latin1_to_utf32.h */ +#ifndef SIMDUTF_LATIN1_TO_UTF32_H +#define SIMDUTF_LATIN1_TO_UTF32_H + +namespace simdutf { +namespace scalar { +namespace { +namespace latin1_to_utf32 { + +template +#if SIMDUTF_CPLUSPLUS20 + requires simdutf::detail::indexes_into_byte_like +#endif +simdutf_constexpr23 size_t convert(InputPtr data, size_t len, + char32_t *utf32_output) { + char32_t *start{utf32_output}; + for (size_t i = 0; i < len; i++) { + *utf32_output++ = uint8_t(data[i]); + } + return utf32_output - start; +} + +} // namespace latin1_to_utf32 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/latin1_to_utf32/latin1_to_utf32.h */ +/* begin file 
include/simdutf/scalar/latin1_to_utf8/latin1_to_utf8.h */ +#ifndef SIMDUTF_LATIN1_TO_UTF8_H +#define SIMDUTF_LATIN1_TO_UTF8_H + +namespace simdutf { +namespace scalar { +namespace { +namespace latin1_to_utf8 { + +template +#if SIMDUTF_CPLUSPLUS20 + requires(simdutf::detail::indexes_into_byte_like && + simdutf::detail::index_assignable_from_char) +#endif +simdutf_constexpr23 size_t convert(InputPtr data, size_t len, + OutputPtr utf8_output) { + // const unsigned char *data = reinterpret_cast(buf); + size_t pos = 0; + size_t utf8_pos = 0; + + while (pos < len) { +#if SIMDUTF_CPLUSPLUS23 + if !consteval +#endif + { + // try to convert the next block of 16 ASCII bytes + if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that + // they are ascii + uint64_t v1; + ::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | + v2}; // We are only interested in these bits: 1000 1000 1000 + // 1000, so it makes sense to concatenate everything + if ((v & 0x8080808080808080) == + 0) { // if NONE of these are set, e.g. 
all of them are zero, then + // everything is ASCII + size_t final_pos = pos + 16; + while (pos < final_pos) { + utf8_output[utf8_pos++] = char(data[pos]); + pos++; + } + continue; + } + } // if (pos + 16 <= len) + } // !consteval scope + + unsigned char byte = data[pos]; + if ((byte & 0x80) == 0) { // if ASCII + // will generate one UTF-8 bytes + utf8_output[utf8_pos++] = char(byte); + pos++; + } else { + // will generate two UTF-8 bytes + utf8_output[utf8_pos++] = char((byte >> 6) | 0b11000000); + utf8_output[utf8_pos++] = char((byte & 0b111111) | 0b10000000); + pos++; + } + } // while + return utf8_pos; +} + +simdutf_really_inline size_t convert(const char *buf, size_t len, + char *utf8_output) { + return convert(reinterpret_cast(buf), len, + utf8_output); +} + +inline size_t convert_safe(const char *buf, size_t len, char *utf8_output, + size_t utf8_len) { + const unsigned char *data = reinterpret_cast(buf); + size_t pos = 0; + size_t skip_pos = 0; + size_t utf8_pos = 0; + while (pos < len && utf8_pos < utf8_len) { + // try to convert the next block of 16 ASCII bytes + if (pos >= skip_pos && pos + 16 <= len && + utf8_pos + 16 <= utf8_len) { // if it is safe to read 16 more bytes, + // check that they are ascii + uint64_t v1; + ::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | + v2}; // We are only interested in these bits: 1000 1000 1000 + // 1000, so it makes sense to concatenate everything + if ((v & 0x8080808080808080) == + 0) { // if NONE of these are set, e.g. 
all of them are zero, then + // everything is ASCII + ::memcpy(utf8_output + utf8_pos, buf + pos, 16); + utf8_pos += 16; + pos += 16; + } else { + // At least one of the next 16 bytes are not ASCII, we will process them + // one by one + skip_pos = pos + 16; + } + } else { + const auto byte = data[pos]; + if ((byte & 0x80) == 0) { // if ASCII + // will generate one UTF-8 bytes + utf8_output[utf8_pos++] = char(byte); + pos++; + } else if (utf8_pos + 2 <= utf8_len) { + // will generate two UTF-8 bytes + utf8_output[utf8_pos++] = char((byte >> 6) | 0b11000000); + utf8_output[utf8_pos++] = char((byte & 0b111111) | 0b10000000); + pos++; + } else { + break; + } + } + } + return utf8_pos; +} + +template +#if SIMDUTF_CPLUSPLUS20 + requires(simdutf::detail::indexes_into_byte_like && + simdutf::detail::index_assignable_from_char) +#endif +simdutf_constexpr23 size_t convert_safe_constexpr(InputPtr data, size_t len, + OutputPtr utf8_output, + size_t utf8_len) { + size_t pos = 0; + size_t utf8_pos = 0; + while (pos < len && utf8_pos < utf8_len) { + const unsigned char byte = data[pos]; + if ((byte & 0x80) == 0) { // if ASCII + // will generate one UTF-8 bytes + utf8_output[utf8_pos++] = char(byte); + pos++; + } else if (utf8_pos + 2 <= utf8_len) { + // will generate two UTF-8 bytes + utf8_output[utf8_pos++] = char((byte >> 6) | 0b11000000); + utf8_output[utf8_pos++] = char((byte & 0b111111) | 0b10000000); + pos++; + } else { + break; + } + } + return utf8_pos; +} + +template +#if SIMDUTF_CPLUSPLUS20 + requires simdutf::detail::indexes_into_byte_like +#endif +simdutf_constexpr23 simdutf_warn_unused size_t +utf8_length_from_latin1(InputPtr input, size_t length) noexcept { + size_t answer = length; + size_t i = 0; + +#if SIMDUTF_CPLUSPLUS23 + if !consteval +#endif + { + auto pop = [](uint64_t v) { + return (size_t)(((v >> 7) & UINT64_C(0x0101010101010101)) * + UINT64_C(0x0101010101010101) >> + 56); + }; + for (; i + 32 <= length; i += 32) { + uint64_t v; + memcpy(&v, input + i, 
8); + answer += pop(v); + memcpy(&v, input + i + 8, sizeof(v)); + answer += pop(v); + memcpy(&v, input + i + 16, sizeof(v)); + answer += pop(v); + memcpy(&v, input + i + 24, sizeof(v)); + answer += pop(v); + } + for (; i + 8 <= length; i += 8) { + uint64_t v; + memcpy(&v, input + i, sizeof(v)); + answer += pop(v); + } + } // !consteval scope + for (; i + 1 <= length; i += 1) { + answer += static_cast(input[i]) >> 7; + } + return answer; +} + +} // namespace latin1_to_utf8 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/latin1_to_utf8/latin1_to_utf8.h */ +/* begin file include/simdutf/scalar/utf16.h */ +#ifndef SIMDUTF_UTF16_H +#define SIMDUTF_UTF16_H + +namespace simdutf { +namespace scalar { +namespace utf16 { + +template +simdutf_warn_unused simdutf_constexpr23 bool +validate_as_ascii(const char16_t *data, size_t len) noexcept { + for (size_t pos = 0; pos < len; pos++) { + char16_t word = scalar::utf16::swap_if_needed(data[pos]); + if (word >= 0x80) { + return false; + } + } + return true; +} + +template +inline simdutf_warn_unused simdutf_constexpr23 bool +validate(const char16_t *data, size_t len) noexcept { + uint64_t pos = 0; + while (pos < len) { + char16_t word = scalar::utf16::swap_if_needed(data[pos]); + if ((word & 0xF800) == 0xD800) { + if (pos + 1 >= len) { + return false; + } + char16_t diff = char16_t(word - 0xD800); + if (diff > 0x3FF) { + return false; + } + char16_t next_word = !match_system(big_endian) + ? 
u16_swap_bytes(data[pos + 1]) + : data[pos + 1]; + char16_t diff2 = char16_t(next_word - 0xDC00); + if (diff2 > 0x3FF) { + return false; + } + pos += 2; + } else { + pos++; + } + } + return true; +} + +template +inline simdutf_warn_unused simdutf_constexpr23 result +validate_with_errors(const char16_t *data, size_t len) noexcept { + size_t pos = 0; + while (pos < len) { + char16_t word = scalar::utf16::swap_if_needed(data[pos]); + if ((word & 0xF800) == 0xD800) { + if (pos + 1 >= len) { + return result(error_code::SURROGATE, pos); + } + char16_t diff = char16_t(word - 0xD800); + if (diff > 0x3FF) { + return result(error_code::SURROGATE, pos); + } + char16_t next_word = !match_system(big_endian) + ? u16_swap_bytes(data[pos + 1]) + : data[pos + 1]; + char16_t diff2 = uint16_t(next_word - 0xDC00); + if (diff2 > 0x3FF) { + return result(error_code::SURROGATE, pos); + } + pos += 2; + } else { + pos++; + } + } + return result(error_code::SUCCESS, pos); +} + +template +simdutf_constexpr23 size_t count_code_points(const char16_t *p, size_t len) { + // We are not BOM aware. + size_t counter{0}; + for (size_t i = 0; i < len; i++) { + char16_t word = scalar::utf16::swap_if_needed(p[i]); + counter += ((word & 0xFC00) != 0xDC00); + } + return counter; +} + +template +simdutf_constexpr23 size_t utf8_length_from_utf16(const char16_t *p, + size_t len) { + // We are not BOM aware. + size_t counter{0}; + for (size_t i = 0; i < len; i++) { + char16_t word = scalar::utf16::swap_if_needed(p[i]); + counter++; // ASCII + counter += static_cast( + word > + 0x7F); // non-ASCII is at least 2 bytes, surrogates are 2*2 == 4 bytes + counter += static_cast((word > 0x7FF && word <= 0xD7FF) || + (word >= 0xE000)); // three-byte + } + return counter; +} + +template +simdutf_constexpr23 size_t utf32_length_from_utf16(const char16_t *p, + size_t len) { + // We are not BOM aware. 
+ size_t counter{0}; + for (size_t i = 0; i < len; i++) { + char16_t word = scalar::utf16::swap_if_needed(p[i]); + counter += ((word & 0xFC00) != 0xDC00); + } + return counter; +} + +simdutf_really_inline simdutf_constexpr23 void +change_endianness_utf16(const char16_t *input, size_t size, char16_t *output) { + for (size_t i = 0; i < size; i++) { + *output++ = char16_t(input[i] >> 8 | input[i] << 8); + } +} + +template +simdutf_warn_unused simdutf_constexpr23 size_t +trim_partial_utf16(const char16_t *input, size_t length) { + if (length == 0) { + return 0; + } + uint16_t last_word = uint16_t(input[length - 1]); + last_word = scalar::utf16::swap_if_needed(last_word); + length -= ((last_word & 0xFC00) == 0xD800); + return length; +} + +template +simdutf_constexpr bool is_high_surrogate(char16_t c) { + c = scalar::utf16::swap_if_needed(c); + return (0xd800 <= c && c <= 0xdbff); +} + +template +simdutf_constexpr bool is_low_surrogate(char16_t c) { + c = scalar::utf16::swap_if_needed(c); + return (0xdc00 <= c && c <= 0xdfff); +} + +simdutf_really_inline constexpr bool high_surrogate(char16_t c) { + return (0xd800 <= c && c <= 0xdbff); +} + +simdutf_really_inline constexpr bool low_surrogate(char16_t c) { + return (0xdc00 <= c && c <= 0xdfff); +} + +template +simdutf_constexpr23 result +utf8_length_from_utf16_with_replacement(const char16_t *p, size_t len) { + bool any_surrogates = false; + // We are not BOM aware. + size_t counter{0}; + for (size_t i = 0; i < len; i++) { + if (is_high_surrogate(p[i])) { + any_surrogates = true; + // surrogate pair + if (i + 1 < len && is_low_surrogate(p[i + 1])) { + counter += 4; + i++; // skip low surrogate + } else { + counter += 3; // unpaired high surrogate replaced by U+FFFD + } + continue; + } else if (is_low_surrogate(p[i])) { + any_surrogates = true; + counter += 3; // unpaired low surrogate replaced by U+FFFD + continue; + } + char16_t word = !match_system(big_endian) ? 
u16_swap_bytes(p[i]) : p[i]; + counter++; // at least 1 byte + counter += + static_cast(word > 0x7F); // non-ASCII is at least 2 bytes + counter += static_cast(word > 0x7FF); // three-byte + } + return {any_surrogates ? error_code::SURROGATE : error_code::SUCCESS, + counter}; +} + +// variable templates are a C++14 extension +template constexpr char16_t replacement() { + return !match_system(big_endian) ? scalar::u16_swap_bytes(0xfffd) : 0xfffd; +} + +template +simdutf_constexpr23 void to_well_formed_utf16(const char16_t *input, size_t len, + char16_t *output) { + const char16_t replacement = utf16::replacement(); + bool high_surrogate_prev = false, high_surrogate, low_surrogate; + size_t i = 0; + for (; i < len; i++) { + char16_t c = input[i]; + high_surrogate = is_high_surrogate(c); + low_surrogate = is_low_surrogate(c); + if (high_surrogate_prev && !low_surrogate) { + output[i - 1] = replacement; + } + + if (!high_surrogate_prev && low_surrogate) { + output[i] = replacement; + } else { + output[i] = input[i]; + } + high_surrogate_prev = high_surrogate; + } + + /* string may not end with high surrogate */ + if (high_surrogate_prev) { + output[i - 1] = replacement; + } +} + +} // namespace utf16 +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf16.h */ +/* begin file include/simdutf/scalar/utf16_to_latin1/utf16_to_latin1.h */ +#ifndef SIMDUTF_UTF16_TO_LATIN1_H +#define SIMDUTF_UTF16_TO_LATIN1_H + +#include // for std::memcpy + +namespace simdutf { +namespace scalar { +namespace { +namespace utf16_to_latin1 { + +template +#if SIMDUTF_CPLUSPLUS20 + requires(simdutf::detail::indexes_into_utf16 && + simdutf::detail::index_assignable_from_char) +#endif +simdutf_constexpr23 size_t convert(InputPtr data, size_t len, + OutputPtr latin_output) { + if (len == 0) { + return 0; + } + size_t pos = 0; + const auto latin_output_start = latin_output; + uint16_t word = 0; + uint16_t too_large = 0; + + while (pos < len) { + word = 
!match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; + too_large |= word; + *latin_output++ = char(word & 0xFF); + pos++; + } + if ((too_large & 0xFF00) != 0) { + return 0; + } + + return latin_output - latin_output_start; +} + +template +#if SIMDUTF_CPLUSPLUS20 + requires(simdutf::detail::indexes_into_utf16 && + simdutf::detail::index_assignable_from_char) +#endif +simdutf_constexpr23 result convert_with_errors(InputPtr data, size_t len, + OutputPtr latin_output) { + if (len == 0) { + return result(error_code::SUCCESS, 0); + } + size_t pos = 0; + auto start = latin_output; + uint16_t word; + + while (pos < len) { +#if SIMDUTF_CPLUSPLUS23 + if !consteval +#endif + { + if (pos + 16 <= len) { // if it is safe to read 32 more bytes, check that + // they are Latin1 + uint64_t v1, v2, v3, v4; + ::memcpy(&v1, data + pos, sizeof(uint64_t)); + ::memcpy(&v2, data + pos + 4, sizeof(uint64_t)); + ::memcpy(&v3, data + pos + 8, sizeof(uint64_t)); + ::memcpy(&v4, data + pos + 12, sizeof(uint64_t)); + + if simdutf_constexpr (!match_system(big_endian)) { + v1 = (v1 >> 8) | (v1 << (64 - 8)); + } + if simdutf_constexpr (!match_system(big_endian)) { + v2 = (v2 >> 8) | (v2 << (64 - 8)); + } + if simdutf_constexpr (!match_system(big_endian)) { + v3 = (v3 >> 8) | (v3 << (64 - 8)); + } + if simdutf_constexpr (!match_system(big_endian)) { + v4 = (v4 >> 8) | (v4 << (64 - 8)); + } + + if (((v1 | v2 | v3 | v4) & 0xFF00FF00FF00FF00) == 0) { + size_t final_pos = pos + 16; + while (pos < final_pos) { + *latin_output++ = !match_system(big_endian) + ? char(u16_swap_bytes(data[pos])) + : char(data[pos]); + pos++; + } + continue; + } + } + } + + word = !match_system(big_endian) ? 
u16_swap_bytes(data[pos]) : data[pos]; + if ((word & 0xFF00) == 0) { + *latin_output++ = char(word & 0xFF); + pos++; + } else { + return result(error_code::TOO_LARGE, pos); + } + } + return result(error_code::SUCCESS, latin_output - start); +} + +} // namespace utf16_to_latin1 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf16_to_latin1/utf16_to_latin1.h */ +/* begin file include/simdutf/scalar/utf16_to_latin1/valid_utf16_to_latin1.h */ +#ifndef SIMDUTF_VALID_UTF16_TO_LATIN1_H +#define SIMDUTF_VALID_UTF16_TO_LATIN1_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf16_to_latin1 { + +template +simdutf_constexpr23 inline size_t +convert_valid_impl(InputIterator data, size_t len, + OutputIterator latin_output) { + static_assert( + std::is_same::type, uint16_t>::value, + "must decay to uint16_t"); + size_t pos = 0; + const auto start = latin_output; + uint16_t word = 0; + + while (pos < len) { + word = !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; + *latin_output++ = char(word); + pos++; + } + + return latin_output - start; +} + +template +simdutf_really_inline size_t convert_valid(const char16_t *buf, size_t len, + char *latin_output) { + return convert_valid_impl(reinterpret_cast(buf), + len, latin_output); +} +} // namespace utf16_to_latin1 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf16_to_latin1/valid_utf16_to_latin1.h */ +/* begin file include/simdutf/scalar/utf16_to_utf32/utf16_to_utf32.h */ +#ifndef SIMDUTF_UTF16_TO_UTF32_H +#define SIMDUTF_UTF16_TO_UTF32_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf16_to_utf32 { + +template +simdutf_constexpr23 size_t convert(const char16_t *data, size_t len, + char32_t *utf32_output) { + size_t pos = 0; + char32_t *start{utf32_output}; + while (pos < len) { + uint16_t word = + !match_system(big_endian) ? 
u16_swap_bytes(data[pos]) : data[pos]; + if ((word & 0xF800) != 0xD800) { + // No surrogate pair, extend 16-bit word to 32-bit word + *utf32_output++ = char32_t(word); + pos++; + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + if (diff > 0x3FF) { + return 0; + } + if (pos + 1 >= len) { + return 0; + } // minimal bound checking + uint16_t next_word = !match_system(big_endian) + ? u16_swap_bytes(data[pos + 1]) + : data[pos + 1]; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if (diff2 > 0x3FF) { + return 0; + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf32_output++ = char32_t(value); + pos += 2; + } + } + return utf32_output - start; +} + +template +simdutf_constexpr23 result convert_with_errors(const char16_t *data, size_t len, + char32_t *utf32_output) { + size_t pos = 0; + char32_t *start{utf32_output}; + while (pos < len) { + uint16_t word = + !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; + if ((word & 0xF800) != 0xD800) { + // No surrogate pair, extend 16-bit word to 32-bit word + *utf32_output++ = char32_t(word); + pos++; + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + if (diff > 0x3FF) { + return result(error_code::SURROGATE, pos); + } + if (pos + 1 >= len) { + return result(error_code::SURROGATE, pos); + } // minimal bound checking + uint16_t next_word = !match_system(big_endian) + ? 
u16_swap_bytes(data[pos + 1]) + : data[pos + 1]; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if (diff2 > 0x3FF) { + return result(error_code::SURROGATE, pos); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf32_output++ = char32_t(value); + pos += 2; + } + } + return result(error_code::SUCCESS, utf32_output - start); +} + +} // namespace utf16_to_utf32 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf16_to_utf32/utf16_to_utf32.h */ +/* begin file include/simdutf/scalar/utf16_to_utf32/valid_utf16_to_utf32.h */ +#ifndef SIMDUTF_VALID_UTF16_TO_UTF32_H +#define SIMDUTF_VALID_UTF16_TO_UTF32_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf16_to_utf32 { + +template +simdutf_constexpr23 size_t convert_valid(const char16_t *data, size_t len, + char32_t *utf32_output) { + size_t pos = 0; + char32_t *start{utf32_output}; + while (pos < len) { + uint16_t word = + !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; + if ((word & 0xF800) != 0xD800) { + // No surrogate pair, extend 16-bit word to 32-bit word + *utf32_output++ = char32_t(word); + pos++; + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + if (pos + 1 >= len) { + return 0; + } // minimal bound checking + uint16_t next_word = !match_system(big_endian) + ? 
u16_swap_bytes(data[pos + 1]) + : data[pos + 1]; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf32_output++ = char32_t(value); + pos += 2; + } + } + return utf32_output - start; +} + +} // namespace utf16_to_utf32 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf16_to_utf32/valid_utf16_to_utf32.h */ +/* begin file include/simdutf/scalar/utf16_to_utf8/utf16_to_utf8.h */ +#ifndef SIMDUTF_UTF16_TO_UTF8_H +#define SIMDUTF_UTF16_TO_UTF8_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf16_to_utf8 { + +template +#if SIMDUTF_CPLUSPLUS20 + requires simdutf::detail::indexes_into_utf16 +// FIXME constrain output as well +#endif +simdutf_constexpr23 size_t convert(InputPtr data, size_t len, + OutputPtr utf8_output) { + size_t pos = 0; + const auto start = utf8_output; + while (pos < len) { +#if SIMDUTF_CPLUSPLUS23 + if !consteval +#endif + { + // try to convert the next block of 8 bytes + if (pos + 4 <= len) { // if it is safe to read 8 more bytes, check that + // they are ascii + uint64_t v; + ::memcpy(&v, data + pos, sizeof(uint64_t)); + if simdutf_constexpr (!match_system(big_endian)) { + v = (v >> 8) | (v << (64 - 8)); + } + if ((v & 0xFF80FF80FF80FF80) == 0) { + size_t final_pos = pos + 4; + while (pos < final_pos) { + *utf8_output++ = !match_system(big_endian) + ? char(u16_swap_bytes(data[pos])) + : char(data[pos]); + pos++; + } + continue; + } + } + } + uint16_t word = + !match_system(big_endian) ? 
u16_swap_bytes(data[pos]) : data[pos]; + if ((word & 0xFF80) == 0) { + // will generate one UTF-8 bytes + *utf8_output++ = char(word); + pos++; + } else if ((word & 0xF800) == 0) { + // will generate two UTF-8 bytes + // we have 0b110XXXXX 0b10XXXXXX + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } else if ((word & 0xF800) != 0xD800) { + // will generate three UTF-8 bytes + // we have 0b1110XXXX 0b10XXXXXX 0b10XXXXXX + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } else { + // must be a surrogate pair + if (pos + 1 >= len) { + return 0; + } + uint16_t diff = uint16_t(word - 0xD800); + if (diff > 0x3FF) { + return 0; + } + uint16_t next_word = !match_system(big_endian) + ? u16_swap_bytes(data[pos + 1]) + : data[pos + 1]; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if (diff2 > 0x3FF) { + return 0; + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + // will generate four UTF-8 bytes + // we have 0b11110XXX 0b10XXXXXX 0b10XXXXXX 0b10XXXXXX + *utf8_output++ = char((value >> 18) | 0b11110000); + *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((value & 0b111111) | 0b10000000); + pos += 2; + } + } + return utf8_output - start; +} + +template +#if SIMDUTF_CPLUSPLUS20 + requires(simdutf::detail::indexes_into_utf16 && + simdutf::detail::index_assignable_from_char) +#endif +simdutf_constexpr23 full_result convert_with_errors(InputPtr data, size_t len, + OutputPtr utf8_output, + size_t utf8_len = 0) { + if (check_output && utf8_len == 0) { + return full_result(error_code::OUTPUT_BUFFER_TOO_SMALL, 0, 0); + } + + size_t pos = 0; + auto start = utf8_output; + auto end = utf8_output + utf8_len; + + while (pos < len) { +#if SIMDUTF_CPLUSPLUS23 + if !consteval 
+#endif + { + // try to convert the next block of 8 bytes + if (pos + 4 <= len) { // if it is safe to read 8 more bytes, check that + // they are ascii + uint64_t v; + ::memcpy(&v, data + pos, sizeof(uint64_t)); + if simdutf_constexpr (!match_system(big_endian)) + v = (v >> 8) | (v << (64 - 8)); + if ((v & 0xFF80FF80FF80FF80) == 0) { + size_t final_pos = pos + 4; + while (pos < final_pos) { + if (check_output && size_t(end - utf8_output) < 1) { + return full_result(error_code::OUTPUT_BUFFER_TOO_SMALL, pos, + utf8_output - start); + } + *utf8_output++ = !match_system(big_endian) + ? char(u16_swap_bytes(data[pos])) + : char(data[pos]); + pos++; + } + continue; + } + } + } + + uint16_t word = + !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; + if ((word & 0xFF80) == 0) { + // will generate one UTF-8 bytes + if (check_output && size_t(end - utf8_output) < 1) { + return full_result(error_code::OUTPUT_BUFFER_TOO_SMALL, pos, + utf8_output - start); + } + *utf8_output++ = char(word); + pos++; + } else if ((word & 0xF800) == 0) { + // will generate two UTF-8 bytes + // we have 0b110XXXXX 0b10XXXXXX + if (check_output && size_t(end - utf8_output) < 2) { + return full_result(error_code::OUTPUT_BUFFER_TOO_SMALL, pos, + utf8_output - start); + } + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + + } else if ((word & 0xF800) != 0xD800) { + // will generate three UTF-8 bytes + // we have 0b1110XXXX 0b10XXXXXX 0b10XXXXXX + if (check_output && size_t(end - utf8_output) < 3) { + return full_result(error_code::OUTPUT_BUFFER_TOO_SMALL, pos, + utf8_output - start); + } + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } else { + + if (check_output && size_t(end - utf8_output) < 4) { + return full_result(error_code::OUTPUT_BUFFER_TOO_SMALL, pos, + utf8_output - start); 
+ } + // must be a surrogate pair + if (pos + 1 >= len) { + return full_result(error_code::SURROGATE, pos, utf8_output - start); + } + uint16_t diff = uint16_t(word - 0xD800); + if (diff > 0x3FF) { + return full_result(error_code::SURROGATE, pos, utf8_output - start); + } + uint16_t next_word = !match_system(big_endian) + ? u16_swap_bytes(data[pos + 1]) + : data[pos + 1]; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if (diff2 > 0x3FF) { + return full_result(error_code::SURROGATE, pos, utf8_output - start); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + // will generate four UTF-8 bytes + // we have 0b11110XXX 0b10XXXXXX 0b10XXXXXX 0b10XXXXXX + *utf8_output++ = char((value >> 18) | 0b11110000); + *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((value & 0b111111) | 0b10000000); + pos += 2; + } + } + return full_result(error_code::SUCCESS, pos, utf8_output - start); +} + +template +inline result simple_convert_with_errors(const char16_t *buf, size_t len, + char *utf8_output) { + return convert_with_errors(buf, len, utf8_output, 0); +} + +} // namespace utf16_to_utf8 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf16_to_utf8/utf16_to_utf8.h */ +/* begin file include/simdutf/scalar/utf16_to_utf8/valid_utf16_to_utf8.h */ +#ifndef SIMDUTF_VALID_UTF16_TO_UTF8_H +#define SIMDUTF_VALID_UTF16_TO_UTF8_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf16_to_utf8 { + +template +#if SIMDUTF_CPLUSPLUS20 + requires(simdutf::detail::indexes_into_utf16 && + simdutf::detail::index_assignable_from_char) +#endif +simdutf_constexpr23 size_t convert_valid(InputPtr data, size_t len, + OutputPtr utf8_output) { + size_t pos = 0; + auto start = utf8_output; + while (pos < len) { +#if SIMDUTF_CPLUSPLUS23 + if !consteval +#endif + { + // try to convert the next block of 4 ASCII 
characters + if (pos + 4 <= len) { // if it is safe to read 8 more bytes, check that + // they are ascii + uint64_t v; + ::memcpy(&v, data + pos, sizeof(uint64_t)); + if simdutf_constexpr (!match_system(big_endian)) { + v = (v >> 8) | (v << (64 - 8)); + } + if ((v & 0xFF80FF80FF80FF80) == 0) { + size_t final_pos = pos + 4; + while (pos < final_pos) { + *utf8_output++ = !match_system(big_endian) + ? char(u16_swap_bytes(data[pos])) + : char(data[pos]); + pos++; + } + continue; + } + } + } + + uint16_t word = + !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; + if ((word & 0xFF80) == 0) { + // will generate one UTF-8 bytes + *utf8_output++ = char(word); + pos++; + } else if ((word & 0xF800) == 0) { + // will generate two UTF-8 bytes + // we have 0b110XXXXX 0b10XXXXXX + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } else if ((word & 0xF800) != 0xD800) { + // will generate three UTF-8 bytes + // we have 0b1110XXXX 0b10XXXXXX 0b10XXXXXX + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + if (pos + 1 >= len) { + return 0; + } // minimal bound checking + uint16_t next_word = !match_system(big_endian) + ? 
u16_swap_bytes(data[pos + 1]) + : data[pos + 1]; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + uint32_t value = (diff << 10) + diff2 + 0x10000; + // will generate four UTF-8 bytes + // we have 0b11110XXX 0b10XXXXXX 0b10XXXXXX 0b10XXXXXX + *utf8_output++ = char((value >> 18) | 0b11110000); + *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((value & 0b111111) | 0b10000000); + pos += 2; + } + } + return utf8_output - start; +} + +} // namespace utf16_to_utf8 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf16_to_utf8/valid_utf16_to_utf8.h */ +/* begin file include/simdutf/scalar/utf32.h */ +#ifndef SIMDUTF_UTF32_H +#define SIMDUTF_UTF32_H + +namespace simdutf { +namespace scalar { +namespace utf32 { + +template +#if SIMDUTF_CPLUSPLUS20 + requires simdutf::detail::indexes_into_uint32 +#endif +simdutf_warn_unused simdutf_constexpr23 bool validate(InputPtr data, + size_t len) noexcept { + uint64_t pos = 0; + for (; pos < len; pos++) { + uint32_t word = data[pos]; + if (word > 0x10FFFF || (word >= 0xD800 && word <= 0xDFFF)) { + return false; + } + } + return true; +} + +simdutf_warn_unused simdutf_really_inline bool validate(const char32_t *buf, + size_t len) noexcept { + return validate(reinterpret_cast(buf), len); +} + +template +#if SIMDUTF_CPLUSPLUS20 + requires simdutf::detail::indexes_into_uint32 +#endif +simdutf_warn_unused simdutf_constexpr23 result +validate_with_errors(InputPtr data, size_t len) noexcept { + size_t pos = 0; + for (; pos < len; pos++) { + uint32_t word = data[pos]; + if (word > 0x10FFFF) { + return result(error_code::TOO_LARGE, pos); + } + if (word >= 0xD800 && word <= 0xDFFF) { + return result(error_code::SURROGATE, pos); + } + } + return result(error_code::SUCCESS, pos); +} + +simdutf_warn_unused simdutf_really_inline result +validate_with_errors(const char32_t *buf, 
size_t len) noexcept { + return validate_with_errors(reinterpret_cast(buf), len); +} + +inline simdutf_constexpr23 size_t utf8_length_from_utf32(const char32_t *p, + size_t len) { + // We are not BOM aware. + size_t counter{0}; + for (size_t i = 0; i < len; i++) { + // credit: @ttsugriy for the vectorizable approach + counter++; // ASCII + counter += static_cast(p[i] > 0x7F); // two-byte + counter += static_cast(p[i] > 0x7FF); // three-byte + counter += static_cast(p[i] > 0xFFFF); // four-bytes + } + return counter; +} + +inline simdutf_warn_unused simdutf_constexpr23 size_t +utf16_length_from_utf32(const char32_t *p, size_t len) { + // We are not BOM aware. + size_t counter{0}; + for (size_t i = 0; i < len; i++) { + counter++; // non-surrogate word + counter += static_cast(p[i] > 0xFFFF); // surrogate pair + } + return counter; +} + +} // namespace utf32 +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf32.h */ +/* begin file include/simdutf/scalar/utf32_to_latin1/utf32_to_latin1.h */ +#ifndef SIMDUTF_UTF32_TO_LATIN1_H +#define SIMDUTF_UTF32_TO_LATIN1_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf32_to_latin1 { + +inline simdutf_constexpr23 size_t convert(const char32_t *data, size_t len, + char *latin1_output) { + char *start = latin1_output; + uint32_t utf32_char; + size_t pos = 0; + uint32_t too_large = 0; + + while (pos < len) { + utf32_char = (uint32_t)data[pos]; + too_large |= utf32_char; + *latin1_output++ = (char)(utf32_char & 0xFF); + pos++; + } + if ((too_large & 0xFFFFFF00) != 0) { + return 0; + } + return latin1_output - start; +} + +inline simdutf_constexpr23 result convert_with_errors(const char32_t *data, + size_t len, + char *latin1_output) { + char *start{latin1_output}; + size_t pos = 0; + while (pos < len) { +#if SIMDUTF_CPLUSPLUS23 + if !consteval +#endif + { + if (pos + 2 <= len) { // if it is safe to read 8 more bytes, check that + // they are Latin1 + uint64_t v; + 
::memcpy(&v, data + pos, sizeof(uint64_t)); + if ((v & 0xFFFFFF00FFFFFF00) == 0) { + *latin1_output++ = char(data[pos]); + *latin1_output++ = char(data[pos + 1]); + pos += 2; + continue; + } + } + } + + uint32_t utf32_char = data[pos]; + if ((utf32_char & 0xFFFFFF00) == + 0) { // Check if the character can be represented in Latin-1 + *latin1_output++ = (char)(utf32_char & 0xFF); + pos++; + } else { + return result(error_code::TOO_LARGE, pos); + }; + } + return result(error_code::SUCCESS, latin1_output - start); +} + +} // namespace utf32_to_latin1 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf32_to_latin1/utf32_to_latin1.h */ +/* begin file include/simdutf/scalar/utf32_to_latin1/valid_utf32_to_latin1.h */ +#ifndef SIMDUTF_VALID_UTF32_TO_LATIN1_H +#define SIMDUTF_VALID_UTF32_TO_LATIN1_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf32_to_latin1 { + +template +simdutf_constexpr23 size_t convert_valid(ReadPtr data, size_t len, + WritePtr latin1_output) { + static_assert( + std::is_same::type, uint32_t>::value, + "dereferencing the data pointer must result in a uint32_t"); + auto start = latin1_output; + uint32_t utf32_char; + size_t pos = 0; + + while (pos < len) { + utf32_char = data[pos]; + +#if SIMDUTF_CPLUSPLUS23 + // avoid using the 8 byte at a time optimization in constant evaluation + // mode. memcpy can't be used and replacing it with bitwise or gave worse + // codegen (when not during constant evaluation). + if !consteval { +#endif + if (pos + 2 <= len) { + // if it is safe to read 8 more bytes, check that they are Latin1 + uint64_t v; + std::memcpy(&v, data + pos, sizeof(uint64_t)); + if ((v & 0xFFFFFF00FFFFFF00) == 0) { + *latin1_output++ = char(data[pos]); + *latin1_output++ = char(data[pos + 1]); + pos += 2; + continue; + } else { + // output can not be represented in latin1 + return 0; + } + } +#if SIMDUTF_CPLUSPLUS23 + } // if ! 
consteval +#endif + if ((utf32_char & 0xFFFFFF00) == 0) { + *latin1_output++ = char(utf32_char); + } else { + // output can not be represented in latin1 + return 0; + } + pos++; + } + return latin1_output - start; +} + +simdutf_really_inline size_t convert_valid(const char32_t *buf, size_t len, + char *latin1_output) { + return convert_valid(reinterpret_cast(buf), len, + latin1_output); +} + +} // namespace utf32_to_latin1 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf32_to_latin1/valid_utf32_to_latin1.h */ +/* begin file include/simdutf/scalar/utf32_to_utf16/utf32_to_utf16.h */ +#ifndef SIMDUTF_UTF32_TO_UTF16_H +#define SIMDUTF_UTF32_TO_UTF16_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf32_to_utf16 { + +template +simdutf_constexpr23 size_t convert(const char32_t *data, size_t len, + char16_t *utf16_output) { + size_t pos = 0; + char16_t *start{utf16_output}; + while (pos < len) { + uint32_t word = data[pos]; + if ((word & 0xFFFF0000) == 0) { + if (word >= 0xD800 && word <= 0xDFFF) { + return 0; + } + // will not generate a surrogate pair + *utf16_output++ = !match_system(big_endian) + ? 
char16_t(u16_swap_bytes(uint16_t(word))) + : char16_t(word); + } else { + // will generate a surrogate pair + if (word > 0x10FFFF) { + return 0; + } + word -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); + if simdutf_constexpr (!match_system(big_endian)) { + high_surrogate = u16_swap_bytes(high_surrogate); + low_surrogate = u16_swap_bytes(low_surrogate); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + } + pos++; + } + return utf16_output - start; +} + +template +simdutf_constexpr23 result convert_with_errors(const char32_t *data, size_t len, + char16_t *utf16_output) { + size_t pos = 0; + char16_t *start{utf16_output}; + while (pos < len) { + uint32_t word = data[pos]; + if ((word & 0xFFFF0000) == 0) { + if (word >= 0xD800 && word <= 0xDFFF) { + return result(error_code::SURROGATE, pos); + } + // will not generate a surrogate pair + *utf16_output++ = !match_system(big_endian) + ? 
char16_t(u16_swap_bytes(uint16_t(word))) + : char16_t(word); + } else { + // will generate a surrogate pair + if (word > 0x10FFFF) { + return result(error_code::TOO_LARGE, pos); + } + word -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); + if simdutf_constexpr (!match_system(big_endian)) { + high_surrogate = u16_swap_bytes(high_surrogate); + low_surrogate = u16_swap_bytes(low_surrogate); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + } + pos++; + } + return result(error_code::SUCCESS, utf16_output - start); +} + +} // namespace utf32_to_utf16 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf32_to_utf16/utf32_to_utf16.h */ +/* begin file include/simdutf/scalar/utf32_to_utf16/valid_utf32_to_utf16.h */ +#ifndef SIMDUTF_VALID_UTF32_TO_UTF16_H +#define SIMDUTF_VALID_UTF32_TO_UTF16_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf32_to_utf16 { + +template +simdutf_constexpr23 size_t convert_valid(const char32_t *data, size_t len, + char16_t *utf16_output) { + size_t pos = 0; + char16_t *start{utf16_output}; + while (pos < len) { + uint32_t word = data[pos]; + if ((word & 0xFFFF0000) == 0) { + // will not generate a surrogate pair + *utf16_output++ = !match_system(big_endian) + ? 
char16_t(u16_swap_bytes(uint16_t(word))) + : char16_t(word); + pos++; + } else { + // will generate a surrogate pair + word -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); + if simdutf_constexpr (!match_system(big_endian)) { + high_surrogate = u16_swap_bytes(high_surrogate); + low_surrogate = u16_swap_bytes(low_surrogate); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + pos++; + } + } + return utf16_output - start; +} + +} // namespace utf32_to_utf16 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf32_to_utf16/valid_utf32_to_utf16.h */ +/* begin file include/simdutf/scalar/utf32_to_utf8/utf32_to_utf8.h */ +#ifndef SIMDUTF_UTF32_TO_UTF8_H +#define SIMDUTF_UTF32_TO_UTF8_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf32_to_utf8 { + +template +#if SIMDUTF_CPLUSPLUS20 + requires(simdutf::detail::indexes_into_utf32 && + simdutf::detail::index_assignable_from_char) +#endif +simdutf_constexpr23 size_t convert(InputPtr data, size_t len, + OutputPtr utf8_output) { + size_t pos = 0; + auto start = utf8_output; + while (pos < len) { +#if SIMDUTF_CPLUSPLUS23 + if !consteval +#endif + { // try to convert the next block of 2 ASCII characters + if (pos + 2 <= len) { // if it is safe to read 8 more bytes, check that + // they are ascii + uint64_t v; + ::memcpy(&v, data + pos, sizeof(uint64_t)); + if ((v & 0xFFFFFF80FFFFFF80) == 0) { + *utf8_output++ = char(data[pos]); + *utf8_output++ = char(data[pos + 1]); + pos += 2; + continue; + } + } + } + + uint32_t word = data[pos]; + if ((word & 0xFFFFFF80) == 0) { + // will generate one UTF-8 bytes + *utf8_output++ = char(word); + pos++; + } else if ((word & 0xFFFFF800) == 0) { + // will generate two UTF-8 bytes + // we have 0b110XXXXX 0b10XXXXXX + *utf8_output++ = char((word >> 6) | 0b11000000); + 
*utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } else if ((word & 0xFFFF0000) == 0) { + // will generate three UTF-8 bytes + // we have 0b1110XXXX 0b10XXXXXX 0b10XXXXXX + if (word >= 0xD800 && word <= 0xDFFF) { + return 0; + } + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } else { + // will generate four UTF-8 bytes + // we have 0b11110XXX 0b10XXXXXX 0b10XXXXXX 0b10XXXXXX + if (word > 0x10FFFF) { + return 0; + } + *utf8_output++ = char((word >> 18) | 0b11110000); + *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } + } + return utf8_output - start; +} + +template +#if SIMDUTF_CPLUSPLUS20 + requires(simdutf::detail::indexes_into_utf32 && + simdutf::detail::index_assignable_from_char) +#endif +simdutf_constexpr23 result convert_with_errors(InputPtr data, size_t len, + OutputPtr utf8_output) { + size_t pos = 0; + auto start = utf8_output; + while (pos < len) { +#if SIMDUTF_CPLUSPLUS23 + if !consteval +#endif + { // try to convert the next block of 2 ASCII characters + if (pos + 2 <= len) { // if it is safe to read 8 more bytes, check that + // they are ascii + uint64_t v; + ::memcpy(&v, data + pos, sizeof(uint64_t)); + if ((v & 0xFFFFFF80FFFFFF80) == 0) { + *utf8_output++ = char(data[pos]); + *utf8_output++ = char(data[pos + 1]); + pos += 2; + continue; + } + } + } + + uint32_t word = data[pos]; + if ((word & 0xFFFFFF80) == 0) { + // will generate one UTF-8 bytes + *utf8_output++ = char(word); + pos++; + } else if ((word & 0xFFFFF800) == 0) { + // will generate two UTF-8 bytes + // we have 0b110XXXXX 0b10XXXXXX + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } else if ((word & 0xFFFF0000) == 0) { + // 
will generate three UTF-8 bytes + // we have 0b1110XXXX 0b10XXXXXX 0b10XXXXXX + if (word >= 0xD800 && word <= 0xDFFF) { + return result(error_code::SURROGATE, pos); + } + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } else { + // will generate four UTF-8 bytes + // we have 0b11110XXX 0b10XXXXXX 0b10XXXXXX 0b10XXXXXX + if (word > 0x10FFFF) { + return result(error_code::TOO_LARGE, pos); + } + *utf8_output++ = char((word >> 18) | 0b11110000); + *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } + } + return result(error_code::SUCCESS, utf8_output - start); +} + +} // namespace utf32_to_utf8 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf32_to_utf8/utf32_to_utf8.h */ +/* begin file include/simdutf/scalar/utf32_to_utf8/valid_utf32_to_utf8.h */ +#ifndef SIMDUTF_VALID_UTF32_TO_UTF8_H +#define SIMDUTF_VALID_UTF32_TO_UTF8_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf32_to_utf8 { + +template +#if SIMDUTF_CPLUSPLUS20 + requires(simdutf::detail::indexes_into_utf32 && + simdutf::detail::index_assignable_from_char) +#endif +simdutf_constexpr23 size_t convert_valid(InputPtr data, size_t len, + OutputPtr utf8_output) { + size_t pos = 0; + auto start = utf8_output; + while (pos < len) { +#if SIMDUTF_CPLUSPLUS23 + if !consteval +#endif + { // try to convert the next block of 2 ASCII characters + if (pos + 2 <= len) { // if it is safe to read 8 more bytes, check that + // they are ascii + uint64_t v; + ::memcpy(&v, data + pos, sizeof(uint64_t)); + if ((v & 0xFFFFFF80FFFFFF80) == 0) { + *utf8_output++ = char(data[pos]); + *utf8_output++ = char(data[pos + 1]); + pos += 2; + continue; + } + } + } + + uint32_t 
word = data[pos]; + if ((word & 0xFFFFFF80) == 0) { + // will generate one UTF-8 bytes + *utf8_output++ = char(word); + pos++; + } else if ((word & 0xFFFFF800) == 0) { + // will generate two UTF-8 bytes + // we have 0b110XXXXX 0b10XXXXXX + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } else if ((word & 0xFFFF0000) == 0) { + // will generate three UTF-8 bytes + // we have 0b1110XXXX 0b10XXXXXX 0b10XXXXXX + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } else { + // will generate four UTF-8 bytes + // we have 0b11110XXX 0b10XXXXXX 0b10XXXXXX 0b10XXXXXX + *utf8_output++ = char((word >> 18) | 0b11110000); + *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } + } + return utf8_output - start; +} + +} // namespace utf32_to_utf8 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf32_to_utf8/valid_utf32_to_utf8.h */ +/* begin file include/simdutf/scalar/utf8.h */ +#ifndef SIMDUTF_UTF8_H +#define SIMDUTF_UTF8_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf8 { + +// credit: based on code from Google Fuchsia (Apache Licensed) +template +simdutf_constexpr23 simdutf_warn_unused bool validate(BytePtr data, + size_t len) noexcept { + static_assert( + std::is_same::type, uint8_t>::value, + "dereferencing the data pointer must result in a uint8_t"); + uint64_t pos = 0; + uint32_t code_point = 0; + while (pos < len) { + uint64_t next_pos; +#if SIMDUTF_CPLUSPLUS23 + if !consteval +#endif + { // check if the next 16 bytes are ascii. 
+ next_pos = pos + 16; + if (next_pos <= len) { // if it is safe to read 16 more bytes, check + // that they are ascii + uint64_t v1{}; + std::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2{}; + std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | v2}; + if ((v & 0x8080808080808080) == 0) { + pos = next_pos; + continue; + } + } + } + + unsigned char byte = data[pos]; + + while (byte < 0b10000000) { + if (++pos == len) { + return true; + } + byte = data[pos]; + } + + if ((byte & 0b11100000) == 0b11000000) { + next_pos = pos + 2; + if (next_pos > len) { + return false; + } + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return false; + } + // range check + code_point = (byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); + if ((code_point < 0x80) || (0x7ff < code_point)) { + return false; + } + } else if ((byte & 0b11110000) == 0b11100000) { + next_pos = pos + 3; + if (next_pos > len) { + return false; + } + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return false; + } + if ((data[pos + 2] & 0b11000000) != 0b10000000) { + return false; + } + // range check + code_point = (byte & 0b00001111) << 12 | + (data[pos + 1] & 0b00111111) << 6 | + (data[pos + 2] & 0b00111111); + if ((code_point < 0x800) || (0xffff < code_point) || + (0xd7ff < code_point && code_point < 0xe000)) { + return false; + } + } else if ((byte & 0b11111000) == 0b11110000) { // 0b11110000 + next_pos = pos + 4; + if (next_pos > len) { + return false; + } + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return false; + } + if ((data[pos + 2] & 0b11000000) != 0b10000000) { + return false; + } + if ((data[pos + 3] & 0b11000000) != 0b10000000) { + return false; + } + // range check + code_point = + (byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 | + (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111); + if (code_point <= 0xffff || 0x10ffff < code_point) { + return false; + } + } else { + // we may have a continuation 
+ return false; + } + pos = next_pos; + } + return true; +} + +simdutf_really_inline simdutf_warn_unused bool validate(const char *buf, + size_t len) noexcept { + return validate(reinterpret_cast(buf), len); +} + +template +simdutf_constexpr23 simdutf_warn_unused result +validate_with_errors(BytePtr data, size_t len) noexcept { + static_assert( + std::is_same::type, uint8_t>::value, + "dereferencing the data pointer must result in a uint8_t"); + size_t pos = 0; + uint32_t code_point = 0; + while (pos < len) { + // check of the next 16 bytes are ascii. + size_t next_pos = pos + 16; + if (next_pos <= + len) { // if it is safe to read 16 more bytes, check that they are ascii + uint64_t v1; + std::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | v2}; + if ((v & 0x8080808080808080) == 0) { + pos = next_pos; + continue; + } + } + unsigned char byte = data[pos]; + + while (byte < 0b10000000) { + if (++pos == len) { + return result(error_code::SUCCESS, len); + } + byte = data[pos]; + } + + if ((byte & 0b11100000) == 0b11000000) { + next_pos = pos + 2; + if (next_pos > len) { + return result(error_code::TOO_SHORT, pos); + } + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + // range check + code_point = (byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); + if ((code_point < 0x80) || (0x7ff < code_point)) { + return result(error_code::OVERLONG, pos); + } + } else if ((byte & 0b11110000) == 0b11100000) { + next_pos = pos + 3; + if (next_pos > len) { + return result(error_code::TOO_SHORT, pos); + } + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + if ((data[pos + 2] & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + // range check + code_point = (byte & 0b00001111) << 12 | + (data[pos + 1] & 0b00111111) << 6 | + (data[pos + 2] & 0b00111111); + 
if ((code_point < 0x800) || (0xffff < code_point)) { + return result(error_code::OVERLONG, pos); + } + if (0xd7ff < code_point && code_point < 0xe000) { + return result(error_code::SURROGATE, pos); + } + } else if ((byte & 0b11111000) == 0b11110000) { // 0b11110000 + next_pos = pos + 4; + if (next_pos > len) { + return result(error_code::TOO_SHORT, pos); + } + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + if ((data[pos + 2] & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + if ((data[pos + 3] & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + // range check + code_point = + (byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 | + (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111); + if (code_point <= 0xffff) { + return result(error_code::OVERLONG, pos); + } + if (0x10ffff < code_point) { + return result(error_code::TOO_LARGE, pos); + } + } else { + // we either have too many continuation bytes or an invalid leading byte + if ((byte & 0b11000000) == 0b10000000) { + return result(error_code::TOO_LONG, pos); + } else { + return result(error_code::HEADER_BITS, pos); + } + } + pos = next_pos; + } + return result(error_code::SUCCESS, len); +} + +simdutf_really_inline simdutf_warn_unused result +validate_with_errors(const char *buf, size_t len) noexcept { + return validate_with_errors(reinterpret_cast(buf), len); +} + +// Finds the previous leading byte starting backward from buf and validates with +// errors from there Used to pinpoint the location of an error when an invalid +// chunk is detected We assume that the stream starts with a leading byte, and +// to check that it is the case, we ask that you pass a pointer to the start of +// the stream (start). 
+inline simdutf_warn_unused result rewind_and_validate_with_errors( + const char *start, const char *buf, size_t len) noexcept { + // First check that we start with a leading byte + if ((*start & 0b11000000) == 0b10000000) { + return result(error_code::TOO_LONG, 0); + } + size_t extra_len{0}; + // A leading byte cannot be further than 4 bytes away + for (int i = 0; i < 5; i++) { + unsigned char byte = *buf; + if ((byte & 0b11000000) != 0b10000000) { + break; + } else { + buf--; + extra_len++; + } + } + + result res = validate_with_errors(buf, len + extra_len); + res.count -= extra_len; + return res; +} + +template +#if SIMDUTF_CPLUSPLUS20 + requires simdutf::detail::indexes_into_byte_like +#endif +simdutf_constexpr23 size_t count_code_points(InputPtr data, size_t len) { + size_t counter{0}; + for (size_t i = 0; i < len; i++) { + // -65 is 0b10111111, anything larger in two-complement's should start a new + // code point. + if (int8_t(data[i]) > -65) { + counter++; + } + } + return counter; +} + +template +#if SIMDUTF_CPLUSPLUS20 + requires simdutf::detail::indexes_into_byte_like +#endif +simdutf_constexpr23 size_t utf16_length_from_utf8(InputPtr data, size_t len) { + size_t counter{0}; + for (size_t i = 0; i < len; i++) { + if (int8_t(data[i]) > -65) { + counter++; + } + if (uint8_t(data[i]) >= 240) { + counter++; + } + } + return counter; +} + +template +#if SIMDUTF_CPLUSPLUS20 + requires simdutf::detail::indexes_into_byte_like +#endif +simdutf_warn_unused simdutf_constexpr23 size_t +trim_partial_utf8(InputPtr input, size_t length) { + if (length < 3) { + switch (length) { + case 2: + if (uint8_t(input[length - 1]) >= 0xc0) { + return length - 1; + } // 2-, 3- and 4-byte characters with only 1 byte left + if (uint8_t(input[length - 2]) >= 0xe0) { + return length - 2; + } // 3- and 4-byte characters with only 2 bytes left + return length; + case 1: + if (uint8_t(input[length - 1]) >= 0xc0) { + return length - 1; + } // 2-, 3- and 4-byte characters with only 1 byte 
left + return length; + case 0: + return length; + } + } + if (uint8_t(input[length - 1]) >= 0xc0) { + return length - 1; + } // 2-, 3- and 4-byte characters with only 1 byte left + if (uint8_t(input[length - 2]) >= 0xe0) { + return length - 2; + } // 3- and 4-byte characters with only 1 byte left + if (uint8_t(input[length - 3]) >= 0xf0) { + return length - 3; + } // 4-byte characters with only 3 bytes left + return length; +} + +} // namespace utf8 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf8.h */ +/* begin file include/simdutf/scalar/utf8_to_latin1/utf8_to_latin1.h */ +#ifndef SIMDUTF_UTF8_TO_LATIN1_H +#define SIMDUTF_UTF8_TO_LATIN1_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf8_to_latin1 { + +template +#if SIMDUTF_CPLUSPLUS20 + requires(simdutf::detail::indexes_into_byte_like && + simdutf::detail::indexes_into_byte_like) +#endif +simdutf_constexpr23 size_t convert(InputPtr data, size_t len, + OutputPtr latin_output) { + size_t pos = 0; + auto start = latin_output; + + while (pos < len) { +#if SIMDUTF_CPLUSPLUS23 + if !consteval +#endif + { + // try to convert the next block of 16 ASCII bytes + if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that + // they are ascii + uint64_t v1; + ::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | v2}; // We are only interested in these bits: 1000 1000 + // 1000 1000 .... etc + if ((v & 0x8080808080808080) == + 0) { // if NONE of these are set, e.g. all of them are zero, then + // everything is ASCII + size_t final_pos = pos + 16; + while (pos < final_pos) { + *latin_output++ = char(data[pos]); + pos++; + } + continue; + } + } + } + + // suppose it is not an all ASCII byte sequence + uint8_t leading_byte = data[pos]; // leading byte + if (leading_byte < 0b10000000) { + // converting one ASCII byte !!! 
+ *latin_output++ = char(leading_byte); + pos++; + } else if ((leading_byte & 0b11100000) == + 0b11000000) { // the first three bits indicate: + // We have a two-byte UTF-8 + if (pos + 1 >= len) { + return 0; + } // minimal bound checking + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return 0; + } // checks if the next byte is a valid continuation byte in UTF-8. A + // valid continuation byte starts with 10. + // range check - + uint32_t code_point = + (leading_byte & 0b00011111) << 6 | + (data[pos + 1] & + 0b00111111); // assembles the Unicode code point from the two bytes. + // It does this by discarding the leading 110 and 10 + // bits from the two bytes, shifting the remaining bits + // of the first byte, and then combining the results + // with a bitwise OR operation. + if (code_point < 0x80 || 0xFF < code_point) { + return 0; // We only care about the range 129-255 which is Non-ASCII + // latin1 characters. A code_point beneath 0x80 is invalid as + // it is already covered by bytes whose leading bit is zero. + } + *latin_output++ = char(code_point); + pos += 2; + } else { + return 0; + } + } + return latin_output - start; +} + +template +#if SIMDUTF_CPLUSPLUS20 + requires simdutf::detail::indexes_into_byte_like +#endif +simdutf_constexpr23 result convert_with_errors(InputPtr data, size_t len, + char *latin_output) { + size_t pos = 0; + char *start{latin_output}; + + while (pos < len) { +#if SIMDUTF_CPLUSPLUS23 + if !consteval +#endif + { + // try to convert the next block of 16 ASCII bytes + if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that + // they are ascii + uint64_t v1; + ::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | v2}; // We are only interested in these bits: 1000 1000 + // 1000 1000...etc + if ((v & 0x8080808080808080) == + 0) { // if NONE of these are set, e.g. 
all of them are zero, then + // everything is ASCII + size_t final_pos = pos + 16; + while (pos < final_pos) { + *latin_output++ = char(data[pos]); + pos++; + } + continue; + } + } + } + // suppose it is not an all ASCII byte sequence + uint8_t leading_byte = data[pos]; // leading byte + if (leading_byte < 0b10000000) { + // converting one ASCII byte !!! + *latin_output++ = char(leading_byte); + pos++; + } else if ((leading_byte & 0b11100000) == + 0b11000000) { // the first three bits indicate: + // We have a two-byte UTF-8 + if (pos + 1 >= len) { + return result(error_code::TOO_SHORT, pos); + } // minimal bound checking + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } // checks if the next byte is a valid continuation byte in UTF-8. A + // valid continuation byte starts with 10. + // range check - + uint32_t code_point = + (leading_byte & 0b00011111) << 6 | + (data[pos + 1] & + 0b00111111); // assembles the Unicode code point from the two bytes. + // It does this by discarding the leading 110 and 10 + // bits from the two bytes, shifting the remaining bits + // of the first byte, and then combining the results + // with a bitwise OR operation. + if (code_point < 0x80) { + return result(error_code::OVERLONG, pos); + } + if (0xFF < code_point) { + return result(error_code::TOO_LARGE, pos); + } // We only care about the range 129-255 which is Non-ASCII latin1 + // characters + *latin_output++ = char(code_point); + pos += 2; + } else if ((leading_byte & 0b11110000) == 0b11100000) { + // We have a three-byte UTF-8 + return result(error_code::TOO_LARGE, pos); + } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 + // we have a 4-byte UTF-8 word. 
+ return result(error_code::TOO_LARGE, pos); + } else { + // we either have too many continuation bytes or an invalid leading byte + if ((leading_byte & 0b11000000) == 0b10000000) { + return result(error_code::TOO_LONG, pos); + } + + return result(error_code::HEADER_BITS, pos); + } + } + return result(error_code::SUCCESS, latin_output - start); +} + +inline result rewind_and_convert_with_errors(size_t prior_bytes, + const char *buf, size_t len, + char *latin1_output) { + size_t extra_len{0}; + // We potentially need to go back in time and find a leading byte. + // In theory '3' would be sufficient, but sometimes the error can go back + // quite far. + size_t how_far_back = prior_bytes; + // size_t how_far_back = 3; // 3 bytes in the past + current position + // if(how_far_back >= prior_bytes) { how_far_back = prior_bytes; } + bool found_leading_bytes{false}; + // important: it is i <= how_far_back and not 'i < how_far_back'. + for (size_t i = 0; i <= how_far_back; i++) { + unsigned char byte = buf[-static_cast(i)]; + found_leading_bytes = ((byte & 0b11000000) != 0b10000000); + if (found_leading_bytes) { + if (i > 0 && byte < 128) { + // If we had to go back and the leading byte is ascii + // then we can stop right away. + return result(error_code::TOO_LONG, 0 - i + 1); + } + buf -= i; + extra_len = i; + break; + } + } + // + // It is possible for this function to return a negative count in its result. + // C++ Standard Section 18.1 defines size_t is in which is described + // in C Standard as . C Standard Section 4.1.5 defines size_t as an + // unsigned integral type of the result of the sizeof operator + // + // An unsigned type will simply wrap round arithmetically (well defined). + // + if (!found_leading_bytes) { + // If how_far_back == 3, we may have four consecutive continuation bytes!!! + // [....] [continuation] [continuation] [continuation] | [buf is + // continuation] Or we possibly have a stream that does not start with a + // leading byte. 
+ return result(error_code::TOO_LONG, 0 - how_far_back); + } + result res = convert_with_errors(buf, len + extra_len, latin1_output); + if (res.error) { + res.count -= extra_len; + } + return res; +} + +} // namespace utf8_to_latin1 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf8_to_latin1/utf8_to_latin1.h */ +/* begin file include/simdutf/scalar/utf8_to_latin1/valid_utf8_to_latin1.h */ +#ifndef SIMDUTF_VALID_UTF8_TO_LATIN1_H +#define SIMDUTF_VALID_UTF8_TO_LATIN1_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf8_to_latin1 { + +template +#if SIMDUTF_CPLUSPLUS20 + requires simdutf::detail::indexes_into_byte_like +#endif +simdutf_constexpr23 size_t convert_valid(InputPtr data, size_t len, + char *latin_output) { + + size_t pos = 0; + char *start{latin_output}; + + while (pos < len) { +#if SIMDUTF_CPLUSPLUS23 + if !consteval +#endif + { + // try to convert the next block of 16 ASCII bytes + if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that + // they are ascii + uint64_t v1; + ::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | + v2}; // We are only interested in these bits: 1000 1000 1000 + // 1000, so it makes sense to concatenate everything + if ((v & 0x8080808080808080) == + 0) { // if NONE of these are set, e.g. all of them are zero, then + // everything is ASCII + size_t final_pos = pos + 16; + while (pos < final_pos) { + *latin_output++ = uint8_t(data[pos]); + pos++; + } + continue; + } + } + } + + // suppose it is not an all ASCII byte sequence + auto leading_byte = uint8_t(data[pos]); // leading byte + if (leading_byte < 0b10000000) { + // converting one ASCII byte !!! 
+ *latin_output++ = char(leading_byte); + pos++; + } else if ((leading_byte & 0b11100000) == + 0b11000000) { // the first three bits indicate: + // We have a two-byte UTF-8 + if (pos + 1 >= len) { + break; + } // minimal bound checking + if ((uint8_t(data[pos + 1]) & 0b11000000) != 0b10000000) { + return 0; + } // checks if the next byte is a valid continuation byte in UTF-8. A + // valid continuation byte starts with 10. + // range check - + uint32_t code_point = + (leading_byte & 0b00011111) << 6 | + (uint8_t(data[pos + 1]) & + 0b00111111); // assembles the Unicode code point from the two bytes. + // It does this by discarding the leading 110 and 10 + // bits from the two bytes, shifting the remaining bits + // of the first byte, and then combining the results + // with a bitwise OR operation. + *latin_output++ = char(code_point); + pos += 2; + } else { + // we may have a continuation but we do not do error checking + return 0; + } + } + return latin_output - start; +} + +} // namespace utf8_to_latin1 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf8_to_latin1/valid_utf8_to_latin1.h */ +/* begin file include/simdutf/scalar/utf8_to_utf16/utf8_to_utf16.h */ +#ifndef SIMDUTF_UTF8_TO_UTF16_H +#define SIMDUTF_UTF8_TO_UTF16_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf8_to_utf16 { + +template +#if SIMDUTF_CPLUSPLUS20 + requires simdutf::detail::indexes_into_byte_like +#endif +simdutf_constexpr23 size_t convert(InputPtr data, size_t len, + char16_t *utf16_output) { + size_t pos = 0; + char16_t *start{utf16_output}; + while (pos < len) { +#if SIMDUTF_CPLUSPLUS23 + if !consteval +#endif + // try to convert the next block of 16 ASCII bytes + { + if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that + // they are ascii + uint64_t v1; + ::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + ::memcpy(&v2, data + pos + sizeof(uint64_t), 
sizeof(uint64_t)); + uint64_t v{v1 | v2}; + if ((v & 0x8080808080808080) == 0) { + size_t final_pos = pos + 16; + while (pos < final_pos) { + *utf16_output++ = !match_system(big_endian) + ? char16_t(u16_swap_bytes(data[pos])) + : char16_t(data[pos]); + pos++; + } + continue; + } + } + } + + uint8_t leading_byte = data[pos]; // leading byte + if (leading_byte < 0b10000000) { + // converting one ASCII byte !!! + *utf16_output++ = !match_system(big_endian) + ? char16_t(u16_swap_bytes(leading_byte)) + : char16_t(leading_byte); + pos++; + } else if ((leading_byte & 0b11100000) == 0b11000000) { + // We have a two-byte UTF-8, it should become + // a single UTF-16 word. + if (pos + 1 >= len) { + return 0; + } // minimal bound checking + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return 0; + } + // range check + uint32_t code_point = + (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); + if (code_point < 0x80 || 0x7ff < code_point) { + return 0; + } + if simdutf_constexpr (!match_system(big_endian)) { + code_point = uint32_t(u16_swap_bytes(uint16_t(code_point))); + } + *utf16_output++ = char16_t(code_point); + pos += 2; + } else if ((leading_byte & 0b11110000) == 0b11100000) { + // We have a three-byte UTF-8, it should become + // a single UTF-16 word. 
+ if (pos + 2 >= len) { + return 0; + } // minimal bound checking + + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return 0; + } + if ((data[pos + 2] & 0b11000000) != 0b10000000) { + return 0; + } + // range check + uint32_t code_point = (leading_byte & 0b00001111) << 12 | + (data[pos + 1] & 0b00111111) << 6 | + (data[pos + 2] & 0b00111111); + if (code_point < 0x800 || 0xffff < code_point || + (0xd7ff < code_point && code_point < 0xe000)) { + return 0; + } + if simdutf_constexpr (!match_system(big_endian)) { + code_point = uint32_t(u16_swap_bytes(uint16_t(code_point))); + } + *utf16_output++ = char16_t(code_point); + pos += 3; + } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 + // we have a 4-byte UTF-8 word. + if (pos + 3 >= len) { + return 0; + } // minimal bound checking + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return 0; + } + if ((data[pos + 2] & 0b11000000) != 0b10000000) { + return 0; + } + if ((data[pos + 3] & 0b11000000) != 0b10000000) { + return 0; + } + + // range check + uint32_t code_point = (leading_byte & 0b00000111) << 18 | + (data[pos + 1] & 0b00111111) << 12 | + (data[pos + 2] & 0b00111111) << 6 | + (data[pos + 3] & 0b00111111); + if (code_point <= 0xffff || 0x10ffff < code_point) { + return 0; + } + code_point -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (code_point >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (code_point & 0x3FF)); + if simdutf_constexpr (!match_system(big_endian)) { + high_surrogate = u16_swap_bytes(high_surrogate); + low_surrogate = u16_swap_bytes(low_surrogate); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + pos += 4; + } else { + return 0; + } + } + return utf16_output - start; +} + +template +#if SIMDUTF_CPLUSPLUS20 + requires simdutf::detail::indexes_into_byte_like +#endif +simdutf_constexpr23 result convert_with_errors(InputPtr data, size_t len, + char16_t *utf16_output) { + size_t pos = 0; + char16_t 
*start{utf16_output}; + while (pos < len) { +#if SIMDUTF_CPLUSPLUS23 + if !consteval +#endif + { + // try to convert the next block of 16 ASCII bytes + if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that + // they are ascii + uint64_t v1; + ::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | v2}; + if ((v & 0x8080808080808080) == 0) { + size_t final_pos = pos + 16; + while (pos < final_pos) { + const char16_t byte = uint8_t(data[pos]); + *utf16_output++ = + !match_system(big_endian) ? u16_swap_bytes(byte) : byte; + pos++; + } + continue; + } + } + } + + auto leading_byte = uint8_t(data[pos]); // leading byte + if (leading_byte < 0b10000000) { + // converting one ASCII byte !!! + *utf16_output++ = !match_system(big_endian) + ? char16_t(u16_swap_bytes(leading_byte)) + : char16_t(leading_byte); + pos++; + } else if ((leading_byte & 0b11100000) == 0b11000000) { + // We have a two-byte UTF-8, it should become + // a single UTF-16 word. + if (pos + 1 >= len) { + return result(error_code::TOO_SHORT, pos); + } // minimal bound checking + if ((uint8_t(data[pos + 1]) & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + // range check + uint32_t code_point = (leading_byte & 0b00011111) << 6 | + (uint8_t(data[pos + 1]) & 0b00111111); + if (code_point < 0x80 || 0x7ff < code_point) { + return result(error_code::OVERLONG, pos); + } + if simdutf_constexpr (!match_system(big_endian)) { + code_point = uint32_t(u16_swap_bytes(uint16_t(code_point))); + } + *utf16_output++ = char16_t(code_point); + pos += 2; + } else if ((leading_byte & 0b11110000) == 0b11100000) { + // We have a three-byte UTF-8, it should become + // a single UTF-16 word. 
+ if (pos + 2 >= len) { + return result(error_code::TOO_SHORT, pos); + } // minimal bound checking + + if ((uint8_t(data[pos + 1]) & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + if ((uint8_t(data[pos + 2]) & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + // range check + uint32_t code_point = (leading_byte & 0b00001111) << 12 | + (uint8_t(data[pos + 1]) & 0b00111111) << 6 | + (uint8_t(data[pos + 2]) & 0b00111111); + if ((code_point < 0x800) || (0xffff < code_point)) { + return result(error_code::OVERLONG, pos); + } + if (0xd7ff < code_point && code_point < 0xe000) { + return result(error_code::SURROGATE, pos); + } + if simdutf_constexpr (!match_system(big_endian)) { + code_point = uint32_t(u16_swap_bytes(uint16_t(code_point))); + } + *utf16_output++ = char16_t(code_point); + pos += 3; + } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 + // we have a 4-byte UTF-8 word. + if (pos + 3 >= len) { + return result(error_code::TOO_SHORT, pos); + } // minimal bound checking + if ((uint8_t(data[pos + 1]) & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + if ((uint8_t(data[pos + 2]) & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + if ((uint8_t(data[pos + 3]) & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + + // range check + uint32_t code_point = (leading_byte & 0b00000111) << 18 | + (uint8_t(data[pos + 1]) & 0b00111111) << 12 | + (uint8_t(data[pos + 2]) & 0b00111111) << 6 | + (uint8_t(data[pos + 3]) & 0b00111111); + if (code_point <= 0xffff) { + return result(error_code::OVERLONG, pos); + } + if (0x10ffff < code_point) { + return result(error_code::TOO_LARGE, pos); + } + code_point -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (code_point >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (code_point & 0x3FF)); + if simdutf_constexpr (!match_system(big_endian)) { + 
high_surrogate = u16_swap_bytes(high_surrogate); + low_surrogate = u16_swap_bytes(low_surrogate); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + pos += 4; + } else { + // we either have too many continuation bytes or an invalid leading byte + if ((leading_byte & 0b11000000) == 0b10000000) { + return result(error_code::TOO_LONG, pos); + } else { + return result(error_code::HEADER_BITS, pos); + } + } + } + return result(error_code::SUCCESS, utf16_output - start); +} + +/** + * When rewind_and_convert_with_errors is called, we are pointing at 'buf' and + * we have up to len input bytes left, and we encountered some error. It is + * possible that the error is at 'buf' exactly, but it could also be in the + * previous bytes (up to 3 bytes back). + * + * prior_bytes indicates how many bytes, prior to 'buf' may belong to the + * current memory section and can be safely accessed. We prior_bytes to access + * safely up to three bytes before 'buf'. + * + * The caller is responsible to ensure that len > 0. + * + * If the error is believed to have occurred prior to 'buf', the count value + * contain in the result will be SIZE_T - 1, SIZE_T - 2, or SIZE_T - 3. + */ +template +inline result rewind_and_convert_with_errors(size_t prior_bytes, + const char *buf, size_t len, + char16_t *utf16_output) { + size_t extra_len{0}; + // We potentially need to go back in time and find a leading byte. + // In theory '3' would be sufficient, but sometimes the error can go back + // quite far. + size_t how_far_back = prior_bytes; + // size_t how_far_back = 3; // 3 bytes in the past + current position + // if(how_far_back >= prior_bytes) { how_far_back = prior_bytes; } + bool found_leading_bytes{false}; + // important: it is i <= how_far_back and not 'i < how_far_back'. 
+ for (size_t i = 0; i <= how_far_back; i++) { + unsigned char byte = buf[-static_cast(i)]; + found_leading_bytes = ((byte & 0b11000000) != 0b10000000); + if (found_leading_bytes) { + if (i > 0 && byte < 128) { + // If we had to go back and the leading byte is ascii + // then we can stop right away. + return result(error_code::TOO_LONG, 0 - i + 1); + } + buf -= i; + extra_len = i; + break; + } + } + // + // It is possible for this function to return a negative count in its result. + // C++ Standard Section 18.1 defines size_t is in which is described + // in C Standard as . C Standard Section 4.1.5 defines size_t as an + // unsigned integral type of the result of the sizeof operator + // + // An unsigned type will simply wrap round arithmetically (well defined). + // + if (!found_leading_bytes) { + // If how_far_back == 3, we may have four consecutive continuation bytes!!! + // [....] [continuation] [continuation] [continuation] | [buf is + // continuation] Or we possibly have a stream that does not start with a + // leading byte. 
+ return result(error_code::TOO_LONG, 0 - how_far_back); + } + result res = convert_with_errors(buf, len + extra_len, utf16_output); + if (res.error) { + res.count -= extra_len; + } + return res; +} + +} // namespace utf8_to_utf16 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf8_to_utf16/utf8_to_utf16.h */ +/* begin file include/simdutf/scalar/utf8_to_utf16/valid_utf8_to_utf16.h */ +#ifndef SIMDUTF_VALID_UTF8_TO_UTF16_H +#define SIMDUTF_VALID_UTF8_TO_UTF16_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf8_to_utf16 { + +template +#if SIMDUTF_CPLUSPLUS20 + requires simdutf::detail::indexes_into_byte_like +#endif +simdutf_constexpr23 size_t convert_valid(InputPtr data, size_t len, + char16_t *utf16_output) { + size_t pos = 0; + char16_t *start{utf16_output}; + while (pos < len) { +#if SIMDUTF_CPLUSPLUS23 + if !consteval +#endif + { // try to convert the next block of 8 ASCII bytes + if (pos + 8 <= len) { // if it is safe to read 8 more bytes, check that + // they are ascii + uint64_t v; + ::memcpy(&v, data + pos, sizeof(uint64_t)); + if ((v & 0x8080808080808080) == 0) { + size_t final_pos = pos + 8; + while (pos < final_pos) { + const char16_t byte = uint8_t(data[pos]); + *utf16_output++ = + !match_system(big_endian) ? u16_swap_bytes(byte) : byte; + pos++; + } + continue; + } + } + } + + auto leading_byte = uint8_t(data[pos]); // leading byte + if (leading_byte < 0b10000000) { + // converting one ASCII byte !!! + *utf16_output++ = !match_system(big_endian) + ? char16_t(u16_swap_bytes(leading_byte)) + : char16_t(leading_byte); + pos++; + } else if ((leading_byte & 0b11100000) == 0b11000000) { + // We have a two-byte UTF-8, it should become + // a single UTF-16 word. 
+ if (pos + 1 >= len) { + break; + } // minimal bound checking + uint16_t code_point = uint16_t(((leading_byte & 0b00011111) << 6) | + (uint8_t(data[pos + 1]) & 0b00111111)); + if simdutf_constexpr (!match_system(big_endian)) { + code_point = u16_swap_bytes(uint16_t(code_point)); + } + *utf16_output++ = char16_t(code_point); + pos += 2; + } else if ((leading_byte & 0b11110000) == 0b11100000) { + // We have a three-byte UTF-8, it should become + // a single UTF-16 word. + if (pos + 2 >= len) { + break; + } // minimal bound checking + uint16_t code_point = + uint16_t(((leading_byte & 0b00001111) << 12) | + ((uint8_t(data[pos + 1]) & 0b00111111) << 6) | + (uint8_t(data[pos + 2]) & 0b00111111)); + if simdutf_constexpr (!match_system(big_endian)) { + code_point = u16_swap_bytes(uint16_t(code_point)); + } + *utf16_output++ = char16_t(code_point); + pos += 3; + } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 + // we have a 4-byte UTF-8 word. + if (pos + 3 >= len) { + break; + } // minimal bound checking + uint32_t code_point = ((leading_byte & 0b00000111) << 18) | + ((uint8_t(data[pos + 1]) & 0b00111111) << 12) | + ((uint8_t(data[pos + 2]) & 0b00111111) << 6) | + (uint8_t(data[pos + 3]) & 0b00111111); + code_point -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (code_point >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (code_point & 0x3FF)); + if simdutf_constexpr (!match_system(big_endian)) { + high_surrogate = u16_swap_bytes(high_surrogate); + low_surrogate = u16_swap_bytes(low_surrogate); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + pos += 4; + } else { + // we may have a continuation but we do not do error checking + return 0; + } + } + return utf16_output - start; +} + +} // namespace utf8_to_utf16 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf8_to_utf16/valid_utf8_to_utf16.h */ +/* begin file 
include/simdutf/scalar/utf8_to_utf32/utf8_to_utf32.h */ +#ifndef SIMDUTF_UTF8_TO_UTF32_H +#define SIMDUTF_UTF8_TO_UTF32_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf8_to_utf32 { + +template +#if SIMDUTF_CPLUSPLUS20 + requires simdutf::detail::indexes_into_byte_like +#endif +simdutf_constexpr23 size_t convert(InputPtr data, size_t len, + char32_t *utf32_output) { + size_t pos = 0; + char32_t *start{utf32_output}; + while (pos < len) { +#if SIMDUTF_CPLUSPLUS23 + if !consteval +#endif + { + // try to convert the next block of 16 ASCII bytes + if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that + // they are ascii + uint64_t v1; + ::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | v2}; + if ((v & 0x8080808080808080) == 0) { + size_t final_pos = pos + 16; + while (pos < final_pos) { + *utf32_output++ = uint8_t(data[pos]); + pos++; + } + continue; + } + } + } + auto leading_byte = uint8_t(data[pos]); // leading byte + if (leading_byte < 0b10000000) { + // converting one ASCII byte !!! 
+ *utf32_output++ = char32_t(leading_byte); + pos++; + } else if ((leading_byte & 0b11100000) == 0b11000000) { + // We have a two-byte UTF-8 + if (pos + 1 >= len) { + return 0; + } // minimal bound checking + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return 0; + } + // range check + uint32_t code_point = (leading_byte & 0b00011111) << 6 | + (uint8_t(data[pos + 1]) & 0b00111111); + if (code_point < 0x80 || 0x7ff < code_point) { + return 0; + } + *utf32_output++ = char32_t(code_point); + pos += 2; + } else if ((leading_byte & 0b11110000) == 0b11100000) { + // We have a three-byte UTF-8 + if (pos + 2 >= len) { + return 0; + } // minimal bound checking + + if ((uint8_t(data[pos + 1]) & 0b11000000) != 0b10000000) { + return 0; + } + if ((uint8_t(data[pos + 2]) & 0b11000000) != 0b10000000) { + return 0; + } + // range check + uint32_t code_point = (leading_byte & 0b00001111) << 12 | + (uint8_t(data[pos + 1]) & 0b00111111) << 6 | + (uint8_t(data[pos + 2]) & 0b00111111); + if (code_point < 0x800 || 0xffff < code_point || + (0xd7ff < code_point && code_point < 0xe000)) { + return 0; + } + *utf32_output++ = char32_t(code_point); + pos += 3; + } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 + // we have a 4-byte UTF-8 word. 
+ if (pos + 3 >= len) { + return 0; + } // minimal bound checking + if ((uint8_t(data[pos + 1]) & 0b11000000) != 0b10000000) { + return 0; + } + if ((uint8_t(data[pos + 2]) & 0b11000000) != 0b10000000) { + return 0; + } + if ((uint8_t(data[pos + 3]) & 0b11000000) != 0b10000000) { + return 0; + } + + // range check + uint32_t code_point = (leading_byte & 0b00000111) << 18 | + (uint8_t(data[pos + 1]) & 0b00111111) << 12 | + (uint8_t(data[pos + 2]) & 0b00111111) << 6 | + (uint8_t(data[pos + 3]) & 0b00111111); + if (code_point <= 0xffff || 0x10ffff < code_point) { + return 0; + } + *utf32_output++ = char32_t(code_point); + pos += 4; + } else { + return 0; + } + } + return utf32_output - start; +} + +template +#if SIMDUTF_CPLUSPLUS20 + requires simdutf::detail::indexes_into_byte_like +#endif +simdutf_constexpr23 result convert_with_errors(InputPtr data, size_t len, + char32_t *utf32_output) { + size_t pos = 0; + char32_t *start{utf32_output}; + while (pos < len) { +#if SIMDUTF_CPLUSPLUS23 + if !consteval +#endif + { + // try to convert the next block of 16 ASCII bytes + if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that + // they are ascii + uint64_t v1; + ::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | v2}; + if ((v & 0x8080808080808080) == 0) { + size_t final_pos = pos + 16; + while (pos < final_pos) { + *utf32_output++ = uint8_t(data[pos]); + pos++; + } + continue; + } + } + } + auto leading_byte = uint8_t(data[pos]); // leading byte + if (leading_byte < 0b10000000) { + // converting one ASCII byte !!! 
+ *utf32_output++ = char32_t(leading_byte); + pos++; + } else if ((leading_byte & 0b11100000) == 0b11000000) { + // We have a two-byte UTF-8 + if (pos + 1 >= len) { + return result(error_code::TOO_SHORT, pos); + } // minimal bound checking + if ((uint8_t(data[pos + 1]) & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + // range check + uint32_t code_point = (leading_byte & 0b00011111) << 6 | + (uint8_t(data[pos + 1]) & 0b00111111); + if (code_point < 0x80 || 0x7ff < code_point) { + return result(error_code::OVERLONG, pos); + } + *utf32_output++ = char32_t(code_point); + pos += 2; + } else if ((leading_byte & 0b11110000) == 0b11100000) { + // We have a three-byte UTF-8 + if (pos + 2 >= len) { + return result(error_code::TOO_SHORT, pos); + } // minimal bound checking + + if ((uint8_t(data[pos + 1]) & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + if ((uint8_t(data[pos + 2]) & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + // range check + uint32_t code_point = (leading_byte & 0b00001111) << 12 | + (uint8_t(data[pos + 1]) & 0b00111111) << 6 | + (uint8_t(data[pos + 2]) & 0b00111111); + if (code_point < 0x800 || 0xffff < code_point) { + return result(error_code::OVERLONG, pos); + } + if (0xd7ff < code_point && code_point < 0xe000) { + return result(error_code::SURROGATE, pos); + } + *utf32_output++ = char32_t(code_point); + pos += 3; + } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 + // we have a 4-byte UTF-8 word. 
+ if (pos + 3 >= len) { + return result(error_code::TOO_SHORT, pos); + } // minimal bound checking + if ((uint8_t(data[pos + 1]) & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + if ((uint8_t(data[pos + 2]) & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + if ((uint8_t(data[pos + 3]) & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + + // range check + uint32_t code_point = (leading_byte & 0b00000111) << 18 | + (uint8_t(data[pos + 1]) & 0b00111111) << 12 | + (uint8_t(data[pos + 2]) & 0b00111111) << 6 | + (uint8_t(data[pos + 3]) & 0b00111111); + if (code_point <= 0xffff) { + return result(error_code::OVERLONG, pos); + } + if (0x10ffff < code_point) { + return result(error_code::TOO_LARGE, pos); + } + *utf32_output++ = char32_t(code_point); + pos += 4; + } else { + // we either have too many continuation bytes or an invalid leading byte + if ((leading_byte & 0b11000000) == 0b10000000) { + return result(error_code::TOO_LONG, pos); + } else { + return result(error_code::HEADER_BITS, pos); + } + } + } + return result(error_code::SUCCESS, utf32_output - start); +} + +/** + * When rewind_and_convert_with_errors is called, we are pointing at 'buf' and + * we have up to len input bytes left, and we encountered some error. It is + * possible that the error is at 'buf' exactly, but it could also be in the + * previous bytes location (up to 3 bytes back). + * + * prior_bytes indicates how many bytes, prior to 'buf' may belong to the + * current memory section and can be safely accessed. We use prior_bytes to + * safely access up to three bytes before 'buf'. + * + * The caller is responsible to ensure that len > 0. + * + * If the error is believed to have occurred prior to 'buf', the count value + * contained in the result will be SIZE_T - 1, SIZE_T - 2, or SIZE_T - 3.
+ */ +inline result rewind_and_convert_with_errors(size_t prior_bytes, + const char *buf, size_t len, + char32_t *utf32_output) { + size_t extra_len{0}; + // We potentially need to go back in time and find a leading byte. + size_t how_far_back = 3; // 3 bytes in the past + current position + if (how_far_back > prior_bytes) { + how_far_back = prior_bytes; + } + bool found_leading_bytes{false}; + // important: it is i <= how_far_back and not 'i < how_far_back'. + for (size_t i = 0; i <= how_far_back; i++) { + unsigned char byte = buf[-static_cast(i)]; + found_leading_bytes = ((byte & 0b11000000) != 0b10000000); + if (found_leading_bytes) { + if (i > 0 && byte < 128) { + // If we had to go back and the leading byte is ascii + // then we can stop right away. + return result(error_code::TOO_LONG, 0 - i + 1); + } + buf -= i; + extra_len = i; + break; + } + } + // + // It is possible for this function to return a negative count in its result. + // C++ Standard Section 18.1 defines size_t in <cstddef>, which is described + // in the C Standard as <stddef.h>. C Standard Section 4.1.5 defines size_t as an + // unsigned integral type of the result of the sizeof operator. + // + // An unsigned type will simply wrap round arithmetically (well defined). + // + if (!found_leading_bytes) { + // If how_far_back == 3, we may have four consecutive continuation bytes!!! + // [....] [continuation] [continuation] [continuation] | [buf is + // continuation] Or we possibly have a stream that does not start with a + // leading byte.
+ return result(error_code::TOO_LONG, 0 - how_far_back); + } + + result res = convert_with_errors(buf, len + extra_len, utf32_output); + if (res.error) { + res.count -= extra_len; + } + return res; +} + +} // namespace utf8_to_utf32 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf8_to_utf32/utf8_to_utf32.h */ +/* begin file include/simdutf/scalar/utf8_to_utf32/valid_utf8_to_utf32.h */ +#ifndef SIMDUTF_VALID_UTF8_TO_UTF32_H +#define SIMDUTF_VALID_UTF8_TO_UTF32_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf8_to_utf32 { + +template +#if SIMDUTF_CPLUSPLUS20 + requires simdutf::detail::indexes_into_byte_like +#endif +simdutf_constexpr23 size_t convert_valid(InputPtr data, size_t len, + char32_t *utf32_output) { + size_t pos = 0; + char32_t *start{utf32_output}; + while (pos < len) { +#if SIMDUTF_CPLUSPLUS23 + if !consteval +#endif + { + // try to convert the next block of 8 ASCII bytes + if (pos + 8 <= len) { // if it is safe to read 8 more bytes, check that + // they are ascii + uint64_t v; + ::memcpy(&v, data + pos, sizeof(uint64_t)); + if ((v & 0x8080808080808080) == 0) { + size_t final_pos = pos + 8; + while (pos < final_pos) { + *utf32_output++ = uint8_t(data[pos]); + pos++; + } + continue; + } + } + } + auto leading_byte = uint8_t(data[pos]); // leading byte + if (leading_byte < 0b10000000) { + // converting one ASCII byte !!! 
+ *utf32_output++ = char32_t(leading_byte); + pos++; + } else if ((leading_byte & 0b11100000) == 0b11000000) { + // We have a two-byte UTF-8 + if (pos + 1 >= len) { + break; + } // minimal bound checking + *utf32_output++ = char32_t(((leading_byte & 0b00011111) << 6) | + (uint8_t(data[pos + 1]) & 0b00111111)); + pos += 2; + } else if ((leading_byte & 0b11110000) == 0b11100000) { + // We have a three-byte UTF-8 + if (pos + 2 >= len) { + break; + } // minimal bound checking + *utf32_output++ = char32_t(((leading_byte & 0b00001111) << 12) | + ((uint8_t(data[pos + 1]) & 0b00111111) << 6) | + (uint8_t(data[pos + 2]) & 0b00111111)); + pos += 3; + } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 + // we have a 4-byte UTF-8 word. + if (pos + 3 >= len) { + break; + } // minimal bound checking + uint32_t code_word = ((leading_byte & 0b00000111) << 18) | + ((uint8_t(data[pos + 1]) & 0b00111111) << 12) | + ((uint8_t(data[pos + 2]) & 0b00111111) << 6) | + (uint8_t(data[pos + 3]) & 0b00111111); + *utf32_output++ = char32_t(code_word); + pos += 4; + } else { + // we may have a continuation but we do not do error checking + return 0; + } + } + return utf32_output - start; +} + +} // namespace utf8_to_utf32 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/utf8_to_utf32/valid_utf8_to_utf32.h */ + +namespace simdutf { + +constexpr size_t default_line_length = + 76; ///< default line length for base64 encoding with lines + +#if SIMDUTF_FEATURE_DETECT_ENCODING +/** + * Autodetect the encoding of the input, a single encoding is recommended. + * E.g., the function might return simdutf::encoding_type::UTF8, + * simdutf::encoding_type::UTF16_LE, simdutf::encoding_type::UTF16_BE, or + * simdutf::encoding_type::UTF32_LE. + * + * @param input the string to analyze. + * @param length the length of the string in bytes. 
+ * @return the detected encoding type + */ +simdutf_warn_unused simdutf::encoding_type +autodetect_encoding(const char *input, size_t length) noexcept; +simdutf_really_inline simdutf_warn_unused simdutf::encoding_type +autodetect_encoding(const uint8_t *input, size_t length) noexcept { + return autodetect_encoding(reinterpret_cast(input), length); +} + #if SIMDUTF_SPAN +/** + * Autodetect the encoding of the input, a single encoding is recommended. + * E.g., the function might return simdutf::encoding_type::UTF8, + * simdutf::encoding_type::UTF16_LE, simdutf::encoding_type::UTF16_BE, or + * simdutf::encoding_type::UTF32_LE. + * + * @param input the string to analyze. Can be anything span-like that has a + * data() and size() that point to character data: std::string, + * std::string_view, std::vector, std::span etc. + * @return the detected encoding type + */ +simdutf_really_inline simdutf_warn_unused simdutf::encoding_type +autodetect_encoding( + const detail::input_span_of_byte_like auto &input) noexcept { + return autodetect_encoding(reinterpret_cast(input.data()), + input.size()); +} + #endif // SIMDUTF_SPAN + +/** + * Autodetect the possible encodings of the input in one pass. + * E.g., if the input might be UTF-16LE or UTF-8, this function returns + * the value (simdutf::encoding_type::UTF8 | simdutf::encoding_type::UTF16_LE). + * + * Overridden by each implementation. + * + * @param input the string to analyze. + * @param length the length of the string in bytes.
+ * @return the detected encoding type + */ +simdutf_warn_unused int detect_encodings(const char *input, + size_t length) noexcept; +simdutf_really_inline simdutf_warn_unused int +detect_encodings(const uint8_t *input, size_t length) noexcept { + return detect_encodings(reinterpret_cast(input), length); +} + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused int +detect_encodings(const detail::input_span_of_byte_like auto &input) noexcept { + return detect_encodings(reinterpret_cast(input.data()), + input.size()); +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +/** + * Validate the UTF-8 string. This function may be best when you expect + * the input to be almost always valid. Otherwise, consider using + * validate_utf8_with_errors. + * + * Overridden by each implementation. + * + * @param buf the UTF-8 string to validate. + * @param len the length of the string in bytes. + * @return true if and only if the string is valid UTF-8. + */ +simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept; + #if SIMDUTF_SPAN +simdutf_constexpr23 simdutf_really_inline simdutf_warn_unused bool +validate_utf8(const detail::input_span_of_byte_like auto &input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8::validate( + detail::constexpr_cast_ptr(input.data()), input.size()); + } else + #endif + { + return validate_utf8(reinterpret_cast(input.data()), + input.size()); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 +/** + * Validate the UTF-8 string and stop on error. + * + * Overridden by each implementation. + * + * @param buf the UTF-8 string to validate. + * @param len the length of the string in bytes. 
+ * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of code units validated if + * successful. + */ +simdutf_warn_unused result validate_utf8_with_errors(const char *buf, + size_t len) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused result +validate_utf8_with_errors( + const detail::input_span_of_byte_like auto &input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8::validate_with_errors( + detail::constexpr_cast_ptr(input.data()), input.size()); + } else + #endif + { + return validate_utf8_with_errors( + reinterpret_cast(input.data()), input.size()); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_ASCII +/** + * Validate the ASCII string. + * + * Overridden by each implementation. + * + * @param buf the ASCII string to validate. + * @param len the length of the string in bytes. + * @return true if and only if the string is valid ASCII. + */ +simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 bool +validate_ascii(const detail::input_span_of_byte_like auto &input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::ascii::validate( + detail::constexpr_cast_ptr(input.data()), input.size()); + } else + #endif + { + return validate_ascii(reinterpret_cast(input.data()), + input.size()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Validate the ASCII string and stop on error. It might be faster than + * validate_ascii when an error is expected to occur early. + * + * Overridden by each implementation. + * + * @param buf the ASCII string to validate. + * @param len the length of the string in bytes.
+ * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of code units validated if + * successful. + */ +simdutf_warn_unused result validate_ascii_with_errors(const char *buf, + size_t len) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result +validate_ascii_with_errors( + const detail::input_span_of_byte_like auto &input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::ascii::validate_with_errors( + detail::constexpr_cast_ptr(input.data()), input.size()); + } else + #endif + { + return validate_ascii_with_errors( + reinterpret_cast(input.data()), input.size()); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII +/** + * Validate the ASCII string as a UTF-16 sequence. + * An UTF-16 sequence is considered an ASCII sequence + * if it could be converted to an ASCII string losslessly. + * + * Overridden by each implementation. + * + * @param buf the UTF-16 string to validate. + * @param len the length of the string in bytes. + * @return true if and only if the string is valid ASCII. + */ +simdutf_warn_unused bool validate_utf16_as_ascii(const char16_t *buf, + size_t len) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 bool +validate_utf16_as_ascii(std::span input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::validate_as_ascii(input.data(), + input.size()); + } else + #endif + { + return validate_utf16_as_ascii(input.data(), input.size()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Validate the ASCII string as a UTF-16BE sequence. + * An UTF-16 sequence is considered an ASCII sequence + * if it could be converted to an ASCII string losslessly. + * + * Overridden by each implementation. 
+ * + * @param buf the UTF-16BE string to validate. + * @param len the length of the string in bytes. + * @return true if and only if the string is valid ASCII. + */ +simdutf_warn_unused bool validate_utf16be_as_ascii(const char16_t *buf, + size_t len) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 bool +validate_utf16be_as_ascii(std::span input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::validate_as_ascii(input.data(), + input.size()); + } else + #endif + { + return validate_utf16be_as_ascii(input.data(), input.size()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Validate the ASCII string as a UTF-16LE sequence. + * An UTF-16 sequence is considered an ASCII sequence + * if it could be converted to an ASCII string losslessly. + * + * Overridden by each implementation. + * + * @param buf the UTF-16LE string to validate. + * @param len the length of the string in bytes. + * @return true if and only if the string is valid ASCII. + */ +simdutf_warn_unused bool validate_utf16le_as_ascii(const char16_t *buf, + size_t len) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 bool +validate_utf16le_as_ascii(std::span input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::validate_as_ascii(input.data(), + input.size()); + } else + #endif + { + return validate_utf16le_as_ascii(input.data(), input.size()); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF16 +/** + * Using native endianness; Validate the UTF-16 string. + * This function may be best when you expect the input to be almost always + * valid. Otherwise, consider using validate_utf16_with_errors. + * + * Overridden by each implementation. + * + * This function is not BOM-aware. + * + * @param buf the UTF-16 string to validate. 
+ * @param len the length of the string in number of 2-byte code units + * (char16_t). + * @return true if and only if the string is valid UTF-16. + */ +simdutf_warn_unused bool validate_utf16(const char16_t *buf, + size_t len) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 bool +validate_utf16(std::span input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::validate(input.data(), + input.size()); + } else + #endif + { + return validate_utf16(input.data(), input.size()); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +/** + * Validate the UTF-16LE string. This function may be best when you expect + * the input to be almost always valid. Otherwise, consider using + * validate_utf16le_with_errors. + * + * Overridden by each implementation. + * + * This function is not BOM-aware. + * + * @param buf the UTF-16LE string to validate. + * @param len the length of the string in number of 2-byte code units + * (char16_t). + * @return true if and only if the string is valid UTF-16LE. + */ +simdutf_warn_unused bool validate_utf16le(const char16_t *buf, + size_t len) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused bool +validate_utf16le(std::span input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::validate(input.data(), + input.size()); + } else + #endif + { + return validate_utf16le(input.data(), input.size()); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF16 +/** + * Validate the UTF-16BE string. This function may be best when you expect + * the input to be almost always valid. Otherwise, consider using + * validate_utf16be_with_errors. + * + * Overridden by each implementation. + * + * This function is not BOM-aware. 
+ * + * @param buf the UTF-16BE string to validate. + * @param len the length of the string in number of 2-byte code units + * (char16_t). + * @return true if and only if the string is valid UTF-16BE. + */ +simdutf_warn_unused bool validate_utf16be(const char16_t *buf, + size_t len) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 bool +validate_utf16be(std::span input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::validate(input.data(), input.size()); + } else + #endif + { + return validate_utf16be(input.data(), input.size()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Using native endianness; Validate the UTF-16 string and stop on error. + * It might be faster than validate_utf16 when an error is expected to occur + * early. + * + * Overridden by each implementation. + * + * This function is not BOM-aware. + * + * @param buf the UTF-16 string to validate. + * @param len the length of the string in number of 2-byte code units + * (char16_t). + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of code units validated if + * successful. + */ +simdutf_warn_unused result validate_utf16_with_errors(const char16_t *buf, + size_t len) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result +validate_utf16_with_errors(std::span input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::validate_with_errors( + input.data(), input.size()); + } else + #endif + { + return validate_utf16_with_errors(input.data(), input.size()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Validate the UTF-16LE string and stop on error. It might be faster than + * validate_utf16le when an error is expected to occur early. + * + * Overridden by each implementation. 
+ * + * This function is not BOM-aware. + * + * @param buf the UTF-16LE string to validate. + * @param len the length of the string in number of 2-byte code units + * (char16_t). + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of code units validated if + * successful. + */ +simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, + size_t len) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result +validate_utf16le_with_errors(std::span input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::validate_with_errors( + input.data(), input.size()); + } else + #endif + { + return validate_utf16le_with_errors(input.data(), input.size()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Validate the UTF-16BE string and stop on error. It might be faster than + * validate_utf16be when an error is expected to occur early. + * + * Overridden by each implementation. + * + * This function is not BOM-aware. + * + * @param buf the UTF-16BE string to validate. + * @param len the length of the string in number of 2-byte code units + * (char16_t). + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of code units validated if + * successful. 
+ */ +simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, + size_t len) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result +validate_utf16be_with_errors(std::span input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::validate_with_errors(input.data(), + input.size()); + } else + #endif + { + return validate_utf16be_with_errors(input.data(), input.size()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Fixes an ill-formed UTF-16LE string by replacing mismatched surrogates with + * the Unicode replacement character U+FFFD. If input and output points to + * different memory areas, the procedure copies string, and it's expected that + * output memory is at least as big as the input. It's also possible to set + * input equal output, that makes replacements an in-place operation. + * + * @param input the UTF-16LE string to correct. + * @param len the length of the string in number of 2-byte code units + * (char16_t). + * @param output the output buffer. + */ +void to_well_formed_utf16le(const char16_t *input, size_t len, + char16_t *output) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_constexpr23 void +to_well_formed_utf16le(std::span input, + std::span output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + scalar::utf16::to_well_formed_utf16( + input.data(), input.size(), output.data()); + } else + #endif + { + to_well_formed_utf16le(input.data(), input.size(), output.data()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Fixes an ill-formed UTF-16BE string by replacing mismatched surrogates with + * the Unicode replacement character U+FFFD. If input and output points to + * different memory areas, the procedure copies string, and it's expected that + * output memory is at least as big as the input. It's also possible to set + * input equal output, that makes replacements an in-place operation. + * + * @param input the UTF-16BE string to correct. 
+ * @param len the length of the string in number of 2-byte code units + * (char16_t). + * @param output the output buffer. + */ +void to_well_formed_utf16be(const char16_t *input, size_t len, + char16_t *output) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_constexpr23 void +to_well_formed_utf16be(std::span input, + std::span output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + scalar::utf16::to_well_formed_utf16( + input.data(), input.size(), output.data()); + } else + #endif + { + to_well_formed_utf16be(input.data(), input.size(), output.data()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Fixes an ill-formed UTF-16 string by replacing mismatched surrogates with the + * Unicode replacement character U+FFFD. If input and output points to different + * memory areas, the procedure copies string, and it's expected that output + * memory is at least as big as the input. It's also possible to set input equal + * output, that makes replacements an in-place operation. + * + * @param input the UTF-16 string to correct. + * @param len the length of the string in number of 2-byte code units + * (char16_t). + * @param output the output buffer. + */ +void to_well_formed_utf16(const char16_t *input, size_t len, + char16_t *output) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_constexpr23 void +to_well_formed_utf16(std::span input, + std::span output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + scalar::utf16::to_well_formed_utf16( + input.data(), input.size(), output.data()); + } else + #endif + { + to_well_formed_utf16(input.data(), input.size(), output.data()); + } +} + #endif // SIMDUTF_SPAN + +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +/** + * Validate the UTF-32 string. This function may be best when you expect + * the input to be almost always valid. Otherwise, consider using + * validate_utf32_with_errors. + * + * Overridden by each implementation. 
+ * + * This function is not BOM-aware. + * + * @param buf the UTF-32 string to validate. + * @param len the length of the string in number of 4-byte code units + * (char32_t). + * @return true if and only if the string is valid UTF-32. + */ +simdutf_warn_unused bool validate_utf32(const char32_t *buf, + size_t len) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 bool +validate_utf32(std::span input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf32::validate( + detail::constexpr_cast_ptr(input.data()), input.size()); + } else + #endif + { + return validate_utf32(input.data(), input.size()); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF32 +/** + * Validate the UTF-32 string and stop on error. It might be faster than + * validate_utf32 when an error is expected to occur early. + * + * Overridden by each implementation. + * + * This function is not BOM-aware. + * + * @param buf the UTF-32 string to validate. + * @param len the length of the string in number of 4-byte code units + * (char32_t). + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of code units validated if + * successful. 
+ */ +simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, + size_t len) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result +validate_utf32_with_errors(std::span input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf32::validate_with_errors( + detail::constexpr_cast_ptr(input.data()), input.size()); + } else + #endif + { + return validate_utf32_with_errors(input.data(), input.size()); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +/** + * Convert Latin1 string into UTF-8 string. + * + * This function is suitable to work with inputs from untrusted sources. + * + * @param input the Latin1 string to convert + * @param length the length of the string in bytes + * @param utf8_output the pointer to buffer that can hold conversion result + * @return the number of written char; 0 if conversion is not possible + */ +simdutf_warn_unused size_t convert_latin1_to_utf8(const char *input, + size_t length, + char *utf8_output) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_latin1_to_utf8( + const detail::input_span_of_byte_like auto &latin1_input, + detail::output_span_of_byte_like auto &&utf8_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::latin1_to_utf8::convert( + detail::constexpr_cast_ptr(latin1_input.data()), + latin1_input.size(), + detail::constexpr_cast_writeptr(utf8_output.data())); + } else + #endif + { + return convert_latin1_to_utf8( + reinterpret_cast(latin1_input.data()), + latin1_input.size(), reinterpret_cast(utf8_output.data())); + } +} + #endif // SIMDUTF_SPAN + +/** + * Convert Latin1 string into UTF-8 string with output limit. + * + * This function is suitable to work with inputs from untrusted sources. + * + * We write as many characters as possible. 
+ * + * @param input the Latin1 string to convert + * @param length the length of the string in bytes + * @param utf8_output the pointer to buffer that can hold conversion result + * @param utf8_len the maximum output length + * @return the number of written char; 0 if conversion is not possible + */ +simdutf_warn_unused size_t +convert_latin1_to_utf8_safe(const char *input, size_t length, char *utf8_output, + size_t utf8_len) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_latin1_to_utf8_safe( + const detail::input_span_of_byte_like auto &input, + detail::output_span_of_byte_like auto &&utf8_output) noexcept { + // implementation note: outputspan is a forwarding ref to avoid copying + // and allow both lvalues and rvalues. std::span can be copied without + // problems, but std::vector should not, and this function should accept + // both. it will allow using an owning rvalue ref (example: passing a + // temporary std::string) as output, but the user will quickly find out + // that he has no way of getting the data out of the object in that case. + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::latin1_to_utf8::convert_safe_constexpr( + input.data(), input.size(), utf8_output.data(), utf8_output.size()); + } else + #endif + { + return convert_latin1_to_utf8_safe( + reinterpret_cast(input.data()), input.size(), + reinterpret_cast(utf8_output.data()), utf8_output.size()); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +/** + * Convert possibly Latin1 string into UTF-16LE string. + * + * This function is suitable to work with inputs from untrusted sources. 
+ * + * @param input the Latin1 string to convert + * @param length the length of the string in bytes + * @param utf16_buffer the pointer to buffer that can hold conversion result + * @return the number of written char16_t; 0 if conversion is not possible + */ +simdutf_warn_unused size_t convert_latin1_to_utf16le( + const char *input, size_t length, char16_t *utf16_output) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_latin1_to_utf16le( + const detail::input_span_of_byte_like auto &latin1_input, + std::span utf16_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::latin1_to_utf16::convert( + latin1_input.data(), latin1_input.size(), utf16_output.data()); + } else + #endif + { + return convert_latin1_to_utf16le( + reinterpret_cast(latin1_input.data()), + latin1_input.size(), utf16_output.data()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Convert Latin1 string into UTF-16BE string. + * + * This function is suitable to work with inputs from untrusted sources. 
+ * + * @param input the Latin1 string to convert + * @param length the length of the string in bytes + * @param utf16_buffer the pointer to buffer that can hold conversion result + * @return the number of written char16_t; 0 if conversion is not possible + */ +simdutf_warn_unused size_t convert_latin1_to_utf16be( + const char *input, size_t length, char16_t *utf16_output) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_latin1_to_utf16be(const detail::input_span_of_byte_like auto &input, + std::span output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::latin1_to_utf16::convert( + input.data(), input.size(), output.data()); + } else + #endif + { + return convert_latin1_to_utf16be( + reinterpret_cast(input.data()), input.size(), + output.data()); + } +} + #endif // SIMDUTF_SPAN +/** + * Compute the number of bytes that this UTF-16 string would require in Latin1 + * format. + * + * @param length the length of the string in 2-byte code units (char16_t) + * @return the length of the string in Latin1 code units (char) required to + * encode the UTF-16 string as Latin1 + */ +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +latin1_length_from_utf16(size_t length) noexcept { + return length; +} + +/** + * Compute the number of code units that this Latin1 string would require in + * UTF-16 format. + * + * @param length the length of the string in Latin1 code units (char) + * @return the length of the string in 2-byte code units (char16_t) required to + * encode the Latin1 string as UTF-16 + */ +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +utf16_length_from_latin1(size_t length) noexcept { + return length; +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +/** + * Convert Latin1 string into UTF-32 string. + * + * This function is suitable to work with inputs from untrusted sources.
+ * + * @param input the Latin1 string to convert + * @param length the length of the string in bytes + * @param utf32_buffer the pointer to buffer that can hold conversion result + * @return the number of written char32_t; 0 if conversion is not possible + */ +simdutf_warn_unused size_t convert_latin1_to_utf32( + const char *input, size_t length, char32_t *utf32_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_latin1_to_utf32( + const detail::input_span_of_byte_like auto &latin1_input, + std::span utf32_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::latin1_to_utf32::convert( + latin1_input.data(), latin1_input.size(), utf32_output.data()); + } else + #endif + { + return convert_latin1_to_utf32( + reinterpret_cast(latin1_input.data()), + latin1_input.size(), utf32_output.data()); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +/** + * Convert possibly broken UTF-8 string into latin1 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. 
+ * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param latin1_output the pointer to buffer that can hold conversion result + * @return the number of written char; 0 if the input was not valid UTF-8 string + * or if it cannot be represented as Latin1 + */ +simdutf_warn_unused size_t convert_utf8_to_latin1(const char *input, + size_t length, + char *latin1_output) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_utf8_to_latin1( + const detail::input_span_of_byte_like auto &input, + detail::output_span_of_byte_like auto &&output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8_to_latin1::convert(input.data(), input.size(), + output.data()); + } else + #endif + { + return convert_utf8_to_latin1(reinterpret_cast(input.data()), + input.size(), + reinterpret_cast(output.data())); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +/** + * Using native endianness, convert possibly broken UTF-8 string into a UTF-16 + * string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. 
+ * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param utf16_buffer the pointer to buffer that can hold conversion result + * @return the number of written char16_t; 0 if the input was not valid UTF-8 + * string + */ +simdutf_warn_unused size_t convert_utf8_to_utf16( + const char *input, size_t length, char16_t *utf16_output) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_utf8_to_utf16(const detail::input_span_of_byte_like auto &input, + std::span output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8_to_utf16::convert( + input.data(), input.size(), output.data()); + } else + #endif + { + return convert_utf8_to_utf16(reinterpret_cast(input.data()), + input.size(), output.data()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Compute the number of bytes that this UTF-16LE string would require in UTF-8 + * format even when the UTF-16LE content contains mismatched surrogates + * that have to be replaced by the replacement character (0xFFFD). + * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte code units (char16_t) + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) where the count is the number of bytes required to + * encode the UTF-16LE string as UTF-8, and the error code is either SUCCESS or + * SURROGATE. The count is correct regardless of the error field. + * When SURROGATE is returned, it does not indicate an error in the case of this + * function: it indicates that at least one surrogate has been encountered: the + * surrogates may be matched or not (thus this function does not validate). If + * the returned error code is SUCCESS, then the input contains no surrogate, is + * in the Basic Multilingual Plane, and is necessarily valid. 
+ */ +simdutf_warn_unused result utf8_length_from_utf16le_with_replacement( + const char16_t *input, size_t length) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused result +utf8_length_from_utf16le_with_replacement( + std::span valid_utf16_input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::utf8_length_from_utf16_with_replacement< + endianness::LITTLE>(valid_utf16_input.data(), valid_utf16_input.size()); + } else + #endif + { + return utf8_length_from_utf16le_with_replacement(valid_utf16_input.data(), + valid_utf16_input.size()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Compute the number of bytes that this UTF-16BE string would require in UTF-8 + * format even when the UTF-16BE content contains mismatched surrogates + * that have to be replaced by the replacement character (0xFFFD). + * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte code units (char16_t) + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) where the count is the number of bytes required to + * encode the UTF-16BE string as UTF-8, and the error code is either SUCCESS or + * SURROGATE. The count is correct regardless of the error field. + * When SURROGATE is returned, it does not indicate an error in the case of this + * function: it indicates that at least one surrogate has been encountered: the + * surrogates may be matched or not (thus this function does not validate). If + * the returned error code is SUCCESS, then the input contains no surrogate, is + * in the Basic Multilingual Plane, and is necessarily valid. 
+ */ +simdutf_warn_unused result utf8_length_from_utf16be_with_replacement( + const char16_t *input, size_t length) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result +utf8_length_from_utf16be_with_replacement( + std::span valid_utf16_input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::utf8_length_from_utf16_with_replacement< + endianness::BIG>(valid_utf16_input.data(), valid_utf16_input.size()); + } else + #endif + { + return utf8_length_from_utf16be_with_replacement(valid_utf16_input.data(), + valid_utf16_input.size()); + } +} + #endif // SIMDUTF_SPAN + +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +/** + * Using native endianness, convert a Latin1 string into a UTF-16 string. + * + * @param input the Latin1 string to convert + * @param length the length of the string in bytes + * @param utf16_output the pointer to buffer that can hold conversion result + * @return the number of written char16_t. + */ +simdutf_warn_unused size_t convert_latin1_to_utf16( + const char *input, size_t length, char16_t *utf16_output) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_latin1_to_utf16(const detail::input_span_of_byte_like auto &input, + std::span output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::latin1_to_utf16::convert( + input.data(), input.size(), output.data()); + } else + #endif + { + return convert_latin1_to_utf16(reinterpret_cast(input.data()), + input.size(), output.data()); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +/** + * Convert possibly broken UTF-8 string into UTF-16LE string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources.
+ * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param utf16_buffer the pointer to buffer that can hold conversion result + * @return the number of written char16_t; 0 if the input was not valid UTF-8 + * string + */ +simdutf_warn_unused size_t convert_utf8_to_utf16le( + const char *input, size_t length, char16_t *utf16_output) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_utf8_to_utf16le(const detail::input_span_of_byte_like auto &utf8_input, + std::span utf16_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8_to_utf16::convert( + utf8_input.data(), utf8_input.size(), utf16_output.data()); + } else + #endif + { + return convert_utf8_to_utf16le( + reinterpret_cast(utf8_input.data()), utf8_input.size(), + utf16_output.data()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Convert possibly broken UTF-8 string into UTF-16BE string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. 
+ * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param utf16_buffer the pointer to buffer that can hold conversion result + * @return the number of written char16_t; 0 if the input was not valid UTF-8 + * string + */ +simdutf_warn_unused size_t convert_utf8_to_utf16be( + const char *input, size_t length, char16_t *utf16_output) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_utf8_to_utf16be(const detail::input_span_of_byte_like auto &utf8_input, + std::span utf16_output) noexcept { + + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8_to_utf16::convert( + utf8_input.data(), utf8_input.size(), utf16_output.data()); + } else + #endif + { + return convert_utf8_to_utf16be( + reinterpret_cast(utf8_input.data()), utf8_input.size(), + utf16_output.data()); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +/** + * Convert possibly broken UTF-8 string into latin1 string with errors. + * If the string cannot be represented as Latin1, an error + * code is returned. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param latin1_output the pointer to buffer that can hold conversion result + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of code units validated if + * successful. 
+ */ +simdutf_warn_unused result convert_utf8_to_latin1_with_errors( + const char *input, size_t length, char *latin1_output) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result +convert_utf8_to_latin1_with_errors( + const detail::input_span_of_byte_like auto &utf8_input, + detail::output_span_of_byte_like auto &&latin1_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8_to_latin1::convert_with_errors( + utf8_input.data(), utf8_input.size(), latin1_output.data()); + } else + #endif + { + return convert_utf8_to_latin1_with_errors( + reinterpret_cast(utf8_input.data()), utf8_input.size(), + reinterpret_cast(latin1_output.data())); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +/** + * Using native endianness, convert possibly broken UTF-8 string into UTF-16 + * string and stop on error. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param utf16_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of char16_t written if + * successful. 
+ */ +simdutf_warn_unused result convert_utf8_to_utf16_with_errors( + const char *input, size_t length, char16_t *utf16_output) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result +convert_utf8_to_utf16_with_errors( + const detail::input_span_of_byte_like auto &utf8_input, + std::span utf16_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8_to_utf16::convert_with_errors( + utf8_input.data(), utf8_input.size(), utf16_output.data()); + } else + #endif + { + return convert_utf8_to_utf16_with_errors( + reinterpret_cast(utf8_input.data()), utf8_input.size(), + utf16_output.data()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Convert possibly broken UTF-8 string into UTF-16LE string and stop on error. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param utf16_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of char16_t written if + * successful. 
+ */ +simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( + const char *input, size_t length, char16_t *utf16_output) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result +convert_utf8_to_utf16le_with_errors( + const detail::input_span_of_byte_like auto &utf8_input, + std::span utf16_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8_to_utf16::convert_with_errors( + utf8_input.data(), utf8_input.size(), utf16_output.data()); + } else + #endif + { + return convert_utf8_to_utf16le_with_errors( + reinterpret_cast(utf8_input.data()), utf8_input.size(), + utf16_output.data()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Convert possibly broken UTF-8 string into UTF-16BE string and stop on error. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param utf16_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of char16_t written if + * successful. 
+ */ +simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( + const char *input, size_t length, char16_t *utf16_output) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result +convert_utf8_to_utf16be_with_errors( + const detail::input_span_of_byte_like auto &utf8_input, + std::span utf16_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8_to_utf16::convert_with_errors( + utf8_input.data(), utf8_input.size(), utf16_output.data()); + } else + #endif + { + return convert_utf8_to_utf16be_with_errors( + reinterpret_cast(utf8_input.data()), utf8_input.size(), + utf16_output.data()); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +/** + * Convert possibly broken UTF-8 string into UTF-32 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. 
+ * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param utf32_buffer the pointer to buffer that can hold conversion result + * @return the number of written char32_t; 0 if the input was not valid UTF-8 + * string + */ +simdutf_warn_unused size_t convert_utf8_to_utf32( + const char *input, size_t length, char32_t *utf32_output) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_utf8_to_utf32(const detail::input_span_of_byte_like auto &utf8_input, + std::span utf32_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8_to_utf32::convert(utf8_input.data(), utf8_input.size(), + utf32_output.data()); + } else + #endif + { + return convert_utf8_to_utf32( + reinterpret_cast(utf8_input.data()), utf8_input.size(), + utf32_output.data()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Convert possibly broken UTF-8 string into UTF-32 string and stop on error. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param utf32_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of char32_t written if + * successful. 
+ */ +simdutf_warn_unused result convert_utf8_to_utf32_with_errors( + const char *input, size_t length, char32_t *utf32_output) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result +convert_utf8_to_utf32_with_errors( + const detail::input_span_of_byte_like auto &utf8_input, + std::span utf32_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8_to_utf32::convert_with_errors( + utf8_input.data(), utf8_input.size(), utf32_output.data()); + } else + #endif + { + return convert_utf8_to_utf32_with_errors( + reinterpret_cast(utf8_input.data()), utf8_input.size(), + utf32_output.data()); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +/** + * Convert valid UTF-8 string into latin1 string. + * + * This function assumes that the input string is valid UTF-8 and that it can be + * represented as Latin1. If you violate this assumption, the result is + * implementation defined and may include system-dependent behavior such as + * crashes. + * + * This function is for expert users only and not part of our public API. Use + * convert_utf8_to_latin1 instead. The function may be removed from the library + * in the future. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param latin1_output the pointer to buffer that can hold conversion result + * @return the number of written char; 0 if the input was not valid UTF-8 string + */ +simdutf_warn_unused size_t convert_valid_utf8_to_latin1( + const char *input, size_t length, char *latin1_output) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_valid_utf8_to_latin1( + const detail::input_span_of_byte_like auto &valid_utf8_input, + detail::output_span_of_byte_like auto &&latin1_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8_to_latin1::convert_valid( + valid_utf8_input.data(), valid_utf8_input.size(), latin1_output.data()); + } else + #endif + { + return convert_valid_utf8_to_latin1( + reinterpret_cast(valid_utf8_input.data()), + valid_utf8_input.size(), latin1_output.data()); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +/** + * Using native endianness, convert valid UTF-8 string into a UTF-16 string. + * + * This function assumes that the input string is valid UTF-8. 
+ * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param utf16_buffer the pointer to buffer that can hold conversion result + * @return the number of written char16_t + */ +simdutf_warn_unused size_t convert_valid_utf8_to_utf16( + const char *input, size_t length, char16_t *utf16_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_valid_utf8_to_utf16( + const detail::input_span_of_byte_like auto &valid_utf8_input, + std::span utf16_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8_to_utf16::convert_valid( + valid_utf8_input.data(), valid_utf8_input.size(), utf16_output.data()); + } else + #endif + { + return convert_valid_utf8_to_utf16( + reinterpret_cast(valid_utf8_input.data()), + valid_utf8_input.size(), utf16_output.data()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Convert valid UTF-8 string into UTF-16LE string. + * + * This function assumes that the input string is valid UTF-8. 
+ * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param utf16_buffer the pointer to buffer that can hold conversion result + * @return the number of written char16_t + */ +simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( + const char *input, size_t length, char16_t *utf16_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_valid_utf8_to_utf16le( + const detail::input_span_of_byte_like auto &valid_utf8_input, + std::span utf16_output) noexcept { + + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8_to_utf16::convert_valid( + valid_utf8_input.data(), valid_utf8_input.size(), utf16_output.data()); + } else + #endif + { + return convert_valid_utf8_to_utf16le( + reinterpret_cast(valid_utf8_input.data()), + valid_utf8_input.size(), utf16_output.data()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Convert valid UTF-8 string into UTF-16BE string. + * + * This function assumes that the input string is valid UTF-8. 
+ * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param utf16_buffer the pointer to buffer that can hold conversion result + * @return the number of written char16_t + */ +simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( + const char *input, size_t length, char16_t *utf16_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_valid_utf8_to_utf16be( + const detail::input_span_of_byte_like auto &valid_utf8_input, + std::span utf16_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8_to_utf16::convert_valid( + valid_utf8_input.data(), valid_utf8_input.size(), utf16_output.data()); + } else + #endif + { + return convert_valid_utf8_to_utf16be( + reinterpret_cast(valid_utf8_input.data()), + valid_utf8_input.size(), utf16_output.data()); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +/** + * Convert valid UTF-8 string into UTF-32 string. + * + * This function assumes that the input string is valid UTF-8. 
+ * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param utf32_buffer the pointer to buffer that can hold conversion result + * @return the number of written char32_t + */ +simdutf_warn_unused size_t convert_valid_utf8_to_utf32( + const char *input, size_t length, char32_t *utf32_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_valid_utf8_to_utf32( + const detail::input_span_of_byte_like auto &valid_utf8_input, + std::span utf32_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8_to_utf32::convert_valid( + valid_utf8_input.data(), valid_utf8_input.size(), utf32_output.data()); + } else + #endif + { + return convert_valid_utf8_to_utf32( + reinterpret_cast(valid_utf8_input.data()), + valid_utf8_input.size(), utf32_output.data()); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +/** + * Return the number of bytes that this Latin1 string would require in UTF-8 + * format. + * + * @param input the Latin1 string to convert + * @param length the length of the string in bytes + * @return the number of bytes required to encode the Latin1 string as UTF-8 + */ +simdutf_warn_unused size_t utf8_length_from_latin1(const char *input, + size_t length) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +utf8_length_from_latin1( + const detail::input_span_of_byte_like auto &latin1_input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::latin1_to_utf8::utf8_length_from_latin1(latin1_input.data(), + latin1_input.size()); + } else + #endif + { + return utf8_length_from_latin1( + reinterpret_cast(latin1_input.data()), + latin1_input.size()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Compute the number of bytes that this UTF-8 string would require in Latin1 + * format.
+ * + * This function does not validate the input. It is acceptable to pass invalid + * UTF-8 strings but in such cases the result is implementation defined. + * + * This function is not BOM-aware. + * + * @param input the UTF-8 string to convert + * @param length the length of the string in byte + * @return the number of bytes required to encode the UTF-8 string as Latin1 + */ +simdutf_warn_unused size_t latin1_length_from_utf8(const char *input, + size_t length) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +latin1_length_from_utf8( + const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8::count_code_points(valid_utf8_input.data(), + valid_utf8_input.size()); + } else + #endif + { + return latin1_length_from_utf8( + reinterpret_cast(valid_utf8_input.data()), + valid_utf8_input.size()); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +/** + * Compute the number of 2-byte code units that this UTF-8 string would require + * in UTF-16LE format. + * + * This function does not validate the input. It is acceptable to pass invalid + * UTF-8 strings but in such cases the result is implementation defined. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-8 string to process + * @param length the length of the string in bytes + * @return the number of char16_t code units required to encode the UTF-8 string + * as UTF-16LE + */ +simdutf_warn_unused size_t utf16_length_from_utf8(const char *input, + size_t length) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +utf16_length_from_utf8( + const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8::utf16_length_from_utf8(valid_utf8_input.data(), + valid_utf8_input.size()); + } else + #endif + { + return utf16_length_from_utf8( + reinterpret_cast(valid_utf8_input.data()), + valid_utf8_input.size()); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +/** + * Compute the number of 4-byte code units that this UTF-8 string would require + * in UTF-32 format. + * + * This function is equivalent to count_utf8 + * + * This function does not validate the input. It is acceptable to pass invalid + * UTF-8 strings but in such cases the result is implementation defined. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-8 string to process + * @param length the length of the string in bytes + * @return the number of char32_t code units required to encode the UTF-8 string + * as UTF-32 + */ +simdutf_warn_unused size_t utf32_length_from_utf8(const char *input, + size_t length) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +utf32_length_from_utf8( + const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept { + + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8::count_code_points(valid_utf8_input.data(), + valid_utf8_input.size()); + } else + #endif + { + return utf32_length_from_utf8( + reinterpret_cast(valid_utf8_input.data()), + valid_utf8_input.size()); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +/** + * Using native endianness, convert possibly broken UTF-16 string into UTF-8 + * string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-16 string to convert + * @param length the length of the string in 2-byte code units (char16_t) + * @param utf8_buffer the pointer to buffer that can hold conversion result + * @return number of written code units; 0 if input is not a valid UTF-16 + * string + */ +simdutf_warn_unused size_t convert_utf16_to_utf8(const char16_t *input, + size_t length, + char *utf8_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_utf16_to_utf8( + std::span utf16_input, + detail::output_span_of_byte_like auto &&utf8_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_utf8::convert( + utf16_input.data(), utf16_input.size(), utf8_output.data()); + } else + #endif + { + return convert_utf16_to_utf8(utf16_input.data(), utf16_input.size(), + reinterpret_cast(utf8_output.data())); + } +} + #endif // SIMDUTF_SPAN + +/** + * Using native endianness, convert possibly broken UTF-16 string into UTF-8 + * string with output limit. + * + * We write as many characters as possible into the output buffer. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware.
+ * + * + * @param input the UTF-16 string to convert + * @param length the length of the string in 16-bit code units (char16_t) + * @param utf8_output the pointer to buffer that can hold conversion result + * @param utf8_len the maximum output length + * @return the number of written char; 0 if conversion is not possible + */ +simdutf_warn_unused size_t convert_utf16_to_utf8_safe(const char16_t *input, + size_t length, + char *utf8_output, + size_t utf8_len) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_utf16_to_utf8_safe( + std::span utf16_input, + detail::output_span_of_byte_like auto &&utf8_output) noexcept { + // implementation note: outputspan is a forwarding ref to avoid copying + // and allow both lvalues and rvalues. std::span can be copied without + // problems, but std::vector should not, and this function should accept + // both. it will allow using an owning rvalue ref (example: passing a + // temporary std::string) as output, but the user will quickly find out + // that he has no way of getting the data out of the object in that case. + #if SIMDUTF_CPLUSPLUS23 + if consteval { + const full_result r = + scalar::utf16_to_utf8::convert_with_errors( + utf16_input.data(), utf16_input.size(), utf8_output.data(), + utf8_output.size()); + if (r.error != error_code::SUCCESS && + r.error != error_code::OUTPUT_BUFFER_TOO_SMALL) { + return 0; + } + return r.output_count; + } else + #endif + { + return convert_utf16_to_utf8_safe( + utf16_input.data(), utf16_input.size(), + reinterpret_cast(utf8_output.data()), utf8_output.size()); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +/** + * Using native endianness, convert possibly broken UTF-16 string into Latin1 + * string. + * + * During the conversion also validation of the input string is done. 
+ * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-16 string to convert + * @param length the length of the string in 2-byte code units (char16_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return number of written code units; 0 if input is not a valid UTF-16 string + * or if it cannot be represented as Latin1 + */ +simdutf_warn_unused size_t convert_utf16_to_latin1( + const char16_t *input, size_t length, char *latin1_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_utf16_to_latin1( + std::span utf16_input, + detail::output_span_of_byte_like auto &&latin1_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_latin1::convert( + utf16_input.data(), utf16_input.size(), latin1_output.data()); + } else + #endif + { + return convert_utf16_to_latin1( + utf16_input.data(), utf16_input.size(), + reinterpret_cast(latin1_output.data())); + } +} + #endif // SIMDUTF_SPAN + +/** + * Convert possibly broken UTF-16LE string into Latin1 string. + * If the string cannot be represented as Latin1, an error + * is returned. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte code units (char16_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return number of written code units; 0 if input is not a valid UTF-16LE + * string or if it cannot be represented as Latin1 + */ +simdutf_warn_unused size_t convert_utf16le_to_latin1( + const char16_t *input, size_t length, char *latin1_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_utf16le_to_latin1( + std::span utf16_input, + detail::output_span_of_byte_like auto &&latin1_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_latin1::convert( + utf16_input.data(), utf16_input.size(), latin1_output.data()); + } else + #endif + { + return convert_utf16le_to_latin1( + utf16_input.data(), utf16_input.size(), + reinterpret_cast(latin1_output.data())); + } +} + #endif // SIMDUTF_SPAN + +/** + * Convert possibly broken UTF-16BE string into Latin1 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte code units (char16_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return number of written code units; 0 if input is not a valid UTF-16BE + * string or if it cannot be represented as Latin1 + */ +simdutf_warn_unused size_t convert_utf16be_to_latin1( + const char16_t *input, size_t length, char *latin1_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_utf16be_to_latin1( + std::span utf16_input, + detail::output_span_of_byte_like auto &&latin1_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_latin1::convert( + utf16_input.data(), utf16_input.size(), latin1_output.data()); + } else + #endif + { + return convert_utf16be_to_latin1( + utf16_input.data(), utf16_input.size(), + reinterpret_cast(latin1_output.data())); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +/** + * Convert possibly broken UTF-16LE string into UTF-8 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte code units (char16_t) + * @param utf8_buffer the pointer to buffer that can hold conversion result + * @return number of written code units; 0 if input is not a valid UTF-16LE + * string + */ +simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t *input, + size_t length, + char *utf8_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_utf16le_to_utf8( + std::span utf16_input, + detail::output_span_of_byte_like auto &&utf8_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_utf8::convert( + utf16_input.data(), utf16_input.size(), utf8_output.data()); + } else + #endif + { + return convert_utf16le_to_utf8( + utf16_input.data(), utf16_input.size(), + reinterpret_cast(utf8_output.data())); + } +} + #endif // SIMDUTF_SPAN + +/** + * Convert possibly broken UTF-16BE string into UTF-8 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte code units (char16_t) + * @param utf8_buffer the pointer to buffer that can hold conversion result + * @return number of written code units; 0 if input is not a valid UTF-16BE + * string + */ +simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t *input, + size_t length, + char *utf8_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_utf16be_to_utf8( + std::span utf16_input, + detail::output_span_of_byte_like auto &&utf8_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_utf8::convert( + utf16_input.data(), utf16_input.size(), utf8_output.data()); + } else + #endif + { + return convert_utf16be_to_utf8( + utf16_input.data(), utf16_input.size(), + reinterpret_cast(utf8_output.data())); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +/** + * Using native endianness, convert possibly broken UTF-16 string into Latin1 + * string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * This function is not BOM-aware. + * + * @param input the UTF-16 string to convert + * @param length the length of the string in 2-byte code units (char16_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of char written if + * successful.
+ */ +simdutf_warn_unused result convert_utf16_to_latin1_with_errors( + const char16_t *input, size_t length, char *latin1_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result +convert_utf16_to_latin1_with_errors( + std::span utf16_input, + detail::output_span_of_byte_like auto &&latin1_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_latin1::convert_with_errors( + utf16_input.data(), utf16_input.size(), latin1_output.data()); + } else + #endif + { + return convert_utf16_to_latin1_with_errors( + utf16_input.data(), utf16_input.size(), + reinterpret_cast(latin1_output.data())); + } +} + #endif // SIMDUTF_SPAN + +/** + * Convert possibly broken UTF-16LE string into Latin1 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * This function is not BOM-aware. + * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte code units (char16_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of char written if + * successful. 
+ */ +simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( + const char16_t *input, size_t length, char *latin1_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result +convert_utf16le_to_latin1_with_errors( + std::span utf16_input, + detail::output_span_of_byte_like auto &&latin1_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_latin1::convert_with_errors( + utf16_input.data(), utf16_input.size(), latin1_output.data()); + } else + #endif + { + return convert_utf16le_to_latin1_with_errors( + utf16_input.data(), utf16_input.size(), + reinterpret_cast(latin1_output.data())); + } +} + #endif // SIMDUTF_SPAN + +/** + * Convert possibly broken UTF-16BE string into Latin1 string. + * If the string cannot be represented as Latin1, an error + * is returned. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * This function is not BOM-aware. + * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte code units (char16_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of char written if + * successful. 
+ */ +simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( + const char16_t *input, size_t length, char *latin1_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result +convert_utf16be_to_latin1_with_errors( + std::span utf16_input, + detail::output_span_of_byte_like auto &&latin1_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_latin1::convert_with_errors( + utf16_input.data(), utf16_input.size(), latin1_output.data()); + } else + #endif + { + return convert_utf16be_to_latin1_with_errors( + utf16_input.data(), utf16_input.size(), + reinterpret_cast(latin1_output.data())); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +/** + * Using native endianness, convert possibly broken UTF-16 string into UTF-8 + * string and stop on error. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-16 string to convert + * @param length the length of the string in 2-byte code units (char16_t) + * @param utf8_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of char written if + * successful. 
+ */ +simdutf_warn_unused result convert_utf16_to_utf8_with_errors( + const char16_t *input, size_t length, char *utf8_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result +convert_utf16_to_utf8_with_errors( + std::span utf16_input, + detail::output_span_of_byte_like auto &&utf8_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_utf8::convert_with_errors( + utf16_input.data(), utf16_input.size(), utf8_output.data()); + } else + #endif + { + return convert_utf16_to_utf8_with_errors( + utf16_input.data(), utf16_input.size(), + reinterpret_cast(utf8_output.data())); + } +} + #endif // SIMDUTF_SPAN + +/** + * Convert possibly broken UTF-16LE string into UTF-8 string and stop on error. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte code units (char16_t) + * @param utf8_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of char written if + * successful. 
+ */ +simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( + const char16_t *input, size_t length, char *utf8_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result +convert_utf16le_to_utf8_with_errors( + std::span utf16_input, + detail::output_span_of_byte_like auto &&utf8_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_utf8::convert_with_errors( + utf16_input.data(), utf16_input.size(), utf8_output.data()); + } else + #endif + { + return convert_utf16le_to_utf8_with_errors( + utf16_input.data(), utf16_input.size(), + reinterpret_cast(utf8_output.data())); + } +} + #endif // SIMDUTF_SPAN + +/** + * Convert possibly broken UTF-16BE string into UTF-8 string and stop on error. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte code units (char16_t) + * @param utf8_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of char written if + * successful. 
+ */ +simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( + const char16_t *input, size_t length, char *utf8_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result +convert_utf16be_to_utf8_with_errors( + std::span utf16_input, + detail::output_span_of_byte_like auto &&utf8_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_utf8::convert_with_errors( + utf16_input.data(), utf16_input.size(), utf8_output.data()); + } else + #endif + { + return convert_utf16be_to_utf8_with_errors( + utf16_input.data(), utf16_input.size(), + reinterpret_cast(utf8_output.data())); + } +} + #endif // SIMDUTF_SPAN + +/** + * Using native endianness, convert valid UTF-16 string into UTF-8 string. + * + * This function assumes that the input string is valid UTF-16. + * + * This function is not BOM-aware. + * + * @param input the UTF-16 string to convert + * @param length the length of the string in 2-byte code units (char16_t) + * @param utf8_buffer the pointer to a buffer that can hold the conversion + * result + * @return number of written code units; 0 if conversion is not possible + */ +simdutf_warn_unused size_t convert_valid_utf16_to_utf8( + const char16_t *input, size_t length, char *utf8_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_valid_utf16_to_utf8( + std::span valid_utf16_input, + detail::output_span_of_byte_like auto &&utf8_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_utf8::convert_valid( + valid_utf16_input.data(), valid_utf16_input.size(), utf8_output.data()); + } else + #endif + { + return convert_valid_utf16_to_utf8( + valid_utf16_input.data(), valid_utf16_input.size(), + reinterpret_cast(utf8_output.data())); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +/** + * 
Using native endianness, convert UTF-16 string into Latin1 string. + * + * This function assumes that the input string is valid UTF-16 and that it can + * be represented as Latin1. If you violate this assumption, the result is + * implementation defined and may include system-dependent behavior such as + * crashes. + * + * This function is for expert users only and not part of our public API. Use + * convert_utf16_to_latin1 instead. The function may be removed from the library + * in the future. + * + * This function is not BOM-aware. + * + * @param input the UTF-16 string to convert + * @param length the length of the string in 2-byte code units (char16_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return number of written code units; 0 if conversion is not possible + */ +simdutf_warn_unused size_t convert_valid_utf16_to_latin1( + const char16_t *input, size_t length, char *latin1_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_valid_utf16_to_latin1( + std::span valid_utf16_input, + detail::output_span_of_byte_like auto &&latin1_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_latin1::convert_valid_impl( + detail::constexpr_cast_ptr(valid_utf16_input.data()), + valid_utf16_input.size(), + detail::constexpr_cast_writeptr(latin1_output.data())); + } else + #endif + { + return convert_valid_utf16_to_latin1( + valid_utf16_input.data(), valid_utf16_input.size(), + reinterpret_cast(latin1_output.data())); + } +} + #endif // SIMDUTF_SPAN + +/** + * Convert valid UTF-16LE string into Latin1 string. + * + * This function assumes that the input string is valid UTF-16LE and that it can + * be represented as Latin1. If you violate this assumption, the result is + * implementation defined and may include system-dependent behavior such as + * crashes. + * + * This function is for expert users only and not part of our public API. 
Use + * convert_utf16le_to_latin1 instead. The function may be removed from the + * library in the future. + * + * This function is not BOM-aware. + * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte code units (char16_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return number of written code units; 0 if conversion is not possible + */ +simdutf_warn_unused size_t convert_valid_utf16le_to_latin1( + const char16_t *input, size_t length, char *latin1_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused size_t +convert_valid_utf16le_to_latin1( + std::span valid_utf16_input, + detail::output_span_of_byte_like auto &&latin1_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_latin1::convert_valid_impl( + detail::constexpr_cast_ptr(valid_utf16_input.data()), + valid_utf16_input.size(), + detail::constexpr_cast_writeptr(latin1_output.data())); + } else + #endif + { + return convert_valid_utf16le_to_latin1( + valid_utf16_input.data(), valid_utf16_input.size(), + reinterpret_cast(latin1_output.data())); + } +} + #endif // SIMDUTF_SPAN + +/** + * Convert valid UTF-16BE string into Latin1 string. + * + * This function assumes that the input string is valid UTF-16BE and that it can + * be represented as Latin1. If you violate this assumption, the result is + * implementation defined and may include system-dependent behavior such as + * crashes. + * + * This function is for expert users only and not part of our public API. Use + * convert_utf16be_to_latin1 instead. The function may be removed from the + * library in the future. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte code units (char16_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return number of written code units; 0 if conversion is not possible + */ +simdutf_warn_unused size_t convert_valid_utf16be_to_latin1( + const char16_t *input, size_t length, char *latin1_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused size_t +convert_valid_utf16be_to_latin1( + std::span valid_utf16_input, + detail::output_span_of_byte_like auto &&latin1_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_latin1::convert_valid_impl( + detail::constexpr_cast_ptr(valid_utf16_input.data()), + valid_utf16_input.size(), + detail::constexpr_cast_writeptr(latin1_output.data())); + } else + #endif + { + return convert_valid_utf16be_to_latin1( + valid_utf16_input.data(), valid_utf16_input.size(), + reinterpret_cast(latin1_output.data())); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +/** + * Convert valid UTF-16LE string into UTF-8 string. + * + * This function assumes that the input string is valid UTF-16LE + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte code units (char16_t) + * @param utf8_buffer the pointer to a buffer that can hold the conversion + * result + * @return number of written code units; 0 if conversion is not possible + */ +simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( + const char16_t *input, size_t length, char *utf8_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_valid_utf16le_to_utf8( + std::span valid_utf16_input, + detail::output_span_of_byte_like auto &&utf8_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_utf8::convert_valid( + valid_utf16_input.data(), valid_utf16_input.size(), utf8_output.data()); + } else + #endif + { + return convert_valid_utf16le_to_utf8( + valid_utf16_input.data(), valid_utf16_input.size(), + reinterpret_cast(utf8_output.data())); + } +} + #endif // SIMDUTF_SPAN + +/** + * Convert valid UTF-16BE string into UTF-8 string. + * + * This function assumes that the input string is valid UTF-16BE. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte code units (char16_t) + * @param utf8_buffer the pointer to a buffer that can hold the conversion + * result + * @return number of written code units; 0 if conversion is not possible + */ +simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( + const char16_t *input, size_t length, char *utf8_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_valid_utf16be_to_utf8( + std::span valid_utf16_input, + detail::output_span_of_byte_like auto &&utf8_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_utf8::convert_valid( + valid_utf16_input.data(), valid_utf16_input.size(), utf8_output.data()); + } else + #endif + { + return convert_valid_utf16be_to_utf8( + valid_utf16_input.data(), valid_utf16_input.size(), + reinterpret_cast(utf8_output.data())); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +/** + * Using native endianness, convert possibly broken UTF-16 string into UTF-32 + * string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-16 string to convert + * @param length the length of the string in 2-byte code units (char16_t) + * @param utf32_buffer the pointer to buffer that can hold conversion result + * @return number of written code units; 0 if input is not a valid UTF-16 + * string + */ +simdutf_warn_unused size_t convert_utf16_to_utf32( + const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_utf16_to_utf32(std::span utf16_input, + std::span utf32_output) noexcept { + + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_utf32::convert( + utf16_input.data(), utf16_input.size(), utf32_output.data()); + } else + #endif + { + return convert_utf16_to_utf32(utf16_input.data(), utf16_input.size(), + utf32_output.data()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Convert possibly broken UTF-16LE string into UTF-32 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware.
+ * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte code units (char16_t) + * @param utf32_buffer the pointer to buffer that can hold conversion result + * @return number of written code units; 0 if input is not a valid UTF-16LE + * string + */ +simdutf_warn_unused size_t convert_utf16le_to_utf32( + const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_utf16le_to_utf32(std::span utf16_input, + std::span utf32_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_utf32::convert( + utf16_input.data(), utf16_input.size(), utf32_output.data()); + } else + #endif + { + return convert_utf16le_to_utf32(utf16_input.data(), utf16_input.size(), + utf32_output.data()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Convert possibly broken UTF-16BE string into UTF-32 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte code units (char16_t) + * @param utf32_buffer the pointer to buffer that can hold conversion result + * @return number of written code units; 0 if input is not a valid UTF-16BE + * string + */ +simdutf_warn_unused size_t convert_utf16be_to_utf32( + const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_utf16be_to_utf32(std::span utf16_input, + std::span utf32_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_utf32::convert( + utf16_input.data(), utf16_input.size(), utf32_output.data()); + } else + #endif + { + return convert_utf16be_to_utf32(utf16_input.data(), utf16_input.size(), + utf32_output.data()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Using native endianness, convert possibly broken UTF-16 string into + * UTF-32 string and stop on error. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-16 string to convert + * @param length the length of the string in 2-byte code units (char16_t) + * @param utf32_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of char32_t written if + * successful. 
+ */ +simdutf_warn_unused result convert_utf16_to_utf32_with_errors( + const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result +convert_utf16_to_utf32_with_errors(std::span utf16_input, + std::span utf32_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_utf32::convert_with_errors( + utf16_input.data(), utf16_input.size(), utf32_output.data()); + } else + #endif + { + return convert_utf16_to_utf32_with_errors( + utf16_input.data(), utf16_input.size(), utf32_output.data()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Convert possibly broken UTF-16LE string into UTF-32 string and stop on error. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte code units (char16_t) + * @param utf32_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of char32_t written if + * successful. 
+ */ +simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( + const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result +convert_utf16le_to_utf32_with_errors( + std::span utf16_input, + std::span utf32_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_utf32::convert_with_errors( + utf16_input.data(), utf16_input.size(), utf32_output.data()); + } else + #endif + { + return convert_utf16le_to_utf32_with_errors( + utf16_input.data(), utf16_input.size(), utf32_output.data()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Convert possibly broken UTF-16BE string into UTF-32 string and stop on error. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte code units (char16_t) + * @param utf32_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of char32_t written if + * successful. 
+ */ +simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( + const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result +convert_utf16be_to_utf32_with_errors( + std::span utf16_input, + std::span utf32_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_utf32::convert_with_errors( + utf16_input.data(), utf16_input.size(), utf32_output.data()); + } else + #endif + { + return convert_utf16be_to_utf32_with_errors( + utf16_input.data(), utf16_input.size(), utf32_output.data()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Using native endianness, convert valid UTF-16 string into UTF-32 string. + * + * This function assumes that the input string is valid UTF-16 (native + * endianness). + * + * This function is not BOM-aware. + * + * @param input the UTF-16 string to convert + * @param length the length of the string in 2-byte code units (char16_t) + * @param utf32_buffer the pointer to a buffer that can hold the conversion + * result + * @return number of written code units; 0 if conversion is not possible + */ +simdutf_warn_unused size_t convert_valid_utf16_to_utf32( + const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_valid_utf16_to_utf32(std::span valid_utf16_input, + std::span utf32_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_utf32::convert_valid( + valid_utf16_input.data(), valid_utf16_input.size(), + utf32_output.data()); + } else + #endif + { + return convert_valid_utf16_to_utf32(valid_utf16_input.data(), + valid_utf16_input.size(), + utf32_output.data()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Convert valid UTF-16LE string into UTF-32 string. + * + * This function assumes that the input string is valid UTF-16LE. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte code units (char16_t) + * @param utf32_buffer the pointer to a buffer that can hold the conversion + * result + * @return number of written code units; 0 if conversion is not possible + */ +simdutf_warn_unused size_t convert_valid_utf16le_to_utf32( + const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_valid_utf16le_to_utf32(std::span valid_utf16_input, + std::span utf32_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_utf32::convert_valid( + valid_utf16_input.data(), valid_utf16_input.size(), + utf32_output.data()); + } else + #endif + { + return convert_valid_utf16le_to_utf32(valid_utf16_input.data(), + valid_utf16_input.size(), + utf32_output.data()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Convert valid UTF-16BE string into UTF-32 string. + * + * This function assumes that the input string is valid UTF-16BE. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte code units (char16_t) + * @param utf32_buffer the pointer to a buffer that can hold the conversion + * result + * @return number of written code units; 0 if conversion is not possible + */ +simdutf_warn_unused size_t convert_valid_utf16be_to_utf32( + const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_valid_utf16be_to_utf32(std::span valid_utf16_input, + std::span utf32_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16_to_utf32::convert_valid( + valid_utf16_input.data(), valid_utf16_input.size(), + utf32_output.data()); + } else + #endif + { + return convert_valid_utf16be_to_utf32(valid_utf16_input.data(), + valid_utf16_input.size(), + utf32_output.data()); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +/** + * Using native endianness; Compute the number of bytes that this UTF-16 + * string would require in UTF-8 format. + * + * This function does not validate the input. It is acceptable to pass invalid + * UTF-16 strings but in such cases the result is implementation defined. 
+ * + * @param input the UTF-16 string to convert + * @param length the length of the string in 2-byte code units (char16_t) + * @return the number of bytes required to encode the UTF-16 string as UTF-8 + */ +simdutf_warn_unused size_t utf8_length_from_utf16(const char16_t *input, + size_t length) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +utf8_length_from_utf16(std::span valid_utf16_input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::utf8_length_from_utf16( + valid_utf16_input.data(), valid_utf16_input.size()); + } else + #endif + { + return utf8_length_from_utf16(valid_utf16_input.data(), + valid_utf16_input.size()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Using native endianness; compute the number of bytes that this UTF-16 + * string would require in UTF-8 format even when the UTF-16 content contains + * mismatched surrogates that have to be replaced by the replacement character + * (0xFFFD). + * + * @param input the UTF-16 string to convert + * @param length the length of the string in 2-byte code units (char16_t) + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) where the count is the number of bytes required to + * encode the UTF-16 string as UTF-8, and the error code is either SUCCESS or + * SURROGATE. The count is correct regardless of the error field. + * When SURROGATE is returned, it does not indicate an error in the case of this + * function: it indicates that at least one surrogate has been encountered: the + * surrogates may be matched or not (thus this function does not validate). If + * the returned error code is SUCCESS, then the input contains no surrogate, is + * in the Basic Multilingual Plane, and is necessarily valid. 
+ */ +simdutf_warn_unused result utf8_length_from_utf16_with_replacement( + const char16_t *input, size_t length) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result +utf8_length_from_utf16_with_replacement( + std::span valid_utf16_input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::utf8_length_from_utf16_with_replacement< + endianness::NATIVE>(valid_utf16_input.data(), valid_utf16_input.size()); + } else + #endif + { + return utf8_length_from_utf16_with_replacement(valid_utf16_input.data(), + valid_utf16_input.size()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Compute the number of bytes that this UTF-16LE string would require in UTF-8 + * format. + * + * This function does not validate the input. It is acceptable to pass invalid + * UTF-16 strings but in such cases the result is implementation defined. + * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte code units (char16_t) + * @return the number of bytes required to encode the UTF-16LE string as UTF-8 + */ +simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t *input, + size_t length) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused size_t +utf8_length_from_utf16le(std::span valid_utf16_input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::utf8_length_from_utf16( + valid_utf16_input.data(), valid_utf16_input.size()); + } else + #endif + { + return utf8_length_from_utf16le(valid_utf16_input.data(), + valid_utf16_input.size()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Compute the number of bytes that this UTF-16BE string would require in UTF-8 + * format. + * + * This function does not validate the input. It is acceptable to pass invalid + * UTF-16 strings but in such cases the result is implementation defined. 
+ * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte code units (char16_t) + * @return the number of bytes required to encode the UTF-16BE string as UTF-8 + */ +simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t *input, + size_t length) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +utf8_length_from_utf16be(std::span valid_utf16_input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::utf8_length_from_utf16( + valid_utf16_input.data(), valid_utf16_input.size()); + } else + #endif + { + return utf8_length_from_utf16be(valid_utf16_input.data(), + valid_utf16_input.size()); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +/** + * Convert possibly broken UTF-32 string into UTF-8 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte code units (char32_t) + * @param utf8_buffer the pointer to buffer that can hold conversion result + * @return number of written code units; 0 if input is not a valid UTF-32 string + */ +simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t *input, + size_t length, + char *utf8_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_utf32_to_utf8( + std::span utf32_input, + detail::output_span_of_byte_like auto &&utf8_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf32_to_utf8::convert( + utf32_input.data(), utf32_input.size(), utf8_output.data()); + } else + #endif + { + return convert_utf32_to_utf8(utf32_input.data(), utf32_input.size(), + reinterpret_cast(utf8_output.data())); + } +} + #endif // SIMDUTF_SPAN + +/** + * Convert possibly broken UTF-32 string into UTF-8 string and stop on error. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte code units (char32_t) + * @param utf8_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of char written if + * successful. 
+ */ +simdutf_warn_unused result convert_utf32_to_utf8_with_errors( + const char32_t *input, size_t length, char *utf8_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result +convert_utf32_to_utf8_with_errors( + std::span utf32_input, + detail::output_span_of_byte_like auto &&utf8_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf32_to_utf8::convert_with_errors( + utf32_input.data(), utf32_input.size(), utf8_output.data()); + } else + #endif + { + return convert_utf32_to_utf8_with_errors( + utf32_input.data(), utf32_input.size(), + reinterpret_cast(utf8_output.data())); + } +} + #endif // SIMDUTF_SPAN + +/** + * Convert valid UTF-32 string into UTF-8 string. + * + * This function assumes that the input string is valid UTF-32. + * + * This function is not BOM-aware. + * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte code units (char32_t) + * @param utf8_buffer the pointer to a buffer that can hold the conversion + * result + * @return number of written code units; 0 if conversion is not possible + */ +simdutf_warn_unused size_t convert_valid_utf32_to_utf8( + const char32_t *input, size_t length, char *utf8_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_valid_utf32_to_utf8( + std::span valid_utf32_input, + detail::output_span_of_byte_like auto &&utf8_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf32_to_utf8::convert_valid( + valid_utf32_input.data(), valid_utf32_input.size(), utf8_output.data()); + } else + #endif + { + return convert_valid_utf32_to_utf8( + valid_utf32_input.data(), valid_utf32_input.size(), + reinterpret_cast(utf8_output.data())); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +/** + * Using native endianness, convert 
possibly broken UTF-32 string into a UTF-16 + * string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte code units (char32_t) + * @param utf16_buffer the pointer to buffer that can hold conversion result + * @return number of written code units; 0 if input is not a valid UTF-32 string + */ +simdutf_warn_unused size_t convert_utf32_to_utf16( + const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_utf32_to_utf16(std::span utf32_input, + std::span utf16_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf32_to_utf16::convert( + utf32_input.data(), utf32_input.size(), utf16_output.data()); + } else + #endif + { + return convert_utf32_to_utf16(utf32_input.data(), utf32_input.size(), + utf16_output.data()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Convert possibly broken UTF-32 string into UTF-16LE string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte code units (char32_t) + * @param utf16_buffer the pointer to buffer that can hold conversion result + * @return number of written code units; 0 if input is not a valid UTF-32 string + */ +simdutf_warn_unused size_t convert_utf32_to_utf16le( + const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_utf32_to_utf16le(std::span utf32_input, + std::span utf16_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf32_to_utf16::convert( + utf32_input.data(), utf32_input.size(), utf16_output.data()); + } else + #endif + { + return convert_utf32_to_utf16le(utf32_input.data(), utf32_input.size(), + utf16_output.data()); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +/** + * Convert possibly broken UTF-32 string into Latin1 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte code units (char32_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return number of written code units; 0 if input is not a valid UTF-32 string + * or if it cannot be represented as Latin1 + */ +simdutf_warn_unused size_t convert_utf32_to_latin1( + const char32_t *input, size_t length, char *latin1_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_utf32_to_latin1( + std::span utf32_input, + detail::output_span_of_byte_like auto &&latin1_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf32_to_latin1::convert( + utf32_input.data(), utf32_input.size(), latin1_output.data()); + } else + #endif + { + return convert_utf32_to_latin1( + utf32_input.data(), utf32_input.size(), + reinterpret_cast(latin1_output.data())); + } +} + #endif // SIMDUTF_SPAN + +/** + * Convert possibly broken UTF-32 string into Latin1 string and stop on error. + * If the string cannot be represented as Latin1, an error is returned. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte code units (char32_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of char written if + * successful. 
+ */ +simdutf_warn_unused result convert_utf32_to_latin1_with_errors( + const char32_t *input, size_t length, char *latin1_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result +convert_utf32_to_latin1_with_errors( + std::span utf32_input, + detail::output_span_of_byte_like auto &&latin1_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf32_to_latin1::convert_with_errors( + utf32_input.data(), utf32_input.size(), latin1_output.data()); + } else + #endif + { + return convert_utf32_to_latin1_with_errors( + utf32_input.data(), utf32_input.size(), + reinterpret_cast(latin1_output.data())); + } +} + #endif // SIMDUTF_SPAN + +/** + * Convert valid UTF-32 string into Latin1 string. + * + * This function assumes that the input string is valid UTF-32 and that it can + * be represented as Latin1. If you violate this assumption, the result is + * implementation defined and may include system-dependent behavior such as + * crashes. + * + * This function is for expert users only and not part of our public API. Use + * convert_utf32_to_latin1 instead. The function may be removed from the library + * in the future. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte code units (char32_t) + * @param latin1_buffer the pointer to a buffer that can hold the conversion + * result + * @return number of written code units; 0 if conversion is not possible + */ +simdutf_warn_unused size_t convert_valid_utf32_to_latin1( + const char32_t *input, size_t length, char *latin1_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused size_t +convert_valid_utf32_to_latin1( + std::span valid_utf32_input, + detail::output_span_of_byte_like auto &&latin1_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf32_to_latin1::convert_valid( + detail::constexpr_cast_ptr(valid_utf32_input.data()), + valid_utf32_input.size(), + detail::constexpr_cast_writeptr(latin1_output.data())); + } + #endif + { + return convert_valid_utf32_to_latin1( + valid_utf32_input.data(), valid_utf32_input.size(), + reinterpret_cast(latin1_output.data())); + } +} + #endif // SIMDUTF_SPAN + +/** + * Compute the number of bytes that this UTF-32 string would require in Latin1 + * format. + * + * This function does not validate the input. It is acceptable to pass invalid + * UTF-32 strings but in such cases the result is implementation defined. + * + * This function is not BOM-aware. + * + * @param length the length of the string in 4-byte code units (char32_t) + * @return the number of bytes required to encode the UTF-32 string as Latin1 + */ +simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 size_t +latin1_length_from_utf32(size_t length) noexcept { + return length; +} + +/** + * Compute the number of bytes that this Latin1 string would require in UTF-32 + * format. 
+ * + * @param length the length of the string in Latin1 code units (char) + * @return the length of the string in 4-byte code units (char32_t) required to + * encode the Latin1 string as UTF-32 + */ +simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 size_t +utf32_length_from_latin1(size_t length) noexcept { + return length; +} +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +/** + * Convert possibly broken UTF-32 string into UTF-16BE string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte code units (char32_t) + * @param utf16_buffer the pointer to buffer that can hold conversion result + * @return number of written code units; 0 if input is not a valid UTF-32 string + */ +simdutf_warn_unused size_t convert_utf32_to_utf16be( + const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_utf32_to_utf16be(std::span utf32_input, + std::span utf16_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf32_to_utf16::convert( + utf32_input.data(), utf32_input.size(), utf16_output.data()); + } else + #endif + { + return convert_utf32_to_utf16be(utf32_input.data(), utf32_input.size(), + utf16_output.data()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Using native endianness, convert possibly broken UTF-32 string into UTF-16 + * string and stop on error. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte code units (char32_t) + * @param utf16_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of char16_t written if + * successful. + */ +simdutf_warn_unused result convert_utf32_to_utf16_with_errors( + const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result +convert_utf32_to_utf16_with_errors(std::span utf32_input, + std::span utf16_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf32_to_utf16::convert_with_errors( + utf32_input.data(), utf32_input.size(), utf16_output.data()); + } else + #endif + { + return convert_utf32_to_utf16_with_errors( + utf32_input.data(), utf32_input.size(), utf16_output.data()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Convert possibly broken UTF-32 string into UTF-16LE string and stop on error. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte code units (char32_t) + * @param utf16_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of char16_t written if + * successful. 
+ */ +simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( + const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result +convert_utf32_to_utf16le_with_errors( + std::span utf32_input, + std::span utf16_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf32_to_utf16::convert_with_errors( + utf32_input.data(), utf32_input.size(), utf16_output.data()); + } else + #endif + { + return convert_utf32_to_utf16le_with_errors( + utf32_input.data(), utf32_input.size(), utf16_output.data()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Convert possibly broken UTF-32 string into UTF-16BE string and stop on error. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte code units (char32_t) + * @param utf16_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of char16_t written if + * successful. 
+ */ +simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( + const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result +convert_utf32_to_utf16be_with_errors( + std::span utf32_input, + std::span utf16_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf32_to_utf16::convert_with_errors( + utf32_input.data(), utf32_input.size(), utf16_output.data()); + } else + #endif + { + return convert_utf32_to_utf16be_with_errors( + utf32_input.data(), utf32_input.size(), utf16_output.data()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Using native endianness, convert valid UTF-32 string into a UTF-16 string. + * + * This function assumes that the input string is valid UTF-32. + * + * This function is not BOM-aware. + * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte code units (char32_t) + * @param utf16_buffer the pointer to a buffer that can hold the conversion + * result + * @return number of written code units; 0 if conversion is not possible + */ +simdutf_warn_unused size_t convert_valid_utf32_to_utf16( + const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_valid_utf32_to_utf16(std::span valid_utf32_input, + std::span utf16_output) noexcept { + + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf32_to_utf16::convert_valid( + valid_utf32_input.data(), valid_utf32_input.size(), + utf16_output.data()); + } else + #endif + { + return convert_valid_utf32_to_utf16(valid_utf32_input.data(), + valid_utf32_input.size(), + utf16_output.data()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Convert valid UTF-32 string into UTF-16LE string. + * + * This function assumes that the input string is valid UTF-32. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte code units (char32_t) + * @param utf16_buffer the pointer to a buffer that can hold the conversion + * result + * @return number of written code units; 0 if conversion is not possible + */ +simdutf_warn_unused size_t convert_valid_utf32_to_utf16le( + const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_valid_utf32_to_utf16le(std::span valid_utf32_input, + std::span utf16_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf32_to_utf16::convert_valid( + valid_utf32_input.data(), valid_utf32_input.size(), + utf16_output.data()); + } else + #endif + { + return convert_valid_utf32_to_utf16le(valid_utf32_input.data(), + valid_utf32_input.size(), + utf16_output.data()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Convert valid UTF-32 string into UTF-16BE string. + * + * This function assumes that the input string is valid UTF-32. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte code units (char32_t) + * @param utf16_buffer the pointer to a buffer that can hold the conversion + * result + * @return number of written code units; 0 if conversion is not possible + */ +simdutf_warn_unused size_t convert_valid_utf32_to_utf16be( + const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +convert_valid_utf32_to_utf16be(std::span valid_utf32_input, + std::span utf16_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf32_to_utf16::convert_valid( + valid_utf32_input.data(), valid_utf32_input.size(), + utf16_output.data()); + } else + #endif + { + return convert_valid_utf32_to_utf16be(valid_utf32_input.data(), + valid_utf32_input.size(), + utf16_output.data()); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 +/** + * Change the endianness of the input. Can be used to go from UTF-16LE to + * UTF-16BE or from UTF-16BE to UTF-16LE. + * + * This function does not validate the input. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-16 string to process + * @param length the length of the string in 2-byte code units (char16_t) + * @param output the pointer to a buffer that can hold the conversion + * result + */ +void change_endianness_utf16(const char16_t *input, size_t length, + char16_t *output) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_constexpr23 void +change_endianness_utf16(std::span utf16_input, + std::span utf16_output) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::change_endianness_utf16( + utf16_input.data(), utf16_input.size(), utf16_output.data()); + } else + #endif + { + return change_endianness_utf16(utf16_input.data(), utf16_input.size(), + utf16_output.data()); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +/** + * Compute the number of bytes that this UTF-32 string would require in UTF-8 + * format. + * + * This function does not validate the input. It is acceptable to pass invalid + * UTF-32 strings but in such cases the result is implementation defined. 
+ * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte code units (char32_t) + * @return the number of bytes required to encode the UTF-32 string as UTF-8 + */ +simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t *input, + size_t length) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +utf8_length_from_utf32(std::span valid_utf32_input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf32::utf8_length_from_utf32(valid_utf32_input.data(), + valid_utf32_input.size()); + } else + #endif + { + return utf8_length_from_utf32(valid_utf32_input.data(), + valid_utf32_input.size()); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +/** + * Compute the number of two-byte code units that this UTF-32 string would + * require in UTF-16 format. + * + * This function does not validate the input. It is acceptable to pass invalid + * UTF-32 strings but in such cases the result is implementation defined. 
+ * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte code units (char32_t) + * @return the number of 2-byte code units needed to encode the input as UTF-16 + */ +simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t *input, + size_t length) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +utf16_length_from_utf32(std::span valid_utf32_input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf32::utf16_length_from_utf32(valid_utf32_input.data(), + valid_utf32_input.size()); + } else + #endif + { + return utf16_length_from_utf32(valid_utf32_input.data(), + valid_utf32_input.size()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Using native endianness, compute the number of 4-byte code units that this + * UTF-16 string would require in UTF-32 format. + * + * This function is equivalent to count_utf16. + * + * This function does not validate the input. It is acceptable to pass invalid + * UTF-16 strings but in such cases the result is implementation defined. + * + * This function is not BOM-aware. + * + * @param input the UTF-16 string to convert + * @param length the length of the string in 2-byte code units (char16_t) + * @return the number of 4-byte code units needed to encode the input as UTF-32 + */ +simdutf_warn_unused size_t utf32_length_from_utf16(const char16_t *input, + size_t length) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +utf32_length_from_utf16(std::span valid_utf16_input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::utf32_length_from_utf16( + valid_utf16_input.data(), valid_utf16_input.size()); + } else + #endif + { + return utf32_length_from_utf16(valid_utf16_input.data(), + valid_utf16_input.size()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Compute the number of 4-byte code units that this UTF-16LE string would + * require in UTF-32 format.
+ * + * This function is equivalent to count_utf16le. + * + * This function does not validate the input. It is acceptable to pass invalid + * UTF-16 strings but in such cases the result is implementation defined. + * + * This function is not BOM-aware. + * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte code units (char16_t) + * @return the number of 4-byte code units needed to encode the input as UTF-32 + */ +simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t *input, + size_t length) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +utf32_length_from_utf16le( + std::span valid_utf16_input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::utf32_length_from_utf16( + valid_utf16_input.data(), valid_utf16_input.size()); + } else + #endif + { + return utf32_length_from_utf16le(valid_utf16_input.data(), + valid_utf16_input.size()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Compute the number of 4-byte code units that this UTF-16BE string would + * require in UTF-32 format. + * + * This function is equivalent to count_utf16be. + * + * This function does not validate the input. It is acceptable to pass invalid + * UTF-16 strings but in such cases the result is implementation defined. + * + * This function is not BOM-aware.
+ * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte code units (char16_t) + * @return the number of 4-byte code units needed to encode the input as UTF-32 + */ +simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t *input, + size_t length) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +utf32_length_from_utf16be( + std::span valid_utf16_input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::utf32_length_from_utf16( + valid_utf16_input.data(), valid_utf16_input.size()); + } else + #endif + { + return utf32_length_from_utf16be(valid_utf16_input.data(), + valid_utf16_input.size()); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 +/** + * Count the number of code points (characters) in the string assuming that + * it is valid. + * + * This function assumes that the input string is valid UTF-16 (native + * endianness). It is acceptable to pass invalid UTF-16 strings but in such + * cases the result is implementation defined. + * + * This function is not BOM-aware. + * + * @param input the UTF-16 string to process + * @param length the length of the string in 2-byte code units (char16_t) + * @return number of code points + */ +simdutf_warn_unused size_t count_utf16(const char16_t *input, + size_t length) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +count_utf16(std::span valid_utf16_input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::count_code_points( + valid_utf16_input.data(), valid_utf16_input.size()); + } else + #endif + { + return count_utf16(valid_utf16_input.data(), valid_utf16_input.size()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Count the number of code points (characters) in the string assuming that + * it is valid.
+ * + * This function assumes that the input string is valid UTF-16LE. + * It is acceptable to pass invalid UTF-16 strings but in such cases + * the result is implementation defined. + * + * This function is not BOM-aware. + * + * @param input the UTF-16LE string to process + * @param length the length of the string in 2-byte code units (char16_t) + * @return number of code points + */ +simdutf_warn_unused size_t count_utf16le(const char16_t *input, + size_t length) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +count_utf16le(std::span valid_utf16_input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::count_code_points( + valid_utf16_input.data(), valid_utf16_input.size()); + } else + #endif + { + return count_utf16le(valid_utf16_input.data(), valid_utf16_input.size()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Count the number of code points (characters) in the string assuming that + * it is valid. + * + * This function assumes that the input string is valid UTF-16BE. + * It is acceptable to pass invalid UTF-16 strings but in such cases + * the result is implementation defined. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-16BE string to process + * @param length the length of the string in 2-byte code units (char16_t) + * @return number of code points + */ +simdutf_warn_unused size_t count_utf16be(const char16_t *input, + size_t length) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +count_utf16be(std::span valid_utf16_input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::count_code_points( + valid_utf16_input.data(), valid_utf16_input.size()); + } else + #endif + { + return count_utf16be(valid_utf16_input.data(), valid_utf16_input.size()); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 +/** + * Count the number of code points (characters) in the string assuming that + * it is valid. + * + * This function assumes that the input string is valid UTF-8. + * It is acceptable to pass invalid UTF-8 strings but in such cases + * the result is implementation defined. + * + * @param input the UTF-8 string to process + * @param length the length of the string in bytes + * @return number of code points + */ +simdutf_warn_unused size_t count_utf8(const char *input, + size_t length) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t count_utf8( + const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8::count_code_points(valid_utf8_input.data(), + valid_utf8_input.size()); + } else + #endif + { + return count_utf8(reinterpret_cast(valid_utf8_input.data()), + valid_utf8_input.size()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Given a valid UTF-8 string having a possibly truncated last character, + * this function checks the end of string. 
If the last character is truncated + * (or partial), then it returns a shorter length (shorter by 1 to 3 bytes) so + * that the short UTF-8 strings only contain complete characters. If there is no + * truncated character, the original length is returned. + * + * This function assumes that the input string is valid UTF-8, but possibly + * truncated. + * + * @param input the UTF-8 string to process + * @param length the length of the string in bytes + * @return the length of the string in bytes, possibly shorter by 1 to 3 bytes + */ +simdutf_warn_unused size_t trim_partial_utf8(const char *input, size_t length); + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +trim_partial_utf8( + const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf8::trim_partial_utf8(valid_utf8_input.data(), + valid_utf8_input.size()); + } else + #endif + { + return trim_partial_utf8( + reinterpret_cast(valid_utf8_input.data()), + valid_utf8_input.size()); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF16 +/** + * Given a valid UTF-16BE string having a possibly truncated last character, + * this function checks the end of string. If the last character is truncated + * (or partial), then it returns a shorter length (shorter by 1 unit) so that + * the short UTF-16BE strings only contain complete characters. If there is no + * truncated character, the original length is returned. + * + * This function assumes that the input string is valid UTF-16BE, but possibly + * truncated. 
+ * + * @param input the UTF-16BE string to process + * @param length the length of the string in 2-byte code units (char16_t) + * @return the length of the string in code units, possibly shorter by 1 unit + */ +simdutf_warn_unused size_t trim_partial_utf16be(const char16_t *input, + size_t length); + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +trim_partial_utf16be(std::span valid_utf16_input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::trim_partial_utf16( + valid_utf16_input.data(), valid_utf16_input.size()); + } else + #endif + { + return trim_partial_utf16be(valid_utf16_input.data(), + valid_utf16_input.size()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Given a valid UTF-16LE string having a possibly truncated last character, + * this function checks the end of string. If the last character is truncated + * (or partial), then it returns a shorter length (shorter by 1 unit) so that + * the short UTF-16LE strings only contain complete characters. If there is no + * truncated character, the original length is returned. + * + * This function assumes that the input string is valid UTF-16LE, but possibly + * truncated.
+ * + * @param input the UTF-16LE string to process + * @param length the length of the string in 2-byte code units (char16_t) + * @return the length of the string in code units, possibly shorter by 1 unit + */ +simdutf_warn_unused size_t trim_partial_utf16le(const char16_t *input, + size_t length); + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +trim_partial_utf16le(std::span valid_utf16_input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::trim_partial_utf16( + valid_utf16_input.data(), valid_utf16_input.size()); + } else + #endif + { + return trim_partial_utf16le(valid_utf16_input.data(), + valid_utf16_input.size()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Given a valid UTF-16 string having a possibly truncated last character, + * this function checks the end of string. If the last character is truncated + * (or partial), then it returns a shorter length (shorter by 1 unit) so that + * the short UTF-16 strings only contain complete characters. If there is no + * truncated character, the original length is returned. + * + * This function assumes that the input string is valid UTF-16, but possibly + * truncated. We use the native endianness.
+ * + * @param input the UTF-16 string to process + * @param length the length of the string in 2-byte code units (char16_t) + * @return the length of the string in code units, possibly shorter by 1 unit + */ +simdutf_warn_unused size_t trim_partial_utf16(const char16_t *input, + size_t length); + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +trim_partial_utf16(std::span valid_utf16_input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::utf16::trim_partial_utf16( + valid_utf16_input.data(), valid_utf16_input.size()); + } else + #endif + { + return trim_partial_utf16(valid_utf16_input.data(), + valid_utf16_input.size()); + } +} + #endif // SIMDUTF_SPAN +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_BASE64 || SIMDUTF_FEATURE_UTF16 || \ + SIMDUTF_FEATURE_DETECT_ENCODING + #ifndef SIMDUTF_NEED_TRAILING_ZEROES + #define SIMDUTF_NEED_TRAILING_ZEROES 1 + #endif +#endif // SIMDUTF_FEATURE_BASE64 || SIMDUTF_FEATURE_UTF16 || + // SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_BASE64 +// base64_options are used to specify the base64 encoding options. +// ASCII spaces are ' ', '\t', '\n', '\r', '\f' +// garbage characters are characters that are not part of the base64 alphabet +// nor ASCII spaces.
+constexpr uint64_t base64_reverse_padding = + 2; /* modifier for base64_default and base64_url */ +enum base64_options : uint64_t { + base64_default = 0, /* standard base64 format (with padding) */ + base64_url = 1, /* base64url format (no padding) */ + base64_default_no_padding = + base64_default | + base64_reverse_padding, /* standard base64 format without padding */ + base64_url_with_padding = + base64_url | base64_reverse_padding, /* base64url with padding */ + base64_default_accept_garbage = + 4, /* standard base64 format accepting garbage characters, the input stops + with the first '=' if any */ + base64_url_accept_garbage = + 5, /* base64url format accepting garbage characters, the input stops with + the first '=' if any */ + base64_default_or_url = + 8, /* standard/base64url hybrid format (only meaningful for decoding!) */ + base64_default_or_url_accept_garbage = + 12, /* standard/base64url hybrid format accepting garbage characters + (only meaningful for decoding!), the input stops with the first '=' + if any */ +}; + +// last_chunk_handling_options are used to specify the handling of the last +// chunk in base64 decoding. 
+// https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64 +enum last_chunk_handling_options : uint64_t { + loose = 0, /* standard base64 format, decode partial final chunk */ + strict = 1, /* error when the last chunk is partial, 2 or 3 chars, and + unpadded, or non-zero bit padding */ + stop_before_partial = + 2, /* if the last chunk is partial, ignore it (no error) */ + only_full_chunks = + 3 /* only decode full blocks (4 base64 characters, no padding) */ +}; + +inline simdutf_constexpr23 bool +is_partial(last_chunk_handling_options options) { + return (options == stop_before_partial) || (options == only_full_chunks); +} + +namespace detail { +simdutf_warn_unused const char *find(const char *start, const char *end, + char character) noexcept; +simdutf_warn_unused const char16_t * +find(const char16_t *start, const char16_t *end, char16_t character) noexcept; +} // namespace detail + +/** + * Find the first occurrence of a character in a string. If the character is + * not found, return a pointer to the end of the string. + * @param start the start of the string + * @param end the end of the string + * @param character the character to find + * @return a pointer to the first occurrence of the character in the string, + * or a pointer to the end of the string if the character is not found. 
+ * + */ +simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 const char * +find(const char *start, const char *end, char character) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + for (; start != end; ++start) + if (*start == character) + return start; + return end; + } else + #endif + { + return detail::find(start, end, character); + } +} +simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 const char16_t * +find(const char16_t *start, const char16_t *end, char16_t character) noexcept { + // implementation note: this is repeated instead of a template, to ensure + // the api is still a function and compiles without concepts + #if SIMDUTF_CPLUSPLUS23 + if consteval { + for (; start != end; ++start) + if (*start == character) + return start; + return end; + } else + #endif + { + return detail::find(start, end, character); + } +} +} + // We include base64_tables once. +/* begin file include/simdutf/base64_tables.h */ +#ifndef SIMDUTF_BASE64_TABLES_H +#define SIMDUTF_BASE64_TABLES_H +#include + +namespace simdutf { +namespace { +namespace tables { +namespace base64 { +namespace base64_default { + +constexpr char e0[256] = { + 'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'C', 'C', 'C', 'C', 'D', 'D', 'D', + 'D', 'E', 'E', 'E', 'E', 'F', 'F', 'F', 'F', 'G', 'G', 'G', 'G', 'H', 'H', + 'H', 'H', 'I', 'I', 'I', 'I', 'J', 'J', 'J', 'J', 'K', 'K', 'K', 'K', 'L', + 'L', 'L', 'L', 'M', 'M', 'M', 'M', 'N', 'N', 'N', 'N', 'O', 'O', 'O', 'O', + 'P', 'P', 'P', 'P', 'Q', 'Q', 'Q', 'Q', 'R', 'R', 'R', 'R', 'S', 'S', 'S', + 'S', 'T', 'T', 'T', 'T', 'U', 'U', 'U', 'U', 'V', 'V', 'V', 'V', 'W', 'W', + 'W', 'W', 'X', 'X', 'X', 'X', 'Y', 'Y', 'Y', 'Y', 'Z', 'Z', 'Z', 'Z', 'a', + 'a', 'a', 'a', 'b', 'b', 'b', 'b', 'c', 'c', 'c', 'c', 'd', 'd', 'd', 'd', + 'e', 'e', 'e', 'e', 'f', 'f', 'f', 'f', 'g', 'g', 'g', 'g', 'h', 'h', 'h', + 'h', 'i', 'i', 'i', 'i', 'j', 'j', 'j', 'j', 'k', 'k', 'k', 'k', 'l', 'l', + 'l', 'l', 'm', 'm', 'm', 'm', 'n', 'n', 'n', 'n', 'o', 'o', 
'o', 'o', 'p', + 'p', 'p', 'p', 'q', 'q', 'q', 'q', 'r', 'r', 'r', 'r', 's', 's', 's', 's', + 't', 't', 't', 't', 'u', 'u', 'u', 'u', 'v', 'v', 'v', 'v', 'w', 'w', 'w', + 'w', 'x', 'x', 'x', 'x', 'y', 'y', 'y', 'y', 'z', 'z', 'z', 'z', '0', '0', + '0', '0', '1', '1', '1', '1', '2', '2', '2', '2', '3', '3', '3', '3', '4', + '4', '4', '4', '5', '5', '5', '5', '6', '6', '6', '6', '7', '7', '7', '7', + '8', '8', '8', '8', '9', '9', '9', '9', '+', '+', '+', '+', '/', '/', '/', + '/'}; + +constexpr char e1[256] = { + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', + 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', + 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', + 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', '+', '/', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', + 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', + '4', '5', '6', '7', '8', '9', '+', '/', 'A', 'B', 'C', 'D', 'E', 'F', 'G', + 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', + 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', + 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/', 'A', 'B', 'C', + 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', + 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', + 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', + '/'}; + +constexpr char e2[256] = { + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', + 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 
'a', 'b', 'c', 'd', + 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', + 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', '+', '/', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', + 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', + '4', '5', '6', '7', '8', '9', '+', '/', 'A', 'B', 'C', 'D', 'E', 'F', 'G', + 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', + 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', + 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/', 'A', 'B', 'C', + 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', + 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', + 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', + '/'}; + +constexpr uint32_t d0[256] = { + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x000000f8, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x000000fc, + 0x000000d0, 0x000000d4, 0x000000d8, 0x000000dc, 0x000000e0, 0x000000e4, + 0x000000e8, 0x000000ec, 0x000000f0, 0x000000f4, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 
0x01ffffff, 0x01ffffff, 0x00000000, + 0x00000004, 0x00000008, 0x0000000c, 0x00000010, 0x00000014, 0x00000018, + 0x0000001c, 0x00000020, 0x00000024, 0x00000028, 0x0000002c, 0x00000030, + 0x00000034, 0x00000038, 0x0000003c, 0x00000040, 0x00000044, 0x00000048, + 0x0000004c, 0x00000050, 0x00000054, 0x00000058, 0x0000005c, 0x00000060, + 0x00000064, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x00000068, 0x0000006c, 0x00000070, 0x00000074, 0x00000078, + 0x0000007c, 0x00000080, 0x00000084, 0x00000088, 0x0000008c, 0x00000090, + 0x00000094, 0x00000098, 0x0000009c, 0x000000a0, 0x000000a4, 0x000000a8, + 0x000000ac, 0x000000b0, 0x000000b4, 0x000000b8, 0x000000bc, 0x000000c0, + 0x000000c4, 0x000000c8, 0x000000cc, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 
0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; + +constexpr uint32_t d1[256] = { + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x0000e003, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x0000f003, + 0x00004003, 0x00005003, 0x00006003, 0x00007003, 0x00008003, 0x00009003, + 0x0000a003, 0x0000b003, 0x0000c003, 0x0000d003, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, + 0x00001000, 0x00002000, 0x00003000, 0x00004000, 0x00005000, 0x00006000, + 0x00007000, 0x00008000, 0x00009000, 0x0000a000, 0x0000b000, 0x0000c000, + 0x0000d000, 0x0000e000, 0x0000f000, 0x00000001, 0x00001001, 0x00002001, + 0x00003001, 0x00004001, 0x00005001, 0x00006001, 0x00007001, 0x00008001, + 0x00009001, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x0000a001, 0x0000b001, 0x0000c001, 0x0000d001, 0x0000e001, + 0x0000f001, 0x00000002, 0x00001002, 0x00002002, 0x00003002, 0x00004002, + 0x00005002, 0x00006002, 0x00007002, 0x00008002, 0x00009002, 0x0000a002, + 0x0000b002, 0x0000c002, 0x0000d002, 0x0000e002, 0x0000f002, 0x00000003, + 0x00001003, 0x00002003, 0x00003003, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 
0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; + +constexpr uint32_t d2[256] = { + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 
0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x00800f00, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00c00f00, + 0x00000d00, 0x00400d00, 0x00800d00, 0x00c00d00, 0x00000e00, 0x00400e00, + 0x00800e00, 0x00c00e00, 0x00000f00, 0x00400f00, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, + 0x00400000, 0x00800000, 0x00c00000, 0x00000100, 0x00400100, 0x00800100, + 0x00c00100, 0x00000200, 0x00400200, 0x00800200, 0x00c00200, 0x00000300, + 0x00400300, 0x00800300, 0x00c00300, 0x00000400, 0x00400400, 0x00800400, + 0x00c00400, 0x00000500, 0x00400500, 0x00800500, 0x00c00500, 0x00000600, + 0x00400600, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x00800600, 0x00c00600, 0x00000700, 0x00400700, 0x00800700, + 0x00c00700, 0x00000800, 0x00400800, 0x00800800, 0x00c00800, 0x00000900, + 0x00400900, 0x00800900, 0x00c00900, 0x00000a00, 0x00400a00, 0x00800a00, + 0x00c00a00, 0x00000b00, 0x00400b00, 0x00800b00, 0x00c00b00, 0x00000c00, + 0x00400c00, 0x00800c00, 0x00c00c00, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 
0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; + +constexpr uint32_t d3[256] = { + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x003e0000, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x003f0000, + 0x00340000, 0x00350000, 0x00360000, 0x00370000, 0x00380000, 0x00390000, + 0x003a0000, 0x003b0000, 0x003c0000, 0x003d0000, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, + 0x00010000, 0x00020000, 0x00030000, 0x00040000, 0x00050000, 0x00060000, + 0x00070000, 0x00080000, 0x00090000, 0x000a0000, 0x000b0000, 0x000c0000, + 0x000d0000, 0x000e0000, 0x000f0000, 0x00100000, 0x00110000, 0x00120000, + 0x00130000, 0x00140000, 0x00150000, 0x00160000, 0x00170000, 0x00180000, + 0x00190000, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 
0x01ffffff, 0x001a0000, 0x001b0000, 0x001c0000, 0x001d0000, 0x001e0000, + 0x001f0000, 0x00200000, 0x00210000, 0x00220000, 0x00230000, 0x00240000, + 0x00250000, 0x00260000, 0x00270000, 0x00280000, 0x00290000, 0x002a0000, + 0x002b0000, 0x002c0000, 0x002d0000, 0x002e0000, 0x002f0000, 0x00300000, + 0x00310000, 0x00320000, 0x00330000, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; +} // namespace 
base64_default + +namespace base64_url { + +constexpr char e0[256] = { + 'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'C', 'C', 'C', 'C', 'D', 'D', 'D', + 'D', 'E', 'E', 'E', 'E', 'F', 'F', 'F', 'F', 'G', 'G', 'G', 'G', 'H', 'H', + 'H', 'H', 'I', 'I', 'I', 'I', 'J', 'J', 'J', 'J', 'K', 'K', 'K', 'K', 'L', + 'L', 'L', 'L', 'M', 'M', 'M', 'M', 'N', 'N', 'N', 'N', 'O', 'O', 'O', 'O', + 'P', 'P', 'P', 'P', 'Q', 'Q', 'Q', 'Q', 'R', 'R', 'R', 'R', 'S', 'S', 'S', + 'S', 'T', 'T', 'T', 'T', 'U', 'U', 'U', 'U', 'V', 'V', 'V', 'V', 'W', 'W', + 'W', 'W', 'X', 'X', 'X', 'X', 'Y', 'Y', 'Y', 'Y', 'Z', 'Z', 'Z', 'Z', 'a', + 'a', 'a', 'a', 'b', 'b', 'b', 'b', 'c', 'c', 'c', 'c', 'd', 'd', 'd', 'd', + 'e', 'e', 'e', 'e', 'f', 'f', 'f', 'f', 'g', 'g', 'g', 'g', 'h', 'h', 'h', + 'h', 'i', 'i', 'i', 'i', 'j', 'j', 'j', 'j', 'k', 'k', 'k', 'k', 'l', 'l', + 'l', 'l', 'm', 'm', 'm', 'm', 'n', 'n', 'n', 'n', 'o', 'o', 'o', 'o', 'p', + 'p', 'p', 'p', 'q', 'q', 'q', 'q', 'r', 'r', 'r', 'r', 's', 's', 's', 's', + 't', 't', 't', 't', 'u', 'u', 'u', 'u', 'v', 'v', 'v', 'v', 'w', 'w', 'w', + 'w', 'x', 'x', 'x', 'x', 'y', 'y', 'y', 'y', 'z', 'z', 'z', 'z', '0', '0', + '0', '0', '1', '1', '1', '1', '2', '2', '2', '2', '3', '3', '3', '3', '4', + '4', '4', '4', '5', '5', '5', '5', '6', '6', '6', '6', '7', '7', '7', '7', + '8', '8', '8', '8', '9', '9', '9', '9', '-', '-', '-', '-', '_', '_', '_', + '_'}; + +constexpr char e1[256] = { + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', + 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', + 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', + 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', '-', '_', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', + 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 
'x', 'y', 'z', '0', '1', '2', '3', + '4', '5', '6', '7', '8', '9', '-', '_', 'A', 'B', 'C', 'D', 'E', 'F', 'G', + 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', + 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', + 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_', 'A', 'B', 'C', + 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', + 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', + 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', + '_'}; + +constexpr char e2[256] = { + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', + 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', + 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', + 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', '-', '_', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', + 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', + '4', '5', '6', '7', '8', '9', '-', '_', 'A', 'B', 'C', 'D', 'E', 'F', 'G', + 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', + 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', + 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_', 'A', 'B', 'C', + 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', + 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', + 
'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', + '_'}; + +constexpr uint32_t d0[256] = { + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x000000f8, 0x01ffffff, 0x01ffffff, + 0x000000d0, 0x000000d4, 0x000000d8, 0x000000dc, 0x000000e0, 0x000000e4, + 0x000000e8, 0x000000ec, 0x000000f0, 0x000000f4, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, + 0x00000004, 0x00000008, 0x0000000c, 0x00000010, 0x00000014, 0x00000018, + 0x0000001c, 0x00000020, 0x00000024, 0x00000028, 0x0000002c, 0x00000030, + 0x00000034, 0x00000038, 0x0000003c, 0x00000040, 0x00000044, 0x00000048, + 0x0000004c, 0x00000050, 0x00000054, 0x00000058, 0x0000005c, 0x00000060, + 0x00000064, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x000000fc, + 0x01ffffff, 0x00000068, 0x0000006c, 0x00000070, 0x00000074, 0x00000078, + 0x0000007c, 0x00000080, 0x00000084, 0x00000088, 0x0000008c, 0x00000090, + 0x00000094, 0x00000098, 0x0000009c, 0x000000a0, 0x000000a4, 0x000000a8, + 0x000000ac, 0x000000b0, 0x000000b4, 0x000000b8, 0x000000bc, 0x000000c0, + 0x000000c4, 0x000000c8, 0x000000cc, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 
0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; +constexpr uint32_t d1[256] = { + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x0000e003, 0x01ffffff, 0x01ffffff, + 0x00004003, 0x00005003, 0x00006003, 0x00007003, 0x00008003, 0x00009003, + 0x0000a003, 
0x0000b003, 0x0000c003, 0x0000d003, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, + 0x00001000, 0x00002000, 0x00003000, 0x00004000, 0x00005000, 0x00006000, + 0x00007000, 0x00008000, 0x00009000, 0x0000a000, 0x0000b000, 0x0000c000, + 0x0000d000, 0x0000e000, 0x0000f000, 0x00000001, 0x00001001, 0x00002001, + 0x00003001, 0x00004001, 0x00005001, 0x00006001, 0x00007001, 0x00008001, + 0x00009001, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x0000f003, + 0x01ffffff, 0x0000a001, 0x0000b001, 0x0000c001, 0x0000d001, 0x0000e001, + 0x0000f001, 0x00000002, 0x00001002, 0x00002002, 0x00003002, 0x00004002, + 0x00005002, 0x00006002, 0x00007002, 0x00008002, 0x00009002, 0x0000a002, + 0x0000b002, 0x0000c002, 0x0000d002, 0x0000e002, 0x0000f002, 0x00000003, + 0x00001003, 0x00002003, 0x00003003, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 
0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; +constexpr uint32_t d2[256] = { + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00800f00, 0x01ffffff, 0x01ffffff, + 0x00000d00, 0x00400d00, 0x00800d00, 0x00c00d00, 0x00000e00, 0x00400e00, + 0x00800e00, 0x00c00e00, 0x00000f00, 0x00400f00, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, + 0x00400000, 0x00800000, 0x00c00000, 0x00000100, 0x00400100, 0x00800100, + 0x00c00100, 0x00000200, 0x00400200, 0x00800200, 0x00c00200, 0x00000300, + 0x00400300, 0x00800300, 0x00c00300, 0x00000400, 0x00400400, 0x00800400, + 0x00c00400, 0x00000500, 0x00400500, 0x00800500, 0x00c00500, 0x00000600, + 0x00400600, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00c00f00, + 0x01ffffff, 0x00800600, 0x00c00600, 0x00000700, 0x00400700, 0x00800700, + 0x00c00700, 0x00000800, 0x00400800, 0x00800800, 0x00c00800, 0x00000900, + 0x00400900, 0x00800900, 0x00c00900, 0x00000a00, 0x00400a00, 0x00800a00, + 0x00c00a00, 0x00000b00, 0x00400b00, 0x00800b00, 0x00c00b00, 0x00000c00, + 
0x00400c00, 0x00800c00, 0x00c00c00, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; +constexpr uint32_t d3[256] = { + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 
0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x003e0000, 0x01ffffff, 0x01ffffff, + 0x00340000, 0x00350000, 0x00360000, 0x00370000, 0x00380000, 0x00390000, + 0x003a0000, 0x003b0000, 0x003c0000, 0x003d0000, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, + 0x00010000, 0x00020000, 0x00030000, 0x00040000, 0x00050000, 0x00060000, + 0x00070000, 0x00080000, 0x00090000, 0x000a0000, 0x000b0000, 0x000c0000, + 0x000d0000, 0x000e0000, 0x000f0000, 0x00100000, 0x00110000, 0x00120000, + 0x00130000, 0x00140000, 0x00150000, 0x00160000, 0x00170000, 0x00180000, + 0x00190000, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x003f0000, + 0x01ffffff, 0x001a0000, 0x001b0000, 0x001c0000, 0x001d0000, 0x001e0000, + 0x001f0000, 0x00200000, 0x00210000, 0x00220000, 0x00230000, 0x00240000, + 0x00250000, 0x00260000, 0x00270000, 0x00280000, 0x00290000, 0x002a0000, + 0x002b0000, 0x002c0000, 0x002d0000, 0x002e0000, 0x002f0000, 0x00300000, + 0x00310000, 0x00320000, 0x00330000, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 
0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; +} // namespace base64_url + +namespace base64_default_or_url { +constexpr uint32_t d0[256] = { + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x000000f8, 0x01ffffff, 0x000000f8, 0x01ffffff, 0x000000fc, + 0x000000d0, 0x000000d4, 0x000000d8, 0x000000dc, 0x000000e0, 0x000000e4, + 0x000000e8, 0x000000ec, 0x000000f0, 0x000000f4, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, + 0x00000004, 0x00000008, 0x0000000c, 0x00000010, 0x00000014, 0x00000018, + 0x0000001c, 0x00000020, 0x00000024, 0x00000028, 0x0000002c, 0x00000030, + 0x00000034, 0x00000038, 0x0000003c, 0x00000040, 0x00000044, 
0x00000048, + 0x0000004c, 0x00000050, 0x00000054, 0x00000058, 0x0000005c, 0x00000060, + 0x00000064, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x000000fc, + 0x01ffffff, 0x00000068, 0x0000006c, 0x00000070, 0x00000074, 0x00000078, + 0x0000007c, 0x00000080, 0x00000084, 0x00000088, 0x0000008c, 0x00000090, + 0x00000094, 0x00000098, 0x0000009c, 0x000000a0, 0x000000a4, 0x000000a8, + 0x000000ac, 0x000000b0, 0x000000b4, 0x000000b8, 0x000000bc, 0x000000c0, + 0x000000c4, 0x000000c8, 0x000000cc, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 
0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; +constexpr uint32_t d1[256] = { + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x0000e003, 0x01ffffff, 0x0000e003, 0x01ffffff, 0x0000f003, + 0x00004003, 0x00005003, 0x00006003, 0x00007003, 0x00008003, 0x00009003, + 0x0000a003, 0x0000b003, 0x0000c003, 0x0000d003, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, + 0x00001000, 0x00002000, 0x00003000, 0x00004000, 0x00005000, 0x00006000, + 0x00007000, 0x00008000, 0x00009000, 0x0000a000, 0x0000b000, 0x0000c000, + 0x0000d000, 0x0000e000, 0x0000f000, 0x00000001, 0x00001001, 0x00002001, + 0x00003001, 0x00004001, 0x00005001, 0x00006001, 0x00007001, 0x00008001, + 0x00009001, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x0000f003, + 0x01ffffff, 0x0000a001, 0x0000b001, 0x0000c001, 0x0000d001, 0x0000e001, + 0x0000f001, 0x00000002, 0x00001002, 0x00002002, 0x00003002, 0x00004002, + 0x00005002, 0x00006002, 0x00007002, 0x00008002, 0x00009002, 0x0000a002, + 0x0000b002, 0x0000c002, 0x0000d002, 0x0000e002, 0x0000f002, 0x00000003, + 0x00001003, 0x00002003, 0x00003003, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 
0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; +constexpr uint32_t d2[256] = { + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x00800f00, 0x01ffffff, 0x00800f00, 0x01ffffff, 0x00c00f00, + 0x00000d00, 0x00400d00, 0x00800d00, 
0x00c00d00, 0x00000e00, 0x00400e00, + 0x00800e00, 0x00c00e00, 0x00000f00, 0x00400f00, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, + 0x00400000, 0x00800000, 0x00c00000, 0x00000100, 0x00400100, 0x00800100, + 0x00c00100, 0x00000200, 0x00400200, 0x00800200, 0x00c00200, 0x00000300, + 0x00400300, 0x00800300, 0x00c00300, 0x00000400, 0x00400400, 0x00800400, + 0x00c00400, 0x00000500, 0x00400500, 0x00800500, 0x00c00500, 0x00000600, + 0x00400600, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00c00f00, + 0x01ffffff, 0x00800600, 0x00c00600, 0x00000700, 0x00400700, 0x00800700, + 0x00c00700, 0x00000800, 0x00400800, 0x00800800, 0x00c00800, 0x00000900, + 0x00400900, 0x00800900, 0x00c00900, 0x00000a00, 0x00400a00, 0x00800a00, + 0x00c00a00, 0x00000b00, 0x00400b00, 0x00800b00, 0x00c00b00, 0x00000c00, + 0x00400c00, 0x00800c00, 0x00c00c00, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 
0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; +constexpr uint32_t d3[256] = { + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x003e0000, 0x01ffffff, 0x003e0000, 0x01ffffff, 0x003f0000, + 0x00340000, 0x00350000, 0x00360000, 0x00370000, 0x00380000, 0x00390000, + 0x003a0000, 0x003b0000, 0x003c0000, 0x003d0000, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, + 0x00010000, 0x00020000, 0x00030000, 0x00040000, 0x00050000, 0x00060000, + 0x00070000, 0x00080000, 0x00090000, 0x000a0000, 0x000b0000, 0x000c0000, + 0x000d0000, 0x000e0000, 0x000f0000, 0x00100000, 0x00110000, 0x00120000, + 0x00130000, 0x00140000, 0x00150000, 0x00160000, 0x00170000, 0x00180000, + 0x00190000, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x003f0000, + 0x01ffffff, 0x001a0000, 0x001b0000, 0x001c0000, 0x001d0000, 0x001e0000, + 0x001f0000, 0x00200000, 0x00210000, 0x00220000, 0x00230000, 0x00240000, + 0x00250000, 0x00260000, 0x00270000, 0x00280000, 0x00290000, 0x002a0000, + 0x002b0000, 0x002c0000, 
0x002d0000, 0x002e0000, 0x002f0000, 0x00300000, + 0x00310000, 0x00320000, 0x00330000, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, + 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; +} // namespace base64_default_or_url +constexpr uint64_t thintable_epi8[256] = { + 0x0706050403020100, 0x0007060504030201, 0x0007060504030200, + 0x0000070605040302, 0x0007060504030100, 0x0000070605040301, + 0x0000070605040300, 0x0000000706050403, 0x0007060504020100, + 
0x0000070605040201, 0x0000070605040200, 0x0000000706050402, + 0x0000070605040100, 0x0000000706050401, 0x0000000706050400, + 0x0000000007060504, 0x0007060503020100, 0x0000070605030201, + 0x0000070605030200, 0x0000000706050302, 0x0000070605030100, + 0x0000000706050301, 0x0000000706050300, 0x0000000007060503, + 0x0000070605020100, 0x0000000706050201, 0x0000000706050200, + 0x0000000007060502, 0x0000000706050100, 0x0000000007060501, + 0x0000000007060500, 0x0000000000070605, 0x0007060403020100, + 0x0000070604030201, 0x0000070604030200, 0x0000000706040302, + 0x0000070604030100, 0x0000000706040301, 0x0000000706040300, + 0x0000000007060403, 0x0000070604020100, 0x0000000706040201, + 0x0000000706040200, 0x0000000007060402, 0x0000000706040100, + 0x0000000007060401, 0x0000000007060400, 0x0000000000070604, + 0x0000070603020100, 0x0000000706030201, 0x0000000706030200, + 0x0000000007060302, 0x0000000706030100, 0x0000000007060301, + 0x0000000007060300, 0x0000000000070603, 0x0000000706020100, + 0x0000000007060201, 0x0000000007060200, 0x0000000000070602, + 0x0000000007060100, 0x0000000000070601, 0x0000000000070600, + 0x0000000000000706, 0x0007050403020100, 0x0000070504030201, + 0x0000070504030200, 0x0000000705040302, 0x0000070504030100, + 0x0000000705040301, 0x0000000705040300, 0x0000000007050403, + 0x0000070504020100, 0x0000000705040201, 0x0000000705040200, + 0x0000000007050402, 0x0000000705040100, 0x0000000007050401, + 0x0000000007050400, 0x0000000000070504, 0x0000070503020100, + 0x0000000705030201, 0x0000000705030200, 0x0000000007050302, + 0x0000000705030100, 0x0000000007050301, 0x0000000007050300, + 0x0000000000070503, 0x0000000705020100, 0x0000000007050201, + 0x0000000007050200, 0x0000000000070502, 0x0000000007050100, + 0x0000000000070501, 0x0000000000070500, 0x0000000000000705, + 0x0000070403020100, 0x0000000704030201, 0x0000000704030200, + 0x0000000007040302, 0x0000000704030100, 0x0000000007040301, + 0x0000000007040300, 0x0000000000070403, 0x0000000704020100, + 
0x0000000007040201, 0x0000000007040200, 0x0000000000070402, + 0x0000000007040100, 0x0000000000070401, 0x0000000000070400, + 0x0000000000000704, 0x0000000703020100, 0x0000000007030201, + 0x0000000007030200, 0x0000000000070302, 0x0000000007030100, + 0x0000000000070301, 0x0000000000070300, 0x0000000000000703, + 0x0000000007020100, 0x0000000000070201, 0x0000000000070200, + 0x0000000000000702, 0x0000000000070100, 0x0000000000000701, + 0x0000000000000700, 0x0000000000000007, 0x0006050403020100, + 0x0000060504030201, 0x0000060504030200, 0x0000000605040302, + 0x0000060504030100, 0x0000000605040301, 0x0000000605040300, + 0x0000000006050403, 0x0000060504020100, 0x0000000605040201, + 0x0000000605040200, 0x0000000006050402, 0x0000000605040100, + 0x0000000006050401, 0x0000000006050400, 0x0000000000060504, + 0x0000060503020100, 0x0000000605030201, 0x0000000605030200, + 0x0000000006050302, 0x0000000605030100, 0x0000000006050301, + 0x0000000006050300, 0x0000000000060503, 0x0000000605020100, + 0x0000000006050201, 0x0000000006050200, 0x0000000000060502, + 0x0000000006050100, 0x0000000000060501, 0x0000000000060500, + 0x0000000000000605, 0x0000060403020100, 0x0000000604030201, + 0x0000000604030200, 0x0000000006040302, 0x0000000604030100, + 0x0000000006040301, 0x0000000006040300, 0x0000000000060403, + 0x0000000604020100, 0x0000000006040201, 0x0000000006040200, + 0x0000000000060402, 0x0000000006040100, 0x0000000000060401, + 0x0000000000060400, 0x0000000000000604, 0x0000000603020100, + 0x0000000006030201, 0x0000000006030200, 0x0000000000060302, + 0x0000000006030100, 0x0000000000060301, 0x0000000000060300, + 0x0000000000000603, 0x0000000006020100, 0x0000000000060201, + 0x0000000000060200, 0x0000000000000602, 0x0000000000060100, + 0x0000000000000601, 0x0000000000000600, 0x0000000000000006, + 0x0000050403020100, 0x0000000504030201, 0x0000000504030200, + 0x0000000005040302, 0x0000000504030100, 0x0000000005040301, + 0x0000000005040300, 0x0000000000050403, 0x0000000504020100, + 
0x0000000005040201, 0x0000000005040200, 0x0000000000050402, + 0x0000000005040100, 0x0000000000050401, 0x0000000000050400, + 0x0000000000000504, 0x0000000503020100, 0x0000000005030201, + 0x0000000005030200, 0x0000000000050302, 0x0000000005030100, + 0x0000000000050301, 0x0000000000050300, 0x0000000000000503, + 0x0000000005020100, 0x0000000000050201, 0x0000000000050200, + 0x0000000000000502, 0x0000000000050100, 0x0000000000000501, + 0x0000000000000500, 0x0000000000000005, 0x0000000403020100, + 0x0000000004030201, 0x0000000004030200, 0x0000000000040302, + 0x0000000004030100, 0x0000000000040301, 0x0000000000040300, + 0x0000000000000403, 0x0000000004020100, 0x0000000000040201, + 0x0000000000040200, 0x0000000000000402, 0x0000000000040100, + 0x0000000000000401, 0x0000000000000400, 0x0000000000000004, + 0x0000000003020100, 0x0000000000030201, 0x0000000000030200, + 0x0000000000000302, 0x0000000000030100, 0x0000000000000301, + 0x0000000000000300, 0x0000000000000003, 0x0000000000020100, + 0x0000000000000201, 0x0000000000000200, 0x0000000000000002, + 0x0000000000000100, 0x0000000000000001, 0x0000000000000000, + 0x0000000000000000, +}; + +constexpr uint8_t pshufb_combine_table[272] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, + 0x0c, 0x0d, 0x0e, 0x0f, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x08, + 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, + 0x0f, 0xff, 0xff, 0xff, 0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b, + 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x02, 0x08, + 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x01, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, + 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x08, 0x09, 0x0a, 
0x0b, + 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, +}; + +constexpr unsigned char BitsSetTable256mul2[256] = { + 0, 2, 2, 4, 2, 4, 4, 6, 2, 4, 4, 6, 4, 6, 6, 8, 2, 4, 4, + 6, 4, 6, 6, 8, 4, 6, 6, 8, 6, 8, 8, 10, 2, 4, 4, 6, 4, 6, + 6, 8, 4, 6, 6, 8, 6, 8, 8, 10, 4, 6, 6, 8, 6, 8, 8, 10, 6, + 8, 8, 10, 8, 10, 10, 12, 2, 4, 4, 6, 4, 6, 6, 8, 4, 6, 6, 8, + 6, 8, 8, 10, 4, 6, 6, 8, 6, 8, 8, 10, 6, 8, 8, 10, 8, 10, 10, + 12, 4, 6, 6, 8, 6, 8, 8, 10, 6, 8, 8, 10, 8, 10, 10, 12, 6, 8, + 8, 10, 8, 10, 10, 12, 8, 10, 10, 12, 10, 12, 12, 14, 2, 4, 4, 6, 4, + 6, 6, 8, 4, 6, 6, 8, 6, 8, 8, 10, 4, 6, 6, 8, 6, 8, 8, 10, + 6, 8, 8, 10, 8, 10, 10, 12, 4, 6, 6, 8, 6, 8, 8, 10, 6, 8, 8, + 10, 8, 10, 10, 12, 6, 8, 8, 10, 8, 10, 10, 12, 8, 10, 10, 12, 10, 12, + 12, 14, 4, 6, 6, 8, 6, 8, 8, 10, 6, 8, 8, 10, 8, 10, 10, 12, 6, + 8, 8, 10, 8, 10, 10, 12, 8, 10, 10, 12, 10, 12, 12, 14, 6, 8, 8, 10, + 8, 10, 10, 12, 8, 10, 10, 12, 10, 12, 12, 14, 8, 10, 10, 12, 10, 12, 12, + 14, 10, 12, 12, 14, 12, 14, 14, 16}; + +constexpr uint8_t to_base64_value[] = { + 255, 255, 255, 255, 255, 255, 255, 255, 255, 64, 64, 255, 64, 64, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 64, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 255, + 255, 255, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, + 255, 255, 255, 255, 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 255, 255, 255, 255, 255, 255, 26, 27, 28, 29, 30, 31, 32, 33, + 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, + 49, 50, 51, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255}; + +constexpr uint8_t to_base64_url_value[] = { + 255, 255, 255, 255, 255, 255, 255, 255, 255, 64, 64, 255, 64, 64, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 64, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 62, 255, 255, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, + 255, 255, 255, 255, 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 255, 255, 255, 255, 63, 255, 26, 27, 28, 29, 30, 31, 32, 33, + 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, + 49, 50, 51, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255}; + +constexpr uint8_t to_base64_default_or_url_value[] = { + 255, 255, 255, 255, 255, 255, 255, 255, 255, 64, 64, 255, 64, 64, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 64, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 255, + 62, 255, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, + 255, 255, 255, 255, 255, 0, 1, 2, 3, 4, 5, 6, 7, 
8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 255, 255, 255, 255, 63, 255, 26, 27, 28, 29, 30, 31, 32, 33, + 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, + 49, 50, 51, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255}; + +static_assert(sizeof(to_base64_value) == 256, + "to_base64_value must have 256 elements"); +static_assert(sizeof(to_base64_url_value) == 256, + "to_base64_url_value must have 256 elements"); +static_assert(to_base64_value[uint8_t(' ')] == 64, + "space must be == 64 in to_base64_value"); +static_assert(to_base64_url_value[uint8_t(' ')] == 64, + "space must be == 64 in to_base64_url_value"); +static_assert(to_base64_value[uint8_t('\t')] == 64, + "tab must be == 64 in to_base64_value"); +static_assert(to_base64_url_value[uint8_t('\t')] == 64, + "tab must be == 64 in to_base64_url_value"); +static_assert(to_base64_value[uint8_t('\r')] == 64, + "cr must be == 64 in to_base64_value"); +static_assert(to_base64_url_value[uint8_t('\r')] == 64, + "cr must be == 64 in to_base64_url_value"); +static_assert(to_base64_value[uint8_t('\n')] == 64, + "lf must be == 64 in to_base64_value"); +static_assert(to_base64_url_value[uint8_t('\n')] == 64, + "lf must be == 64 in to_base64_url_value"); +static_assert(to_base64_value[uint8_t('\f')] == 64, + "ff must be == 64 in to_base64_value"); 
+static_assert(to_base64_url_value[uint8_t('\f')] == 64, + "ff must be == 64 in to_base64_url_value"); +static_assert(to_base64_value[uint8_t('+')] == 62, + "+ must be == 62 in to_base64_value"); +static_assert(to_base64_url_value[uint8_t('-')] == 62, + "- must be == 62 in to_base64_url_value"); +static_assert(to_base64_value[uint8_t('/')] == 63, + "/ must be == 63 in to_base64_value"); +static_assert(to_base64_url_value[uint8_t('_')] == 63, + "_ must be == 63 in to_base64_url_value"); +} // namespace base64 +} // namespace tables +} // unnamed namespace +} // namespace simdutf + +#endif // SIMDUTF_BASE64_TABLES_H +/* end file include/simdutf/base64_tables.h */ +/* begin file include/simdutf/scalar/base64.h */ +#ifndef SIMDUTF_BASE64_H +#define SIMDUTF_BASE64_H + +#include +#include +#include +#include +#include + +namespace simdutf { +namespace scalar { +namespace { +namespace base64 { + +// This function is not expected to be fast. Do not use in long loops. +// In most instances you should be using is_ignorable. +template bool is_ascii_white_space(char_type c) { + return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f'; +} + +template simdutf_constexpr23 bool is_eight_byte(char_type c) { + if simdutf_constexpr (sizeof(char_type) == 1) { + return true; + } + return uint8_t(c) == c; +} + +template +simdutf_constexpr23 bool is_ignorable(char_type c, + simdutf::base64_options options) { + const uint8_t *to_base64 = + (options & base64_default_or_url) + ? tables::base64::to_base64_default_or_url_value + : ((options & base64_url) ? 
tables::base64::to_base64_url_value + : tables::base64::to_base64_value); + const bool ignore_garbage = + (options == base64_options::base64_url_accept_garbage) || + (options == base64_options::base64_default_accept_garbage) || + (options == base64_options::base64_default_or_url_accept_garbage); + uint8_t code = to_base64[uint8_t(c)]; + if (is_eight_byte(c) && code <= 63) { + return false; + } + if (is_eight_byte(c) && code == 64) { + return true; + } + return ignore_garbage; +} +template +simdutf_constexpr23 bool is_base64(char_type c, + simdutf::base64_options options) { + const uint8_t *to_base64 = + (options & base64_default_or_url) + ? tables::base64::to_base64_default_or_url_value + : ((options & base64_url) ? tables::base64::to_base64_url_value + : tables::base64::to_base64_value); + uint8_t code = to_base64[uint8_t(c)]; + if (is_eight_byte(c) && code <= 63) { + return true; + } + return false; +} + +template +simdutf_constexpr23 bool is_base64_or_padding(char_type c, + simdutf::base64_options options) { + const uint8_t *to_base64 = + (options & base64_default_or_url) + ? tables::base64::to_base64_default_or_url_value + : ((options & base64_url) ? tables::base64::to_base64_url_value + : tables::base64::to_base64_value); + if (c == '=') { + return true; + } + uint8_t code = to_base64[uint8_t(c)]; + if (is_eight_byte(c) && code <= 63) { + return true; + } + return false; +} + +template +bool is_ignorable_or_padding(char_type c, simdutf::base64_options options) { + return is_ignorable(c, options) || c == '='; +} + +struct reduced_input { + size_t equalsigns; // number of padding characters '=', typically 0, 1, 2. 
+ size_t equallocation; // location of the first padding character if any + size_t srclen; // length of the input buffer before padding + size_t full_input_length; // length of the input buffer with padding but + // without ignorable characters +}; + +// find the end of the base64 input buffer +// It returns the number of padding characters, the location of the first +// padding character if any, the length of the input buffer before padding +// and the length of the input buffer with padding. The input buffer is not +// modified. The function assumes that there are at most two padding characters. +template +simdutf_constexpr23 reduced_input find_end(const char_type *src, size_t srclen, + simdutf::base64_options options) { + const uint8_t *to_base64 = + (options & base64_default_or_url) + ? tables::base64::to_base64_default_or_url_value + : ((options & base64_url) ? tables::base64::to_base64_url_value + : tables::base64::to_base64_value); + const bool ignore_garbage = + (options == base64_options::base64_url_accept_garbage) || + (options == base64_options::base64_default_accept_garbage) || + (options == base64_options::base64_default_or_url_accept_garbage); + + size_t equalsigns = 0; + // We intentionally include trailing spaces in the full input length. + // See https://github.com/simdutf/simdutf/issues/824 + size_t full_input_length = srclen; + // skip trailing spaces + while (!ignore_garbage && srclen > 0 && + scalar::base64::is_eight_byte(src[srclen - 1]) && + to_base64[uint8_t(src[srclen - 1])] == 64) { + srclen--; + } + size_t equallocation = + srclen; // location of the first padding character if any + if (ignore_garbage) { + // Technically, we don't need to find the first padding character, we can + // just change our algorithms, but it adds substantial complexity. 
+ auto it = simdutf::find(src, src + srclen, '='); + if (it != src + srclen) { + equallocation = it - src; + equalsigns = 1; + srclen = equallocation; + full_input_length = equallocation + 1; + } + return {equalsigns, equallocation, srclen, full_input_length}; + } + if (!ignore_garbage && srclen > 0 && src[srclen - 1] == '=') { + // This is the last '=' sign. + equallocation = srclen - 1; + srclen--; + equalsigns = 1; + // skip trailing spaces + while (srclen > 0 && scalar::base64::is_eight_byte(src[srclen - 1]) && + to_base64[uint8_t(src[srclen - 1])] == 64) { + srclen--; + } + if (srclen > 0 && src[srclen - 1] == '=') { + // This is the second '=' sign. + equallocation = srclen - 1; + srclen--; + equalsigns = 2; + } + } + return {equalsigns, equallocation, srclen, full_input_length}; +} + +// Returns true upon success. The destination buffer must be large enough. +// This functions assumes that the padding (=) has been removed. +// if check_capacity is true, it will check that the destination buffer is +// large enough. If it is not, it will return OUTPUT_BUFFER_TOO_SMALL. +template +simdutf_constexpr23 full_result base64_tail_decode_impl( + char *dst, size_t outlen, const char_type *src, size_t length, + size_t padding_characters, // number of padding characters + // '=', typically 0, 1, 2. + base64_options options, last_chunk_handling_options last_chunk_options) { + char *dstend = dst + outlen; + (void)dstend; + // This looks like 10 branches, but we expect the compiler to resolve this to + // two branches (easily predicted): + const uint8_t *to_base64 = + (options & base64_default_or_url) + ? tables::base64::to_base64_default_or_url_value + : ((options & base64_url) ? tables::base64::to_base64_url_value + : tables::base64::to_base64_value); + const uint32_t *d0 = + (options & base64_default_or_url) + ? tables::base64::base64_default_or_url::d0 + : ((options & base64_url) ? 
tables::base64::base64_url::d0 + : tables::base64::base64_default::d0); + const uint32_t *d1 = + (options & base64_default_or_url) + ? tables::base64::base64_default_or_url::d1 + : ((options & base64_url) ? tables::base64::base64_url::d1 + : tables::base64::base64_default::d1); + const uint32_t *d2 = + (options & base64_default_or_url) + ? tables::base64::base64_default_or_url::d2 + : ((options & base64_url) ? tables::base64::base64_url::d2 + : tables::base64::base64_default::d2); + const uint32_t *d3 = + (options & base64_default_or_url) + ? tables::base64::base64_default_or_url::d3 + : ((options & base64_url) ? tables::base64::base64_url::d3 + : tables::base64::base64_default::d3); + const bool ignore_garbage = + (options == base64_options::base64_url_accept_garbage) || + (options == base64_options::base64_default_accept_garbage) || + (options == base64_options::base64_default_or_url_accept_garbage); + + const char_type *srcend = src + length; + const char_type *srcinit = src; + const char *dstinit = dst; + + uint32_t x; + size_t idx; + uint8_t buffer[4]; + while (true) { + while (srcend - src >= 4 && is_eight_byte(src[0]) && + is_eight_byte(src[1]) && is_eight_byte(src[2]) && + is_eight_byte(src[3]) && + (x = d0[uint8_t(src[0])] | d1[uint8_t(src[1])] | + d2[uint8_t(src[2])] | d3[uint8_t(src[3])]) < 0x01FFFFFF) { + if (check_capacity && dstend - dst < 3) { + return {OUTPUT_BUFFER_TOO_SMALL, size_t(src - srcinit), + size_t(dst - dstinit)}; + } + *dst++ = static_cast(x & 0xFF); + *dst++ = static_cast((x >> 8) & 0xFF); + *dst++ = static_cast((x >> 16) & 0xFF); + src += 4; + } + const char_type *srccur = src; + idx = 0; + // we need at least four characters. +#ifdef __clang__ + // If possible, we read four characters at a time. (It is an optimization.) 
+ if (ignore_garbage && src + 4 <= srcend) { + char_type c0 = src[0]; + char_type c1 = src[1]; + char_type c2 = src[2]; + char_type c3 = src[3]; + + uint8_t code0 = to_base64[uint8_t(c0)]; + uint8_t code1 = to_base64[uint8_t(c1)]; + uint8_t code2 = to_base64[uint8_t(c2)]; + uint8_t code3 = to_base64[uint8_t(c3)]; + + buffer[idx] = code0; + idx += (is_eight_byte(c0) && code0 <= 63); + buffer[idx] = code1; + idx += (is_eight_byte(c1) && code1 <= 63); + buffer[idx] = code2; + idx += (is_eight_byte(c2) && code2 <= 63); + buffer[idx] = code3; + idx += (is_eight_byte(c3) && code3 <= 63); + src += 4; + } +#endif + while ((idx < 4) && (src < srcend)) { + char_type c = *src; + + uint8_t code = to_base64[uint8_t(c)]; + buffer[idx] = uint8_t(code); + if (is_eight_byte(c) && code <= 63) { + idx++; + } else if (!ignore_garbage && + (code > 64 || !scalar::base64::is_eight_byte(c))) { + return {INVALID_BASE64_CHARACTER, size_t(src - srcinit), + size_t(dst - dstinit)}; + } else { + // We have a space or a newline or garbage. We ignore it. + } + src++; + } + if (idx != 4) { + simdutf_log_assert(idx < 4, "idx should be less than 4"); + // We never should have that the number of base64 characters + the + // number of padding characters is more than 4. + if (!ignore_garbage && (idx + padding_characters > 4)) { + return {INVALID_BASE64_CHARACTER, size_t(src - srcinit), + size_t(dst - dstinit), true}; + } + + // The idea here is that in loose mode, + // if there is padding at all, it must be used + // to form 4-wise chunk. However, in loose mode, + // we do accept no padding at all. + if (!ignore_garbage && + last_chunk_options == last_chunk_handling_options::loose && + (idx >= 2) && padding_characters > 0 && + ((idx + padding_characters) & 3) != 0) { + return {INVALID_BASE64_CHARACTER, size_t(src - srcinit), + size_t(dst - dstinit), true}; + } else + + // The idea here is that in strict mode, we do not want to accept + // incomplete base64 chunks. 
So if the chunk was otherwise valid, we + // return BASE64_INPUT_REMAINDER. + if (!ignore_garbage && + last_chunk_options == last_chunk_handling_options::strict && + (idx >= 2) && ((idx + padding_characters) & 3) != 0) { + // The partial chunk was at src - idx + return {BASE64_INPUT_REMAINDER, size_t(src - srcinit), + size_t(dst - dstinit), true}; + } else + // If there is a partial chunk with insufficient padding, with + // stop_before_partial, we need to just ignore it. In "only full" + // mode, skip the minute there are padding characters. + if ((last_chunk_options == + last_chunk_handling_options::stop_before_partial && + (padding_characters + idx < 4) && (idx != 0) && + (idx >= 2 || padding_characters == 0)) || + (last_chunk_options == + last_chunk_handling_options::only_full_chunks && + (idx >= 2 || padding_characters == 0))) { + // partial means that we are *not* going to consume the read + // characters. We need to rewind the src pointer. + src = srccur; + return {SUCCESS, size_t(src - srcinit), size_t(dst - dstinit)}; + } else { + if (idx == 2) { + uint32_t triple = (uint32_t(buffer[0]) << 3 * 6) + + (uint32_t(buffer[1]) << 2 * 6); + if (!ignore_garbage && + (last_chunk_options == last_chunk_handling_options::strict) && + (triple & 0xffff)) { + return {BASE64_EXTRA_BITS, size_t(src - srcinit), + size_t(dst - dstinit)}; + } + if (check_capacity && dstend - dst < 1) { + return {OUTPUT_BUFFER_TOO_SMALL, size_t(srccur - srcinit), + size_t(dst - dstinit)}; + } + *dst++ = static_cast((triple >> 16) & 0xFF); + } else if (idx == 3) { + uint32_t triple = (uint32_t(buffer[0]) << 3 * 6) + + (uint32_t(buffer[1]) << 2 * 6) + + (uint32_t(buffer[2]) << 1 * 6); + if (!ignore_garbage && + (last_chunk_options == last_chunk_handling_options::strict) && + (triple & 0xff)) { + return {BASE64_EXTRA_BITS, size_t(src - srcinit), + size_t(dst - dstinit)}; + } + if (check_capacity && dstend - dst < 2) { + return {OUTPUT_BUFFER_TOO_SMALL, size_t(srccur - srcinit), + size_t(dst - 
dstinit)}; + } + *dst++ = static_cast((triple >> 16) & 0xFF); + *dst++ = static_cast((triple >> 8) & 0xFF); + } else if (!ignore_garbage && idx == 1 && + (!is_partial(last_chunk_options) || + (is_partial(last_chunk_options) && + padding_characters > 0))) { + return {BASE64_INPUT_REMAINDER, size_t(src - srcinit), + size_t(dst - dstinit)}; + } else if (!ignore_garbage && idx == 0 && padding_characters > 0) { + return {INVALID_BASE64_CHARACTER, size_t(src - srcinit), + size_t(dst - dstinit), true}; + } + return {SUCCESS, size_t(src - srcinit), size_t(dst - dstinit)}; + } + } + if (check_capacity && dstend - dst < 3) { + return {OUTPUT_BUFFER_TOO_SMALL, size_t(srccur - srcinit), + size_t(dst - dstinit)}; + } + uint32_t triple = + (uint32_t(buffer[0]) << 3 * 6) + (uint32_t(buffer[1]) << 2 * 6) + + (uint32_t(buffer[2]) << 1 * 6) + (uint32_t(buffer[3]) << 0 * 6); + *dst++ = static_cast((triple >> 16) & 0xFF); + *dst++ = static_cast((triple >> 8) & 0xFF); + *dst++ = static_cast(triple & 0xFF); + } +} + +template +simdutf_constexpr23 full_result base64_tail_decode( + char *dst, const char_type *src, size_t length, + size_t padding_characters, // number of padding characters + // '=', typically 0, 1, 2. + base64_options options, last_chunk_handling_options last_chunk_options) { + return base64_tail_decode_impl(dst, 0, src, length, padding_characters, + options, last_chunk_options); +} + +// like base64_tail_decode, but it will not write past the end of the output +// buffer. The outlen parameter is modified to reflect the number of bytes +// written. This functions assumes that the padding (=) has been removed. +// +template +simdutf_constexpr23 full_result base64_tail_decode_safe( + char *dst, size_t outlen, const char_type *src, size_t length, + size_t padding_characters, // number of padding characters + // '=', typically 0, 1, 2. 
+ base64_options options, last_chunk_handling_options last_chunk_options) { + return base64_tail_decode_impl(dst, outlen, src, length, + padding_characters, options, + last_chunk_options); +} + +inline simdutf_constexpr23 full_result +patch_tail_result(full_result r, size_t previous_input, size_t previous_output, + size_t equallocation, size_t full_input_length, + last_chunk_handling_options last_chunk_options) { + r.input_count += previous_input; + r.output_count += previous_output; + if (r.padding_error) { + r.input_count = equallocation; + } + + if (r.error == error_code::SUCCESS) { + if (!is_partial(last_chunk_options)) { + // A success when we are not in stop_before_partial mode. + // means that we have consumed the whole input buffer. + r.input_count = full_input_length; + } else if (r.output_count % 3 != 0) { + r.input_count = full_input_length; + } + } + return r; +} + +// Returns the number of bytes written. The destination buffer must be large +// enough. It will add padding (=) if needed. +template +simdutf_constexpr23 size_t tail_encode_base64_impl( + char *dst, const char *src, size_t srclen, base64_options options, + size_t line_length = simdutf::default_line_length, size_t line_offset = 0) { + if simdutf_constexpr (use_lines) { + // sanitize line_length and starting_line_offset. + // line_length must be greater than 3. + if (line_length < 4) { + line_length = 4; + } + simdutf_log_assert(line_offset <= line_length, + "line_offset should be less than line_length"); + } + // By default, we use padding if we are not using the URL variant. + // This is check with ((options & base64_url) == 0) which returns true if we + // are not using the URL variant. However, we also allow 'inversion' of the + // convention with the base64_reverse_padding option. If the + // base64_reverse_padding option is set, we use padding if we are using the + // URL variant, and we omit it if we are not using the URL variant. 
This is + // checked with + // ((options & base64_reverse_padding) == base64_reverse_padding). + bool use_padding = + ((options & base64_url) == 0) ^ + ((options & base64_reverse_padding) == base64_reverse_padding); + // This looks like 3 branches, but we expect the compiler to resolve this to + // a single branch: + const char *e0 = (options & base64_url) ? tables::base64::base64_url::e0 + : tables::base64::base64_default::e0; + const char *e1 = (options & base64_url) ? tables::base64::base64_url::e1 + : tables::base64::base64_default::e1; + const char *e2 = (options & base64_url) ? tables::base64::base64_url::e2 + : tables::base64::base64_default::e2; + char *out = dst; + size_t i = 0; + uint8_t t1, t2, t3; + for (; i + 2 < srclen; i += 3) { + t1 = uint8_t(src[i]); + t2 = uint8_t(src[i + 1]); + t3 = uint8_t(src[i + 2]); + if simdutf_constexpr (use_lines) { + if (line_offset + 3 >= line_length) { + if (line_offset == line_length) { + *out++ = '\n'; + *out++ = e0[t1]; + *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; + *out++ = e1[((t2 & 0x0F) << 2) | ((t3 >> 6) & 0x03)]; + *out++ = e2[t3]; + line_offset = 4; + } else if (line_offset + 1 == line_length) { + *out++ = e0[t1]; + *out++ = '\n'; + *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; + *out++ = e1[((t2 & 0x0F) << 2) | ((t3 >> 6) & 0x03)]; + *out++ = e2[t3]; + line_offset = 3; + } else if (line_offset + 2 == line_length) { + *out++ = e0[t1]; + *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; + *out++ = '\n'; + *out++ = e1[((t2 & 0x0F) << 2) | ((t3 >> 6) & 0x03)]; + *out++ = e2[t3]; + line_offset = 2; + } else if (line_offset + 3 == line_length) { + *out++ = e0[t1]; + *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; + *out++ = e1[((t2 & 0x0F) << 2) | ((t3 >> 6) & 0x03)]; + *out++ = '\n'; + *out++ = e2[t3]; + line_offset = 1; + } + } else { + *out++ = e0[t1]; + *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; + *out++ = e1[((t2 & 0x0F) << 2) | ((t3 >> 6) & 0x03)]; + *out++ = e2[t3]; + 
line_offset += 4; + } + } else { + *out++ = e0[t1]; + *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; + *out++ = e1[((t2 & 0x0F) << 2) | ((t3 >> 6) & 0x03)]; + *out++ = e2[t3]; + } + } + switch (srclen - i) { + case 0: + break; + case 1: + t1 = uint8_t(src[i]); + if simdutf_constexpr (use_lines) { + if (use_padding) { + if (line_offset + 3 >= line_length) { + if (line_offset == line_length) { + *out++ = '\n'; + *out++ = e0[t1]; + *out++ = e1[(t1 & 0x03) << 4]; + *out++ = '='; + *out++ = '='; + } else if (line_offset + 1 == line_length) { + *out++ = e0[t1]; + *out++ = '\n'; + *out++ = e1[(t1 & 0x03) << 4]; + *out++ = '='; + *out++ = '='; + } else if (line_offset + 2 == line_length) { + *out++ = e0[t1]; + *out++ = e1[(t1 & 0x03) << 4]; + *out++ = '\n'; + *out++ = '='; + *out++ = '='; + } else if (line_offset + 3 == line_length) { + *out++ = e0[t1]; + *out++ = e1[(t1 & 0x03) << 4]; + *out++ = '='; + *out++ = '\n'; + *out++ = '='; + } + } else { + *out++ = e0[t1]; + *out++ = e1[(t1 & 0x03) << 4]; + *out++ = '='; + *out++ = '='; + } + } else { + if (line_offset + 2 >= line_length) { + if (line_offset == line_length) { + *out++ = '\n'; + *out++ = e0[uint8_t(src[i])]; + *out++ = e1[(uint8_t(src[i]) & 0x03) << 4]; + } else if (line_offset + 1 == line_length) { + *out++ = e0[uint8_t(src[i])]; + *out++ = '\n'; + *out++ = e1[(uint8_t(src[i]) & 0x03) << 4]; + } else { + *out++ = e0[uint8_t(src[i])]; + *out++ = e1[(uint8_t(src[i]) & 0x03) << 4]; + // *out++ = '\n'; ==> no newline at the end of the output + } + } else { + *out++ = e0[uint8_t(src[i])]; + *out++ = e1[(uint8_t(src[i]) & 0x03) << 4]; + } + } + } else { + *out++ = e0[t1]; + *out++ = e1[(t1 & 0x03) << 4]; + if (use_padding) { + *out++ = '='; + *out++ = '='; + } + } + break; + default: /* case 2 */ + t1 = uint8_t(src[i]); + t2 = uint8_t(src[i + 1]); + if simdutf_constexpr (use_lines) { + if (use_padding) { + if (line_offset + 3 >= line_length) { + if (line_offset == line_length) { + *out++ = '\n'; + *out++ = 
e0[t1]; + *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; + *out++ = e2[(t2 & 0x0F) << 2]; + *out++ = '='; + } else if (line_offset + 1 == line_length) { + *out++ = e0[t1]; + *out++ = '\n'; + *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; + *out++ = e2[(t2 & 0x0F) << 2]; + *out++ = '='; + } else if (line_offset + 2 == line_length) { + *out++ = e0[t1]; + *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; + *out++ = '\n'; + *out++ = e2[(t2 & 0x0F) << 2]; + *out++ = '='; + } else if (line_offset + 3 == line_length) { + *out++ = e0[t1]; + *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; + *out++ = e2[(t2 & 0x0F) << 2]; + *out++ = '\n'; + *out++ = '='; + } + } else { + *out++ = e0[t1]; + *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; + *out++ = e2[(t2 & 0x0F) << 2]; + *out++ = '='; + } + } else { + if (line_offset + 3 >= line_length) { + if (line_offset == line_length) { + *out++ = '\n'; + *out++ = e0[t1]; + *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; + *out++ = e2[(t2 & 0x0F) << 2]; + } else if (line_offset + 1 == line_length) { + *out++ = e0[t1]; + *out++ = '\n'; + *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; + *out++ = e2[(t2 & 0x0F) << 2]; + } else if (line_offset + 2 == line_length) { + *out++ = e0[t1]; + *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; + *out++ = '\n'; + *out++ = e2[(t2 & 0x0F) << 2]; + } else { + *out++ = e0[t1]; + *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; + *out++ = e2[(t2 & 0x0F) << 2]; + // *out++ = '\n'; ==> no newline at the end of the output + } + } else { + *out++ = e0[t1]; + *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; + *out++ = e2[(t2 & 0x0F) << 2]; + } + } + } else { + *out++ = e0[t1]; + *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; + *out++ = e2[(t2 & 0x0F) << 2]; + if (use_padding) { + *out++ = '='; + } + } + } + return (size_t)(out - dst); +} + +// Returns the number of bytes written. The destination buffer must be large +// enough. 
It will add padding (=) if needed. +inline simdutf_constexpr23 size_t tail_encode_base64(char *dst, const char *src, + size_t srclen, + base64_options options) { + return tail_encode_base64_impl(dst, src, srclen, options); +} + +template +simdutf_warn_unused simdutf_constexpr23 size_t +maximal_binary_length_from_base64(InputPtr input, size_t length) noexcept { + // We process the padding characters ('=') at the end to make sure + // that we return an exact result when the input has no ignorable characters + // (e.g., spaces). + size_t padding = 0; + if (length > 0) { + if (input[length - 1] == '=') { + padding++; + if (length > 1 && input[length - 2] == '=') { + padding++; + } + } + } + // The input is not otherwise processed for ignorable characters or + // validation, so that the function runs in constant time (very fast). In + // practice, base64 inputs without ignorable characters are common and the + // common case are line separated inputs with relatively long lines (e.g., 76 + // characters) which leads this function to a slight (1%) overestimation of + // the output size. + // + // Of course, some inputs might contain an arbitrary number of spaces or + // newlines, which would make this function return a very pessimistic output + // size but systems that produce base64 outputs typically do not do that and + // if they do, they do not care much about minimizing memory usage. + // + // In specialized applications, users may know that their input is line + // separated, which can be checked very quickly by by iterating (e.g., over 76 + // character chunks, looking for the linefeed characters only). We could + // provide a specialized function for that, but it is not clear that the added + // complexity is worth it for us. + // + size_t actual_length = length - padding; + if (actual_length % 4 <= 1) { + return actual_length / 4 * 3; + } + // if we have a valid input, then the remainder must be 2 or 3 adding one or + // two extra bytes. 
+ return actual_length / 4 * 3 + (actual_length % 4) - 1; +} + +template +simdutf_warn_unused simdutf_constexpr23 full_result +base64_to_binary_details_impl( + const char_type *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) noexcept { + const bool ignore_garbage = + (options == base64_options::base64_url_accept_garbage) || + (options == base64_options::base64_default_accept_garbage) || + (options == base64_options::base64_default_or_url_accept_garbage); + auto ri = simdutf::scalar::base64::find_end(input, length, options); + size_t equallocation = ri.equallocation; + size_t equalsigns = ri.equalsigns; + length = ri.srclen; + size_t full_input_length = ri.full_input_length; + if (length == 0) { + if (!ignore_garbage && equalsigns > 0) { + return {INVALID_BASE64_CHARACTER, equallocation, 0}; + } + return {SUCCESS, full_input_length, 0}; + } + full_result r = scalar::base64::base64_tail_decode( + output, input, length, equalsigns, options, last_chunk_options); + r = scalar::base64::patch_tail_result(r, 0, 0, equallocation, + full_input_length, last_chunk_options); + if (!is_partial(last_chunk_options) && r.error == error_code::SUCCESS && + equalsigns > 0 && !ignore_garbage) { + // additional checks + if ((r.output_count % 3 == 0) || + ((r.output_count % 3) + 1 + equalsigns != 4)) { + return {INVALID_BASE64_CHARACTER, equallocation, r.output_count}; + } + } + // When is_partial(last_chunk_options) is true, we must either end with + // the end of the stream (beyond whitespace) or right after a non-ignorable + // character or at the very beginning of the stream. 
+ // See https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64 + if (is_partial(last_chunk_options) && r.error == error_code::SUCCESS && + r.input_count < full_input_length) { + // First check if we can extend the input to the end of the stream + while (r.input_count < full_input_length && + base64_ignorable(*(input + r.input_count), options)) { + r.input_count++; + } + // If we are still not at the end of the stream, then we must backtrack + // to the last non-ignorable character. + if (r.input_count < full_input_length) { + while (r.input_count > 0 && + base64_ignorable(*(input + r.input_count - 1), options)) { + r.input_count--; + } + } + } + return r; +} + +template +simdutf_constexpr23 simdutf_warn_unused full_result +base64_to_binary_details_safe_impl( + const char_type *input, size_t length, char *output, size_t outlen, + base64_options options, + last_chunk_handling_options last_chunk_options) noexcept { + const bool ignore_garbage = + (options == base64_options::base64_url_accept_garbage) || + (options == base64_options::base64_default_accept_garbage) || + (options == base64_options::base64_default_or_url_accept_garbage); + auto ri = simdutf::scalar::base64::find_end(input, length, options); + size_t equallocation = ri.equallocation; + size_t equalsigns = ri.equalsigns; + length = ri.srclen; + size_t full_input_length = ri.full_input_length; + if (length == 0) { + if (!ignore_garbage && equalsigns > 0) { + return {INVALID_BASE64_CHARACTER, equallocation, 0}; + } + return {SUCCESS, full_input_length, 0}; + } + full_result r = scalar::base64::base64_tail_decode_safe( + output, outlen, input, length, equalsigns, options, last_chunk_options); + r = scalar::base64::patch_tail_result(r, 0, 0, equallocation, + full_input_length, last_chunk_options); + if (!is_partial(last_chunk_options) && r.error == error_code::SUCCESS && + equalsigns > 0 && !ignore_garbage) { + // additional checks + if ((r.output_count % 3 == 0) || + ((r.output_count % 3) + 1 + 
equalsigns != 4)) { + return {INVALID_BASE64_CHARACTER, equallocation, r.output_count}; + } + } + + // When is_partial(last_chunk_options) is true, we must either end with + // the end of the stream (beyond whitespace) or right after a non-ignorable + // character or at the very beginning of the stream. + // See https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64 + if (is_partial(last_chunk_options) && r.error == error_code::SUCCESS && + r.input_count < full_input_length) { + // First check if we can extend the input to the end of the stream + while (r.input_count < full_input_length && + base64_ignorable(*(input + r.input_count), options)) { + r.input_count++; + } + // If we are still not at the end of the stream, then we must backtrack + // to the last non-ignorable character. + if (r.input_count < full_input_length) { + while (r.input_count > 0 && + base64_ignorable(*(input + r.input_count - 1), options)) { + r.input_count--; + } + } + } + return r; +} + +simdutf_warn_unused simdutf_constexpr23 size_t +base64_length_from_binary(size_t length, base64_options options) noexcept { + // By default, we use padding if we are not using the URL variant. + // This is check with ((options & base64_url) == 0) which returns true if we + // are not using the URL variant. However, we also allow 'inversion' of the + // convention with the base64_reverse_padding option. If the + // base64_reverse_padding option is set, we use padding if we are using the + // URL variant, and we omit it if we are not using the URL variant. This is + // checked with + // ((options & base64_reverse_padding) == base64_reverse_padding). + bool use_padding = + ((options & base64_url) == 0) ^ + ((options & base64_reverse_padding) == base64_reverse_padding); + if (!use_padding) { + return length / 3 * 4 + ((length % 3) ? (length % 3) + 1 : 0); + } + return (length + 2) / 3 * + 4; // We use padding to make the length a multiple of 4. 
+} + +simdutf_warn_unused simdutf_constexpr23 size_t +base64_length_from_binary_with_lines(size_t length, base64_options options, + size_t line_length) noexcept { + if (length == 0) { + return 0; + } + size_t base64_length = + scalar::base64::base64_length_from_binary(length, options); + if (line_length < 4) { + line_length = 4; + } + size_t lines = + (base64_length + line_length - 1) / line_length; // number of lines + return base64_length + lines - 1; +} + +// Return the length of the prefix that contains count base64 characters. +// Thus, if count is 3, the function returns the length of the prefix +// that contains 3 base64 characters. +// The function returns (size_t)-1 if there is not enough base64 characters in +// the input. +template +simdutf_warn_unused size_t prefix_length(size_t count, + simdutf::base64_options options, + const char_type *input, + size_t length) noexcept { + size_t i = 0; + while (i < length && is_ignorable(input[i], options)) { + i++; + } + if (count == 0) { + return i; // duh! + } + for (; i < length; i++) { + if (is_ignorable(input[i], options)) { + continue; + } + // We have a base64 character or a padding character. 
+ count--; + if (count == 0) { + return i + 1; + } + } + simdutf_log_assert(false, "You never get here"); + + return -1; // should never happen +} + +} // namespace base64 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file include/simdutf/scalar/base64.h */ + +namespace simdutf { + + #if SIMDUTF_CPLUSPLUS17 +inline std::string_view to_string(base64_options options) { + switch (options) { + case base64_default: + return "base64_default"; + case base64_url: + return "base64_url"; + case base64_reverse_padding: + return "base64_reverse_padding"; + case base64_url_with_padding: + return "base64_url_with_padding"; + case base64_default_accept_garbage: + return "base64_default_accept_garbage"; + case base64_url_accept_garbage: + return "base64_url_accept_garbage"; + case base64_default_or_url: + return "base64_default_or_url"; + case base64_default_or_url_accept_garbage: + return "base64_default_or_url_accept_garbage"; + } + return ""; +} + #endif // SIMDUTF_CPLUSPLUS17 + + #if SIMDUTF_CPLUSPLUS17 +inline std::string_view to_string(last_chunk_handling_options options) { + switch (options) { + case loose: + return "loose"; + case strict: + return "strict"; + case stop_before_partial: + return "stop_before_partial"; + case only_full_chunks: + return "only_full_chunks"; + } + return ""; +} + #endif + +/** + * Provide the maximal binary length in bytes given the base64 input. + * As long as the input does not contain ignorable characters (e.g., ASCII + * spaces or linefeed characters), the result is exact. In particular, the + * function checks for padding characters. + * + * The function is fast (constant time). It checks up to two characters at + * the end of the string. The input is not otherwise validated or read. 
+ * + * @param input the base64 input to process + * @param length the length of the base64 input in bytes + * @return maximum number of binary bytes + */ +simdutf_warn_unused size_t +maximal_binary_length_from_base64(const char *input, size_t length) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +maximal_binary_length_from_base64( + const detail::input_span_of_byte_like auto &input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::base64::maximal_binary_length_from_base64( + detail::constexpr_cast_ptr(input.data()), input.size()); + } else + #endif + { + return maximal_binary_length_from_base64( + reinterpret_cast(input.data()), input.size()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Provide the maximal binary length in bytes given the base64 input. + * As long as the input does not contain ignorable characters (e.g., ASCII + * spaces or linefeed characters), the result is exact. In particular, the + * function checks for padding characters. + * + * The function is fast (constant time). It checks up to two characters at + * the end of the string. The input is not otherwise validated or read. + * + * @param input the base64 input to process, in ASCII stored as 16-bit + * units + * @param length the length of the base64 input in 16-bit units + * @return maximal number of binary bytes + */ +simdutf_warn_unused size_t maximal_binary_length_from_base64( + const char16_t *input, size_t length) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +maximal_binary_length_from_base64(std::span input) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::base64::maximal_binary_length_from_base64(input.data(), + input.size()); + } else + #endif + { + return maximal_binary_length_from_base64(input.data(), input.size()); + } +} + #endif // SIMDUTF_SPAN + +/** + * Convert a base64 input to a binary output. 
+ * + * This function follows the WHATWG forgiving-base64 format, which means that it + * will ignore any ASCII spaces in the input. You may provide a padded input + * (with one or two equal signs at the end) or an unpadded input (without any + * equal signs at the end). + * + * See https://infra.spec.whatwg.org/#forgiving-base64-decode + * + * This function will fail in case of invalid input. When last_chunk_options = + * loose, there are two possible reasons for failure: the input contains a + * number of base64 characters that when divided by 4, leaves a single remainder + * character (BASE64_INPUT_REMAINDER), or the input contains a character that is + * not a valid base64 character (INVALID_BASE64_CHARACTER). + * + * When the error is INVALID_BASE64_CHARACTER, r.count contains the index in the + * input where the invalid character was found. When the error is + * BASE64_INPUT_REMAINDER, then r.count contains the number of bytes decoded. + * + * The default option (simdutf::base64_default) expects the characters `+` and + * `/` as part of its alphabet. The URL option (simdutf::base64_url) expects the + * characters `-` and `_` as part of its alphabet. + * + * The padding (`=`) is validated if present. There may be at most two padding + * characters at the end of the input. If there are any padding characters, the + * total number of characters (excluding spaces but including padding + * characters) must be divisible by four. + * + * You should call this function with a buffer that is at least + * maximal_binary_length_from_base64(input, length) bytes long. If you fail to + * provide that much space, the function may cause a buffer overflow. + * + * Advanced users may want to tailor how the last chunk is handled. 
By default, + * we use a loose (forgiving) approach but we also support a strict approach + * as well as a stop_before_partial approach, as per the following proposal: + * + * https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64 + * + * @param input the base64 string to process + * @param length the length of the string in bytes + * @param output the pointer to a buffer that can hold the conversion + * result (should be at least maximal_binary_length_from_base64(input, length) + * bytes long). + * @param options the base64 options to use, usually base64_default or + * base64_url, and base64_default by default. + * @param last_chunk_options the last chunk handling options, + * last_chunk_handling_options::loose by default + * but can also be last_chunk_handling_options::strict or + * last_chunk_handling_options::stop_before_partial. + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in bytes) if any, or the number of bytes written if successful. 
+ */ +simdutf_warn_unused result base64_to_binary( + const char *input, size_t length, char *output, + base64_options options = base64_default, + last_chunk_handling_options last_chunk_options = loose) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result +base64_to_binary( + const detail::input_span_of_byte_like auto &input, + detail::output_span_of_byte_like auto &&binary_output, + base64_options options = base64_default, + last_chunk_handling_options last_chunk_options = loose) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::base64::base64_to_binary_details_impl( + input.data(), input.size(), binary_output.data(), options, + last_chunk_options); + } else + #endif + { + return base64_to_binary(reinterpret_cast(input.data()), + input.size(), + reinterpret_cast(binary_output.data()), + options, last_chunk_options); + } +} + #endif // SIMDUTF_SPAN + +/** + * Provide the base64 length in bytes given the length of a binary input. + * + * @param length the length of the input in bytes + * @return number of base64 bytes + */ +inline simdutf_warn_unused simdutf_constexpr23 size_t base64_length_from_binary( + size_t length, base64_options options = base64_default) noexcept { + return scalar::base64::base64_length_from_binary(length, options); +} + +/** + * Provide the base64 length in bytes given the length of a binary input, + * taking into account line breaks. 
+ * + * @param length the length of the input in bytes + * @param line_length the length of lines, must be at least 4 (otherwise it is + * interpreted as 4), + * @return number of base64 bytes + */ +inline simdutf_warn_unused simdutf_constexpr23 size_t +base64_length_from_binary_with_lines( + size_t length, base64_options options = base64_default, + size_t line_length = default_line_length) noexcept { + return scalar::base64::base64_length_from_binary_with_lines(length, options, + line_length); +} + +/** + * Convert a binary input to a base64 output. + * + * The default option (simdutf::base64_default) uses the characters `+` and `/` + * as part of its alphabet. Further, it adds padding (`=`) at the end of the + * output to ensure that the output length is a multiple of four. + * + * The URL option (simdutf::base64_url) uses the characters `-` and `_` as part + * of its alphabet. No padding is added at the end of the output. + * + * This function always succeeds. + * + * @param input the binary to process + * @param length the length of the input in bytes + * @param output the pointer to a buffer that can hold the conversion + * result (should be at least base64_length_from_binary(length) bytes long) + * @param options the base64 options to use, can be base64_default or + * base64_url, is base64_default by default. 
+ * @return number of written bytes, will be equal to + * base64_length_from_binary(length, options) + */ +size_t binary_to_base64(const char *input, size_t length, char *output, + base64_options options = base64_default) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +binary_to_base64(const detail::input_span_of_byte_like auto &input, + detail::output_span_of_byte_like auto &&binary_output, + base64_options options = base64_default) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::base64::tail_encode_base64( + binary_output.data(), input.data(), input.size(), options); + } else + #endif + { + return binary_to_base64( + reinterpret_cast(input.data()), input.size(), + reinterpret_cast(binary_output.data()), options); + } +} + #endif // SIMDUTF_SPAN + +/** + * Convert a binary input to a base64 output with line breaks. + * + * The default option (simdutf::base64_default) uses the characters `+` and `/` + * as part of its alphabet. Further, it adds padding (`=`) at the end of the + * output to ensure that the output length is a multiple of four. + * + * The URL option (simdutf::base64_url) uses the characters `-` and `_` as part + * of its alphabet. No padding is added at the end of the output. + * + * This function always succeeds. + * + * @param input the binary to process + * @param length the length of the input in bytes + * @param output the pointer to a buffer that can hold the conversion + * result (should be at least base64_length_from_binary_with_lines(length, + * options, line_length) bytes long) + * @param line_length the length of lines, must be at least 4 (otherwise it is + * interpreted as 4), + * @param options the base64 options to use, can be base64_default or + * base64_url, is base64_default by default. 
+ * @return number of written bytes, will be equal to + * base64_length_from_binary_with_lines(length, options) + */ +size_t +binary_to_base64_with_lines(const char *input, size_t length, char *output, + size_t line_length = simdutf::default_line_length, + base64_options options = base64_default) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t +binary_to_base64_with_lines( + const detail::input_span_of_byte_like auto &input, + detail::output_span_of_byte_like auto &&binary_output, + size_t line_length = simdutf::default_line_length, + base64_options options = base64_default) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::base64::tail_encode_base64_impl( + binary_output.data(), input.data(), input.size(), options, line_length); + } else + #endif + { + return binary_to_base64_with_lines( + reinterpret_cast(input.data()), input.size(), + reinterpret_cast(binary_output.data()), line_length, options); + } +} + #endif // SIMDUTF_SPAN + + #if SIMDUTF_ATOMIC_REF +/** + * Convert a binary input to a base64 output, using atomic accesses. + * This function comes with a potentially significant performance + * penalty, but it may be useful in some cases where the input + * buffers are shared between threads, to avoid undefined + * behavior in case of data races. + * + * The function is for advanced users. Its main use case is when + * to silence sanitizer warnings. We have no documented use case + * where this function is actually necessary in terms of practical correctness. + * + * This function is only available when simdutf is compiled with + * C++20 support and __cpp_lib_atomic_ref >= 201806L. You may check + * the availability of this function by checking the macro + * SIMDUTF_ATOMIC_REF. + * + * The default option (simdutf::base64_default) uses the characters `+` and `/` + * as part of its alphabet. 
Further, it adds padding (`=`) at the end of the + * output to ensure that the output length is a multiple of four. + * + * The URL option (simdutf::base64_url) uses the characters `-` and `_` as part + * of its alphabet. No padding is added at the end of the output. + * + * This function always succeeds. + * + * This function is considered experimental. It is not tested by default + * (see the CMake option SIMDUTF_ATOMIC_BASE64_TESTS) nor is it fuzz tested. + * It is not documented in the public API documentation (README). It is + * offered on a best effort basis. We rely on the community for further + * testing and feedback. + * + * @brief atomic_binary_to_base64 + * @param input the binary to process + * @param length the length of the input in bytes + * @param output the pointer to a buffer that can hold the conversion + * result (should be at least base64_length_from_binary(length) bytes long) + * @param options the base64 options to use, can be base64_default or + * base64_url, is base64_default by default. + * @return number of written bytes, will be equal to + * base64_length_from_binary(length, options) + */ +size_t +atomic_binary_to_base64(const char *input, size_t length, char *output, + base64_options options = base64_default) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t +atomic_binary_to_base64(const detail::input_span_of_byte_like auto &input, + detail::output_span_of_byte_like auto &&binary_output, + base64_options options = base64_default) noexcept { + return atomic_binary_to_base64( + reinterpret_cast(input.data()), input.size(), + reinterpret_cast(binary_output.data()), options); +} + #endif // SIMDUTF_SPAN + #endif // SIMDUTF_ATOMIC_REF + +/** + * Convert a base64 input to a binary output. + * + * This function follows the WHATWG forgiving-base64 format, which means that it + * will ignore any ASCII spaces in the input. 
You may provide a padded input + * (with one or two equal signs at the end) or an unpadded input (without any + * equal signs at the end). + * + * See https://infra.spec.whatwg.org/#forgiving-base64-decode + * + * This function will fail in case of invalid input. When last_chunk_options = + * loose, there are two possible reasons for failure: the input contains a + * number of base64 characters that when divided by 4, leaves a single remainder + * character (BASE64_INPUT_REMAINDER), or the input contains a character that is + * not a valid base64 character (INVALID_BASE64_CHARACTER). + * + * When the error is INVALID_BASE64_CHARACTER, r.count contains the index in the + * input where the invalid character was found. When the error is + * BASE64_INPUT_REMAINDER, then r.count contains the number of bytes decoded. + * + * The default option (simdutf::base64_default) expects the characters `+` and + * `/` as part of its alphabet. The URL option (simdutf::base64_url) expects the + * characters `-` and `_` as part of its alphabet. + * + * The padding (`=`) is validated if present. There may be at most two padding + * characters at the end of the input. If there are any padding characters, the + * total number of characters (excluding spaces but including padding + * characters) must be divisible by four. + * + * You should call this function with a buffer that is at least + * maximal_binary_length_from_base64(input, length) bytes long. If you fail + * to provide that much space, the function may cause a buffer overflow. + * + * Advanced users may want to tailor how the last chunk is handled. 
By default, + * we use a loose (forgiving) approach but we also support a strict approach + * as well as a stop_before_partial approach, as per the following proposal: + * + * https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64 + * + * @param input the base64 string to process, in ASCII stored as 16-bit + * units + * @param length the length of the string in 16-bit units + * @param output the pointer to a buffer that can hold the conversion + * result (should be at least maximal_binary_length_from_base64(input, length) + * bytes long). + * @param options the base64 options to use, can be base64_default or + * base64_url, is base64_default by default. + * @param last_chunk_options the last chunk handling options, + * last_chunk_handling_options::loose by default + * but can also be last_chunk_handling_options::strict or + * last_chunk_handling_options::stop_before_partial. + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and position of the + * INVALID_BASE64_CHARACTER error (in the input in units) if any, or the number + * of bytes written if successful. 
+ */ +simdutf_warn_unused result +base64_to_binary(const char16_t *input, size_t length, char *output, + base64_options options = base64_default, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) noexcept; + #if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result +base64_to_binary( + std::span input, + detail::output_span_of_byte_like auto &&binary_output, + base64_options options = base64_default, + last_chunk_handling_options last_chunk_options = loose) noexcept { + #if SIMDUTF_CPLUSPLUS23 + if consteval { + return scalar::base64::base64_to_binary_details_impl( + input.data(), input.size(), binary_output.data(), options, + last_chunk_options); + } else + #endif + { + return base64_to_binary(input.data(), input.size(), + reinterpret_cast(binary_output.data()), + options, last_chunk_options); + } +} + #endif // SIMDUTF_SPAN + +/** + * Check if a character is an ignorable base64 character. + * Checking a large input, character by character, is not computationally + * efficient. + * + * @param input the character to check + * @param options the base64 options to use, is base64_default by default. + * @return true if the character is an ignorable base64 character, false + * otherwise. + */ +simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 bool +base64_ignorable(char input, base64_options options = base64_default) noexcept { + return scalar::base64::is_ignorable(input, options); +} +simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 bool +base64_ignorable(char16_t input, + base64_options options = base64_default) noexcept { + return scalar::base64::is_ignorable(input, options); +} + +/** + * Check if a character is a valid base64 character. + * Checking a large input, character by character, is not computationally + * efficient. + * Note that padding characters are not considered valid base64 characters in + * this context, nor are spaces. 
+ * + * @param input the character to check + * @param options the base64 options to use, is base64_default by default. + * @return true if the character is a base64 character, false otherwise. + */ +simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 bool +base64_valid(char input, base64_options options = base64_default) noexcept { + return scalar::base64::is_base64(input, options); +} +simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 bool +base64_valid(char16_t input, base64_options options = base64_default) noexcept { + return scalar::base64::is_base64(input, options); +} + +/** + * Check if a character is a valid base64 character or the padding character + * ('='). Checking a large input, character by character, is not computationally + * efficient. + * + * @param input the character to check + * @param options the base64 options to use, is base64_default by default. + * @return true if the character is a base64 character, false otherwise. + */ +simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 bool +base64_valid_or_padding(char input, + base64_options options = base64_default) noexcept { + return scalar::base64::is_base64_or_padding(input, options); +} +simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 bool +base64_valid_or_padding(char16_t input, + base64_options options = base64_default) noexcept { + return scalar::base64::is_base64_or_padding(input, options); +} + +/** + * Convert a base64 input to a binary output. + * + * This function follows the WHATWG forgiving-base64 format, which means that it + * will ignore any ASCII spaces in the input. You may provide a padded input + * (with one or two equal signs at the end) or an unpadded input (without any + * equal signs at the end). + * + * See https://infra.spec.whatwg.org/#forgiving-base64-decode + * + * This function will fail in case of invalid input. 
When last_chunk_options = + * loose, there are three possible reasons for failure: the input contains a + * number of base64 characters that when divided by 4, leaves a single remainder + * character (BASE64_INPUT_REMAINDER), the input contains a character that is + * not a valid base64 character (INVALID_BASE64_CHARACTER), or the output buffer + * is too small (OUTPUT_BUFFER_TOO_SMALL). + * + * When OUTPUT_BUFFER_TOO_SMALL, we return both the number of bytes written + * and the number of units processed, see description of the parameters and + * returned value. + * + * When the error is INVALID_BASE64_CHARACTER, r.count contains the index in the + * input where the invalid character was found. When the error is + * BASE64_INPUT_REMAINDER, then r.count contains the number of bytes decoded. + * + * The default option (simdutf::base64_default) expects the characters `+` and + * `/` as part of its alphabet. The URL option (simdutf::base64_url) expects the + * characters `-` and `_` as part of its alphabet. + * + * The padding (`=`) is validated if present. There may be at most two padding + * characters at the end of the input. If there are any padding characters, the + * total number of characters (excluding spaces but including padding + * characters) must be divisible by four. + * + * The INVALID_BASE64_CHARACTER cases are considered fatal and you are expected + * to discard the output unless the parameter decode_up_to_bad_char is set to + * true. In that case, the function will decode up to the first invalid + * character. Extra padding characters ('=') are considered invalid characters. + * + * Advanced users may want to tailor how the last chunk is handled. 
By default, + * we use a loose (forgiving) approach but we also support a strict approach + * as well as a stop_before_partial approach, as per the following proposal: + * + * https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64 + * + * @param input the base64 string to process, in ASCII stored as 8-bit + * or 16-bit units + * @param length the length of the string in 8-bit or 16-bit units. + * @param output the pointer to a buffer that can hold the conversion + * result. + * @param outlen the number of bytes that can be written in the output + * buffer. Upon return, it is modified to reflect how many bytes were written. + * @param options the base64 options to use, can be base64_default or + * base64_url, is base64_default by default. + * @param last_chunk_options the last chunk handling options, + * last_chunk_handling_options::loose by default + * but can also be last_chunk_handling_options::strict or + * last_chunk_handling_options::stop_before_partial. + * @param decode_up_to_bad_char if true, the function will decode up to the + * first invalid character. By default (false), it is assumed that the output + * buffer is to be discarded. When there are multiple errors in the input, + * using decode_up_to_bad_char might trigger a different error. + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and position of the + * INVALID_BASE64_CHARACTER error (in the input in units) if any, or the number + * of units processed if successful. 
+ */ +simdutf_warn_unused result +base64_to_binary_safe(const char *input, size_t length, char *output, + size_t &outlen, base64_options options = base64_default, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose, + bool decode_up_to_bad_char = false) noexcept; +// the span overload has moved to the bottom of the file + +simdutf_warn_unused result +base64_to_binary_safe(const char16_t *input, size_t length, char *output, + size_t &outlen, base64_options options = base64_default, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose, + bool decode_up_to_bad_char = false) noexcept; + // span overload moved to bottom of file + + #if SIMDUTF_ATOMIC_REF +/** + * Convert a base64 input to a binary output with a size limit and using atomic + * operations. + * + * Like `base64_to_binary_safe` but using atomic operations, this function is + * thread-safe for concurrent memory access, allowing the output + * buffers to be shared between threads without undefined behavior in case of + * data races. + * + * This function comes with a potentially significant performance penalty, but + * is useful when thread safety is needed during base64 decoding. + * + * This function is only available when simdutf is compiled with + * C++20 support and __cpp_lib_atomic_ref >= 201806L. You may check + * the availability of this function by checking the macro + * SIMDUTF_ATOMIC_REF. + * + * This function is considered experimental. It is not tested by default + * (see the CMake option SIMDUTF_ATOMIC_BASE64_TESTS) nor is it fuzz tested. + * It is not documented in the public API documentation (README). It is + * offered on a best effort basis. We rely on the community for further + * testing and feedback. 
+ * + * @param input the base64 input to decode + * @param length the length of the input in bytes + * @param output the pointer to buffer that can hold the conversion + * result + * @param outlen the number of bytes that can be written in the output + * buffer. Upon return, it is modified to reflect how many bytes were written. + * @param options the base64 options to use (default, url, etc.) + * @param last_chunk_options the last chunk handling options (loose, strict, + * stop_before_partial) + * @param decode_up_to_bad_char if true, the function will decode up to the + * first invalid character. By default (false), it is assumed that the output + * buffer is to be discarded. When there are multiple errors in the input, + * using decode_up_to_bad_char might trigger a different error. + * @return a result struct with an error code and count indicating error + * position or success + */ +simdutf_warn_unused result atomic_base64_to_binary_safe( + const char *input, size_t length, char *output, size_t &outlen, + base64_options options = base64_default, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose, + bool decode_up_to_bad_char = false) noexcept; +simdutf_warn_unused result atomic_base64_to_binary_safe( + const char16_t *input, size_t length, char *output, size_t &outlen, + base64_options options = base64_default, + last_chunk_handling_options last_chunk_options = loose, + bool decode_up_to_bad_char = false) noexcept; + #if SIMDUTF_SPAN +/** + * @brief span overload + * @return a tuple of result and outlen + */ +simdutf_really_inline simdutf_warn_unused std::tuple +atomic_base64_to_binary_safe( + const detail::input_span_of_byte_like auto &binary_input, + detail::output_span_of_byte_like auto &&output, + base64_options options = base64_default, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose, + bool decode_up_to_bad_char = false) noexcept { + size_t outlen = output.size(); + auto ret = 
atomic_base64_to_binary_safe( + reinterpret_cast(binary_input.data()), binary_input.size(), + reinterpret_cast(output.data()), outlen, options, + last_chunk_options, decode_up_to_bad_char); + return {ret, outlen}; +} +/** + * @brief span overload + * @return a tuple of result and outlen + */ +simdutf_warn_unused std::tuple +atomic_base64_to_binary_safe( + std::span base64_input, + detail::output_span_of_byte_like auto &&binary_output, + base64_options options = base64_default, + last_chunk_handling_options last_chunk_options = loose, + bool decode_up_to_bad_char = false) noexcept { + size_t outlen = binary_output.size(); + auto ret = atomic_base64_to_binary_safe( + base64_input.data(), base64_input.size(), + reinterpret_cast(binary_output.data()), outlen, options, + last_chunk_options, decode_up_to_bad_char); + return {ret, outlen}; +} + #endif // SIMDUTF_SPAN + #endif // SIMDUTF_ATOMIC_REF + +#endif // SIMDUTF_FEATURE_BASE64 + +/** + * An implementation of simdutf for a particular CPU architecture. + * + * Also used to maintain the currently active implementation. The active + * implementation is automatically initialized on first use to the most advanced + * implementation supported by the host. + */ +class implementation { +public: + /** + * The name of this implementation. + * + * const implementation *impl = simdutf::active_implementation; + * cout << "simdutf is optimized for " << impl->name() << "(" << + * impl->description() << ")" << endl; + * + * @return the name of the implementation, e.g. "haswell", "westmere", "arm64" + */ + virtual std::string name() const { return std::string(_name); } + + /** + * The description of this implementation. + * + * const implementation *impl = simdutf::active_implementation; + * cout << "simdutf is optimized for " << impl->name() << "(" << + * impl->description() << ")" << endl; + * + * @return the name of the implementation, e.g. 
"haswell", "westmere", "arm64" + */ + virtual std::string description() const { return std::string(_description); } + + /** + * The instruction sets this implementation is compiled against + * and the current CPU match. This function may poll the current CPU/system + * and should therefore not be called too often if performance is a concern. + * + * + * @return true if the implementation can be safely used on the current system + * (determined at runtime) + */ + bool supported_by_runtime_system() const; + +#if SIMDUTF_FEATURE_DETECT_ENCODING + /** + * This function will try to detect the encoding + * @param input the string to identify + * @param length the length of the string in bytes. + * @return the encoding type detected + */ + virtual encoding_type autodetect_encoding(const char *input, + size_t length) const noexcept; + + /** + * This function will try to detect the possible encodings in one pass + * @param input the string to identify + * @param length the length of the string in bytes. + * @return the encoding type detected + */ + virtual int detect_encodings(const char *input, + size_t length) const noexcept = 0; +#endif // SIMDUTF_FEATURE_DETECT_ENCODING + + /** + * @private For internal implementation use + * + * The instruction sets this implementation is compiled against. + * + * @return a mask of all required `internal::instruction_set::` values + */ + virtual uint32_t required_instruction_sets() const { + return _required_instruction_sets; + } + +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + /** + * Validate the UTF-8 string. + * + * Overridden by each implementation. + * + * @param buf the UTF-8 string to validate. + * @param len the length of the string in bytes. + * @return true if and only if the string is valid UTF-8. 
+ */ + simdutf_warn_unused virtual bool validate_utf8(const char *buf, + size_t len) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 + /** + * Validate the UTF-8 string and stop on errors. + * + * Overridden by each implementation. + * + * @param buf the UTF-8 string to validate. + * @param len the length of the string in bytes. + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of code units validated + * if successful. + */ + simdutf_warn_unused virtual result + validate_utf8_with_errors(const char *buf, size_t len) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_ASCII + /** + * Validate the ASCII string. + * + * Overridden by each implementation. + * + * @param buf the ASCII string to validate. + * @param len the length of the string in bytes. + * @return true if and only if the string is valid ASCII. + */ + simdutf_warn_unused virtual bool + validate_ascii(const char *buf, size_t len) const noexcept = 0; + + /** + * Validate the ASCII string and stop on error. + * + * Overridden by each implementation. + * + * @param buf the ASCII string to validate. + * @param len the length of the string in bytes. + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of code units validated + * if successful. + */ + simdutf_warn_unused virtual result + validate_ascii_with_errors(const char *buf, size_t len) const noexcept = 0; + +#endif // SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII + /** + * Validate the ASCII string as a UTF-16BE sequence. 
+ * A UTF-16 sequence is considered an ASCII sequence + * if it could be converted to an ASCII string losslessly. + * + * Overridden by each implementation. + * + * @param buf the UTF-16BE string to validate. + * @param len the length of the string in bytes. + * @return true if and only if the string is valid ASCII. + */ + simdutf_warn_unused virtual bool + validate_utf16be_as_ascii(const char16_t *buf, size_t len) const noexcept = 0; + + /** + * Validate the ASCII string as a UTF-16LE sequence. + * A UTF-16 sequence is considered an ASCII sequence + * if it could be converted to an ASCII string losslessly. + * + * Overridden by each implementation. + * + * @param buf the UTF-16LE string to validate. + * @param len the length of the string in bytes. + * @return true if and only if the string is valid ASCII. + */ + simdutf_warn_unused virtual bool + validate_utf16le_as_ascii(const char16_t *buf, size_t len) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + /** + * Validate the UTF-16LE string. This function may be best when you expect + * the input to be almost always valid. Otherwise, consider using + * validate_utf16le_with_errors. + * + * Overridden by each implementation. + * + * This function is not BOM-aware. + * + * @param buf the UTF-16LE string to validate. + * @param len the length of the string in number of 2-byte code units + * (char16_t). + * @return true if and only if the string is valid UTF-16LE. + */ + simdutf_warn_unused virtual bool + validate_utf16le(const char16_t *buf, size_t len) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF16 + /** + * Validate the UTF-16BE string. This function may be best when you expect + * the input to be almost always valid. Otherwise, consider using + * validate_utf16be_with_errors. + * + * Overridden by each implementation.
+ * + * This function is not BOM-aware. + * + * @param buf the UTF-16BE string to validate. + * @param len the length of the string in number of 2-byte code units + * (char16_t). + * @return true if and only if the string is valid UTF-16BE. + */ + simdutf_warn_unused virtual bool + validate_utf16be(const char16_t *buf, size_t len) const noexcept = 0; + + /** + * Validate the UTF-16LE string and stop on error. It might be faster than + * validate_utf16le when an error is expected to occur early. + * + * Overridden by each implementation. + * + * This function is not BOM-aware. + * + * @param buf the UTF-16LE string to validate. + * @param len the length of the string in number of 2-byte code units + * (char16_t). + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of code units validated + * if successful. + */ + simdutf_warn_unused virtual result + validate_utf16le_with_errors(const char16_t *buf, + size_t len) const noexcept = 0; + + /** + * Validate the UTF-16BE string and stop on error. It might be faster than + * validate_utf16be when an error is expected to occur early. + * + * Overridden by each implementation. + * + * This function is not BOM-aware. + * + * @param buf the UTF-16BE string to validate. + * @param len the length of the string in number of 2-byte code units + * (char16_t). + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of code units validated + * if successful. + */ + simdutf_warn_unused virtual result + validate_utf16be_with_errors(const char16_t *buf, + size_t len) const noexcept = 0; + /** + * Copies the UTF-16LE string while replacing mismatched surrogates with the + * Unicode replacement character U+FFFD. 
We allow the input and output to be + * the same buffer so that the correction is done in-place. + * + * Overridden by each implementation. + * + * @param input the UTF-16LE string to correct. + * @param len the length of the string in number of 2-byte code units + * (char16_t). + * @param output the output buffer. + */ + virtual void to_well_formed_utf16le(const char16_t *input, size_t len, + char16_t *output) const noexcept = 0; + /** + * Copies the UTF-16BE string while replacing mismatched surrogates with the + * Unicode replacement character U+FFFD. We allow the input and output to be + * the same buffer so that the correction is done in-place. + * + * Overridden by each implementation. + * + * @param input the UTF-16BE string to correct. + * @param len the length of the string in number of 2-byte code units + * (char16_t). + * @param output the output buffer. + */ + virtual void to_well_formed_utf16be(const char16_t *input, size_t len, + char16_t *output) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + /** + * Validate the UTF-32 string. + * + * Overridden by each implementation. + * + * This function is not BOM-aware. + * + * @param buf the UTF-32 string to validate. + * @param len the length of the string in number of 4-byte code units + * (char32_t). + * @return true if and only if the string is valid UTF-32. + */ + simdutf_warn_unused virtual bool + validate_utf32(const char32_t *buf, size_t len) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF32 + /** + * Validate the UTF-32 string and stop on error. + * + * Overridden by each implementation. + * + * This function is not BOM-aware. + * + * @param buf the UTF-32 string to validate. + * @param len the length of the string in number of 4-byte code units + * (char32_t). 
+ * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of code units validated + * if successful. + */ + simdutf_warn_unused virtual result + validate_utf32_with_errors(const char32_t *buf, + size_t len) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + /** + * Convert Latin1 string into UTF-8 string. + * + * This function is suitable to work with inputs from untrusted sources. + * + * @param input the Latin1 string to convert + * @param length the length of the string in bytes + * @param utf8_output the pointer to buffer that can hold conversion result + * @return the number of written char; 0 if conversion is not possible + */ + simdutf_warn_unused virtual size_t + convert_latin1_to_utf8(const char *input, size_t length, + char *utf8_output) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + /** + * Convert Latin1 string into UTF-16LE string. + * + * This function is suitable to work with inputs from untrusted sources. + * + * @param input the Latin1 string to convert + * @param length the length of the string in bytes + * @param utf16_output the pointer to buffer that can hold conversion result + * @return the number of written char16_t; 0 if conversion is not possible + */ + simdutf_warn_unused virtual size_t + convert_latin1_to_utf16le(const char *input, size_t length, + char16_t *utf16_output) const noexcept = 0; + + /** + * Convert Latin1 string into UTF-16BE string. + * + * This function is suitable to work with inputs from untrusted sources.
+ * + * @param input the Latin1 string to convert + * @param length the length of the string in bytes + * @param utf16_output the pointer to buffer that can hold conversion result + * @return the number of written char16_t; 0 if conversion is not possible + */ + simdutf_warn_unused virtual size_t + convert_latin1_to_utf16be(const char *input, size_t length, + char16_t *utf16_output) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + /** + * Convert Latin1 string into UTF-32 string. + * + * This function is suitable to work with inputs from untrusted sources. + * + * @param input the Latin1 string to convert + * @param length the length of the string in bytes + * @param utf32_buffer the pointer to buffer that can hold conversion result + * @return the number of written char32_t; 0 if conversion is not possible + */ + simdutf_warn_unused virtual size_t + convert_latin1_to_utf32(const char *input, size_t length, + char32_t *utf32_buffer) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + /** + * Convert possibly broken UTF-8 string into Latin1 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param latin1_output the pointer to buffer that can hold conversion result + * @return the number of written char; 0 if the input was not valid UTF-8 + * string or if it cannot be represented as Latin1 + */ + simdutf_warn_unused virtual size_t + convert_utf8_to_latin1(const char *input, size_t length, + char *latin1_output) const noexcept = 0; + + /** + * Convert possibly broken UTF-8 string into Latin1 string with errors. + * If the string cannot be represented as Latin1, an error + * code is returned.
+ * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param latin1_output the pointer to buffer that can hold conversion result + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of code units validated + * if successful. + */ + simdutf_warn_unused virtual result + convert_utf8_to_latin1_with_errors(const char *input, size_t length, + char *latin1_output) const noexcept = 0; + + /** + * Convert valid UTF-8 string into latin1 string. + * + * This function assumes that the input string is valid UTF-8 and that it can + * be represented as Latin1. If you violate this assumption, the result is + * implementation defined and may include system-dependent behavior such as + * crashes. + * + * This function is for expert users only and not part of our public API. Use + * convert_utf8_to_latin1 instead. + * + * This function is not BOM-aware. + * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param latin1_output the pointer to buffer that can hold conversion result + * @return the number of written char; 0 if the input was not valid UTF-8 + * string + */ + simdutf_warn_unused virtual size_t + convert_valid_utf8_to_latin1(const char *input, size_t length, + char *latin1_output) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + /** + * Convert possibly broken UTF-8 string into UTF-16LE string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. 
+ * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param utf16_output the pointer to buffer that can hold conversion result + * @return the number of written char16_t; 0 if the input was not valid UTF-8 + * string + */ + simdutf_warn_unused virtual size_t + convert_utf8_to_utf16le(const char *input, size_t length, + char16_t *utf16_output) const noexcept = 0; + + /** + * Convert possibly broken UTF-8 string into UTF-16BE string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param utf16_output the pointer to buffer that can hold conversion result + * @return the number of written char16_t; 0 if the input was not valid UTF-8 + * string + */ + simdutf_warn_unused virtual size_t + convert_utf8_to_utf16be(const char *input, size_t length, + char16_t *utf16_output) const noexcept = 0; + + /** + * Convert possibly broken UTF-8 string into UTF-16LE string and stop on + * error. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param utf16_output the pointer to buffer that can hold conversion result + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of code units validated + * if successful. + */ + simdutf_warn_unused virtual result convert_utf8_to_utf16le_with_errors( + const char *input, size_t length, + char16_t *utf16_output) const noexcept = 0; + + /** + * Convert possibly broken UTF-8 string into UTF-16BE string and stop on + * error.
+ * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param utf16_output the pointer to buffer that can hold conversion result + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of code units validated + * if successful. + */ + simdutf_warn_unused virtual result convert_utf8_to_utf16be_with_errors( + const char *input, size_t length, + char16_t *utf16_output) const noexcept = 0; + /** + * Compute the number of bytes that this UTF-16LE string would require in + * UTF-8 format even when the UTF-16LE content contains mismatched + * surrogates that have to be replaced by the replacement character (U+FFFD). + * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte code units + * (char16_t) + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) where the count is the number of bytes required to + * encode the UTF-16LE string as UTF-8, and the error code is either SUCCESS + * or SURROGATE. The count is correct regardless of the error field. + * When SURROGATE is returned, it does not indicate an error in the case of + * this function: it indicates that at least one surrogate has been + * encountered: the surrogates may be matched or not (thus this function does + * not validate). If the returned error code is SUCCESS, then the input + * contains no surrogate, is in the Basic Multilingual Plane, and is + * necessarily valid.
+ */ + virtual simdutf_warn_unused result utf8_length_from_utf16le_with_replacement( + const char16_t *input, size_t length) const noexcept = 0; + + /** + * Compute the number of bytes that this UTF-16BE string would require in + * UTF-8 format even when the UTF-16BE content contains mismatched + * surrogates that have to be replaced by the replacement character (0xFFFD). + * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte code units + * (char16_t) + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) where the count is the number of bytes required to + * encode the UTF-16BE string as UTF-8, and the error code is either SUCCESS + * or SURROGATE. The count is correct regardless of the error field. + * When SURROGATE is returned, it does not indicate an error in the case of + * this function: it indicates that at least one surrogate has been + * encountered: the surrogates may be matched or not (thus this function does + * not validate). If the returned error code is SUCCESS, then the input + * contains no surrogate, is in the Basic Multilingual Plane, and is + * necessarily valid. + */ + virtual simdutf_warn_unused result utf8_length_from_utf16be_with_replacement( + const char16_t *input, size_t length) const noexcept = 0; + +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + /** + * Convert possibly broken UTF-8 string into UTF-32 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. 
+ * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param utf32_output the pointer to buffer that can hold conversion result + * @return the number of written char32_t; 0 if the input was not valid UTF-8 + * string + */ + simdutf_warn_unused virtual size_t + convert_utf8_to_utf32(const char *input, size_t length, + char32_t *utf32_output) const noexcept = 0; + + /** + * Convert possibly broken UTF-8 string into UTF-32 string and stop on error. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param utf32_output the pointer to buffer that can hold conversion result + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of char32_t written if + * successful. + */ + simdutf_warn_unused virtual result + convert_utf8_to_utf32_with_errors(const char *input, size_t length, + char32_t *utf32_output) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + /** + * Convert valid UTF-8 string into UTF-16LE string. + * + * This function assumes that the input string is valid UTF-8. + * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param utf16_buffer the pointer to buffer that can hold conversion result + * @return the number of written char16_t + */ + simdutf_warn_unused virtual size_t + convert_valid_utf8_to_utf16le(const char *input, size_t length, + char16_t *utf16_buffer) const noexcept = 0; + + /** + * Convert valid UTF-8 string into UTF-16BE string. + * + * This function assumes that the input string is valid UTF-8.
+ * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param utf16_buffer the pointer to buffer that can hold conversion result + * @return the number of written char16_t + */ + simdutf_warn_unused virtual size_t + convert_valid_utf8_to_utf16be(const char *input, size_t length, + char16_t *utf16_buffer) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + /** + * Convert valid UTF-8 string into UTF-32 string. + * + * This function assumes that the input string is valid UTF-8. + * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param utf32_buffer the pointer to buffer that can hold conversion result + * @return the number of written char32_t + */ + simdutf_warn_unused virtual size_t + convert_valid_utf8_to_utf32(const char *input, size_t length, + char32_t *utf32_buffer) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + /** + * Compute the number of 2-byte code units that this UTF-8 string would + * require in UTF-16LE format. + * + * This function does not validate the input. It is acceptable to pass invalid + * UTF-8 strings but in such cases the result is implementation defined. + * + * @param input the UTF-8 string to process + * @param length the length of the string in bytes + * @return the number of char16_t code units required to encode the UTF-8 + * string as UTF-16LE + */ + simdutf_warn_unused virtual size_t + utf16_length_from_utf8(const char *input, size_t length) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + /** + * Compute the number of 4-byte code units that this UTF-8 string would + * require in UTF-32 format. + * + * This function is equivalent to count_utf8.
It is acceptable to pass invalid + * UTF-8 strings but in such cases the result is implementation defined. + * + * This function does not validate the input. + * + * @param input the UTF-8 string to process + * @param length the length of the string in bytes + * @return the number of char32_t code units required to encode the UTF-8 + * string as UTF-32 + */ + simdutf_warn_unused virtual size_t + utf32_length_from_utf8(const char *input, size_t length) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + /** + * Convert possibly broken UTF-16LE string into Latin1 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte code units + * (char16_t) + * @param latin1_buffer the pointer to buffer that can hold conversion + * result + * @return number of written code units; 0 if input is not a valid UTF-16LE + * string or if it cannot be represented as Latin1 + */ + simdutf_warn_unused virtual size_t + convert_utf16le_to_latin1(const char16_t *input, size_t length, + char *latin1_buffer) const noexcept = 0; + + /** + * Convert possibly broken UTF-16BE string into Latin1 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte code units + * (char16_t) + * @param latin1_buffer the pointer to buffer that can hold conversion + * result + * @return number of written code units; 0 if input is not a valid UTF-16BE + * string or if it cannot be represented as Latin1 + */ + simdutf_warn_unused virtual size_t + convert_utf16be_to_latin1(const char16_t *input, size_t length, + char *latin1_buffer) const noexcept = 0; + + /** + * Convert possibly broken UTF-16LE string into Latin1 string. + * If the string cannot be represented as Latin1, an error + * is returned. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * This function is not BOM-aware. + * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte code units + * (char16_t) + * @param latin1_buffer the pointer to buffer that can hold conversion + * result + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of char written if + * successful. + */ + simdutf_warn_unused virtual result + convert_utf16le_to_latin1_with_errors(const char16_t *input, size_t length, + char *latin1_buffer) const noexcept = 0; + + /** + * Convert possibly broken UTF-16BE string into Latin1 string. + * If the string cannot be represented as Latin1, an error + * is returned. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * This function is not BOM-aware. 
+ * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte code units + * (char16_t) + * @param latin1_buffer the pointer to buffer that can hold conversion + * result + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of char written if + * successful. + */ + simdutf_warn_unused virtual result + convert_utf16be_to_latin1_with_errors(const char16_t *input, size_t length, + char *latin1_buffer) const noexcept = 0; + + /** + * Convert valid UTF-16LE string into Latin1 string. + * + * This function assumes that the input string is valid UTF-16LE and that it + * can be represented as Latin1. If you violate this assumption, the result is + * implementation defined and may include system-dependent behavior such as + * crashes. + * + * This function is for expert users only and not part of our public API. Use + * convert_utf16le_to_latin1 instead. + * + * This function is not BOM-aware. + * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte code units + * (char16_t) + * @param latin1_buffer the pointer to buffer that can hold conversion + * result + * @return number of written code units; 0 if conversion is not possible + */ + simdutf_warn_unused virtual size_t + convert_valid_utf16le_to_latin1(const char16_t *input, size_t length, + char *latin1_buffer) const noexcept = 0; + + /** + * Convert valid UTF-16BE string into Latin1 string. + * + * This function assumes that the input string is valid UTF-16BE and that it + * can be represented as Latin1. If you violate this assumption, the result is + * implementation defined and may include system-dependent behavior such as + * crashes. + * + * This function is for expert users only and not part of our public API. Use + * convert_utf16be_to_latin1 instead.
+ * + * This function is not BOM-aware. + * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte code units + * (char16_t) + * @param latin1_buffer the pointer to buffer that can hold conversion + * result + * @return number of written code units; 0 if conversion is not possible + */ + simdutf_warn_unused virtual size_t + convert_valid_utf16be_to_latin1(const char16_t *input, size_t length, + char *latin1_buffer) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + /** + * Convert possibly broken UTF-16LE string into UTF-8 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte code units + * (char16_t) + * @param utf8_buffer the pointer to buffer that can hold conversion result + * @return number of written code units; 0 if input is not a valid UTF-16LE + * string + */ + simdutf_warn_unused virtual size_t + convert_utf16le_to_utf8(const char16_t *input, size_t length, + char *utf8_buffer) const noexcept = 0; + + /** + * Convert possibly broken UTF-16BE string into UTF-8 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte code units + * (char16_t) + * @param utf8_buffer the pointer to buffer that can hold conversion result + * @return number of written code units; 0 if input is not a valid UTF-16BE + * string + */ + simdutf_warn_unused virtual size_t + convert_utf16be_to_utf8(const char16_t *input, size_t length, + char *utf8_buffer) const noexcept = 0; + + /** + * Convert possibly broken UTF-16LE string into UTF-8 string and stop on + * error. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte code units + * (char16_t) + * @param utf8_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of char written if + * successful. + */ + simdutf_warn_unused virtual result + convert_utf16le_to_utf8_with_errors(const char16_t *input, size_t length, + char *utf8_buffer) const noexcept = 0; + + /** + * Convert possibly broken UTF-16BE string into UTF-8 string and stop on + * error. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte code units + * (char16_t) + * @param utf8_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of char written if + * successful. + */ + simdutf_warn_unused virtual result + convert_utf16be_to_utf8_with_errors(const char16_t *input, size_t length, + char *utf8_buffer) const noexcept = 0; + + /** + * Convert valid UTF-16LE string into UTF-8 string. + * + * This function assumes that the input string is valid UTF-16LE. + * + * This function is not BOM-aware. + * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte code units + * (char16_t) + * @param utf8_buffer the pointer to a buffer that can hold the conversion + * result + * @return number of written code units; 0 if conversion is not possible + */ + simdutf_warn_unused virtual size_t + convert_valid_utf16le_to_utf8(const char16_t *input, size_t length, + char *utf8_buffer) const noexcept = 0; + + /** + * Convert valid UTF-16BE string into UTF-8 string. + * + * This function assumes that the input string is valid UTF-16BE. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte code units + * (char16_t) + * @param utf8_buffer the pointer to a buffer that can hold the conversion + * result + * @return number of written code units; 0 if conversion is not possible + */ + simdutf_warn_unused virtual size_t + convert_valid_utf16be_to_utf8(const char16_t *input, size_t length, + char *utf8_buffer) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + /** + * Convert possibly broken UTF-16LE string into UTF-32 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte code units + * (char16_t) + * @param utf32_buffer the pointer to buffer that can hold conversion result + * @return number of written code units; 0 if input is not a valid UTF-16LE + * string + */ + simdutf_warn_unused virtual size_t + convert_utf16le_to_utf32(const char16_t *input, size_t length, + char32_t *utf32_buffer) const noexcept = 0; + + /** + * Convert possibly broken UTF-16BE string into UTF-32 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte code units + * (char16_t) + * @param utf32_buffer the pointer to buffer that can hold conversion result + * @return number of written code units; 0 if input is not a valid UTF-16BE + * string + */ + simdutf_warn_unused virtual size_t + convert_utf16be_to_utf32(const char16_t *input, size_t length, + char32_t *utf32_buffer) const noexcept = 0; + + /** + * Convert possibly broken UTF-16LE string into UTF-32 string and stop on + * error. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte code units + * (char16_t) + * @param utf32_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of char32_t written if + * successful. + */ + simdutf_warn_unused virtual result convert_utf16le_to_utf32_with_errors( + const char16_t *input, size_t length, + char32_t *utf32_buffer) const noexcept = 0; + + /** + * Convert possibly broken UTF-16BE string into UTF-32 string and stop on + * error. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte code units + * (char16_t) + * @param utf32_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of char32_t written if + * successful. + */ + simdutf_warn_unused virtual result convert_utf16be_to_utf32_with_errors( + const char16_t *input, size_t length, + char32_t *utf32_buffer) const noexcept = 0; + + /** + * Convert valid UTF-16LE string into UTF-32 string. + * + * This function assumes that the input string is valid UTF-16LE. + * + * This function is not BOM-aware. + * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte code units + * (char16_t) + * @param utf32_buffer the pointer to a buffer that can hold the conversion + * result + * @return number of written code units; 0 if conversion is not possible + */ + simdutf_warn_unused virtual size_t + convert_valid_utf16le_to_utf32(const char16_t *input, size_t length, + char32_t *utf32_buffer) const noexcept = 0; + + /** + * Convert valid UTF-16BE string into UTF-32 string. + * + * This function assumes that the input string is valid UTF-16BE. + * + * This function is not BOM-aware.
+ * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte code units + * (char16_t) + * @param utf32_buffer the pointer to a buffer that can hold the conversion + * result + * @return number of written code units; 0 if conversion is not possible + */ + simdutf_warn_unused virtual size_t + convert_valid_utf16be_to_utf32(const char16_t *input, size_t length, + char32_t *utf32_buffer) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + /** + * Compute the number of bytes that this UTF-16LE string would require in + * UTF-8 format. + * + * This function does not validate the input. It is acceptable to pass invalid + * UTF-16 strings but in such cases the result is implementation defined. + * + * This function is not BOM-aware. + * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte code units + * (char16_t) + * @return the number of bytes required to encode the UTF-16LE string as UTF-8 + */ + simdutf_warn_unused virtual size_t + utf8_length_from_utf16le(const char16_t *input, + size_t length) const noexcept = 0; + + /** + * Compute the number of bytes that this UTF-16BE string would require in + * UTF-8 format. + * + * This function does not validate the input. It is acceptable to pass invalid + * UTF-16 strings but in such cases the result is implementation defined. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte code units + * (char16_t) + * @return the number of bytes required to encode the UTF-16BE string as UTF-8 + */ + simdutf_warn_unused virtual size_t + utf8_length_from_utf16be(const char16_t *input, + size_t length) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + /** + * Convert possibly broken UTF-32 string into Latin1 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte code units + * (char32_t) + * @param latin1_buffer the pointer to buffer that can hold conversion + * result + * @return number of written code units; 0 if input is not a valid UTF-32 + * string + */ + simdutf_warn_unused virtual size_t + convert_utf32_to_latin1(const char32_t *input, size_t length, + char *latin1_buffer) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + /** + * Convert possibly broken UTF-32 string into Latin1 string and stop on error. + * If the string cannot be represented as Latin1, an error is returned. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte code units + * (char32_t) + * @param latin1_buffer the pointer to buffer that can hold conversion + * result + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of char written if + * successful. + */ + simdutf_warn_unused virtual result + convert_utf32_to_latin1_with_errors(const char32_t *input, size_t length, + char *latin1_buffer) const noexcept = 0; + + /** + * Convert valid UTF-32 string into Latin1 string. + * + * This function assumes that the input string is valid UTF-32 and can be + * represented as Latin1. If you violate this assumption, the result is + * implementation defined and may include system-dependent behavior such as + * crashes. + * + * This function is for expert users only and not part of our public API. Use + * convert_utf32_to_latin1 instead. + * + * This function is not BOM-aware. + * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte code units + * (char32_t) + * @param latin1_buffer the pointer to a buffer that can hold the conversion + * result + * @return number of written code units; 0 if conversion is not possible + */ + simdutf_warn_unused virtual size_t + convert_valid_utf32_to_latin1(const char32_t *input, size_t length, + char *latin1_buffer) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + /** + * Convert possibly broken UTF-32 string into UTF-8 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte code units + * (char32_t) + * @param utf8_buffer the pointer to buffer that can hold conversion result + * @return number of written code units; 0 if input is not a valid UTF-32 + * string + */ + simdutf_warn_unused virtual size_t + convert_utf32_to_utf8(const char32_t *input, size_t length, + char *utf8_buffer) const noexcept = 0; + + /** + * Convert possibly broken UTF-32 string into UTF-8 string and stop on error. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte code units + * (char32_t) + * @param utf8_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of char written if + * successful. + */ + simdutf_warn_unused virtual result + convert_utf32_to_utf8_with_errors(const char32_t *input, size_t length, + char *utf8_buffer) const noexcept = 0; + + /** + * Convert valid UTF-32 string into UTF-8 string. + * + * This function assumes that the input string is valid UTF-32. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte code units + * (char32_t) + * @param utf8_buffer the pointer to a buffer that can hold the conversion + * result + * @return number of written code units; 0 if conversion is not possible + */ + simdutf_warn_unused virtual size_t + convert_valid_utf32_to_utf8(const char32_t *input, size_t length, + char *utf8_buffer) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + /** + * Return the number of 2-byte code units (char16_t) that this Latin1 string + * would require in UTF-16 format. + * + * The default implementation returns length unchanged (no input needed). + * + * @param length the length of the Latin1 string in bytes + * @return the number of char16_t code units required to encode the Latin1 + * string as UTF-16 + */ + simdutf_warn_unused virtual size_t + utf16_length_from_latin1(size_t length) const noexcept { + return length; + } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + /** + * Convert possibly broken UTF-32 string into UTF-16LE string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte code units + * (char32_t) + * @param utf16_buffer the pointer to buffer that can hold conversion result + * @return number of written code units; 0 if input is not a valid UTF-32 + * string + */ + simdutf_warn_unused virtual size_t + convert_utf32_to_utf16le(const char32_t *input, size_t length, + char16_t *utf16_buffer) const noexcept = 0; + + /** + * Convert possibly broken UTF-32 string into UTF-16BE string. + * + * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte code units + * (char32_t) + * @param utf16_buffer the pointer to buffer that can hold conversion result + * @return number of written code units; 0 if input is not a valid UTF-32 + * string + */ + simdutf_warn_unused virtual size_t + convert_utf32_to_utf16be(const char32_t *input, size_t length, + char16_t *utf16_buffer) const noexcept = 0; + + /** + * Convert possibly broken UTF-32 string into UTF-16LE string and stop on + * error. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte code units + * (char32_t) + * @param utf16_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of char16_t written if + * successful. + */ + simdutf_warn_unused virtual result convert_utf32_to_utf16le_with_errors( + const char32_t *input, size_t length, + char16_t *utf16_buffer) const noexcept = 0; + + /** + * Convert possibly broken UTF-32 string into UTF-16BE string and stop on + * error. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte code units + * (char32_t) + * @param utf16_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in code units) if any, or the number of char16_t written if + * successful. + */ + simdutf_warn_unused virtual result convert_utf32_to_utf16be_with_errors( + const char32_t *input, size_t length, + char16_t *utf16_buffer) const noexcept = 0; + + /** + * Convert valid UTF-32 string into UTF-16LE string. + * + * This function assumes that the input string is valid UTF-32. + * + * This function is not BOM-aware. + * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte code units + * (char32_t) + * @param utf16_buffer the pointer to a buffer that can hold the conversion + * result + * @return number of written code units; 0 if conversion is not possible + */ + simdutf_warn_unused virtual size_t + convert_valid_utf32_to_utf16le(const char32_t *input, size_t length, + char16_t *utf16_buffer) const noexcept = 0; + + /** + * Convert valid UTF-32 string into UTF-16BE string. + * + * This function assumes that the input string is valid UTF-32. + * + * This function is not BOM-aware. + * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte code units + * (char32_t) + * @param utf16_buffer the pointer to a buffer that can hold the conversion + * result + * @return number of written code units; 0 if conversion is not possible + */ + simdutf_warn_unused virtual size_t + convert_valid_utf32_to_utf16be(const char32_t *input, size_t length, + char16_t *utf16_buffer) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 + /** + * Change the endianness of the input. 
Can be used to go from UTF-16LE to + * UTF-16BE or from UTF-16BE to UTF-16LE. + * + * This function does not validate the input. + * + * This function is not BOM-aware. + * + * @param input the UTF-16 string to process + * @param length the length of the string in 2-byte code units + * (char16_t) + * @param output the pointer to a buffer that can hold the conversion + * result + */ + virtual void change_endianness_utf16(const char16_t *input, size_t length, + char16_t *output) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + /** + * Return the number of bytes that this Latin1 string would require in UTF-8 + * format. + * + * @param input the Latin1 string to convert + * @param length the length of the string in bytes + * @return the number of bytes required to encode the Latin1 string as UTF-8 + */ + simdutf_warn_unused virtual size_t + utf8_length_from_latin1(const char *input, size_t length) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + /** + * Compute the number of bytes that this UTF-32 string would require in UTF-8 + * format. + * + * This function does not validate the input. It is acceptable to pass invalid + * UTF-32 strings but in such cases the result is implementation defined. + * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte code units + * (char32_t) + * @return the number of bytes required to encode the UTF-32 string as UTF-8 + */ + simdutf_warn_unused virtual size_t + utf8_length_from_utf32(const char32_t *input, + size_t length) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + /** + * Compute the number of bytes that this UTF-32 string would require in Latin1 + * format. + * + * This function does not validate the input.
It is acceptable to pass invalid + * UTF-32 strings but in such cases the result is implementation defined. + * + * @param length the length of the string in 4-byte code units + * (char32_t) + * @return the number of bytes required to encode the UTF-32 string as Latin1 + */ + simdutf_warn_unused virtual size_t + latin1_length_from_utf32(size_t length) const noexcept { + return length; + } +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + /** + * Compute the number of bytes that this UTF-8 string would require in Latin1 + * format. + * + * This function does not validate the input. It is acceptable to pass invalid + * UTF-8 strings but in such cases the result is implementation defined. + * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @return the number of bytes required to encode the UTF-8 string as Latin1 + */ + simdutf_warn_unused virtual size_t + latin1_length_from_utf8(const char *input, size_t length) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + /** + * Compute the number of bytes that this UTF-16LE/BE string would require in + * Latin1 format. + * + * This function does not validate the input. It is acceptable to pass invalid + * UTF-16 strings but in such cases the result is implementation defined. + * + * This function is not BOM-aware.
+ * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte code units + * (char16_t) + * @return the number of bytes required to encode the UTF-16LE string as + * Latin1 + */ + simdutf_warn_unused virtual size_t + latin1_length_from_utf16(size_t length) const noexcept { + return length; + } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + /** + * Compute the number of two-byte code units that this UTF-32 string would + * require in UTF-16 format. + * + * This function does not validate the input. It is acceptable to pass invalid + * UTF-32 strings but in such cases the result is implementation defined. + * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte code units + * (char32_t) + * @return the number of 2-byte code units needed to encode the string as UTF-16 + */ + simdutf_warn_unused virtual size_t + utf16_length_from_utf32(const char32_t *input, + size_t length) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + /** + * Return the number of 4-byte code units (char32_t) that this Latin1 string + * would require in UTF-32 format. + * + * @param length the length of the Latin1 string in bytes (one byte per + * character) + * @return the number of code units required to encode the string as UTF-32 + */ + simdutf_warn_unused virtual size_t + utf32_length_from_latin1(size_t length) const noexcept { + return length; + } +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + /** + * Compute the number of bytes that this UTF-16LE string would require in + * UTF-32 format. + * + * This function is equivalent to count_utf16le. + * + * This function does not validate the input. It is acceptable to pass invalid + * UTF-16 strings but in such cases the result is implementation defined.
+ * + * This function is not BOM-aware. + * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte code units + * (char16_t) + * @return the number of bytes required to encode the UTF-16LE string as + * UTF-32 + */ + simdutf_warn_unused virtual size_t + utf32_length_from_utf16le(const char16_t *input, + size_t length) const noexcept = 0; + + /** + * Compute the number of bytes that this UTF-16BE string would require in + * UTF-32 format. + * + * This function is equivalent to count_utf16be. + * + * This function does not validate the input. It is acceptable to pass invalid + * UTF-16 strings but in such cases the result is implementation defined. + * + * This function is not BOM-aware. + * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte code units + * (char16_t) + * @return the number of bytes required to encode the UTF-16BE string as + * UTF-32 + */ + simdutf_warn_unused virtual size_t + utf32_length_from_utf16be(const char16_t *input, + size_t length) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 + /** + * Count the number of code points (characters) in the string assuming that + * it is valid. + * + * This function assumes that the input string is valid UTF-16LE. + * It is acceptable to pass invalid UTF-16 strings but in such cases + * the result is implementation defined. + * + * This function is not BOM-aware. + * + * @param input the UTF-16LE string to process + * @param length the length of the string in 2-byte code units + * (char16_t) + * @return number of code points + */ + simdutf_warn_unused virtual size_t + count_utf16le(const char16_t *input, size_t length) const noexcept = 0; + + /** + * Count the number of code points (characters) in the string assuming that + * it is valid. + * + * This function assumes that the input string is valid UTF-16BE. 
+ * It is acceptable to pass invalid UTF-16 strings but in such cases + * the result is implementation defined. + * + * This function is not BOM-aware. + * + * @param input the UTF-16BE string to process + * @param length the length of the string in 2-byte code units + * (char16_t) + * @return number of code points + */ + simdutf_warn_unused virtual size_t + count_utf16be(const char16_t *input, size_t length) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 + /** + * Count the number of code points (characters) in the string assuming that + * it is valid. + * + * This function assumes that the input string is valid UTF-8. + * It is acceptable to pass invalid UTF-8 strings but in such cases + * the result is implementation defined. + * + * @param input the UTF-8 string to process + * @param length the length of the string in bytes + * @return number of code points + */ + simdutf_warn_unused virtual size_t + count_utf8(const char *input, size_t length) const noexcept = 0; +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_BASE64 + /** + * Provide the maximal binary length in bytes given the base64 input. + * As long as the input does not contain ignorable characters (e.g., ASCII + * spaces or linefeed characters), the result is exact. In particular, the + * function checks for padding characters. + * + * The function is fast (constant time). It checks up to two characters at + * the end of the string. The input is not otherwise validated or read. + * + * @param input the base64 input to process + * @param length the length of the base64 input in bytes + * @return maximal number of binary bytes + */ + simdutf_warn_unused size_t maximal_binary_length_from_base64( + const char *input, size_t length) const noexcept; + + /** + * Provide the maximal binary length in bytes given the base64 input. + * As long as the input does not contain ignorable characters (e.g., ASCII + * spaces or linefeed characters), the result is exact.
In particular, the + * function checks for padding characters. + * + * The function is fast (constant time). It checks up to two characters at + * the end of the string. The input is not otherwise validated or read. + * + * @param input the base64 input to process, in ASCII stored as 16-bit + * units + * @param length the length of the base64 input in 16-bit units + * @return maximal number of binary bytes + */ + simdutf_warn_unused size_t maximal_binary_length_from_base64( + const char16_t *input, size_t length) const noexcept; + + /** + * Convert a base64 input to a binary output. + * + * This function follows the WHATWG forgiving-base64 format, which means that + * it will ignore any ASCII spaces in the input. You may provide a padded + * input (with one or two equal signs at the end) or an unpadded input + * (without any equal signs at the end). + * + * See https://infra.spec.whatwg.org/#forgiving-base64-decode + * + * This function will fail in case of invalid input. When last_chunk_options = + * loose, there are two possible reasons for failure: the input contains a + * number of base64 characters that when divided by 4, leaves a single + * remainder character (BASE64_INPUT_REMAINDER), or the input contains a + * character that is not a valid base64 character (INVALID_BASE64_CHARACTER). + * + * You should call this function with a buffer that is at least + * maximal_binary_length_from_base64(input, length) bytes long. If you fail to + * provide that much space, the function may cause a buffer overflow. + * + * @param input the base64 string to process + * @param length the length of the string in bytes + * @param output the pointer to a buffer that can hold the conversion + * result (should be at least maximal_binary_length_from_base64(input, length) + * bytes long). + * @param options the base64 options to use, can be base64_default or + * base64_url, is base64_default by default. 
+ * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and either position of the error + * (in the input in bytes) if any, or the number of bytes written if + * successful. + */ + simdutf_warn_unused virtual result + base64_to_binary(const char *input, size_t length, char *output, + base64_options options = base64_default, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept = 0; + + /** + * Convert a base64 input to a binary output while returning more details + * than base64_to_binary. + * + * This function follows the WHATWG forgiving-base64 format, which means that + * it will ignore any ASCII spaces in the input. You may provide a padded + * input (with one or two equal signs at the end) or an unpadded input + * (without any equal signs at the end). + * + * See https://infra.spec.whatwg.org/#forgiving-base64-decode + * + * This function will fail in case of invalid input. When last_chunk_options = + * loose, there are two possible reasons for failure: the input contains a + * number of base64 characters that when divided by 4, leaves a single + * remainder character (BASE64_INPUT_REMAINDER), or the input contains a + * character that is not a valid base64 character (INVALID_BASE64_CHARACTER). + * + * You should call this function with a buffer that is at least + * maximal_binary_length_from_base64(input, length) bytes long. If you fail to + * provide that much space, the function may cause a buffer overflow. + * + * @param input the base64 string to process + * @param length the length of the string in bytes + * @param output the pointer to a buffer that can hold the conversion + * result (should be at least maximal_binary_length_from_base64(input, length) + * bytes long). + * @param options the base64 options to use, can be base64_default or + * base64_url, is base64_default by default. 
+ * @return a full_result struct (of type simdutf::full_result containing the + * three fields error, input_count and output_count). + */ + simdutf_warn_unused virtual full_result base64_to_binary_details( + const char *input, size_t length, char *output, + base64_options options = base64_default, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept = 0; + + /** + * Convert a base64 input to a binary output. + * + * This function follows the WHATWG forgiving-base64 format, which means that + * it will ignore any ASCII spaces in the input. You may provide a padded + * input (with one or two equal signs at the end) or an unpadded input + * (without any equal signs at the end). + * + * See https://infra.spec.whatwg.org/#forgiving-base64-decode + * + * This function will fail in case of invalid input. When last_chunk_options = + * loose, there are two possible reasons for failure: the input contains a + * number of base64 characters that when divided by 4, leaves a single + * remainder character (BASE64_INPUT_REMAINDER), or the input contains a + * character that is not a valid base64 character (INVALID_BASE64_CHARACTER). + * + * You should call this function with a buffer that is at least + * maximal_binary_length_from_base64(input, length) bytes long. If you + * fail to provide that much space, the function may cause a buffer overflow. + * + * @param input the base64 string to process, in ASCII stored as + * 16-bit units + * @param length the length of the string in 16-bit units + * @param output the pointer to a buffer that can hold the conversion + * result (should be at least maximal_binary_length_from_base64(input, length) + * bytes long). + * @param options the base64 options to use, can be base64_default or + * base64_url, is base64_default by default.
+ * @return a result pair struct (of type simdutf::result containing the two + * fields error and count) with an error code and position of the + * INVALID_BASE64_CHARACTER error (in the input in units) if any, or the + * number of bytes written if successful. + */ + simdutf_warn_unused virtual result + base64_to_binary(const char16_t *input, size_t length, char *output, + base64_options options = base64_default, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept = 0; + + /** + * Convert a base64 input to a binary output while returning more details + * than base64_to_binary. + * + * This function follows the WHATWG forgiving-base64 format, which means that + * it will ignore any ASCII spaces in the input. You may provide a padded + * input (with one or two equal signs at the end) or an unpadded input + * (without any equal signs at the end). + * + * See https://infra.spec.whatwg.org/#forgiving-base64-decode + * + * This function will fail in case of invalid input. When last_chunk_options = + * loose, there are two possible reasons for failure: the input contains a + * number of base64 characters that when divided by 4, leaves a single + * remainder character (BASE64_INPUT_REMAINDER), or the input contains a + * character that is not a valid base64 character (INVALID_BASE64_CHARACTER). + * + * You should call this function with a buffer that is at least + * maximal_binary_length_from_base64(input, length) bytes long. If you fail to + * provide that much space, the function may cause a buffer overflow. + * + * @param input the base64 string to process + * @param length the length of the string in 16-bit units + * @param output the pointer to a buffer that can hold the conversion + * result (should be at least maximal_binary_length_from_base64(input, length) + * bytes long). + * @param options the base64 options to use, can be base64_default or + * base64_url, is base64_default by default.
+ * @return a full_result struct (of type simdutf::full_result containing the + * three fields error, input_count and output_count). + */ + simdutf_warn_unused virtual full_result base64_to_binary_details( + const char16_t *input, size_t length, char *output, + base64_options options = base64_default, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept = 0; + + /** + * Provide the base64 length in bytes given the length of a binary input. + * + * @param length the length of the input in bytes + * @param options the base64 options to use, can be base64_default or + * base64_url, is base64_default by default. + * @return number of base64 bytes + */ + simdutf_warn_unused size_t base64_length_from_binary( + size_t length, base64_options options = base64_default) const noexcept; + + /** + * Convert a binary input to a base64 output. + * + * The default option (simdutf::base64_default) uses the characters `+` and + * `/` as part of its alphabet. Further, it adds padding (`=`) at the end of + * the output to ensure that the output length is a multiple of four. + * + * The URL option (simdutf::base64_url) uses the characters `-` and `_` as + * part of its alphabet. No padding is added at the end of the output. + * + * This function always succeeds. + * + * @param input the binary to process + * @param length the length of the input in bytes + * @param output the pointer to a buffer that can hold the conversion + * result (should be at least base64_length_from_binary(length) bytes long) + * @param options the base64 options to use, can be base64_default or + * base64_url, is base64_default by default.
+ * @return number of written bytes, will be equal to + * base64_length_from_binary(length, options) + */ + virtual size_t + binary_to_base64(const char *input, size_t length, char *output, + base64_options options = base64_default) const noexcept = 0; + + /** + * Convert a binary input to a base64 output with lines of given length. + * Lines are separated by a single linefeed character. + * + * The default option (simdutf::base64_default) uses the characters `+` and + * `/` as part of its alphabet. Further, it adds padding (`=`) at the end of + * the output to ensure that the output length is a multiple of four. + * + * The URL option (simdutf::base64_url) uses the characters `-` and `_` as + * part of its alphabet. No padding is added at the end of the output. + * + * This function always succeeds. + * + * @param input the binary to process + * @param length the length of the input in bytes + * @param output the pointer to a buffer that can hold the conversion + * result (should be at least base64_length_from_binary_with_lines(length, + * options, line_length) bytes long) + * @param line_length the length of each line, values smaller than 4 are + * interpreted as 4 + * @param options the base64 options to use, can be base64_default or + * base64_url, is base64_default by default. + * @return number of written bytes, will be equal to + * base64_length_from_binary_with_lines(length, options, line_length) + */ + virtual size_t binary_to_base64_with_lines( + const char *input, size_t length, char *output, + size_t line_length = simdutf::default_line_length, + base64_options options = base64_default) const noexcept = 0; + + /** + * Find the first occurrence of a character in a string. If the character is + * not found, return a pointer to the end of the string. 
+ * @param start the start of the string + * @param end the end of the string + * @param character the character to find + * @return a pointer to the first occurrence of the character in the string, + * or a pointer to the end of the string if the character is not found. + * + */ + virtual const char *find(const char *start, const char *end, + char character) const noexcept = 0; + virtual const char16_t *find(const char16_t *start, const char16_t *end, + char16_t character) const noexcept = 0; +#endif // SIMDUTF_FEATURE_BASE64 + +#ifdef SIMDUTF_INTERNAL_TESTS + // This method is exported only in developer mode, its purpose + // is to expose some internal test procedures from the given + // implementation and then use them through our standard test + // framework. + // + // Regular users should not use it, the tests of the public + // API are enough. + + struct TestProcedure { + // display name + std::string name; + + // procedure should return whether given test pass or not + void (*procedure)(const implementation &); + }; + + virtual std::vector internal_tests() const; +#endif + +protected: + /** @private Construct an implementation with the given name and description. + * For subclasses. */ + simdutf_really_inline implementation(const char *name, + const char *description, + uint32_t required_instruction_sets) + : _name(name), _description(description), + _required_instruction_sets(required_instruction_sets) {} + +protected: + ~implementation() = default; + +private: + /** + * The name of this implementation. + */ + const char *_name; + + /** + * The description of this implementation. + */ + const char *_description; + + /** + * Instruction sets required for this implementation. + */ + const uint32_t _required_instruction_sets; +}; + +/** @private */ +namespace internal { + +/** + * The list of available implementations compiled into simdutf. 
+ */ +class available_implementation_list { +public: + /** Get the list of available implementations compiled into simdutf */ + simdutf_really_inline available_implementation_list() {} + /** Number of implementations */ + size_t size() const noexcept; + /** STL const begin() iterator */ + const implementation *const *begin() const noexcept; + /** STL const end() iterator */ + const implementation *const *end() const noexcept; + + /** + * Get the implementation with the given name. + * + * Case sensitive. + * + * const implementation *impl = + * simdutf::get_available_implementations()["westmere"]; + * if (!impl || !impl->supported_by_runtime_system()) { exit(1); } + * simdutf::get_active_implementation() = impl; + * + * @param name the implementation to find, e.g. "westmere", "haswell", "arm64" + * @return the implementation, or nullptr if no implementation has that name. + */ + const implementation *operator[](const std::string &name) const noexcept { + for (const implementation *impl : *this) { + if (impl->name() == name) { + return impl; + } + } + return nullptr; + } + + /** + * Detect the most advanced implementation supported by the current host. + * + * This is used to initialize the implementation on startup. + * + * const implementation *impl = + * simdutf::get_available_implementations().detect_best_supported(); + * simdutf::get_active_implementation() = impl; + * + * @return the most advanced supported implementation for the current host, or + * an implementation that returns UNSUPPORTED_ARCHITECTURE if there is no + * supported implementation. Will never return nullptr.
+ */ + const implementation *detect_best_supported() const noexcept; +}; + +template class atomic_ptr { +public: + atomic_ptr(T *_ptr) : ptr{_ptr} {} + +#if defined(SIMDUTF_NO_THREADS) + operator const T *() const { return ptr; } + const T &operator*() const { return *ptr; } + const T *operator->() const { return ptr; } + + operator T *() { return ptr; } + T &operator*() { return *ptr; } + T *operator->() { return ptr; } + atomic_ptr &operator=(T *_ptr) { + ptr = _ptr; + return *this; + } + +#else + operator const T *() const { return ptr.load(); } + const T &operator*() const { return *ptr; } + const T *operator->() const { return ptr.load(); } + + operator T *() { return ptr.load(); } + T &operator*() { return *ptr; } + T *operator->() { return ptr.load(); } + atomic_ptr &operator=(T *_ptr) { + ptr = _ptr; + return *this; + } + +#endif + +private: +#if defined(SIMDUTF_NO_THREADS) + T *ptr; +#else + std::atomic ptr; +#endif +}; + +class detect_best_supported_implementation_on_first_use; + +} // namespace internal + +/** + * The list of available implementations compiled into simdutf. + */ +extern SIMDUTF_DLLIMPORTEXPORT const internal::available_implementation_list & +get_available_implementations(); + +/** + * The active implementation. + * + * Automatically initialized on first use to the most advanced implementation + * supported by this hardware. 
+ */ +extern SIMDUTF_DLLIMPORTEXPORT internal::atomic_ptr & +get_active_implementation(); + +} // namespace simdutf + +#if SIMDUTF_FEATURE_BASE64 + // this header is not part of the public api +/* begin file include/simdutf/base64_implementation.h */ +#ifndef SIMDUTF_BASE64_IMPLEMENTATION_H +#define SIMDUTF_BASE64_IMPLEMENTATION_H + +// this is not part of the public api + +namespace simdutf { + +template +simdutf_warn_unused simdutf_constexpr23 result slow_base64_to_binary_safe_impl( + const chartype *input, size_t length, char *output, size_t &outlen, + base64_options options, + last_chunk_handling_options last_chunk_options) noexcept { + const bool ignore_garbage = (options & base64_default_accept_garbage) != 0; + auto ri = simdutf::scalar::base64::find_end(input, length, options); + size_t equallocation = ri.equallocation; + size_t equalsigns = ri.equalsigns; + length = ri.srclen; + size_t full_input_length = ri.full_input_length; + (void)full_input_length; + if (length == 0) { + outlen = 0; + if (!ignore_garbage && equalsigns > 0) { + return {INVALID_BASE64_CHARACTER, equallocation}; + } + return {SUCCESS, 0}; + } + + // The parameters of base64_tail_decode_safe are: + // - dst: the output buffer + // - outlen: the size of the output buffer + // - srcr: the input buffer + // - length: the size of the input buffer + // - padded_characters: the number of padding characters + // - options: the options for the base64 decoder + // - last_chunk_options: the options for the last chunk + // The function will return the number of bytes written to the output buffer + // and the number of bytes read from the input buffer. + // The function will also return an error code if the input buffer is not + // valid base64. 
+ full_result r = scalar::base64::base64_tail_decode_safe( + output, outlen, input, length, equalsigns, options, last_chunk_options); + r = scalar::base64::patch_tail_result(r, 0, 0, equallocation, + full_input_length, last_chunk_options); + outlen = r.output_count; + if (!is_partial(last_chunk_options) && r.error == error_code::SUCCESS && + equalsigns > 0) { + // additional checks + if ((outlen % 3 == 0) || ((outlen % 3) + 1 + equalsigns != 4)) { + r.error = error_code::INVALID_BASE64_CHARACTER; + } + } + return {r.error, r.input_count}; // we cannot return r itself because it gets + // converted to error/output_count +} + +template +simdutf_warn_unused simdutf_constexpr23 result base64_to_binary_safe_impl( + const chartype *input, size_t length, char *output, size_t &outlen, + base64_options options, + last_chunk_handling_options last_chunk_handling_options, + bool decode_up_to_bad_char) noexcept { + static_assert(std::is_same::value || + std::is_same::value, + "Only char and char16_t are supported."); + size_t remaining_input_length = length; + size_t remaining_output_length = outlen; + size_t input_position = 0; + size_t output_position = 0; + + // We also do a first pass using the fast path to decode as much as possible + size_t safe_input = (std::min)( + remaining_input_length, + base64_length_from_binary(remaining_output_length / 3 * 3, options)); + bool done_with_partial = (safe_input == remaining_input_length); + simdutf::full_result r; + +#if SIMDUTF_CPLUSPLUS23 + if consteval { + r = scalar::base64::base64_to_binary_details_impl( + input + input_position, safe_input, output + output_position, options, + done_with_partial + ? last_chunk_handling_options + : simdutf::last_chunk_handling_options::only_full_chunks); + } else +#endif + { + r = get_active_implementation()->base64_to_binary_details( + input + input_position, safe_input, output + output_position, options, + done_with_partial + ? 
last_chunk_handling_options + : simdutf::last_chunk_handling_options::only_full_chunks); + } + simdutf_log_assert(r.input_count <= safe_input, + "You should not read more than safe_input"); + simdutf_log_assert(r.output_count <= remaining_output_length, + "You should not write more than remaining_output_length"); + // Technically redundant, but we want to be explicit about it. + input_position += r.input_count; + output_position += r.output_count; + remaining_input_length -= r.input_count; + remaining_output_length -= r.output_count; + if (r.error != simdutf::error_code::SUCCESS) { + // There is an error. We return. + if (decode_up_to_bad_char && + r.error == error_code::INVALID_BASE64_CHARACTER) { + return slow_base64_to_binary_safe_impl( + input, length, output, outlen, options, last_chunk_handling_options); + } + outlen = output_position; + return {r.error, input_position}; + } + + if (done_with_partial) { + // We are done. We have decoded everything. + outlen = output_position; + return {simdutf::error_code::SUCCESS, input_position}; + } + // We have decoded some data, but we still have some data to decode. + // We need to decode the rest of the input buffer. + r = simdutf::scalar::base64::base64_to_binary_details_safe_impl( + input + input_position, remaining_input_length, output + output_position, + remaining_output_length, options, last_chunk_handling_options); + input_position += r.input_count; + output_position += r.output_count; + remaining_input_length -= r.input_count; + remaining_output_length -= r.output_count; + + if (r.error != simdutf::error_code::SUCCESS) { + // There is an error. We return. 
+ if (decode_up_to_bad_char && + r.error == error_code::INVALID_BASE64_CHARACTER) { + return slow_base64_to_binary_safe_impl( + input, length, output, outlen, options, last_chunk_handling_options); + } + outlen = output_position; + return {r.error, input_position}; + } + if (input_position < length) { + // We cannot process the entire input in one go, so we need to + // process it in two steps: first the fast path, then the slow path. + // In some cases, the processing might 'eat up' trailing ignorable + // characters in the fast path, but that can be a problem. + // suppose we have just white space followed by a single base64 character. + // If we first process the white space with the fast path, it will + // eat all of it. But, by the JavaScript standard, we should consume + // no character. See + // https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64 + while (input_position > 0 && + base64_ignorable(input[input_position - 1], options)) { + input_position--; + } + } + outlen = output_position; + return {simdutf::error_code::SUCCESS, input_position}; +} + +} // namespace simdutf +#endif // SIMDUTF_BASE64_IMPLEMENTATION_H +/* end file include/simdutf/base64_implementation.h */ + +namespace simdutf { + #if SIMDUTF_SPAN +/** + * @brief span overload + * @return a tuple of result and outlen + */ +simdutf_really_inline + simdutf_constexpr23 simdutf_warn_unused std::tuple + base64_to_binary_safe( + const detail::input_span_of_byte_like auto &input, + detail::output_span_of_byte_like auto &&binary_output, + base64_options options = base64_default, + last_chunk_handling_options last_chunk_options = loose, + bool decode_up_to_bad_char = false) noexcept { + size_t outlen = binary_output.size(); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + using CInput = std::decay_t; + static_assert(std::is_same_v, + "sorry, the constexpr implementation is for now limited to " + "input of type char"); + using COutput = std::decay_t; + static_assert(std::is_same_v, + "sorry, the 
constexpr implementation is for now limited to " + "output of type char"); + auto r = base64_to_binary_safe_impl( + input.data(), input.size(), binary_output.data(), outlen, options, + last_chunk_options, decode_up_to_bad_char); + return {r, outlen}; + } else + #endif + { + auto r = base64_to_binary_safe_impl( + reinterpret_cast(input.data()), input.size(), + reinterpret_cast(binary_output.data()), outlen, options, + last_chunk_options, decode_up_to_bad_char); + return {r, outlen}; + } +} + + #if SIMDUTF_SPAN +/** + * @brief span overload + * @return a tuple of result and outlen + */ +simdutf_really_inline + simdutf_warn_unused simdutf_constexpr23 std::tuple + base64_to_binary_safe( + std::span input, + detail::output_span_of_byte_like auto &&binary_output, + base64_options options = base64_default, + last_chunk_handling_options last_chunk_options = loose, + bool decode_up_to_bad_char = false) noexcept { + size_t outlen = binary_output.size(); + #if SIMDUTF_CPLUSPLUS23 + if consteval { + auto r = base64_to_binary_safe_impl( + input.data(), input.size(), binary_output.data(), outlen, options, + last_chunk_options, decode_up_to_bad_char); + return {r, outlen}; + } else + #endif + { + auto r = base64_to_binary_safe( + input.data(), input.size(), + reinterpret_cast(binary_output.data()), outlen, options, + last_chunk_options, decode_up_to_bad_char); + return {r, outlen}; + } +} + #endif // SIMDUTF_SPAN + + #endif // SIMDUTF_SPAN +} // namespace simdutf + +#endif // SIMDUTF_FEATURE_BASE64 + +#endif // SIMDUTF_IMPLEMENTATION_H +/* end file include/simdutf/implementation.h */ + +// Implementation-internal files (must be included before the implementations +// themselves, to keep amalgamation working--otherwise, the first time a file is +// included, it might be put inside the #ifdef +// SIMDUTF_IMPLEMENTATION_ARM64/FALLBACK/etc., which means the other +// implementations can't compile unless that implementation is turned on). 
+ +SIMDUTF_POP_DISABLE_WARNINGS + +#endif // SIMDUTF_H +/* end file include/simdutf.h */ diff --git a/Extra2D/src/app/application.cpp b/Extra2D/src/app/application.cpp index 436bc0f..7818fd4 100644 --- a/Extra2D/src/app/application.cpp +++ b/Extra2D/src/app/application.cpp @@ -1,68 +1,56 @@ #include -#include -#include -#include -#include -#include -#include -#include - -#include -#include +#include +#include +#include +#include +#include +#include namespace extra2d { -static double getTimeSeconds() { +static f64 getTimeSeconds() { +#ifdef __SWITCH__ + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return static_cast(ts.tv_sec) + + static_cast(ts.tv_nsec) / 1000000000.0; +#else using namespace std::chrono; auto now = steady_clock::now(); auto duration = now.time_since_epoch(); - return duration_cast>(duration).count(); + return duration_cast>(duration).count(); +#endif } -Application &Application::instance() { +Application &Application::get() { static Application instance; return instance; } -Application::~Application() { shutdown(); } +Application::Application() { Registry::instance().setApp(this); } -bool Application::init(const AppConfig &config) { +Application::~Application() { + if (initialized_) { + shutdown(); + } +} + +bool Application::init() { if (initialized_) { - E2D_LOG_WARN("Application already initialized"); return true; } - config_ = config; - - window_ = makeUnique(); - WindowConfig winConfig; - winConfig.title = config.title; - winConfig.width = config.width; - winConfig.height = config.height; - winConfig.fullscreen = config.fullscreen; - winConfig.vsync = config.vsync; - - if (!window_->create(winConfig)) { - E2D_LOG_ERROR("Failed to create window"); + // 初始化所有模块(拓扑排序) + // 服务通过 E2D_AUTO_REGISTER_SERVICE 宏自动注册 + if (!Registry::instance().init()) { return false; } - renderer_ = makeUnique(); - if (!renderer_->init(window_.get())) { - E2D_LOG_ERROR("Failed to initialize renderer"); - window_->destroy(); - return false; - } - - 
eventQueue_ = makeUnique(); - eventDispatcher_ = makeUnique(); - - AudioEngine::getInstance().initialize(); + // 初始化所有服务 + ServiceLocator::instance().init(); initialized_ = true; running_ = true; - - E2D_LOG_INFO("Application initialized successfully"); return true; } @@ -70,80 +58,73 @@ void Application::shutdown() { if (!initialized_) return; - E2D_LOG_INFO("Shutting down application..."); - - AudioEngine::getInstance().shutdown(); - - eventDispatcher_.reset(); - eventQueue_.reset(); - - if (renderer_) { - renderer_->shutdown(); - renderer_.reset(); - } - - if (window_) { - window_->destroy(); - window_.reset(); - } + ServiceLocator::instance().shutdown(); + ServiceLocator::instance().clear(); + Registry::instance().shutdown(); + Registry::instance().clear(); initialized_ = false; running_ = false; - - E2D_LOG_INFO("Application shutdown complete"); } void Application::run() { - if (!initialized_) { - E2D_LOG_ERROR("Application not initialized"); + if (!initialized_) + return; + + auto *winMod = get(); + if (!winMod || !winMod->win()) return; - } lastFrameTime_ = getTimeSeconds(); - while (running_ && !window_->shouldClose()) { + while (running_ && !winMod->win()->shouldClose()) { mainLoop(); } } void Application::quit() { + shouldQuit_ = true; running_ = false; } void Application::pause() { if (!paused_) { paused_ = true; - E2D_LOG_INFO("Application paused"); + ServiceLocator::instance().pause(); } } void Application::resume() { if (paused_) { paused_ = false; + ServiceLocator::instance().resume(); lastFrameTime_ = getTimeSeconds(); - E2D_LOG_INFO("Application resumed"); } } void Application::mainLoop() { - double currentTime = getTimeSeconds(); - deltaTime_ = static_cast(currentTime - lastFrameTime_); + f64 currentTime = getTimeSeconds(); + dt_ = static_cast(currentTime - lastFrameTime_); lastFrameTime_ = currentTime; - totalTime_ += deltaTime_; + totalTime_ += dt_; frameCount_++; - fpsTimer_ += deltaTime_; + fpsTimer_ += dt_; if (fpsTimer_ >= 1.0f) { - 
currentFps_ = frameCount_; + fps_ = frameCount_; frameCount_ = 0; fpsTimer_ -= 1.0f; } - window_->pollEvents(); + auto *winMod = get(); + if (winMod && winMod->win()) { + winMod->win()->poll(); + } - if (eventDispatcher_ && eventQueue_) { - eventDispatcher_->processQueue(*eventQueue_); + auto eventService = ServiceLocator::instance().get(); + if (eventService) { + eventService->process(); } if (!paused_) { @@ -151,39 +132,21 @@ void Application::mainLoop() { } render(); - - if (!config_.vsync && config_.fpsLimit > 0) { - double frameEndTime = getTimeSeconds(); - double frameTime = frameEndTime - currentTime; - double target = 1.0 / static_cast(config_.fpsLimit); - if (frameTime < target) { - auto sleepSeconds = target - frameTime; - std::this_thread::sleep_for(std::chrono::duration(sleepSeconds)); - } - } } -void Application::update() { - // 子系统更新可以在这里添加 -} +void Application::update() { ServiceLocator::instance().update(dt_); } void Application::render() { - if (!renderer_) { + auto *winMod = get(); + if (!winMod || !winMod->win()) return; - } - renderer_->beginFrame(Color::Black); - - // 渲染内容可以在这里添加 - - renderer_->endFrame(); - window_->swapBuffers(); + winMod->win()->swap(); } -Input &Application::input() { return *window_->getInput(); } - -EventQueue &Application::eventQueue() { return *eventQueue_; } - -EventDispatcher &Application::eventDispatcher() { return *eventDispatcher_; } +GLFWWindow *Application::window() { + auto *winMod = get(); + return winMod ? 
winMod->win() : nullptr; +} } // namespace extra2d diff --git a/Extra2D/src/asset/asset.cpp b/Extra2D/src/asset/asset.cpp new file mode 100644 index 0000000..79cabf8 --- /dev/null +++ b/Extra2D/src/asset/asset.cpp @@ -0,0 +1,62 @@ +#include + +#define STB_TRUETYPE_IMPLEMENTATION +#include + +namespace extra2d { + +// --------------------------------------------------------------------------- +// FontAsset::Impl - Pimpl 实现类 +// --------------------------------------------------------------------------- + +class FontAsset::Impl { +public: + stbtt_fontinfo info; + bool initialized = false; +}; + +// --------------------------------------------------------------------------- +// FontAsset 实现 +// --------------------------------------------------------------------------- + +FontAsset::FontAsset() : impl_(ptr::makeUnique()) {} + +FontAsset::~FontAsset() = default; + +bool FontAsset::loaded() const { + return state_.load(std::memory_order_acquire) == AssetState::Loaded && impl_->initialized; +} + +float FontAsset::scaleForPixelHeight(float pixels) const { + if (!impl_->initialized || data_.empty()) { + return 0.0f; + } + return stbtt_ScaleForPixelHeight(&impl_->info, pixels); +} + +bool FontAsset::setData(std::vector data) { + if (data.empty()) { + return false; + } + + data_ = std::move(data); + + if (!stbtt_InitFont(&impl_->info, data_.data(), 0)) { + data_.clear(); + impl_->initialized = false; + setState(AssetState::Failed); + return false; + } + + impl_->initialized = true; + setState(AssetState::Loaded); + return true; +} + +void FontAsset::release() { + data_.clear(); + impl_->initialized = false; + setState(AssetState::Unloaded); +} + +} diff --git a/Extra2D/src/asset/asset_cache.cpp b/Extra2D/src/asset/asset_cache.cpp new file mode 100644 index 0000000..100f0be --- /dev/null +++ b/Extra2D/src/asset/asset_cache.cpp @@ -0,0 +1,121 @@ +#include + +namespace extra2d { + +// --------------------------------------------------------------------------- +// AssetCache 实现 
+// --------------------------------------------------------------------------- + +AssetCache::AssetCache(size_t limit) + : limit_(limit) { + stats_.limit = limit; +} + +bool AssetCache::has(const AssetID& id) const { + std::shared_lock lock(mutex_); + return entries_.find(id) != entries_.end(); +} + +bool AssetCache::remove(const AssetID& id) { + std::unique_lock lock(mutex_); + + auto it = entries_.find(id); + if (it == entries_.end()) { + return false; + } + + bytes_ -= it->second.entry->asset->memSize(); + lruList_.erase(it->second.lruIterator); + entries_.erase(it); + --stats_.count; + + return true; +} + +void AssetCache::setLimit(size_t limit) { + std::unique_lock lock(mutex_); + limit_ = limit; + stats_.limit = limit; + + if (limit > 0 && bytes_ > limit) { + evict(); + } +} + +size_t AssetCache::count() const { + std::shared_lock lock(mutex_); + return entries_.size(); +} + +size_t AssetCache::purge() { + std::unique_lock lock(mutex_); + + size_t purged = 0; + auto it = entries_.begin(); + + while (it != entries_.end()) { + if (canEvict(*it->second.entry)) { + bytes_ -= it->second.entry->asset->memSize(); + lruList_.erase(it->second.lruIterator); + it = entries_.erase(it); + ++purged; + --stats_.count; + } else { + ++it; + } + } + + return purged; +} + +void AssetCache::clear() { + std::unique_lock lock(mutex_); + entries_.clear(); + lruList_.clear(); + bytes_ = 0; + stats_.count = 0; + stats_.bytes = 0; +} + +CacheStats AssetCache::stats() const { + std::shared_lock lock(mutex_); + stats_.bytes = bytes_; + return stats_; +} + +void AssetCache::resetStats() { + std::unique_lock lock(mutex_); + stats_.hits = 0; + stats_.misses = 0; +} + +void AssetCache::evict() { + while (!lruList_.empty() && (limit_ > 0 && bytes_ > limit_)) { + AssetID id = lruList_.front(); + + auto it = entries_.find(id); + if (it != entries_.end()) { + if (canEvict(*it->second.entry)) { + bytes_ -= it->second.entry->asset->memSize(); + entries_.erase(it); + lruList_.pop_front(); + 
--stats_.count; + continue; + } + } + + lruList_.pop_front(); + if (it != entries_.end()) { + it->second.lruIterator = lruList_.insert(lruList_.end(), id); + } + + break; + } +} + +bool AssetCache::canEvict(const CacheEntry& entry) const { + long useCount = entry.asset.use_count(); + return useCount <= 1; +} + +} diff --git a/Extra2D/src/asset/asset_loader.cpp b/Extra2D/src/asset/asset_loader.cpp new file mode 100644 index 0000000..69e06a4 --- /dev/null +++ b/Extra2D/src/asset/asset_loader.cpp @@ -0,0 +1,418 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define STB_IMAGE_IMPLEMENTATION +#include + +namespace extra2d { + +// --------------------------------------------------------------------------- +// 辅助函数 +// --------------------------------------------------------------------------- + +namespace { + +/** + * @brief 转换为小写 + */ +std::string toLower(const std::string& str) { + std::string result = str; + std::transform(result.begin(), result.end(), result.begin(), + [](unsigned char c) { return std::tolower(c); }); + return result; +} + +/** + * @brief 获取文件扩展名 + */ +std::string getExtension(const std::string& path) { + size_t pos = path.rfind('.'); + if (pos == std::string::npos) { + return ""; + } + return toLower(path.substr(pos)); +} + +/** + * @brief 读取文件内容 + */ +std::vector readFile(const std::string& path) { + std::ifstream file(path, std::ios::binary | std::ios::ate); + if (!file) { + return {}; + } + + size_t size = static_cast(file.tellg()); + file.seekg(0, std::ios::beg); + + std::vector data(size); + if (!file.read(reinterpret_cast(data.data()), size)) { + return {}; + } + + return data; +} + +} + +// --------------------------------------------------------------------------- +// TextureLoader 实现 +// --------------------------------------------------------------------------- + +TextureLoader::TextureLoader() = default; + +TextureLoader::~TextureLoader() = default; + +Ref TextureLoader::load(const std::string& path) 
{ + auto data = readFile(path); + if (data.empty()) { + E2D_ERROR("Failed to read texture file: {}", path); + return nullptr; + } + return loadFromMemory(data.data(), data.size()); +} + +Ref TextureLoader::loadFromMemory(const u8* data, size_t size) { + if (!data || size == 0) { + return nullptr; + } + + int width, height, channels; + u8* pixels = stbi_load_from_memory(data, static_cast(size), + &width, &height, &channels, + desiredChannels_); + + if (!pixels) { + E2D_ERROR("Failed to load texture from memory: {}", stbi_failure_reason()); + return nullptr; + } + + int actualChannels = desiredChannels_ > 0 ? desiredChannels_ : channels; + + auto asset = ptr::make(); + + Unique pixelData(new u8[static_cast(width) * height * actualChannels]); + std::memcpy(pixelData.get(), pixels, + static_cast(width) * height * actualChannels); + + asset->setData(width, height, actualChannels, std::move(pixelData)); + + stbi_image_free(pixels); + + return asset; +} + +bool TextureLoader::canLoad(const std::string& path) const { + std::string ext = getExtension(path); + auto exts = extensions(); + return std::find(exts.begin(), exts.end(), ext) != exts.end(); +} + +std::vector TextureLoader::extensions() const { + return {".png", ".jpg", ".jpeg", ".bmp", ".tga", ".gif", ".psd", ".hdr", ".pic"}; +} + +void TextureLoader::setDesiredChannels(int channels) { + desiredChannels_ = std::clamp(channels, 0, 4); +} + +// --------------------------------------------------------------------------- +// FontLoader 实现 +// --------------------------------------------------------------------------- + +Ref FontLoader::load(const std::string& path) { + auto data = readFile(path); + if (data.empty()) { + E2D_ERROR("Failed to read font file: {}", path); + return nullptr; + } + return loadFromMemory(data.data(), data.size()); +} + +Ref FontLoader::loadFromMemory(const u8* data, size_t size) { + if (!data || size == 0) { + return nullptr; + } + + auto asset = ptr::make(); + + std::vector fontData(data, data 
+ size); + if (!asset->setData(std::move(fontData))) { + E2D_ERROR("Failed to initialize font from memory"); + return nullptr; + } + + return asset; +} + +bool FontLoader::canLoad(const std::string& path) const { + std::string ext = getExtension(path); + auto exts = extensions(); + return std::find(exts.begin(), exts.end(), ext) != exts.end(); +} + +std::vector FontLoader::extensions() const { + return {".ttf", ".otf", ".ttc"}; +} + +// --------------------------------------------------------------------------- +// ShaderLoader 实现 +// --------------------------------------------------------------------------- + +Ref ShaderLoader::load(const std::string& path) { + auto data = readFile(path); + if (data.empty()) { + E2D_ERROR("Failed to read shader file: {}", path); + return nullptr; + } + return loadFromMemory(data.data(), data.size()); +} + +Ref ShaderLoader::loadFromMemory(const u8* data, size_t size) { + if (!data || size == 0) { + return nullptr; + } + + std::string content(reinterpret_cast(data), size); + + std::string vertexSrc, fragmentSrc; + + if (content.find(vertexMarker_) != std::string::npos) { + if (!parseCombined(content, vertexSrc, fragmentSrc)) { + E2D_ERROR("Failed to parse combined shader file"); + return nullptr; + } + } else { + vertexSrc = content; + fragmentSrc = content; + } + + auto asset = ptr::make(); + asset->setSource(std::move(vertexSrc), std::move(fragmentSrc)); + + return asset; +} + +bool ShaderLoader::canLoad(const std::string& path) const { + std::string ext = getExtension(path); + auto exts = extensions(); + return std::find(exts.begin(), exts.end(), ext) != exts.end(); +} + +std::vector ShaderLoader::extensions() const { + return {".vert", ".frag", ".glsl", ".vs", ".fs"}; +} + +bool ShaderLoader::parseCombined(const std::string& content, + std::string& vertex, + std::string& fragment) { + size_t vertexPos = content.find(vertexMarker_); + size_t fragmentPos = content.find(fragmentMarker_); + + if (vertexPos == std::string::npos || 
fragmentPos == std::string::npos) { + return false; + } + + size_t vertexStart = vertexPos + vertexMarker_.length(); + + if (vertexPos < fragmentPos) { + vertex = content.substr(vertexStart, fragmentPos - vertexStart); + fragment = content.substr(fragmentPos + fragmentMarker_.length()); + } else { + fragment = content.substr(fragmentPos + fragmentMarker_.length(), + vertexPos - fragmentPos - fragmentMarker_.length()); + vertex = content.substr(vertexStart); + } + + auto trim = [](std::string& s) { + size_t start = s.find_first_not_of(" \t\r\n"); + if (start == std::string::npos) { + s.clear(); + return; + } + size_t end = s.find_last_not_of(" \t\r\n"); + s = s.substr(start, end - start + 1); + }; + + trim(vertex); + trim(fragment); + + return true; +} + +// --------------------------------------------------------------------------- +// AudioLoader 实现 +// --------------------------------------------------------------------------- + +Ref AudioLoader::load(const std::string& path) { + auto data = readFile(path); + if (data.empty()) { + E2D_ERROR("Failed to read audio file: {}", path); + return nullptr; + } + return loadFromMemory(data.data(), data.size()); +} + +Ref AudioLoader::loadFromMemory(const u8* data, size_t size) { + if (!data || size == 0) { + return nullptr; + } + + std::string ext = ".wav"; + if (size >= 4) { + if (data[0] == 'R' && data[1] == 'I' && data[2] == 'F' && data[3] == 'F') { + return loadWav(data, size); + } + } + + E2D_ERROR("Unsupported audio format"); + return nullptr; +} + +bool AudioLoader::canLoad(const std::string& path) const { + std::string ext = getExtension(path); + auto exts = extensions(); + return std::find(exts.begin(), exts.end(), ext) != exts.end(); +} + +std::vector AudioLoader::extensions() const { + return {".wav"}; +} + +Ref AudioLoader::loadWav(const u8* data, size_t size) { + if (size < 44) { + E2D_ERROR("WAV file too small"); + return nullptr; + } + + if (data[0] != 'R' || data[1] != 'I' || data[2] != 'F' || data[3] != 
'F') { + E2D_ERROR("Invalid WAV file: missing RIFF header"); + return nullptr; + } + + if (data[8] != 'W' || data[9] != 'A' || data[10] != 'V' || data[11] != 'E') { + E2D_ERROR("Invalid WAV file: missing WAVE format"); + return nullptr; + } + + size_t pos = 12; + u16 audioFormat = 0; + u16 numChannels = 0; + u32 sampleRate = 0; + u16 bitsPerSample = 0; + size_t dataSize = 0; + const u8* audioData = nullptr; + + while (pos < size) { + u32 chunkId = *reinterpret_cast(data + pos); + u32 chunkSize = *reinterpret_cast(data + pos + 4); + + if (chunkId == 0x20746D66) { // "fmt " + audioFormat = *reinterpret_cast(data + pos + 8); + numChannels = *reinterpret_cast(data + pos + 10); + sampleRate = *reinterpret_cast(data + pos + 12); + bitsPerSample = *reinterpret_cast(data + pos + 22); + } else if (chunkId == 0x61746164) { // "data" + dataSize = chunkSize; + audioData = data + pos + 8; + break; + } + + pos += 8 + chunkSize; + if (chunkSize % 2 == 1) { + pos++; + } + } + + if (!audioData || dataSize == 0) { + E2D_ERROR("Invalid WAV file: missing data chunk"); + return nullptr; + } + + if (audioFormat != 1) { + E2D_ERROR("Unsupported WAV format: only PCM supported"); + return nullptr; + } + + auto asset = ptr::make(); + + std::vector pcmData(audioData, audioData + dataSize); + asset->setData(AudioFormat::PCM, numChannels, sampleRate, bitsPerSample, + std::move(pcmData)); + + return asset; +} + +// --------------------------------------------------------------------------- +// DataLoader 实现 +// --------------------------------------------------------------------------- + +Ref DataLoader::load(const std::string& path) { + auto data = readFile(path); + if (data.empty()) { + E2D_ERROR("Failed to read data file: {}", path); + return nullptr; + } + return loadFromMemory(data.data(), data.size()); +} + +Ref DataLoader::loadFromMemory(const u8* data, size_t size) { + if (!data || size == 0) { + return nullptr; + } + + auto asset = ptr::make(); + std::vector assetData(data, data + 
size); + asset->setData(std::move(assetData)); + + return asset; +} + +bool DataLoader::canLoad(const std::string& path) const { + return !path.empty(); +} + +std::vector DataLoader::extensions() const { + return {".bin", ".dat"}; +} + +// --------------------------------------------------------------------------- +// AssetLoaderFactory 实现 +// --------------------------------------------------------------------------- + +AssetType AssetLoaderFactory::getTypeByExtension(const std::string& extension) { + std::string ext = toLower(extension); + + if (ext == ".png" || ext == ".jpg" || ext == ".jpeg" || + ext == ".bmp" || ext == ".tga" || ext == ".gif" || + ext == ".psd" || ext == ".hdr" || ext == ".pic") { + return AssetType::Texture; + } + + if (ext == ".ttf" || ext == ".otf" || ext == ".ttc") { + return AssetType::Font; + } + + if (ext == ".vert" || ext == ".frag" || ext == ".glsl" || + ext == ".vs" || ext == ".fs") { + return AssetType::Shader; + } + + if (ext == ".wav" || ext == ".mp3" || ext == ".ogg") { + return AssetType::Audio; + } + + if (ext == ".bin" || ext == ".dat") { + return AssetType::Data; + } + + return AssetType::Unknown; +} + +} diff --git a/Extra2D/src/asset/asset_pack.cpp b/Extra2D/src/asset/asset_pack.cpp new file mode 100644 index 0000000..4523159 --- /dev/null +++ b/Extra2D/src/asset/asset_pack.cpp @@ -0,0 +1,439 @@ +#include "extra2d/asset/asset_pack.h" + +#include +#include +#include + +namespace extra2d { + +// --------------------------------------------------------------------------- +// AssetPack 实现 +// --------------------------------------------------------------------------- + +AssetPack::AssetPack(AssetPack&& other) noexcept + : path_(std::move(other.path_)) + , file_(std::move(other.file_)) + , header_(other.header_) + , entries_(std::move(other.entries_)) + , pipe_(std::move(other.pipe_)) { + other.header_ = AssetPackageHeader{}; +} + +AssetPack& AssetPack::operator=(AssetPack&& other) noexcept { + if (this != &other) { + close(); + 
path_ = std::move(other.path_); + file_ = std::move(other.file_); + header_ = other.header_; + entries_ = std::move(other.entries_); + pipe_ = std::move(other.pipe_); + other.header_ = AssetPackageHeader{}; + } + return *this; +} + +AssetPack::~AssetPack() { + close(); +} + +bool AssetPack::open(const std::string& path) { + close(); + + path_ = path; + file_.open(path, std::ios::binary); + + if (!file_.is_open()) { + return false; + } + + if (!readHeader()) { + close(); + return false; + } + + if (!readIndex()) { + close(); + return false; + } + + return true; +} + +void AssetPack::close() { + if (file_.is_open()) { + file_.close(); + } + entries_.clear(); + path_.clear(); + header_ = AssetPackageHeader{}; +} + +bool AssetPack::has(const AssetID& id) const { + return entries_.find(id) != entries_.end(); +} + +bool AssetPack::has(const std::string& path) const { + return has(AssetID(path)); +} + +std::vector AssetPack::read(const AssetID& id) { + auto raw = readRaw(id); + if (raw.empty()) { + return {}; + } + + if (!pipe_.empty()) { + return pipe_.process(raw); + } + + return raw; +} + +std::vector AssetPack::read(const std::string& path) { + return read(AssetID(path)); +} + +std::vector AssetPack::readRaw(const AssetID& id) { + auto* entry = getEntry(id); + if (!entry) { + return {}; + } + + return readEntryData(*entry); +} + +std::vector AssetPack::assets() const { + std::vector result; + result.reserve(entries_.size()); + for (const auto& pair : entries_) { + result.push_back(pair.first); + } + return result; +} + +const AssetPackageEntry* AssetPack::getEntry(const AssetID& id) const { + auto it = entries_.find(id); + return it != entries_.end() ? 
&it->second : nullptr; +} + +bool AssetPack::readHeader() { + file_.seekg(0, std::ios::beg); + file_.read(reinterpret_cast(&header_), sizeof(header_)); + + if (!file_.good()) { + return false; + } + + if (!header_.valid()) { + return false; + } + + return true; +} + +bool AssetPack::readIndex() { + u32 entryCount = 0; + file_.read(reinterpret_cast(&entryCount), sizeof(entryCount)); + + if (!file_.good()) { + return false; + } + + entries_.clear(); + entries_.reserve(entryCount); + + for (u32 i = 0; i < entryCount; ++i) { + u32 pathLen = 0; + file_.read(reinterpret_cast(&pathLen), sizeof(pathLen)); + + if (!file_.good()) { + return false; + } + + std::string path(pathLen, '\0'); + file_.read(path.data(), pathLen); + + AssetPackageEntry entry; + entry.id = AssetID(path); + + file_.read(reinterpret_cast(&entry.offset), sizeof(entry.offset)); + file_.read(reinterpret_cast(&entry.size), sizeof(entry.size)); + file_.read(reinterpret_cast(&entry.originalSize), sizeof(entry.originalSize)); + file_.read(reinterpret_cast(&entry.compression), sizeof(entry.compression)); + file_.read(reinterpret_cast(&entry.flags), sizeof(entry.flags)); + + if (!file_.good()) { + return false; + } + + entries_[entry.id] = entry; + } + + return true; +} + +std::vector AssetPack::readEntryData(const AssetPackageEntry& entry) { + std::vector data(entry.size); + + file_.seekg(entry.offset, std::ios::beg); + file_.read(reinterpret_cast(data.data()), entry.size); + + if (!file_.good()) { + return {}; + } + + return data; +} + +// --------------------------------------------------------------------------- +// PackManager 实现 +// --------------------------------------------------------------------------- + +bool PackManager::mount(const std::string& path) { + auto pack = std::make_unique(); + if (!pack->open(path)) { + return false; + } + + (void)defaultPipe_; + + + packs_.push_back(std::move(pack)); + return true; +} + +void PackManager::unmount(const std::string& path) { + packs_.erase( + 
std::remove_if(packs_.begin(), packs_.end(), + [&path](const Unique& pack) { + return pack->path() == path; + }), + packs_.end() + ); +} + +void PackManager::unmountAll() { + packs_.clear(); +} + +AssetPack* PackManager::find(const AssetID& id) { + for (auto& pack : packs_) { + if (pack->has(id)) { + return pack.get(); + } + } + return nullptr; +} + +AssetPack* PackManager::find(const std::string& path) { + return find(AssetID(path)); +} + +bool PackManager::has(const AssetID& id) const { + for (const auto& pack : packs_) { + if (pack->has(id)) { + return true; + } + } + return false; +} + +bool PackManager::has(const std::string& path) const { + return has(AssetID(path)); +} + +std::vector PackManager::read(const AssetID& id) { + auto* pack = find(id); + if (pack) { + return pack->read(id); + } + return {}; +} + +std::vector PackManager::read(const std::string& path) { + return read(AssetID(path)); +} + +std::vector PackManager::allAssets() const { + std::vector result; + for (const auto& pack : packs_) { + auto assets = pack->assets(); + result.insert(result.end(), assets.begin(), assets.end()); + } + return result; +} + +std::vector PackManager::mountedPacks() const { + std::vector result; + result.reserve(packs_.size()); + for (const auto& pack : packs_) { + result.push_back(pack->path()); + } + return result; +} + +// --------------------------------------------------------------------------- +// AssetPackBuilder 实现 +// --------------------------------------------------------------------------- + +AssetPackBuilder::AssetPackBuilder(Compression compression, int level) + : compression_(compression) + , level_(level) { +} + +void AssetPackBuilder::add(const std::string& path, const std::vector& data) { + BuilderEntry entry; + entry.id = AssetID(path); + entry.data = data; + entry.compression = static_cast(compression_); + entry.flags = encryptType_ != Decryptor::Type::None ? 
1 : 0; + + entry.compressedData = processData(entry.data); + if (entry.compressedData.empty()) { + entry.compressedData = entry.data; + entry.compression = static_cast(Compression::None); + } + + entries_.push_back(std::move(entry)); + + totalOriginalSize_ += data.size(); + totalCompressedSize_ += entry.compressedData.size(); +} + +void AssetPackBuilder::add(const std::string& path, std::vector&& data) { + add(path, data); +} + +bool AssetPackBuilder::addFile(const std::string& filePath, const std::string& packPath) { + std::ifstream file(filePath, std::ios::binary | std::ios::ate); + if (!file.is_open()) { + return false; + } + + auto size = file.tellg(); + file.seekg(0, std::ios::beg); + + std::vector data(size); + file.read(reinterpret_cast(data.data()), size); + + if (!file.good()) { + return false; + } + + std::string ppath = packPath.empty() + ? std::filesystem::path(filePath).filename().string() + : packPath; + + add(ppath, std::move(data)); + return true; +} + +size_t AssetPackBuilder::addDirectory(const std::string& dirPath, const std::string& prefix) { + size_t count = 0; + std::filesystem::path dir(dirPath); + + if (!std::filesystem::exists(dir) || !std::filesystem::is_directory(dir)) { + return 0; + } + + for (const auto& entry : std::filesystem::recursive_directory_iterator(dir)) { + if (entry.is_regular_file()) { + std::string relativePath = std::filesystem::relative(entry.path(), dir).string(); + std::string packPath = prefix.empty() + ? 
relativePath + : prefix + "/" + relativePath; + + if (addFile(entry.path().string(), packPath)) { + ++count; + } + } + } + + return count; +} + +void AssetPackBuilder::setEncryption(const std::string& key, Decryptor::Type type) { + encryptKey_ = key; + encryptType_ = type; +} + +bool AssetPackBuilder::build(const std::string& outputPath) { + std::ofstream out(outputPath, std::ios::binary); + if (!out.is_open()) { + return false; + } + + AssetPackageHeader header; + header.magic = AssetPackageHeader::MAGIC; + header.version = 1; + header.compressionType = static_cast(compression_); + header.encryptionType = static_cast(encryptType_); + header.originalSize = totalOriginalSize_; + + u64 dataOffset = sizeof(header) + sizeof(u32); + for (const auto& entry : entries_) { + dataOffset += sizeof(u32) + entry.id.path.size(); + dataOffset += sizeof(u64) * 3 + sizeof(u32) * 2; + } + + for (auto& entry : entries_) { + entry.offset = dataOffset; + dataOffset += entry.compressedData.size(); + } + + header.compressedSize = dataOffset - sizeof(header) - sizeof(u32); + for (const auto& entry : entries_) { + header.compressedSize -= sizeof(u32) + entry.id.path.size(); + header.compressedSize -= sizeof(u64) * 3 + sizeof(u32) * 2; + } + + out.write(reinterpret_cast(&header), sizeof(header)); + + u32 entryCount = static_cast(entries_.size()); + out.write(reinterpret_cast(&entryCount), sizeof(entryCount)); + + for (const auto& entry : entries_) { + u32 pathLen = static_cast(entry.id.path.size()); + out.write(reinterpret_cast(&pathLen), sizeof(pathLen)); + out.write(entry.id.path.data(), pathLen); + + u64 offset = entry.offset; + u64 size = entry.compressedData.size(); + u64 originalSize = entry.data.size(); + + out.write(reinterpret_cast(&offset), sizeof(offset)); + out.write(reinterpret_cast(&size), sizeof(size)); + out.write(reinterpret_cast(&originalSize), sizeof(originalSize)); + out.write(reinterpret_cast(&entry.compression), sizeof(entry.compression)); + 
out.write(reinterpret_cast(&entry.flags), sizeof(entry.flags)); + } + + for (const auto& entry : entries_) { + out.write(reinterpret_cast(entry.compressedData.data()), + entry.compressedData.size()); + } + + return out.good(); +} + +void AssetPackBuilder::clear() { + entries_.clear(); + totalOriginalSize_ = 0; + totalCompressedSize_ = 0; +} + +std::vector AssetPackBuilder::processData(const std::vector& data) { + DataPipe pipe; + + if (compression_ != Compression::None) { + pipe.compress(compression_, level_); + } + + if (encryptType_ != Decryptor::Type::None && !encryptKey_.empty()) { + pipe.encrypt(encryptKey_, encryptType_); + } + + return pipe.process(data); +} + +} diff --git a/Extra2D/src/asset/data_processor.cpp b/Extra2D/src/asset/data_processor.cpp new file mode 100644 index 0000000..524ee62 --- /dev/null +++ b/Extra2D/src/asset/data_processor.cpp @@ -0,0 +1,449 @@ +#include "extra2d/asset/data_processor.h" + +#include +#include + +#ifdef E2D_USE_ZSTD +#include +#endif + +#ifdef E2D_USE_LZ4 +#include +#endif + +#ifdef E2D_USE_ZLIB +#include +#endif + +namespace extra2d { + +// --------------------------------------------------------------------------- +// Decryptor 实现 +// --------------------------------------------------------------------------- + +Decryptor::Decryptor(const std::string& key, Type type) + : key_(key), type_(type) { +} + +std::vector Decryptor::process(const std::vector& input) { + if (input.empty() || type_ == Type::None) { + return processNext(input); + } + + std::vector result; + switch (type_) { + case Type::XOR: + result = decryptXOR(input); + break; + case Type::AES256: + result = decryptAES256(input); + break; + default: + result = input; + break; + } + + return processNext(result); +} + +std::vector Decryptor::decryptXOR(const std::vector& input) { + if (key_.empty()) { + return input; + } + + std::vector result(input.size()); + const size_t keyLen = key_.size(); + + for (size_t i = 0; i < input.size(); ++i) { + result[i] = 
input[i] ^ static_cast(key_[i % keyLen]); + } + + return result; +} + +std::vector Decryptor::decryptAES256(const std::vector& input) { + return decryptXOR(input); +} + +// --------------------------------------------------------------------------- +// Decompressor 实现 +// --------------------------------------------------------------------------- + +Decompressor::Decompressor(Compression algo) + : algo_(algo) { +} + +std::vector Decompressor::process(const std::vector& input) { + if (input.empty() || algo_ == Compression::None) { + return processNext(input); + } + + std::vector result; + switch (algo_) { + case Compression::Zstd: + result = decompressZstd(input); + break; + case Compression::LZ4: + result = decompressLZ4(input); + break; + case Compression::Zlib: + result = decompressZlib(input); + break; + default: + result = input; + break; + } + + return processNext(result); +} + +std::vector Decompressor::decompressZstd(const std::vector& input) { +#ifdef E2D_USE_ZSTD + unsigned long long const decompressedSize = ZSTD_getFrameContentSize(input.data(), input.size()); + + if (decompressedSize == ZSTD_CONTENTSIZE_ERROR || + decompressedSize == ZSTD_CONTENTSIZE_UNKNOWN) { + return {}; + } + + std::vector result(static_cast(decompressedSize)); + + size_t const actualSize = ZSTD_decompress( + result.data(), result.size(), + input.data(), input.size() + ); + + if (ZSTD_isError(actualSize)) { + return {}; + } + + result.resize(actualSize); + return result; +#else + return input; +#endif +} + +std::vector Decompressor::decompressLZ4(const std::vector& input) { +#ifdef E2D_USE_LZ4 + if (input.size() < sizeof(u32)) { + return {}; + } + + u32 originalSize; + std::memcpy(&originalSize, input.data(), sizeof(u32)); + + std::vector result(originalSize); + + int const decompressedSize = LZ4_decompress_safe( + reinterpret_cast(input.data() + sizeof(u32)), + reinterpret_cast(result.data()), + static_cast(input.size() - sizeof(u32)), + static_cast(originalSize) + ); + + if 
(decompressedSize < 0) { + return {}; + } + + return result; +#else + return input; +#endif +} + +std::vector Decompressor::decompressZlib(const std::vector& input) { +#ifdef E2D_USE_ZLIB + std::vector result; + result.reserve(input.size() * 4); + + const size_t CHUNK = 16384; + u8 out[CHUNK]; + + z_stream strm; + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + strm.avail_in = static_cast(input.size()); + strm.next_in = const_cast(input.data()); + + if (inflateInit(&strm) != Z_OK) { + return {}; + } + + int ret; + do { + strm.avail_out = CHUNK; + strm.next_out = out; + + ret = inflate(&strm, Z_NO_FLUSH); + if (ret == Z_STREAM_ERROR || ret == Z_DATA_ERROR || ret == Z_MEM_ERROR) { + inflateEnd(&strm); + return {}; + } + + result.insert(result.end(), out, out + CHUNK - strm.avail_out); + } while (strm.avail_out == 0); + + inflateEnd(&strm); + return result; +#else + return input; +#endif +} + +// --------------------------------------------------------------------------- +// Encryptor 实现 +// --------------------------------------------------------------------------- + +Encryptor::Encryptor(const std::string& key, Decryptor::Type type) + : key_(key), type_(type) { +} + +std::vector Encryptor::process(const std::vector& input) { + if (input.empty() || type_ == Decryptor::Type::None) { + return processNext(input); + } + + std::vector result; + switch (type_) { + case Decryptor::Type::XOR: + result = encryptXOR(input); + break; + case Decryptor::Type::AES256: + result = encryptAES256(input); + break; + default: + result = input; + break; + } + + return processNext(result); +} + +std::vector Encryptor::encryptXOR(const std::vector& input) { + if (key_.empty()) { + return input; + } + + std::vector result(input.size()); + const size_t keyLen = key_.size(); + + for (size_t i = 0; i < input.size(); ++i) { + result[i] = input[i] ^ static_cast(key_[i % keyLen]); + } + + return result; +} + +std::vector Encryptor::encryptAES256(const std::vector& input) { 
+ return encryptXOR(input); +} + +// --------------------------------------------------------------------------- +// Compressor 实现 +// --------------------------------------------------------------------------- + +Compressor::Compressor(Compression algo, int level) + : algo_(algo), level_(level) { +} + +std::vector Compressor::process(const std::vector& input) { + if (input.empty() || algo_ == Compression::None) { + return processNext(input); + } + + std::vector result; + switch (algo_) { + case Compression::Zstd: + result = compressZstd(input); + break; + case Compression::LZ4: + result = compressLZ4(input); + break; + case Compression::Zlib: + result = compressZlib(input); + break; + default: + result = input; + break; + } + + return processNext(result); +} + +std::vector Compressor::compressZstd(const std::vector& input) { +#ifdef E2D_USE_ZSTD + size_t const bound = ZSTD_compressBound(input.size()); + std::vector result(bound); + + size_t const compressedSize = ZSTD_compress( + result.data(), result.size(), + input.data(), input.size(), + level_ + ); + + if (ZSTD_isError(compressedSize)) { + return {}; + } + + result.resize(compressedSize); + return result; +#else + return input; +#endif +} + +std::vector Compressor::compressLZ4(const std::vector& input) { +#ifdef E2D_USE_LZ4 + int const bound = LZ4_compressBound(static_cast(input.size())); + std::vector result(sizeof(u32) + bound); + + u32 originalSize = static_cast(input.size()); + std::memcpy(result.data(), &originalSize, sizeof(u32)); + + int const compressedSize = LZ4_compress_default( + reinterpret_cast(input.data()), + reinterpret_cast(result.data() + sizeof(u32)), + static_cast(input.size()), + bound + ); + + if (compressedSize <= 0) { + return {}; + } + + result.resize(sizeof(u32) + compressedSize); + return result; +#else + return input; +#endif +} + +std::vector Compressor::compressZlib(const std::vector& input) { +#ifdef E2D_USE_ZLIB + std::vector result; + result.reserve(input.size() / 2); + + const 
size_t CHUNK = 16384; + u8 out[CHUNK]; + + z_stream strm; + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + + if (deflateInit(&strm, level_) != Z_OK) { + return {}; + } + + strm.avail_in = static_cast(input.size()); + strm.next_in = const_cast(input.data()); + + int ret; + do { + strm.avail_out = CHUNK; + strm.next_out = out; + + ret = deflate(&strm, Z_FINISH); + if (ret == Z_STREAM_ERROR) { + deflateEnd(&strm); + return {}; + } + + result.insert(result.end(), out, out + CHUNK - strm.avail_out); + } while (strm.avail_out == 0); + + deflateEnd(&strm); + return result; +#else + return input; +#endif +} + +// --------------------------------------------------------------------------- +// DataPipe 实现 +// --------------------------------------------------------------------------- + +DataPipe& DataPipe::decrypt(const std::string& key, Decryptor::Type type) { + processors_.push_back(std::make_unique(key, type)); + return *this; +} + +DataPipe& DataPipe::decompress(Compression algo) { + processors_.push_back(std::make_unique(algo)); + return *this; +} + +DataPipe& DataPipe::encrypt(const std::string& key, Decryptor::Type type) { + processors_.push_back(std::make_unique(key, type)); + return *this; +} + +DataPipe& DataPipe::compress(Compression algo, int level) { + processors_.push_back(std::make_unique(algo, level)); + return *this; +} + +DataPipe& DataPipe::add(Unique processor) { + processors_.push_back(std::move(processor)); + return *this; +} + +std::vector DataPipe::process(const std::vector& input) { + if (processors_.empty()) { + return input; + } + + std::vector result = input; + for (auto& processor : processors_) { + result = processor->process(result); + } + + return result; +} + +void DataPipe::clear() { + processors_.clear(); +} + +// --------------------------------------------------------------------------- +// 工具函数实现 +// --------------------------------------------------------------------------- + +std::vector computeChecksum(const 
std::vector& data) { + std::vector result(32, 0); + + u64 hash1 = 14695981039346656037ULL; + u64 hash2 = 14695981039346656037ULL; + + for (size_t i = 0; i < data.size(); ++i) { + hash1 ^= static_cast(data[i]); + hash1 *= 1099511628211ULL; + + hash2 ^= static_cast(data[i]) << ((i % 8) * 8); + hash2 *= 1099511628211ULL; + } + + for (size_t i = 0; i < 8; ++i) { + result[i] = static_cast((hash1 >> (i * 8)) & 0xFF); + result[i + 8] = static_cast((hash2 >> (i * 8)) & 0xFF); + } + + for (size_t i = 16; i < 32; ++i) { + result[i] = static_cast((hash1 ^ hash2) >> ((i - 16) * 8) & 0xFF); + } + + return result; +} + +bool verifyChecksum(const std::vector& data, const std::vector& checksum) { + if (checksum.size() != 32) { + return false; + } + + auto computed = computeChecksum(data); + return std::equal(computed.begin(), computed.end(), checksum.begin()); +} + +} diff --git a/Extra2D/src/core/registry.cpp b/Extra2D/src/core/registry.cpp new file mode 100644 index 0000000..9ee33d2 --- /dev/null +++ b/Extra2D/src/core/registry.cpp @@ -0,0 +1,223 @@ +#include +#include +#include +#include + +namespace extra2d { + +Registry &Registry::instance() { + static Registry instance; + return instance; +} + +bool Registry::init() { + auto levels = group(); + E2D_REGISTRY("正在初始化 {} 个模块,共 {} 个层级...", moduleCount_, + levels.size()); + + for (size_t level = 0; level < levels.size(); ++level) { + auto &modules = levels[level]; + + // 检查当前层级是否有支持并行初始化的模块 + bool hasParallelModules = false; + for (auto *module : modules) { + if (module->parallel()) { + hasParallelModules = true; + break; + } + } + + // 如果只有一个模块或不支持并行,使用串行初始化 + if (modules.size() <= 1 || !hasParallelModules) { + for (auto *module : modules) { + E2D_REGISTRY("正在初始化模块: {} (层级 {})", module->name(), level); + + if (!module->init()) { + E2D_ERROR("初始化模块失败: {}", module->name()); + return false; + } + + E2D_REGISTRY("模块 {} 初始化成功", module->name()); + } + } else { + // 并行初始化当前层级的模块 + E2D_REGISTRY("正在并行初始化 {} 个模块 (层级 {})...", 
modules.size(), + level); + + std::vector>> futures; + std::vector serialModules; + + // 分离支持并行和不支持并行的模块 + for (auto *module : modules) { + if (module->parallel()) { + futures.push_back(std::async(std::launch::async, [module]() { + return std::make_pair(module, module->init()); + })); + } else { + serialModules.push_back(module); + } + } + + // 等待并行模块完成 + for (auto &future : futures) { + auto [module, success] = future.get(); + if (!success) { + E2D_ERROR("初始化模块失败: {}", module->name()); + return false; + } + E2D_REGISTRY("模块 {} 初始化成功 (并行)", module->name()); + } + + // 串行初始化不支持并行的模块 + for (auto *module : serialModules) { + E2D_REGISTRY("正在初始化模块: {} (串行, 层级 {})", module->name(), + level); + if (!module->init()) { + E2D_ERROR("初始化模块失败: {}", module->name()); + return false; + } + E2D_REGISTRY("模块 {} 初始化成功", module->name()); + } + } + + E2D_REGISTRY("层级 {} 初始化完成", level); + } + + E2D_REGISTRY("所有模块初始化完成"); + return true; +} + +void Registry::shutdown() { + // 从后向前关闭模块 + for (size_t i = moduleCount_; i > 0; --i) { + if (modules_[i - 1].valid && modules_[i - 1].module) { + modules_[i - 1].module->shutdown(); + } + } +} + +void Registry::clear() { + shutdown(); + + // 销毁所有模块 + for (size_t i = 0; i < moduleCount_; ++i) { + modules_[i].module.reset(); + modules_[i].valid = false; + } + moduleCount_ = 0; +} + +std::vector Registry::sort() { + std::vector result; + std::vector inDegree(moduleCount_, 0); + std::vector> adj(moduleCount_); + + // 构建依赖图 + for (size_t i = 0; i < moduleCount_; ++i) { + if (!modules_[i].valid) + continue; + + auto deps = modules_[i].module->deps(); + for (auto &depType : deps) { + // 查找依赖模块的索引 + for (size_t j = 0; j < moduleCount_; ++j) { + if (modules_[j].valid && + std::type_index(typeid(*modules_[j].module)) == depType) { + adj[j].push_back(i); + inDegree[i]++; + break; + } + } + } + } + + // 使用优先队列,按优先级排序 + auto cmp = [this](size_t a, size_t b) { + return modules_[a].module->priority() > modules_[b].module->priority(); + }; + std::priority_queue, 
decltype(cmp)> pq(cmp); + + for (size_t i = 0; i < moduleCount_; ++i) { + if (inDegree[i] == 0) { + pq.push(i); + } + } + + while (!pq.empty()) { + size_t curr = pq.top(); + pq.pop(); + result.push_back(modules_[curr].module.get()); + + for (size_t next : adj[curr]) { + inDegree[next]--; + if (inDegree[next] == 0) { + pq.push(next); + } + } + } + + return result; +} + +std::vector> Registry::group() { + std::vector> levels; + std::vector inDegree(moduleCount_, 0); + std::vector> adj(moduleCount_); + + // 构建依赖图 + for (size_t i = 0; i < moduleCount_; ++i) { + if (!modules_[i].valid) + continue; + + auto deps = modules_[i].module->deps(); + for (auto &depType : deps) { + for (size_t j = 0; j < moduleCount_; ++j) { + if (modules_[j].valid && + std::type_index(typeid(*modules_[j].module)) == depType) { + adj[j].push_back(i); + inDegree[i]++; + break; + } + } + } + } + + // 使用 BFS 按层级分组 + std::queue q; + std::vector levelMap(moduleCount_, -1); + + // 找到所有入度为 0 的模块(第一层) + for (size_t i = 0; i < moduleCount_; ++i) { + if (inDegree[i] == 0) { + q.push(i); + levelMap[i] = 0; + } + } + + // BFS 遍历 + while (!q.empty()) { + size_t curr = q.front(); + q.pop(); + + int currLevel = levelMap[curr]; + + // 确保当前层级存在 + if (levels.size() <= static_cast(currLevel)) { + levels.resize(currLevel + 1); + } + levels[currLevel].push_back(modules_[curr].module.get()); + + // 处理依赖当前模块的其他模块 + for (size_t next : adj[curr]) { + inDegree[next]--; + if (inDegree[next] == 0) { + q.push(next); + levelMap[next] = currLevel + 1; + } + } + } + + return levels; +} + +} // namespace extra2d diff --git a/Extra2D/src/core/service_locator.cpp b/Extra2D/src/core/service_locator.cpp new file mode 100644 index 0000000..94ca13c --- /dev/null +++ b/Extra2D/src/core/service_locator.cpp @@ -0,0 +1,110 @@ +#include +#include + +namespace extra2d { + +ServiceLocator& ServiceLocator::instance() { + static ServiceLocator instance; + return instance; +} + +bool ServiceLocator::init() { + std::shared_lock lock(mutex_); + 
+ for (auto& svc : orderedServices_) { + if (!svc) continue; + + auto info = svc->info(); + if (!info.enabled) continue; + + if (!svc->initialized()) { + svc->setState(ServiceState::Initializing); + if (!svc->init()) { + svc->setState(ServiceState::Stopped); + return false; + } + svc->setState(ServiceState::Running); + } + } + + return true; +} + +void ServiceLocator::shutdown() { + std::shared_lock lock(mutex_); + + for (auto it = orderedServices_.rbegin(); + it != orderedServices_.rend(); ++it) { + if (*it && (*it)->initialized()) { + (*it)->setState(ServiceState::Stopping); + (*it)->shutdown(); + (*it)->setState(ServiceState::Stopped); + } + } +} + +void ServiceLocator::update(f32 dt) { + std::shared_lock lock(mutex_); + + for (auto& svc : orderedServices_) { + if (svc && svc->initialized()) { + auto state = svc->state(); + if (state == ServiceState::Running) { + svc->update(dt); + } + } + } +} + +void ServiceLocator::pause() { + std::shared_lock lock(mutex_); + + for (auto& svc : orderedServices_) { + if (svc && svc->initialized()) { + svc->pause(); + } + } +} + +void ServiceLocator::resume() { + std::shared_lock lock(mutex_); + + for (auto& svc : orderedServices_) { + if (svc && svc->initialized()) { + svc->resume(); + } + } +} + +std::vector> ServiceLocator::all() const { + std::shared_lock lock(mutex_); + return orderedServices_; +} + +void ServiceLocator::clear() { + std::unique_lock lock(mutex_); + + for (auto it = orderedServices_.rbegin(); + it != orderedServices_.rend(); ++it) { + if (*it && (*it)->initialized()) { + (*it)->setState(ServiceState::Stopping); + (*it)->shutdown(); + (*it)->setState(ServiceState::Stopped); + } + } + + services_.clear(); + factories_.clear(); + orderedServices_.clear(); +} + +void ServiceLocator::sort() { + std::stable_sort(orderedServices_.begin(), orderedServices_.end(), + [](const Ref& a, const Ref& b) { + if (!a || !b) return false; + return static_cast(a->info().priority) < + static_cast(b->info().priority); + }); +} + 
+} diff --git a/Extra2D/src/core/service_registry.cpp b/Extra2D/src/core/service_registry.cpp new file mode 100644 index 0000000..f238a76 --- /dev/null +++ b/Extra2D/src/core/service_registry.cpp @@ -0,0 +1,37 @@ +#include + +namespace extra2d { + +ServiceRegistry& ServiceRegistry::instance() { + static ServiceRegistry instance; + return instance; +} + +void ServiceRegistry::setEnabled(const std::string& name, bool enabled) { + for (auto& reg : registrations_) { + if (reg.name == name) { + reg.enabled = enabled; + break; + } + } +} + +void ServiceRegistry::createAll() { + std::sort(registrations_.begin(), registrations_.end(), + [](const ServiceRegistration& a, const ServiceRegistration& b) { + return static_cast(a.priority) < static_cast(b.priority); + }); + + for (const auto& reg : registrations_) { + if (!reg.enabled) { + continue; + } + + auto service = reg.factory(); + if (service) { + ServiceLocator::instance().add(service); + } + } +} + +} diff --git a/Extra2D/src/core/string.cpp b/Extra2D/src/core/string.cpp deleted file mode 100644 index 95e05b2..0000000 --- a/Extra2D/src/core/string.cpp +++ /dev/null @@ -1,48 +0,0 @@ -#include - -#ifdef _WIN32 -#include - -namespace extra2d { - -std::string utf8ToGbkImpl(const std::string& utf8) { - if (utf8.empty()) return std::string(); - - // UTF-8 → Wide → GBK - int wideLen = MultiByteToWideChar(CP_UTF8, 0, utf8.c_str(), -1, nullptr, 0); - if (wideLen <= 0) return std::string(); - - std::wstring wide(wideLen - 1, 0); - MultiByteToWideChar(CP_UTF8, 0, utf8.c_str(), -1, &wide[0], wideLen); - - int gbkLen = WideCharToMultiByte(CP_ACP, 0, wide.c_str(), -1, nullptr, 0, nullptr, nullptr); - if (gbkLen <= 0) return std::string(); - - std::string gbk(gbkLen - 1, 0); - WideCharToMultiByte(CP_ACP, 0, wide.c_str(), -1, &gbk[0], gbkLen, nullptr, nullptr); - - return gbk; -} - -std::string gbkToUtf8Impl(const std::string& gbk) { - if (gbk.empty()) return std::string(); - - // GBK → Wide → UTF-8 - int wideLen = 
MultiByteToWideChar(CP_ACP, 0, gbk.c_str(), -1, nullptr, 0); - if (wideLen <= 0) return std::string(); - - std::wstring wide(wideLen - 1, 0); - MultiByteToWideChar(CP_ACP, 0, gbk.c_str(), -1, &wide[0], wideLen); - - int utf8Len = WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, nullptr, 0, nullptr, nullptr); - if (utf8Len <= 0) return std::string(); - - std::string utf8(utf8Len - 1, 0); - WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, &utf8[0], utf8Len, nullptr, nullptr); - - return utf8; -} - -} // namespace extra2d - -#endif // _WIN32 diff --git a/Extra2D/src/event/event.cpp b/Extra2D/src/event/event.cpp index b4b85dc..9908f3d 100644 --- a/Extra2D/src/event/event.cpp +++ b/Extra2D/src/event/event.cpp @@ -2,55 +2,55 @@ namespace extra2d { -Event Event::createWindowResize(int width, int height) { +Event Event::windowResize(i32 w, i32 h) { Event event; event.type = EventType::WindowResize; - event.data = WindowResizeEvent{width, height}; + event.data = WindowResizeEvent{w, h}; return event; } -Event Event::createWindowClose() { +Event Event::windowClose() { Event event; event.type = EventType::WindowClose; return event; } -Event Event::createKeyPress(int keyCode, int scancode, int mods) { +Event Event::keyPress(i32 key, i32 scancode, i32 mods) { Event event; event.type = EventType::KeyPressed; - event.data = KeyEvent{keyCode, scancode, mods}; + event.data = KeyEvent{key, scancode, mods}; return event; } -Event Event::createKeyRelease(int keyCode, int scancode, int mods) { +Event Event::keyRelease(i32 key, i32 scancode, i32 mods) { Event event; event.type = EventType::KeyReleased; - event.data = KeyEvent{keyCode, scancode, mods}; + event.data = KeyEvent{key, scancode, mods}; return event; } -Event Event::createMouseButtonPress(int button, int mods, const Vec2 &pos) { +Event Event::mousePress(i32 btn, i32 mods, Vec2 pos) { Event event; event.type = EventType::MouseButtonPressed; - event.data = MouseButtonEvent{button, mods, pos}; + event.data = 
MouseButtonEvent{btn, mods, pos}; return event; } -Event Event::createMouseButtonRelease(int button, int mods, const Vec2 &pos) { +Event Event::mouseRelease(i32 btn, i32 mods, Vec2 pos) { Event event; event.type = EventType::MouseButtonReleased; - event.data = MouseButtonEvent{button, mods, pos}; + event.data = MouseButtonEvent{btn, mods, pos}; return event; } -Event Event::createMouseMove(const Vec2 &pos, const Vec2 &delta) { +Event Event::mouseMove(Vec2 pos, Vec2 delta) { Event event; event.type = EventType::MouseMoved; event.data = MouseMoveEvent{pos, delta}; return event; } -Event Event::createMouseScroll(const Vec2 &offset, const Vec2 &pos) { +Event Event::mouseScroll(Vec2 offset, Vec2 pos) { Event event; event.type = EventType::MouseScrolled; event.data = MouseScrollEvent{offset, pos}; diff --git a/Extra2D/src/event/event_dispatcher.cpp b/Extra2D/src/event/event_dispatcher.cpp index 8e69221..7cff7c7 100644 --- a/Extra2D/src/event/event_dispatcher.cpp +++ b/Extra2D/src/event/event_dispatcher.cpp @@ -5,14 +5,13 @@ namespace extra2d { EventDispatcher::EventDispatcher() : nextId_(1) {} -ListenerId EventDispatcher::addListener(EventType type, - EventCallback callback) { - ListenerId id = nextId_++; - listeners_[type].push_back({id, type, callback}); +ListenerID EventDispatcher::on(EventType type, EventFn fn) { + ListenerID id = nextId_++; + listeners_[type].push_back({id, type, fn}); return id; } -void EventDispatcher::removeListener(ListenerId id) { +void EventDispatcher::off(ListenerID id) { for (auto &[type, listeners] : listeners_) { auto it = std::remove_if(listeners.begin(), listeners.end(), [id](const Listener &l) { return l.id == id; }); @@ -23,11 +22,11 @@ void EventDispatcher::removeListener(ListenerId id) { } } -void EventDispatcher::removeAllListeners(EventType type) { +void EventDispatcher::offAll(EventType type) { listeners_.erase(type); } -void EventDispatcher::removeAllListeners() { listeners_.clear(); } +void EventDispatcher::offAll() { 
listeners_.clear(); } void EventDispatcher::dispatch(Event &event) { auto it = listeners_.find(event.type); @@ -35,7 +34,7 @@ void EventDispatcher::dispatch(Event &event) { for (auto &listener : it->second) { if (event.handled) break; - listener.callback(event); + listener.fn(event); } } } @@ -45,19 +44,19 @@ void EventDispatcher::dispatch(const Event &event) { dispatch(mutableEvent); } -void EventDispatcher::processQueue(EventQueue &queue) { +void EventDispatcher::process(EventQueue &queue) { Event event; while (queue.poll(event)) { dispatch(event); } } -size_t EventDispatcher::getListenerCount(EventType type) const { +size_t EventDispatcher::listenerCount(EventType type) const { auto it = listeners_.find(type); return (it != listeners_.end()) ? it->second.size() : 0; } -size_t EventDispatcher::getTotalListenerCount() const { +size_t EventDispatcher::totalListeners() const { size_t count = 0; for (const auto &[type, listeners] : listeners_) { count += listeners.size(); diff --git a/Extra2D/src/event/event_queue.cpp b/Extra2D/src/event/event_queue.cpp index 538254a..2f7850d 100644 --- a/Extra2D/src/event/event_queue.cpp +++ b/Extra2D/src/event/event_queue.cpp @@ -4,50 +4,42 @@ namespace extra2d { EventQueue::EventQueue() = default; -void EventQueue::push(const Event &event) { - std::lock_guard lock(mutex_); - queue_.push(event); +bool EventQueue::push(const Event &event) { + return buffer_.push(event); } -void EventQueue::push(Event &&event) { - std::lock_guard lock(mutex_); - queue_.push(std::move(event)); +bool EventQueue::push(Event &&event) { + return buffer_.push(std::move(event)); } bool EventQueue::poll(Event &event) { - std::lock_guard lock(mutex_); - if (queue_.empty()) { - return false; - } - event = queue_.front(); - queue_.pop(); - return true; + return buffer_.pop(event); } bool EventQueue::peek(Event &event) const { - std::lock_guard lock(mutex_); - if (queue_.empty()) { + std::lock_guard lock(mutex_); + if (buffer_.empty()) { + return false; + } + // 
环形缓冲区不支持peek,这里简化处理 + // 实际应用中可能需要双缓冲或其他机制 return false; - } - event = queue_.front(); - return true; } void EventQueue::clear() { - std::lock_guard lock(mutex_); - while (!queue_.empty()) { - queue_.pop(); - } + std::lock_guard lock(mutex_); + Event event; + while (buffer_.pop(event)) { + // 持续弹出直到为空 + } } bool EventQueue::empty() const { - std::lock_guard lock(mutex_); - return queue_.empty(); + return buffer_.empty(); } size_t EventQueue::size() const { - std::lock_guard lock(mutex_); - return queue_.size(); + return buffer_.size(); } } // namespace extra2d diff --git a/Extra2D/src/glad/glad.c b/Extra2D/src/glad/glad.c index 14d2d65..fa11852 100644 --- a/Extra2D/src/glad/glad.c +++ b/Extra2D/src/glad/glad.c @@ -1,17 +1,54 @@ /* - OpenGL ES loader generated by glad 0.1.36 on Mon Feb 9 05:46:28 2026. + GLSC2, OpenGL, OpenGL ES loader generated by glad 0.1.36 on Thu Feb 19 07:59:46 2026. Language/Generator: C/C++ Specification: gl - APIs: gles2=3.2 + APIs: gl=4.5, gles1=1.0, gles2=3.2, glsc2=2.0 Profile: compatibility Extensions: + GL_3DFX_multisample, + GL_3DFX_tbuffer, + GL_3DFX_texture_compression_FXT1, + GL_AMD_blend_minmax_factor, GL_AMD_compressed_3DC_texture, GL_AMD_compressed_ATC_texture, + GL_AMD_conservative_depth, + GL_AMD_debug_output, + GL_AMD_depth_clamp_separate, + GL_AMD_draw_buffers_blend, GL_AMD_framebuffer_multisample_advanced, + GL_AMD_framebuffer_sample_positions, + GL_AMD_gcn_shader, + GL_AMD_gpu_shader_half_float, + GL_AMD_gpu_shader_int16, + GL_AMD_gpu_shader_int64, + GL_AMD_interleaved_elements, + GL_AMD_multi_draw_indirect, + GL_AMD_name_gen_delete, + GL_AMD_occlusion_query_event, GL_AMD_performance_monitor, + GL_AMD_pinned_memory, GL_AMD_program_binary_Z400, + GL_AMD_query_buffer_object, + GL_AMD_sample_positions, + GL_AMD_seamless_cubemap_per_texture, + GL_AMD_shader_atomic_counter_ops, + GL_AMD_shader_ballot, + GL_AMD_shader_explicit_vertex_parameter, + GL_AMD_shader_gpu_shader_half_float_fetch, + GL_AMD_shader_image_load_store_lod, + 
GL_AMD_shader_stencil_export, + GL_AMD_shader_trinary_minmax, + GL_AMD_sparse_texture, + GL_AMD_stencil_operation_extended, + GL_AMD_texture_gather_bias_lod, + GL_AMD_texture_texture4, + GL_AMD_transform_feedback3_lines_triangles, + GL_AMD_transform_feedback4, + GL_AMD_vertex_shader_layer, + GL_AMD_vertex_shader_tessellator, + GL_AMD_vertex_shader_viewport_index, GL_ANDROID_extension_pack_es31a, GL_ANGLE_depth_texture, GL_ANGLE_framebuffer_blit, @@ -23,15 +60,202 @@ GL_ANGLE_texture_compression_dxt5, GL_ANGLE_texture_usage, GL_ANGLE_translated_shader_source, + GL_APPLE_aux_depth_stencil, + GL_APPLE_client_storage, GL_APPLE_clip_distance, GL_APPLE_color_buffer_packed_float, GL_APPLE_copy_texture_levels, + GL_APPLE_element_array, + GL_APPLE_fence, + GL_APPLE_float_pixels, + GL_APPLE_flush_buffer_range, GL_APPLE_framebuffer_multisample, + GL_APPLE_object_purgeable, GL_APPLE_rgb_422, + GL_APPLE_row_bytes, + GL_APPLE_specular_vector, GL_APPLE_sync, + GL_APPLE_texture_2D_limited_npot, GL_APPLE_texture_format_BGRA8888, GL_APPLE_texture_max_level, GL_APPLE_texture_packed_float, + GL_APPLE_texture_range, + GL_APPLE_transform_hint, + GL_APPLE_vertex_array_object, + GL_APPLE_vertex_array_range, + GL_APPLE_vertex_program_evaluators, + GL_APPLE_ycbcr_422, + GL_ARB_ES2_compatibility, + GL_ARB_ES3_1_compatibility, + GL_ARB_ES3_2_compatibility, + GL_ARB_ES3_compatibility, + GL_ARB_arrays_of_arrays, + GL_ARB_base_instance, + GL_ARB_bindless_texture, + GL_ARB_blend_func_extended, + GL_ARB_buffer_storage, + GL_ARB_cl_event, + GL_ARB_clear_buffer_object, + GL_ARB_clear_texture, + GL_ARB_clip_control, + GL_ARB_color_buffer_float, + GL_ARB_compatibility, + GL_ARB_compressed_texture_pixel_storage, + GL_ARB_compute_shader, + GL_ARB_compute_variable_group_size, + GL_ARB_conditional_render_inverted, + GL_ARB_conservative_depth, + GL_ARB_copy_buffer, + GL_ARB_copy_image, + GL_ARB_cull_distance, + GL_ARB_debug_output, + GL_ARB_depth_buffer_float, + GL_ARB_depth_clamp, + GL_ARB_depth_texture, 
+ GL_ARB_derivative_control, + GL_ARB_direct_state_access, + GL_ARB_draw_buffers, + GL_ARB_draw_buffers_blend, + GL_ARB_draw_elements_base_vertex, + GL_ARB_draw_indirect, + GL_ARB_draw_instanced, + GL_ARB_enhanced_layouts, + GL_ARB_explicit_attrib_location, + GL_ARB_explicit_uniform_location, + GL_ARB_fragment_coord_conventions, + GL_ARB_fragment_layer_viewport, + GL_ARB_fragment_program, + GL_ARB_fragment_program_shadow, + GL_ARB_fragment_shader, + GL_ARB_fragment_shader_interlock, + GL_ARB_framebuffer_no_attachments, + GL_ARB_framebuffer_object, + GL_ARB_framebuffer_sRGB, + GL_ARB_geometry_shader4, + GL_ARB_get_program_binary, + GL_ARB_get_texture_sub_image, + GL_ARB_gl_spirv, + GL_ARB_gpu_shader5, + GL_ARB_gpu_shader_fp64, + GL_ARB_gpu_shader_int64, + GL_ARB_half_float_pixel, + GL_ARB_half_float_vertex, + GL_ARB_imaging, + GL_ARB_indirect_parameters, + GL_ARB_instanced_arrays, + GL_ARB_internalformat_query, + GL_ARB_internalformat_query2, + GL_ARB_invalidate_subdata, + GL_ARB_map_buffer_alignment, + GL_ARB_map_buffer_range, + GL_ARB_matrix_palette, + GL_ARB_multi_bind, + GL_ARB_multi_draw_indirect, + GL_ARB_multisample, + GL_ARB_multitexture, + GL_ARB_occlusion_query, + GL_ARB_occlusion_query2, + GL_ARB_parallel_shader_compile, + GL_ARB_pipeline_statistics_query, + GL_ARB_pixel_buffer_object, + GL_ARB_point_parameters, + GL_ARB_point_sprite, + GL_ARB_polygon_offset_clamp, + GL_ARB_post_depth_coverage, + GL_ARB_program_interface_query, + GL_ARB_provoking_vertex, + GL_ARB_query_buffer_object, + GL_ARB_robust_buffer_access_behavior, + GL_ARB_robustness, + GL_ARB_robustness_isolation, + GL_ARB_sample_locations, + GL_ARB_sample_shading, + GL_ARB_sampler_objects, + GL_ARB_seamless_cube_map, + GL_ARB_seamless_cubemap_per_texture, + GL_ARB_separate_shader_objects, + GL_ARB_shader_atomic_counter_ops, + GL_ARB_shader_atomic_counters, + GL_ARB_shader_ballot, + GL_ARB_shader_bit_encoding, + GL_ARB_shader_clock, + GL_ARB_shader_draw_parameters, + GL_ARB_shader_group_vote, + 
GL_ARB_shader_image_load_store, + GL_ARB_shader_image_size, + GL_ARB_shader_objects, + GL_ARB_shader_precision, + GL_ARB_shader_stencil_export, + GL_ARB_shader_storage_buffer_object, + GL_ARB_shader_subroutine, + GL_ARB_shader_texture_image_samples, + GL_ARB_shader_texture_lod, + GL_ARB_shader_viewport_layer_array, + GL_ARB_shading_language_100, + GL_ARB_shading_language_420pack, + GL_ARB_shading_language_include, + GL_ARB_shading_language_packing, + GL_ARB_shadow, + GL_ARB_shadow_ambient, + GL_ARB_sparse_buffer, + GL_ARB_sparse_texture, + GL_ARB_sparse_texture2, + GL_ARB_sparse_texture_clamp, + GL_ARB_spirv_extensions, + GL_ARB_stencil_texturing, + GL_ARB_sync, + GL_ARB_tessellation_shader, + GL_ARB_texture_barrier, + GL_ARB_texture_border_clamp, + GL_ARB_texture_buffer_object, + GL_ARB_texture_buffer_object_rgb32, + GL_ARB_texture_buffer_range, + GL_ARB_texture_compression, + GL_ARB_texture_compression_bptc, + GL_ARB_texture_compression_rgtc, + GL_ARB_texture_cube_map, + GL_ARB_texture_cube_map_array, + GL_ARB_texture_env_add, + GL_ARB_texture_env_combine, + GL_ARB_texture_env_crossbar, + GL_ARB_texture_env_dot3, + GL_ARB_texture_filter_anisotropic, + GL_ARB_texture_filter_minmax, + GL_ARB_texture_float, + GL_ARB_texture_gather, + GL_ARB_texture_mirror_clamp_to_edge, + GL_ARB_texture_mirrored_repeat, + GL_ARB_texture_multisample, + GL_ARB_texture_non_power_of_two, + GL_ARB_texture_query_levels, + GL_ARB_texture_query_lod, + GL_ARB_texture_rectangle, + GL_ARB_texture_rg, + GL_ARB_texture_rgb10_a2ui, + GL_ARB_texture_stencil8, + GL_ARB_texture_storage, + GL_ARB_texture_storage_multisample, + GL_ARB_texture_swizzle, + GL_ARB_texture_view, + GL_ARB_timer_query, + GL_ARB_transform_feedback2, + GL_ARB_transform_feedback3, + GL_ARB_transform_feedback_instanced, + GL_ARB_transform_feedback_overflow_query, + GL_ARB_transpose_matrix, + GL_ARB_uniform_buffer_object, + GL_ARB_vertex_array_bgra, + GL_ARB_vertex_array_object, + GL_ARB_vertex_attrib_64bit, + 
GL_ARB_vertex_attrib_binding, + GL_ARB_vertex_blend, + GL_ARB_vertex_buffer_object, + GL_ARB_vertex_program, + GL_ARB_vertex_shader, + GL_ARB_vertex_type_10f_11f_11f_rev, + GL_ARB_vertex_type_2_10_10_10_rev, + GL_ARB_viewport_array, + GL_ARB_window_pos, GL_ARM_mali_program_binary, GL_ARM_mali_shader_binary, GL_ARM_rgba8, @@ -39,48 +263,103 @@ GL_ARM_shader_framebuffer_fetch, GL_ARM_shader_framebuffer_fetch_depth_stencil, GL_ARM_texture_unnormalized_coordinates, + GL_ATI_draw_buffers, + GL_ATI_element_array, + GL_ATI_envmap_bumpmap, + GL_ATI_fragment_shader, + GL_ATI_map_object_buffer, + GL_ATI_meminfo, + GL_ATI_pixel_format_float, + GL_ATI_pn_triangles, + GL_ATI_separate_stencil, + GL_ATI_text_fragment_shader, + GL_ATI_texture_env_combine3, + GL_ATI_texture_float, + GL_ATI_texture_mirror_once, + GL_ATI_vertex_array_object, + GL_ATI_vertex_attrib_array_object, + GL_ATI_vertex_streams, GL_DMP_program_binary, GL_DMP_shader_binary, + GL_EXT_422_pixels, GL_EXT_EGL_image_array, GL_EXT_EGL_image_storage, GL_EXT_EGL_image_storage_compression, + GL_EXT_EGL_sync, GL_EXT_YUV_target, + GL_EXT_abgr, GL_EXT_base_instance, + GL_EXT_bgra, + GL_EXT_bindable_uniform, + GL_EXT_blend_color, + GL_EXT_blend_equation_separate, GL_EXT_blend_func_extended, + GL_EXT_blend_func_separate, + GL_EXT_blend_logic_op, GL_EXT_blend_minmax, + GL_EXT_blend_subtract, GL_EXT_buffer_storage, GL_EXT_clear_texture, GL_EXT_clip_control, GL_EXT_clip_cull_distance, + GL_EXT_clip_volume_hint, + GL_EXT_cmyka, GL_EXT_color_buffer_float, GL_EXT_color_buffer_half_float, + GL_EXT_color_subtable, + GL_EXT_compiled_vertex_array, GL_EXT_conservative_depth, + GL_EXT_convolution, + GL_EXT_coordinate_frame, GL_EXT_copy_image, + GL_EXT_copy_texture, + GL_EXT_cull_vertex, GL_EXT_debug_label, GL_EXT_debug_marker, + GL_EXT_depth_bounds_test, GL_EXT_depth_clamp, + GL_EXT_direct_state_access, GL_EXT_discard_framebuffer, GL_EXT_disjoint_timer_query, GL_EXT_draw_buffers, + GL_EXT_draw_buffers2, GL_EXT_draw_buffers_indexed, 
GL_EXT_draw_elements_base_vertex, GL_EXT_draw_instanced, + GL_EXT_draw_range_elements, GL_EXT_draw_transform_feedback, GL_EXT_external_buffer, GL_EXT_float_blend, + GL_EXT_fog_coord, GL_EXT_fragment_shading_rate, + GL_EXT_framebuffer_blit, GL_EXT_framebuffer_blit_layers, + GL_EXT_framebuffer_multisample, + GL_EXT_framebuffer_multisample_blit_scaled, + GL_EXT_framebuffer_object, + GL_EXT_framebuffer_sRGB, GL_EXT_geometry_point_size, GL_EXT_geometry_shader, + GL_EXT_geometry_shader4, + GL_EXT_gpu_program_parameters, + GL_EXT_gpu_shader4, GL_EXT_gpu_shader5, + GL_EXT_histogram, + GL_EXT_index_array_formats, + GL_EXT_index_func, + GL_EXT_index_material, + GL_EXT_index_texture, GL_EXT_instanced_arrays, + GL_EXT_light_texture, GL_EXT_map_buffer_range, GL_EXT_memory_object, GL_EXT_memory_object_fd, GL_EXT_memory_object_win32, GL_EXT_mesh_shader, + GL_EXT_misc_attribute, GL_EXT_multi_draw_arrays, GL_EXT_multi_draw_indirect, + GL_EXT_multisample, GL_EXT_multisampled_compatibility, GL_EXT_multisampled_render_to_texture, GL_EXT_multisampled_render_to_texture2, @@ -89,25 +368,40 @@ GL_EXT_multiview_texture_multisample, GL_EXT_multiview_timer_query, GL_EXT_occlusion_query_boolean, + GL_EXT_packed_depth_stencil, + GL_EXT_packed_float, + GL_EXT_packed_pixels, + GL_EXT_paletted_texture, + GL_EXT_pixel_buffer_object, + GL_EXT_pixel_transform, + GL_EXT_pixel_transform_color_table, + GL_EXT_point_parameters, + GL_EXT_polygon_offset, GL_EXT_polygon_offset_clamp, GL_EXT_post_depth_coverage, GL_EXT_primitive_bounding_box, GL_EXT_protected_textures, + GL_EXT_provoking_vertex, GL_EXT_pvrtc_sRGB, GL_EXT_raster_multisample, GL_EXT_read_format_bgra, GL_EXT_render_snorm, + GL_EXT_rescale_normal, GL_EXT_robustness, GL_EXT_sRGB, GL_EXT_sRGB_write_control, + GL_EXT_secondary_color, GL_EXT_semaphore, GL_EXT_semaphore_fd, GL_EXT_semaphore_win32, GL_EXT_separate_depth_stencil, GL_EXT_separate_shader_objects, + GL_EXT_separate_specular_color, GL_EXT_shader_framebuffer_fetch, 
GL_EXT_shader_framebuffer_fetch_non_coherent, GL_EXT_shader_group_vote, + GL_EXT_shader_image_load_formatted, + GL_EXT_shader_image_load_store, GL_EXT_shader_implicit_conversions, GL_EXT_shader_integer_mix, GL_EXT_shader_io_blocks, @@ -117,54 +411,108 @@ GL_EXT_shader_samples_identical, GL_EXT_shader_texture_lod, GL_EXT_shader_texture_samples, + GL_EXT_shadow_funcs, GL_EXT_shadow_samplers, + GL_EXT_shared_texture_palette, GL_EXT_sparse_texture, GL_EXT_sparse_texture2, + GL_EXT_stencil_clear_tag, + GL_EXT_stencil_two_side, + GL_EXT_stencil_wrap, + GL_EXT_subtexture, GL_EXT_tessellation_point_size, GL_EXT_tessellation_shader, + GL_EXT_texture, + GL_EXT_texture3D, + GL_EXT_texture_array, GL_EXT_texture_border_clamp, GL_EXT_texture_buffer, + GL_EXT_texture_buffer_object, GL_EXT_texture_compression_astc_decode_mode, GL_EXT_texture_compression_bptc, GL_EXT_texture_compression_dxt1, + GL_EXT_texture_compression_latc, GL_EXT_texture_compression_rgtc, GL_EXT_texture_compression_s3tc, GL_EXT_texture_compression_s3tc_srgb, + GL_EXT_texture_cube_map, GL_EXT_texture_cube_map_array, + GL_EXT_texture_env_add, + GL_EXT_texture_env_combine, + GL_EXT_texture_env_dot3, GL_EXT_texture_filter_anisotropic, GL_EXT_texture_filter_minmax, GL_EXT_texture_format_BGRA8888, GL_EXT_texture_format_sRGB_override, + GL_EXT_texture_integer, + GL_EXT_texture_lod_bias, + GL_EXT_texture_mirror_clamp, GL_EXT_texture_mirror_clamp_to_edge, GL_EXT_texture_norm16, + GL_EXT_texture_object, + GL_EXT_texture_perturb_normal, GL_EXT_texture_query_lod, GL_EXT_texture_rg, + GL_EXT_texture_sRGB, GL_EXT_texture_sRGB_R8, GL_EXT_texture_sRGB_RG8, GL_EXT_texture_sRGB_decode, GL_EXT_texture_shadow_lod, + GL_EXT_texture_shared_exponent, + GL_EXT_texture_snorm, GL_EXT_texture_storage, GL_EXT_texture_storage_compression, + GL_EXT_texture_swizzle, GL_EXT_texture_type_2_10_10_10_REV, GL_EXT_texture_view, + GL_EXT_timer_query, + GL_EXT_transform_feedback, GL_EXT_unpack_subimage, + GL_EXT_vertex_array, + 
GL_EXT_vertex_array_bgra, + GL_EXT_vertex_attrib_64bit, + GL_EXT_vertex_shader, + GL_EXT_vertex_weighting, GL_EXT_win32_keyed_mutex, GL_EXT_window_rectangles, + GL_EXT_x11_sync_object, GL_FJ_shader_binary_GCCSO, + GL_GREMEDY_frame_terminator, + GL_GREMEDY_string_marker, + GL_HP_convolution_border_modes, + GL_HP_image_transform, + GL_HP_occlusion_test, + GL_HP_texture_lighting, GL_HUAWEI_program_binary, GL_HUAWEI_shader_binary, + GL_IBM_cull_vertex, + GL_IBM_multimode_draw_arrays, + GL_IBM_rasterpos_clip, + GL_IBM_static_data, + GL_IBM_texture_mirrored_repeat, + GL_IBM_vertex_array_lists, GL_IMG_bindless_texture, GL_IMG_framebuffer_downsample, GL_IMG_multisampled_render_to_texture, GL_IMG_program_binary, + GL_IMG_pvric_end_to_end_signature, GL_IMG_read_format, GL_IMG_shader_binary, GL_IMG_texture_compression_pvrtc, GL_IMG_texture_compression_pvrtc2, + GL_IMG_texture_env_enhanced_fixed_function, GL_IMG_texture_filter_cubic, + GL_IMG_tile_region_protection, + GL_IMG_user_clip_plane, + GL_INGR_blend_func_separate, + GL_INGR_color_clamp, + GL_INGR_interlace_read, GL_INTEL_blackhole_render, GL_INTEL_conservative_rasterization, + GL_INTEL_fragment_shader_ordering, GL_INTEL_framebuffer_CMAA, + GL_INTEL_map_texture, + GL_INTEL_parallel_arrays, GL_INTEL_performance_query, GL_KHR_blend_equation_advanced, GL_KHR_blend_equation_advanced_coherent, @@ -178,86 +526,185 @@ GL_KHR_texture_compression_astc_hdr, GL_KHR_texture_compression_astc_ldr, GL_KHR_texture_compression_astc_sliced_3d, + GL_MESAX_texture_stack, GL_MESA_bgra, GL_MESA_framebuffer_flip_x, GL_MESA_framebuffer_flip_y, GL_MESA_framebuffer_swap_xy, + GL_MESA_pack_invert, GL_MESA_program_binary_formats, + GL_MESA_resize_buffers, GL_MESA_sampler_objects, GL_MESA_shader_integer_functions, GL_MESA_texture_const_bandwidth, + GL_MESA_tile_raster_order, + GL_MESA_window_pos, + GL_MESA_ycbcr_texture, GL_NVX_blend_equation_advanced_multi_draw_buffers, + GL_NVX_conditional_render, + GL_NVX_gpu_memory_info, + 
GL_NVX_gpu_multicast2, + GL_NVX_linked_gpu_multicast, + GL_NVX_progress_fence, + GL_NV_alpha_to_coverage_dither_control, + GL_NV_bindless_multi_draw_indirect, + GL_NV_bindless_multi_draw_indirect_count, GL_NV_bindless_texture, GL_NV_blend_equation_advanced, GL_NV_blend_equation_advanced_coherent, GL_NV_blend_minmax_factor, + GL_NV_blend_square, GL_NV_clip_space_w_scaling, + GL_NV_command_list, + GL_NV_compute_program5, GL_NV_compute_shader_derivatives, GL_NV_conditional_render, GL_NV_conservative_raster, + GL_NV_conservative_raster_dilate, GL_NV_conservative_raster_pre_snap, GL_NV_conservative_raster_pre_snap_triangles, + GL_NV_conservative_raster_underestimation, GL_NV_copy_buffer, + GL_NV_copy_depth_to_color, + GL_NV_copy_image, GL_NV_coverage_sample, + GL_NV_deep_texture3D, + GL_NV_depth_buffer_float, + GL_NV_depth_clamp, GL_NV_depth_nonlinear, GL_NV_draw_buffers, GL_NV_draw_instanced, + GL_NV_draw_texture, GL_NV_draw_vulkan_image, + GL_NV_evaluators, GL_NV_explicit_attrib_location, + GL_NV_explicit_multisample, GL_NV_fbo_color_attachments, GL_NV_fence, GL_NV_fill_rectangle, + GL_NV_float_buffer, + GL_NV_fog_distance, GL_NV_fragment_coverage_to_color, + GL_NV_fragment_program, + GL_NV_fragment_program2, + GL_NV_fragment_program4, + GL_NV_fragment_program_option, GL_NV_fragment_shader_barycentric, GL_NV_fragment_shader_interlock, GL_NV_framebuffer_blit, GL_NV_framebuffer_mixed_samples, GL_NV_framebuffer_multisample, + GL_NV_framebuffer_multisample_coverage, GL_NV_generate_mipmap_sRGB, + GL_NV_geometry_program4, + GL_NV_geometry_shader4, GL_NV_geometry_shader_passthrough, + GL_NV_gpu_multicast, + GL_NV_gpu_program4, + GL_NV_gpu_program5, + GL_NV_gpu_program5_mem_extended, GL_NV_gpu_shader5, + GL_NV_half_float, GL_NV_image_formats, GL_NV_instanced_arrays, GL_NV_internalformat_sample_query, + GL_NV_light_max_exponent, GL_NV_memory_attachment, GL_NV_memory_object_sparse, GL_NV_mesh_shader, + GL_NV_multisample_coverage, + GL_NV_multisample_filter_hint, 
GL_NV_non_square_matrices, + GL_NV_occlusion_query, GL_NV_pack_subimage, + GL_NV_packed_depth_stencil, + GL_NV_parameter_buffer_object, + GL_NV_parameter_buffer_object2, GL_NV_path_rendering, GL_NV_path_rendering_shared_edge, GL_NV_pixel_buffer_object, + GL_NV_pixel_data_range, + GL_NV_point_sprite, GL_NV_polygon_mode, + GL_NV_present_video, + GL_NV_primitive_restart, GL_NV_primitive_shading_rate, + GL_NV_query_resource, + GL_NV_query_resource_tag, GL_NV_read_buffer, GL_NV_read_buffer_front, GL_NV_read_depth, GL_NV_read_depth_stencil, GL_NV_read_stencil, + GL_NV_register_combiners, + GL_NV_register_combiners2, GL_NV_representative_fragment_test, + GL_NV_robustness_video_memory_purge, GL_NV_sRGB_formats, GL_NV_sample_locations, GL_NV_sample_mask_override_coverage, GL_NV_scissor_exclusive, + GL_NV_shader_atomic_counters, + GL_NV_shader_atomic_float, + GL_NV_shader_atomic_float64, GL_NV_shader_atomic_fp16_vector, + GL_NV_shader_atomic_int64, + GL_NV_shader_buffer_load, + GL_NV_shader_buffer_store, GL_NV_shader_noperspective_interpolation, + GL_NV_shader_storage_buffer_object, GL_NV_shader_subgroup_partitioned, GL_NV_shader_texture_footprint, + GL_NV_shader_thread_group, + GL_NV_shader_thread_shuffle, GL_NV_shading_rate_image, GL_NV_shadow_samplers_array, GL_NV_shadow_samplers_cube, GL_NV_stereo_view_rendering, + GL_NV_tessellation_program5, + GL_NV_texgen_emboss, + GL_NV_texgen_reflection, GL_NV_texture_barrier, GL_NV_texture_border_clamp, GL_NV_texture_compression_s3tc_update, + GL_NV_texture_compression_vtc, + GL_NV_texture_env_combine4, + GL_NV_texture_expand_normal, + GL_NV_texture_multisample, GL_NV_texture_npot_2D_mipmap, + GL_NV_texture_rectangle, + GL_NV_texture_rectangle_compressed, + GL_NV_texture_shader, + GL_NV_texture_shader2, + GL_NV_texture_shader3, GL_NV_timeline_semaphore, + GL_NV_transform_feedback, + GL_NV_transform_feedback2, + GL_NV_uniform_buffer_std430_layout, + GL_NV_uniform_buffer_unified_memory, + GL_NV_vdpau_interop, + GL_NV_vdpau_interop2, 
+ GL_NV_vertex_array_range, + GL_NV_vertex_array_range2, + GL_NV_vertex_attrib_integer_64bit, + GL_NV_vertex_buffer_unified_memory, + GL_NV_vertex_program, + GL_NV_vertex_program1_1, + GL_NV_vertex_program2, + GL_NV_vertex_program2_option, + GL_NV_vertex_program3, + GL_NV_vertex_program4, + GL_NV_video_capture, GL_NV_viewport_array, GL_NV_viewport_array2, GL_NV_viewport_swizzle, GL_OES_EGL_image, GL_OES_EGL_image_external, GL_OES_EGL_image_external_essl3, + GL_OES_blend_equation_separate, + GL_OES_blend_func_separate, + GL_OES_blend_subtract, + GL_OES_byte_coordinates, GL_OES_compressed_ETC1_RGB8_sub_texture, GL_OES_compressed_ETC1_RGB8_texture, GL_OES_compressed_paletted_texture, @@ -267,16 +714,26 @@ GL_OES_depth_texture, GL_OES_draw_buffers_indexed, GL_OES_draw_elements_base_vertex, + GL_OES_draw_texture, GL_OES_element_index_uint, + GL_OES_extended_matrix_palette, GL_OES_fbo_render_mipmap, + GL_OES_fixed_point, GL_OES_fragment_precision_high, + GL_OES_framebuffer_object, GL_OES_geometry_point_size, GL_OES_geometry_shader, GL_OES_get_program_binary, GL_OES_gpu_shader5, GL_OES_mapbuffer, + GL_OES_matrix_get, + GL_OES_matrix_palette, GL_OES_packed_depth_stencil, + GL_OES_point_size_array, + GL_OES_point_sprite, GL_OES_primitive_bounding_box, + GL_OES_query_matrix, + GL_OES_read_format, GL_OES_required_internalformat, GL_OES_rgb8_rgba8, GL_OES_sample_shading, @@ -284,9 +741,12 @@ GL_OES_shader_image_atomic, GL_OES_shader_io_blocks, GL_OES_shader_multisample_interpolation, + GL_OES_single_precision, GL_OES_standard_derivatives, GL_OES_stencil1, GL_OES_stencil4, + GL_OES_stencil8, + GL_OES_stencil_wrap, GL_OES_surfaceless_context, GL_OES_tessellation_point_size, GL_OES_tessellation_shader, @@ -294,11 +754,14 @@ GL_OES_texture_border_clamp, GL_OES_texture_buffer, GL_OES_texture_compression_astc, + GL_OES_texture_cube_map, GL_OES_texture_cube_map_array, + GL_OES_texture_env_crossbar, GL_OES_texture_float, GL_OES_texture_float_linear, GL_OES_texture_half_float, 
GL_OES_texture_half_float_linear, + GL_OES_texture_mirrored_repeat, GL_OES_texture_npot, GL_OES_texture_stencil8, GL_OES_texture_storage_multisample_2d_array, @@ -307,9 +770,14 @@ GL_OES_vertex_half_float, GL_OES_vertex_type_10_10_10_2, GL_OES_viewport_array, + GL_OML_interlace, + GL_OML_resample, + GL_OML_subsample, GL_OVR_multiview, GL_OVR_multiview2, GL_OVR_multiview_multisampled_render_to_texture, + GL_PGI_misc_hints, + GL_PGI_vertex_hints, GL_QCOM_YUV_texture_gather, GL_QCOM_alpha_test, GL_QCOM_binning_control, @@ -332,14 +800,81 @@ GL_QCOM_tiled_rendering, GL_QCOM_writeonly_rendering, GL_QCOM_ycbcr_degamma, - GL_VIV_shader_binary + GL_REND_screen_coordinates, + GL_S3_s3tc, + GL_SGIS_detail_texture, + GL_SGIS_fog_function, + GL_SGIS_generate_mipmap, + GL_SGIS_multisample, + GL_SGIS_pixel_texture, + GL_SGIS_point_line_texgen, + GL_SGIS_point_parameters, + GL_SGIS_sharpen_texture, + GL_SGIS_texture4D, + GL_SGIS_texture_border_clamp, + GL_SGIS_texture_color_mask, + GL_SGIS_texture_edge_clamp, + GL_SGIS_texture_filter4, + GL_SGIS_texture_lod, + GL_SGIS_texture_select, + GL_SGIX_async, + GL_SGIX_async_histogram, + GL_SGIX_async_pixel, + GL_SGIX_blend_alpha_minmax, + GL_SGIX_calligraphic_fragment, + GL_SGIX_clipmap, + GL_SGIX_convolution_accuracy, + GL_SGIX_depth_pass_instrument, + GL_SGIX_depth_texture, + GL_SGIX_flush_raster, + GL_SGIX_fog_offset, + GL_SGIX_fragment_lighting, + GL_SGIX_framezoom, + GL_SGIX_igloo_interface, + GL_SGIX_instruments, + GL_SGIX_interlace, + GL_SGIX_ir_instrument1, + GL_SGIX_list_priority, + GL_SGIX_pixel_texture, + GL_SGIX_pixel_tiles, + GL_SGIX_polynomial_ffd, + GL_SGIX_reference_plane, + GL_SGIX_resample, + GL_SGIX_scalebias_hint, + GL_SGIX_shadow, + GL_SGIX_shadow_ambient, + GL_SGIX_sprite, + GL_SGIX_subsample, + GL_SGIX_tag_sample_buffer, + GL_SGIX_texture_add_env, + GL_SGIX_texture_coordinate_clamp, + GL_SGIX_texture_lod_bias, + GL_SGIX_texture_multi_buffer, + GL_SGIX_texture_scale_bias, + GL_SGIX_vertex_preclip, + GL_SGIX_ycrcb, + 
GL_SGIX_ycrcb_subsample, + GL_SGIX_ycrcba, + GL_SGI_color_matrix, + GL_SGI_color_table, + GL_SGI_texture_color_table, + GL_SUNX_constant_data, + GL_SUN_convolution_border_modes, + GL_SUN_global_alpha, + GL_SUN_mesh_array, + GL_SUN_slice_accum, + GL_SUN_triangle_list, + GL_SUN_vertex, + GL_VIV_shader_binary, + GL_WIN_phong_shading, + GL_WIN_specular_fog Loader: True Local files: False Omit khrplatform: False Reproducible: False Commandline: - --profile="compatibility" --api="gles2=3.2" --generator="c" --spec="gl" --extensions="GL_AMD_compressed_3DC_texture,GL_AMD_compressed_ATC_texture,GL_AMD_framebuffer_multisample_advanced,GL_AMD_performance_monitor,GL_AMD_program_binary_Z400,GL_ANDROID_extension_pack_es31a,GL_ANGLE_depth_texture,GL_ANGLE_framebuffer_blit,GL_ANGLE_framebuffer_multisample,GL_ANGLE_instanced_arrays,GL_ANGLE_pack_reverse_row_order,GL_ANGLE_program_binary,GL_ANGLE_texture_compression_dxt3,GL_ANGLE_texture_compression_dxt5,GL_ANGLE_texture_usage,GL_ANGLE_translated_shader_source,GL_APPLE_clip_distance,GL_APPLE_color_buffer_packed_float,GL_APPLE_copy_texture_levels,GL_APPLE_framebuffer_multisample,GL_APPLE_rgb_422,GL_APPLE_sync,GL_APPLE_texture_format_BGRA8888,GL_APPLE_texture_max_level,GL_APPLE_texture_packed_float,GL_ARM_mali_program_binary,GL_ARM_mali_shader_binary,GL_ARM_rgba8,GL_ARM_shader_core_properties,GL_ARM_shader_framebuffer_fetch,GL_ARM_shader_framebuffer_fetch_depth_stencil,GL_ARM_texture_unnormalized_coordinates,GL_DMP_program_binary,GL_DMP_shader_binary,GL_EXT_EGL_image_array,GL_EXT_EGL_image_storage,GL_EXT_EGL_image_storage_compression,GL_EXT_YUV_target,GL_EXT_base_instance,GL_EXT_blend_func_extended,GL_EXT_blend_minmax,GL_EXT_buffer_storage,GL_EXT_clear_texture,GL_EXT_clip_control,GL_EXT_clip_cull_distance,GL_EXT_color_buffer_float,GL_EXT_color_buffer_half_float,GL_EXT_conservative_depth,GL_EXT_copy_image,GL_EXT_debug_label,GL_EXT_debug_marker,GL_EXT_depth_clamp,GL_EXT_discard_framebuffer,GL_EXT_disjoint_timer_query,GL_EXT_draw_buffers,G
L_EXT_draw_buffers_indexed,GL_EXT_draw_elements_base_vertex,GL_EXT_draw_instanced,GL_EXT_draw_transform_feedback,GL_EXT_external_buffer,GL_EXT_float_blend,GL_EXT_fragment_shading_rate,GL_EXT_framebuffer_blit_layers,GL_EXT_geometry_point_size,GL_EXT_geometry_shader,GL_EXT_gpu_shader5,GL_EXT_instanced_arrays,GL_EXT_map_buffer_range,GL_EXT_memory_object,GL_EXT_memory_object_fd,GL_EXT_memory_object_win32,GL_EXT_mesh_shader,GL_EXT_multi_draw_arrays,GL_EXT_multi_draw_indirect,GL_EXT_multisampled_compatibility,GL_EXT_multisampled_render_to_texture,GL_EXT_multisampled_render_to_texture2,GL_EXT_multiview_draw_buffers,GL_EXT_multiview_tessellation_geometry_shader,GL_EXT_multiview_texture_multisample,GL_EXT_multiview_timer_query,GL_EXT_occlusion_query_boolean,GL_EXT_polygon_offset_clamp,GL_EXT_post_depth_coverage,GL_EXT_primitive_bounding_box,GL_EXT_protected_textures,GL_EXT_pvrtc_sRGB,GL_EXT_raster_multisample,GL_EXT_read_format_bgra,GL_EXT_render_snorm,GL_EXT_robustness,GL_EXT_sRGB,GL_EXT_sRGB_write_control,GL_EXT_semaphore,GL_EXT_semaphore_fd,GL_EXT_semaphore_win32,GL_EXT_separate_depth_stencil,GL_EXT_separate_shader_objects,GL_EXT_shader_framebuffer_fetch,GL_EXT_shader_framebuffer_fetch_non_coherent,GL_EXT_shader_group_vote,GL_EXT_shader_implicit_conversions,GL_EXT_shader_integer_mix,GL_EXT_shader_io_blocks,GL_EXT_shader_non_constant_global_initializers,GL_EXT_shader_pixel_local_storage,GL_EXT_shader_pixel_local_storage2,GL_EXT_shader_samples_identical,GL_EXT_shader_texture_lod,GL_EXT_shader_texture_samples,GL_EXT_shadow_samplers,GL_EXT_sparse_texture,GL_EXT_sparse_texture2,GL_EXT_tessellation_point_size,GL_EXT_tessellation_shader,GL_EXT_texture_border_clamp,GL_EXT_texture_buffer,GL_EXT_texture_compression_astc_decode_mode,GL_EXT_texture_compression_bptc,GL_EXT_texture_compression_dxt1,GL_EXT_texture_compression_rgtc,GL_EXT_texture_compression_s3tc,GL_EXT_texture_compression_s3tc_srgb,GL_EXT_texture_cube_map_array,GL_EXT_texture_filter_anisotropic,GL_EXT_texture_filter_min
max,GL_EXT_texture_format_BGRA8888,GL_EXT_texture_format_sRGB_override,GL_EXT_texture_mirror_clamp_to_edge,GL_EXT_texture_norm16,GL_EXT_texture_query_lod,GL_EXT_texture_rg,GL_EXT_texture_sRGB_R8,GL_EXT_texture_sRGB_RG8,GL_EXT_texture_sRGB_decode,GL_EXT_texture_shadow_lod,GL_EXT_texture_storage,GL_EXT_texture_storage_compression,GL_EXT_texture_type_2_10_10_10_REV,GL_EXT_texture_view,GL_EXT_unpack_subimage,GL_EXT_win32_keyed_mutex,GL_EXT_window_rectangles,GL_FJ_shader_binary_GCCSO,GL_HUAWEI_program_binary,GL_HUAWEI_shader_binary,GL_IMG_bindless_texture,GL_IMG_framebuffer_downsample,GL_IMG_multisampled_render_to_texture,GL_IMG_program_binary,GL_IMG_read_format,GL_IMG_shader_binary,GL_IMG_texture_compression_pvrtc,GL_IMG_texture_compression_pvrtc2,GL_IMG_texture_filter_cubic,GL_INTEL_blackhole_render,GL_INTEL_conservative_rasterization,GL_INTEL_framebuffer_CMAA,GL_INTEL_performance_query,GL_KHR_blend_equation_advanced,GL_KHR_blend_equation_advanced_coherent,GL_KHR_context_flush_control,GL_KHR_debug,GL_KHR_no_error,GL_KHR_parallel_shader_compile,GL_KHR_robust_buffer_access_behavior,GL_KHR_robustness,GL_KHR_shader_subgroup,GL_KHR_texture_compression_astc_hdr,GL_KHR_texture_compression_astc_ldr,GL_KHR_texture_compression_astc_sliced_3d,GL_MESA_bgra,GL_MESA_framebuffer_flip_x,GL_MESA_framebuffer_flip_y,GL_MESA_framebuffer_swap_xy,GL_MESA_program_binary_formats,GL_MESA_sampler_objects,GL_MESA_shader_integer_functions,GL_MESA_texture_const_bandwidth,GL_NVX_blend_equation_advanced_multi_draw_buffers,GL_NV_bindless_texture,GL_NV_blend_equation_advanced,GL_NV_blend_equation_advanced_coherent,GL_NV_blend_minmax_factor,GL_NV_clip_space_w_scaling,GL_NV_compute_shader_derivatives,GL_NV_conditional_render,GL_NV_conservative_raster,GL_NV_conservative_raster_pre_snap,GL_NV_conservative_raster_pre_snap_triangles,GL_NV_copy_buffer,GL_NV_coverage_sample,GL_NV_depth_nonlinear,GL_NV_draw_buffers,GL_NV_draw_instanced,GL_NV_draw_vulkan_image,GL_NV_explicit_attrib_location,GL_NV_fbo_color_atta
chments,GL_NV_fence,GL_NV_fill_rectangle,GL_NV_fragment_coverage_to_color,GL_NV_fragment_shader_barycentric,GL_NV_fragment_shader_interlock,GL_NV_framebuffer_blit,GL_NV_framebuffer_mixed_samples,GL_NV_framebuffer_multisample,GL_NV_generate_mipmap_sRGB,GL_NV_geometry_shader_passthrough,GL_NV_gpu_shader5,GL_NV_image_formats,GL_NV_instanced_arrays,GL_NV_internalformat_sample_query,GL_NV_memory_attachment,GL_NV_memory_object_sparse,GL_NV_mesh_shader,GL_NV_non_square_matrices,GL_NV_pack_subimage,GL_NV_path_rendering,GL_NV_path_rendering_shared_edge,GL_NV_pixel_buffer_object,GL_NV_polygon_mode,GL_NV_primitive_shading_rate,GL_NV_read_buffer,GL_NV_read_buffer_front,GL_NV_read_depth,GL_NV_read_depth_stencil,GL_NV_read_stencil,GL_NV_representative_fragment_test,GL_NV_sRGB_formats,GL_NV_sample_locations,GL_NV_sample_mask_override_coverage,GL_NV_scissor_exclusive,GL_NV_shader_atomic_fp16_vector,GL_NV_shader_noperspective_interpolation,GL_NV_shader_subgroup_partitioned,GL_NV_shader_texture_footprint,GL_NV_shading_rate_image,GL_NV_shadow_samplers_array,GL_NV_shadow_samplers_cube,GL_NV_stereo_view_rendering,GL_NV_texture_barrier,GL_NV_texture_border_clamp,GL_NV_texture_compression_s3tc_update,GL_NV_texture_npot_2D_mipmap,GL_NV_timeline_semaphore,GL_NV_viewport_array,GL_NV_viewport_array2,GL_NV_viewport_swizzle,GL_OES_EGL_image,GL_OES_EGL_image_external,GL_OES_EGL_image_external_essl3,GL_OES_compressed_ETC1_RGB8_sub_texture,GL_OES_compressed_ETC1_RGB8_texture,GL_OES_compressed_paletted_texture,GL_OES_copy_image,GL_OES_depth24,GL_OES_depth32,GL_OES_depth_texture,GL_OES_draw_buffers_indexed,GL_OES_draw_elements_base_vertex,GL_OES_element_index_uint,GL_OES_fbo_render_mipmap,GL_OES_fragment_precision_high,GL_OES_geometry_point_size,GL_OES_geometry_shader,GL_OES_get_program_binary,GL_OES_gpu_shader5,GL_OES_mapbuffer,GL_OES_packed_depth_stencil,GL_OES_primitive_bounding_box,GL_OES_required_internalformat,GL_OES_rgb8_rgba8,GL_OES_sample_shading,GL_OES_sample_variables,GL_OES_shader_image_
atomic,GL_OES_shader_io_blocks,GL_OES_shader_multisample_interpolation,GL_OES_standard_derivatives,GL_OES_stencil1,GL_OES_stencil4,GL_OES_surfaceless_context,GL_OES_tessellation_point_size,GL_OES_tessellation_shader,GL_OES_texture_3D,GL_OES_texture_border_clamp,GL_OES_texture_buffer,GL_OES_texture_compression_astc,GL_OES_texture_cube_map_array,GL_OES_texture_float,GL_OES_texture_float_linear,GL_OES_texture_half_float,GL_OES_texture_half_float_linear,GL_OES_texture_npot,GL_OES_texture_stencil8,GL_OES_texture_storage_multisample_2d_array,GL_OES_texture_view,GL_OES_vertex_array_object,GL_OES_vertex_half_float,GL_OES_vertex_type_10_10_10_2,GL_OES_viewport_array,GL_OVR_multiview,GL_OVR_multiview2,GL_OVR_multiview_multisampled_render_to_texture,GL_QCOM_YUV_texture_gather,GL_QCOM_alpha_test,GL_QCOM_binning_control,GL_QCOM_driver_control,GL_QCOM_extended_get,GL_QCOM_extended_get2,GL_QCOM_frame_extrapolation,GL_QCOM_framebuffer_foveated,GL_QCOM_motion_estimation,GL_QCOM_perfmon_global_mode,GL_QCOM_render_sRGB_R8_RG8,GL_QCOM_render_shared_exponent,GL_QCOM_shader_framebuffer_fetch_noncoherent,GL_QCOM_shader_framebuffer_fetch_rate,GL_QCOM_shading_rate,GL_QCOM_texture_foveated,GL_QCOM_texture_foveated2,GL_QCOM_texture_foveated_subsampled_layout,GL_QCOM_texture_lod_bias,GL_QCOM_tiled_rendering,GL_QCOM_writeonly_rendering,GL_QCOM_ycbcr_degamma,GL_VIV_shader_binary" + --profile="compatibility" --api="gl=4.5,gles1=1.0,gles2=3.2,glsc2=2.0" --generator="c" --spec="gl" 
--extensions="GL_3DFX_multisample,GL_3DFX_tbuffer,GL_3DFX_texture_compression_FXT1,GL_AMD_blend_minmax_factor,GL_AMD_compressed_3DC_texture,GL_AMD_compressed_ATC_texture,GL_AMD_conservative_depth,GL_AMD_debug_output,GL_AMD_depth_clamp_separate,GL_AMD_draw_buffers_blend,GL_AMD_framebuffer_multisample_advanced,GL_AMD_framebuffer_sample_positions,GL_AMD_gcn_shader,GL_AMD_gpu_shader_half_float,GL_AMD_gpu_shader_int16,GL_AMD_gpu_shader_int64,GL_AMD_interleaved_elements,GL_AMD_multi_draw_indirect,GL_AMD_name_gen_delete,GL_AMD_occlusion_query_event,GL_AMD_performance_monitor,GL_AMD_pinned_memory,GL_AMD_program_binary_Z400,GL_AMD_query_buffer_object,GL_AMD_sample_positions,GL_AMD_seamless_cubemap_per_texture,GL_AMD_shader_atomic_counter_ops,GL_AMD_shader_ballot,GL_AMD_shader_explicit_vertex_parameter,GL_AMD_shader_gpu_shader_half_float_fetch,GL_AMD_shader_image_load_store_lod,GL_AMD_shader_stencil_export,GL_AMD_shader_trinary_minmax,GL_AMD_sparse_texture,GL_AMD_stencil_operation_extended,GL_AMD_texture_gather_bias_lod,GL_AMD_texture_texture4,GL_AMD_transform_feedback3_lines_triangles,GL_AMD_transform_feedback4,GL_AMD_vertex_shader_layer,GL_AMD_vertex_shader_tessellator,GL_AMD_vertex_shader_viewport_index,GL_ANDROID_extension_pack_es31a,GL_ANGLE_depth_texture,GL_ANGLE_framebuffer_blit,GL_ANGLE_framebuffer_multisample,GL_ANGLE_instanced_arrays,GL_ANGLE_pack_reverse_row_order,GL_ANGLE_program_binary,GL_ANGLE_texture_compression_dxt3,GL_ANGLE_texture_compression_dxt5,GL_ANGLE_texture_usage,GL_ANGLE_translated_shader_source,GL_APPLE_aux_depth_stencil,GL_APPLE_client_storage,GL_APPLE_clip_distance,GL_APPLE_color_buffer_packed_float,GL_APPLE_copy_texture_levels,GL_APPLE_element_array,GL_APPLE_fence,GL_APPLE_float_pixels,GL_APPLE_flush_buffer_range,GL_APPLE_framebuffer_multisample,GL_APPLE_object_purgeable,GL_APPLE_rgb_422,GL_APPLE_row_bytes,GL_APPLE_specular_vector,GL_APPLE_sync,GL_APPLE_texture_2D_limited_npot,GL_APPLE_texture_format_BGRA8888,GL_APPLE_texture_max_level,GL_APPLE_t
exture_packed_float,GL_APPLE_texture_range,GL_APPLE_transform_hint,GL_APPLE_vertex_array_object,GL_APPLE_vertex_array_range,GL_APPLE_vertex_program_evaluators,GL_APPLE_ycbcr_422,GL_ARB_ES2_compatibility,GL_ARB_ES3_1_compatibility,GL_ARB_ES3_2_compatibility,GL_ARB_ES3_compatibility,GL_ARB_arrays_of_arrays,GL_ARB_base_instance,GL_ARB_bindless_texture,GL_ARB_blend_func_extended,GL_ARB_buffer_storage,GL_ARB_cl_event,GL_ARB_clear_buffer_object,GL_ARB_clear_texture,GL_ARB_clip_control,GL_ARB_color_buffer_float,GL_ARB_compatibility,GL_ARB_compressed_texture_pixel_storage,GL_ARB_compute_shader,GL_ARB_compute_variable_group_size,GL_ARB_conditional_render_inverted,GL_ARB_conservative_depth,GL_ARB_copy_buffer,GL_ARB_copy_image,GL_ARB_cull_distance,GL_ARB_debug_output,GL_ARB_depth_buffer_float,GL_ARB_depth_clamp,GL_ARB_depth_texture,GL_ARB_derivative_control,GL_ARB_direct_state_access,GL_ARB_draw_buffers,GL_ARB_draw_buffers_blend,GL_ARB_draw_elements_base_vertex,GL_ARB_draw_indirect,GL_ARB_draw_instanced,GL_ARB_enhanced_layouts,GL_ARB_explicit_attrib_location,GL_ARB_explicit_uniform_location,GL_ARB_fragment_coord_conventions,GL_ARB_fragment_layer_viewport,GL_ARB_fragment_program,GL_ARB_fragment_program_shadow,GL_ARB_fragment_shader,GL_ARB_fragment_shader_interlock,GL_ARB_framebuffer_no_attachments,GL_ARB_framebuffer_object,GL_ARB_framebuffer_sRGB,GL_ARB_geometry_shader4,GL_ARB_get_program_binary,GL_ARB_get_texture_sub_image,GL_ARB_gl_spirv,GL_ARB_gpu_shader5,GL_ARB_gpu_shader_fp64,GL_ARB_gpu_shader_int64,GL_ARB_half_float_pixel,GL_ARB_half_float_vertex,GL_ARB_imaging,GL_ARB_indirect_parameters,GL_ARB_instanced_arrays,GL_ARB_internalformat_query,GL_ARB_internalformat_query2,GL_ARB_invalidate_subdata,GL_ARB_map_buffer_alignment,GL_ARB_map_buffer_range,GL_ARB_matrix_palette,GL_ARB_multi_bind,GL_ARB_multi_draw_indirect,GL_ARB_multisample,GL_ARB_multitexture,GL_ARB_occlusion_query,GL_ARB_occlusion_query2,GL_ARB_parallel_shader_compile,GL_ARB_pipeline_statistics_query,GL_ARB_pixel_bu
ffer_object,GL_ARB_point_parameters,GL_ARB_point_sprite,GL_ARB_polygon_offset_clamp,GL_ARB_post_depth_coverage,GL_ARB_program_interface_query,GL_ARB_provoking_vertex,GL_ARB_query_buffer_object,GL_ARB_robust_buffer_access_behavior,GL_ARB_robustness,GL_ARB_robustness_isolation,GL_ARB_sample_locations,GL_ARB_sample_shading,GL_ARB_sampler_objects,GL_ARB_seamless_cube_map,GL_ARB_seamless_cubemap_per_texture,GL_ARB_separate_shader_objects,GL_ARB_shader_atomic_counter_ops,GL_ARB_shader_atomic_counters,GL_ARB_shader_ballot,GL_ARB_shader_bit_encoding,GL_ARB_shader_clock,GL_ARB_shader_draw_parameters,GL_ARB_shader_group_vote,GL_ARB_shader_image_load_store,GL_ARB_shader_image_size,GL_ARB_shader_objects,GL_ARB_shader_precision,GL_ARB_shader_stencil_export,GL_ARB_shader_storage_buffer_object,GL_ARB_shader_subroutine,GL_ARB_shader_texture_image_samples,GL_ARB_shader_texture_lod,GL_ARB_shader_viewport_layer_array,GL_ARB_shading_language_100,GL_ARB_shading_language_420pack,GL_ARB_shading_language_include,GL_ARB_shading_language_packing,GL_ARB_shadow,GL_ARB_shadow_ambient,GL_ARB_sparse_buffer,GL_ARB_sparse_texture,GL_ARB_sparse_texture2,GL_ARB_sparse_texture_clamp,GL_ARB_spirv_extensions,GL_ARB_stencil_texturing,GL_ARB_sync,GL_ARB_tessellation_shader,GL_ARB_texture_barrier,GL_ARB_texture_border_clamp,GL_ARB_texture_buffer_object,GL_ARB_texture_buffer_object_rgb32,GL_ARB_texture_buffer_range,GL_ARB_texture_compression,GL_ARB_texture_compression_bptc,GL_ARB_texture_compression_rgtc,GL_ARB_texture_cube_map,GL_ARB_texture_cube_map_array,GL_ARB_texture_env_add,GL_ARB_texture_env_combine,GL_ARB_texture_env_crossbar,GL_ARB_texture_env_dot3,GL_ARB_texture_filter_anisotropic,GL_ARB_texture_filter_minmax,GL_ARB_texture_float,GL_ARB_texture_gather,GL_ARB_texture_mirror_clamp_to_edge,GL_ARB_texture_mirrored_repeat,GL_ARB_texture_multisample,GL_ARB_texture_non_power_of_two,GL_ARB_texture_query_levels,GL_ARB_texture_query_lod,GL_ARB_texture_rectangle,GL_ARB_texture_rg,GL_ARB_texture_rgb10_a2ui,GL
_ARB_texture_stencil8,GL_ARB_texture_storage,GL_ARB_texture_storage_multisample,GL_ARB_texture_swizzle,GL_ARB_texture_view,GL_ARB_timer_query,GL_ARB_transform_feedback2,GL_ARB_transform_feedback3,GL_ARB_transform_feedback_instanced,GL_ARB_transform_feedback_overflow_query,GL_ARB_transpose_matrix,GL_ARB_uniform_buffer_object,GL_ARB_vertex_array_bgra,GL_ARB_vertex_array_object,GL_ARB_vertex_attrib_64bit,GL_ARB_vertex_attrib_binding,GL_ARB_vertex_blend,GL_ARB_vertex_buffer_object,GL_ARB_vertex_program,GL_ARB_vertex_shader,GL_ARB_vertex_type_10f_11f_11f_rev,GL_ARB_vertex_type_2_10_10_10_rev,GL_ARB_viewport_array,GL_ARB_window_pos,GL_ARM_mali_program_binary,GL_ARM_mali_shader_binary,GL_ARM_rgba8,GL_ARM_shader_core_properties,GL_ARM_shader_framebuffer_fetch,GL_ARM_shader_framebuffer_fetch_depth_stencil,GL_ARM_texture_unnormalized_coordinates,GL_ATI_draw_buffers,GL_ATI_element_array,GL_ATI_envmap_bumpmap,GL_ATI_fragment_shader,GL_ATI_map_object_buffer,GL_ATI_meminfo,GL_ATI_pixel_format_float,GL_ATI_pn_triangles,GL_ATI_separate_stencil,GL_ATI_text_fragment_shader,GL_ATI_texture_env_combine3,GL_ATI_texture_float,GL_ATI_texture_mirror_once,GL_ATI_vertex_array_object,GL_ATI_vertex_attrib_array_object,GL_ATI_vertex_streams,GL_DMP_program_binary,GL_DMP_shader_binary,GL_EXT_422_pixels,GL_EXT_EGL_image_array,GL_EXT_EGL_image_storage,GL_EXT_EGL_image_storage_compression,GL_EXT_EGL_sync,GL_EXT_YUV_target,GL_EXT_abgr,GL_EXT_base_instance,GL_EXT_bgra,GL_EXT_bindable_uniform,GL_EXT_blend_color,GL_EXT_blend_equation_separate,GL_EXT_blend_func_extended,GL_EXT_blend_func_separate,GL_EXT_blend_logic_op,GL_EXT_blend_minmax,GL_EXT_blend_subtract,GL_EXT_buffer_storage,GL_EXT_clear_texture,GL_EXT_clip_control,GL_EXT_clip_cull_distance,GL_EXT_clip_volume_hint,GL_EXT_cmyka,GL_EXT_color_buffer_float,GL_EXT_color_buffer_half_float,GL_EXT_color_subtable,GL_EXT_compiled_vertex_array,GL_EXT_conservative_depth,GL_EXT_convolution,GL_EXT_coordinate_frame,GL_EXT_copy_image,GL_EXT_copy_texture,GL_EXT_cull
_vertex,GL_EXT_debug_label,GL_EXT_debug_marker,GL_EXT_depth_bounds_test,GL_EXT_depth_clamp,GL_EXT_direct_state_access,GL_EXT_discard_framebuffer,GL_EXT_disjoint_timer_query,GL_EXT_draw_buffers,GL_EXT_draw_buffers2,GL_EXT_draw_buffers_indexed,GL_EXT_draw_elements_base_vertex,GL_EXT_draw_instanced,GL_EXT_draw_range_elements,GL_EXT_draw_transform_feedback,GL_EXT_external_buffer,GL_EXT_float_blend,GL_EXT_fog_coord,GL_EXT_fragment_shading_rate,GL_EXT_framebuffer_blit,GL_EXT_framebuffer_blit_layers,GL_EXT_framebuffer_multisample,GL_EXT_framebuffer_multisample_blit_scaled,GL_EXT_framebuffer_object,GL_EXT_framebuffer_sRGB,GL_EXT_geometry_point_size,GL_EXT_geometry_shader,GL_EXT_geometry_shader4,GL_EXT_gpu_program_parameters,GL_EXT_gpu_shader4,GL_EXT_gpu_shader5,GL_EXT_histogram,GL_EXT_index_array_formats,GL_EXT_index_func,GL_EXT_index_material,GL_EXT_index_texture,GL_EXT_instanced_arrays,GL_EXT_light_texture,GL_EXT_map_buffer_range,GL_EXT_memory_object,GL_EXT_memory_object_fd,GL_EXT_memory_object_win32,GL_EXT_mesh_shader,GL_EXT_misc_attribute,GL_EXT_multi_draw_arrays,GL_EXT_multi_draw_indirect,GL_EXT_multisample,GL_EXT_multisampled_compatibility,GL_EXT_multisampled_render_to_texture,GL_EXT_multisampled_render_to_texture2,GL_EXT_multiview_draw_buffers,GL_EXT_multiview_tessellation_geometry_shader,GL_EXT_multiview_texture_multisample,GL_EXT_multiview_timer_query,GL_EXT_occlusion_query_boolean,GL_EXT_packed_depth_stencil,GL_EXT_packed_float,GL_EXT_packed_pixels,GL_EXT_paletted_texture,GL_EXT_pixel_buffer_object,GL_EXT_pixel_transform,GL_EXT_pixel_transform_color_table,GL_EXT_point_parameters,GL_EXT_polygon_offset,GL_EXT_polygon_offset_clamp,GL_EXT_post_depth_coverage,GL_EXT_primitive_bounding_box,GL_EXT_protected_textures,GL_EXT_provoking_vertex,GL_EXT_pvrtc_sRGB,GL_EXT_raster_multisample,GL_EXT_read_format_bgra,GL_EXT_render_snorm,GL_EXT_rescale_normal,GL_EXT_robustness,GL_EXT_sRGB,GL_EXT_sRGB_write_control,GL_EXT_secondary_color,GL_EXT_semaphore,GL_EXT_semaphore_fd,GL_EXT_se
maphore_win32,GL_EXT_separate_depth_stencil,GL_EXT_separate_shader_objects,GL_EXT_separate_specular_color,GL_EXT_shader_framebuffer_fetch,GL_EXT_shader_framebuffer_fetch_non_coherent,GL_EXT_shader_group_vote,GL_EXT_shader_image_load_formatted,GL_EXT_shader_image_load_store,GL_EXT_shader_implicit_conversions,GL_EXT_shader_integer_mix,GL_EXT_shader_io_blocks,GL_EXT_shader_non_constant_global_initializers,GL_EXT_shader_pixel_local_storage,GL_EXT_shader_pixel_local_storage2,GL_EXT_shader_samples_identical,GL_EXT_shader_texture_lod,GL_EXT_shader_texture_samples,GL_EXT_shadow_funcs,GL_EXT_shadow_samplers,GL_EXT_shared_texture_palette,GL_EXT_sparse_texture,GL_EXT_sparse_texture2,GL_EXT_stencil_clear_tag,GL_EXT_stencil_two_side,GL_EXT_stencil_wrap,GL_EXT_subtexture,GL_EXT_tessellation_point_size,GL_EXT_tessellation_shader,GL_EXT_texture,GL_EXT_texture3D,GL_EXT_texture_array,GL_EXT_texture_border_clamp,GL_EXT_texture_buffer,GL_EXT_texture_buffer_object,GL_EXT_texture_compression_astc_decode_mode,GL_EXT_texture_compression_bptc,GL_EXT_texture_compression_dxt1,GL_EXT_texture_compression_latc,GL_EXT_texture_compression_rgtc,GL_EXT_texture_compression_s3tc,GL_EXT_texture_compression_s3tc_srgb,GL_EXT_texture_cube_map,GL_EXT_texture_cube_map_array,GL_EXT_texture_env_add,GL_EXT_texture_env_combine,GL_EXT_texture_env_dot3,GL_EXT_texture_filter_anisotropic,GL_EXT_texture_filter_minmax,GL_EXT_texture_format_BGRA8888,GL_EXT_texture_format_sRGB_override,GL_EXT_texture_integer,GL_EXT_texture_lod_bias,GL_EXT_texture_mirror_clamp,GL_EXT_texture_mirror_clamp_to_edge,GL_EXT_texture_norm16,GL_EXT_texture_object,GL_EXT_texture_perturb_normal,GL_EXT_texture_query_lod,GL_EXT_texture_rg,GL_EXT_texture_sRGB,GL_EXT_texture_sRGB_R8,GL_EXT_texture_sRGB_RG8,GL_EXT_texture_sRGB_decode,GL_EXT_texture_shadow_lod,GL_EXT_texture_shared_exponent,GL_EXT_texture_snorm,GL_EXT_texture_storage,GL_EXT_texture_storage_compression,GL_EXT_texture_swizzle,GL_EXT_texture_type_2_10_10_10_REV,GL_EXT_texture_view,GL_EXT_
timer_query,GL_EXT_transform_feedback,GL_EXT_unpack_subimage,GL_EXT_vertex_array,GL_EXT_vertex_array_bgra,GL_EXT_vertex_attrib_64bit,GL_EXT_vertex_shader,GL_EXT_vertex_weighting,GL_EXT_win32_keyed_mutex,GL_EXT_window_rectangles,GL_EXT_x11_sync_object,GL_FJ_shader_binary_GCCSO,GL_GREMEDY_frame_terminator,GL_GREMEDY_string_marker,GL_HP_convolution_border_modes,GL_HP_image_transform,GL_HP_occlusion_test,GL_HP_texture_lighting,GL_HUAWEI_program_binary,GL_HUAWEI_shader_binary,GL_IBM_cull_vertex,GL_IBM_multimode_draw_arrays,GL_IBM_rasterpos_clip,GL_IBM_static_data,GL_IBM_texture_mirrored_repeat,GL_IBM_vertex_array_lists,GL_IMG_bindless_texture,GL_IMG_framebuffer_downsample,GL_IMG_multisampled_render_to_texture,GL_IMG_program_binary,GL_IMG_pvric_end_to_end_signature,GL_IMG_read_format,GL_IMG_shader_binary,GL_IMG_texture_compression_pvrtc,GL_IMG_texture_compression_pvrtc2,GL_IMG_texture_env_enhanced_fixed_function,GL_IMG_texture_filter_cubic,GL_IMG_tile_region_protection,GL_IMG_user_clip_plane,GL_INGR_blend_func_separate,GL_INGR_color_clamp,GL_INGR_interlace_read,GL_INTEL_blackhole_render,GL_INTEL_conservative_rasterization,GL_INTEL_fragment_shader_ordering,GL_INTEL_framebuffer_CMAA,GL_INTEL_map_texture,GL_INTEL_parallel_arrays,GL_INTEL_performance_query,GL_KHR_blend_equation_advanced,GL_KHR_blend_equation_advanced_coherent,GL_KHR_context_flush_control,GL_KHR_debug,GL_KHR_no_error,GL_KHR_parallel_shader_compile,GL_KHR_robust_buffer_access_behavior,GL_KHR_robustness,GL_KHR_shader_subgroup,GL_KHR_texture_compression_astc_hdr,GL_KHR_texture_compression_astc_ldr,GL_KHR_texture_compression_astc_sliced_3d,GL_MESAX_texture_stack,GL_MESA_bgra,GL_MESA_framebuffer_flip_x,GL_MESA_framebuffer_flip_y,GL_MESA_framebuffer_swap_xy,GL_MESA_pack_invert,GL_MESA_program_binary_formats,GL_MESA_resize_buffers,GL_MESA_sampler_objects,GL_MESA_shader_integer_functions,GL_MESA_texture_const_bandwidth,GL_MESA_tile_raster_order,GL_MESA_window_pos,GL_MESA_ycbcr_texture,GL_NVX_blend_equation_advanced_mu
lti_draw_buffers,GL_NVX_conditional_render,GL_NVX_gpu_memory_info,GL_NVX_gpu_multicast2,GL_NVX_linked_gpu_multicast,GL_NVX_progress_fence,GL_NV_alpha_to_coverage_dither_control,GL_NV_bindless_multi_draw_indirect,GL_NV_bindless_multi_draw_indirect_count,GL_NV_bindless_texture,GL_NV_blend_equation_advanced,GL_NV_blend_equation_advanced_coherent,GL_NV_blend_minmax_factor,GL_NV_blend_square,GL_NV_clip_space_w_scaling,GL_NV_command_list,GL_NV_compute_program5,GL_NV_compute_shader_derivatives,GL_NV_conditional_render,GL_NV_conservative_raster,GL_NV_conservative_raster_dilate,GL_NV_conservative_raster_pre_snap,GL_NV_conservative_raster_pre_snap_triangles,GL_NV_conservative_raster_underestimation,GL_NV_copy_buffer,GL_NV_copy_depth_to_color,GL_NV_copy_image,GL_NV_coverage_sample,GL_NV_deep_texture3D,GL_NV_depth_buffer_float,GL_NV_depth_clamp,GL_NV_depth_nonlinear,GL_NV_draw_buffers,GL_NV_draw_instanced,GL_NV_draw_texture,GL_NV_draw_vulkan_image,GL_NV_evaluators,GL_NV_explicit_attrib_location,GL_NV_explicit_multisample,GL_NV_fbo_color_attachments,GL_NV_fence,GL_NV_fill_rectangle,GL_NV_float_buffer,GL_NV_fog_distance,GL_NV_fragment_coverage_to_color,GL_NV_fragment_program,GL_NV_fragment_program2,GL_NV_fragment_program4,GL_NV_fragment_program_option,GL_NV_fragment_shader_barycentric,GL_NV_fragment_shader_interlock,GL_NV_framebuffer_blit,GL_NV_framebuffer_mixed_samples,GL_NV_framebuffer_multisample,GL_NV_framebuffer_multisample_coverage,GL_NV_generate_mipmap_sRGB,GL_NV_geometry_program4,GL_NV_geometry_shader4,GL_NV_geometry_shader_passthrough,GL_NV_gpu_multicast,GL_NV_gpu_program4,GL_NV_gpu_program5,GL_NV_gpu_program5_mem_extended,GL_NV_gpu_shader5,GL_NV_half_float,GL_NV_image_formats,GL_NV_instanced_arrays,GL_NV_internalformat_sample_query,GL_NV_light_max_exponent,GL_NV_memory_attachment,GL_NV_memory_object_sparse,GL_NV_mesh_shader,GL_NV_multisample_coverage,GL_NV_multisample_filter_hint,GL_NV_non_square_matrices,GL_NV_occlusion_query,GL_NV_pack_subimage,GL_NV_packed_depth_sten
cil,GL_NV_parameter_buffer_object,GL_NV_parameter_buffer_object2,GL_NV_path_rendering,GL_NV_path_rendering_shared_edge,GL_NV_pixel_buffer_object,GL_NV_pixel_data_range,GL_NV_point_sprite,GL_NV_polygon_mode,GL_NV_present_video,GL_NV_primitive_restart,GL_NV_primitive_shading_rate,GL_NV_query_resource,GL_NV_query_resource_tag,GL_NV_read_buffer,GL_NV_read_buffer_front,GL_NV_read_depth,GL_NV_read_depth_stencil,GL_NV_read_stencil,GL_NV_register_combiners,GL_NV_register_combiners2,GL_NV_representative_fragment_test,GL_NV_robustness_video_memory_purge,GL_NV_sRGB_formats,GL_NV_sample_locations,GL_NV_sample_mask_override_coverage,GL_NV_scissor_exclusive,GL_NV_shader_atomic_counters,GL_NV_shader_atomic_float,GL_NV_shader_atomic_float64,GL_NV_shader_atomic_fp16_vector,GL_NV_shader_atomic_int64,GL_NV_shader_buffer_load,GL_NV_shader_buffer_store,GL_NV_shader_noperspective_interpolation,GL_NV_shader_storage_buffer_object,GL_NV_shader_subgroup_partitioned,GL_NV_shader_texture_footprint,GL_NV_shader_thread_group,GL_NV_shader_thread_shuffle,GL_NV_shading_rate_image,GL_NV_shadow_samplers_array,GL_NV_shadow_samplers_cube,GL_NV_stereo_view_rendering,GL_NV_tessellation_program5,GL_NV_texgen_emboss,GL_NV_texgen_reflection,GL_NV_texture_barrier,GL_NV_texture_border_clamp,GL_NV_texture_compression_s3tc_update,GL_NV_texture_compression_vtc,GL_NV_texture_env_combine4,GL_NV_texture_expand_normal,GL_NV_texture_multisample,GL_NV_texture_npot_2D_mipmap,GL_NV_texture_rectangle,GL_NV_texture_rectangle_compressed,GL_NV_texture_shader,GL_NV_texture_shader2,GL_NV_texture_shader3,GL_NV_timeline_semaphore,GL_NV_transform_feedback,GL_NV_transform_feedback2,GL_NV_uniform_buffer_std430_layout,GL_NV_uniform_buffer_unified_memory,GL_NV_vdpau_interop,GL_NV_vdpau_interop2,GL_NV_vertex_array_range,GL_NV_vertex_array_range2,GL_NV_vertex_attrib_integer_64bit,GL_NV_vertex_buffer_unified_memory,GL_NV_vertex_program,GL_NV_vertex_program1_1,GL_NV_vertex_program2,GL_NV_vertex_program2_option,GL_NV_vertex_program3,GL_N
V_vertex_program4,GL_NV_video_capture,GL_NV_viewport_array,GL_NV_viewport_array2,GL_NV_viewport_swizzle,GL_OES_EGL_image,GL_OES_EGL_image_external,GL_OES_EGL_image_external_essl3,GL_OES_blend_equation_separate,GL_OES_blend_func_separate,GL_OES_blend_subtract,GL_OES_byte_coordinates,GL_OES_compressed_ETC1_RGB8_sub_texture,GL_OES_compressed_ETC1_RGB8_texture,GL_OES_compressed_paletted_texture,GL_OES_copy_image,GL_OES_depth24,GL_OES_depth32,GL_OES_depth_texture,GL_OES_draw_buffers_indexed,GL_OES_draw_elements_base_vertex,GL_OES_draw_texture,GL_OES_element_index_uint,GL_OES_extended_matrix_palette,GL_OES_fbo_render_mipmap,GL_OES_fixed_point,GL_OES_fragment_precision_high,GL_OES_framebuffer_object,GL_OES_geometry_point_size,GL_OES_geometry_shader,GL_OES_get_program_binary,GL_OES_gpu_shader5,GL_OES_mapbuffer,GL_OES_matrix_get,GL_OES_matrix_palette,GL_OES_packed_depth_stencil,GL_OES_point_size_array,GL_OES_point_sprite,GL_OES_primitive_bounding_box,GL_OES_query_matrix,GL_OES_read_format,GL_OES_required_internalformat,GL_OES_rgb8_rgba8,GL_OES_sample_shading,GL_OES_sample_variables,GL_OES_shader_image_atomic,GL_OES_shader_io_blocks,GL_OES_shader_multisample_interpolation,GL_OES_single_precision,GL_OES_standard_derivatives,GL_OES_stencil1,GL_OES_stencil4,GL_OES_stencil8,GL_OES_stencil_wrap,GL_OES_surfaceless_context,GL_OES_tessellation_point_size,GL_OES_tessellation_shader,GL_OES_texture_3D,GL_OES_texture_border_clamp,GL_OES_texture_buffer,GL_OES_texture_compression_astc,GL_OES_texture_cube_map,GL_OES_texture_cube_map_array,GL_OES_texture_env_crossbar,GL_OES_texture_float,GL_OES_texture_float_linear,GL_OES_texture_half_float,GL_OES_texture_half_float_linear,GL_OES_texture_mirrored_repeat,GL_OES_texture_npot,GL_OES_texture_stencil8,GL_OES_texture_storage_multisample_2d_array,GL_OES_texture_view,GL_OES_vertex_array_object,GL_OES_vertex_half_float,GL_OES_vertex_type_10_10_10_2,GL_OES_viewport_array,GL_OML_interlace,GL_OML_resample,GL_OML_subsample,GL_OVR_multiview,GL_OVR_multivi
ew2,GL_OVR_multiview_multisampled_render_to_texture,GL_PGI_misc_hints,GL_PGI_vertex_hints,GL_QCOM_YUV_texture_gather,GL_QCOM_alpha_test,GL_QCOM_binning_control,GL_QCOM_driver_control,GL_QCOM_extended_get,GL_QCOM_extended_get2,GL_QCOM_frame_extrapolation,GL_QCOM_framebuffer_foveated,GL_QCOM_motion_estimation,GL_QCOM_perfmon_global_mode,GL_QCOM_render_sRGB_R8_RG8,GL_QCOM_render_shared_exponent,GL_QCOM_shader_framebuffer_fetch_noncoherent,GL_QCOM_shader_framebuffer_fetch_rate,GL_QCOM_shading_rate,GL_QCOM_texture_foveated,GL_QCOM_texture_foveated2,GL_QCOM_texture_foveated_subsampled_layout,GL_QCOM_texture_lod_bias,GL_QCOM_tiled_rendering,GL_QCOM_writeonly_rendering,GL_QCOM_ycbcr_degamma,GL_REND_screen_coordinates,GL_S3_s3tc,GL_SGIS_detail_texture,GL_SGIS_fog_function,GL_SGIS_generate_mipmap,GL_SGIS_multisample,GL_SGIS_pixel_texture,GL_SGIS_point_line_texgen,GL_SGIS_point_parameters,GL_SGIS_sharpen_texture,GL_SGIS_texture4D,GL_SGIS_texture_border_clamp,GL_SGIS_texture_color_mask,GL_SGIS_texture_edge_clamp,GL_SGIS_texture_filter4,GL_SGIS_texture_lod,GL_SGIS_texture_select,GL_SGIX_async,GL_SGIX_async_histogram,GL_SGIX_async_pixel,GL_SGIX_blend_alpha_minmax,GL_SGIX_calligraphic_fragment,GL_SGIX_clipmap,GL_SGIX_convolution_accuracy,GL_SGIX_depth_pass_instrument,GL_SGIX_depth_texture,GL_SGIX_flush_raster,GL_SGIX_fog_offset,GL_SGIX_fragment_lighting,GL_SGIX_framezoom,GL_SGIX_igloo_interface,GL_SGIX_instruments,GL_SGIX_interlace,GL_SGIX_ir_instrument1,GL_SGIX_list_priority,GL_SGIX_pixel_texture,GL_SGIX_pixel_tiles,GL_SGIX_polynomial_ffd,GL_SGIX_reference_plane,GL_SGIX_resample,GL_SGIX_scalebias_hint,GL_SGIX_shadow,GL_SGIX_shadow_ambient,GL_SGIX_sprite,GL_SGIX_subsample,GL_SGIX_tag_sample_buffer,GL_SGIX_texture_add_env,GL_SGIX_texture_coordinate_clamp,GL_SGIX_texture_lod_bias,GL_SGIX_texture_multi_buffer,GL_SGIX_texture_scale_bias,GL_SGIX_vertex_preclip,GL_SGIX_ycrcb,GL_SGIX_ycrcb_subsample,GL_SGIX_ycrcba,GL_SGI_color_matrix,GL_SGI_color_table,GL_SGI_texture_color_table,GL_SUNX_
constant_data,GL_SUN_convolution_border_modes,GL_SUN_global_alpha,GL_SUN_mesh_array,GL_SUN_slice_accum,GL_SUN_triangle_list,GL_SUN_vertex,GL_VIV_shader_binary,GL_WIN_phong_shading,GL_WIN_specular_fog" Online: Too many extensions */ @@ -349,6 +884,138 @@ #include #include +static void* get_proc(const char *namez); + +#if defined(_WIN32) || defined(__CYGWIN__) +#ifndef _WINDOWS_ +#undef APIENTRY +#endif +#include +static HMODULE libGL; + +typedef void* (APIENTRYP PFNWGLGETPROCADDRESSPROC_PRIVATE)(const char*); +static PFNWGLGETPROCADDRESSPROC_PRIVATE gladGetProcAddressPtr; + +#ifdef _MSC_VER +#ifdef __has_include + #if __has_include() + #define HAVE_WINAPIFAMILY 1 + #endif +#elif _MSC_VER >= 1700 && !_USING_V110_SDK71_ + #define HAVE_WINAPIFAMILY 1 +#endif +#endif + +#ifdef HAVE_WINAPIFAMILY + #include + #if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) && WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) + #define IS_UWP 1 + #endif +#endif + +static +int open_gl(void) { +#ifndef IS_UWP + libGL = LoadLibraryW(L"opengl32.dll"); + if(libGL != NULL) { + void (* tmp)(void); + tmp = (void(*)(void)) GetProcAddress(libGL, "wglGetProcAddress"); + gladGetProcAddressPtr = (PFNWGLGETPROCADDRESSPROC_PRIVATE) tmp; + return gladGetProcAddressPtr != NULL; + } +#endif + + return 0; +} + +static +void close_gl(void) { + if(libGL != NULL) { + FreeLibrary((HMODULE) libGL); + libGL = NULL; + } +} +#else +#include +static void* libGL; + +#if !defined(__APPLE__) && !defined(__HAIKU__) +typedef void* (APIENTRYP PFNGLXGETPROCADDRESSPROC_PRIVATE)(const char*); +static PFNGLXGETPROCADDRESSPROC_PRIVATE gladGetProcAddressPtr; +#endif + +static +int open_gl(void) { +#ifdef __APPLE__ + static const char *NAMES[] = { + "../Frameworks/OpenGL.framework/OpenGL", + "/Library/Frameworks/OpenGL.framework/OpenGL", + "/System/Library/Frameworks/OpenGL.framework/OpenGL", + "/System/Library/Frameworks/OpenGL.framework/Versions/Current/OpenGL" + }; +#else + static const char *NAMES[] = {"libGL.so.1", 
"libGL.so"}; +#endif + + unsigned int index = 0; + for(index = 0; index < (sizeof(NAMES) / sizeof(NAMES[0])); index++) { + libGL = dlopen(NAMES[index], RTLD_NOW | RTLD_GLOBAL); + + if(libGL != NULL) { +#if defined(__APPLE__) || defined(__HAIKU__) + return 1; +#else + gladGetProcAddressPtr = (PFNGLXGETPROCADDRESSPROC_PRIVATE)dlsym(libGL, + "glXGetProcAddressARB"); + return gladGetProcAddressPtr != NULL; +#endif + } + } + + return 0; +} + +static +void close_gl(void) { + if(libGL != NULL) { + dlclose(libGL); + libGL = NULL; + } +} +#endif + +static +void* get_proc(const char *namez) { + void* result = NULL; + if(libGL == NULL) return NULL; + +#if !defined(__APPLE__) && !defined(__HAIKU__) + if(gladGetProcAddressPtr != NULL) { + result = gladGetProcAddressPtr(namez); + } +#endif + if(result == NULL) { +#if defined(_WIN32) || defined(__CYGWIN__) + result = (void*)GetProcAddress((HMODULE) libGL, namez); +#else + result = dlsym(libGL, namez); +#endif + } + + return result; +} + +int gladLoadGL(void) { + int status = 0; + + if(open_gl()) { + status = gladLoadGLLoader(&get_proc); + close_gl(); + } + + return status; +} + struct gladGLversionStruct GLVersion = { 0, 0 }; #if defined(GL_ES_VERSION_3_0) || defined(GL_VERSION_3_0) @@ -448,28 +1115,66 @@ static int has_ext(const char *ext) { return 0; } +int GLAD_GL_VERSION_1_0 = 0; +int GLAD_GL_VERSION_1_1 = 0; +int GLAD_GL_VERSION_1_2 = 0; +int GLAD_GL_VERSION_1_3 = 0; +int GLAD_GL_VERSION_1_4 = 0; +int GLAD_GL_VERSION_1_5 = 0; +int GLAD_GL_VERSION_2_0 = 0; +int GLAD_GL_VERSION_2_1 = 0; +int GLAD_GL_VERSION_3_0 = 0; +int GLAD_GL_VERSION_3_1 = 0; +int GLAD_GL_VERSION_3_2 = 0; +int GLAD_GL_VERSION_3_3 = 0; +int GLAD_GL_VERSION_4_0 = 0; +int GLAD_GL_VERSION_4_1 = 0; +int GLAD_GL_VERSION_4_2 = 0; +int GLAD_GL_VERSION_4_3 = 0; +int GLAD_GL_VERSION_4_4 = 0; +int GLAD_GL_VERSION_4_5 = 0; +int GLAD_GL_VERSION_ES_CM_1_0 = 0; int GLAD_GL_ES_VERSION_2_0 = 0; int GLAD_GL_ES_VERSION_3_0 = 0; int GLAD_GL_ES_VERSION_3_1 = 0; int 
GLAD_GL_ES_VERSION_3_2 = 0; +int GLAD_GL_SC_VERSION_2_0 = 0; +PFNGLACCUMPROC glad_glAccum = NULL; PFNGLACTIVESHADERPROGRAMPROC glad_glActiveShaderProgram = NULL; PFNGLACTIVETEXTUREPROC glad_glActiveTexture = NULL; +PFNGLALPHAFUNCPROC glad_glAlphaFunc = NULL; +PFNGLALPHAFUNCXPROC glad_glAlphaFuncx = NULL; +PFNGLARETEXTURESRESIDENTPROC glad_glAreTexturesResident = NULL; +PFNGLARRAYELEMENTPROC glad_glArrayElement = NULL; PFNGLATTACHSHADERPROC glad_glAttachShader = NULL; +PFNGLBEGINPROC glad_glBegin = NULL; +PFNGLBEGINCONDITIONALRENDERPROC glad_glBeginConditionalRender = NULL; PFNGLBEGINQUERYPROC glad_glBeginQuery = NULL; +PFNGLBEGINQUERYINDEXEDPROC glad_glBeginQueryIndexed = NULL; PFNGLBEGINTRANSFORMFEEDBACKPROC glad_glBeginTransformFeedback = NULL; PFNGLBINDATTRIBLOCATIONPROC glad_glBindAttribLocation = NULL; PFNGLBINDBUFFERPROC glad_glBindBuffer = NULL; PFNGLBINDBUFFERBASEPROC glad_glBindBufferBase = NULL; PFNGLBINDBUFFERRANGEPROC glad_glBindBufferRange = NULL; +PFNGLBINDBUFFERSBASEPROC glad_glBindBuffersBase = NULL; +PFNGLBINDBUFFERSRANGEPROC glad_glBindBuffersRange = NULL; +PFNGLBINDFRAGDATALOCATIONPROC glad_glBindFragDataLocation = NULL; +PFNGLBINDFRAGDATALOCATIONINDEXEDPROC glad_glBindFragDataLocationIndexed = NULL; PFNGLBINDFRAMEBUFFERPROC glad_glBindFramebuffer = NULL; PFNGLBINDIMAGETEXTUREPROC glad_glBindImageTexture = NULL; +PFNGLBINDIMAGETEXTURESPROC glad_glBindImageTextures = NULL; PFNGLBINDPROGRAMPIPELINEPROC glad_glBindProgramPipeline = NULL; PFNGLBINDRENDERBUFFERPROC glad_glBindRenderbuffer = NULL; PFNGLBINDSAMPLERPROC glad_glBindSampler = NULL; +PFNGLBINDSAMPLERSPROC glad_glBindSamplers = NULL; PFNGLBINDTEXTUREPROC glad_glBindTexture = NULL; +PFNGLBINDTEXTUREUNITPROC glad_glBindTextureUnit = NULL; +PFNGLBINDTEXTURESPROC glad_glBindTextures = NULL; PFNGLBINDTRANSFORMFEEDBACKPROC glad_glBindTransformFeedback = NULL; PFNGLBINDVERTEXARRAYPROC glad_glBindVertexArray = NULL; PFNGLBINDVERTEXBUFFERPROC glad_glBindVertexBuffer = NULL; 
+PFNGLBINDVERTEXBUFFERSPROC glad_glBindVertexBuffers = NULL; +PFNGLBITMAPPROC glad_glBitmap = NULL; PFNGLBLENDBARRIERPROC glad_glBlendBarrier = NULL; PFNGLBLENDCOLORPROC glad_glBlendColor = NULL; PFNGLBLENDEQUATIONPROC glad_glBlendEquation = NULL; @@ -481,39 +1186,126 @@ PFNGLBLENDFUNCSEPARATEPROC glad_glBlendFuncSeparate = NULL; PFNGLBLENDFUNCSEPARATEIPROC glad_glBlendFuncSeparatei = NULL; PFNGLBLENDFUNCIPROC glad_glBlendFunci = NULL; PFNGLBLITFRAMEBUFFERPROC glad_glBlitFramebuffer = NULL; +PFNGLBLITNAMEDFRAMEBUFFERPROC glad_glBlitNamedFramebuffer = NULL; PFNGLBUFFERDATAPROC glad_glBufferData = NULL; +PFNGLBUFFERSTORAGEPROC glad_glBufferStorage = NULL; PFNGLBUFFERSUBDATAPROC glad_glBufferSubData = NULL; +PFNGLCALLLISTPROC glad_glCallList = NULL; +PFNGLCALLLISTSPROC glad_glCallLists = NULL; PFNGLCHECKFRAMEBUFFERSTATUSPROC glad_glCheckFramebufferStatus = NULL; +PFNGLCHECKNAMEDFRAMEBUFFERSTATUSPROC glad_glCheckNamedFramebufferStatus = NULL; +PFNGLCLAMPCOLORPROC glad_glClampColor = NULL; PFNGLCLEARPROC glad_glClear = NULL; +PFNGLCLEARACCUMPROC glad_glClearAccum = NULL; +PFNGLCLEARBUFFERDATAPROC glad_glClearBufferData = NULL; +PFNGLCLEARBUFFERSUBDATAPROC glad_glClearBufferSubData = NULL; PFNGLCLEARBUFFERFIPROC glad_glClearBufferfi = NULL; PFNGLCLEARBUFFERFVPROC glad_glClearBufferfv = NULL; PFNGLCLEARBUFFERIVPROC glad_glClearBufferiv = NULL; PFNGLCLEARBUFFERUIVPROC glad_glClearBufferuiv = NULL; PFNGLCLEARCOLORPROC glad_glClearColor = NULL; +PFNGLCLEARCOLORXPROC glad_glClearColorx = NULL; +PFNGLCLEARDEPTHPROC glad_glClearDepth = NULL; PFNGLCLEARDEPTHFPROC glad_glClearDepthf = NULL; +PFNGLCLEARDEPTHXPROC glad_glClearDepthx = NULL; +PFNGLCLEARINDEXPROC glad_glClearIndex = NULL; +PFNGLCLEARNAMEDBUFFERDATAPROC glad_glClearNamedBufferData = NULL; +PFNGLCLEARNAMEDBUFFERSUBDATAPROC glad_glClearNamedBufferSubData = NULL; +PFNGLCLEARNAMEDFRAMEBUFFERFIPROC glad_glClearNamedFramebufferfi = NULL; +PFNGLCLEARNAMEDFRAMEBUFFERFVPROC glad_glClearNamedFramebufferfv = NULL; 
+PFNGLCLEARNAMEDFRAMEBUFFERIVPROC glad_glClearNamedFramebufferiv = NULL; +PFNGLCLEARNAMEDFRAMEBUFFERUIVPROC glad_glClearNamedFramebufferuiv = NULL; PFNGLCLEARSTENCILPROC glad_glClearStencil = NULL; +PFNGLCLEARTEXIMAGEPROC glad_glClearTexImage = NULL; +PFNGLCLEARTEXSUBIMAGEPROC glad_glClearTexSubImage = NULL; +PFNGLCLIENTACTIVETEXTUREPROC glad_glClientActiveTexture = NULL; PFNGLCLIENTWAITSYNCPROC glad_glClientWaitSync = NULL; +PFNGLCLIPCONTROLPROC glad_glClipControl = NULL; +PFNGLCLIPPLANEPROC glad_glClipPlane = NULL; +PFNGLCLIPPLANEFPROC glad_glClipPlanef = NULL; +PFNGLCLIPPLANEXPROC glad_glClipPlanex = NULL; +PFNGLCOLOR3BPROC glad_glColor3b = NULL; +PFNGLCOLOR3BVPROC glad_glColor3bv = NULL; +PFNGLCOLOR3DPROC glad_glColor3d = NULL; +PFNGLCOLOR3DVPROC glad_glColor3dv = NULL; +PFNGLCOLOR3FPROC glad_glColor3f = NULL; +PFNGLCOLOR3FVPROC glad_glColor3fv = NULL; +PFNGLCOLOR3IPROC glad_glColor3i = NULL; +PFNGLCOLOR3IVPROC glad_glColor3iv = NULL; +PFNGLCOLOR3SPROC glad_glColor3s = NULL; +PFNGLCOLOR3SVPROC glad_glColor3sv = NULL; +PFNGLCOLOR3UBPROC glad_glColor3ub = NULL; +PFNGLCOLOR3UBVPROC glad_glColor3ubv = NULL; +PFNGLCOLOR3UIPROC glad_glColor3ui = NULL; +PFNGLCOLOR3UIVPROC glad_glColor3uiv = NULL; +PFNGLCOLOR3USPROC glad_glColor3us = NULL; +PFNGLCOLOR3USVPROC glad_glColor3usv = NULL; +PFNGLCOLOR4BPROC glad_glColor4b = NULL; +PFNGLCOLOR4BVPROC glad_glColor4bv = NULL; +PFNGLCOLOR4DPROC glad_glColor4d = NULL; +PFNGLCOLOR4DVPROC glad_glColor4dv = NULL; +PFNGLCOLOR4FPROC glad_glColor4f = NULL; +PFNGLCOLOR4FVPROC glad_glColor4fv = NULL; +PFNGLCOLOR4IPROC glad_glColor4i = NULL; +PFNGLCOLOR4IVPROC glad_glColor4iv = NULL; +PFNGLCOLOR4SPROC glad_glColor4s = NULL; +PFNGLCOLOR4SVPROC glad_glColor4sv = NULL; +PFNGLCOLOR4UBPROC glad_glColor4ub = NULL; +PFNGLCOLOR4UBVPROC glad_glColor4ubv = NULL; +PFNGLCOLOR4UIPROC glad_glColor4ui = NULL; +PFNGLCOLOR4UIVPROC glad_glColor4uiv = NULL; +PFNGLCOLOR4USPROC glad_glColor4us = NULL; +PFNGLCOLOR4USVPROC glad_glColor4usv = NULL; 
+PFNGLCOLOR4XPROC glad_glColor4x = NULL; PFNGLCOLORMASKPROC glad_glColorMask = NULL; PFNGLCOLORMASKIPROC glad_glColorMaski = NULL; +PFNGLCOLORMATERIALPROC glad_glColorMaterial = NULL; +PFNGLCOLORP3UIPROC glad_glColorP3ui = NULL; +PFNGLCOLORP3UIVPROC glad_glColorP3uiv = NULL; +PFNGLCOLORP4UIPROC glad_glColorP4ui = NULL; +PFNGLCOLORP4UIVPROC glad_glColorP4uiv = NULL; +PFNGLCOLORPOINTERPROC glad_glColorPointer = NULL; PFNGLCOMPILESHADERPROC glad_glCompileShader = NULL; +PFNGLCOMPRESSEDTEXIMAGE1DPROC glad_glCompressedTexImage1D = NULL; PFNGLCOMPRESSEDTEXIMAGE2DPROC glad_glCompressedTexImage2D = NULL; PFNGLCOMPRESSEDTEXIMAGE3DPROC glad_glCompressedTexImage3D = NULL; +PFNGLCOMPRESSEDTEXSUBIMAGE1DPROC glad_glCompressedTexSubImage1D = NULL; PFNGLCOMPRESSEDTEXSUBIMAGE2DPROC glad_glCompressedTexSubImage2D = NULL; PFNGLCOMPRESSEDTEXSUBIMAGE3DPROC glad_glCompressedTexSubImage3D = NULL; +PFNGLCOMPRESSEDTEXTURESUBIMAGE1DPROC glad_glCompressedTextureSubImage1D = NULL; +PFNGLCOMPRESSEDTEXTURESUBIMAGE2DPROC glad_glCompressedTextureSubImage2D = NULL; +PFNGLCOMPRESSEDTEXTURESUBIMAGE3DPROC glad_glCompressedTextureSubImage3D = NULL; PFNGLCOPYBUFFERSUBDATAPROC glad_glCopyBufferSubData = NULL; PFNGLCOPYIMAGESUBDATAPROC glad_glCopyImageSubData = NULL; +PFNGLCOPYNAMEDBUFFERSUBDATAPROC glad_glCopyNamedBufferSubData = NULL; +PFNGLCOPYPIXELSPROC glad_glCopyPixels = NULL; +PFNGLCOPYTEXIMAGE1DPROC glad_glCopyTexImage1D = NULL; PFNGLCOPYTEXIMAGE2DPROC glad_glCopyTexImage2D = NULL; +PFNGLCOPYTEXSUBIMAGE1DPROC glad_glCopyTexSubImage1D = NULL; PFNGLCOPYTEXSUBIMAGE2DPROC glad_glCopyTexSubImage2D = NULL; PFNGLCOPYTEXSUBIMAGE3DPROC glad_glCopyTexSubImage3D = NULL; +PFNGLCOPYTEXTURESUBIMAGE1DPROC glad_glCopyTextureSubImage1D = NULL; +PFNGLCOPYTEXTURESUBIMAGE2DPROC glad_glCopyTextureSubImage2D = NULL; +PFNGLCOPYTEXTURESUBIMAGE3DPROC glad_glCopyTextureSubImage3D = NULL; +PFNGLCREATEBUFFERSPROC glad_glCreateBuffers = NULL; +PFNGLCREATEFRAMEBUFFERSPROC glad_glCreateFramebuffers = NULL; 
PFNGLCREATEPROGRAMPROC glad_glCreateProgram = NULL; +PFNGLCREATEPROGRAMPIPELINESPROC glad_glCreateProgramPipelines = NULL; +PFNGLCREATEQUERIESPROC glad_glCreateQueries = NULL; +PFNGLCREATERENDERBUFFERSPROC glad_glCreateRenderbuffers = NULL; +PFNGLCREATESAMPLERSPROC glad_glCreateSamplers = NULL; PFNGLCREATESHADERPROC glad_glCreateShader = NULL; PFNGLCREATESHADERPROGRAMVPROC glad_glCreateShaderProgramv = NULL; +PFNGLCREATETEXTURESPROC glad_glCreateTextures = NULL; +PFNGLCREATETRANSFORMFEEDBACKSPROC glad_glCreateTransformFeedbacks = NULL; +PFNGLCREATEVERTEXARRAYSPROC glad_glCreateVertexArrays = NULL; PFNGLCULLFACEPROC glad_glCullFace = NULL; PFNGLDEBUGMESSAGECALLBACKPROC glad_glDebugMessageCallback = NULL; PFNGLDEBUGMESSAGECONTROLPROC glad_glDebugMessageControl = NULL; PFNGLDEBUGMESSAGEINSERTPROC glad_glDebugMessageInsert = NULL; PFNGLDELETEBUFFERSPROC glad_glDeleteBuffers = NULL; PFNGLDELETEFRAMEBUFFERSPROC glad_glDeleteFramebuffers = NULL; +PFNGLDELETELISTSPROC glad_glDeleteLists = NULL; PFNGLDELETEPROGRAMPROC glad_glDeleteProgram = NULL; PFNGLDELETEPROGRAMPIPELINESPROC glad_glDeleteProgramPipelines = NULL; PFNGLDELETEQUERIESPROC glad_glDeleteQueries = NULL; @@ -526,9 +1318,15 @@ PFNGLDELETETRANSFORMFEEDBACKSPROC glad_glDeleteTransformFeedbacks = NULL; PFNGLDELETEVERTEXARRAYSPROC glad_glDeleteVertexArrays = NULL; PFNGLDEPTHFUNCPROC glad_glDepthFunc = NULL; PFNGLDEPTHMASKPROC glad_glDepthMask = NULL; +PFNGLDEPTHRANGEPROC glad_glDepthRange = NULL; +PFNGLDEPTHRANGEARRAYVPROC glad_glDepthRangeArrayv = NULL; +PFNGLDEPTHRANGEINDEXEDPROC glad_glDepthRangeIndexed = NULL; PFNGLDEPTHRANGEFPROC glad_glDepthRangef = NULL; +PFNGLDEPTHRANGEXPROC glad_glDepthRangex = NULL; PFNGLDETACHSHADERPROC glad_glDetachShader = NULL; PFNGLDISABLEPROC glad_glDisable = NULL; +PFNGLDISABLECLIENTSTATEPROC glad_glDisableClientState = NULL; +PFNGLDISABLEVERTEXARRAYATTRIBPROC glad_glDisableVertexArrayAttrib = NULL; PFNGLDISABLEVERTEXATTRIBARRAYPROC glad_glDisableVertexAttribArray = NULL; 
PFNGLDISABLEIPROC glad_glDisablei = NULL; PFNGLDISPATCHCOMPUTEPROC glad_glDispatchCompute = NULL; @@ -536,31 +1334,80 @@ PFNGLDISPATCHCOMPUTEINDIRECTPROC glad_glDispatchComputeIndirect = NULL; PFNGLDRAWARRAYSPROC glad_glDrawArrays = NULL; PFNGLDRAWARRAYSINDIRECTPROC glad_glDrawArraysIndirect = NULL; PFNGLDRAWARRAYSINSTANCEDPROC glad_glDrawArraysInstanced = NULL; +PFNGLDRAWARRAYSINSTANCEDBASEINSTANCEPROC glad_glDrawArraysInstancedBaseInstance = NULL; +PFNGLDRAWBUFFERPROC glad_glDrawBuffer = NULL; PFNGLDRAWBUFFERSPROC glad_glDrawBuffers = NULL; PFNGLDRAWELEMENTSPROC glad_glDrawElements = NULL; PFNGLDRAWELEMENTSBASEVERTEXPROC glad_glDrawElementsBaseVertex = NULL; PFNGLDRAWELEMENTSINDIRECTPROC glad_glDrawElementsIndirect = NULL; PFNGLDRAWELEMENTSINSTANCEDPROC glad_glDrawElementsInstanced = NULL; +PFNGLDRAWELEMENTSINSTANCEDBASEINSTANCEPROC glad_glDrawElementsInstancedBaseInstance = NULL; PFNGLDRAWELEMENTSINSTANCEDBASEVERTEXPROC glad_glDrawElementsInstancedBaseVertex = NULL; +PFNGLDRAWELEMENTSINSTANCEDBASEVERTEXBASEINSTANCEPROC glad_glDrawElementsInstancedBaseVertexBaseInstance = NULL; +PFNGLDRAWPIXELSPROC glad_glDrawPixels = NULL; PFNGLDRAWRANGEELEMENTSPROC glad_glDrawRangeElements = NULL; PFNGLDRAWRANGEELEMENTSBASEVERTEXPROC glad_glDrawRangeElementsBaseVertex = NULL; +PFNGLDRAWTRANSFORMFEEDBACKPROC glad_glDrawTransformFeedback = NULL; +PFNGLDRAWTRANSFORMFEEDBACKINSTANCEDPROC glad_glDrawTransformFeedbackInstanced = NULL; +PFNGLDRAWTRANSFORMFEEDBACKSTREAMPROC glad_glDrawTransformFeedbackStream = NULL; +PFNGLDRAWTRANSFORMFEEDBACKSTREAMINSTANCEDPROC glad_glDrawTransformFeedbackStreamInstanced = NULL; +PFNGLEDGEFLAGPROC glad_glEdgeFlag = NULL; +PFNGLEDGEFLAGPOINTERPROC glad_glEdgeFlagPointer = NULL; +PFNGLEDGEFLAGVPROC glad_glEdgeFlagv = NULL; PFNGLENABLEPROC glad_glEnable = NULL; +PFNGLENABLECLIENTSTATEPROC glad_glEnableClientState = NULL; +PFNGLENABLEVERTEXARRAYATTRIBPROC glad_glEnableVertexArrayAttrib = NULL; PFNGLENABLEVERTEXATTRIBARRAYPROC 
glad_glEnableVertexAttribArray = NULL; PFNGLENABLEIPROC glad_glEnablei = NULL; +PFNGLENDPROC glad_glEnd = NULL; +PFNGLENDCONDITIONALRENDERPROC glad_glEndConditionalRender = NULL; +PFNGLENDLISTPROC glad_glEndList = NULL; PFNGLENDQUERYPROC glad_glEndQuery = NULL; +PFNGLENDQUERYINDEXEDPROC glad_glEndQueryIndexed = NULL; PFNGLENDTRANSFORMFEEDBACKPROC glad_glEndTransformFeedback = NULL; +PFNGLEVALCOORD1DPROC glad_glEvalCoord1d = NULL; +PFNGLEVALCOORD1DVPROC glad_glEvalCoord1dv = NULL; +PFNGLEVALCOORD1FPROC glad_glEvalCoord1f = NULL; +PFNGLEVALCOORD1FVPROC glad_glEvalCoord1fv = NULL; +PFNGLEVALCOORD2DPROC glad_glEvalCoord2d = NULL; +PFNGLEVALCOORD2DVPROC glad_glEvalCoord2dv = NULL; +PFNGLEVALCOORD2FPROC glad_glEvalCoord2f = NULL; +PFNGLEVALCOORD2FVPROC glad_glEvalCoord2fv = NULL; +PFNGLEVALMESH1PROC glad_glEvalMesh1 = NULL; +PFNGLEVALMESH2PROC glad_glEvalMesh2 = NULL; +PFNGLEVALPOINT1PROC glad_glEvalPoint1 = NULL; +PFNGLEVALPOINT2PROC glad_glEvalPoint2 = NULL; +PFNGLFEEDBACKBUFFERPROC glad_glFeedbackBuffer = NULL; PFNGLFENCESYNCPROC glad_glFenceSync = NULL; PFNGLFINISHPROC glad_glFinish = NULL; PFNGLFLUSHPROC glad_glFlush = NULL; PFNGLFLUSHMAPPEDBUFFERRANGEPROC glad_glFlushMappedBufferRange = NULL; +PFNGLFLUSHMAPPEDNAMEDBUFFERRANGEPROC glad_glFlushMappedNamedBufferRange = NULL; +PFNGLFOGCOORDPOINTERPROC glad_glFogCoordPointer = NULL; +PFNGLFOGCOORDDPROC glad_glFogCoordd = NULL; +PFNGLFOGCOORDDVPROC glad_glFogCoorddv = NULL; +PFNGLFOGCOORDFPROC glad_glFogCoordf = NULL; +PFNGLFOGCOORDFVPROC glad_glFogCoordfv = NULL; +PFNGLFOGFPROC glad_glFogf = NULL; +PFNGLFOGFVPROC glad_glFogfv = NULL; +PFNGLFOGIPROC glad_glFogi = NULL; +PFNGLFOGIVPROC glad_glFogiv = NULL; +PFNGLFOGXPROC glad_glFogx = NULL; +PFNGLFOGXVPROC glad_glFogxv = NULL; PFNGLFRAMEBUFFERPARAMETERIPROC glad_glFramebufferParameteri = NULL; PFNGLFRAMEBUFFERRENDERBUFFERPROC glad_glFramebufferRenderbuffer = NULL; PFNGLFRAMEBUFFERTEXTUREPROC glad_glFramebufferTexture = NULL; +PFNGLFRAMEBUFFERTEXTURE1DPROC 
glad_glFramebufferTexture1D = NULL; PFNGLFRAMEBUFFERTEXTURE2DPROC glad_glFramebufferTexture2D = NULL; +PFNGLFRAMEBUFFERTEXTURE3DPROC glad_glFramebufferTexture3D = NULL; PFNGLFRAMEBUFFERTEXTURELAYERPROC glad_glFramebufferTextureLayer = NULL; PFNGLFRONTFACEPROC glad_glFrontFace = NULL; +PFNGLFRUSTUMPROC glad_glFrustum = NULL; +PFNGLFRUSTUMFPROC glad_glFrustumf = NULL; +PFNGLFRUSTUMXPROC glad_glFrustumx = NULL; PFNGLGENBUFFERSPROC glad_glGenBuffers = NULL; PFNGLGENFRAMEBUFFERSPROC glad_glGenFramebuffers = NULL; +PFNGLGENLISTSPROC glad_glGenLists = NULL; PFNGLGENPROGRAMPIPELINESPROC glad_glGenProgramPipelines = NULL; PFNGLGENQUERIESPROC glad_glGenQueries = NULL; PFNGLGENRENDERBUFFERSPROC glad_glGenRenderbuffers = NULL; @@ -569,10 +1416,16 @@ PFNGLGENTEXTURESPROC glad_glGenTextures = NULL; PFNGLGENTRANSFORMFEEDBACKSPROC glad_glGenTransformFeedbacks = NULL; PFNGLGENVERTEXARRAYSPROC glad_glGenVertexArrays = NULL; PFNGLGENERATEMIPMAPPROC glad_glGenerateMipmap = NULL; +PFNGLGENERATETEXTUREMIPMAPPROC glad_glGenerateTextureMipmap = NULL; +PFNGLGETACTIVEATOMICCOUNTERBUFFERIVPROC glad_glGetActiveAtomicCounterBufferiv = NULL; PFNGLGETACTIVEATTRIBPROC glad_glGetActiveAttrib = NULL; +PFNGLGETACTIVESUBROUTINENAMEPROC glad_glGetActiveSubroutineName = NULL; +PFNGLGETACTIVESUBROUTINEUNIFORMNAMEPROC glad_glGetActiveSubroutineUniformName = NULL; +PFNGLGETACTIVESUBROUTINEUNIFORMIVPROC glad_glGetActiveSubroutineUniformiv = NULL; PFNGLGETACTIVEUNIFORMPROC glad_glGetActiveUniform = NULL; PFNGLGETACTIVEUNIFORMBLOCKNAMEPROC glad_glGetActiveUniformBlockName = NULL; PFNGLGETACTIVEUNIFORMBLOCKIVPROC glad_glGetActiveUniformBlockiv = NULL; +PFNGLGETACTIVEUNIFORMNAMEPROC glad_glGetActiveUniformName = NULL; PFNGLGETACTIVEUNIFORMSIVPROC glad_glGetActiveUniformsiv = NULL; PFNGLGETATTACHEDSHADERSPROC glad_glGetAttachedShaders = NULL; PFNGLGETATTRIBLOCATIONPROC glad_glGetAttribLocation = NULL; @@ -581,9 +1434,21 @@ PFNGLGETBOOLEANVPROC glad_glGetBooleanv = NULL; PFNGLGETBUFFERPARAMETERI64VPROC 
glad_glGetBufferParameteri64v = NULL; PFNGLGETBUFFERPARAMETERIVPROC glad_glGetBufferParameteriv = NULL; PFNGLGETBUFFERPOINTERVPROC glad_glGetBufferPointerv = NULL; +PFNGLGETBUFFERSUBDATAPROC glad_glGetBufferSubData = NULL; +PFNGLGETCLIPPLANEPROC glad_glGetClipPlane = NULL; +PFNGLGETCLIPPLANEFPROC glad_glGetClipPlanef = NULL; +PFNGLGETCLIPPLANEXPROC glad_glGetClipPlanex = NULL; +PFNGLGETCOMPRESSEDTEXIMAGEPROC glad_glGetCompressedTexImage = NULL; +PFNGLGETCOMPRESSEDTEXTUREIMAGEPROC glad_glGetCompressedTextureImage = NULL; +PFNGLGETCOMPRESSEDTEXTURESUBIMAGEPROC glad_glGetCompressedTextureSubImage = NULL; PFNGLGETDEBUGMESSAGELOGPROC glad_glGetDebugMessageLog = NULL; +PFNGLGETDOUBLEI_VPROC glad_glGetDoublei_v = NULL; +PFNGLGETDOUBLEVPROC glad_glGetDoublev = NULL; PFNGLGETERRORPROC glad_glGetError = NULL; +PFNGLGETFIXEDVPROC glad_glGetFixedv = NULL; +PFNGLGETFLOATI_VPROC glad_glGetFloati_v = NULL; PFNGLGETFLOATVPROC glad_glGetFloatv = NULL; +PFNGLGETFRAGDATAINDEXPROC glad_glGetFragDataIndex = NULL; PFNGLGETFRAGDATALOCATIONPROC glad_glGetFragDataLocation = NULL; PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVPROC glad_glGetFramebufferAttachmentParameteriv = NULL; PFNGLGETFRAMEBUFFERPARAMETERIVPROC glad_glGetFramebufferParameteriv = NULL; @@ -592,11 +1457,32 @@ PFNGLGETINTEGER64I_VPROC glad_glGetInteger64i_v = NULL; PFNGLGETINTEGER64VPROC glad_glGetInteger64v = NULL; PFNGLGETINTEGERI_VPROC glad_glGetIntegeri_v = NULL; PFNGLGETINTEGERVPROC glad_glGetIntegerv = NULL; +PFNGLGETINTERNALFORMATI64VPROC glad_glGetInternalformati64v = NULL; PFNGLGETINTERNALFORMATIVPROC glad_glGetInternalformativ = NULL; +PFNGLGETLIGHTFVPROC glad_glGetLightfv = NULL; +PFNGLGETLIGHTIVPROC glad_glGetLightiv = NULL; +PFNGLGETLIGHTXVPROC glad_glGetLightxv = NULL; +PFNGLGETMAPDVPROC glad_glGetMapdv = NULL; +PFNGLGETMAPFVPROC glad_glGetMapfv = NULL; +PFNGLGETMAPIVPROC glad_glGetMapiv = NULL; +PFNGLGETMATERIALFVPROC glad_glGetMaterialfv = NULL; +PFNGLGETMATERIALIVPROC glad_glGetMaterialiv = NULL; 
+PFNGLGETMATERIALXVPROC glad_glGetMaterialxv = NULL; PFNGLGETMULTISAMPLEFVPROC glad_glGetMultisamplefv = NULL; +PFNGLGETNAMEDBUFFERPARAMETERI64VPROC glad_glGetNamedBufferParameteri64v = NULL; +PFNGLGETNAMEDBUFFERPARAMETERIVPROC glad_glGetNamedBufferParameteriv = NULL; +PFNGLGETNAMEDBUFFERPOINTERVPROC glad_glGetNamedBufferPointerv = NULL; +PFNGLGETNAMEDBUFFERSUBDATAPROC glad_glGetNamedBufferSubData = NULL; +PFNGLGETNAMEDFRAMEBUFFERATTACHMENTPARAMETERIVPROC glad_glGetNamedFramebufferAttachmentParameteriv = NULL; +PFNGLGETNAMEDFRAMEBUFFERPARAMETERIVPROC glad_glGetNamedFramebufferParameteriv = NULL; +PFNGLGETNAMEDRENDERBUFFERPARAMETERIVPROC glad_glGetNamedRenderbufferParameteriv = NULL; PFNGLGETOBJECTLABELPROC glad_glGetObjectLabel = NULL; PFNGLGETOBJECTPTRLABELPROC glad_glGetObjectPtrLabel = NULL; +PFNGLGETPIXELMAPFVPROC glad_glGetPixelMapfv = NULL; +PFNGLGETPIXELMAPUIVPROC glad_glGetPixelMapuiv = NULL; +PFNGLGETPIXELMAPUSVPROC glad_glGetPixelMapusv = NULL; PFNGLGETPOINTERVPROC glad_glGetPointerv = NULL; +PFNGLGETPOLYGONSTIPPLEPROC glad_glGetPolygonStipple = NULL; PFNGLGETPROGRAMBINARYPROC glad_glGetProgramBinary = NULL; PFNGLGETPROGRAMINFOLOGPROC glad_glGetProgramInfoLog = NULL; PFNGLGETPROGRAMINTERFACEIVPROC glad_glGetProgramInterfaceiv = NULL; @@ -604,9 +1490,19 @@ PFNGLGETPROGRAMPIPELINEINFOLOGPROC glad_glGetProgramPipelineInfoLog = NULL; PFNGLGETPROGRAMPIPELINEIVPROC glad_glGetProgramPipelineiv = NULL; PFNGLGETPROGRAMRESOURCEINDEXPROC glad_glGetProgramResourceIndex = NULL; PFNGLGETPROGRAMRESOURCELOCATIONPROC glad_glGetProgramResourceLocation = NULL; +PFNGLGETPROGRAMRESOURCELOCATIONINDEXPROC glad_glGetProgramResourceLocationIndex = NULL; PFNGLGETPROGRAMRESOURCENAMEPROC glad_glGetProgramResourceName = NULL; PFNGLGETPROGRAMRESOURCEIVPROC glad_glGetProgramResourceiv = NULL; +PFNGLGETPROGRAMSTAGEIVPROC glad_glGetProgramStageiv = NULL; PFNGLGETPROGRAMIVPROC glad_glGetProgramiv = NULL; +PFNGLGETQUERYBUFFEROBJECTI64VPROC glad_glGetQueryBufferObjecti64v = NULL; 
+PFNGLGETQUERYBUFFEROBJECTIVPROC glad_glGetQueryBufferObjectiv = NULL; +PFNGLGETQUERYBUFFEROBJECTUI64VPROC glad_glGetQueryBufferObjectui64v = NULL; +PFNGLGETQUERYBUFFEROBJECTUIVPROC glad_glGetQueryBufferObjectuiv = NULL; +PFNGLGETQUERYINDEXEDIVPROC glad_glGetQueryIndexediv = NULL; +PFNGLGETQUERYOBJECTI64VPROC glad_glGetQueryObjecti64v = NULL; +PFNGLGETQUERYOBJECTIVPROC glad_glGetQueryObjectiv = NULL; +PFNGLGETQUERYOBJECTUI64VPROC glad_glGetQueryObjectui64v = NULL; PFNGLGETQUERYOBJECTUIVPROC glad_glGetQueryObjectuiv = NULL; PFNGLGETQUERYIVPROC glad_glGetQueryiv = NULL; PFNGLGETRENDERBUFFERPARAMETERIVPROC glad_glGetRenderbufferParameteriv = NULL; @@ -620,35 +1516,99 @@ PFNGLGETSHADERSOURCEPROC glad_glGetShaderSource = NULL; PFNGLGETSHADERIVPROC glad_glGetShaderiv = NULL; PFNGLGETSTRINGPROC glad_glGetString = NULL; PFNGLGETSTRINGIPROC glad_glGetStringi = NULL; +PFNGLGETSUBROUTINEINDEXPROC glad_glGetSubroutineIndex = NULL; +PFNGLGETSUBROUTINEUNIFORMLOCATIONPROC glad_glGetSubroutineUniformLocation = NULL; PFNGLGETSYNCIVPROC glad_glGetSynciv = NULL; +PFNGLGETTEXENVFVPROC glad_glGetTexEnvfv = NULL; +PFNGLGETTEXENVIVPROC glad_glGetTexEnviv = NULL; +PFNGLGETTEXENVXVPROC glad_glGetTexEnvxv = NULL; +PFNGLGETTEXGENDVPROC glad_glGetTexGendv = NULL; +PFNGLGETTEXGENFVPROC glad_glGetTexGenfv = NULL; +PFNGLGETTEXGENIVPROC glad_glGetTexGeniv = NULL; +PFNGLGETTEXIMAGEPROC glad_glGetTexImage = NULL; PFNGLGETTEXLEVELPARAMETERFVPROC glad_glGetTexLevelParameterfv = NULL; PFNGLGETTEXLEVELPARAMETERIVPROC glad_glGetTexLevelParameteriv = NULL; PFNGLGETTEXPARAMETERIIVPROC glad_glGetTexParameterIiv = NULL; PFNGLGETTEXPARAMETERIUIVPROC glad_glGetTexParameterIuiv = NULL; PFNGLGETTEXPARAMETERFVPROC glad_glGetTexParameterfv = NULL; PFNGLGETTEXPARAMETERIVPROC glad_glGetTexParameteriv = NULL; +PFNGLGETTEXPARAMETERXVPROC glad_glGetTexParameterxv = NULL; +PFNGLGETTEXTUREIMAGEPROC glad_glGetTextureImage = NULL; +PFNGLGETTEXTURELEVELPARAMETERFVPROC glad_glGetTextureLevelParameterfv = NULL; 
+PFNGLGETTEXTURELEVELPARAMETERIVPROC glad_glGetTextureLevelParameteriv = NULL; +PFNGLGETTEXTUREPARAMETERIIVPROC glad_glGetTextureParameterIiv = NULL; +PFNGLGETTEXTUREPARAMETERIUIVPROC glad_glGetTextureParameterIuiv = NULL; +PFNGLGETTEXTUREPARAMETERFVPROC glad_glGetTextureParameterfv = NULL; +PFNGLGETTEXTUREPARAMETERIVPROC glad_glGetTextureParameteriv = NULL; +PFNGLGETTEXTURESUBIMAGEPROC glad_glGetTextureSubImage = NULL; PFNGLGETTRANSFORMFEEDBACKVARYINGPROC glad_glGetTransformFeedbackVarying = NULL; +PFNGLGETTRANSFORMFEEDBACKI64_VPROC glad_glGetTransformFeedbacki64_v = NULL; +PFNGLGETTRANSFORMFEEDBACKI_VPROC glad_glGetTransformFeedbacki_v = NULL; +PFNGLGETTRANSFORMFEEDBACKIVPROC glad_glGetTransformFeedbackiv = NULL; PFNGLGETUNIFORMBLOCKINDEXPROC glad_glGetUniformBlockIndex = NULL; PFNGLGETUNIFORMINDICESPROC glad_glGetUniformIndices = NULL; PFNGLGETUNIFORMLOCATIONPROC glad_glGetUniformLocation = NULL; +PFNGLGETUNIFORMSUBROUTINEUIVPROC glad_glGetUniformSubroutineuiv = NULL; +PFNGLGETUNIFORMDVPROC glad_glGetUniformdv = NULL; PFNGLGETUNIFORMFVPROC glad_glGetUniformfv = NULL; PFNGLGETUNIFORMIVPROC glad_glGetUniformiv = NULL; PFNGLGETUNIFORMUIVPROC glad_glGetUniformuiv = NULL; +PFNGLGETVERTEXARRAYINDEXED64IVPROC glad_glGetVertexArrayIndexed64iv = NULL; +PFNGLGETVERTEXARRAYINDEXEDIVPROC glad_glGetVertexArrayIndexediv = NULL; +PFNGLGETVERTEXARRAYIVPROC glad_glGetVertexArrayiv = NULL; PFNGLGETVERTEXATTRIBIIVPROC glad_glGetVertexAttribIiv = NULL; PFNGLGETVERTEXATTRIBIUIVPROC glad_glGetVertexAttribIuiv = NULL; +PFNGLGETVERTEXATTRIBLDVPROC glad_glGetVertexAttribLdv = NULL; PFNGLGETVERTEXATTRIBPOINTERVPROC glad_glGetVertexAttribPointerv = NULL; +PFNGLGETVERTEXATTRIBDVPROC glad_glGetVertexAttribdv = NULL; PFNGLGETVERTEXATTRIBFVPROC glad_glGetVertexAttribfv = NULL; PFNGLGETVERTEXATTRIBIVPROC glad_glGetVertexAttribiv = NULL; +PFNGLGETNCOLORTABLEPROC glad_glGetnColorTable = NULL; +PFNGLGETNCOMPRESSEDTEXIMAGEPROC glad_glGetnCompressedTexImage = NULL; +PFNGLGETNCONVOLUTIONFILTERPROC 
glad_glGetnConvolutionFilter = NULL; +PFNGLGETNHISTOGRAMPROC glad_glGetnHistogram = NULL; +PFNGLGETNMAPDVPROC glad_glGetnMapdv = NULL; +PFNGLGETNMAPFVPROC glad_glGetnMapfv = NULL; +PFNGLGETNMAPIVPROC glad_glGetnMapiv = NULL; +PFNGLGETNMINMAXPROC glad_glGetnMinmax = NULL; +PFNGLGETNPIXELMAPFVPROC glad_glGetnPixelMapfv = NULL; +PFNGLGETNPIXELMAPUIVPROC glad_glGetnPixelMapuiv = NULL; +PFNGLGETNPIXELMAPUSVPROC glad_glGetnPixelMapusv = NULL; +PFNGLGETNPOLYGONSTIPPLEPROC glad_glGetnPolygonStipple = NULL; +PFNGLGETNSEPARABLEFILTERPROC glad_glGetnSeparableFilter = NULL; +PFNGLGETNTEXIMAGEPROC glad_glGetnTexImage = NULL; +PFNGLGETNUNIFORMDVPROC glad_glGetnUniformdv = NULL; PFNGLGETNUNIFORMFVPROC glad_glGetnUniformfv = NULL; PFNGLGETNUNIFORMIVPROC glad_glGetnUniformiv = NULL; PFNGLGETNUNIFORMUIVPROC glad_glGetnUniformuiv = NULL; PFNGLHINTPROC glad_glHint = NULL; +PFNGLINDEXMASKPROC glad_glIndexMask = NULL; +PFNGLINDEXPOINTERPROC glad_glIndexPointer = NULL; +PFNGLINDEXDPROC glad_glIndexd = NULL; +PFNGLINDEXDVPROC glad_glIndexdv = NULL; +PFNGLINDEXFPROC glad_glIndexf = NULL; +PFNGLINDEXFVPROC glad_glIndexfv = NULL; +PFNGLINDEXIPROC glad_glIndexi = NULL; +PFNGLINDEXIVPROC glad_glIndexiv = NULL; +PFNGLINDEXSPROC glad_glIndexs = NULL; +PFNGLINDEXSVPROC glad_glIndexsv = NULL; +PFNGLINDEXUBPROC glad_glIndexub = NULL; +PFNGLINDEXUBVPROC glad_glIndexubv = NULL; +PFNGLINITNAMESPROC glad_glInitNames = NULL; +PFNGLINTERLEAVEDARRAYSPROC glad_glInterleavedArrays = NULL; +PFNGLINVALIDATEBUFFERDATAPROC glad_glInvalidateBufferData = NULL; +PFNGLINVALIDATEBUFFERSUBDATAPROC glad_glInvalidateBufferSubData = NULL; PFNGLINVALIDATEFRAMEBUFFERPROC glad_glInvalidateFramebuffer = NULL; +PFNGLINVALIDATENAMEDFRAMEBUFFERDATAPROC glad_glInvalidateNamedFramebufferData = NULL; +PFNGLINVALIDATENAMEDFRAMEBUFFERSUBDATAPROC glad_glInvalidateNamedFramebufferSubData = NULL; PFNGLINVALIDATESUBFRAMEBUFFERPROC glad_glInvalidateSubFramebuffer = NULL; +PFNGLINVALIDATETEXIMAGEPROC glad_glInvalidateTexImage = NULL; 
+PFNGLINVALIDATETEXSUBIMAGEPROC glad_glInvalidateTexSubImage = NULL; PFNGLISBUFFERPROC glad_glIsBuffer = NULL; PFNGLISENABLEDPROC glad_glIsEnabled = NULL; PFNGLISENABLEDIPROC glad_glIsEnabledi = NULL; PFNGLISFRAMEBUFFERPROC glad_glIsFramebuffer = NULL; +PFNGLISLISTPROC glad_glIsList = NULL; PFNGLISPROGRAMPROC glad_glIsProgram = NULL; PFNGLISPROGRAMPIPELINEPROC glad_glIsProgramPipeline = NULL; PFNGLISQUERYPROC glad_glIsQuery = NULL; @@ -659,64 +1619,272 @@ PFNGLISSYNCPROC glad_glIsSync = NULL; PFNGLISTEXTUREPROC glad_glIsTexture = NULL; PFNGLISTRANSFORMFEEDBACKPROC glad_glIsTransformFeedback = NULL; PFNGLISVERTEXARRAYPROC glad_glIsVertexArray = NULL; +PFNGLLIGHTMODELFPROC glad_glLightModelf = NULL; +PFNGLLIGHTMODELFVPROC glad_glLightModelfv = NULL; +PFNGLLIGHTMODELIPROC glad_glLightModeli = NULL; +PFNGLLIGHTMODELIVPROC glad_glLightModeliv = NULL; +PFNGLLIGHTMODELXPROC glad_glLightModelx = NULL; +PFNGLLIGHTMODELXVPROC glad_glLightModelxv = NULL; +PFNGLLIGHTFPROC glad_glLightf = NULL; +PFNGLLIGHTFVPROC glad_glLightfv = NULL; +PFNGLLIGHTIPROC glad_glLighti = NULL; +PFNGLLIGHTIVPROC glad_glLightiv = NULL; +PFNGLLIGHTXPROC glad_glLightx = NULL; +PFNGLLIGHTXVPROC glad_glLightxv = NULL; +PFNGLLINESTIPPLEPROC glad_glLineStipple = NULL; PFNGLLINEWIDTHPROC glad_glLineWidth = NULL; +PFNGLLINEWIDTHXPROC glad_glLineWidthx = NULL; PFNGLLINKPROGRAMPROC glad_glLinkProgram = NULL; +PFNGLLISTBASEPROC glad_glListBase = NULL; +PFNGLLOADIDENTITYPROC glad_glLoadIdentity = NULL; +PFNGLLOADMATRIXDPROC glad_glLoadMatrixd = NULL; +PFNGLLOADMATRIXFPROC glad_glLoadMatrixf = NULL; +PFNGLLOADMATRIXXPROC glad_glLoadMatrixx = NULL; +PFNGLLOADNAMEPROC glad_glLoadName = NULL; +PFNGLLOADTRANSPOSEMATRIXDPROC glad_glLoadTransposeMatrixd = NULL; +PFNGLLOADTRANSPOSEMATRIXFPROC glad_glLoadTransposeMatrixf = NULL; +PFNGLLOGICOPPROC glad_glLogicOp = NULL; +PFNGLMAP1DPROC glad_glMap1d = NULL; +PFNGLMAP1FPROC glad_glMap1f = NULL; +PFNGLMAP2DPROC glad_glMap2d = NULL; +PFNGLMAP2FPROC glad_glMap2f = NULL; 
+PFNGLMAPBUFFERPROC glad_glMapBuffer = NULL; PFNGLMAPBUFFERRANGEPROC glad_glMapBufferRange = NULL; +PFNGLMAPGRID1DPROC glad_glMapGrid1d = NULL; +PFNGLMAPGRID1FPROC glad_glMapGrid1f = NULL; +PFNGLMAPGRID2DPROC glad_glMapGrid2d = NULL; +PFNGLMAPGRID2FPROC glad_glMapGrid2f = NULL; +PFNGLMAPNAMEDBUFFERPROC glad_glMapNamedBuffer = NULL; +PFNGLMAPNAMEDBUFFERRANGEPROC glad_glMapNamedBufferRange = NULL; +PFNGLMATERIALFPROC glad_glMaterialf = NULL; +PFNGLMATERIALFVPROC glad_glMaterialfv = NULL; +PFNGLMATERIALIPROC glad_glMateriali = NULL; +PFNGLMATERIALIVPROC glad_glMaterialiv = NULL; +PFNGLMATERIALXPROC glad_glMaterialx = NULL; +PFNGLMATERIALXVPROC glad_glMaterialxv = NULL; +PFNGLMATRIXMODEPROC glad_glMatrixMode = NULL; PFNGLMEMORYBARRIERPROC glad_glMemoryBarrier = NULL; PFNGLMEMORYBARRIERBYREGIONPROC glad_glMemoryBarrierByRegion = NULL; PFNGLMINSAMPLESHADINGPROC glad_glMinSampleShading = NULL; +PFNGLMULTMATRIXDPROC glad_glMultMatrixd = NULL; +PFNGLMULTMATRIXFPROC glad_glMultMatrixf = NULL; +PFNGLMULTMATRIXXPROC glad_glMultMatrixx = NULL; +PFNGLMULTTRANSPOSEMATRIXDPROC glad_glMultTransposeMatrixd = NULL; +PFNGLMULTTRANSPOSEMATRIXFPROC glad_glMultTransposeMatrixf = NULL; +PFNGLMULTIDRAWARRAYSPROC glad_glMultiDrawArrays = NULL; +PFNGLMULTIDRAWARRAYSINDIRECTPROC glad_glMultiDrawArraysIndirect = NULL; +PFNGLMULTIDRAWELEMENTSPROC glad_glMultiDrawElements = NULL; +PFNGLMULTIDRAWELEMENTSBASEVERTEXPROC glad_glMultiDrawElementsBaseVertex = NULL; +PFNGLMULTIDRAWELEMENTSINDIRECTPROC glad_glMultiDrawElementsIndirect = NULL; +PFNGLMULTITEXCOORD1DPROC glad_glMultiTexCoord1d = NULL; +PFNGLMULTITEXCOORD1DVPROC glad_glMultiTexCoord1dv = NULL; +PFNGLMULTITEXCOORD1FPROC glad_glMultiTexCoord1f = NULL; +PFNGLMULTITEXCOORD1FVPROC glad_glMultiTexCoord1fv = NULL; +PFNGLMULTITEXCOORD1IPROC glad_glMultiTexCoord1i = NULL; +PFNGLMULTITEXCOORD1IVPROC glad_glMultiTexCoord1iv = NULL; +PFNGLMULTITEXCOORD1SPROC glad_glMultiTexCoord1s = NULL; +PFNGLMULTITEXCOORD1SVPROC glad_glMultiTexCoord1sv = NULL; 
+PFNGLMULTITEXCOORD2DPROC glad_glMultiTexCoord2d = NULL; +PFNGLMULTITEXCOORD2DVPROC glad_glMultiTexCoord2dv = NULL; +PFNGLMULTITEXCOORD2FPROC glad_glMultiTexCoord2f = NULL; +PFNGLMULTITEXCOORD2FVPROC glad_glMultiTexCoord2fv = NULL; +PFNGLMULTITEXCOORD2IPROC glad_glMultiTexCoord2i = NULL; +PFNGLMULTITEXCOORD2IVPROC glad_glMultiTexCoord2iv = NULL; +PFNGLMULTITEXCOORD2SPROC glad_glMultiTexCoord2s = NULL; +PFNGLMULTITEXCOORD2SVPROC glad_glMultiTexCoord2sv = NULL; +PFNGLMULTITEXCOORD3DPROC glad_glMultiTexCoord3d = NULL; +PFNGLMULTITEXCOORD3DVPROC glad_glMultiTexCoord3dv = NULL; +PFNGLMULTITEXCOORD3FPROC glad_glMultiTexCoord3f = NULL; +PFNGLMULTITEXCOORD3FVPROC glad_glMultiTexCoord3fv = NULL; +PFNGLMULTITEXCOORD3IPROC glad_glMultiTexCoord3i = NULL; +PFNGLMULTITEXCOORD3IVPROC glad_glMultiTexCoord3iv = NULL; +PFNGLMULTITEXCOORD3SPROC glad_glMultiTexCoord3s = NULL; +PFNGLMULTITEXCOORD3SVPROC glad_glMultiTexCoord3sv = NULL; +PFNGLMULTITEXCOORD4DPROC glad_glMultiTexCoord4d = NULL; +PFNGLMULTITEXCOORD4DVPROC glad_glMultiTexCoord4dv = NULL; +PFNGLMULTITEXCOORD4FPROC glad_glMultiTexCoord4f = NULL; +PFNGLMULTITEXCOORD4FVPROC glad_glMultiTexCoord4fv = NULL; +PFNGLMULTITEXCOORD4IPROC glad_glMultiTexCoord4i = NULL; +PFNGLMULTITEXCOORD4IVPROC glad_glMultiTexCoord4iv = NULL; +PFNGLMULTITEXCOORD4SPROC glad_glMultiTexCoord4s = NULL; +PFNGLMULTITEXCOORD4SVPROC glad_glMultiTexCoord4sv = NULL; +PFNGLMULTITEXCOORD4XPROC glad_glMultiTexCoord4x = NULL; +PFNGLMULTITEXCOORDP1UIPROC glad_glMultiTexCoordP1ui = NULL; +PFNGLMULTITEXCOORDP1UIVPROC glad_glMultiTexCoordP1uiv = NULL; +PFNGLMULTITEXCOORDP2UIPROC glad_glMultiTexCoordP2ui = NULL; +PFNGLMULTITEXCOORDP2UIVPROC glad_glMultiTexCoordP2uiv = NULL; +PFNGLMULTITEXCOORDP3UIPROC glad_glMultiTexCoordP3ui = NULL; +PFNGLMULTITEXCOORDP3UIVPROC glad_glMultiTexCoordP3uiv = NULL; +PFNGLMULTITEXCOORDP4UIPROC glad_glMultiTexCoordP4ui = NULL; +PFNGLMULTITEXCOORDP4UIVPROC glad_glMultiTexCoordP4uiv = NULL; +PFNGLNAMEDBUFFERDATAPROC glad_glNamedBufferData = 
NULL; +PFNGLNAMEDBUFFERSTORAGEPROC glad_glNamedBufferStorage = NULL; +PFNGLNAMEDBUFFERSUBDATAPROC glad_glNamedBufferSubData = NULL; +PFNGLNAMEDFRAMEBUFFERDRAWBUFFERPROC glad_glNamedFramebufferDrawBuffer = NULL; +PFNGLNAMEDFRAMEBUFFERDRAWBUFFERSPROC glad_glNamedFramebufferDrawBuffers = NULL; +PFNGLNAMEDFRAMEBUFFERPARAMETERIPROC glad_glNamedFramebufferParameteri = NULL; +PFNGLNAMEDFRAMEBUFFERREADBUFFERPROC glad_glNamedFramebufferReadBuffer = NULL; +PFNGLNAMEDFRAMEBUFFERRENDERBUFFERPROC glad_glNamedFramebufferRenderbuffer = NULL; +PFNGLNAMEDFRAMEBUFFERTEXTUREPROC glad_glNamedFramebufferTexture = NULL; +PFNGLNAMEDFRAMEBUFFERTEXTURELAYERPROC glad_glNamedFramebufferTextureLayer = NULL; +PFNGLNAMEDRENDERBUFFERSTORAGEPROC glad_glNamedRenderbufferStorage = NULL; +PFNGLNAMEDRENDERBUFFERSTORAGEMULTISAMPLEPROC glad_glNamedRenderbufferStorageMultisample = NULL; +PFNGLNEWLISTPROC glad_glNewList = NULL; +PFNGLNORMAL3BPROC glad_glNormal3b = NULL; +PFNGLNORMAL3BVPROC glad_glNormal3bv = NULL; +PFNGLNORMAL3DPROC glad_glNormal3d = NULL; +PFNGLNORMAL3DVPROC glad_glNormal3dv = NULL; +PFNGLNORMAL3FPROC glad_glNormal3f = NULL; +PFNGLNORMAL3FVPROC glad_glNormal3fv = NULL; +PFNGLNORMAL3IPROC glad_glNormal3i = NULL; +PFNGLNORMAL3IVPROC glad_glNormal3iv = NULL; +PFNGLNORMAL3SPROC glad_glNormal3s = NULL; +PFNGLNORMAL3SVPROC glad_glNormal3sv = NULL; +PFNGLNORMAL3XPROC glad_glNormal3x = NULL; +PFNGLNORMALP3UIPROC glad_glNormalP3ui = NULL; +PFNGLNORMALP3UIVPROC glad_glNormalP3uiv = NULL; +PFNGLNORMALPOINTERPROC glad_glNormalPointer = NULL; PFNGLOBJECTLABELPROC glad_glObjectLabel = NULL; PFNGLOBJECTPTRLABELPROC glad_glObjectPtrLabel = NULL; +PFNGLORTHOPROC glad_glOrtho = NULL; +PFNGLORTHOFPROC glad_glOrthof = NULL; +PFNGLORTHOXPROC glad_glOrthox = NULL; +PFNGLPASSTHROUGHPROC glad_glPassThrough = NULL; +PFNGLPATCHPARAMETERFVPROC glad_glPatchParameterfv = NULL; PFNGLPATCHPARAMETERIPROC glad_glPatchParameteri = NULL; PFNGLPAUSETRANSFORMFEEDBACKPROC glad_glPauseTransformFeedback = NULL; 
+PFNGLPIXELMAPFVPROC glad_glPixelMapfv = NULL; +PFNGLPIXELMAPUIVPROC glad_glPixelMapuiv = NULL; +PFNGLPIXELMAPUSVPROC glad_glPixelMapusv = NULL; +PFNGLPIXELSTOREFPROC glad_glPixelStoref = NULL; PFNGLPIXELSTOREIPROC glad_glPixelStorei = NULL; +PFNGLPIXELTRANSFERFPROC glad_glPixelTransferf = NULL; +PFNGLPIXELTRANSFERIPROC glad_glPixelTransferi = NULL; +PFNGLPIXELZOOMPROC glad_glPixelZoom = NULL; +PFNGLPOINTPARAMETERFPROC glad_glPointParameterf = NULL; +PFNGLPOINTPARAMETERFVPROC glad_glPointParameterfv = NULL; +PFNGLPOINTPARAMETERIPROC glad_glPointParameteri = NULL; +PFNGLPOINTPARAMETERIVPROC glad_glPointParameteriv = NULL; +PFNGLPOINTPARAMETERXPROC glad_glPointParameterx = NULL; +PFNGLPOINTPARAMETERXVPROC glad_glPointParameterxv = NULL; +PFNGLPOINTSIZEPROC glad_glPointSize = NULL; +PFNGLPOINTSIZEXPROC glad_glPointSizex = NULL; +PFNGLPOLYGONMODEPROC glad_glPolygonMode = NULL; PFNGLPOLYGONOFFSETPROC glad_glPolygonOffset = NULL; +PFNGLPOLYGONOFFSETXPROC glad_glPolygonOffsetx = NULL; +PFNGLPOLYGONSTIPPLEPROC glad_glPolygonStipple = NULL; +PFNGLPOPATTRIBPROC glad_glPopAttrib = NULL; +PFNGLPOPCLIENTATTRIBPROC glad_glPopClientAttrib = NULL; PFNGLPOPDEBUGGROUPPROC glad_glPopDebugGroup = NULL; +PFNGLPOPMATRIXPROC glad_glPopMatrix = NULL; +PFNGLPOPNAMEPROC glad_glPopName = NULL; PFNGLPRIMITIVEBOUNDINGBOXPROC glad_glPrimitiveBoundingBox = NULL; +PFNGLPRIMITIVERESTARTINDEXPROC glad_glPrimitiveRestartIndex = NULL; +PFNGLPRIORITIZETEXTURESPROC glad_glPrioritizeTextures = NULL; PFNGLPROGRAMBINARYPROC glad_glProgramBinary = NULL; PFNGLPROGRAMPARAMETERIPROC glad_glProgramParameteri = NULL; +PFNGLPROGRAMUNIFORM1DPROC glad_glProgramUniform1d = NULL; +PFNGLPROGRAMUNIFORM1DVPROC glad_glProgramUniform1dv = NULL; PFNGLPROGRAMUNIFORM1FPROC glad_glProgramUniform1f = NULL; PFNGLPROGRAMUNIFORM1FVPROC glad_glProgramUniform1fv = NULL; PFNGLPROGRAMUNIFORM1IPROC glad_glProgramUniform1i = NULL; PFNGLPROGRAMUNIFORM1IVPROC glad_glProgramUniform1iv = NULL; PFNGLPROGRAMUNIFORM1UIPROC 
glad_glProgramUniform1ui = NULL; PFNGLPROGRAMUNIFORM1UIVPROC glad_glProgramUniform1uiv = NULL; +PFNGLPROGRAMUNIFORM2DPROC glad_glProgramUniform2d = NULL; +PFNGLPROGRAMUNIFORM2DVPROC glad_glProgramUniform2dv = NULL; PFNGLPROGRAMUNIFORM2FPROC glad_glProgramUniform2f = NULL; PFNGLPROGRAMUNIFORM2FVPROC glad_glProgramUniform2fv = NULL; PFNGLPROGRAMUNIFORM2IPROC glad_glProgramUniform2i = NULL; PFNGLPROGRAMUNIFORM2IVPROC glad_glProgramUniform2iv = NULL; PFNGLPROGRAMUNIFORM2UIPROC glad_glProgramUniform2ui = NULL; PFNGLPROGRAMUNIFORM2UIVPROC glad_glProgramUniform2uiv = NULL; +PFNGLPROGRAMUNIFORM3DPROC glad_glProgramUniform3d = NULL; +PFNGLPROGRAMUNIFORM3DVPROC glad_glProgramUniform3dv = NULL; PFNGLPROGRAMUNIFORM3FPROC glad_glProgramUniform3f = NULL; PFNGLPROGRAMUNIFORM3FVPROC glad_glProgramUniform3fv = NULL; PFNGLPROGRAMUNIFORM3IPROC glad_glProgramUniform3i = NULL; PFNGLPROGRAMUNIFORM3IVPROC glad_glProgramUniform3iv = NULL; PFNGLPROGRAMUNIFORM3UIPROC glad_glProgramUniform3ui = NULL; PFNGLPROGRAMUNIFORM3UIVPROC glad_glProgramUniform3uiv = NULL; +PFNGLPROGRAMUNIFORM4DPROC glad_glProgramUniform4d = NULL; +PFNGLPROGRAMUNIFORM4DVPROC glad_glProgramUniform4dv = NULL; PFNGLPROGRAMUNIFORM4FPROC glad_glProgramUniform4f = NULL; PFNGLPROGRAMUNIFORM4FVPROC glad_glProgramUniform4fv = NULL; PFNGLPROGRAMUNIFORM4IPROC glad_glProgramUniform4i = NULL; PFNGLPROGRAMUNIFORM4IVPROC glad_glProgramUniform4iv = NULL; PFNGLPROGRAMUNIFORM4UIPROC glad_glProgramUniform4ui = NULL; PFNGLPROGRAMUNIFORM4UIVPROC glad_glProgramUniform4uiv = NULL; +PFNGLPROGRAMUNIFORMMATRIX2DVPROC glad_glProgramUniformMatrix2dv = NULL; PFNGLPROGRAMUNIFORMMATRIX2FVPROC glad_glProgramUniformMatrix2fv = NULL; +PFNGLPROGRAMUNIFORMMATRIX2X3DVPROC glad_glProgramUniformMatrix2x3dv = NULL; PFNGLPROGRAMUNIFORMMATRIX2X3FVPROC glad_glProgramUniformMatrix2x3fv = NULL; +PFNGLPROGRAMUNIFORMMATRIX2X4DVPROC glad_glProgramUniformMatrix2x4dv = NULL; PFNGLPROGRAMUNIFORMMATRIX2X4FVPROC glad_glProgramUniformMatrix2x4fv = NULL; 
+PFNGLPROGRAMUNIFORMMATRIX3DVPROC glad_glProgramUniformMatrix3dv = NULL; PFNGLPROGRAMUNIFORMMATRIX3FVPROC glad_glProgramUniformMatrix3fv = NULL; +PFNGLPROGRAMUNIFORMMATRIX3X2DVPROC glad_glProgramUniformMatrix3x2dv = NULL; PFNGLPROGRAMUNIFORMMATRIX3X2FVPROC glad_glProgramUniformMatrix3x2fv = NULL; +PFNGLPROGRAMUNIFORMMATRIX3X4DVPROC glad_glProgramUniformMatrix3x4dv = NULL; PFNGLPROGRAMUNIFORMMATRIX3X4FVPROC glad_glProgramUniformMatrix3x4fv = NULL; +PFNGLPROGRAMUNIFORMMATRIX4DVPROC glad_glProgramUniformMatrix4dv = NULL; PFNGLPROGRAMUNIFORMMATRIX4FVPROC glad_glProgramUniformMatrix4fv = NULL; +PFNGLPROGRAMUNIFORMMATRIX4X2DVPROC glad_glProgramUniformMatrix4x2dv = NULL; PFNGLPROGRAMUNIFORMMATRIX4X2FVPROC glad_glProgramUniformMatrix4x2fv = NULL; +PFNGLPROGRAMUNIFORMMATRIX4X3DVPROC glad_glProgramUniformMatrix4x3dv = NULL; PFNGLPROGRAMUNIFORMMATRIX4X3FVPROC glad_glProgramUniformMatrix4x3fv = NULL; +PFNGLPROVOKINGVERTEXPROC glad_glProvokingVertex = NULL; +PFNGLPUSHATTRIBPROC glad_glPushAttrib = NULL; +PFNGLPUSHCLIENTATTRIBPROC glad_glPushClientAttrib = NULL; PFNGLPUSHDEBUGGROUPPROC glad_glPushDebugGroup = NULL; +PFNGLPUSHMATRIXPROC glad_glPushMatrix = NULL; +PFNGLPUSHNAMEPROC glad_glPushName = NULL; +PFNGLQUERYCOUNTERPROC glad_glQueryCounter = NULL; +PFNGLRASTERPOS2DPROC glad_glRasterPos2d = NULL; +PFNGLRASTERPOS2DVPROC glad_glRasterPos2dv = NULL; +PFNGLRASTERPOS2FPROC glad_glRasterPos2f = NULL; +PFNGLRASTERPOS2FVPROC glad_glRasterPos2fv = NULL; +PFNGLRASTERPOS2IPROC glad_glRasterPos2i = NULL; +PFNGLRASTERPOS2IVPROC glad_glRasterPos2iv = NULL; +PFNGLRASTERPOS2SPROC glad_glRasterPos2s = NULL; +PFNGLRASTERPOS2SVPROC glad_glRasterPos2sv = NULL; +PFNGLRASTERPOS3DPROC glad_glRasterPos3d = NULL; +PFNGLRASTERPOS3DVPROC glad_glRasterPos3dv = NULL; +PFNGLRASTERPOS3FPROC glad_glRasterPos3f = NULL; +PFNGLRASTERPOS3FVPROC glad_glRasterPos3fv = NULL; +PFNGLRASTERPOS3IPROC glad_glRasterPos3i = NULL; +PFNGLRASTERPOS3IVPROC glad_glRasterPos3iv = NULL; +PFNGLRASTERPOS3SPROC 
glad_glRasterPos3s = NULL; +PFNGLRASTERPOS3SVPROC glad_glRasterPos3sv = NULL; +PFNGLRASTERPOS4DPROC glad_glRasterPos4d = NULL; +PFNGLRASTERPOS4DVPROC glad_glRasterPos4dv = NULL; +PFNGLRASTERPOS4FPROC glad_glRasterPos4f = NULL; +PFNGLRASTERPOS4FVPROC glad_glRasterPos4fv = NULL; +PFNGLRASTERPOS4IPROC glad_glRasterPos4i = NULL; +PFNGLRASTERPOS4IVPROC glad_glRasterPos4iv = NULL; +PFNGLRASTERPOS4SPROC glad_glRasterPos4s = NULL; +PFNGLRASTERPOS4SVPROC glad_glRasterPos4sv = NULL; PFNGLREADBUFFERPROC glad_glReadBuffer = NULL; PFNGLREADPIXELSPROC glad_glReadPixels = NULL; PFNGLREADNPIXELSPROC glad_glReadnPixels = NULL; +PFNGLRECTDPROC glad_glRectd = NULL; +PFNGLRECTDVPROC glad_glRectdv = NULL; +PFNGLRECTFPROC glad_glRectf = NULL; +PFNGLRECTFVPROC glad_glRectfv = NULL; +PFNGLRECTIPROC glad_glRecti = NULL; +PFNGLRECTIVPROC glad_glRectiv = NULL; +PFNGLRECTSPROC glad_glRects = NULL; +PFNGLRECTSVPROC glad_glRectsv = NULL; PFNGLRELEASESHADERCOMPILERPROC glad_glReleaseShaderCompiler = NULL; +PFNGLRENDERMODEPROC glad_glRenderMode = NULL; PFNGLRENDERBUFFERSTORAGEPROC glad_glRenderbufferStorage = NULL; PFNGLRENDERBUFFERSTORAGEMULTISAMPLEPROC glad_glRenderbufferStorageMultisample = NULL; PFNGLRESUMETRANSFORMFEEDBACKPROC glad_glResumeTransformFeedback = NULL; +PFNGLROTATEDPROC glad_glRotated = NULL; +PFNGLROTATEFPROC glad_glRotatef = NULL; +PFNGLROTATEXPROC glad_glRotatex = NULL; PFNGLSAMPLECOVERAGEPROC glad_glSampleCoverage = NULL; +PFNGLSAMPLECOVERAGEXPROC glad_glSampleCoveragex = NULL; PFNGLSAMPLEMASKIPROC glad_glSampleMaski = NULL; PFNGLSAMPLERPARAMETERIIVPROC glad_glSamplerParameterIiv = NULL; PFNGLSAMPLERPARAMETERIUIVPROC glad_glSamplerParameterIuiv = NULL; @@ -724,9 +1892,37 @@ PFNGLSAMPLERPARAMETERFPROC glad_glSamplerParameterf = NULL; PFNGLSAMPLERPARAMETERFVPROC glad_glSamplerParameterfv = NULL; PFNGLSAMPLERPARAMETERIPROC glad_glSamplerParameteri = NULL; PFNGLSAMPLERPARAMETERIVPROC glad_glSamplerParameteriv = NULL; +PFNGLSCALEDPROC glad_glScaled = NULL; +PFNGLSCALEFPROC 
glad_glScalef = NULL; +PFNGLSCALEXPROC glad_glScalex = NULL; PFNGLSCISSORPROC glad_glScissor = NULL; +PFNGLSCISSORARRAYVPROC glad_glScissorArrayv = NULL; +PFNGLSCISSORINDEXEDPROC glad_glScissorIndexed = NULL; +PFNGLSCISSORINDEXEDVPROC glad_glScissorIndexedv = NULL; +PFNGLSECONDARYCOLOR3BPROC glad_glSecondaryColor3b = NULL; +PFNGLSECONDARYCOLOR3BVPROC glad_glSecondaryColor3bv = NULL; +PFNGLSECONDARYCOLOR3DPROC glad_glSecondaryColor3d = NULL; +PFNGLSECONDARYCOLOR3DVPROC glad_glSecondaryColor3dv = NULL; +PFNGLSECONDARYCOLOR3FPROC glad_glSecondaryColor3f = NULL; +PFNGLSECONDARYCOLOR3FVPROC glad_glSecondaryColor3fv = NULL; +PFNGLSECONDARYCOLOR3IPROC glad_glSecondaryColor3i = NULL; +PFNGLSECONDARYCOLOR3IVPROC glad_glSecondaryColor3iv = NULL; +PFNGLSECONDARYCOLOR3SPROC glad_glSecondaryColor3s = NULL; +PFNGLSECONDARYCOLOR3SVPROC glad_glSecondaryColor3sv = NULL; +PFNGLSECONDARYCOLOR3UBPROC glad_glSecondaryColor3ub = NULL; +PFNGLSECONDARYCOLOR3UBVPROC glad_glSecondaryColor3ubv = NULL; +PFNGLSECONDARYCOLOR3UIPROC glad_glSecondaryColor3ui = NULL; +PFNGLSECONDARYCOLOR3UIVPROC glad_glSecondaryColor3uiv = NULL; +PFNGLSECONDARYCOLOR3USPROC glad_glSecondaryColor3us = NULL; +PFNGLSECONDARYCOLOR3USVPROC glad_glSecondaryColor3usv = NULL; +PFNGLSECONDARYCOLORP3UIPROC glad_glSecondaryColorP3ui = NULL; +PFNGLSECONDARYCOLORP3UIVPROC glad_glSecondaryColorP3uiv = NULL; +PFNGLSECONDARYCOLORPOINTERPROC glad_glSecondaryColorPointer = NULL; +PFNGLSELECTBUFFERPROC glad_glSelectBuffer = NULL; +PFNGLSHADEMODELPROC glad_glShadeModel = NULL; PFNGLSHADERBINARYPROC glad_glShaderBinary = NULL; PFNGLSHADERSOURCEPROC glad_glShaderSource = NULL; +PFNGLSHADERSTORAGEBLOCKBINDINGPROC glad_glShaderStorageBlockBinding = NULL; PFNGLSTENCILFUNCPROC glad_glStencilFunc = NULL; PFNGLSTENCILFUNCSEPARATEPROC glad_glStencilFuncSeparate = NULL; PFNGLSTENCILMASKPROC glad_glStencilMask = NULL; @@ -735,39 +1931,130 @@ PFNGLSTENCILOPPROC glad_glStencilOp = NULL; PFNGLSTENCILOPSEPARATEPROC glad_glStencilOpSeparate = NULL; 
PFNGLTEXBUFFERPROC glad_glTexBuffer = NULL; PFNGLTEXBUFFERRANGEPROC glad_glTexBufferRange = NULL; +PFNGLTEXCOORD1DPROC glad_glTexCoord1d = NULL; +PFNGLTEXCOORD1DVPROC glad_glTexCoord1dv = NULL; +PFNGLTEXCOORD1FPROC glad_glTexCoord1f = NULL; +PFNGLTEXCOORD1FVPROC glad_glTexCoord1fv = NULL; +PFNGLTEXCOORD1IPROC glad_glTexCoord1i = NULL; +PFNGLTEXCOORD1IVPROC glad_glTexCoord1iv = NULL; +PFNGLTEXCOORD1SPROC glad_glTexCoord1s = NULL; +PFNGLTEXCOORD1SVPROC glad_glTexCoord1sv = NULL; +PFNGLTEXCOORD2DPROC glad_glTexCoord2d = NULL; +PFNGLTEXCOORD2DVPROC glad_glTexCoord2dv = NULL; +PFNGLTEXCOORD2FPROC glad_glTexCoord2f = NULL; +PFNGLTEXCOORD2FVPROC glad_glTexCoord2fv = NULL; +PFNGLTEXCOORD2IPROC glad_glTexCoord2i = NULL; +PFNGLTEXCOORD2IVPROC glad_glTexCoord2iv = NULL; +PFNGLTEXCOORD2SPROC glad_glTexCoord2s = NULL; +PFNGLTEXCOORD2SVPROC glad_glTexCoord2sv = NULL; +PFNGLTEXCOORD3DPROC glad_glTexCoord3d = NULL; +PFNGLTEXCOORD3DVPROC glad_glTexCoord3dv = NULL; +PFNGLTEXCOORD3FPROC glad_glTexCoord3f = NULL; +PFNGLTEXCOORD3FVPROC glad_glTexCoord3fv = NULL; +PFNGLTEXCOORD3IPROC glad_glTexCoord3i = NULL; +PFNGLTEXCOORD3IVPROC glad_glTexCoord3iv = NULL; +PFNGLTEXCOORD3SPROC glad_glTexCoord3s = NULL; +PFNGLTEXCOORD3SVPROC glad_glTexCoord3sv = NULL; +PFNGLTEXCOORD4DPROC glad_glTexCoord4d = NULL; +PFNGLTEXCOORD4DVPROC glad_glTexCoord4dv = NULL; +PFNGLTEXCOORD4FPROC glad_glTexCoord4f = NULL; +PFNGLTEXCOORD4FVPROC glad_glTexCoord4fv = NULL; +PFNGLTEXCOORD4IPROC glad_glTexCoord4i = NULL; +PFNGLTEXCOORD4IVPROC glad_glTexCoord4iv = NULL; +PFNGLTEXCOORD4SPROC glad_glTexCoord4s = NULL; +PFNGLTEXCOORD4SVPROC glad_glTexCoord4sv = NULL; +PFNGLTEXCOORDP1UIPROC glad_glTexCoordP1ui = NULL; +PFNGLTEXCOORDP1UIVPROC glad_glTexCoordP1uiv = NULL; +PFNGLTEXCOORDP2UIPROC glad_glTexCoordP2ui = NULL; +PFNGLTEXCOORDP2UIVPROC glad_glTexCoordP2uiv = NULL; +PFNGLTEXCOORDP3UIPROC glad_glTexCoordP3ui = NULL; +PFNGLTEXCOORDP3UIVPROC glad_glTexCoordP3uiv = NULL; +PFNGLTEXCOORDP4UIPROC glad_glTexCoordP4ui = NULL; 
+PFNGLTEXCOORDP4UIVPROC glad_glTexCoordP4uiv = NULL; +PFNGLTEXCOORDPOINTERPROC glad_glTexCoordPointer = NULL; +PFNGLTEXENVFPROC glad_glTexEnvf = NULL; +PFNGLTEXENVFVPROC glad_glTexEnvfv = NULL; +PFNGLTEXENVIPROC glad_glTexEnvi = NULL; +PFNGLTEXENVIVPROC glad_glTexEnviv = NULL; +PFNGLTEXENVXPROC glad_glTexEnvx = NULL; +PFNGLTEXENVXVPROC glad_glTexEnvxv = NULL; +PFNGLTEXGENDPROC glad_glTexGend = NULL; +PFNGLTEXGENDVPROC glad_glTexGendv = NULL; +PFNGLTEXGENFPROC glad_glTexGenf = NULL; +PFNGLTEXGENFVPROC glad_glTexGenfv = NULL; +PFNGLTEXGENIPROC glad_glTexGeni = NULL; +PFNGLTEXGENIVPROC glad_glTexGeniv = NULL; +PFNGLTEXIMAGE1DPROC glad_glTexImage1D = NULL; PFNGLTEXIMAGE2DPROC glad_glTexImage2D = NULL; +PFNGLTEXIMAGE2DMULTISAMPLEPROC glad_glTexImage2DMultisample = NULL; PFNGLTEXIMAGE3DPROC glad_glTexImage3D = NULL; +PFNGLTEXIMAGE3DMULTISAMPLEPROC glad_glTexImage3DMultisample = NULL; PFNGLTEXPARAMETERIIVPROC glad_glTexParameterIiv = NULL; PFNGLTEXPARAMETERIUIVPROC glad_glTexParameterIuiv = NULL; PFNGLTEXPARAMETERFPROC glad_glTexParameterf = NULL; PFNGLTEXPARAMETERFVPROC glad_glTexParameterfv = NULL; PFNGLTEXPARAMETERIPROC glad_glTexParameteri = NULL; PFNGLTEXPARAMETERIVPROC glad_glTexParameteriv = NULL; +PFNGLTEXPARAMETERXPROC glad_glTexParameterx = NULL; +PFNGLTEXPARAMETERXVPROC glad_glTexParameterxv = NULL; +PFNGLTEXSTORAGE1DPROC glad_glTexStorage1D = NULL; PFNGLTEXSTORAGE2DPROC glad_glTexStorage2D = NULL; PFNGLTEXSTORAGE2DMULTISAMPLEPROC glad_glTexStorage2DMultisample = NULL; PFNGLTEXSTORAGE3DPROC glad_glTexStorage3D = NULL; PFNGLTEXSTORAGE3DMULTISAMPLEPROC glad_glTexStorage3DMultisample = NULL; +PFNGLTEXSUBIMAGE1DPROC glad_glTexSubImage1D = NULL; PFNGLTEXSUBIMAGE2DPROC glad_glTexSubImage2D = NULL; PFNGLTEXSUBIMAGE3DPROC glad_glTexSubImage3D = NULL; +PFNGLTEXTUREBARRIERPROC glad_glTextureBarrier = NULL; +PFNGLTEXTUREBUFFERPROC glad_glTextureBuffer = NULL; +PFNGLTEXTUREBUFFERRANGEPROC glad_glTextureBufferRange = NULL; +PFNGLTEXTUREPARAMETERIIVPROC 
glad_glTextureParameterIiv = NULL; +PFNGLTEXTUREPARAMETERIUIVPROC glad_glTextureParameterIuiv = NULL; +PFNGLTEXTUREPARAMETERFPROC glad_glTextureParameterf = NULL; +PFNGLTEXTUREPARAMETERFVPROC glad_glTextureParameterfv = NULL; +PFNGLTEXTUREPARAMETERIPROC glad_glTextureParameteri = NULL; +PFNGLTEXTUREPARAMETERIVPROC glad_glTextureParameteriv = NULL; +PFNGLTEXTURESTORAGE1DPROC glad_glTextureStorage1D = NULL; +PFNGLTEXTURESTORAGE2DPROC glad_glTextureStorage2D = NULL; +PFNGLTEXTURESTORAGE2DMULTISAMPLEPROC glad_glTextureStorage2DMultisample = NULL; +PFNGLTEXTURESTORAGE3DPROC glad_glTextureStorage3D = NULL; +PFNGLTEXTURESTORAGE3DMULTISAMPLEPROC glad_glTextureStorage3DMultisample = NULL; +PFNGLTEXTURESUBIMAGE1DPROC glad_glTextureSubImage1D = NULL; +PFNGLTEXTURESUBIMAGE2DPROC glad_glTextureSubImage2D = NULL; +PFNGLTEXTURESUBIMAGE3DPROC glad_glTextureSubImage3D = NULL; +PFNGLTEXTUREVIEWPROC glad_glTextureView = NULL; +PFNGLTRANSFORMFEEDBACKBUFFERBASEPROC glad_glTransformFeedbackBufferBase = NULL; +PFNGLTRANSFORMFEEDBACKBUFFERRANGEPROC glad_glTransformFeedbackBufferRange = NULL; PFNGLTRANSFORMFEEDBACKVARYINGSPROC glad_glTransformFeedbackVaryings = NULL; +PFNGLTRANSLATEDPROC glad_glTranslated = NULL; +PFNGLTRANSLATEFPROC glad_glTranslatef = NULL; +PFNGLTRANSLATEXPROC glad_glTranslatex = NULL; +PFNGLUNIFORM1DPROC glad_glUniform1d = NULL; +PFNGLUNIFORM1DVPROC glad_glUniform1dv = NULL; PFNGLUNIFORM1FPROC glad_glUniform1f = NULL; PFNGLUNIFORM1FVPROC glad_glUniform1fv = NULL; PFNGLUNIFORM1IPROC glad_glUniform1i = NULL; PFNGLUNIFORM1IVPROC glad_glUniform1iv = NULL; PFNGLUNIFORM1UIPROC glad_glUniform1ui = NULL; PFNGLUNIFORM1UIVPROC glad_glUniform1uiv = NULL; +PFNGLUNIFORM2DPROC glad_glUniform2d = NULL; +PFNGLUNIFORM2DVPROC glad_glUniform2dv = NULL; PFNGLUNIFORM2FPROC glad_glUniform2f = NULL; PFNGLUNIFORM2FVPROC glad_glUniform2fv = NULL; PFNGLUNIFORM2IPROC glad_glUniform2i = NULL; PFNGLUNIFORM2IVPROC glad_glUniform2iv = NULL; PFNGLUNIFORM2UIPROC glad_glUniform2ui = NULL; 
PFNGLUNIFORM2UIVPROC glad_glUniform2uiv = NULL; +PFNGLUNIFORM3DPROC glad_glUniform3d = NULL; +PFNGLUNIFORM3DVPROC glad_glUniform3dv = NULL; PFNGLUNIFORM3FPROC glad_glUniform3f = NULL; PFNGLUNIFORM3FVPROC glad_glUniform3fv = NULL; PFNGLUNIFORM3IPROC glad_glUniform3i = NULL; PFNGLUNIFORM3IVPROC glad_glUniform3iv = NULL; PFNGLUNIFORM3UIPROC glad_glUniform3ui = NULL; PFNGLUNIFORM3UIVPROC glad_glUniform3uiv = NULL; +PFNGLUNIFORM4DPROC glad_glUniform4d = NULL; +PFNGLUNIFORM4DVPROC glad_glUniform4dv = NULL; PFNGLUNIFORM4FPROC glad_glUniform4f = NULL; PFNGLUNIFORM4FVPROC glad_glUniform4fv = NULL; PFNGLUNIFORM4IPROC glad_glUniform4i = NULL; @@ -775,46 +2062,214 @@ PFNGLUNIFORM4IVPROC glad_glUniform4iv = NULL; PFNGLUNIFORM4UIPROC glad_glUniform4ui = NULL; PFNGLUNIFORM4UIVPROC glad_glUniform4uiv = NULL; PFNGLUNIFORMBLOCKBINDINGPROC glad_glUniformBlockBinding = NULL; +PFNGLUNIFORMMATRIX2DVPROC glad_glUniformMatrix2dv = NULL; PFNGLUNIFORMMATRIX2FVPROC glad_glUniformMatrix2fv = NULL; +PFNGLUNIFORMMATRIX2X3DVPROC glad_glUniformMatrix2x3dv = NULL; PFNGLUNIFORMMATRIX2X3FVPROC glad_glUniformMatrix2x3fv = NULL; +PFNGLUNIFORMMATRIX2X4DVPROC glad_glUniformMatrix2x4dv = NULL; PFNGLUNIFORMMATRIX2X4FVPROC glad_glUniformMatrix2x4fv = NULL; +PFNGLUNIFORMMATRIX3DVPROC glad_glUniformMatrix3dv = NULL; PFNGLUNIFORMMATRIX3FVPROC glad_glUniformMatrix3fv = NULL; +PFNGLUNIFORMMATRIX3X2DVPROC glad_glUniformMatrix3x2dv = NULL; PFNGLUNIFORMMATRIX3X2FVPROC glad_glUniformMatrix3x2fv = NULL; +PFNGLUNIFORMMATRIX3X4DVPROC glad_glUniformMatrix3x4dv = NULL; PFNGLUNIFORMMATRIX3X4FVPROC glad_glUniformMatrix3x4fv = NULL; +PFNGLUNIFORMMATRIX4DVPROC glad_glUniformMatrix4dv = NULL; PFNGLUNIFORMMATRIX4FVPROC glad_glUniformMatrix4fv = NULL; +PFNGLUNIFORMMATRIX4X2DVPROC glad_glUniformMatrix4x2dv = NULL; PFNGLUNIFORMMATRIX4X2FVPROC glad_glUniformMatrix4x2fv = NULL; +PFNGLUNIFORMMATRIX4X3DVPROC glad_glUniformMatrix4x3dv = NULL; PFNGLUNIFORMMATRIX4X3FVPROC glad_glUniformMatrix4x3fv = NULL; 
+PFNGLUNIFORMSUBROUTINESUIVPROC glad_glUniformSubroutinesuiv = NULL; PFNGLUNMAPBUFFERPROC glad_glUnmapBuffer = NULL; +PFNGLUNMAPNAMEDBUFFERPROC glad_glUnmapNamedBuffer = NULL; PFNGLUSEPROGRAMPROC glad_glUseProgram = NULL; PFNGLUSEPROGRAMSTAGESPROC glad_glUseProgramStages = NULL; PFNGLVALIDATEPROGRAMPROC glad_glValidateProgram = NULL; PFNGLVALIDATEPROGRAMPIPELINEPROC glad_glValidateProgramPipeline = NULL; +PFNGLVERTEX2DPROC glad_glVertex2d = NULL; +PFNGLVERTEX2DVPROC glad_glVertex2dv = NULL; +PFNGLVERTEX2FPROC glad_glVertex2f = NULL; +PFNGLVERTEX2FVPROC glad_glVertex2fv = NULL; +PFNGLVERTEX2IPROC glad_glVertex2i = NULL; +PFNGLVERTEX2IVPROC glad_glVertex2iv = NULL; +PFNGLVERTEX2SPROC glad_glVertex2s = NULL; +PFNGLVERTEX2SVPROC glad_glVertex2sv = NULL; +PFNGLVERTEX3DPROC glad_glVertex3d = NULL; +PFNGLVERTEX3DVPROC glad_glVertex3dv = NULL; +PFNGLVERTEX3FPROC glad_glVertex3f = NULL; +PFNGLVERTEX3FVPROC glad_glVertex3fv = NULL; +PFNGLVERTEX3IPROC glad_glVertex3i = NULL; +PFNGLVERTEX3IVPROC glad_glVertex3iv = NULL; +PFNGLVERTEX3SPROC glad_glVertex3s = NULL; +PFNGLVERTEX3SVPROC glad_glVertex3sv = NULL; +PFNGLVERTEX4DPROC glad_glVertex4d = NULL; +PFNGLVERTEX4DVPROC glad_glVertex4dv = NULL; +PFNGLVERTEX4FPROC glad_glVertex4f = NULL; +PFNGLVERTEX4FVPROC glad_glVertex4fv = NULL; +PFNGLVERTEX4IPROC glad_glVertex4i = NULL; +PFNGLVERTEX4IVPROC glad_glVertex4iv = NULL; +PFNGLVERTEX4SPROC glad_glVertex4s = NULL; +PFNGLVERTEX4SVPROC glad_glVertex4sv = NULL; +PFNGLVERTEXARRAYATTRIBBINDINGPROC glad_glVertexArrayAttribBinding = NULL; +PFNGLVERTEXARRAYATTRIBFORMATPROC glad_glVertexArrayAttribFormat = NULL; +PFNGLVERTEXARRAYATTRIBIFORMATPROC glad_glVertexArrayAttribIFormat = NULL; +PFNGLVERTEXARRAYATTRIBLFORMATPROC glad_glVertexArrayAttribLFormat = NULL; +PFNGLVERTEXARRAYBINDINGDIVISORPROC glad_glVertexArrayBindingDivisor = NULL; +PFNGLVERTEXARRAYELEMENTBUFFERPROC glad_glVertexArrayElementBuffer = NULL; +PFNGLVERTEXARRAYVERTEXBUFFERPROC glad_glVertexArrayVertexBuffer = NULL; 
+PFNGLVERTEXARRAYVERTEXBUFFERSPROC glad_glVertexArrayVertexBuffers = NULL; +PFNGLVERTEXATTRIB1DPROC glad_glVertexAttrib1d = NULL; +PFNGLVERTEXATTRIB1DVPROC glad_glVertexAttrib1dv = NULL; PFNGLVERTEXATTRIB1FPROC glad_glVertexAttrib1f = NULL; PFNGLVERTEXATTRIB1FVPROC glad_glVertexAttrib1fv = NULL; +PFNGLVERTEXATTRIB1SPROC glad_glVertexAttrib1s = NULL; +PFNGLVERTEXATTRIB1SVPROC glad_glVertexAttrib1sv = NULL; +PFNGLVERTEXATTRIB2DPROC glad_glVertexAttrib2d = NULL; +PFNGLVERTEXATTRIB2DVPROC glad_glVertexAttrib2dv = NULL; PFNGLVERTEXATTRIB2FPROC glad_glVertexAttrib2f = NULL; PFNGLVERTEXATTRIB2FVPROC glad_glVertexAttrib2fv = NULL; +PFNGLVERTEXATTRIB2SPROC glad_glVertexAttrib2s = NULL; +PFNGLVERTEXATTRIB2SVPROC glad_glVertexAttrib2sv = NULL; +PFNGLVERTEXATTRIB3DPROC glad_glVertexAttrib3d = NULL; +PFNGLVERTEXATTRIB3DVPROC glad_glVertexAttrib3dv = NULL; PFNGLVERTEXATTRIB3FPROC glad_glVertexAttrib3f = NULL; PFNGLVERTEXATTRIB3FVPROC glad_glVertexAttrib3fv = NULL; +PFNGLVERTEXATTRIB3SPROC glad_glVertexAttrib3s = NULL; +PFNGLVERTEXATTRIB3SVPROC glad_glVertexAttrib3sv = NULL; +PFNGLVERTEXATTRIB4NBVPROC glad_glVertexAttrib4Nbv = NULL; +PFNGLVERTEXATTRIB4NIVPROC glad_glVertexAttrib4Niv = NULL; +PFNGLVERTEXATTRIB4NSVPROC glad_glVertexAttrib4Nsv = NULL; +PFNGLVERTEXATTRIB4NUBPROC glad_glVertexAttrib4Nub = NULL; +PFNGLVERTEXATTRIB4NUBVPROC glad_glVertexAttrib4Nubv = NULL; +PFNGLVERTEXATTRIB4NUIVPROC glad_glVertexAttrib4Nuiv = NULL; +PFNGLVERTEXATTRIB4NUSVPROC glad_glVertexAttrib4Nusv = NULL; +PFNGLVERTEXATTRIB4BVPROC glad_glVertexAttrib4bv = NULL; +PFNGLVERTEXATTRIB4DPROC glad_glVertexAttrib4d = NULL; +PFNGLVERTEXATTRIB4DVPROC glad_glVertexAttrib4dv = NULL; PFNGLVERTEXATTRIB4FPROC glad_glVertexAttrib4f = NULL; PFNGLVERTEXATTRIB4FVPROC glad_glVertexAttrib4fv = NULL; +PFNGLVERTEXATTRIB4IVPROC glad_glVertexAttrib4iv = NULL; +PFNGLVERTEXATTRIB4SPROC glad_glVertexAttrib4s = NULL; +PFNGLVERTEXATTRIB4SVPROC glad_glVertexAttrib4sv = NULL; +PFNGLVERTEXATTRIB4UBVPROC glad_glVertexAttrib4ubv = 
NULL; +PFNGLVERTEXATTRIB4UIVPROC glad_glVertexAttrib4uiv = NULL; +PFNGLVERTEXATTRIB4USVPROC glad_glVertexAttrib4usv = NULL; PFNGLVERTEXATTRIBBINDINGPROC glad_glVertexAttribBinding = NULL; PFNGLVERTEXATTRIBDIVISORPROC glad_glVertexAttribDivisor = NULL; PFNGLVERTEXATTRIBFORMATPROC glad_glVertexAttribFormat = NULL; +PFNGLVERTEXATTRIBI1IPROC glad_glVertexAttribI1i = NULL; +PFNGLVERTEXATTRIBI1IVPROC glad_glVertexAttribI1iv = NULL; +PFNGLVERTEXATTRIBI1UIPROC glad_glVertexAttribI1ui = NULL; +PFNGLVERTEXATTRIBI1UIVPROC glad_glVertexAttribI1uiv = NULL; +PFNGLVERTEXATTRIBI2IPROC glad_glVertexAttribI2i = NULL; +PFNGLVERTEXATTRIBI2IVPROC glad_glVertexAttribI2iv = NULL; +PFNGLVERTEXATTRIBI2UIPROC glad_glVertexAttribI2ui = NULL; +PFNGLVERTEXATTRIBI2UIVPROC glad_glVertexAttribI2uiv = NULL; +PFNGLVERTEXATTRIBI3IPROC glad_glVertexAttribI3i = NULL; +PFNGLVERTEXATTRIBI3IVPROC glad_glVertexAttribI3iv = NULL; +PFNGLVERTEXATTRIBI3UIPROC glad_glVertexAttribI3ui = NULL; +PFNGLVERTEXATTRIBI3UIVPROC glad_glVertexAttribI3uiv = NULL; +PFNGLVERTEXATTRIBI4BVPROC glad_glVertexAttribI4bv = NULL; PFNGLVERTEXATTRIBI4IPROC glad_glVertexAttribI4i = NULL; PFNGLVERTEXATTRIBI4IVPROC glad_glVertexAttribI4iv = NULL; +PFNGLVERTEXATTRIBI4SVPROC glad_glVertexAttribI4sv = NULL; +PFNGLVERTEXATTRIBI4UBVPROC glad_glVertexAttribI4ubv = NULL; PFNGLVERTEXATTRIBI4UIPROC glad_glVertexAttribI4ui = NULL; PFNGLVERTEXATTRIBI4UIVPROC glad_glVertexAttribI4uiv = NULL; +PFNGLVERTEXATTRIBI4USVPROC glad_glVertexAttribI4usv = NULL; PFNGLVERTEXATTRIBIFORMATPROC glad_glVertexAttribIFormat = NULL; PFNGLVERTEXATTRIBIPOINTERPROC glad_glVertexAttribIPointer = NULL; +PFNGLVERTEXATTRIBL1DPROC glad_glVertexAttribL1d = NULL; +PFNGLVERTEXATTRIBL1DVPROC glad_glVertexAttribL1dv = NULL; +PFNGLVERTEXATTRIBL2DPROC glad_glVertexAttribL2d = NULL; +PFNGLVERTEXATTRIBL2DVPROC glad_glVertexAttribL2dv = NULL; +PFNGLVERTEXATTRIBL3DPROC glad_glVertexAttribL3d = NULL; +PFNGLVERTEXATTRIBL3DVPROC glad_glVertexAttribL3dv = NULL; +PFNGLVERTEXATTRIBL4DPROC 
glad_glVertexAttribL4d = NULL; +PFNGLVERTEXATTRIBL4DVPROC glad_glVertexAttribL4dv = NULL; +PFNGLVERTEXATTRIBLFORMATPROC glad_glVertexAttribLFormat = NULL; +PFNGLVERTEXATTRIBLPOINTERPROC glad_glVertexAttribLPointer = NULL; +PFNGLVERTEXATTRIBP1UIPROC glad_glVertexAttribP1ui = NULL; +PFNGLVERTEXATTRIBP1UIVPROC glad_glVertexAttribP1uiv = NULL; +PFNGLVERTEXATTRIBP2UIPROC glad_glVertexAttribP2ui = NULL; +PFNGLVERTEXATTRIBP2UIVPROC glad_glVertexAttribP2uiv = NULL; +PFNGLVERTEXATTRIBP3UIPROC glad_glVertexAttribP3ui = NULL; +PFNGLVERTEXATTRIBP3UIVPROC glad_glVertexAttribP3uiv = NULL; +PFNGLVERTEXATTRIBP4UIPROC glad_glVertexAttribP4ui = NULL; +PFNGLVERTEXATTRIBP4UIVPROC glad_glVertexAttribP4uiv = NULL; PFNGLVERTEXATTRIBPOINTERPROC glad_glVertexAttribPointer = NULL; PFNGLVERTEXBINDINGDIVISORPROC glad_glVertexBindingDivisor = NULL; +PFNGLVERTEXP2UIPROC glad_glVertexP2ui = NULL; +PFNGLVERTEXP2UIVPROC glad_glVertexP2uiv = NULL; +PFNGLVERTEXP3UIPROC glad_glVertexP3ui = NULL; +PFNGLVERTEXP3UIVPROC glad_glVertexP3uiv = NULL; +PFNGLVERTEXP4UIPROC glad_glVertexP4ui = NULL; +PFNGLVERTEXP4UIVPROC glad_glVertexP4uiv = NULL; +PFNGLVERTEXPOINTERPROC glad_glVertexPointer = NULL; PFNGLVIEWPORTPROC glad_glViewport = NULL; +PFNGLVIEWPORTARRAYVPROC glad_glViewportArrayv = NULL; +PFNGLVIEWPORTINDEXEDFPROC glad_glViewportIndexedf = NULL; +PFNGLVIEWPORTINDEXEDFVPROC glad_glViewportIndexedfv = NULL; PFNGLWAITSYNCPROC glad_glWaitSync = NULL; +PFNGLWINDOWPOS2DPROC glad_glWindowPos2d = NULL; +PFNGLWINDOWPOS2DVPROC glad_glWindowPos2dv = NULL; +PFNGLWINDOWPOS2FPROC glad_glWindowPos2f = NULL; +PFNGLWINDOWPOS2FVPROC glad_glWindowPos2fv = NULL; +PFNGLWINDOWPOS2IPROC glad_glWindowPos2i = NULL; +PFNGLWINDOWPOS2IVPROC glad_glWindowPos2iv = NULL; +PFNGLWINDOWPOS2SPROC glad_glWindowPos2s = NULL; +PFNGLWINDOWPOS2SVPROC glad_glWindowPos2sv = NULL; +PFNGLWINDOWPOS3DPROC glad_glWindowPos3d = NULL; +PFNGLWINDOWPOS3DVPROC glad_glWindowPos3dv = NULL; +PFNGLWINDOWPOS3FPROC glad_glWindowPos3f = NULL; 
+PFNGLWINDOWPOS3FVPROC glad_glWindowPos3fv = NULL; +PFNGLWINDOWPOS3IPROC glad_glWindowPos3i = NULL; +PFNGLWINDOWPOS3IVPROC glad_glWindowPos3iv = NULL; +PFNGLWINDOWPOS3SPROC glad_glWindowPos3s = NULL; +PFNGLWINDOWPOS3SVPROC glad_glWindowPos3sv = NULL; +int GLAD_GL_3DFX_multisample = 0; +int GLAD_GL_3DFX_tbuffer = 0; +int GLAD_GL_3DFX_texture_compression_FXT1 = 0; +int GLAD_GL_AMD_blend_minmax_factor = 0; int GLAD_GL_AMD_compressed_3DC_texture = 0; int GLAD_GL_AMD_compressed_ATC_texture = 0; +int GLAD_GL_AMD_conservative_depth = 0; +int GLAD_GL_AMD_debug_output = 0; +int GLAD_GL_AMD_depth_clamp_separate = 0; +int GLAD_GL_AMD_draw_buffers_blend = 0; int GLAD_GL_AMD_framebuffer_multisample_advanced = 0; +int GLAD_GL_AMD_framebuffer_sample_positions = 0; +int GLAD_GL_AMD_gcn_shader = 0; +int GLAD_GL_AMD_gpu_shader_half_float = 0; +int GLAD_GL_AMD_gpu_shader_int16 = 0; +int GLAD_GL_AMD_gpu_shader_int64 = 0; +int GLAD_GL_AMD_interleaved_elements = 0; +int GLAD_GL_AMD_multi_draw_indirect = 0; +int GLAD_GL_AMD_name_gen_delete = 0; +int GLAD_GL_AMD_occlusion_query_event = 0; int GLAD_GL_AMD_performance_monitor = 0; +int GLAD_GL_AMD_pinned_memory = 0; int GLAD_GL_AMD_program_binary_Z400 = 0; +int GLAD_GL_AMD_query_buffer_object = 0; +int GLAD_GL_AMD_sample_positions = 0; +int GLAD_GL_AMD_seamless_cubemap_per_texture = 0; +int GLAD_GL_AMD_shader_atomic_counter_ops = 0; +int GLAD_GL_AMD_shader_ballot = 0; +int GLAD_GL_AMD_shader_explicit_vertex_parameter = 0; +int GLAD_GL_AMD_shader_gpu_shader_half_float_fetch = 0; +int GLAD_GL_AMD_shader_image_load_store_lod = 0; +int GLAD_GL_AMD_shader_stencil_export = 0; +int GLAD_GL_AMD_shader_trinary_minmax = 0; +int GLAD_GL_AMD_sparse_texture = 0; +int GLAD_GL_AMD_stencil_operation_extended = 0; +int GLAD_GL_AMD_texture_gather_bias_lod = 0; +int GLAD_GL_AMD_texture_texture4 = 0; +int GLAD_GL_AMD_transform_feedback3_lines_triangles = 0; +int GLAD_GL_AMD_transform_feedback4 = 0; +int GLAD_GL_AMD_vertex_shader_layer = 0; +int 
GLAD_GL_AMD_vertex_shader_tessellator = 0; +int GLAD_GL_AMD_vertex_shader_viewport_index = 0; int GLAD_GL_ANDROID_extension_pack_es31a = 0; int GLAD_GL_ANGLE_depth_texture = 0; int GLAD_GL_ANGLE_framebuffer_blit = 0; @@ -826,15 +2281,202 @@ int GLAD_GL_ANGLE_texture_compression_dxt3 = 0; int GLAD_GL_ANGLE_texture_compression_dxt5 = 0; int GLAD_GL_ANGLE_texture_usage = 0; int GLAD_GL_ANGLE_translated_shader_source = 0; +int GLAD_GL_APPLE_aux_depth_stencil = 0; +int GLAD_GL_APPLE_client_storage = 0; int GLAD_GL_APPLE_clip_distance = 0; int GLAD_GL_APPLE_color_buffer_packed_float = 0; int GLAD_GL_APPLE_copy_texture_levels = 0; +int GLAD_GL_APPLE_element_array = 0; +int GLAD_GL_APPLE_fence = 0; +int GLAD_GL_APPLE_float_pixels = 0; +int GLAD_GL_APPLE_flush_buffer_range = 0; int GLAD_GL_APPLE_framebuffer_multisample = 0; +int GLAD_GL_APPLE_object_purgeable = 0; int GLAD_GL_APPLE_rgb_422 = 0; +int GLAD_GL_APPLE_row_bytes = 0; +int GLAD_GL_APPLE_specular_vector = 0; int GLAD_GL_APPLE_sync = 0; +int GLAD_GL_APPLE_texture_2D_limited_npot = 0; int GLAD_GL_APPLE_texture_format_BGRA8888 = 0; int GLAD_GL_APPLE_texture_max_level = 0; int GLAD_GL_APPLE_texture_packed_float = 0; +int GLAD_GL_APPLE_texture_range = 0; +int GLAD_GL_APPLE_transform_hint = 0; +int GLAD_GL_APPLE_vertex_array_object = 0; +int GLAD_GL_APPLE_vertex_array_range = 0; +int GLAD_GL_APPLE_vertex_program_evaluators = 0; +int GLAD_GL_APPLE_ycbcr_422 = 0; +int GLAD_GL_ARB_ES2_compatibility = 0; +int GLAD_GL_ARB_ES3_1_compatibility = 0; +int GLAD_GL_ARB_ES3_2_compatibility = 0; +int GLAD_GL_ARB_ES3_compatibility = 0; +int GLAD_GL_ARB_arrays_of_arrays = 0; +int GLAD_GL_ARB_base_instance = 0; +int GLAD_GL_ARB_bindless_texture = 0; +int GLAD_GL_ARB_blend_func_extended = 0; +int GLAD_GL_ARB_buffer_storage = 0; +int GLAD_GL_ARB_cl_event = 0; +int GLAD_GL_ARB_clear_buffer_object = 0; +int GLAD_GL_ARB_clear_texture = 0; +int GLAD_GL_ARB_clip_control = 0; +int GLAD_GL_ARB_color_buffer_float = 0; +int 
GLAD_GL_ARB_compatibility = 0; +int GLAD_GL_ARB_compressed_texture_pixel_storage = 0; +int GLAD_GL_ARB_compute_shader = 0; +int GLAD_GL_ARB_compute_variable_group_size = 0; +int GLAD_GL_ARB_conditional_render_inverted = 0; +int GLAD_GL_ARB_conservative_depth = 0; +int GLAD_GL_ARB_copy_buffer = 0; +int GLAD_GL_ARB_copy_image = 0; +int GLAD_GL_ARB_cull_distance = 0; +int GLAD_GL_ARB_debug_output = 0; +int GLAD_GL_ARB_depth_buffer_float = 0; +int GLAD_GL_ARB_depth_clamp = 0; +int GLAD_GL_ARB_depth_texture = 0; +int GLAD_GL_ARB_derivative_control = 0; +int GLAD_GL_ARB_direct_state_access = 0; +int GLAD_GL_ARB_draw_buffers = 0; +int GLAD_GL_ARB_draw_buffers_blend = 0; +int GLAD_GL_ARB_draw_elements_base_vertex = 0; +int GLAD_GL_ARB_draw_indirect = 0; +int GLAD_GL_ARB_draw_instanced = 0; +int GLAD_GL_ARB_enhanced_layouts = 0; +int GLAD_GL_ARB_explicit_attrib_location = 0; +int GLAD_GL_ARB_explicit_uniform_location = 0; +int GLAD_GL_ARB_fragment_coord_conventions = 0; +int GLAD_GL_ARB_fragment_layer_viewport = 0; +int GLAD_GL_ARB_fragment_program = 0; +int GLAD_GL_ARB_fragment_program_shadow = 0; +int GLAD_GL_ARB_fragment_shader = 0; +int GLAD_GL_ARB_fragment_shader_interlock = 0; +int GLAD_GL_ARB_framebuffer_no_attachments = 0; +int GLAD_GL_ARB_framebuffer_object = 0; +int GLAD_GL_ARB_framebuffer_sRGB = 0; +int GLAD_GL_ARB_geometry_shader4 = 0; +int GLAD_GL_ARB_get_program_binary = 0; +int GLAD_GL_ARB_get_texture_sub_image = 0; +int GLAD_GL_ARB_gl_spirv = 0; +int GLAD_GL_ARB_gpu_shader5 = 0; +int GLAD_GL_ARB_gpu_shader_fp64 = 0; +int GLAD_GL_ARB_gpu_shader_int64 = 0; +int GLAD_GL_ARB_half_float_pixel = 0; +int GLAD_GL_ARB_half_float_vertex = 0; +int GLAD_GL_ARB_imaging = 0; +int GLAD_GL_ARB_indirect_parameters = 0; +int GLAD_GL_ARB_instanced_arrays = 0; +int GLAD_GL_ARB_internalformat_query = 0; +int GLAD_GL_ARB_internalformat_query2 = 0; +int GLAD_GL_ARB_invalidate_subdata = 0; +int GLAD_GL_ARB_map_buffer_alignment = 0; +int GLAD_GL_ARB_map_buffer_range = 0; +int 
GLAD_GL_ARB_matrix_palette = 0; +int GLAD_GL_ARB_multi_bind = 0; +int GLAD_GL_ARB_multi_draw_indirect = 0; +int GLAD_GL_ARB_multisample = 0; +int GLAD_GL_ARB_multitexture = 0; +int GLAD_GL_ARB_occlusion_query = 0; +int GLAD_GL_ARB_occlusion_query2 = 0; +int GLAD_GL_ARB_parallel_shader_compile = 0; +int GLAD_GL_ARB_pipeline_statistics_query = 0; +int GLAD_GL_ARB_pixel_buffer_object = 0; +int GLAD_GL_ARB_point_parameters = 0; +int GLAD_GL_ARB_point_sprite = 0; +int GLAD_GL_ARB_polygon_offset_clamp = 0; +int GLAD_GL_ARB_post_depth_coverage = 0; +int GLAD_GL_ARB_program_interface_query = 0; +int GLAD_GL_ARB_provoking_vertex = 0; +int GLAD_GL_ARB_query_buffer_object = 0; +int GLAD_GL_ARB_robust_buffer_access_behavior = 0; +int GLAD_GL_ARB_robustness = 0; +int GLAD_GL_ARB_robustness_isolation = 0; +int GLAD_GL_ARB_sample_locations = 0; +int GLAD_GL_ARB_sample_shading = 0; +int GLAD_GL_ARB_sampler_objects = 0; +int GLAD_GL_ARB_seamless_cube_map = 0; +int GLAD_GL_ARB_seamless_cubemap_per_texture = 0; +int GLAD_GL_ARB_separate_shader_objects = 0; +int GLAD_GL_ARB_shader_atomic_counter_ops = 0; +int GLAD_GL_ARB_shader_atomic_counters = 0; +int GLAD_GL_ARB_shader_ballot = 0; +int GLAD_GL_ARB_shader_bit_encoding = 0; +int GLAD_GL_ARB_shader_clock = 0; +int GLAD_GL_ARB_shader_draw_parameters = 0; +int GLAD_GL_ARB_shader_group_vote = 0; +int GLAD_GL_ARB_shader_image_load_store = 0; +int GLAD_GL_ARB_shader_image_size = 0; +int GLAD_GL_ARB_shader_objects = 0; +int GLAD_GL_ARB_shader_precision = 0; +int GLAD_GL_ARB_shader_stencil_export = 0; +int GLAD_GL_ARB_shader_storage_buffer_object = 0; +int GLAD_GL_ARB_shader_subroutine = 0; +int GLAD_GL_ARB_shader_texture_image_samples = 0; +int GLAD_GL_ARB_shader_texture_lod = 0; +int GLAD_GL_ARB_shader_viewport_layer_array = 0; +int GLAD_GL_ARB_shading_language_100 = 0; +int GLAD_GL_ARB_shading_language_420pack = 0; +int GLAD_GL_ARB_shading_language_include = 0; +int GLAD_GL_ARB_shading_language_packing = 0; +int GLAD_GL_ARB_shadow = 0; 
+int GLAD_GL_ARB_shadow_ambient = 0; +int GLAD_GL_ARB_sparse_buffer = 0; +int GLAD_GL_ARB_sparse_texture = 0; +int GLAD_GL_ARB_sparse_texture2 = 0; +int GLAD_GL_ARB_sparse_texture_clamp = 0; +int GLAD_GL_ARB_spirv_extensions = 0; +int GLAD_GL_ARB_stencil_texturing = 0; +int GLAD_GL_ARB_sync = 0; +int GLAD_GL_ARB_tessellation_shader = 0; +int GLAD_GL_ARB_texture_barrier = 0; +int GLAD_GL_ARB_texture_border_clamp = 0; +int GLAD_GL_ARB_texture_buffer_object = 0; +int GLAD_GL_ARB_texture_buffer_object_rgb32 = 0; +int GLAD_GL_ARB_texture_buffer_range = 0; +int GLAD_GL_ARB_texture_compression = 0; +int GLAD_GL_ARB_texture_compression_bptc = 0; +int GLAD_GL_ARB_texture_compression_rgtc = 0; +int GLAD_GL_ARB_texture_cube_map = 0; +int GLAD_GL_ARB_texture_cube_map_array = 0; +int GLAD_GL_ARB_texture_env_add = 0; +int GLAD_GL_ARB_texture_env_combine = 0; +int GLAD_GL_ARB_texture_env_crossbar = 0; +int GLAD_GL_ARB_texture_env_dot3 = 0; +int GLAD_GL_ARB_texture_filter_anisotropic = 0; +int GLAD_GL_ARB_texture_filter_minmax = 0; +int GLAD_GL_ARB_texture_float = 0; +int GLAD_GL_ARB_texture_gather = 0; +int GLAD_GL_ARB_texture_mirror_clamp_to_edge = 0; +int GLAD_GL_ARB_texture_mirrored_repeat = 0; +int GLAD_GL_ARB_texture_multisample = 0; +int GLAD_GL_ARB_texture_non_power_of_two = 0; +int GLAD_GL_ARB_texture_query_levels = 0; +int GLAD_GL_ARB_texture_query_lod = 0; +int GLAD_GL_ARB_texture_rectangle = 0; +int GLAD_GL_ARB_texture_rg = 0; +int GLAD_GL_ARB_texture_rgb10_a2ui = 0; +int GLAD_GL_ARB_texture_stencil8 = 0; +int GLAD_GL_ARB_texture_storage = 0; +int GLAD_GL_ARB_texture_storage_multisample = 0; +int GLAD_GL_ARB_texture_swizzle = 0; +int GLAD_GL_ARB_texture_view = 0; +int GLAD_GL_ARB_timer_query = 0; +int GLAD_GL_ARB_transform_feedback2 = 0; +int GLAD_GL_ARB_transform_feedback3 = 0; +int GLAD_GL_ARB_transform_feedback_instanced = 0; +int GLAD_GL_ARB_transform_feedback_overflow_query = 0; +int GLAD_GL_ARB_transpose_matrix = 0; +int GLAD_GL_ARB_uniform_buffer_object = 0; 
+int GLAD_GL_ARB_vertex_array_bgra = 0; +int GLAD_GL_ARB_vertex_array_object = 0; +int GLAD_GL_ARB_vertex_attrib_64bit = 0; +int GLAD_GL_ARB_vertex_attrib_binding = 0; +int GLAD_GL_ARB_vertex_blend = 0; +int GLAD_GL_ARB_vertex_buffer_object = 0; +int GLAD_GL_ARB_vertex_program = 0; +int GLAD_GL_ARB_vertex_shader = 0; +int GLAD_GL_ARB_vertex_type_10f_11f_11f_rev = 0; +int GLAD_GL_ARB_vertex_type_2_10_10_10_rev = 0; +int GLAD_GL_ARB_viewport_array = 0; +int GLAD_GL_ARB_window_pos = 0; int GLAD_GL_ARM_mali_program_binary = 0; int GLAD_GL_ARM_mali_shader_binary = 0; int GLAD_GL_ARM_rgba8 = 0; @@ -842,48 +2484,103 @@ int GLAD_GL_ARM_shader_core_properties = 0; int GLAD_GL_ARM_shader_framebuffer_fetch = 0; int GLAD_GL_ARM_shader_framebuffer_fetch_depth_stencil = 0; int GLAD_GL_ARM_texture_unnormalized_coordinates = 0; +int GLAD_GL_ATI_draw_buffers = 0; +int GLAD_GL_ATI_element_array = 0; +int GLAD_GL_ATI_envmap_bumpmap = 0; +int GLAD_GL_ATI_fragment_shader = 0; +int GLAD_GL_ATI_map_object_buffer = 0; +int GLAD_GL_ATI_meminfo = 0; +int GLAD_GL_ATI_pixel_format_float = 0; +int GLAD_GL_ATI_pn_triangles = 0; +int GLAD_GL_ATI_separate_stencil = 0; +int GLAD_GL_ATI_text_fragment_shader = 0; +int GLAD_GL_ATI_texture_env_combine3 = 0; +int GLAD_GL_ATI_texture_float = 0; +int GLAD_GL_ATI_texture_mirror_once = 0; +int GLAD_GL_ATI_vertex_array_object = 0; +int GLAD_GL_ATI_vertex_attrib_array_object = 0; +int GLAD_GL_ATI_vertex_streams = 0; int GLAD_GL_DMP_program_binary = 0; int GLAD_GL_DMP_shader_binary = 0; +int GLAD_GL_EXT_422_pixels = 0; int GLAD_GL_EXT_EGL_image_array = 0; int GLAD_GL_EXT_EGL_image_storage = 0; int GLAD_GL_EXT_EGL_image_storage_compression = 0; +int GLAD_GL_EXT_EGL_sync = 0; int GLAD_GL_EXT_YUV_target = 0; +int GLAD_GL_EXT_abgr = 0; int GLAD_GL_EXT_base_instance = 0; +int GLAD_GL_EXT_bgra = 0; +int GLAD_GL_EXT_bindable_uniform = 0; +int GLAD_GL_EXT_blend_color = 0; +int GLAD_GL_EXT_blend_equation_separate = 0; int GLAD_GL_EXT_blend_func_extended = 0; +int 
GLAD_GL_EXT_blend_func_separate = 0; +int GLAD_GL_EXT_blend_logic_op = 0; int GLAD_GL_EXT_blend_minmax = 0; +int GLAD_GL_EXT_blend_subtract = 0; int GLAD_GL_EXT_buffer_storage = 0; int GLAD_GL_EXT_clear_texture = 0; int GLAD_GL_EXT_clip_control = 0; int GLAD_GL_EXT_clip_cull_distance = 0; +int GLAD_GL_EXT_clip_volume_hint = 0; +int GLAD_GL_EXT_cmyka = 0; int GLAD_GL_EXT_color_buffer_float = 0; int GLAD_GL_EXT_color_buffer_half_float = 0; +int GLAD_GL_EXT_color_subtable = 0; +int GLAD_GL_EXT_compiled_vertex_array = 0; int GLAD_GL_EXT_conservative_depth = 0; +int GLAD_GL_EXT_convolution = 0; +int GLAD_GL_EXT_coordinate_frame = 0; int GLAD_GL_EXT_copy_image = 0; +int GLAD_GL_EXT_copy_texture = 0; +int GLAD_GL_EXT_cull_vertex = 0; int GLAD_GL_EXT_debug_label = 0; int GLAD_GL_EXT_debug_marker = 0; +int GLAD_GL_EXT_depth_bounds_test = 0; int GLAD_GL_EXT_depth_clamp = 0; +int GLAD_GL_EXT_direct_state_access = 0; int GLAD_GL_EXT_discard_framebuffer = 0; int GLAD_GL_EXT_disjoint_timer_query = 0; int GLAD_GL_EXT_draw_buffers = 0; +int GLAD_GL_EXT_draw_buffers2 = 0; int GLAD_GL_EXT_draw_buffers_indexed = 0; int GLAD_GL_EXT_draw_elements_base_vertex = 0; int GLAD_GL_EXT_draw_instanced = 0; +int GLAD_GL_EXT_draw_range_elements = 0; int GLAD_GL_EXT_draw_transform_feedback = 0; int GLAD_GL_EXT_external_buffer = 0; int GLAD_GL_EXT_float_blend = 0; +int GLAD_GL_EXT_fog_coord = 0; int GLAD_GL_EXT_fragment_shading_rate = 0; +int GLAD_GL_EXT_framebuffer_blit = 0; int GLAD_GL_EXT_framebuffer_blit_layers = 0; +int GLAD_GL_EXT_framebuffer_multisample = 0; +int GLAD_GL_EXT_framebuffer_multisample_blit_scaled = 0; +int GLAD_GL_EXT_framebuffer_object = 0; +int GLAD_GL_EXT_framebuffer_sRGB = 0; int GLAD_GL_EXT_geometry_point_size = 0; int GLAD_GL_EXT_geometry_shader = 0; +int GLAD_GL_EXT_geometry_shader4 = 0; +int GLAD_GL_EXT_gpu_program_parameters = 0; +int GLAD_GL_EXT_gpu_shader4 = 0; int GLAD_GL_EXT_gpu_shader5 = 0; +int GLAD_GL_EXT_histogram = 0; +int GLAD_GL_EXT_index_array_formats = 0; 
+int GLAD_GL_EXT_index_func = 0; +int GLAD_GL_EXT_index_material = 0; +int GLAD_GL_EXT_index_texture = 0; int GLAD_GL_EXT_instanced_arrays = 0; +int GLAD_GL_EXT_light_texture = 0; int GLAD_GL_EXT_map_buffer_range = 0; int GLAD_GL_EXT_memory_object = 0; int GLAD_GL_EXT_memory_object_fd = 0; int GLAD_GL_EXT_memory_object_win32 = 0; int GLAD_GL_EXT_mesh_shader = 0; +int GLAD_GL_EXT_misc_attribute = 0; int GLAD_GL_EXT_multi_draw_arrays = 0; int GLAD_GL_EXT_multi_draw_indirect = 0; +int GLAD_GL_EXT_multisample = 0; int GLAD_GL_EXT_multisampled_compatibility = 0; int GLAD_GL_EXT_multisampled_render_to_texture = 0; int GLAD_GL_EXT_multisampled_render_to_texture2 = 0; @@ -892,25 +2589,40 @@ int GLAD_GL_EXT_multiview_tessellation_geometry_shader = 0; int GLAD_GL_EXT_multiview_texture_multisample = 0; int GLAD_GL_EXT_multiview_timer_query = 0; int GLAD_GL_EXT_occlusion_query_boolean = 0; +int GLAD_GL_EXT_packed_depth_stencil = 0; +int GLAD_GL_EXT_packed_float = 0; +int GLAD_GL_EXT_packed_pixels = 0; +int GLAD_GL_EXT_paletted_texture = 0; +int GLAD_GL_EXT_pixel_buffer_object = 0; +int GLAD_GL_EXT_pixel_transform = 0; +int GLAD_GL_EXT_pixel_transform_color_table = 0; +int GLAD_GL_EXT_point_parameters = 0; +int GLAD_GL_EXT_polygon_offset = 0; int GLAD_GL_EXT_polygon_offset_clamp = 0; int GLAD_GL_EXT_post_depth_coverage = 0; int GLAD_GL_EXT_primitive_bounding_box = 0; int GLAD_GL_EXT_protected_textures = 0; +int GLAD_GL_EXT_provoking_vertex = 0; int GLAD_GL_EXT_pvrtc_sRGB = 0; int GLAD_GL_EXT_raster_multisample = 0; int GLAD_GL_EXT_read_format_bgra = 0; int GLAD_GL_EXT_render_snorm = 0; +int GLAD_GL_EXT_rescale_normal = 0; int GLAD_GL_EXT_robustness = 0; int GLAD_GL_EXT_sRGB = 0; int GLAD_GL_EXT_sRGB_write_control = 0; +int GLAD_GL_EXT_secondary_color = 0; int GLAD_GL_EXT_semaphore = 0; int GLAD_GL_EXT_semaphore_fd = 0; int GLAD_GL_EXT_semaphore_win32 = 0; int GLAD_GL_EXT_separate_depth_stencil = 0; int GLAD_GL_EXT_separate_shader_objects = 0; +int 
GLAD_GL_EXT_separate_specular_color = 0; int GLAD_GL_EXT_shader_framebuffer_fetch = 0; int GLAD_GL_EXT_shader_framebuffer_fetch_non_coherent = 0; int GLAD_GL_EXT_shader_group_vote = 0; +int GLAD_GL_EXT_shader_image_load_formatted = 0; +int GLAD_GL_EXT_shader_image_load_store = 0; int GLAD_GL_EXT_shader_implicit_conversions = 0; int GLAD_GL_EXT_shader_integer_mix = 0; int GLAD_GL_EXT_shader_io_blocks = 0; @@ -920,54 +2632,108 @@ int GLAD_GL_EXT_shader_pixel_local_storage2 = 0; int GLAD_GL_EXT_shader_samples_identical = 0; int GLAD_GL_EXT_shader_texture_lod = 0; int GLAD_GL_EXT_shader_texture_samples = 0; +int GLAD_GL_EXT_shadow_funcs = 0; int GLAD_GL_EXT_shadow_samplers = 0; +int GLAD_GL_EXT_shared_texture_palette = 0; int GLAD_GL_EXT_sparse_texture = 0; int GLAD_GL_EXT_sparse_texture2 = 0; +int GLAD_GL_EXT_stencil_clear_tag = 0; +int GLAD_GL_EXT_stencil_two_side = 0; +int GLAD_GL_EXT_stencil_wrap = 0; +int GLAD_GL_EXT_subtexture = 0; int GLAD_GL_EXT_tessellation_point_size = 0; int GLAD_GL_EXT_tessellation_shader = 0; +int GLAD_GL_EXT_texture = 0; +int GLAD_GL_EXT_texture3D = 0; +int GLAD_GL_EXT_texture_array = 0; int GLAD_GL_EXT_texture_border_clamp = 0; int GLAD_GL_EXT_texture_buffer = 0; +int GLAD_GL_EXT_texture_buffer_object = 0; int GLAD_GL_EXT_texture_compression_astc_decode_mode = 0; int GLAD_GL_EXT_texture_compression_bptc = 0; int GLAD_GL_EXT_texture_compression_dxt1 = 0; +int GLAD_GL_EXT_texture_compression_latc = 0; int GLAD_GL_EXT_texture_compression_rgtc = 0; int GLAD_GL_EXT_texture_compression_s3tc = 0; int GLAD_GL_EXT_texture_compression_s3tc_srgb = 0; +int GLAD_GL_EXT_texture_cube_map = 0; int GLAD_GL_EXT_texture_cube_map_array = 0; +int GLAD_GL_EXT_texture_env_add = 0; +int GLAD_GL_EXT_texture_env_combine = 0; +int GLAD_GL_EXT_texture_env_dot3 = 0; int GLAD_GL_EXT_texture_filter_anisotropic = 0; int GLAD_GL_EXT_texture_filter_minmax = 0; int GLAD_GL_EXT_texture_format_BGRA8888 = 0; int GLAD_GL_EXT_texture_format_sRGB_override = 0; +int 
GLAD_GL_EXT_texture_integer = 0; +int GLAD_GL_EXT_texture_lod_bias = 0; +int GLAD_GL_EXT_texture_mirror_clamp = 0; int GLAD_GL_EXT_texture_mirror_clamp_to_edge = 0; int GLAD_GL_EXT_texture_norm16 = 0; +int GLAD_GL_EXT_texture_object = 0; +int GLAD_GL_EXT_texture_perturb_normal = 0; int GLAD_GL_EXT_texture_query_lod = 0; int GLAD_GL_EXT_texture_rg = 0; +int GLAD_GL_EXT_texture_sRGB = 0; int GLAD_GL_EXT_texture_sRGB_R8 = 0; int GLAD_GL_EXT_texture_sRGB_RG8 = 0; int GLAD_GL_EXT_texture_sRGB_decode = 0; int GLAD_GL_EXT_texture_shadow_lod = 0; +int GLAD_GL_EXT_texture_shared_exponent = 0; +int GLAD_GL_EXT_texture_snorm = 0; int GLAD_GL_EXT_texture_storage = 0; int GLAD_GL_EXT_texture_storage_compression = 0; +int GLAD_GL_EXT_texture_swizzle = 0; int GLAD_GL_EXT_texture_type_2_10_10_10_REV = 0; int GLAD_GL_EXT_texture_view = 0; +int GLAD_GL_EXT_timer_query = 0; +int GLAD_GL_EXT_transform_feedback = 0; int GLAD_GL_EXT_unpack_subimage = 0; +int GLAD_GL_EXT_vertex_array = 0; +int GLAD_GL_EXT_vertex_array_bgra = 0; +int GLAD_GL_EXT_vertex_attrib_64bit = 0; +int GLAD_GL_EXT_vertex_shader = 0; +int GLAD_GL_EXT_vertex_weighting = 0; int GLAD_GL_EXT_win32_keyed_mutex = 0; int GLAD_GL_EXT_window_rectangles = 0; +int GLAD_GL_EXT_x11_sync_object = 0; int GLAD_GL_FJ_shader_binary_GCCSO = 0; +int GLAD_GL_GREMEDY_frame_terminator = 0; +int GLAD_GL_GREMEDY_string_marker = 0; +int GLAD_GL_HP_convolution_border_modes = 0; +int GLAD_GL_HP_image_transform = 0; +int GLAD_GL_HP_occlusion_test = 0; +int GLAD_GL_HP_texture_lighting = 0; int GLAD_GL_HUAWEI_program_binary = 0; int GLAD_GL_HUAWEI_shader_binary = 0; +int GLAD_GL_IBM_cull_vertex = 0; +int GLAD_GL_IBM_multimode_draw_arrays = 0; +int GLAD_GL_IBM_rasterpos_clip = 0; +int GLAD_GL_IBM_static_data = 0; +int GLAD_GL_IBM_texture_mirrored_repeat = 0; +int GLAD_GL_IBM_vertex_array_lists = 0; int GLAD_GL_IMG_bindless_texture = 0; int GLAD_GL_IMG_framebuffer_downsample = 0; int GLAD_GL_IMG_multisampled_render_to_texture = 0; int 
GLAD_GL_IMG_program_binary = 0; +int GLAD_GL_IMG_pvric_end_to_end_signature = 0; int GLAD_GL_IMG_read_format = 0; int GLAD_GL_IMG_shader_binary = 0; int GLAD_GL_IMG_texture_compression_pvrtc = 0; int GLAD_GL_IMG_texture_compression_pvrtc2 = 0; +int GLAD_GL_IMG_texture_env_enhanced_fixed_function = 0; int GLAD_GL_IMG_texture_filter_cubic = 0; +int GLAD_GL_IMG_tile_region_protection = 0; +int GLAD_GL_IMG_user_clip_plane = 0; +int GLAD_GL_INGR_blend_func_separate = 0; +int GLAD_GL_INGR_color_clamp = 0; +int GLAD_GL_INGR_interlace_read = 0; int GLAD_GL_INTEL_blackhole_render = 0; int GLAD_GL_INTEL_conservative_rasterization = 0; +int GLAD_GL_INTEL_fragment_shader_ordering = 0; int GLAD_GL_INTEL_framebuffer_CMAA = 0; +int GLAD_GL_INTEL_map_texture = 0; +int GLAD_GL_INTEL_parallel_arrays = 0; int GLAD_GL_INTEL_performance_query = 0; int GLAD_GL_KHR_blend_equation_advanced = 0; int GLAD_GL_KHR_blend_equation_advanced_coherent = 0; @@ -981,86 +2747,185 @@ int GLAD_GL_KHR_shader_subgroup = 0; int GLAD_GL_KHR_texture_compression_astc_hdr = 0; int GLAD_GL_KHR_texture_compression_astc_ldr = 0; int GLAD_GL_KHR_texture_compression_astc_sliced_3d = 0; +int GLAD_GL_MESAX_texture_stack = 0; int GLAD_GL_MESA_bgra = 0; int GLAD_GL_MESA_framebuffer_flip_x = 0; int GLAD_GL_MESA_framebuffer_flip_y = 0; int GLAD_GL_MESA_framebuffer_swap_xy = 0; +int GLAD_GL_MESA_pack_invert = 0; int GLAD_GL_MESA_program_binary_formats = 0; +int GLAD_GL_MESA_resize_buffers = 0; int GLAD_GL_MESA_sampler_objects = 0; int GLAD_GL_MESA_shader_integer_functions = 0; int GLAD_GL_MESA_texture_const_bandwidth = 0; +int GLAD_GL_MESA_tile_raster_order = 0; +int GLAD_GL_MESA_window_pos = 0; +int GLAD_GL_MESA_ycbcr_texture = 0; int GLAD_GL_NVX_blend_equation_advanced_multi_draw_buffers = 0; +int GLAD_GL_NVX_conditional_render = 0; +int GLAD_GL_NVX_gpu_memory_info = 0; +int GLAD_GL_NVX_gpu_multicast2 = 0; +int GLAD_GL_NVX_linked_gpu_multicast = 0; +int GLAD_GL_NVX_progress_fence = 0; +int 
GLAD_GL_NV_alpha_to_coverage_dither_control = 0; +int GLAD_GL_NV_bindless_multi_draw_indirect = 0; +int GLAD_GL_NV_bindless_multi_draw_indirect_count = 0; int GLAD_GL_NV_bindless_texture = 0; int GLAD_GL_NV_blend_equation_advanced = 0; int GLAD_GL_NV_blend_equation_advanced_coherent = 0; int GLAD_GL_NV_blend_minmax_factor = 0; +int GLAD_GL_NV_blend_square = 0; int GLAD_GL_NV_clip_space_w_scaling = 0; +int GLAD_GL_NV_command_list = 0; +int GLAD_GL_NV_compute_program5 = 0; int GLAD_GL_NV_compute_shader_derivatives = 0; int GLAD_GL_NV_conditional_render = 0; int GLAD_GL_NV_conservative_raster = 0; +int GLAD_GL_NV_conservative_raster_dilate = 0; int GLAD_GL_NV_conservative_raster_pre_snap = 0; int GLAD_GL_NV_conservative_raster_pre_snap_triangles = 0; +int GLAD_GL_NV_conservative_raster_underestimation = 0; int GLAD_GL_NV_copy_buffer = 0; +int GLAD_GL_NV_copy_depth_to_color = 0; +int GLAD_GL_NV_copy_image = 0; int GLAD_GL_NV_coverage_sample = 0; +int GLAD_GL_NV_deep_texture3D = 0; +int GLAD_GL_NV_depth_buffer_float = 0; +int GLAD_GL_NV_depth_clamp = 0; int GLAD_GL_NV_depth_nonlinear = 0; int GLAD_GL_NV_draw_buffers = 0; int GLAD_GL_NV_draw_instanced = 0; +int GLAD_GL_NV_draw_texture = 0; int GLAD_GL_NV_draw_vulkan_image = 0; +int GLAD_GL_NV_evaluators = 0; int GLAD_GL_NV_explicit_attrib_location = 0; +int GLAD_GL_NV_explicit_multisample = 0; int GLAD_GL_NV_fbo_color_attachments = 0; int GLAD_GL_NV_fence = 0; int GLAD_GL_NV_fill_rectangle = 0; +int GLAD_GL_NV_float_buffer = 0; +int GLAD_GL_NV_fog_distance = 0; int GLAD_GL_NV_fragment_coverage_to_color = 0; +int GLAD_GL_NV_fragment_program = 0; +int GLAD_GL_NV_fragment_program2 = 0; +int GLAD_GL_NV_fragment_program4 = 0; +int GLAD_GL_NV_fragment_program_option = 0; int GLAD_GL_NV_fragment_shader_barycentric = 0; int GLAD_GL_NV_fragment_shader_interlock = 0; int GLAD_GL_NV_framebuffer_blit = 0; int GLAD_GL_NV_framebuffer_mixed_samples = 0; int GLAD_GL_NV_framebuffer_multisample = 0; +int 
GLAD_GL_NV_framebuffer_multisample_coverage = 0; int GLAD_GL_NV_generate_mipmap_sRGB = 0; +int GLAD_GL_NV_geometry_program4 = 0; +int GLAD_GL_NV_geometry_shader4 = 0; int GLAD_GL_NV_geometry_shader_passthrough = 0; +int GLAD_GL_NV_gpu_multicast = 0; +int GLAD_GL_NV_gpu_program4 = 0; +int GLAD_GL_NV_gpu_program5 = 0; +int GLAD_GL_NV_gpu_program5_mem_extended = 0; int GLAD_GL_NV_gpu_shader5 = 0; +int GLAD_GL_NV_half_float = 0; int GLAD_GL_NV_image_formats = 0; int GLAD_GL_NV_instanced_arrays = 0; int GLAD_GL_NV_internalformat_sample_query = 0; +int GLAD_GL_NV_light_max_exponent = 0; int GLAD_GL_NV_memory_attachment = 0; int GLAD_GL_NV_memory_object_sparse = 0; int GLAD_GL_NV_mesh_shader = 0; +int GLAD_GL_NV_multisample_coverage = 0; +int GLAD_GL_NV_multisample_filter_hint = 0; int GLAD_GL_NV_non_square_matrices = 0; +int GLAD_GL_NV_occlusion_query = 0; int GLAD_GL_NV_pack_subimage = 0; +int GLAD_GL_NV_packed_depth_stencil = 0; +int GLAD_GL_NV_parameter_buffer_object = 0; +int GLAD_GL_NV_parameter_buffer_object2 = 0; int GLAD_GL_NV_path_rendering = 0; int GLAD_GL_NV_path_rendering_shared_edge = 0; int GLAD_GL_NV_pixel_buffer_object = 0; +int GLAD_GL_NV_pixel_data_range = 0; +int GLAD_GL_NV_point_sprite = 0; int GLAD_GL_NV_polygon_mode = 0; +int GLAD_GL_NV_present_video = 0; +int GLAD_GL_NV_primitive_restart = 0; int GLAD_GL_NV_primitive_shading_rate = 0; +int GLAD_GL_NV_query_resource = 0; +int GLAD_GL_NV_query_resource_tag = 0; int GLAD_GL_NV_read_buffer = 0; int GLAD_GL_NV_read_buffer_front = 0; int GLAD_GL_NV_read_depth = 0; int GLAD_GL_NV_read_depth_stencil = 0; int GLAD_GL_NV_read_stencil = 0; +int GLAD_GL_NV_register_combiners = 0; +int GLAD_GL_NV_register_combiners2 = 0; int GLAD_GL_NV_representative_fragment_test = 0; +int GLAD_GL_NV_robustness_video_memory_purge = 0; int GLAD_GL_NV_sRGB_formats = 0; int GLAD_GL_NV_sample_locations = 0; int GLAD_GL_NV_sample_mask_override_coverage = 0; int GLAD_GL_NV_scissor_exclusive = 0; +int 
GLAD_GL_NV_shader_atomic_counters = 0; +int GLAD_GL_NV_shader_atomic_float = 0; +int GLAD_GL_NV_shader_atomic_float64 = 0; int GLAD_GL_NV_shader_atomic_fp16_vector = 0; +int GLAD_GL_NV_shader_atomic_int64 = 0; +int GLAD_GL_NV_shader_buffer_load = 0; +int GLAD_GL_NV_shader_buffer_store = 0; int GLAD_GL_NV_shader_noperspective_interpolation = 0; +int GLAD_GL_NV_shader_storage_buffer_object = 0; int GLAD_GL_NV_shader_subgroup_partitioned = 0; int GLAD_GL_NV_shader_texture_footprint = 0; +int GLAD_GL_NV_shader_thread_group = 0; +int GLAD_GL_NV_shader_thread_shuffle = 0; int GLAD_GL_NV_shading_rate_image = 0; int GLAD_GL_NV_shadow_samplers_array = 0; int GLAD_GL_NV_shadow_samplers_cube = 0; int GLAD_GL_NV_stereo_view_rendering = 0; +int GLAD_GL_NV_tessellation_program5 = 0; +int GLAD_GL_NV_texgen_emboss = 0; +int GLAD_GL_NV_texgen_reflection = 0; int GLAD_GL_NV_texture_barrier = 0; int GLAD_GL_NV_texture_border_clamp = 0; int GLAD_GL_NV_texture_compression_s3tc_update = 0; +int GLAD_GL_NV_texture_compression_vtc = 0; +int GLAD_GL_NV_texture_env_combine4 = 0; +int GLAD_GL_NV_texture_expand_normal = 0; +int GLAD_GL_NV_texture_multisample = 0; int GLAD_GL_NV_texture_npot_2D_mipmap = 0; +int GLAD_GL_NV_texture_rectangle = 0; +int GLAD_GL_NV_texture_rectangle_compressed = 0; +int GLAD_GL_NV_texture_shader = 0; +int GLAD_GL_NV_texture_shader2 = 0; +int GLAD_GL_NV_texture_shader3 = 0; int GLAD_GL_NV_timeline_semaphore = 0; +int GLAD_GL_NV_transform_feedback = 0; +int GLAD_GL_NV_transform_feedback2 = 0; +int GLAD_GL_NV_uniform_buffer_std430_layout = 0; +int GLAD_GL_NV_uniform_buffer_unified_memory = 0; +int GLAD_GL_NV_vdpau_interop = 0; +int GLAD_GL_NV_vdpau_interop2 = 0; +int GLAD_GL_NV_vertex_array_range = 0; +int GLAD_GL_NV_vertex_array_range2 = 0; +int GLAD_GL_NV_vertex_attrib_integer_64bit = 0; +int GLAD_GL_NV_vertex_buffer_unified_memory = 0; +int GLAD_GL_NV_vertex_program = 0; +int GLAD_GL_NV_vertex_program1_1 = 0; +int GLAD_GL_NV_vertex_program2 = 0; +int 
GLAD_GL_NV_vertex_program2_option = 0; +int GLAD_GL_NV_vertex_program3 = 0; +int GLAD_GL_NV_vertex_program4 = 0; +int GLAD_GL_NV_video_capture = 0; int GLAD_GL_NV_viewport_array = 0; int GLAD_GL_NV_viewport_array2 = 0; int GLAD_GL_NV_viewport_swizzle = 0; int GLAD_GL_OES_EGL_image = 0; int GLAD_GL_OES_EGL_image_external = 0; int GLAD_GL_OES_EGL_image_external_essl3 = 0; +int GLAD_GL_OES_blend_equation_separate = 0; +int GLAD_GL_OES_blend_func_separate = 0; +int GLAD_GL_OES_blend_subtract = 0; +int GLAD_GL_OES_byte_coordinates = 0; int GLAD_GL_OES_compressed_ETC1_RGB8_sub_texture = 0; int GLAD_GL_OES_compressed_ETC1_RGB8_texture = 0; int GLAD_GL_OES_compressed_paletted_texture = 0; @@ -1070,16 +2935,26 @@ int GLAD_GL_OES_depth32 = 0; int GLAD_GL_OES_depth_texture = 0; int GLAD_GL_OES_draw_buffers_indexed = 0; int GLAD_GL_OES_draw_elements_base_vertex = 0; +int GLAD_GL_OES_draw_texture = 0; int GLAD_GL_OES_element_index_uint = 0; +int GLAD_GL_OES_extended_matrix_palette = 0; int GLAD_GL_OES_fbo_render_mipmap = 0; +int GLAD_GL_OES_fixed_point = 0; int GLAD_GL_OES_fragment_precision_high = 0; +int GLAD_GL_OES_framebuffer_object = 0; int GLAD_GL_OES_geometry_point_size = 0; int GLAD_GL_OES_geometry_shader = 0; int GLAD_GL_OES_get_program_binary = 0; int GLAD_GL_OES_gpu_shader5 = 0; int GLAD_GL_OES_mapbuffer = 0; +int GLAD_GL_OES_matrix_get = 0; +int GLAD_GL_OES_matrix_palette = 0; int GLAD_GL_OES_packed_depth_stencil = 0; +int GLAD_GL_OES_point_size_array = 0; +int GLAD_GL_OES_point_sprite = 0; int GLAD_GL_OES_primitive_bounding_box = 0; +int GLAD_GL_OES_query_matrix = 0; +int GLAD_GL_OES_read_format = 0; int GLAD_GL_OES_required_internalformat = 0; int GLAD_GL_OES_rgb8_rgba8 = 0; int GLAD_GL_OES_sample_shading = 0; @@ -1087,9 +2962,12 @@ int GLAD_GL_OES_sample_variables = 0; int GLAD_GL_OES_shader_image_atomic = 0; int GLAD_GL_OES_shader_io_blocks = 0; int GLAD_GL_OES_shader_multisample_interpolation = 0; +int GLAD_GL_OES_single_precision = 0; int 
GLAD_GL_OES_standard_derivatives = 0; int GLAD_GL_OES_stencil1 = 0; int GLAD_GL_OES_stencil4 = 0; +int GLAD_GL_OES_stencil8 = 0; +int GLAD_GL_OES_stencil_wrap = 0; int GLAD_GL_OES_surfaceless_context = 0; int GLAD_GL_OES_tessellation_point_size = 0; int GLAD_GL_OES_tessellation_shader = 0; @@ -1097,11 +2975,14 @@ int GLAD_GL_OES_texture_3D = 0; int GLAD_GL_OES_texture_border_clamp = 0; int GLAD_GL_OES_texture_buffer = 0; int GLAD_GL_OES_texture_compression_astc = 0; +int GLAD_GL_OES_texture_cube_map = 0; int GLAD_GL_OES_texture_cube_map_array = 0; +int GLAD_GL_OES_texture_env_crossbar = 0; int GLAD_GL_OES_texture_float = 0; int GLAD_GL_OES_texture_float_linear = 0; int GLAD_GL_OES_texture_half_float = 0; int GLAD_GL_OES_texture_half_float_linear = 0; +int GLAD_GL_OES_texture_mirrored_repeat = 0; int GLAD_GL_OES_texture_npot = 0; int GLAD_GL_OES_texture_stencil8 = 0; int GLAD_GL_OES_texture_storage_multisample_2d_array = 0; @@ -1110,9 +2991,14 @@ int GLAD_GL_OES_vertex_array_object = 0; int GLAD_GL_OES_vertex_half_float = 0; int GLAD_GL_OES_vertex_type_10_10_10_2 = 0; int GLAD_GL_OES_viewport_array = 0; +int GLAD_GL_OML_interlace = 0; +int GLAD_GL_OML_resample = 0; +int GLAD_GL_OML_subsample = 0; int GLAD_GL_OVR_multiview = 0; int GLAD_GL_OVR_multiview2 = 0; int GLAD_GL_OVR_multiview_multisampled_render_to_texture = 0; +int GLAD_GL_PGI_misc_hints = 0; +int GLAD_GL_PGI_vertex_hints = 0; int GLAD_GL_QCOM_YUV_texture_gather = 0; int GLAD_GL_QCOM_alpha_test = 0; int GLAD_GL_QCOM_binning_control = 0; @@ -1135,9 +3021,130 @@ int GLAD_GL_QCOM_texture_lod_bias = 0; int GLAD_GL_QCOM_tiled_rendering = 0; int GLAD_GL_QCOM_writeonly_rendering = 0; int GLAD_GL_QCOM_ycbcr_degamma = 0; +int GLAD_GL_REND_screen_coordinates = 0; +int GLAD_GL_S3_s3tc = 0; +int GLAD_GL_SGIS_detail_texture = 0; +int GLAD_GL_SGIS_fog_function = 0; +int GLAD_GL_SGIS_generate_mipmap = 0; +int GLAD_GL_SGIS_multisample = 0; +int GLAD_GL_SGIS_pixel_texture = 0; +int GLAD_GL_SGIS_point_line_texgen = 0; +int 
GLAD_GL_SGIS_point_parameters = 0; +int GLAD_GL_SGIS_sharpen_texture = 0; +int GLAD_GL_SGIS_texture4D = 0; +int GLAD_GL_SGIS_texture_border_clamp = 0; +int GLAD_GL_SGIS_texture_color_mask = 0; +int GLAD_GL_SGIS_texture_edge_clamp = 0; +int GLAD_GL_SGIS_texture_filter4 = 0; +int GLAD_GL_SGIS_texture_lod = 0; +int GLAD_GL_SGIS_texture_select = 0; +int GLAD_GL_SGIX_async = 0; +int GLAD_GL_SGIX_async_histogram = 0; +int GLAD_GL_SGIX_async_pixel = 0; +int GLAD_GL_SGIX_blend_alpha_minmax = 0; +int GLAD_GL_SGIX_calligraphic_fragment = 0; +int GLAD_GL_SGIX_clipmap = 0; +int GLAD_GL_SGIX_convolution_accuracy = 0; +int GLAD_GL_SGIX_depth_pass_instrument = 0; +int GLAD_GL_SGIX_depth_texture = 0; +int GLAD_GL_SGIX_flush_raster = 0; +int GLAD_GL_SGIX_fog_offset = 0; +int GLAD_GL_SGIX_fragment_lighting = 0; +int GLAD_GL_SGIX_framezoom = 0; +int GLAD_GL_SGIX_igloo_interface = 0; +int GLAD_GL_SGIX_instruments = 0; +int GLAD_GL_SGIX_interlace = 0; +int GLAD_GL_SGIX_ir_instrument1 = 0; +int GLAD_GL_SGIX_list_priority = 0; +int GLAD_GL_SGIX_pixel_texture = 0; +int GLAD_GL_SGIX_pixel_tiles = 0; +int GLAD_GL_SGIX_polynomial_ffd = 0; +int GLAD_GL_SGIX_reference_plane = 0; +int GLAD_GL_SGIX_resample = 0; +int GLAD_GL_SGIX_scalebias_hint = 0; +int GLAD_GL_SGIX_shadow = 0; +int GLAD_GL_SGIX_shadow_ambient = 0; +int GLAD_GL_SGIX_sprite = 0; +int GLAD_GL_SGIX_subsample = 0; +int GLAD_GL_SGIX_tag_sample_buffer = 0; +int GLAD_GL_SGIX_texture_add_env = 0; +int GLAD_GL_SGIX_texture_coordinate_clamp = 0; +int GLAD_GL_SGIX_texture_lod_bias = 0; +int GLAD_GL_SGIX_texture_multi_buffer = 0; +int GLAD_GL_SGIX_texture_scale_bias = 0; +int GLAD_GL_SGIX_vertex_preclip = 0; +int GLAD_GL_SGIX_ycrcb = 0; +int GLAD_GL_SGIX_ycrcb_subsample = 0; +int GLAD_GL_SGIX_ycrcba = 0; +int GLAD_GL_SGI_color_matrix = 0; +int GLAD_GL_SGI_color_table = 0; +int GLAD_GL_SGI_texture_color_table = 0; +int GLAD_GL_SUNX_constant_data = 0; +int GLAD_GL_SUN_convolution_border_modes = 0; +int GLAD_GL_SUN_global_alpha = 0; +int 
GLAD_GL_SUN_mesh_array = 0; +int GLAD_GL_SUN_slice_accum = 0; +int GLAD_GL_SUN_triangle_list = 0; +int GLAD_GL_SUN_vertex = 0; int GLAD_GL_VIV_shader_binary = 0; +int GLAD_GL_WIN_phong_shading = 0; +int GLAD_GL_WIN_specular_fog = 0; +PFNGLTBUFFERMASK3DFXPROC glad_glTbufferMask3DFX = NULL; +PFNGLDEBUGMESSAGEENABLEAMDPROC glad_glDebugMessageEnableAMD = NULL; +PFNGLDEBUGMESSAGEINSERTAMDPROC glad_glDebugMessageInsertAMD = NULL; +PFNGLDEBUGMESSAGECALLBACKAMDPROC glad_glDebugMessageCallbackAMD = NULL; +PFNGLGETDEBUGMESSAGELOGAMDPROC glad_glGetDebugMessageLogAMD = NULL; +PFNGLBLENDFUNCINDEXEDAMDPROC glad_glBlendFuncIndexedAMD = NULL; +PFNGLBLENDFUNCSEPARATEINDEXEDAMDPROC glad_glBlendFuncSeparateIndexedAMD = NULL; +PFNGLBLENDEQUATIONINDEXEDAMDPROC glad_glBlendEquationIndexedAMD = NULL; +PFNGLBLENDEQUATIONSEPARATEINDEXEDAMDPROC glad_glBlendEquationSeparateIndexedAMD = NULL; PFNGLRENDERBUFFERSTORAGEMULTISAMPLEADVANCEDAMDPROC glad_glRenderbufferStorageMultisampleAdvancedAMD = NULL; PFNGLNAMEDRENDERBUFFERSTORAGEMULTISAMPLEADVANCEDAMDPROC glad_glNamedRenderbufferStorageMultisampleAdvancedAMD = NULL; +PFNGLFRAMEBUFFERSAMPLEPOSITIONSFVAMDPROC glad_glFramebufferSamplePositionsfvAMD = NULL; +PFNGLNAMEDFRAMEBUFFERSAMPLEPOSITIONSFVAMDPROC glad_glNamedFramebufferSamplePositionsfvAMD = NULL; +PFNGLGETFRAMEBUFFERPARAMETERFVAMDPROC glad_glGetFramebufferParameterfvAMD = NULL; +PFNGLGETNAMEDFRAMEBUFFERPARAMETERFVAMDPROC glad_glGetNamedFramebufferParameterfvAMD = NULL; +PFNGLUNIFORM1I64NVPROC glad_glUniform1i64NV = NULL; +PFNGLUNIFORM2I64NVPROC glad_glUniform2i64NV = NULL; +PFNGLUNIFORM3I64NVPROC glad_glUniform3i64NV = NULL; +PFNGLUNIFORM4I64NVPROC glad_glUniform4i64NV = NULL; +PFNGLUNIFORM1I64VNVPROC glad_glUniform1i64vNV = NULL; +PFNGLUNIFORM2I64VNVPROC glad_glUniform2i64vNV = NULL; +PFNGLUNIFORM3I64VNVPROC glad_glUniform3i64vNV = NULL; +PFNGLUNIFORM4I64VNVPROC glad_glUniform4i64vNV = NULL; +PFNGLUNIFORM1UI64NVPROC glad_glUniform1ui64NV = NULL; +PFNGLUNIFORM2UI64NVPROC 
glad_glUniform2ui64NV = NULL; +PFNGLUNIFORM3UI64NVPROC glad_glUniform3ui64NV = NULL; +PFNGLUNIFORM4UI64NVPROC glad_glUniform4ui64NV = NULL; +PFNGLUNIFORM1UI64VNVPROC glad_glUniform1ui64vNV = NULL; +PFNGLUNIFORM2UI64VNVPROC glad_glUniform2ui64vNV = NULL; +PFNGLUNIFORM3UI64VNVPROC glad_glUniform3ui64vNV = NULL; +PFNGLUNIFORM4UI64VNVPROC glad_glUniform4ui64vNV = NULL; +PFNGLGETUNIFORMI64VNVPROC glad_glGetUniformi64vNV = NULL; +PFNGLGETUNIFORMUI64VNVPROC glad_glGetUniformui64vNV = NULL; +PFNGLPROGRAMUNIFORM1I64NVPROC glad_glProgramUniform1i64NV = NULL; +PFNGLPROGRAMUNIFORM2I64NVPROC glad_glProgramUniform2i64NV = NULL; +PFNGLPROGRAMUNIFORM3I64NVPROC glad_glProgramUniform3i64NV = NULL; +PFNGLPROGRAMUNIFORM4I64NVPROC glad_glProgramUniform4i64NV = NULL; +PFNGLPROGRAMUNIFORM1I64VNVPROC glad_glProgramUniform1i64vNV = NULL; +PFNGLPROGRAMUNIFORM2I64VNVPROC glad_glProgramUniform2i64vNV = NULL; +PFNGLPROGRAMUNIFORM3I64VNVPROC glad_glProgramUniform3i64vNV = NULL; +PFNGLPROGRAMUNIFORM4I64VNVPROC glad_glProgramUniform4i64vNV = NULL; +PFNGLPROGRAMUNIFORM1UI64NVPROC glad_glProgramUniform1ui64NV = NULL; +PFNGLPROGRAMUNIFORM2UI64NVPROC glad_glProgramUniform2ui64NV = NULL; +PFNGLPROGRAMUNIFORM3UI64NVPROC glad_glProgramUniform3ui64NV = NULL; +PFNGLPROGRAMUNIFORM4UI64NVPROC glad_glProgramUniform4ui64NV = NULL; +PFNGLPROGRAMUNIFORM1UI64VNVPROC glad_glProgramUniform1ui64vNV = NULL; +PFNGLPROGRAMUNIFORM2UI64VNVPROC glad_glProgramUniform2ui64vNV = NULL; +PFNGLPROGRAMUNIFORM3UI64VNVPROC glad_glProgramUniform3ui64vNV = NULL; +PFNGLPROGRAMUNIFORM4UI64VNVPROC glad_glProgramUniform4ui64vNV = NULL; +PFNGLVERTEXATTRIBPARAMETERIAMDPROC glad_glVertexAttribParameteriAMD = NULL; +PFNGLMULTIDRAWARRAYSINDIRECTAMDPROC glad_glMultiDrawArraysIndirectAMD = NULL; +PFNGLMULTIDRAWELEMENTSINDIRECTAMDPROC glad_glMultiDrawElementsIndirectAMD = NULL; +PFNGLGENNAMESAMDPROC glad_glGenNamesAMD = NULL; +PFNGLDELETENAMESAMDPROC glad_glDeleteNamesAMD = NULL; +PFNGLISNAMEAMDPROC glad_glIsNameAMD = NULL; 
+PFNGLQUERYOBJECTPARAMETERUIAMDPROC glad_glQueryObjectParameteruiAMD = NULL; PFNGLGETPERFMONITORGROUPSAMDPROC glad_glGetPerfMonitorGroupsAMD = NULL; PFNGLGETPERFMONITORCOUNTERSAMDPROC glad_glGetPerfMonitorCountersAMD = NULL; PFNGLGETPERFMONITORGROUPSTRINGAMDPROC glad_glGetPerfMonitorGroupStringAMD = NULL; @@ -1149,85 +3156,886 @@ PFNGLSELECTPERFMONITORCOUNTERSAMDPROC glad_glSelectPerfMonitorCountersAMD = NULL PFNGLBEGINPERFMONITORAMDPROC glad_glBeginPerfMonitorAMD = NULL; PFNGLENDPERFMONITORAMDPROC glad_glEndPerfMonitorAMD = NULL; PFNGLGETPERFMONITORCOUNTERDATAAMDPROC glad_glGetPerfMonitorCounterDataAMD = NULL; -PFNGLBLITFRAMEBUFFERANGLEPROC glad_glBlitFramebufferANGLE = NULL; -PFNGLRENDERBUFFERSTORAGEMULTISAMPLEANGLEPROC glad_glRenderbufferStorageMultisampleANGLE = NULL; -PFNGLDRAWARRAYSINSTANCEDANGLEPROC glad_glDrawArraysInstancedANGLE = NULL; -PFNGLDRAWELEMENTSINSTANCEDANGLEPROC glad_glDrawElementsInstancedANGLE = NULL; -PFNGLVERTEXATTRIBDIVISORANGLEPROC glad_glVertexAttribDivisorANGLE = NULL; -PFNGLGETTRANSLATEDSHADERSOURCEANGLEPROC glad_glGetTranslatedShaderSourceANGLE = NULL; -PFNGLCOPYTEXTURELEVELSAPPLEPROC glad_glCopyTextureLevelsAPPLE = NULL; -PFNGLRENDERBUFFERSTORAGEMULTISAMPLEAPPLEPROC glad_glRenderbufferStorageMultisampleAPPLE = NULL; -PFNGLRESOLVEMULTISAMPLEFRAMEBUFFERAPPLEPROC glad_glResolveMultisampleFramebufferAPPLE = NULL; -PFNGLFENCESYNCAPPLEPROC glad_glFenceSyncAPPLE = NULL; -PFNGLISSYNCAPPLEPROC glad_glIsSyncAPPLE = NULL; -PFNGLDELETESYNCAPPLEPROC glad_glDeleteSyncAPPLE = NULL; -PFNGLCLIENTWAITSYNCAPPLEPROC glad_glClientWaitSyncAPPLE = NULL; -PFNGLWAITSYNCAPPLEPROC glad_glWaitSyncAPPLE = NULL; -PFNGLGETINTEGER64VAPPLEPROC glad_glGetInteger64vAPPLE = NULL; -PFNGLGETSYNCIVAPPLEPROC glad_glGetSyncivAPPLE = NULL; -PFNGLMAXACTIVESHADERCORESARMPROC glad_glMaxActiveShaderCoresARM = NULL; +PFNGLSETMULTISAMPLEFVAMDPROC glad_glSetMultisamplefvAMD = NULL; +PFNGLTEXSTORAGESPARSEAMDPROC glad_glTexStorageSparseAMD = NULL; +PFNGLTEXTURESTORAGESPARSEAMDPROC 
glad_glTextureStorageSparseAMD = NULL; +PFNGLSTENCILOPVALUEAMDPROC glad_glStencilOpValueAMD = NULL; +PFNGLTESSELLATIONFACTORAMDPROC glad_glTessellationFactorAMD = NULL; +PFNGLTESSELLATIONMODEAMDPROC glad_glTessellationModeAMD = NULL; +PFNGLELEMENTPOINTERAPPLEPROC glad_glElementPointerAPPLE = NULL; +PFNGLDRAWELEMENTARRAYAPPLEPROC glad_glDrawElementArrayAPPLE = NULL; +PFNGLDRAWRANGEELEMENTARRAYAPPLEPROC glad_glDrawRangeElementArrayAPPLE = NULL; +PFNGLMULTIDRAWELEMENTARRAYAPPLEPROC glad_glMultiDrawElementArrayAPPLE = NULL; +PFNGLMULTIDRAWRANGEELEMENTARRAYAPPLEPROC glad_glMultiDrawRangeElementArrayAPPLE = NULL; +PFNGLGENFENCESAPPLEPROC glad_glGenFencesAPPLE = NULL; +PFNGLDELETEFENCESAPPLEPROC glad_glDeleteFencesAPPLE = NULL; +PFNGLSETFENCEAPPLEPROC glad_glSetFenceAPPLE = NULL; +PFNGLISFENCEAPPLEPROC glad_glIsFenceAPPLE = NULL; +PFNGLTESTFENCEAPPLEPROC glad_glTestFenceAPPLE = NULL; +PFNGLFINISHFENCEAPPLEPROC glad_glFinishFenceAPPLE = NULL; +PFNGLTESTOBJECTAPPLEPROC glad_glTestObjectAPPLE = NULL; +PFNGLFINISHOBJECTAPPLEPROC glad_glFinishObjectAPPLE = NULL; +PFNGLBUFFERPARAMETERIAPPLEPROC glad_glBufferParameteriAPPLE = NULL; +PFNGLFLUSHMAPPEDBUFFERRANGEAPPLEPROC glad_glFlushMappedBufferRangeAPPLE = NULL; +PFNGLOBJECTPURGEABLEAPPLEPROC glad_glObjectPurgeableAPPLE = NULL; +PFNGLOBJECTUNPURGEABLEAPPLEPROC glad_glObjectUnpurgeableAPPLE = NULL; +PFNGLGETOBJECTPARAMETERIVAPPLEPROC glad_glGetObjectParameterivAPPLE = NULL; +PFNGLTEXTURERANGEAPPLEPROC glad_glTextureRangeAPPLE = NULL; +PFNGLGETTEXPARAMETERPOINTERVAPPLEPROC glad_glGetTexParameterPointervAPPLE = NULL; +PFNGLBINDVERTEXARRAYAPPLEPROC glad_glBindVertexArrayAPPLE = NULL; +PFNGLDELETEVERTEXARRAYSAPPLEPROC glad_glDeleteVertexArraysAPPLE = NULL; +PFNGLGENVERTEXARRAYSAPPLEPROC glad_glGenVertexArraysAPPLE = NULL; +PFNGLISVERTEXARRAYAPPLEPROC glad_glIsVertexArrayAPPLE = NULL; +PFNGLVERTEXARRAYRANGEAPPLEPROC glad_glVertexArrayRangeAPPLE = NULL; +PFNGLFLUSHVERTEXARRAYRANGEAPPLEPROC glad_glFlushVertexArrayRangeAPPLE = NULL; 
+PFNGLVERTEXARRAYPARAMETERIAPPLEPROC glad_glVertexArrayParameteriAPPLE = NULL; +PFNGLENABLEVERTEXATTRIBAPPLEPROC glad_glEnableVertexAttribAPPLE = NULL; +PFNGLDISABLEVERTEXATTRIBAPPLEPROC glad_glDisableVertexAttribAPPLE = NULL; +PFNGLISVERTEXATTRIBENABLEDAPPLEPROC glad_glIsVertexAttribEnabledAPPLE = NULL; +PFNGLMAPVERTEXATTRIB1DAPPLEPROC glad_glMapVertexAttrib1dAPPLE = NULL; +PFNGLMAPVERTEXATTRIB1FAPPLEPROC glad_glMapVertexAttrib1fAPPLE = NULL; +PFNGLMAPVERTEXATTRIB2DAPPLEPROC glad_glMapVertexAttrib2dAPPLE = NULL; +PFNGLMAPVERTEXATTRIB2FAPPLEPROC glad_glMapVertexAttrib2fAPPLE = NULL; +PFNGLPRIMITIVEBOUNDINGBOXARBPROC glad_glPrimitiveBoundingBoxARB = NULL; +PFNGLGETTEXTUREHANDLEARBPROC glad_glGetTextureHandleARB = NULL; +PFNGLGETTEXTURESAMPLERHANDLEARBPROC glad_glGetTextureSamplerHandleARB = NULL; +PFNGLMAKETEXTUREHANDLERESIDENTARBPROC glad_glMakeTextureHandleResidentARB = NULL; +PFNGLMAKETEXTUREHANDLENONRESIDENTARBPROC glad_glMakeTextureHandleNonResidentARB = NULL; +PFNGLGETIMAGEHANDLEARBPROC glad_glGetImageHandleARB = NULL; +PFNGLMAKEIMAGEHANDLERESIDENTARBPROC glad_glMakeImageHandleResidentARB = NULL; +PFNGLMAKEIMAGEHANDLENONRESIDENTARBPROC glad_glMakeImageHandleNonResidentARB = NULL; +PFNGLUNIFORMHANDLEUI64ARBPROC glad_glUniformHandleui64ARB = NULL; +PFNGLUNIFORMHANDLEUI64VARBPROC glad_glUniformHandleui64vARB = NULL; +PFNGLPROGRAMUNIFORMHANDLEUI64ARBPROC glad_glProgramUniformHandleui64ARB = NULL; +PFNGLPROGRAMUNIFORMHANDLEUI64VARBPROC glad_glProgramUniformHandleui64vARB = NULL; +PFNGLISTEXTUREHANDLERESIDENTARBPROC glad_glIsTextureHandleResidentARB = NULL; +PFNGLISIMAGEHANDLERESIDENTARBPROC glad_glIsImageHandleResidentARB = NULL; +PFNGLVERTEXATTRIBL1UI64ARBPROC glad_glVertexAttribL1ui64ARB = NULL; +PFNGLVERTEXATTRIBL1UI64VARBPROC glad_glVertexAttribL1ui64vARB = NULL; +PFNGLGETVERTEXATTRIBLUI64VARBPROC glad_glGetVertexAttribLui64vARB = NULL; +PFNGLCREATESYNCFROMCLEVENTARBPROC glad_glCreateSyncFromCLeventARB = NULL; +PFNGLCLAMPCOLORARBPROC glad_glClampColorARB = 
NULL; +PFNGLDISPATCHCOMPUTEGROUPSIZEARBPROC glad_glDispatchComputeGroupSizeARB = NULL; +PFNGLDEBUGMESSAGECONTROLARBPROC glad_glDebugMessageControlARB = NULL; +PFNGLDEBUGMESSAGEINSERTARBPROC glad_glDebugMessageInsertARB = NULL; +PFNGLDEBUGMESSAGECALLBACKARBPROC glad_glDebugMessageCallbackARB = NULL; +PFNGLGETDEBUGMESSAGELOGARBPROC glad_glGetDebugMessageLogARB = NULL; +PFNGLDRAWBUFFERSARBPROC glad_glDrawBuffersARB = NULL; +PFNGLBLENDEQUATIONIARBPROC glad_glBlendEquationiARB = NULL; +PFNGLBLENDEQUATIONSEPARATEIARBPROC glad_glBlendEquationSeparateiARB = NULL; +PFNGLBLENDFUNCIARBPROC glad_glBlendFunciARB = NULL; +PFNGLBLENDFUNCSEPARATEIARBPROC glad_glBlendFuncSeparateiARB = NULL; +PFNGLDRAWARRAYSINSTANCEDARBPROC glad_glDrawArraysInstancedARB = NULL; +PFNGLDRAWELEMENTSINSTANCEDARBPROC glad_glDrawElementsInstancedARB = NULL; +PFNGLPROGRAMSTRINGARBPROC glad_glProgramStringARB = NULL; +PFNGLBINDPROGRAMARBPROC glad_glBindProgramARB = NULL; +PFNGLDELETEPROGRAMSARBPROC glad_glDeleteProgramsARB = NULL; +PFNGLGENPROGRAMSARBPROC glad_glGenProgramsARB = NULL; +PFNGLPROGRAMENVPARAMETER4DARBPROC glad_glProgramEnvParameter4dARB = NULL; +PFNGLPROGRAMENVPARAMETER4DVARBPROC glad_glProgramEnvParameter4dvARB = NULL; +PFNGLPROGRAMENVPARAMETER4FARBPROC glad_glProgramEnvParameter4fARB = NULL; +PFNGLPROGRAMENVPARAMETER4FVARBPROC glad_glProgramEnvParameter4fvARB = NULL; +PFNGLPROGRAMLOCALPARAMETER4DARBPROC glad_glProgramLocalParameter4dARB = NULL; +PFNGLPROGRAMLOCALPARAMETER4DVARBPROC glad_glProgramLocalParameter4dvARB = NULL; +PFNGLPROGRAMLOCALPARAMETER4FARBPROC glad_glProgramLocalParameter4fARB = NULL; +PFNGLPROGRAMLOCALPARAMETER4FVARBPROC glad_glProgramLocalParameter4fvARB = NULL; +PFNGLGETPROGRAMENVPARAMETERDVARBPROC glad_glGetProgramEnvParameterdvARB = NULL; +PFNGLGETPROGRAMENVPARAMETERFVARBPROC glad_glGetProgramEnvParameterfvARB = NULL; +PFNGLGETPROGRAMLOCALPARAMETERDVARBPROC glad_glGetProgramLocalParameterdvARB = NULL; +PFNGLGETPROGRAMLOCALPARAMETERFVARBPROC 
glad_glGetProgramLocalParameterfvARB = NULL; +PFNGLGETPROGRAMIVARBPROC glad_glGetProgramivARB = NULL; +PFNGLGETPROGRAMSTRINGARBPROC glad_glGetProgramStringARB = NULL; +PFNGLISPROGRAMARBPROC glad_glIsProgramARB = NULL; +PFNGLPROGRAMPARAMETERIARBPROC glad_glProgramParameteriARB = NULL; +PFNGLFRAMEBUFFERTEXTUREARBPROC glad_glFramebufferTextureARB = NULL; +PFNGLFRAMEBUFFERTEXTURELAYERARBPROC glad_glFramebufferTextureLayerARB = NULL; +PFNGLFRAMEBUFFERTEXTUREFACEARBPROC glad_glFramebufferTextureFaceARB = NULL; +PFNGLSPECIALIZESHADERARBPROC glad_glSpecializeShaderARB = NULL; +PFNGLUNIFORM1I64ARBPROC glad_glUniform1i64ARB = NULL; +PFNGLUNIFORM2I64ARBPROC glad_glUniform2i64ARB = NULL; +PFNGLUNIFORM3I64ARBPROC glad_glUniform3i64ARB = NULL; +PFNGLUNIFORM4I64ARBPROC glad_glUniform4i64ARB = NULL; +PFNGLUNIFORM1I64VARBPROC glad_glUniform1i64vARB = NULL; +PFNGLUNIFORM2I64VARBPROC glad_glUniform2i64vARB = NULL; +PFNGLUNIFORM3I64VARBPROC glad_glUniform3i64vARB = NULL; +PFNGLUNIFORM4I64VARBPROC glad_glUniform4i64vARB = NULL; +PFNGLUNIFORM1UI64ARBPROC glad_glUniform1ui64ARB = NULL; +PFNGLUNIFORM2UI64ARBPROC glad_glUniform2ui64ARB = NULL; +PFNGLUNIFORM3UI64ARBPROC glad_glUniform3ui64ARB = NULL; +PFNGLUNIFORM4UI64ARBPROC glad_glUniform4ui64ARB = NULL; +PFNGLUNIFORM1UI64VARBPROC glad_glUniform1ui64vARB = NULL; +PFNGLUNIFORM2UI64VARBPROC glad_glUniform2ui64vARB = NULL; +PFNGLUNIFORM3UI64VARBPROC glad_glUniform3ui64vARB = NULL; +PFNGLUNIFORM4UI64VARBPROC glad_glUniform4ui64vARB = NULL; +PFNGLGETUNIFORMI64VARBPROC glad_glGetUniformi64vARB = NULL; +PFNGLGETUNIFORMUI64VARBPROC glad_glGetUniformui64vARB = NULL; +PFNGLGETNUNIFORMI64VARBPROC glad_glGetnUniformi64vARB = NULL; +PFNGLGETNUNIFORMUI64VARBPROC glad_glGetnUniformui64vARB = NULL; +PFNGLPROGRAMUNIFORM1I64ARBPROC glad_glProgramUniform1i64ARB = NULL; +PFNGLPROGRAMUNIFORM2I64ARBPROC glad_glProgramUniform2i64ARB = NULL; +PFNGLPROGRAMUNIFORM3I64ARBPROC glad_glProgramUniform3i64ARB = NULL; +PFNGLPROGRAMUNIFORM4I64ARBPROC 
glad_glProgramUniform4i64ARB = NULL; +PFNGLPROGRAMUNIFORM1I64VARBPROC glad_glProgramUniform1i64vARB = NULL; +PFNGLPROGRAMUNIFORM2I64VARBPROC glad_glProgramUniform2i64vARB = NULL; +PFNGLPROGRAMUNIFORM3I64VARBPROC glad_glProgramUniform3i64vARB = NULL; +PFNGLPROGRAMUNIFORM4I64VARBPROC glad_glProgramUniform4i64vARB = NULL; +PFNGLPROGRAMUNIFORM1UI64ARBPROC glad_glProgramUniform1ui64ARB = NULL; +PFNGLPROGRAMUNIFORM2UI64ARBPROC glad_glProgramUniform2ui64ARB = NULL; +PFNGLPROGRAMUNIFORM3UI64ARBPROC glad_glProgramUniform3ui64ARB = NULL; +PFNGLPROGRAMUNIFORM4UI64ARBPROC glad_glProgramUniform4ui64ARB = NULL; +PFNGLPROGRAMUNIFORM1UI64VARBPROC glad_glProgramUniform1ui64vARB = NULL; +PFNGLPROGRAMUNIFORM2UI64VARBPROC glad_glProgramUniform2ui64vARB = NULL; +PFNGLPROGRAMUNIFORM3UI64VARBPROC glad_glProgramUniform3ui64vARB = NULL; +PFNGLPROGRAMUNIFORM4UI64VARBPROC glad_glProgramUniform4ui64vARB = NULL; +PFNGLCOLORTABLEPROC glad_glColorTable = NULL; +PFNGLCOLORTABLEPARAMETERFVPROC glad_glColorTableParameterfv = NULL; +PFNGLCOLORTABLEPARAMETERIVPROC glad_glColorTableParameteriv = NULL; +PFNGLCOPYCOLORTABLEPROC glad_glCopyColorTable = NULL; +PFNGLGETCOLORTABLEPROC glad_glGetColorTable = NULL; +PFNGLGETCOLORTABLEPARAMETERFVPROC glad_glGetColorTableParameterfv = NULL; +PFNGLGETCOLORTABLEPARAMETERIVPROC glad_glGetColorTableParameteriv = NULL; +PFNGLCOLORSUBTABLEPROC glad_glColorSubTable = NULL; +PFNGLCOPYCOLORSUBTABLEPROC glad_glCopyColorSubTable = NULL; +PFNGLCONVOLUTIONFILTER1DPROC glad_glConvolutionFilter1D = NULL; +PFNGLCONVOLUTIONFILTER2DPROC glad_glConvolutionFilter2D = NULL; +PFNGLCONVOLUTIONPARAMETERFPROC glad_glConvolutionParameterf = NULL; +PFNGLCONVOLUTIONPARAMETERFVPROC glad_glConvolutionParameterfv = NULL; +PFNGLCONVOLUTIONPARAMETERIPROC glad_glConvolutionParameteri = NULL; +PFNGLCONVOLUTIONPARAMETERIVPROC glad_glConvolutionParameteriv = NULL; +PFNGLCOPYCONVOLUTIONFILTER1DPROC glad_glCopyConvolutionFilter1D = NULL; +PFNGLCOPYCONVOLUTIONFILTER2DPROC 
glad_glCopyConvolutionFilter2D = NULL; +PFNGLGETCONVOLUTIONFILTERPROC glad_glGetConvolutionFilter = NULL; +PFNGLGETCONVOLUTIONPARAMETERFVPROC glad_glGetConvolutionParameterfv = NULL; +PFNGLGETCONVOLUTIONPARAMETERIVPROC glad_glGetConvolutionParameteriv = NULL; +PFNGLGETSEPARABLEFILTERPROC glad_glGetSeparableFilter = NULL; +PFNGLSEPARABLEFILTER2DPROC glad_glSeparableFilter2D = NULL; +PFNGLGETHISTOGRAMPROC glad_glGetHistogram = NULL; +PFNGLGETHISTOGRAMPARAMETERFVPROC glad_glGetHistogramParameterfv = NULL; +PFNGLGETHISTOGRAMPARAMETERIVPROC glad_glGetHistogramParameteriv = NULL; +PFNGLGETMINMAXPROC glad_glGetMinmax = NULL; +PFNGLGETMINMAXPARAMETERFVPROC glad_glGetMinmaxParameterfv = NULL; +PFNGLGETMINMAXPARAMETERIVPROC glad_glGetMinmaxParameteriv = NULL; +PFNGLHISTOGRAMPROC glad_glHistogram = NULL; +PFNGLMINMAXPROC glad_glMinmax = NULL; +PFNGLRESETHISTOGRAMPROC glad_glResetHistogram = NULL; +PFNGLRESETMINMAXPROC glad_glResetMinmax = NULL; +PFNGLMULTIDRAWARRAYSINDIRECTCOUNTARBPROC glad_glMultiDrawArraysIndirectCountARB = NULL; +PFNGLMULTIDRAWELEMENTSINDIRECTCOUNTARBPROC glad_glMultiDrawElementsIndirectCountARB = NULL; +PFNGLVERTEXATTRIBDIVISORARBPROC glad_glVertexAttribDivisorARB = NULL; +PFNGLCURRENTPALETTEMATRIXARBPROC glad_glCurrentPaletteMatrixARB = NULL; +PFNGLMATRIXINDEXUBVARBPROC glad_glMatrixIndexubvARB = NULL; +PFNGLMATRIXINDEXUSVARBPROC glad_glMatrixIndexusvARB = NULL; +PFNGLMATRIXINDEXUIVARBPROC glad_glMatrixIndexuivARB = NULL; +PFNGLMATRIXINDEXPOINTERARBPROC glad_glMatrixIndexPointerARB = NULL; +PFNGLSAMPLECOVERAGEARBPROC glad_glSampleCoverageARB = NULL; +PFNGLACTIVETEXTUREARBPROC glad_glActiveTextureARB = NULL; +PFNGLCLIENTACTIVETEXTUREARBPROC glad_glClientActiveTextureARB = NULL; +PFNGLMULTITEXCOORD1DARBPROC glad_glMultiTexCoord1dARB = NULL; +PFNGLMULTITEXCOORD1DVARBPROC glad_glMultiTexCoord1dvARB = NULL; +PFNGLMULTITEXCOORD1FARBPROC glad_glMultiTexCoord1fARB = NULL; +PFNGLMULTITEXCOORD1FVARBPROC glad_glMultiTexCoord1fvARB = NULL; 
+PFNGLMULTITEXCOORD1IARBPROC glad_glMultiTexCoord1iARB = NULL; +PFNGLMULTITEXCOORD1IVARBPROC glad_glMultiTexCoord1ivARB = NULL; +PFNGLMULTITEXCOORD1SARBPROC glad_glMultiTexCoord1sARB = NULL; +PFNGLMULTITEXCOORD1SVARBPROC glad_glMultiTexCoord1svARB = NULL; +PFNGLMULTITEXCOORD2DARBPROC glad_glMultiTexCoord2dARB = NULL; +PFNGLMULTITEXCOORD2DVARBPROC glad_glMultiTexCoord2dvARB = NULL; +PFNGLMULTITEXCOORD2FARBPROC glad_glMultiTexCoord2fARB = NULL; +PFNGLMULTITEXCOORD2FVARBPROC glad_glMultiTexCoord2fvARB = NULL; +PFNGLMULTITEXCOORD2IARBPROC glad_glMultiTexCoord2iARB = NULL; +PFNGLMULTITEXCOORD2IVARBPROC glad_glMultiTexCoord2ivARB = NULL; +PFNGLMULTITEXCOORD2SARBPROC glad_glMultiTexCoord2sARB = NULL; +PFNGLMULTITEXCOORD2SVARBPROC glad_glMultiTexCoord2svARB = NULL; +PFNGLMULTITEXCOORD3DARBPROC glad_glMultiTexCoord3dARB = NULL; +PFNGLMULTITEXCOORD3DVARBPROC glad_glMultiTexCoord3dvARB = NULL; +PFNGLMULTITEXCOORD3FARBPROC glad_glMultiTexCoord3fARB = NULL; +PFNGLMULTITEXCOORD3FVARBPROC glad_glMultiTexCoord3fvARB = NULL; +PFNGLMULTITEXCOORD3IARBPROC glad_glMultiTexCoord3iARB = NULL; +PFNGLMULTITEXCOORD3IVARBPROC glad_glMultiTexCoord3ivARB = NULL; +PFNGLMULTITEXCOORD3SARBPROC glad_glMultiTexCoord3sARB = NULL; +PFNGLMULTITEXCOORD3SVARBPROC glad_glMultiTexCoord3svARB = NULL; +PFNGLMULTITEXCOORD4DARBPROC glad_glMultiTexCoord4dARB = NULL; +PFNGLMULTITEXCOORD4DVARBPROC glad_glMultiTexCoord4dvARB = NULL; +PFNGLMULTITEXCOORD4FARBPROC glad_glMultiTexCoord4fARB = NULL; +PFNGLMULTITEXCOORD4FVARBPROC glad_glMultiTexCoord4fvARB = NULL; +PFNGLMULTITEXCOORD4IARBPROC glad_glMultiTexCoord4iARB = NULL; +PFNGLMULTITEXCOORD4IVARBPROC glad_glMultiTexCoord4ivARB = NULL; +PFNGLMULTITEXCOORD4SARBPROC glad_glMultiTexCoord4sARB = NULL; +PFNGLMULTITEXCOORD4SVARBPROC glad_glMultiTexCoord4svARB = NULL; +PFNGLGENQUERIESARBPROC glad_glGenQueriesARB = NULL; +PFNGLDELETEQUERIESARBPROC glad_glDeleteQueriesARB = NULL; +PFNGLISQUERYARBPROC glad_glIsQueryARB = NULL; +PFNGLBEGINQUERYARBPROC glad_glBeginQueryARB = 
NULL; +PFNGLENDQUERYARBPROC glad_glEndQueryARB = NULL; +PFNGLGETQUERYIVARBPROC glad_glGetQueryivARB = NULL; +PFNGLGETQUERYOBJECTIVARBPROC glad_glGetQueryObjectivARB = NULL; +PFNGLGETQUERYOBJECTUIVARBPROC glad_glGetQueryObjectuivARB = NULL; +PFNGLMAXSHADERCOMPILERTHREADSARBPROC glad_glMaxShaderCompilerThreadsARB = NULL; +PFNGLPOINTPARAMETERFARBPROC glad_glPointParameterfARB = NULL; +PFNGLPOINTPARAMETERFVARBPROC glad_glPointParameterfvARB = NULL; +PFNGLPOLYGONOFFSETCLAMPPROC glad_glPolygonOffsetClamp = NULL; +PFNGLGETGRAPHICSRESETSTATUSARBPROC glad_glGetGraphicsResetStatusARB = NULL; +PFNGLGETNTEXIMAGEARBPROC glad_glGetnTexImageARB = NULL; +PFNGLREADNPIXELSARBPROC glad_glReadnPixelsARB = NULL; +PFNGLGETNCOMPRESSEDTEXIMAGEARBPROC glad_glGetnCompressedTexImageARB = NULL; +PFNGLGETNUNIFORMFVARBPROC glad_glGetnUniformfvARB = NULL; +PFNGLGETNUNIFORMIVARBPROC glad_glGetnUniformivARB = NULL; +PFNGLGETNUNIFORMUIVARBPROC glad_glGetnUniformuivARB = NULL; +PFNGLGETNUNIFORMDVARBPROC glad_glGetnUniformdvARB = NULL; +PFNGLGETNMAPDVARBPROC glad_glGetnMapdvARB = NULL; +PFNGLGETNMAPFVARBPROC glad_glGetnMapfvARB = NULL; +PFNGLGETNMAPIVARBPROC glad_glGetnMapivARB = NULL; +PFNGLGETNPIXELMAPFVARBPROC glad_glGetnPixelMapfvARB = NULL; +PFNGLGETNPIXELMAPUIVARBPROC glad_glGetnPixelMapuivARB = NULL; +PFNGLGETNPIXELMAPUSVARBPROC glad_glGetnPixelMapusvARB = NULL; +PFNGLGETNPOLYGONSTIPPLEARBPROC glad_glGetnPolygonStippleARB = NULL; +PFNGLGETNCOLORTABLEARBPROC glad_glGetnColorTableARB = NULL; +PFNGLGETNCONVOLUTIONFILTERARBPROC glad_glGetnConvolutionFilterARB = NULL; +PFNGLGETNSEPARABLEFILTERARBPROC glad_glGetnSeparableFilterARB = NULL; +PFNGLGETNHISTOGRAMARBPROC glad_glGetnHistogramARB = NULL; +PFNGLGETNMINMAXARBPROC glad_glGetnMinmaxARB = NULL; +PFNGLFRAMEBUFFERSAMPLELOCATIONSFVARBPROC glad_glFramebufferSampleLocationsfvARB = NULL; +PFNGLNAMEDFRAMEBUFFERSAMPLELOCATIONSFVARBPROC glad_glNamedFramebufferSampleLocationsfvARB = NULL; +PFNGLEVALUATEDEPTHVALUESARBPROC glad_glEvaluateDepthValuesARB = 
NULL; +PFNGLMINSAMPLESHADINGARBPROC glad_glMinSampleShadingARB = NULL; +PFNGLDELETEOBJECTARBPROC glad_glDeleteObjectARB = NULL; +PFNGLGETHANDLEARBPROC glad_glGetHandleARB = NULL; +PFNGLDETACHOBJECTARBPROC glad_glDetachObjectARB = NULL; +PFNGLCREATESHADEROBJECTARBPROC glad_glCreateShaderObjectARB = NULL; +PFNGLSHADERSOURCEARBPROC glad_glShaderSourceARB = NULL; +PFNGLCOMPILESHADERARBPROC glad_glCompileShaderARB = NULL; +PFNGLCREATEPROGRAMOBJECTARBPROC glad_glCreateProgramObjectARB = NULL; +PFNGLATTACHOBJECTARBPROC glad_glAttachObjectARB = NULL; +PFNGLLINKPROGRAMARBPROC glad_glLinkProgramARB = NULL; +PFNGLUSEPROGRAMOBJECTARBPROC glad_glUseProgramObjectARB = NULL; +PFNGLVALIDATEPROGRAMARBPROC glad_glValidateProgramARB = NULL; +PFNGLUNIFORM1FARBPROC glad_glUniform1fARB = NULL; +PFNGLUNIFORM2FARBPROC glad_glUniform2fARB = NULL; +PFNGLUNIFORM3FARBPROC glad_glUniform3fARB = NULL; +PFNGLUNIFORM4FARBPROC glad_glUniform4fARB = NULL; +PFNGLUNIFORM1IARBPROC glad_glUniform1iARB = NULL; +PFNGLUNIFORM2IARBPROC glad_glUniform2iARB = NULL; +PFNGLUNIFORM3IARBPROC glad_glUniform3iARB = NULL; +PFNGLUNIFORM4IARBPROC glad_glUniform4iARB = NULL; +PFNGLUNIFORM1FVARBPROC glad_glUniform1fvARB = NULL; +PFNGLUNIFORM2FVARBPROC glad_glUniform2fvARB = NULL; +PFNGLUNIFORM3FVARBPROC glad_glUniform3fvARB = NULL; +PFNGLUNIFORM4FVARBPROC glad_glUniform4fvARB = NULL; +PFNGLUNIFORM1IVARBPROC glad_glUniform1ivARB = NULL; +PFNGLUNIFORM2IVARBPROC glad_glUniform2ivARB = NULL; +PFNGLUNIFORM3IVARBPROC glad_glUniform3ivARB = NULL; +PFNGLUNIFORM4IVARBPROC glad_glUniform4ivARB = NULL; +PFNGLUNIFORMMATRIX2FVARBPROC glad_glUniformMatrix2fvARB = NULL; +PFNGLUNIFORMMATRIX3FVARBPROC glad_glUniformMatrix3fvARB = NULL; +PFNGLUNIFORMMATRIX4FVARBPROC glad_glUniformMatrix4fvARB = NULL; +PFNGLGETOBJECTPARAMETERFVARBPROC glad_glGetObjectParameterfvARB = NULL; +PFNGLGETOBJECTPARAMETERIVARBPROC glad_glGetObjectParameterivARB = NULL; +PFNGLGETINFOLOGARBPROC glad_glGetInfoLogARB = NULL; +PFNGLGETATTACHEDOBJECTSARBPROC 
glad_glGetAttachedObjectsARB = NULL; +PFNGLGETUNIFORMLOCATIONARBPROC glad_glGetUniformLocationARB = NULL; +PFNGLGETACTIVEUNIFORMARBPROC glad_glGetActiveUniformARB = NULL; +PFNGLGETUNIFORMFVARBPROC glad_glGetUniformfvARB = NULL; +PFNGLGETUNIFORMIVARBPROC glad_glGetUniformivARB = NULL; +PFNGLGETSHADERSOURCEARBPROC glad_glGetShaderSourceARB = NULL; +PFNGLNAMEDSTRINGARBPROC glad_glNamedStringARB = NULL; +PFNGLDELETENAMEDSTRINGARBPROC glad_glDeleteNamedStringARB = NULL; +PFNGLCOMPILESHADERINCLUDEARBPROC glad_glCompileShaderIncludeARB = NULL; +PFNGLISNAMEDSTRINGARBPROC glad_glIsNamedStringARB = NULL; +PFNGLGETNAMEDSTRINGARBPROC glad_glGetNamedStringARB = NULL; +PFNGLGETNAMEDSTRINGIVARBPROC glad_glGetNamedStringivARB = NULL; +PFNGLBUFFERPAGECOMMITMENTARBPROC glad_glBufferPageCommitmentARB = NULL; +PFNGLNAMEDBUFFERPAGECOMMITMENTEXTPROC glad_glNamedBufferPageCommitmentEXT = NULL; +PFNGLNAMEDBUFFERPAGECOMMITMENTARBPROC glad_glNamedBufferPageCommitmentARB = NULL; +PFNGLTEXPAGECOMMITMENTARBPROC glad_glTexPageCommitmentARB = NULL; +PFNGLTEXBUFFERARBPROC glad_glTexBufferARB = NULL; +PFNGLCOMPRESSEDTEXIMAGE3DARBPROC glad_glCompressedTexImage3DARB = NULL; +PFNGLCOMPRESSEDTEXIMAGE2DARBPROC glad_glCompressedTexImage2DARB = NULL; +PFNGLCOMPRESSEDTEXIMAGE1DARBPROC glad_glCompressedTexImage1DARB = NULL; +PFNGLCOMPRESSEDTEXSUBIMAGE3DARBPROC glad_glCompressedTexSubImage3DARB = NULL; +PFNGLCOMPRESSEDTEXSUBIMAGE2DARBPROC glad_glCompressedTexSubImage2DARB = NULL; +PFNGLCOMPRESSEDTEXSUBIMAGE1DARBPROC glad_glCompressedTexSubImage1DARB = NULL; +PFNGLGETCOMPRESSEDTEXIMAGEARBPROC glad_glGetCompressedTexImageARB = NULL; +PFNGLLOADTRANSPOSEMATRIXFARBPROC glad_glLoadTransposeMatrixfARB = NULL; +PFNGLLOADTRANSPOSEMATRIXDARBPROC glad_glLoadTransposeMatrixdARB = NULL; +PFNGLMULTTRANSPOSEMATRIXFARBPROC glad_glMultTransposeMatrixfARB = NULL; +PFNGLMULTTRANSPOSEMATRIXDARBPROC glad_glMultTransposeMatrixdARB = NULL; +PFNGLWEIGHTBVARBPROC glad_glWeightbvARB = NULL; +PFNGLWEIGHTSVARBPROC glad_glWeightsvARB = 
NULL; +PFNGLWEIGHTIVARBPROC glad_glWeightivARB = NULL; +PFNGLWEIGHTFVARBPROC glad_glWeightfvARB = NULL; +PFNGLWEIGHTDVARBPROC glad_glWeightdvARB = NULL; +PFNGLWEIGHTUBVARBPROC glad_glWeightubvARB = NULL; +PFNGLWEIGHTUSVARBPROC glad_glWeightusvARB = NULL; +PFNGLWEIGHTUIVARBPROC glad_glWeightuivARB = NULL; +PFNGLWEIGHTPOINTERARBPROC glad_glWeightPointerARB = NULL; +PFNGLVERTEXBLENDARBPROC glad_glVertexBlendARB = NULL; +PFNGLBINDBUFFERARBPROC glad_glBindBufferARB = NULL; +PFNGLDELETEBUFFERSARBPROC glad_glDeleteBuffersARB = NULL; +PFNGLGENBUFFERSARBPROC glad_glGenBuffersARB = NULL; +PFNGLISBUFFERARBPROC glad_glIsBufferARB = NULL; +PFNGLBUFFERDATAARBPROC glad_glBufferDataARB = NULL; +PFNGLBUFFERSUBDATAARBPROC glad_glBufferSubDataARB = NULL; +PFNGLGETBUFFERSUBDATAARBPROC glad_glGetBufferSubDataARB = NULL; +PFNGLMAPBUFFERARBPROC glad_glMapBufferARB = NULL; +PFNGLUNMAPBUFFERARBPROC glad_glUnmapBufferARB = NULL; +PFNGLGETBUFFERPARAMETERIVARBPROC glad_glGetBufferParameterivARB = NULL; +PFNGLGETBUFFERPOINTERVARBPROC glad_glGetBufferPointervARB = NULL; +PFNGLVERTEXATTRIB1DARBPROC glad_glVertexAttrib1dARB = NULL; +PFNGLVERTEXATTRIB1DVARBPROC glad_glVertexAttrib1dvARB = NULL; +PFNGLVERTEXATTRIB1FARBPROC glad_glVertexAttrib1fARB = NULL; +PFNGLVERTEXATTRIB1FVARBPROC glad_glVertexAttrib1fvARB = NULL; +PFNGLVERTEXATTRIB1SARBPROC glad_glVertexAttrib1sARB = NULL; +PFNGLVERTEXATTRIB1SVARBPROC glad_glVertexAttrib1svARB = NULL; +PFNGLVERTEXATTRIB2DARBPROC glad_glVertexAttrib2dARB = NULL; +PFNGLVERTEXATTRIB2DVARBPROC glad_glVertexAttrib2dvARB = NULL; +PFNGLVERTEXATTRIB2FARBPROC glad_glVertexAttrib2fARB = NULL; +PFNGLVERTEXATTRIB2FVARBPROC glad_glVertexAttrib2fvARB = NULL; +PFNGLVERTEXATTRIB2SARBPROC glad_glVertexAttrib2sARB = NULL; +PFNGLVERTEXATTRIB2SVARBPROC glad_glVertexAttrib2svARB = NULL; +PFNGLVERTEXATTRIB3DARBPROC glad_glVertexAttrib3dARB = NULL; +PFNGLVERTEXATTRIB3DVARBPROC glad_glVertexAttrib3dvARB = NULL; +PFNGLVERTEXATTRIB3FARBPROC glad_glVertexAttrib3fARB = NULL; 
+PFNGLVERTEXATTRIB3FVARBPROC glad_glVertexAttrib3fvARB = NULL; +PFNGLVERTEXATTRIB3SARBPROC glad_glVertexAttrib3sARB = NULL; +PFNGLVERTEXATTRIB3SVARBPROC glad_glVertexAttrib3svARB = NULL; +PFNGLVERTEXATTRIB4NBVARBPROC glad_glVertexAttrib4NbvARB = NULL; +PFNGLVERTEXATTRIB4NIVARBPROC glad_glVertexAttrib4NivARB = NULL; +PFNGLVERTEXATTRIB4NSVARBPROC glad_glVertexAttrib4NsvARB = NULL; +PFNGLVERTEXATTRIB4NUBARBPROC glad_glVertexAttrib4NubARB = NULL; +PFNGLVERTEXATTRIB4NUBVARBPROC glad_glVertexAttrib4NubvARB = NULL; +PFNGLVERTEXATTRIB4NUIVARBPROC glad_glVertexAttrib4NuivARB = NULL; +PFNGLVERTEXATTRIB4NUSVARBPROC glad_glVertexAttrib4NusvARB = NULL; +PFNGLVERTEXATTRIB4BVARBPROC glad_glVertexAttrib4bvARB = NULL; +PFNGLVERTEXATTRIB4DARBPROC glad_glVertexAttrib4dARB = NULL; +PFNGLVERTEXATTRIB4DVARBPROC glad_glVertexAttrib4dvARB = NULL; +PFNGLVERTEXATTRIB4FARBPROC glad_glVertexAttrib4fARB = NULL; +PFNGLVERTEXATTRIB4FVARBPROC glad_glVertexAttrib4fvARB = NULL; +PFNGLVERTEXATTRIB4IVARBPROC glad_glVertexAttrib4ivARB = NULL; +PFNGLVERTEXATTRIB4SARBPROC glad_glVertexAttrib4sARB = NULL; +PFNGLVERTEXATTRIB4SVARBPROC glad_glVertexAttrib4svARB = NULL; +PFNGLVERTEXATTRIB4UBVARBPROC glad_glVertexAttrib4ubvARB = NULL; +PFNGLVERTEXATTRIB4UIVARBPROC glad_glVertexAttrib4uivARB = NULL; +PFNGLVERTEXATTRIB4USVARBPROC glad_glVertexAttrib4usvARB = NULL; +PFNGLVERTEXATTRIBPOINTERARBPROC glad_glVertexAttribPointerARB = NULL; +PFNGLENABLEVERTEXATTRIBARRAYARBPROC glad_glEnableVertexAttribArrayARB = NULL; +PFNGLDISABLEVERTEXATTRIBARRAYARBPROC glad_glDisableVertexAttribArrayARB = NULL; +PFNGLGETVERTEXATTRIBDVARBPROC glad_glGetVertexAttribdvARB = NULL; +PFNGLGETVERTEXATTRIBFVARBPROC glad_glGetVertexAttribfvARB = NULL; +PFNGLGETVERTEXATTRIBIVARBPROC glad_glGetVertexAttribivARB = NULL; +PFNGLGETVERTEXATTRIBPOINTERVARBPROC glad_glGetVertexAttribPointervARB = NULL; +PFNGLBINDATTRIBLOCATIONARBPROC glad_glBindAttribLocationARB = NULL; +PFNGLGETACTIVEATTRIBARBPROC glad_glGetActiveAttribARB = NULL; 
+PFNGLGETATTRIBLOCATIONARBPROC glad_glGetAttribLocationARB = NULL; +PFNGLDEPTHRANGEARRAYDVNVPROC glad_glDepthRangeArraydvNV = NULL; +PFNGLDEPTHRANGEINDEXEDDNVPROC glad_glDepthRangeIndexeddNV = NULL; +PFNGLWINDOWPOS2DARBPROC glad_glWindowPos2dARB = NULL; +PFNGLWINDOWPOS2DVARBPROC glad_glWindowPos2dvARB = NULL; +PFNGLWINDOWPOS2FARBPROC glad_glWindowPos2fARB = NULL; +PFNGLWINDOWPOS2FVARBPROC glad_glWindowPos2fvARB = NULL; +PFNGLWINDOWPOS2IARBPROC glad_glWindowPos2iARB = NULL; +PFNGLWINDOWPOS2IVARBPROC glad_glWindowPos2ivARB = NULL; +PFNGLWINDOWPOS2SARBPROC glad_glWindowPos2sARB = NULL; +PFNGLWINDOWPOS2SVARBPROC glad_glWindowPos2svARB = NULL; +PFNGLWINDOWPOS3DARBPROC glad_glWindowPos3dARB = NULL; +PFNGLWINDOWPOS3DVARBPROC glad_glWindowPos3dvARB = NULL; +PFNGLWINDOWPOS3FARBPROC glad_glWindowPos3fARB = NULL; +PFNGLWINDOWPOS3FVARBPROC glad_glWindowPos3fvARB = NULL; +PFNGLWINDOWPOS3IARBPROC glad_glWindowPos3iARB = NULL; +PFNGLWINDOWPOS3IVARBPROC glad_glWindowPos3ivARB = NULL; +PFNGLWINDOWPOS3SARBPROC glad_glWindowPos3sARB = NULL; +PFNGLWINDOWPOS3SVARBPROC glad_glWindowPos3svARB = NULL; +PFNGLDRAWBUFFERSATIPROC glad_glDrawBuffersATI = NULL; +PFNGLELEMENTPOINTERATIPROC glad_glElementPointerATI = NULL; +PFNGLDRAWELEMENTARRAYATIPROC glad_glDrawElementArrayATI = NULL; +PFNGLDRAWRANGEELEMENTARRAYATIPROC glad_glDrawRangeElementArrayATI = NULL; +PFNGLTEXBUMPPARAMETERIVATIPROC glad_glTexBumpParameterivATI = NULL; +PFNGLTEXBUMPPARAMETERFVATIPROC glad_glTexBumpParameterfvATI = NULL; +PFNGLGETTEXBUMPPARAMETERIVATIPROC glad_glGetTexBumpParameterivATI = NULL; +PFNGLGETTEXBUMPPARAMETERFVATIPROC glad_glGetTexBumpParameterfvATI = NULL; +PFNGLGENFRAGMENTSHADERSATIPROC glad_glGenFragmentShadersATI = NULL; +PFNGLBINDFRAGMENTSHADERATIPROC glad_glBindFragmentShaderATI = NULL; +PFNGLDELETEFRAGMENTSHADERATIPROC glad_glDeleteFragmentShaderATI = NULL; +PFNGLBEGINFRAGMENTSHADERATIPROC glad_glBeginFragmentShaderATI = NULL; +PFNGLENDFRAGMENTSHADERATIPROC glad_glEndFragmentShaderATI = NULL; 
+PFNGLPASSTEXCOORDATIPROC glad_glPassTexCoordATI = NULL; +PFNGLSAMPLEMAPATIPROC glad_glSampleMapATI = NULL; +PFNGLCOLORFRAGMENTOP1ATIPROC glad_glColorFragmentOp1ATI = NULL; +PFNGLCOLORFRAGMENTOP2ATIPROC glad_glColorFragmentOp2ATI = NULL; +PFNGLCOLORFRAGMENTOP3ATIPROC glad_glColorFragmentOp3ATI = NULL; +PFNGLALPHAFRAGMENTOP1ATIPROC glad_glAlphaFragmentOp1ATI = NULL; +PFNGLALPHAFRAGMENTOP2ATIPROC glad_glAlphaFragmentOp2ATI = NULL; +PFNGLALPHAFRAGMENTOP3ATIPROC glad_glAlphaFragmentOp3ATI = NULL; +PFNGLSETFRAGMENTSHADERCONSTANTATIPROC glad_glSetFragmentShaderConstantATI = NULL; +PFNGLMAPOBJECTBUFFERATIPROC glad_glMapObjectBufferATI = NULL; +PFNGLUNMAPOBJECTBUFFERATIPROC glad_glUnmapObjectBufferATI = NULL; +PFNGLPNTRIANGLESIATIPROC glad_glPNTrianglesiATI = NULL; +PFNGLPNTRIANGLESFATIPROC glad_glPNTrianglesfATI = NULL; +PFNGLSTENCILOPSEPARATEATIPROC glad_glStencilOpSeparateATI = NULL; +PFNGLSTENCILFUNCSEPARATEATIPROC glad_glStencilFuncSeparateATI = NULL; +PFNGLNEWOBJECTBUFFERATIPROC glad_glNewObjectBufferATI = NULL; +PFNGLISOBJECTBUFFERATIPROC glad_glIsObjectBufferATI = NULL; +PFNGLUPDATEOBJECTBUFFERATIPROC glad_glUpdateObjectBufferATI = NULL; +PFNGLGETOBJECTBUFFERFVATIPROC glad_glGetObjectBufferfvATI = NULL; +PFNGLGETOBJECTBUFFERIVATIPROC glad_glGetObjectBufferivATI = NULL; +PFNGLFREEOBJECTBUFFERATIPROC glad_glFreeObjectBufferATI = NULL; +PFNGLARRAYOBJECTATIPROC glad_glArrayObjectATI = NULL; +PFNGLGETARRAYOBJECTFVATIPROC glad_glGetArrayObjectfvATI = NULL; +PFNGLGETARRAYOBJECTIVATIPROC glad_glGetArrayObjectivATI = NULL; +PFNGLVARIANTARRAYOBJECTATIPROC glad_glVariantArrayObjectATI = NULL; +PFNGLGETVARIANTARRAYOBJECTFVATIPROC glad_glGetVariantArrayObjectfvATI = NULL; +PFNGLGETVARIANTARRAYOBJECTIVATIPROC glad_glGetVariantArrayObjectivATI = NULL; +PFNGLVERTEXATTRIBARRAYOBJECTATIPROC glad_glVertexAttribArrayObjectATI = NULL; +PFNGLGETVERTEXATTRIBARRAYOBJECTFVATIPROC glad_glGetVertexAttribArrayObjectfvATI = NULL; +PFNGLGETVERTEXATTRIBARRAYOBJECTIVATIPROC 
glad_glGetVertexAttribArrayObjectivATI = NULL; +PFNGLVERTEXSTREAM1SATIPROC glad_glVertexStream1sATI = NULL; +PFNGLVERTEXSTREAM1SVATIPROC glad_glVertexStream1svATI = NULL; +PFNGLVERTEXSTREAM1IATIPROC glad_glVertexStream1iATI = NULL; +PFNGLVERTEXSTREAM1IVATIPROC glad_glVertexStream1ivATI = NULL; +PFNGLVERTEXSTREAM1FATIPROC glad_glVertexStream1fATI = NULL; +PFNGLVERTEXSTREAM1FVATIPROC glad_glVertexStream1fvATI = NULL; +PFNGLVERTEXSTREAM1DATIPROC glad_glVertexStream1dATI = NULL; +PFNGLVERTEXSTREAM1DVATIPROC glad_glVertexStream1dvATI = NULL; +PFNGLVERTEXSTREAM2SATIPROC glad_glVertexStream2sATI = NULL; +PFNGLVERTEXSTREAM2SVATIPROC glad_glVertexStream2svATI = NULL; +PFNGLVERTEXSTREAM2IATIPROC glad_glVertexStream2iATI = NULL; +PFNGLVERTEXSTREAM2IVATIPROC glad_glVertexStream2ivATI = NULL; +PFNGLVERTEXSTREAM2FATIPROC glad_glVertexStream2fATI = NULL; +PFNGLVERTEXSTREAM2FVATIPROC glad_glVertexStream2fvATI = NULL; +PFNGLVERTEXSTREAM2DATIPROC glad_glVertexStream2dATI = NULL; +PFNGLVERTEXSTREAM2DVATIPROC glad_glVertexStream2dvATI = NULL; +PFNGLVERTEXSTREAM3SATIPROC glad_glVertexStream3sATI = NULL; +PFNGLVERTEXSTREAM3SVATIPROC glad_glVertexStream3svATI = NULL; +PFNGLVERTEXSTREAM3IATIPROC glad_glVertexStream3iATI = NULL; +PFNGLVERTEXSTREAM3IVATIPROC glad_glVertexStream3ivATI = NULL; +PFNGLVERTEXSTREAM3FATIPROC glad_glVertexStream3fATI = NULL; +PFNGLVERTEXSTREAM3FVATIPROC glad_glVertexStream3fvATI = NULL; +PFNGLVERTEXSTREAM3DATIPROC glad_glVertexStream3dATI = NULL; +PFNGLVERTEXSTREAM3DVATIPROC glad_glVertexStream3dvATI = NULL; +PFNGLVERTEXSTREAM4SATIPROC glad_glVertexStream4sATI = NULL; +PFNGLVERTEXSTREAM4SVATIPROC glad_glVertexStream4svATI = NULL; +PFNGLVERTEXSTREAM4IATIPROC glad_glVertexStream4iATI = NULL; +PFNGLVERTEXSTREAM4IVATIPROC glad_glVertexStream4ivATI = NULL; +PFNGLVERTEXSTREAM4FATIPROC glad_glVertexStream4fATI = NULL; +PFNGLVERTEXSTREAM4FVATIPROC glad_glVertexStream4fvATI = NULL; +PFNGLVERTEXSTREAM4DATIPROC glad_glVertexStream4dATI = NULL; +PFNGLVERTEXSTREAM4DVATIPROC 
glad_glVertexStream4dvATI = NULL; +PFNGLNORMALSTREAM3BATIPROC glad_glNormalStream3bATI = NULL; +PFNGLNORMALSTREAM3BVATIPROC glad_glNormalStream3bvATI = NULL; +PFNGLNORMALSTREAM3SATIPROC glad_glNormalStream3sATI = NULL; +PFNGLNORMALSTREAM3SVATIPROC glad_glNormalStream3svATI = NULL; +PFNGLNORMALSTREAM3IATIPROC glad_glNormalStream3iATI = NULL; +PFNGLNORMALSTREAM3IVATIPROC glad_glNormalStream3ivATI = NULL; +PFNGLNORMALSTREAM3FATIPROC glad_glNormalStream3fATI = NULL; +PFNGLNORMALSTREAM3FVATIPROC glad_glNormalStream3fvATI = NULL; +PFNGLNORMALSTREAM3DATIPROC glad_glNormalStream3dATI = NULL; +PFNGLNORMALSTREAM3DVATIPROC glad_glNormalStream3dvATI = NULL; +PFNGLCLIENTACTIVEVERTEXSTREAMATIPROC glad_glClientActiveVertexStreamATI = NULL; +PFNGLVERTEXBLENDENVIATIPROC glad_glVertexBlendEnviATI = NULL; +PFNGLVERTEXBLENDENVFATIPROC glad_glVertexBlendEnvfATI = NULL; PFNGLEGLIMAGETARGETTEXSTORAGEEXTPROC glad_glEGLImageTargetTexStorageEXT = NULL; PFNGLEGLIMAGETARGETTEXTURESTORAGEEXTPROC glad_glEGLImageTargetTextureStorageEXT = NULL; -PFNGLDRAWARRAYSINSTANCEDBASEINSTANCEEXTPROC glad_glDrawArraysInstancedBaseInstanceEXT = NULL; -PFNGLDRAWELEMENTSINSTANCEDBASEINSTANCEEXTPROC glad_glDrawElementsInstancedBaseInstanceEXT = NULL; -PFNGLDRAWELEMENTSINSTANCEDBASEVERTEXBASEINSTANCEEXTPROC glad_glDrawElementsInstancedBaseVertexBaseInstanceEXT = NULL; -PFNGLBINDFRAGDATALOCATIONINDEXEDEXTPROC glad_glBindFragDataLocationIndexedEXT = NULL; -PFNGLBINDFRAGDATALOCATIONEXTPROC glad_glBindFragDataLocationEXT = NULL; -PFNGLGETPROGRAMRESOURCELOCATIONINDEXEXTPROC glad_glGetProgramResourceLocationIndexEXT = NULL; -PFNGLGETFRAGDATAINDEXEXTPROC glad_glGetFragDataIndexEXT = NULL; +PFNGLUNIFORMBUFFEREXTPROC glad_glUniformBufferEXT = NULL; +PFNGLGETUNIFORMBUFFERSIZEEXTPROC glad_glGetUniformBufferSizeEXT = NULL; +PFNGLGETUNIFORMOFFSETEXTPROC glad_glGetUniformOffsetEXT = NULL; +PFNGLBLENDCOLOREXTPROC glad_glBlendColorEXT = NULL; +PFNGLBLENDEQUATIONSEPARATEEXTPROC glad_glBlendEquationSeparateEXT = NULL; 
+PFNGLBLENDFUNCSEPARATEEXTPROC glad_glBlendFuncSeparateEXT = NULL; PFNGLBLENDEQUATIONEXTPROC glad_glBlendEquationEXT = NULL; -PFNGLBUFFERSTORAGEEXTPROC glad_glBufferStorageEXT = NULL; -PFNGLCLEARTEXIMAGEEXTPROC glad_glClearTexImageEXT = NULL; -PFNGLCLEARTEXSUBIMAGEEXTPROC glad_glClearTexSubImageEXT = NULL; -PFNGLCLIPCONTROLEXTPROC glad_glClipControlEXT = NULL; -PFNGLCOPYIMAGESUBDATAEXTPROC glad_glCopyImageSubDataEXT = NULL; +PFNGLCOLORSUBTABLEEXTPROC glad_glColorSubTableEXT = NULL; +PFNGLCOPYCOLORSUBTABLEEXTPROC glad_glCopyColorSubTableEXT = NULL; +PFNGLLOCKARRAYSEXTPROC glad_glLockArraysEXT = NULL; +PFNGLUNLOCKARRAYSEXTPROC glad_glUnlockArraysEXT = NULL; +PFNGLCONVOLUTIONFILTER1DEXTPROC glad_glConvolutionFilter1DEXT = NULL; +PFNGLCONVOLUTIONFILTER2DEXTPROC glad_glConvolutionFilter2DEXT = NULL; +PFNGLCONVOLUTIONPARAMETERFEXTPROC glad_glConvolutionParameterfEXT = NULL; +PFNGLCONVOLUTIONPARAMETERFVEXTPROC glad_glConvolutionParameterfvEXT = NULL; +PFNGLCONVOLUTIONPARAMETERIEXTPROC glad_glConvolutionParameteriEXT = NULL; +PFNGLCONVOLUTIONPARAMETERIVEXTPROC glad_glConvolutionParameterivEXT = NULL; +PFNGLCOPYCONVOLUTIONFILTER1DEXTPROC glad_glCopyConvolutionFilter1DEXT = NULL; +PFNGLCOPYCONVOLUTIONFILTER2DEXTPROC glad_glCopyConvolutionFilter2DEXT = NULL; +PFNGLGETCONVOLUTIONFILTEREXTPROC glad_glGetConvolutionFilterEXT = NULL; +PFNGLGETCONVOLUTIONPARAMETERFVEXTPROC glad_glGetConvolutionParameterfvEXT = NULL; +PFNGLGETCONVOLUTIONPARAMETERIVEXTPROC glad_glGetConvolutionParameterivEXT = NULL; +PFNGLGETSEPARABLEFILTEREXTPROC glad_glGetSeparableFilterEXT = NULL; +PFNGLSEPARABLEFILTER2DEXTPROC glad_glSeparableFilter2DEXT = NULL; +PFNGLTANGENT3BEXTPROC glad_glTangent3bEXT = NULL; +PFNGLTANGENT3BVEXTPROC glad_glTangent3bvEXT = NULL; +PFNGLTANGENT3DEXTPROC glad_glTangent3dEXT = NULL; +PFNGLTANGENT3DVEXTPROC glad_glTangent3dvEXT = NULL; +PFNGLTANGENT3FEXTPROC glad_glTangent3fEXT = NULL; +PFNGLTANGENT3FVEXTPROC glad_glTangent3fvEXT = NULL; +PFNGLTANGENT3IEXTPROC glad_glTangent3iEXT = 
NULL; +PFNGLTANGENT3IVEXTPROC glad_glTangent3ivEXT = NULL; +PFNGLTANGENT3SEXTPROC glad_glTangent3sEXT = NULL; +PFNGLTANGENT3SVEXTPROC glad_glTangent3svEXT = NULL; +PFNGLBINORMAL3BEXTPROC glad_glBinormal3bEXT = NULL; +PFNGLBINORMAL3BVEXTPROC glad_glBinormal3bvEXT = NULL; +PFNGLBINORMAL3DEXTPROC glad_glBinormal3dEXT = NULL; +PFNGLBINORMAL3DVEXTPROC glad_glBinormal3dvEXT = NULL; +PFNGLBINORMAL3FEXTPROC glad_glBinormal3fEXT = NULL; +PFNGLBINORMAL3FVEXTPROC glad_glBinormal3fvEXT = NULL; +PFNGLBINORMAL3IEXTPROC glad_glBinormal3iEXT = NULL; +PFNGLBINORMAL3IVEXTPROC glad_glBinormal3ivEXT = NULL; +PFNGLBINORMAL3SEXTPROC glad_glBinormal3sEXT = NULL; +PFNGLBINORMAL3SVEXTPROC glad_glBinormal3svEXT = NULL; +PFNGLTANGENTPOINTEREXTPROC glad_glTangentPointerEXT = NULL; +PFNGLBINORMALPOINTEREXTPROC glad_glBinormalPointerEXT = NULL; +PFNGLCOPYTEXIMAGE1DEXTPROC glad_glCopyTexImage1DEXT = NULL; +PFNGLCOPYTEXIMAGE2DEXTPROC glad_glCopyTexImage2DEXT = NULL; +PFNGLCOPYTEXSUBIMAGE1DEXTPROC glad_glCopyTexSubImage1DEXT = NULL; +PFNGLCOPYTEXSUBIMAGE2DEXTPROC glad_glCopyTexSubImage2DEXT = NULL; +PFNGLCOPYTEXSUBIMAGE3DEXTPROC glad_glCopyTexSubImage3DEXT = NULL; +PFNGLCULLPARAMETERDVEXTPROC glad_glCullParameterdvEXT = NULL; +PFNGLCULLPARAMETERFVEXTPROC glad_glCullParameterfvEXT = NULL; PFNGLLABELOBJECTEXTPROC glad_glLabelObjectEXT = NULL; PFNGLGETOBJECTLABELEXTPROC glad_glGetObjectLabelEXT = NULL; PFNGLINSERTEVENTMARKEREXTPROC glad_glInsertEventMarkerEXT = NULL; PFNGLPUSHGROUPMARKEREXTPROC glad_glPushGroupMarkerEXT = NULL; PFNGLPOPGROUPMARKEREXTPROC glad_glPopGroupMarkerEXT = NULL; -PFNGLDISCARDFRAMEBUFFEREXTPROC glad_glDiscardFramebufferEXT = NULL; -PFNGLGENQUERIESEXTPROC glad_glGenQueriesEXT = NULL; -PFNGLDELETEQUERIESEXTPROC glad_glDeleteQueriesEXT = NULL; -PFNGLISQUERYEXTPROC glad_glIsQueryEXT = NULL; -PFNGLBEGINQUERYEXTPROC glad_glBeginQueryEXT = NULL; -PFNGLENDQUERYEXTPROC glad_glEndQueryEXT = NULL; -PFNGLQUERYCOUNTEREXTPROC glad_glQueryCounterEXT = NULL; -PFNGLGETQUERYIVEXTPROC 
glad_glGetQueryivEXT = NULL; -PFNGLGETQUERYOBJECTIVEXTPROC glad_glGetQueryObjectivEXT = NULL; -PFNGLGETQUERYOBJECTUIVEXTPROC glad_glGetQueryObjectuivEXT = NULL; -PFNGLGETQUERYOBJECTI64VEXTPROC glad_glGetQueryObjecti64vEXT = NULL; -PFNGLGETQUERYOBJECTUI64VEXTPROC glad_glGetQueryObjectui64vEXT = NULL; -PFNGLGETINTEGER64VEXTPROC glad_glGetInteger64vEXT = NULL; -PFNGLDRAWBUFFERSEXTPROC glad_glDrawBuffersEXT = NULL; -PFNGLENABLEIEXTPROC glad_glEnableiEXT = NULL; -PFNGLDISABLEIEXTPROC glad_glDisableiEXT = NULL; -PFNGLBLENDEQUATIONIEXTPROC glad_glBlendEquationiEXT = NULL; -PFNGLBLENDEQUATIONSEPARATEIEXTPROC glad_glBlendEquationSeparateiEXT = NULL; -PFNGLBLENDFUNCIEXTPROC glad_glBlendFunciEXT = NULL; -PFNGLBLENDFUNCSEPARATEIEXTPROC glad_glBlendFuncSeparateiEXT = NULL; -PFNGLCOLORMASKIEXTPROC glad_glColorMaskiEXT = NULL; -PFNGLISENABLEDIEXTPROC glad_glIsEnablediEXT = NULL; -PFNGLDRAWELEMENTSBASEVERTEXEXTPROC glad_glDrawElementsBaseVertexEXT = NULL; -PFNGLDRAWRANGEELEMENTSBASEVERTEXEXTPROC glad_glDrawRangeElementsBaseVertexEXT = NULL; -PFNGLDRAWELEMENTSINSTANCEDBASEVERTEXEXTPROC glad_glDrawElementsInstancedBaseVertexEXT = NULL; -PFNGLMULTIDRAWELEMENTSBASEVERTEXEXTPROC glad_glMultiDrawElementsBaseVertexEXT = NULL; +PFNGLDEPTHBOUNDSEXTPROC glad_glDepthBoundsEXT = NULL; +PFNGLMATRIXLOADFEXTPROC glad_glMatrixLoadfEXT = NULL; +PFNGLMATRIXLOADDEXTPROC glad_glMatrixLoaddEXT = NULL; +PFNGLMATRIXMULTFEXTPROC glad_glMatrixMultfEXT = NULL; +PFNGLMATRIXMULTDEXTPROC glad_glMatrixMultdEXT = NULL; +PFNGLMATRIXLOADIDENTITYEXTPROC glad_glMatrixLoadIdentityEXT = NULL; +PFNGLMATRIXROTATEFEXTPROC glad_glMatrixRotatefEXT = NULL; +PFNGLMATRIXROTATEDEXTPROC glad_glMatrixRotatedEXT = NULL; +PFNGLMATRIXSCALEFEXTPROC glad_glMatrixScalefEXT = NULL; +PFNGLMATRIXSCALEDEXTPROC glad_glMatrixScaledEXT = NULL; +PFNGLMATRIXTRANSLATEFEXTPROC glad_glMatrixTranslatefEXT = NULL; +PFNGLMATRIXTRANSLATEDEXTPROC glad_glMatrixTranslatedEXT = NULL; +PFNGLMATRIXFRUSTUMEXTPROC glad_glMatrixFrustumEXT = NULL; 
+PFNGLMATRIXORTHOEXTPROC glad_glMatrixOrthoEXT = NULL; +PFNGLMATRIXPOPEXTPROC glad_glMatrixPopEXT = NULL; +PFNGLMATRIXPUSHEXTPROC glad_glMatrixPushEXT = NULL; +PFNGLCLIENTATTRIBDEFAULTEXTPROC glad_glClientAttribDefaultEXT = NULL; +PFNGLPUSHCLIENTATTRIBDEFAULTEXTPROC glad_glPushClientAttribDefaultEXT = NULL; +PFNGLTEXTUREPARAMETERFEXTPROC glad_glTextureParameterfEXT = NULL; +PFNGLTEXTUREPARAMETERFVEXTPROC glad_glTextureParameterfvEXT = NULL; +PFNGLTEXTUREPARAMETERIEXTPROC glad_glTextureParameteriEXT = NULL; +PFNGLTEXTUREPARAMETERIVEXTPROC glad_glTextureParameterivEXT = NULL; +PFNGLTEXTUREIMAGE1DEXTPROC glad_glTextureImage1DEXT = NULL; +PFNGLTEXTUREIMAGE2DEXTPROC glad_glTextureImage2DEXT = NULL; +PFNGLTEXTURESUBIMAGE1DEXTPROC glad_glTextureSubImage1DEXT = NULL; +PFNGLTEXTURESUBIMAGE2DEXTPROC glad_glTextureSubImage2DEXT = NULL; +PFNGLCOPYTEXTUREIMAGE1DEXTPROC glad_glCopyTextureImage1DEXT = NULL; +PFNGLCOPYTEXTUREIMAGE2DEXTPROC glad_glCopyTextureImage2DEXT = NULL; +PFNGLCOPYTEXTURESUBIMAGE1DEXTPROC glad_glCopyTextureSubImage1DEXT = NULL; +PFNGLCOPYTEXTURESUBIMAGE2DEXTPROC glad_glCopyTextureSubImage2DEXT = NULL; +PFNGLGETTEXTUREIMAGEEXTPROC glad_glGetTextureImageEXT = NULL; +PFNGLGETTEXTUREPARAMETERFVEXTPROC glad_glGetTextureParameterfvEXT = NULL; +PFNGLGETTEXTUREPARAMETERIVEXTPROC glad_glGetTextureParameterivEXT = NULL; +PFNGLGETTEXTURELEVELPARAMETERFVEXTPROC glad_glGetTextureLevelParameterfvEXT = NULL; +PFNGLGETTEXTURELEVELPARAMETERIVEXTPROC glad_glGetTextureLevelParameterivEXT = NULL; +PFNGLTEXTUREIMAGE3DEXTPROC glad_glTextureImage3DEXT = NULL; +PFNGLTEXTURESUBIMAGE3DEXTPROC glad_glTextureSubImage3DEXT = NULL; +PFNGLCOPYTEXTURESUBIMAGE3DEXTPROC glad_glCopyTextureSubImage3DEXT = NULL; +PFNGLBINDMULTITEXTUREEXTPROC glad_glBindMultiTextureEXT = NULL; +PFNGLMULTITEXCOORDPOINTEREXTPROC glad_glMultiTexCoordPointerEXT = NULL; +PFNGLMULTITEXENVFEXTPROC glad_glMultiTexEnvfEXT = NULL; +PFNGLMULTITEXENVFVEXTPROC glad_glMultiTexEnvfvEXT = NULL; +PFNGLMULTITEXENVIEXTPROC 
glad_glMultiTexEnviEXT = NULL; +PFNGLMULTITEXENVIVEXTPROC glad_glMultiTexEnvivEXT = NULL; +PFNGLMULTITEXGENDEXTPROC glad_glMultiTexGendEXT = NULL; +PFNGLMULTITEXGENDVEXTPROC glad_glMultiTexGendvEXT = NULL; +PFNGLMULTITEXGENFEXTPROC glad_glMultiTexGenfEXT = NULL; +PFNGLMULTITEXGENFVEXTPROC glad_glMultiTexGenfvEXT = NULL; +PFNGLMULTITEXGENIEXTPROC glad_glMultiTexGeniEXT = NULL; +PFNGLMULTITEXGENIVEXTPROC glad_glMultiTexGenivEXT = NULL; +PFNGLGETMULTITEXENVFVEXTPROC glad_glGetMultiTexEnvfvEXT = NULL; +PFNGLGETMULTITEXENVIVEXTPROC glad_glGetMultiTexEnvivEXT = NULL; +PFNGLGETMULTITEXGENDVEXTPROC glad_glGetMultiTexGendvEXT = NULL; +PFNGLGETMULTITEXGENFVEXTPROC glad_glGetMultiTexGenfvEXT = NULL; +PFNGLGETMULTITEXGENIVEXTPROC glad_glGetMultiTexGenivEXT = NULL; +PFNGLMULTITEXPARAMETERIEXTPROC glad_glMultiTexParameteriEXT = NULL; +PFNGLMULTITEXPARAMETERIVEXTPROC glad_glMultiTexParameterivEXT = NULL; +PFNGLMULTITEXPARAMETERFEXTPROC glad_glMultiTexParameterfEXT = NULL; +PFNGLMULTITEXPARAMETERFVEXTPROC glad_glMultiTexParameterfvEXT = NULL; +PFNGLMULTITEXIMAGE1DEXTPROC glad_glMultiTexImage1DEXT = NULL; +PFNGLMULTITEXIMAGE2DEXTPROC glad_glMultiTexImage2DEXT = NULL; +PFNGLMULTITEXSUBIMAGE1DEXTPROC glad_glMultiTexSubImage1DEXT = NULL; +PFNGLMULTITEXSUBIMAGE2DEXTPROC glad_glMultiTexSubImage2DEXT = NULL; +PFNGLCOPYMULTITEXIMAGE1DEXTPROC glad_glCopyMultiTexImage1DEXT = NULL; +PFNGLCOPYMULTITEXIMAGE2DEXTPROC glad_glCopyMultiTexImage2DEXT = NULL; +PFNGLCOPYMULTITEXSUBIMAGE1DEXTPROC glad_glCopyMultiTexSubImage1DEXT = NULL; +PFNGLCOPYMULTITEXSUBIMAGE2DEXTPROC glad_glCopyMultiTexSubImage2DEXT = NULL; +PFNGLGETMULTITEXIMAGEEXTPROC glad_glGetMultiTexImageEXT = NULL; +PFNGLGETMULTITEXPARAMETERFVEXTPROC glad_glGetMultiTexParameterfvEXT = NULL; +PFNGLGETMULTITEXPARAMETERIVEXTPROC glad_glGetMultiTexParameterivEXT = NULL; +PFNGLGETMULTITEXLEVELPARAMETERFVEXTPROC glad_glGetMultiTexLevelParameterfvEXT = NULL; +PFNGLGETMULTITEXLEVELPARAMETERIVEXTPROC glad_glGetMultiTexLevelParameterivEXT = NULL; 
+PFNGLMULTITEXIMAGE3DEXTPROC glad_glMultiTexImage3DEXT = NULL; +PFNGLMULTITEXSUBIMAGE3DEXTPROC glad_glMultiTexSubImage3DEXT = NULL; +PFNGLCOPYMULTITEXSUBIMAGE3DEXTPROC glad_glCopyMultiTexSubImage3DEXT = NULL; +PFNGLENABLECLIENTSTATEINDEXEDEXTPROC glad_glEnableClientStateIndexedEXT = NULL; +PFNGLDISABLECLIENTSTATEINDEXEDEXTPROC glad_glDisableClientStateIndexedEXT = NULL; +PFNGLGETFLOATINDEXEDVEXTPROC glad_glGetFloatIndexedvEXT = NULL; +PFNGLGETDOUBLEINDEXEDVEXTPROC glad_glGetDoubleIndexedvEXT = NULL; +PFNGLGETPOINTERINDEXEDVEXTPROC glad_glGetPointerIndexedvEXT = NULL; +PFNGLENABLEINDEXEDEXTPROC glad_glEnableIndexedEXT = NULL; +PFNGLDISABLEINDEXEDEXTPROC glad_glDisableIndexedEXT = NULL; +PFNGLISENABLEDINDEXEDEXTPROC glad_glIsEnabledIndexedEXT = NULL; +PFNGLGETINTEGERINDEXEDVEXTPROC glad_glGetIntegerIndexedvEXT = NULL; +PFNGLGETBOOLEANINDEXEDVEXTPROC glad_glGetBooleanIndexedvEXT = NULL; +PFNGLCOMPRESSEDTEXTUREIMAGE3DEXTPROC glad_glCompressedTextureImage3DEXT = NULL; +PFNGLCOMPRESSEDTEXTUREIMAGE2DEXTPROC glad_glCompressedTextureImage2DEXT = NULL; +PFNGLCOMPRESSEDTEXTUREIMAGE1DEXTPROC glad_glCompressedTextureImage1DEXT = NULL; +PFNGLCOMPRESSEDTEXTURESUBIMAGE3DEXTPROC glad_glCompressedTextureSubImage3DEXT = NULL; +PFNGLCOMPRESSEDTEXTURESUBIMAGE2DEXTPROC glad_glCompressedTextureSubImage2DEXT = NULL; +PFNGLCOMPRESSEDTEXTURESUBIMAGE1DEXTPROC glad_glCompressedTextureSubImage1DEXT = NULL; +PFNGLGETCOMPRESSEDTEXTUREIMAGEEXTPROC glad_glGetCompressedTextureImageEXT = NULL; +PFNGLCOMPRESSEDMULTITEXIMAGE3DEXTPROC glad_glCompressedMultiTexImage3DEXT = NULL; +PFNGLCOMPRESSEDMULTITEXIMAGE2DEXTPROC glad_glCompressedMultiTexImage2DEXT = NULL; +PFNGLCOMPRESSEDMULTITEXIMAGE1DEXTPROC glad_glCompressedMultiTexImage1DEXT = NULL; +PFNGLCOMPRESSEDMULTITEXSUBIMAGE3DEXTPROC glad_glCompressedMultiTexSubImage3DEXT = NULL; +PFNGLCOMPRESSEDMULTITEXSUBIMAGE2DEXTPROC glad_glCompressedMultiTexSubImage2DEXT = NULL; +PFNGLCOMPRESSEDMULTITEXSUBIMAGE1DEXTPROC glad_glCompressedMultiTexSubImage1DEXT = NULL; 
+PFNGLGETCOMPRESSEDMULTITEXIMAGEEXTPROC glad_glGetCompressedMultiTexImageEXT = NULL; +PFNGLMATRIXLOADTRANSPOSEFEXTPROC glad_glMatrixLoadTransposefEXT = NULL; +PFNGLMATRIXLOADTRANSPOSEDEXTPROC glad_glMatrixLoadTransposedEXT = NULL; +PFNGLMATRIXMULTTRANSPOSEFEXTPROC glad_glMatrixMultTransposefEXT = NULL; +PFNGLMATRIXMULTTRANSPOSEDEXTPROC glad_glMatrixMultTransposedEXT = NULL; +PFNGLNAMEDBUFFERDATAEXTPROC glad_glNamedBufferDataEXT = NULL; +PFNGLNAMEDBUFFERSUBDATAEXTPROC glad_glNamedBufferSubDataEXT = NULL; +PFNGLMAPNAMEDBUFFEREXTPROC glad_glMapNamedBufferEXT = NULL; +PFNGLUNMAPNAMEDBUFFEREXTPROC glad_glUnmapNamedBufferEXT = NULL; +PFNGLGETNAMEDBUFFERPARAMETERIVEXTPROC glad_glGetNamedBufferParameterivEXT = NULL; +PFNGLGETNAMEDBUFFERPOINTERVEXTPROC glad_glGetNamedBufferPointervEXT = NULL; +PFNGLGETNAMEDBUFFERSUBDATAEXTPROC glad_glGetNamedBufferSubDataEXT = NULL; +PFNGLPROGRAMUNIFORM1FEXTPROC glad_glProgramUniform1fEXT = NULL; +PFNGLPROGRAMUNIFORM2FEXTPROC glad_glProgramUniform2fEXT = NULL; +PFNGLPROGRAMUNIFORM3FEXTPROC glad_glProgramUniform3fEXT = NULL; +PFNGLPROGRAMUNIFORM4FEXTPROC glad_glProgramUniform4fEXT = NULL; +PFNGLPROGRAMUNIFORM1IEXTPROC glad_glProgramUniform1iEXT = NULL; +PFNGLPROGRAMUNIFORM2IEXTPROC glad_glProgramUniform2iEXT = NULL; +PFNGLPROGRAMUNIFORM3IEXTPROC glad_glProgramUniform3iEXT = NULL; +PFNGLPROGRAMUNIFORM4IEXTPROC glad_glProgramUniform4iEXT = NULL; +PFNGLPROGRAMUNIFORM1FVEXTPROC glad_glProgramUniform1fvEXT = NULL; +PFNGLPROGRAMUNIFORM2FVEXTPROC glad_glProgramUniform2fvEXT = NULL; +PFNGLPROGRAMUNIFORM3FVEXTPROC glad_glProgramUniform3fvEXT = NULL; +PFNGLPROGRAMUNIFORM4FVEXTPROC glad_glProgramUniform4fvEXT = NULL; +PFNGLPROGRAMUNIFORM1IVEXTPROC glad_glProgramUniform1ivEXT = NULL; +PFNGLPROGRAMUNIFORM2IVEXTPROC glad_glProgramUniform2ivEXT = NULL; +PFNGLPROGRAMUNIFORM3IVEXTPROC glad_glProgramUniform3ivEXT = NULL; +PFNGLPROGRAMUNIFORM4IVEXTPROC glad_glProgramUniform4ivEXT = NULL; +PFNGLPROGRAMUNIFORMMATRIX2FVEXTPROC glad_glProgramUniformMatrix2fvEXT = 
NULL; +PFNGLPROGRAMUNIFORMMATRIX3FVEXTPROC glad_glProgramUniformMatrix3fvEXT = NULL; +PFNGLPROGRAMUNIFORMMATRIX4FVEXTPROC glad_glProgramUniformMatrix4fvEXT = NULL; +PFNGLPROGRAMUNIFORMMATRIX2X3FVEXTPROC glad_glProgramUniformMatrix2x3fvEXT = NULL; +PFNGLPROGRAMUNIFORMMATRIX3X2FVEXTPROC glad_glProgramUniformMatrix3x2fvEXT = NULL; +PFNGLPROGRAMUNIFORMMATRIX2X4FVEXTPROC glad_glProgramUniformMatrix2x4fvEXT = NULL; +PFNGLPROGRAMUNIFORMMATRIX4X2FVEXTPROC glad_glProgramUniformMatrix4x2fvEXT = NULL; +PFNGLPROGRAMUNIFORMMATRIX3X4FVEXTPROC glad_glProgramUniformMatrix3x4fvEXT = NULL; +PFNGLPROGRAMUNIFORMMATRIX4X3FVEXTPROC glad_glProgramUniformMatrix4x3fvEXT = NULL; +PFNGLTEXTUREBUFFEREXTPROC glad_glTextureBufferEXT = NULL; +PFNGLMULTITEXBUFFEREXTPROC glad_glMultiTexBufferEXT = NULL; +PFNGLTEXTUREPARAMETERIIVEXTPROC glad_glTextureParameterIivEXT = NULL; +PFNGLTEXTUREPARAMETERIUIVEXTPROC glad_glTextureParameterIuivEXT = NULL; +PFNGLGETTEXTUREPARAMETERIIVEXTPROC glad_glGetTextureParameterIivEXT = NULL; +PFNGLGETTEXTUREPARAMETERIUIVEXTPROC glad_glGetTextureParameterIuivEXT = NULL; +PFNGLMULTITEXPARAMETERIIVEXTPROC glad_glMultiTexParameterIivEXT = NULL; +PFNGLMULTITEXPARAMETERIUIVEXTPROC glad_glMultiTexParameterIuivEXT = NULL; +PFNGLGETMULTITEXPARAMETERIIVEXTPROC glad_glGetMultiTexParameterIivEXT = NULL; +PFNGLGETMULTITEXPARAMETERIUIVEXTPROC glad_glGetMultiTexParameterIuivEXT = NULL; +PFNGLPROGRAMUNIFORM1UIEXTPROC glad_glProgramUniform1uiEXT = NULL; +PFNGLPROGRAMUNIFORM2UIEXTPROC glad_glProgramUniform2uiEXT = NULL; +PFNGLPROGRAMUNIFORM3UIEXTPROC glad_glProgramUniform3uiEXT = NULL; +PFNGLPROGRAMUNIFORM4UIEXTPROC glad_glProgramUniform4uiEXT = NULL; +PFNGLPROGRAMUNIFORM1UIVEXTPROC glad_glProgramUniform1uivEXT = NULL; +PFNGLPROGRAMUNIFORM2UIVEXTPROC glad_glProgramUniform2uivEXT = NULL; +PFNGLPROGRAMUNIFORM3UIVEXTPROC glad_glProgramUniform3uivEXT = NULL; +PFNGLPROGRAMUNIFORM4UIVEXTPROC glad_glProgramUniform4uivEXT = NULL; +PFNGLNAMEDPROGRAMLOCALPARAMETERS4FVEXTPROC 
glad_glNamedProgramLocalParameters4fvEXT = NULL; +PFNGLNAMEDPROGRAMLOCALPARAMETERI4IEXTPROC glad_glNamedProgramLocalParameterI4iEXT = NULL; +PFNGLNAMEDPROGRAMLOCALPARAMETERI4IVEXTPROC glad_glNamedProgramLocalParameterI4ivEXT = NULL; +PFNGLNAMEDPROGRAMLOCALPARAMETERSI4IVEXTPROC glad_glNamedProgramLocalParametersI4ivEXT = NULL; +PFNGLNAMEDPROGRAMLOCALPARAMETERI4UIEXTPROC glad_glNamedProgramLocalParameterI4uiEXT = NULL; +PFNGLNAMEDPROGRAMLOCALPARAMETERI4UIVEXTPROC glad_glNamedProgramLocalParameterI4uivEXT = NULL; +PFNGLNAMEDPROGRAMLOCALPARAMETERSI4UIVEXTPROC glad_glNamedProgramLocalParametersI4uivEXT = NULL; +PFNGLGETNAMEDPROGRAMLOCALPARAMETERIIVEXTPROC glad_glGetNamedProgramLocalParameterIivEXT = NULL; +PFNGLGETNAMEDPROGRAMLOCALPARAMETERIUIVEXTPROC glad_glGetNamedProgramLocalParameterIuivEXT = NULL; +PFNGLENABLECLIENTSTATEIEXTPROC glad_glEnableClientStateiEXT = NULL; +PFNGLDISABLECLIENTSTATEIEXTPROC glad_glDisableClientStateiEXT = NULL; +PFNGLGETFLOATI_VEXTPROC glad_glGetFloati_vEXT = NULL; +PFNGLGETDOUBLEI_VEXTPROC glad_glGetDoublei_vEXT = NULL; +PFNGLGETPOINTERI_VEXTPROC glad_glGetPointeri_vEXT = NULL; +PFNGLNAMEDPROGRAMSTRINGEXTPROC glad_glNamedProgramStringEXT = NULL; +PFNGLNAMEDPROGRAMLOCALPARAMETER4DEXTPROC glad_glNamedProgramLocalParameter4dEXT = NULL; +PFNGLNAMEDPROGRAMLOCALPARAMETER4DVEXTPROC glad_glNamedProgramLocalParameter4dvEXT = NULL; +PFNGLNAMEDPROGRAMLOCALPARAMETER4FEXTPROC glad_glNamedProgramLocalParameter4fEXT = NULL; +PFNGLNAMEDPROGRAMLOCALPARAMETER4FVEXTPROC glad_glNamedProgramLocalParameter4fvEXT = NULL; +PFNGLGETNAMEDPROGRAMLOCALPARAMETERDVEXTPROC glad_glGetNamedProgramLocalParameterdvEXT = NULL; +PFNGLGETNAMEDPROGRAMLOCALPARAMETERFVEXTPROC glad_glGetNamedProgramLocalParameterfvEXT = NULL; +PFNGLGETNAMEDPROGRAMIVEXTPROC glad_glGetNamedProgramivEXT = NULL; +PFNGLGETNAMEDPROGRAMSTRINGEXTPROC glad_glGetNamedProgramStringEXT = NULL; +PFNGLNAMEDRENDERBUFFERSTORAGEEXTPROC glad_glNamedRenderbufferStorageEXT = NULL; 
+PFNGLGETNAMEDRENDERBUFFERPARAMETERIVEXTPROC glad_glGetNamedRenderbufferParameterivEXT = NULL; +PFNGLNAMEDRENDERBUFFERSTORAGEMULTISAMPLEEXTPROC glad_glNamedRenderbufferStorageMultisampleEXT = NULL; +PFNGLNAMEDRENDERBUFFERSTORAGEMULTISAMPLECOVERAGEEXTPROC glad_glNamedRenderbufferStorageMultisampleCoverageEXT = NULL; +PFNGLCHECKNAMEDFRAMEBUFFERSTATUSEXTPROC glad_glCheckNamedFramebufferStatusEXT = NULL; +PFNGLNAMEDFRAMEBUFFERTEXTURE1DEXTPROC glad_glNamedFramebufferTexture1DEXT = NULL; +PFNGLNAMEDFRAMEBUFFERTEXTURE2DEXTPROC glad_glNamedFramebufferTexture2DEXT = NULL; +PFNGLNAMEDFRAMEBUFFERTEXTURE3DEXTPROC glad_glNamedFramebufferTexture3DEXT = NULL; +PFNGLNAMEDFRAMEBUFFERRENDERBUFFEREXTPROC glad_glNamedFramebufferRenderbufferEXT = NULL; +PFNGLGETNAMEDFRAMEBUFFERATTACHMENTPARAMETERIVEXTPROC glad_glGetNamedFramebufferAttachmentParameterivEXT = NULL; +PFNGLGENERATETEXTUREMIPMAPEXTPROC glad_glGenerateTextureMipmapEXT = NULL; +PFNGLGENERATEMULTITEXMIPMAPEXTPROC glad_glGenerateMultiTexMipmapEXT = NULL; +PFNGLFRAMEBUFFERDRAWBUFFEREXTPROC glad_glFramebufferDrawBufferEXT = NULL; +PFNGLFRAMEBUFFERDRAWBUFFERSEXTPROC glad_glFramebufferDrawBuffersEXT = NULL; +PFNGLFRAMEBUFFERREADBUFFEREXTPROC glad_glFramebufferReadBufferEXT = NULL; +PFNGLGETFRAMEBUFFERPARAMETERIVEXTPROC glad_glGetFramebufferParameterivEXT = NULL; +PFNGLNAMEDCOPYBUFFERSUBDATAEXTPROC glad_glNamedCopyBufferSubDataEXT = NULL; +PFNGLNAMEDFRAMEBUFFERTEXTUREEXTPROC glad_glNamedFramebufferTextureEXT = NULL; +PFNGLNAMEDFRAMEBUFFERTEXTURELAYEREXTPROC glad_glNamedFramebufferTextureLayerEXT = NULL; +PFNGLNAMEDFRAMEBUFFERTEXTUREFACEEXTPROC glad_glNamedFramebufferTextureFaceEXT = NULL; +PFNGLTEXTURERENDERBUFFEREXTPROC glad_glTextureRenderbufferEXT = NULL; +PFNGLMULTITEXRENDERBUFFEREXTPROC glad_glMultiTexRenderbufferEXT = NULL; +PFNGLVERTEXARRAYVERTEXOFFSETEXTPROC glad_glVertexArrayVertexOffsetEXT = NULL; +PFNGLVERTEXARRAYCOLOROFFSETEXTPROC glad_glVertexArrayColorOffsetEXT = NULL; +PFNGLVERTEXARRAYEDGEFLAGOFFSETEXTPROC 
glad_glVertexArrayEdgeFlagOffsetEXT = NULL; +PFNGLVERTEXARRAYINDEXOFFSETEXTPROC glad_glVertexArrayIndexOffsetEXT = NULL; +PFNGLVERTEXARRAYNORMALOFFSETEXTPROC glad_glVertexArrayNormalOffsetEXT = NULL; +PFNGLVERTEXARRAYTEXCOORDOFFSETEXTPROC glad_glVertexArrayTexCoordOffsetEXT = NULL; +PFNGLVERTEXARRAYMULTITEXCOORDOFFSETEXTPROC glad_glVertexArrayMultiTexCoordOffsetEXT = NULL; +PFNGLVERTEXARRAYFOGCOORDOFFSETEXTPROC glad_glVertexArrayFogCoordOffsetEXT = NULL; +PFNGLVERTEXARRAYSECONDARYCOLOROFFSETEXTPROC glad_glVertexArraySecondaryColorOffsetEXT = NULL; +PFNGLVERTEXARRAYVERTEXATTRIBOFFSETEXTPROC glad_glVertexArrayVertexAttribOffsetEXT = NULL; +PFNGLVERTEXARRAYVERTEXATTRIBIOFFSETEXTPROC glad_glVertexArrayVertexAttribIOffsetEXT = NULL; +PFNGLENABLEVERTEXARRAYEXTPROC glad_glEnableVertexArrayEXT = NULL; +PFNGLDISABLEVERTEXARRAYEXTPROC glad_glDisableVertexArrayEXT = NULL; +PFNGLENABLEVERTEXARRAYATTRIBEXTPROC glad_glEnableVertexArrayAttribEXT = NULL; +PFNGLDISABLEVERTEXARRAYATTRIBEXTPROC glad_glDisableVertexArrayAttribEXT = NULL; +PFNGLGETVERTEXARRAYINTEGERVEXTPROC glad_glGetVertexArrayIntegervEXT = NULL; +PFNGLGETVERTEXARRAYPOINTERVEXTPROC glad_glGetVertexArrayPointervEXT = NULL; +PFNGLGETVERTEXARRAYINTEGERI_VEXTPROC glad_glGetVertexArrayIntegeri_vEXT = NULL; +PFNGLGETVERTEXARRAYPOINTERI_VEXTPROC glad_glGetVertexArrayPointeri_vEXT = NULL; +PFNGLMAPNAMEDBUFFERRANGEEXTPROC glad_glMapNamedBufferRangeEXT = NULL; +PFNGLFLUSHMAPPEDNAMEDBUFFERRANGEEXTPROC glad_glFlushMappedNamedBufferRangeEXT = NULL; +PFNGLNAMEDBUFFERSTORAGEEXTPROC glad_glNamedBufferStorageEXT = NULL; +PFNGLCLEARNAMEDBUFFERDATAEXTPROC glad_glClearNamedBufferDataEXT = NULL; +PFNGLCLEARNAMEDBUFFERSUBDATAEXTPROC glad_glClearNamedBufferSubDataEXT = NULL; +PFNGLNAMEDFRAMEBUFFERPARAMETERIEXTPROC glad_glNamedFramebufferParameteriEXT = NULL; +PFNGLGETNAMEDFRAMEBUFFERPARAMETERIVEXTPROC glad_glGetNamedFramebufferParameterivEXT = NULL; +PFNGLPROGRAMUNIFORM1DEXTPROC glad_glProgramUniform1dEXT = NULL; 
+PFNGLPROGRAMUNIFORM2DEXTPROC glad_glProgramUniform2dEXT = NULL; +PFNGLPROGRAMUNIFORM3DEXTPROC glad_glProgramUniform3dEXT = NULL; +PFNGLPROGRAMUNIFORM4DEXTPROC glad_glProgramUniform4dEXT = NULL; +PFNGLPROGRAMUNIFORM1DVEXTPROC glad_glProgramUniform1dvEXT = NULL; +PFNGLPROGRAMUNIFORM2DVEXTPROC glad_glProgramUniform2dvEXT = NULL; +PFNGLPROGRAMUNIFORM3DVEXTPROC glad_glProgramUniform3dvEXT = NULL; +PFNGLPROGRAMUNIFORM4DVEXTPROC glad_glProgramUniform4dvEXT = NULL; +PFNGLPROGRAMUNIFORMMATRIX2DVEXTPROC glad_glProgramUniformMatrix2dvEXT = NULL; +PFNGLPROGRAMUNIFORMMATRIX3DVEXTPROC glad_glProgramUniformMatrix3dvEXT = NULL; +PFNGLPROGRAMUNIFORMMATRIX4DVEXTPROC glad_glProgramUniformMatrix4dvEXT = NULL; +PFNGLPROGRAMUNIFORMMATRIX2X3DVEXTPROC glad_glProgramUniformMatrix2x3dvEXT = NULL; +PFNGLPROGRAMUNIFORMMATRIX2X4DVEXTPROC glad_glProgramUniformMatrix2x4dvEXT = NULL; +PFNGLPROGRAMUNIFORMMATRIX3X2DVEXTPROC glad_glProgramUniformMatrix3x2dvEXT = NULL; +PFNGLPROGRAMUNIFORMMATRIX3X4DVEXTPROC glad_glProgramUniformMatrix3x4dvEXT = NULL; +PFNGLPROGRAMUNIFORMMATRIX4X2DVEXTPROC glad_glProgramUniformMatrix4x2dvEXT = NULL; +PFNGLPROGRAMUNIFORMMATRIX4X3DVEXTPROC glad_glProgramUniformMatrix4x3dvEXT = NULL; +PFNGLTEXTUREBUFFERRANGEEXTPROC glad_glTextureBufferRangeEXT = NULL; +PFNGLTEXTURESTORAGE1DEXTPROC glad_glTextureStorage1DEXT = NULL; +PFNGLTEXTURESTORAGE2DEXTPROC glad_glTextureStorage2DEXT = NULL; +PFNGLTEXTURESTORAGE3DEXTPROC glad_glTextureStorage3DEXT = NULL; +PFNGLTEXTURESTORAGE2DMULTISAMPLEEXTPROC glad_glTextureStorage2DMultisampleEXT = NULL; +PFNGLTEXTURESTORAGE3DMULTISAMPLEEXTPROC glad_glTextureStorage3DMultisampleEXT = NULL; +PFNGLVERTEXARRAYBINDVERTEXBUFFEREXTPROC glad_glVertexArrayBindVertexBufferEXT = NULL; +PFNGLVERTEXARRAYVERTEXATTRIBFORMATEXTPROC glad_glVertexArrayVertexAttribFormatEXT = NULL; +PFNGLVERTEXARRAYVERTEXATTRIBIFORMATEXTPROC glad_glVertexArrayVertexAttribIFormatEXT = NULL; +PFNGLVERTEXARRAYVERTEXATTRIBLFORMATEXTPROC glad_glVertexArrayVertexAttribLFormatEXT = NULL; 
+PFNGLVERTEXARRAYVERTEXATTRIBBINDINGEXTPROC glad_glVertexArrayVertexAttribBindingEXT = NULL; +PFNGLVERTEXARRAYVERTEXBINDINGDIVISOREXTPROC glad_glVertexArrayVertexBindingDivisorEXT = NULL; +PFNGLVERTEXARRAYVERTEXATTRIBLOFFSETEXTPROC glad_glVertexArrayVertexAttribLOffsetEXT = NULL; +PFNGLTEXTUREPAGECOMMITMENTEXTPROC glad_glTexturePageCommitmentEXT = NULL; +PFNGLVERTEXARRAYVERTEXATTRIBDIVISOREXTPROC glad_glVertexArrayVertexAttribDivisorEXT = NULL; +PFNGLCOLORMASKINDEXEDEXTPROC glad_glColorMaskIndexedEXT = NULL; PFNGLDRAWARRAYSINSTANCEDEXTPROC glad_glDrawArraysInstancedEXT = NULL; PFNGLDRAWELEMENTSINSTANCEDEXTPROC glad_glDrawElementsInstancedEXT = NULL; -PFNGLDRAWTRANSFORMFEEDBACKEXTPROC glad_glDrawTransformFeedbackEXT = NULL; -PFNGLDRAWTRANSFORMFEEDBACKINSTANCEDEXTPROC glad_glDrawTransformFeedbackInstancedEXT = NULL; +PFNGLDRAWRANGEELEMENTSEXTPROC glad_glDrawRangeElementsEXT = NULL; PFNGLBUFFERSTORAGEEXTERNALEXTPROC glad_glBufferStorageExternalEXT = NULL; PFNGLNAMEDBUFFERSTORAGEEXTERNALEXTPROC glad_glNamedBufferStorageExternalEXT = NULL; +PFNGLFOGCOORDFEXTPROC glad_glFogCoordfEXT = NULL; +PFNGLFOGCOORDFVEXTPROC glad_glFogCoordfvEXT = NULL; +PFNGLFOGCOORDDEXTPROC glad_glFogCoorddEXT = NULL; +PFNGLFOGCOORDDVEXTPROC glad_glFogCoorddvEXT = NULL; +PFNGLFOGCOORDPOINTEREXTPROC glad_glFogCoordPointerEXT = NULL; PFNGLGETFRAGMENTSHADINGRATESEXTPROC glad_glGetFragmentShadingRatesEXT = NULL; PFNGLSHADINGRATEEXTPROC glad_glShadingRateEXT = NULL; PFNGLSHADINGRATECOMBINEROPSEXTPROC glad_glShadingRateCombinerOpsEXT = NULL; PFNGLFRAMEBUFFERSHADINGRATEEXTPROC glad_glFramebufferShadingRateEXT = NULL; +PFNGLBLITFRAMEBUFFEREXTPROC glad_glBlitFramebufferEXT = NULL; PFNGLBLITFRAMEBUFFERLAYERSEXTPROC glad_glBlitFramebufferLayersEXT = NULL; PFNGLBLITFRAMEBUFFERLAYEREXTPROC glad_glBlitFramebufferLayerEXT = NULL; -PFNGLFRAMEBUFFERTEXTUREEXTPROC glad_glFramebufferTextureEXT = NULL; -PFNGLVERTEXATTRIBDIVISOREXTPROC glad_glVertexAttribDivisorEXT = NULL; -PFNGLMAPBUFFERRANGEEXTPROC 
glad_glMapBufferRangeEXT = NULL; -PFNGLFLUSHMAPPEDBUFFERRANGEEXTPROC glad_glFlushMappedBufferRangeEXT = NULL; +PFNGLRENDERBUFFERSTORAGEMULTISAMPLEEXTPROC glad_glRenderbufferStorageMultisampleEXT = NULL; +PFNGLISRENDERBUFFEREXTPROC glad_glIsRenderbufferEXT = NULL; +PFNGLBINDRENDERBUFFEREXTPROC glad_glBindRenderbufferEXT = NULL; +PFNGLDELETERENDERBUFFERSEXTPROC glad_glDeleteRenderbuffersEXT = NULL; +PFNGLGENRENDERBUFFERSEXTPROC glad_glGenRenderbuffersEXT = NULL; +PFNGLRENDERBUFFERSTORAGEEXTPROC glad_glRenderbufferStorageEXT = NULL; +PFNGLGETRENDERBUFFERPARAMETERIVEXTPROC glad_glGetRenderbufferParameterivEXT = NULL; +PFNGLISFRAMEBUFFEREXTPROC glad_glIsFramebufferEXT = NULL; +PFNGLBINDFRAMEBUFFEREXTPROC glad_glBindFramebufferEXT = NULL; +PFNGLDELETEFRAMEBUFFERSEXTPROC glad_glDeleteFramebuffersEXT = NULL; +PFNGLGENFRAMEBUFFERSEXTPROC glad_glGenFramebuffersEXT = NULL; +PFNGLCHECKFRAMEBUFFERSTATUSEXTPROC glad_glCheckFramebufferStatusEXT = NULL; +PFNGLFRAMEBUFFERTEXTURE1DEXTPROC glad_glFramebufferTexture1DEXT = NULL; +PFNGLFRAMEBUFFERTEXTURE2DEXTPROC glad_glFramebufferTexture2DEXT = NULL; +PFNGLFRAMEBUFFERTEXTURE3DEXTPROC glad_glFramebufferTexture3DEXT = NULL; +PFNGLFRAMEBUFFERRENDERBUFFEREXTPROC glad_glFramebufferRenderbufferEXT = NULL; +PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVEXTPROC glad_glGetFramebufferAttachmentParameterivEXT = NULL; +PFNGLGENERATEMIPMAPEXTPROC glad_glGenerateMipmapEXT = NULL; +PFNGLPROGRAMPARAMETERIEXTPROC glad_glProgramParameteriEXT = NULL; +PFNGLPROGRAMENVPARAMETERS4FVEXTPROC glad_glProgramEnvParameters4fvEXT = NULL; +PFNGLPROGRAMLOCALPARAMETERS4FVEXTPROC glad_glProgramLocalParameters4fvEXT = NULL; +PFNGLGETUNIFORMUIVEXTPROC glad_glGetUniformuivEXT = NULL; +PFNGLBINDFRAGDATALOCATIONEXTPROC glad_glBindFragDataLocationEXT = NULL; +PFNGLGETFRAGDATALOCATIONEXTPROC glad_glGetFragDataLocationEXT = NULL; +PFNGLUNIFORM1UIEXTPROC glad_glUniform1uiEXT = NULL; +PFNGLUNIFORM2UIEXTPROC glad_glUniform2uiEXT = NULL; +PFNGLUNIFORM3UIEXTPROC glad_glUniform3uiEXT = 
NULL; +PFNGLUNIFORM4UIEXTPROC glad_glUniform4uiEXT = NULL; +PFNGLUNIFORM1UIVEXTPROC glad_glUniform1uivEXT = NULL; +PFNGLUNIFORM2UIVEXTPROC glad_glUniform2uivEXT = NULL; +PFNGLUNIFORM3UIVEXTPROC glad_glUniform3uivEXT = NULL; +PFNGLUNIFORM4UIVEXTPROC glad_glUniform4uivEXT = NULL; +PFNGLVERTEXATTRIBI1IEXTPROC glad_glVertexAttribI1iEXT = NULL; +PFNGLVERTEXATTRIBI2IEXTPROC glad_glVertexAttribI2iEXT = NULL; +PFNGLVERTEXATTRIBI3IEXTPROC glad_glVertexAttribI3iEXT = NULL; +PFNGLVERTEXATTRIBI4IEXTPROC glad_glVertexAttribI4iEXT = NULL; +PFNGLVERTEXATTRIBI1UIEXTPROC glad_glVertexAttribI1uiEXT = NULL; +PFNGLVERTEXATTRIBI2UIEXTPROC glad_glVertexAttribI2uiEXT = NULL; +PFNGLVERTEXATTRIBI3UIEXTPROC glad_glVertexAttribI3uiEXT = NULL; +PFNGLVERTEXATTRIBI4UIEXTPROC glad_glVertexAttribI4uiEXT = NULL; +PFNGLVERTEXATTRIBI1IVEXTPROC glad_glVertexAttribI1ivEXT = NULL; +PFNGLVERTEXATTRIBI2IVEXTPROC glad_glVertexAttribI2ivEXT = NULL; +PFNGLVERTEXATTRIBI3IVEXTPROC glad_glVertexAttribI3ivEXT = NULL; +PFNGLVERTEXATTRIBI4IVEXTPROC glad_glVertexAttribI4ivEXT = NULL; +PFNGLVERTEXATTRIBI1UIVEXTPROC glad_glVertexAttribI1uivEXT = NULL; +PFNGLVERTEXATTRIBI2UIVEXTPROC glad_glVertexAttribI2uivEXT = NULL; +PFNGLVERTEXATTRIBI3UIVEXTPROC glad_glVertexAttribI3uivEXT = NULL; +PFNGLVERTEXATTRIBI4UIVEXTPROC glad_glVertexAttribI4uivEXT = NULL; +PFNGLVERTEXATTRIBI4BVEXTPROC glad_glVertexAttribI4bvEXT = NULL; +PFNGLVERTEXATTRIBI4SVEXTPROC glad_glVertexAttribI4svEXT = NULL; +PFNGLVERTEXATTRIBI4UBVEXTPROC glad_glVertexAttribI4ubvEXT = NULL; +PFNGLVERTEXATTRIBI4USVEXTPROC glad_glVertexAttribI4usvEXT = NULL; +PFNGLVERTEXATTRIBIPOINTEREXTPROC glad_glVertexAttribIPointerEXT = NULL; +PFNGLGETVERTEXATTRIBIIVEXTPROC glad_glGetVertexAttribIivEXT = NULL; +PFNGLGETVERTEXATTRIBIUIVEXTPROC glad_glGetVertexAttribIuivEXT = NULL; +PFNGLGETHISTOGRAMEXTPROC glad_glGetHistogramEXT = NULL; +PFNGLGETHISTOGRAMPARAMETERFVEXTPROC glad_glGetHistogramParameterfvEXT = NULL; +PFNGLGETHISTOGRAMPARAMETERIVEXTPROC 
glad_glGetHistogramParameterivEXT = NULL; +PFNGLGETMINMAXEXTPROC glad_glGetMinmaxEXT = NULL; +PFNGLGETMINMAXPARAMETERFVEXTPROC glad_glGetMinmaxParameterfvEXT = NULL; +PFNGLGETMINMAXPARAMETERIVEXTPROC glad_glGetMinmaxParameterivEXT = NULL; +PFNGLHISTOGRAMEXTPROC glad_glHistogramEXT = NULL; +PFNGLMINMAXEXTPROC glad_glMinmaxEXT = NULL; +PFNGLRESETHISTOGRAMEXTPROC glad_glResetHistogramEXT = NULL; +PFNGLRESETMINMAXEXTPROC glad_glResetMinmaxEXT = NULL; +PFNGLINDEXFUNCEXTPROC glad_glIndexFuncEXT = NULL; +PFNGLINDEXMATERIALEXTPROC glad_glIndexMaterialEXT = NULL; +PFNGLAPPLYTEXTUREEXTPROC glad_glApplyTextureEXT = NULL; +PFNGLTEXTURELIGHTEXTPROC glad_glTextureLightEXT = NULL; +PFNGLTEXTUREMATERIALEXTPROC glad_glTextureMaterialEXT = NULL; PFNGLGETUNSIGNEDBYTEVEXTPROC glad_glGetUnsignedBytevEXT = NULL; PFNGLGETUNSIGNEDBYTEI_VEXTPROC glad_glGetUnsignedBytei_vEXT = NULL; PFNGLDELETEMEMORYOBJECTSEXTPROC glad_glDeleteMemoryObjectsEXT = NULL; @@ -1256,20 +4064,41 @@ PFNGLMULTIDRAWMESHTASKSINDIRECTEXTPROC glad_glMultiDrawMeshTasksIndirectEXT = NU PFNGLMULTIDRAWMESHTASKSINDIRECTCOUNTEXTPROC glad_glMultiDrawMeshTasksIndirectCountEXT = NULL; PFNGLMULTIDRAWARRAYSEXTPROC glad_glMultiDrawArraysEXT = NULL; PFNGLMULTIDRAWELEMENTSEXTPROC glad_glMultiDrawElementsEXT = NULL; -PFNGLMULTIDRAWARRAYSINDIRECTEXTPROC glad_glMultiDrawArraysIndirectEXT = NULL; -PFNGLMULTIDRAWELEMENTSINDIRECTEXTPROC glad_glMultiDrawElementsIndirectEXT = NULL; -PFNGLRENDERBUFFERSTORAGEMULTISAMPLEEXTPROC glad_glRenderbufferStorageMultisampleEXT = NULL; -PFNGLFRAMEBUFFERTEXTURE2DMULTISAMPLEEXTPROC glad_glFramebufferTexture2DMultisampleEXT = NULL; -PFNGLREADBUFFERINDEXEDEXTPROC glad_glReadBufferIndexedEXT = NULL; -PFNGLDRAWBUFFERSINDEXEDEXTPROC glad_glDrawBuffersIndexedEXT = NULL; -PFNGLGETINTEGERI_VEXTPROC glad_glGetIntegeri_vEXT = NULL; +PFNGLSAMPLEMASKEXTPROC glad_glSampleMaskEXT = NULL; +PFNGLSAMPLEPATTERNEXTPROC glad_glSamplePatternEXT = NULL; +PFNGLCOLORTABLEEXTPROC glad_glColorTableEXT = NULL; 
+PFNGLGETCOLORTABLEEXTPROC glad_glGetColorTableEXT = NULL; +PFNGLGETCOLORTABLEPARAMETERIVEXTPROC glad_glGetColorTableParameterivEXT = NULL; +PFNGLGETCOLORTABLEPARAMETERFVEXTPROC glad_glGetColorTableParameterfvEXT = NULL; +PFNGLPIXELTRANSFORMPARAMETERIEXTPROC glad_glPixelTransformParameteriEXT = NULL; +PFNGLPIXELTRANSFORMPARAMETERFEXTPROC glad_glPixelTransformParameterfEXT = NULL; +PFNGLPIXELTRANSFORMPARAMETERIVEXTPROC glad_glPixelTransformParameterivEXT = NULL; +PFNGLPIXELTRANSFORMPARAMETERFVEXTPROC glad_glPixelTransformParameterfvEXT = NULL; +PFNGLGETPIXELTRANSFORMPARAMETERIVEXTPROC glad_glGetPixelTransformParameterivEXT = NULL; +PFNGLGETPIXELTRANSFORMPARAMETERFVEXTPROC glad_glGetPixelTransformParameterfvEXT = NULL; +PFNGLPOINTPARAMETERFEXTPROC glad_glPointParameterfEXT = NULL; +PFNGLPOINTPARAMETERFVEXTPROC glad_glPointParameterfvEXT = NULL; +PFNGLPOLYGONOFFSETEXTPROC glad_glPolygonOffsetEXT = NULL; PFNGLPOLYGONOFFSETCLAMPEXTPROC glad_glPolygonOffsetClampEXT = NULL; -PFNGLPRIMITIVEBOUNDINGBOXEXTPROC glad_glPrimitiveBoundingBoxEXT = NULL; +PFNGLPROVOKINGVERTEXEXTPROC glad_glProvokingVertexEXT = NULL; PFNGLRASTERSAMPLESEXTPROC glad_glRasterSamplesEXT = NULL; -PFNGLGETGRAPHICSRESETSTATUSEXTPROC glad_glGetGraphicsResetStatusEXT = NULL; -PFNGLREADNPIXELSEXTPROC glad_glReadnPixelsEXT = NULL; -PFNGLGETNUNIFORMFVEXTPROC glad_glGetnUniformfvEXT = NULL; -PFNGLGETNUNIFORMIVEXTPROC glad_glGetnUniformivEXT = NULL; +PFNGLSECONDARYCOLOR3BEXTPROC glad_glSecondaryColor3bEXT = NULL; +PFNGLSECONDARYCOLOR3BVEXTPROC glad_glSecondaryColor3bvEXT = NULL; +PFNGLSECONDARYCOLOR3DEXTPROC glad_glSecondaryColor3dEXT = NULL; +PFNGLSECONDARYCOLOR3DVEXTPROC glad_glSecondaryColor3dvEXT = NULL; +PFNGLSECONDARYCOLOR3FEXTPROC glad_glSecondaryColor3fEXT = NULL; +PFNGLSECONDARYCOLOR3FVEXTPROC glad_glSecondaryColor3fvEXT = NULL; +PFNGLSECONDARYCOLOR3IEXTPROC glad_glSecondaryColor3iEXT = NULL; +PFNGLSECONDARYCOLOR3IVEXTPROC glad_glSecondaryColor3ivEXT = NULL; +PFNGLSECONDARYCOLOR3SEXTPROC 
glad_glSecondaryColor3sEXT = NULL; +PFNGLSECONDARYCOLOR3SVEXTPROC glad_glSecondaryColor3svEXT = NULL; +PFNGLSECONDARYCOLOR3UBEXTPROC glad_glSecondaryColor3ubEXT = NULL; +PFNGLSECONDARYCOLOR3UBVEXTPROC glad_glSecondaryColor3ubvEXT = NULL; +PFNGLSECONDARYCOLOR3UIEXTPROC glad_glSecondaryColor3uiEXT = NULL; +PFNGLSECONDARYCOLOR3UIVEXTPROC glad_glSecondaryColor3uivEXT = NULL; +PFNGLSECONDARYCOLOR3USEXTPROC glad_glSecondaryColor3usEXT = NULL; +PFNGLSECONDARYCOLOR3USVEXTPROC glad_glSecondaryColor3usvEXT = NULL; +PFNGLSECONDARYCOLORPOINTEREXTPROC glad_glSecondaryColorPointerEXT = NULL; PFNGLGENSEMAPHORESEXTPROC glad_glGenSemaphoresEXT = NULL; PFNGLDELETESEMAPHORESEXTPROC glad_glDeleteSemaphoresEXT = NULL; PFNGLISSEMAPHOREEXTPROC glad_glIsSemaphoreEXT = NULL; @@ -1291,81 +4120,140 @@ PFNGLGENPROGRAMPIPELINESEXTPROC glad_glGenProgramPipelinesEXT = NULL; PFNGLGETPROGRAMPIPELINEINFOLOGEXTPROC glad_glGetProgramPipelineInfoLogEXT = NULL; PFNGLGETPROGRAMPIPELINEIVEXTPROC glad_glGetProgramPipelineivEXT = NULL; PFNGLISPROGRAMPIPELINEEXTPROC glad_glIsProgramPipelineEXT = NULL; -PFNGLPROGRAMPARAMETERIEXTPROC glad_glProgramParameteriEXT = NULL; -PFNGLPROGRAMUNIFORM1FEXTPROC glad_glProgramUniform1fEXT = NULL; -PFNGLPROGRAMUNIFORM1FVEXTPROC glad_glProgramUniform1fvEXT = NULL; -PFNGLPROGRAMUNIFORM1IEXTPROC glad_glProgramUniform1iEXT = NULL; -PFNGLPROGRAMUNIFORM1IVEXTPROC glad_glProgramUniform1ivEXT = NULL; -PFNGLPROGRAMUNIFORM2FEXTPROC glad_glProgramUniform2fEXT = NULL; -PFNGLPROGRAMUNIFORM2FVEXTPROC glad_glProgramUniform2fvEXT = NULL; -PFNGLPROGRAMUNIFORM2IEXTPROC glad_glProgramUniform2iEXT = NULL; -PFNGLPROGRAMUNIFORM2IVEXTPROC glad_glProgramUniform2ivEXT = NULL; -PFNGLPROGRAMUNIFORM3FEXTPROC glad_glProgramUniform3fEXT = NULL; -PFNGLPROGRAMUNIFORM3FVEXTPROC glad_glProgramUniform3fvEXT = NULL; -PFNGLPROGRAMUNIFORM3IEXTPROC glad_glProgramUniform3iEXT = NULL; -PFNGLPROGRAMUNIFORM3IVEXTPROC glad_glProgramUniform3ivEXT = NULL; -PFNGLPROGRAMUNIFORM4FEXTPROC glad_glProgramUniform4fEXT = NULL; 
-PFNGLPROGRAMUNIFORM4FVEXTPROC glad_glProgramUniform4fvEXT = NULL; -PFNGLPROGRAMUNIFORM4IEXTPROC glad_glProgramUniform4iEXT = NULL; -PFNGLPROGRAMUNIFORM4IVEXTPROC glad_glProgramUniform4ivEXT = NULL; -PFNGLPROGRAMUNIFORMMATRIX2FVEXTPROC glad_glProgramUniformMatrix2fvEXT = NULL; -PFNGLPROGRAMUNIFORMMATRIX3FVEXTPROC glad_glProgramUniformMatrix3fvEXT = NULL; -PFNGLPROGRAMUNIFORMMATRIX4FVEXTPROC glad_glProgramUniformMatrix4fvEXT = NULL; PFNGLUSEPROGRAMSTAGESEXTPROC glad_glUseProgramStagesEXT = NULL; PFNGLVALIDATEPROGRAMPIPELINEEXTPROC glad_glValidateProgramPipelineEXT = NULL; -PFNGLPROGRAMUNIFORM1UIEXTPROC glad_glProgramUniform1uiEXT = NULL; -PFNGLPROGRAMUNIFORM2UIEXTPROC glad_glProgramUniform2uiEXT = NULL; -PFNGLPROGRAMUNIFORM3UIEXTPROC glad_glProgramUniform3uiEXT = NULL; -PFNGLPROGRAMUNIFORM4UIEXTPROC glad_glProgramUniform4uiEXT = NULL; -PFNGLPROGRAMUNIFORM1UIVEXTPROC glad_glProgramUniform1uivEXT = NULL; -PFNGLPROGRAMUNIFORM2UIVEXTPROC glad_glProgramUniform2uivEXT = NULL; -PFNGLPROGRAMUNIFORM3UIVEXTPROC glad_glProgramUniform3uivEXT = NULL; -PFNGLPROGRAMUNIFORM4UIVEXTPROC glad_glProgramUniform4uivEXT = NULL; -PFNGLPROGRAMUNIFORMMATRIX2X3FVEXTPROC glad_glProgramUniformMatrix2x3fvEXT = NULL; -PFNGLPROGRAMUNIFORMMATRIX3X2FVEXTPROC glad_glProgramUniformMatrix3x2fvEXT = NULL; -PFNGLPROGRAMUNIFORMMATRIX2X4FVEXTPROC glad_glProgramUniformMatrix2x4fvEXT = NULL; -PFNGLPROGRAMUNIFORMMATRIX4X2FVEXTPROC glad_glProgramUniformMatrix4x2fvEXT = NULL; -PFNGLPROGRAMUNIFORMMATRIX3X4FVEXTPROC glad_glProgramUniformMatrix3x4fvEXT = NULL; -PFNGLPROGRAMUNIFORMMATRIX4X3FVEXTPROC glad_glProgramUniformMatrix4x3fvEXT = NULL; PFNGLFRAMEBUFFERFETCHBARRIEREXTPROC glad_glFramebufferFetchBarrierEXT = NULL; -PFNGLFRAMEBUFFERPIXELLOCALSTORAGESIZEEXTPROC glad_glFramebufferPixelLocalStorageSizeEXT = NULL; -PFNGLGETFRAMEBUFFERPIXELLOCALSTORAGESIZEEXTPROC glad_glGetFramebufferPixelLocalStorageSizeEXT = NULL; -PFNGLCLEARPIXELLOCALSTORAGEUIEXTPROC glad_glClearPixelLocalStorageuiEXT = NULL; 
-PFNGLTEXPAGECOMMITMENTEXTPROC glad_glTexPageCommitmentEXT = NULL; -PFNGLPATCHPARAMETERIEXTPROC glad_glPatchParameteriEXT = NULL; +PFNGLBINDIMAGETEXTUREEXTPROC glad_glBindImageTextureEXT = NULL; +PFNGLMEMORYBARRIEREXTPROC glad_glMemoryBarrierEXT = NULL; +PFNGLSTENCILCLEARTAGEXTPROC glad_glStencilClearTagEXT = NULL; +PFNGLACTIVESTENCILFACEEXTPROC glad_glActiveStencilFaceEXT = NULL; +PFNGLTEXSUBIMAGE1DEXTPROC glad_glTexSubImage1DEXT = NULL; +PFNGLTEXSUBIMAGE2DEXTPROC glad_glTexSubImage2DEXT = NULL; +PFNGLTEXIMAGE3DEXTPROC glad_glTexImage3DEXT = NULL; +PFNGLTEXSUBIMAGE3DEXTPROC glad_glTexSubImage3DEXT = NULL; +PFNGLFRAMEBUFFERTEXTURELAYEREXTPROC glad_glFramebufferTextureLayerEXT = NULL; +PFNGLTEXBUFFEREXTPROC glad_glTexBufferEXT = NULL; PFNGLTEXPARAMETERIIVEXTPROC glad_glTexParameterIivEXT = NULL; PFNGLTEXPARAMETERIUIVEXTPROC glad_glTexParameterIuivEXT = NULL; PFNGLGETTEXPARAMETERIIVEXTPROC glad_glGetTexParameterIivEXT = NULL; PFNGLGETTEXPARAMETERIUIVEXTPROC glad_glGetTexParameterIuivEXT = NULL; -PFNGLSAMPLERPARAMETERIIVEXTPROC glad_glSamplerParameterIivEXT = NULL; -PFNGLSAMPLERPARAMETERIUIVEXTPROC glad_glSamplerParameterIuivEXT = NULL; -PFNGLGETSAMPLERPARAMETERIIVEXTPROC glad_glGetSamplerParameterIivEXT = NULL; -PFNGLGETSAMPLERPARAMETERIUIVEXTPROC glad_glGetSamplerParameterIuivEXT = NULL; -PFNGLTEXBUFFEREXTPROC glad_glTexBufferEXT = NULL; -PFNGLTEXBUFFERRANGEEXTPROC glad_glTexBufferRangeEXT = NULL; +PFNGLCLEARCOLORIIEXTPROC glad_glClearColorIiEXT = NULL; +PFNGLCLEARCOLORIUIEXTPROC glad_glClearColorIuiEXT = NULL; +PFNGLARETEXTURESRESIDENTEXTPROC glad_glAreTexturesResidentEXT = NULL; +PFNGLBINDTEXTUREEXTPROC glad_glBindTextureEXT = NULL; +PFNGLDELETETEXTURESEXTPROC glad_glDeleteTexturesEXT = NULL; +PFNGLGENTEXTURESEXTPROC glad_glGenTexturesEXT = NULL; +PFNGLISTEXTUREEXTPROC glad_glIsTextureEXT = NULL; +PFNGLPRIORITIZETEXTURESEXTPROC glad_glPrioritizeTexturesEXT = NULL; +PFNGLTEXTURENORMALEXTPROC glad_glTextureNormalEXT = NULL; PFNGLTEXSTORAGE1DEXTPROC 
glad_glTexStorage1DEXT = NULL; PFNGLTEXSTORAGE2DEXTPROC glad_glTexStorage2DEXT = NULL; PFNGLTEXSTORAGE3DEXTPROC glad_glTexStorage3DEXT = NULL; -PFNGLTEXTURESTORAGE1DEXTPROC glad_glTextureStorage1DEXT = NULL; -PFNGLTEXTURESTORAGE2DEXTPROC glad_glTextureStorage2DEXT = NULL; -PFNGLTEXTURESTORAGE3DEXTPROC glad_glTextureStorage3DEXT = NULL; -PFNGLTEXSTORAGEATTRIBS2DEXTPROC glad_glTexStorageAttribs2DEXT = NULL; -PFNGLTEXSTORAGEATTRIBS3DEXTPROC glad_glTexStorageAttribs3DEXT = NULL; -PFNGLTEXTUREVIEWEXTPROC glad_glTextureViewEXT = NULL; +PFNGLGETQUERYOBJECTI64VEXTPROC glad_glGetQueryObjecti64vEXT = NULL; +PFNGLGETQUERYOBJECTUI64VEXTPROC glad_glGetQueryObjectui64vEXT = NULL; +PFNGLBEGINTRANSFORMFEEDBACKEXTPROC glad_glBeginTransformFeedbackEXT = NULL; +PFNGLENDTRANSFORMFEEDBACKEXTPROC glad_glEndTransformFeedbackEXT = NULL; +PFNGLBINDBUFFERRANGEEXTPROC glad_glBindBufferRangeEXT = NULL; +PFNGLBINDBUFFEROFFSETEXTPROC glad_glBindBufferOffsetEXT = NULL; +PFNGLBINDBUFFERBASEEXTPROC glad_glBindBufferBaseEXT = NULL; +PFNGLTRANSFORMFEEDBACKVARYINGSEXTPROC glad_glTransformFeedbackVaryingsEXT = NULL; +PFNGLGETTRANSFORMFEEDBACKVARYINGEXTPROC glad_glGetTransformFeedbackVaryingEXT = NULL; +PFNGLARRAYELEMENTEXTPROC glad_glArrayElementEXT = NULL; +PFNGLCOLORPOINTEREXTPROC glad_glColorPointerEXT = NULL; +PFNGLDRAWARRAYSEXTPROC glad_glDrawArraysEXT = NULL; +PFNGLEDGEFLAGPOINTEREXTPROC glad_glEdgeFlagPointerEXT = NULL; +PFNGLGETPOINTERVEXTPROC glad_glGetPointervEXT = NULL; +PFNGLINDEXPOINTEREXTPROC glad_glIndexPointerEXT = NULL; +PFNGLNORMALPOINTEREXTPROC glad_glNormalPointerEXT = NULL; +PFNGLTEXCOORDPOINTEREXTPROC glad_glTexCoordPointerEXT = NULL; +PFNGLVERTEXPOINTEREXTPROC glad_glVertexPointerEXT = NULL; +PFNGLVERTEXATTRIBL1DEXTPROC glad_glVertexAttribL1dEXT = NULL; +PFNGLVERTEXATTRIBL2DEXTPROC glad_glVertexAttribL2dEXT = NULL; +PFNGLVERTEXATTRIBL3DEXTPROC glad_glVertexAttribL3dEXT = NULL; +PFNGLVERTEXATTRIBL4DEXTPROC glad_glVertexAttribL4dEXT = NULL; +PFNGLVERTEXATTRIBL1DVEXTPROC 
glad_glVertexAttribL1dvEXT = NULL; +PFNGLVERTEXATTRIBL2DVEXTPROC glad_glVertexAttribL2dvEXT = NULL; +PFNGLVERTEXATTRIBL3DVEXTPROC glad_glVertexAttribL3dvEXT = NULL; +PFNGLVERTEXATTRIBL4DVEXTPROC glad_glVertexAttribL4dvEXT = NULL; +PFNGLVERTEXATTRIBLPOINTEREXTPROC glad_glVertexAttribLPointerEXT = NULL; +PFNGLGETVERTEXATTRIBLDVEXTPROC glad_glGetVertexAttribLdvEXT = NULL; +PFNGLBEGINVERTEXSHADEREXTPROC glad_glBeginVertexShaderEXT = NULL; +PFNGLENDVERTEXSHADEREXTPROC glad_glEndVertexShaderEXT = NULL; +PFNGLBINDVERTEXSHADEREXTPROC glad_glBindVertexShaderEXT = NULL; +PFNGLGENVERTEXSHADERSEXTPROC glad_glGenVertexShadersEXT = NULL; +PFNGLDELETEVERTEXSHADEREXTPROC glad_glDeleteVertexShaderEXT = NULL; +PFNGLSHADEROP1EXTPROC glad_glShaderOp1EXT = NULL; +PFNGLSHADEROP2EXTPROC glad_glShaderOp2EXT = NULL; +PFNGLSHADEROP3EXTPROC glad_glShaderOp3EXT = NULL; +PFNGLSWIZZLEEXTPROC glad_glSwizzleEXT = NULL; +PFNGLWRITEMASKEXTPROC glad_glWriteMaskEXT = NULL; +PFNGLINSERTCOMPONENTEXTPROC glad_glInsertComponentEXT = NULL; +PFNGLEXTRACTCOMPONENTEXTPROC glad_glExtractComponentEXT = NULL; +PFNGLGENSYMBOLSEXTPROC glad_glGenSymbolsEXT = NULL; +PFNGLSETINVARIANTEXTPROC glad_glSetInvariantEXT = NULL; +PFNGLSETLOCALCONSTANTEXTPROC glad_glSetLocalConstantEXT = NULL; +PFNGLVARIANTBVEXTPROC glad_glVariantbvEXT = NULL; +PFNGLVARIANTSVEXTPROC glad_glVariantsvEXT = NULL; +PFNGLVARIANTIVEXTPROC glad_glVariantivEXT = NULL; +PFNGLVARIANTFVEXTPROC glad_glVariantfvEXT = NULL; +PFNGLVARIANTDVEXTPROC glad_glVariantdvEXT = NULL; +PFNGLVARIANTUBVEXTPROC glad_glVariantubvEXT = NULL; +PFNGLVARIANTUSVEXTPROC glad_glVariantusvEXT = NULL; +PFNGLVARIANTUIVEXTPROC glad_glVariantuivEXT = NULL; +PFNGLVARIANTPOINTEREXTPROC glad_glVariantPointerEXT = NULL; +PFNGLENABLEVARIANTCLIENTSTATEEXTPROC glad_glEnableVariantClientStateEXT = NULL; +PFNGLDISABLEVARIANTCLIENTSTATEEXTPROC glad_glDisableVariantClientStateEXT = NULL; +PFNGLBINDLIGHTPARAMETEREXTPROC glad_glBindLightParameterEXT = NULL; +PFNGLBINDMATERIALPARAMETEREXTPROC 
glad_glBindMaterialParameterEXT = NULL; +PFNGLBINDTEXGENPARAMETEREXTPROC glad_glBindTexGenParameterEXT = NULL; +PFNGLBINDTEXTUREUNITPARAMETEREXTPROC glad_glBindTextureUnitParameterEXT = NULL; +PFNGLBINDPARAMETEREXTPROC glad_glBindParameterEXT = NULL; +PFNGLISVARIANTENABLEDEXTPROC glad_glIsVariantEnabledEXT = NULL; +PFNGLGETVARIANTBOOLEANVEXTPROC glad_glGetVariantBooleanvEXT = NULL; +PFNGLGETVARIANTINTEGERVEXTPROC glad_glGetVariantIntegervEXT = NULL; +PFNGLGETVARIANTFLOATVEXTPROC glad_glGetVariantFloatvEXT = NULL; +PFNGLGETVARIANTPOINTERVEXTPROC glad_glGetVariantPointervEXT = NULL; +PFNGLGETINVARIANTBOOLEANVEXTPROC glad_glGetInvariantBooleanvEXT = NULL; +PFNGLGETINVARIANTINTEGERVEXTPROC glad_glGetInvariantIntegervEXT = NULL; +PFNGLGETINVARIANTFLOATVEXTPROC glad_glGetInvariantFloatvEXT = NULL; +PFNGLGETLOCALCONSTANTBOOLEANVEXTPROC glad_glGetLocalConstantBooleanvEXT = NULL; +PFNGLGETLOCALCONSTANTINTEGERVEXTPROC glad_glGetLocalConstantIntegervEXT = NULL; +PFNGLGETLOCALCONSTANTFLOATVEXTPROC glad_glGetLocalConstantFloatvEXT = NULL; +PFNGLVERTEXWEIGHTFEXTPROC glad_glVertexWeightfEXT = NULL; +PFNGLVERTEXWEIGHTFVEXTPROC glad_glVertexWeightfvEXT = NULL; +PFNGLVERTEXWEIGHTPOINTEREXTPROC glad_glVertexWeightPointerEXT = NULL; PFNGLACQUIREKEYEDMUTEXWIN32EXTPROC glad_glAcquireKeyedMutexWin32EXT = NULL; PFNGLRELEASEKEYEDMUTEXWIN32EXTPROC glad_glReleaseKeyedMutexWin32EXT = NULL; PFNGLWINDOWRECTANGLESEXTPROC glad_glWindowRectanglesEXT = NULL; -PFNGLGETTEXTUREHANDLEIMGPROC glad_glGetTextureHandleIMG = NULL; -PFNGLGETTEXTURESAMPLERHANDLEIMGPROC glad_glGetTextureSamplerHandleIMG = NULL; -PFNGLUNIFORMHANDLEUI64IMGPROC glad_glUniformHandleui64IMG = NULL; -PFNGLUNIFORMHANDLEUI64VIMGPROC glad_glUniformHandleui64vIMG = NULL; -PFNGLPROGRAMUNIFORMHANDLEUI64IMGPROC glad_glProgramUniformHandleui64IMG = NULL; -PFNGLPROGRAMUNIFORMHANDLEUI64VIMGPROC glad_glProgramUniformHandleui64vIMG = NULL; -PFNGLFRAMEBUFFERTEXTURE2DDOWNSAMPLEIMGPROC glad_glFramebufferTexture2DDownsampleIMG = NULL; 
-PFNGLFRAMEBUFFERTEXTURELAYERDOWNSAMPLEIMGPROC glad_glFramebufferTextureLayerDownsampleIMG = NULL; -PFNGLRENDERBUFFERSTORAGEMULTISAMPLEIMGPROC glad_glRenderbufferStorageMultisampleIMG = NULL; -PFNGLFRAMEBUFFERTEXTURE2DMULTISAMPLEIMGPROC glad_glFramebufferTexture2DMultisampleIMG = NULL; +PFNGLIMPORTSYNCEXTPROC glad_glImportSyncEXT = NULL; +PFNGLFRAMETERMINATORGREMEDYPROC glad_glFrameTerminatorGREMEDY = NULL; +PFNGLSTRINGMARKERGREMEDYPROC glad_glStringMarkerGREMEDY = NULL; +PFNGLIMAGETRANSFORMPARAMETERIHPPROC glad_glImageTransformParameteriHP = NULL; +PFNGLIMAGETRANSFORMPARAMETERFHPPROC glad_glImageTransformParameterfHP = NULL; +PFNGLIMAGETRANSFORMPARAMETERIVHPPROC glad_glImageTransformParameterivHP = NULL; +PFNGLIMAGETRANSFORMPARAMETERFVHPPROC glad_glImageTransformParameterfvHP = NULL; +PFNGLGETIMAGETRANSFORMPARAMETERIVHPPROC glad_glGetImageTransformParameterivHP = NULL; +PFNGLGETIMAGETRANSFORMPARAMETERFVHPPROC glad_glGetImageTransformParameterfvHP = NULL; +PFNGLMULTIMODEDRAWARRAYSIBMPROC glad_glMultiModeDrawArraysIBM = NULL; +PFNGLMULTIMODEDRAWELEMENTSIBMPROC glad_glMultiModeDrawElementsIBM = NULL; +PFNGLFLUSHSTATICDATAIBMPROC glad_glFlushStaticDataIBM = NULL; +PFNGLCOLORPOINTERLISTIBMPROC glad_glColorPointerListIBM = NULL; +PFNGLSECONDARYCOLORPOINTERLISTIBMPROC glad_glSecondaryColorPointerListIBM = NULL; +PFNGLEDGEFLAGPOINTERLISTIBMPROC glad_glEdgeFlagPointerListIBM = NULL; +PFNGLFOGCOORDPOINTERLISTIBMPROC glad_glFogCoordPointerListIBM = NULL; +PFNGLINDEXPOINTERLISTIBMPROC glad_glIndexPointerListIBM = NULL; +PFNGLNORMALPOINTERLISTIBMPROC glad_glNormalPointerListIBM = NULL; +PFNGLTEXCOORDPOINTERLISTIBMPROC glad_glTexCoordPointerListIBM = NULL; +PFNGLVERTEXPOINTERLISTIBMPROC glad_glVertexPointerListIBM = NULL; +PFNGLBLENDFUNCSEPARATEINGRPROC glad_glBlendFuncSeparateINGR = NULL; PFNGLAPPLYFRAMEBUFFERATTACHMENTCMAAINTELPROC glad_glApplyFramebufferAttachmentCMAAINTEL = NULL; +PFNGLSYNCTEXTUREINTELPROC glad_glSyncTextureINTEL = NULL; +PFNGLUNMAPTEXTURE2DINTELPROC 
glad_glUnmapTexture2DINTEL = NULL; +PFNGLMAPTEXTURE2DINTELPROC glad_glMapTexture2DINTEL = NULL; +PFNGLVERTEXPOINTERVINTELPROC glad_glVertexPointervINTEL = NULL; +PFNGLNORMALPOINTERVINTELPROC glad_glNormalPointervINTEL = NULL; +PFNGLCOLORPOINTERVINTELPROC glad_glColorPointervINTEL = NULL; +PFNGLTEXCOORDPOINTERVINTELPROC glad_glTexCoordPointervINTEL = NULL; PFNGLBEGINPERFQUERYINTELPROC glad_glBeginPerfQueryINTEL = NULL; PFNGLCREATEPERFQUERYINTELPROC glad_glCreatePerfQueryINTEL = NULL; PFNGLDELETEPERFQUERYINTELPROC glad_glDeletePerfQueryINTEL = NULL; @@ -1396,6 +4284,51 @@ PFNGLGETNUNIFORMIVKHRPROC glad_glGetnUniformivKHR = NULL; PFNGLGETNUNIFORMUIVKHRPROC glad_glGetnUniformuivKHR = NULL; PFNGLFRAMEBUFFERPARAMETERIMESAPROC glad_glFramebufferParameteriMESA = NULL; PFNGLGETFRAMEBUFFERPARAMETERIVMESAPROC glad_glGetFramebufferParameterivMESA = NULL; +PFNGLRESIZEBUFFERSMESAPROC glad_glResizeBuffersMESA = NULL; +PFNGLWINDOWPOS2DMESAPROC glad_glWindowPos2dMESA = NULL; +PFNGLWINDOWPOS2DVMESAPROC glad_glWindowPos2dvMESA = NULL; +PFNGLWINDOWPOS2FMESAPROC glad_glWindowPos2fMESA = NULL; +PFNGLWINDOWPOS2FVMESAPROC glad_glWindowPos2fvMESA = NULL; +PFNGLWINDOWPOS2IMESAPROC glad_glWindowPos2iMESA = NULL; +PFNGLWINDOWPOS2IVMESAPROC glad_glWindowPos2ivMESA = NULL; +PFNGLWINDOWPOS2SMESAPROC glad_glWindowPos2sMESA = NULL; +PFNGLWINDOWPOS2SVMESAPROC glad_glWindowPos2svMESA = NULL; +PFNGLWINDOWPOS3DMESAPROC glad_glWindowPos3dMESA = NULL; +PFNGLWINDOWPOS3DVMESAPROC glad_glWindowPos3dvMESA = NULL; +PFNGLWINDOWPOS3FMESAPROC glad_glWindowPos3fMESA = NULL; +PFNGLWINDOWPOS3FVMESAPROC glad_glWindowPos3fvMESA = NULL; +PFNGLWINDOWPOS3IMESAPROC glad_glWindowPos3iMESA = NULL; +PFNGLWINDOWPOS3IVMESAPROC glad_glWindowPos3ivMESA = NULL; +PFNGLWINDOWPOS3SMESAPROC glad_glWindowPos3sMESA = NULL; +PFNGLWINDOWPOS3SVMESAPROC glad_glWindowPos3svMESA = NULL; +PFNGLWINDOWPOS4DMESAPROC glad_glWindowPos4dMESA = NULL; +PFNGLWINDOWPOS4DVMESAPROC glad_glWindowPos4dvMESA = NULL; +PFNGLWINDOWPOS4FMESAPROC 
glad_glWindowPos4fMESA = NULL; +PFNGLWINDOWPOS4FVMESAPROC glad_glWindowPos4fvMESA = NULL; +PFNGLWINDOWPOS4IMESAPROC glad_glWindowPos4iMESA = NULL; +PFNGLWINDOWPOS4IVMESAPROC glad_glWindowPos4ivMESA = NULL; +PFNGLWINDOWPOS4SMESAPROC glad_glWindowPos4sMESA = NULL; +PFNGLWINDOWPOS4SVMESAPROC glad_glWindowPos4svMESA = NULL; +PFNGLBEGINCONDITIONALRENDERNVXPROC glad_glBeginConditionalRenderNVX = NULL; +PFNGLENDCONDITIONALRENDERNVXPROC glad_glEndConditionalRenderNVX = NULL; +PFNGLUPLOADGPUMASKNVXPROC glad_glUploadGpuMaskNVX = NULL; +PFNGLMULTICASTVIEWPORTARRAYVNVXPROC glad_glMulticastViewportArrayvNVX = NULL; +PFNGLMULTICASTVIEWPORTPOSITIONWSCALENVXPROC glad_glMulticastViewportPositionWScaleNVX = NULL; +PFNGLMULTICASTSCISSORARRAYVNVXPROC glad_glMulticastScissorArrayvNVX = NULL; +PFNGLASYNCCOPYBUFFERSUBDATANVXPROC glad_glAsyncCopyBufferSubDataNVX = NULL; +PFNGLASYNCCOPYIMAGESUBDATANVXPROC glad_glAsyncCopyImageSubDataNVX = NULL; +PFNGLLGPUNAMEDBUFFERSUBDATANVXPROC glad_glLGPUNamedBufferSubDataNVX = NULL; +PFNGLLGPUCOPYIMAGESUBDATANVXPROC glad_glLGPUCopyImageSubDataNVX = NULL; +PFNGLLGPUINTERLOCKNVXPROC glad_glLGPUInterlockNVX = NULL; +PFNGLCREATEPROGRESSFENCENVXPROC glad_glCreateProgressFenceNVX = NULL; +PFNGLSIGNALSEMAPHOREUI64NVXPROC glad_glSignalSemaphoreui64NVX = NULL; +PFNGLWAITSEMAPHOREUI64NVXPROC glad_glWaitSemaphoreui64NVX = NULL; +PFNGLCLIENTWAITSEMAPHOREUI64NVXPROC glad_glClientWaitSemaphoreui64NVX = NULL; +PFNGLALPHATOCOVERAGEDITHERCONTROLNVPROC glad_glAlphaToCoverageDitherControlNV = NULL; +PFNGLMULTIDRAWARRAYSINDIRECTBINDLESSNVPROC glad_glMultiDrawArraysIndirectBindlessNV = NULL; +PFNGLMULTIDRAWELEMENTSINDIRECTBINDLESSNVPROC glad_glMultiDrawElementsIndirectBindlessNV = NULL; +PFNGLMULTIDRAWARRAYSINDIRECTBINDLESSCOUNTNVPROC glad_glMultiDrawArraysIndirectBindlessCountNV = NULL; +PFNGLMULTIDRAWELEMENTSINDIRECTBINDLESSCOUNTNVPROC glad_glMultiDrawElementsIndirectBindlessCountNV = NULL; PFNGLGETTEXTUREHANDLENVPROC glad_glGetTextureHandleNV = NULL; 
PFNGLGETTEXTURESAMPLERHANDLENVPROC glad_glGetTextureSamplerHandleNV = NULL; PFNGLMAKETEXTUREHANDLERESIDENTNVPROC glad_glMakeTextureHandleResidentNV = NULL; @@ -1412,21 +4345,50 @@ PFNGLISIMAGEHANDLERESIDENTNVPROC glad_glIsImageHandleResidentNV = NULL; PFNGLBLENDPARAMETERINVPROC glad_glBlendParameteriNV = NULL; PFNGLBLENDBARRIERNVPROC glad_glBlendBarrierNV = NULL; PFNGLVIEWPORTPOSITIONWSCALENVPROC glad_glViewportPositionWScaleNV = NULL; +PFNGLCREATESTATESNVPROC glad_glCreateStatesNV = NULL; +PFNGLDELETESTATESNVPROC glad_glDeleteStatesNV = NULL; +PFNGLISSTATENVPROC glad_glIsStateNV = NULL; +PFNGLSTATECAPTURENVPROC glad_glStateCaptureNV = NULL; +PFNGLGETCOMMANDHEADERNVPROC glad_glGetCommandHeaderNV = NULL; +PFNGLGETSTAGEINDEXNVPROC glad_glGetStageIndexNV = NULL; +PFNGLDRAWCOMMANDSNVPROC glad_glDrawCommandsNV = NULL; +PFNGLDRAWCOMMANDSADDRESSNVPROC glad_glDrawCommandsAddressNV = NULL; +PFNGLDRAWCOMMANDSSTATESNVPROC glad_glDrawCommandsStatesNV = NULL; +PFNGLDRAWCOMMANDSSTATESADDRESSNVPROC glad_glDrawCommandsStatesAddressNV = NULL; +PFNGLCREATECOMMANDLISTSNVPROC glad_glCreateCommandListsNV = NULL; +PFNGLDELETECOMMANDLISTSNVPROC glad_glDeleteCommandListsNV = NULL; +PFNGLISCOMMANDLISTNVPROC glad_glIsCommandListNV = NULL; +PFNGLLISTDRAWCOMMANDSSTATESCLIENTNVPROC glad_glListDrawCommandsStatesClientNV = NULL; +PFNGLCOMMANDLISTSEGMENTSNVPROC glad_glCommandListSegmentsNV = NULL; +PFNGLCOMPILECOMMANDLISTNVPROC glad_glCompileCommandListNV = NULL; +PFNGLCALLCOMMANDLISTNVPROC glad_glCallCommandListNV = NULL; PFNGLBEGINCONDITIONALRENDERNVPROC glad_glBeginConditionalRenderNV = NULL; PFNGLENDCONDITIONALRENDERNVPROC glad_glEndConditionalRenderNV = NULL; PFNGLSUBPIXELPRECISIONBIASNVPROC glad_glSubpixelPrecisionBiasNV = NULL; +PFNGLCONSERVATIVERASTERPARAMETERFNVPROC glad_glConservativeRasterParameterfNV = NULL; PFNGLCONSERVATIVERASTERPARAMETERINVPROC glad_glConservativeRasterParameteriNV = NULL; -PFNGLCOPYBUFFERSUBDATANVPROC glad_glCopyBufferSubDataNV = NULL; -PFNGLCOVERAGEMASKNVPROC 
glad_glCoverageMaskNV = NULL; -PFNGLCOVERAGEOPERATIONNVPROC glad_glCoverageOperationNV = NULL; -PFNGLDRAWBUFFERSNVPROC glad_glDrawBuffersNV = NULL; -PFNGLDRAWARRAYSINSTANCEDNVPROC glad_glDrawArraysInstancedNV = NULL; -PFNGLDRAWELEMENTSINSTANCEDNVPROC glad_glDrawElementsInstancedNV = NULL; +PFNGLCOPYIMAGESUBDATANVPROC glad_glCopyImageSubDataNV = NULL; +PFNGLDEPTHRANGEDNVPROC glad_glDepthRangedNV = NULL; +PFNGLCLEARDEPTHDNVPROC glad_glClearDepthdNV = NULL; +PFNGLDEPTHBOUNDSDNVPROC glad_glDepthBoundsdNV = NULL; +PFNGLDRAWTEXTURENVPROC glad_glDrawTextureNV = NULL; PFNGLDRAWVKIMAGENVPROC glad_glDrawVkImageNV = NULL; PFNGLGETVKPROCADDRNVPROC glad_glGetVkProcAddrNV = NULL; PFNGLWAITVKSEMAPHORENVPROC glad_glWaitVkSemaphoreNV = NULL; PFNGLSIGNALVKSEMAPHORENVPROC glad_glSignalVkSemaphoreNV = NULL; PFNGLSIGNALVKFENCENVPROC glad_glSignalVkFenceNV = NULL; +PFNGLMAPCONTROLPOINTSNVPROC glad_glMapControlPointsNV = NULL; +PFNGLMAPPARAMETERIVNVPROC glad_glMapParameterivNV = NULL; +PFNGLMAPPARAMETERFVNVPROC glad_glMapParameterfvNV = NULL; +PFNGLGETMAPCONTROLPOINTSNVPROC glad_glGetMapControlPointsNV = NULL; +PFNGLGETMAPPARAMETERIVNVPROC glad_glGetMapParameterivNV = NULL; +PFNGLGETMAPPARAMETERFVNVPROC glad_glGetMapParameterfvNV = NULL; +PFNGLGETMAPATTRIBPARAMETERIVNVPROC glad_glGetMapAttribParameterivNV = NULL; +PFNGLGETMAPATTRIBPARAMETERFVNVPROC glad_glGetMapAttribParameterfvNV = NULL; +PFNGLEVALMAPSNVPROC glad_glEvalMapsNV = NULL; +PFNGLGETMULTISAMPLEFVNVPROC glad_glGetMultisamplefvNV = NULL; +PFNGLSAMPLEMASKINDEXEDNVPROC glad_glSampleMaskIndexedNV = NULL; +PFNGLTEXRENDERBUFFERNVPROC glad_glTexRenderbufferNV = NULL; PFNGLDELETEFENCESNVPROC glad_glDeleteFencesNV = NULL; PFNGLGENFENCESNVPROC glad_glGenFencesNV = NULL; PFNGLISFENCENVPROC glad_glIsFenceNV = NULL; @@ -1435,45 +4397,95 @@ PFNGLGETFENCEIVNVPROC glad_glGetFenceivNV = NULL; PFNGLFINISHFENCENVPROC glad_glFinishFenceNV = NULL; PFNGLSETFENCENVPROC glad_glSetFenceNV = NULL; PFNGLFRAGMENTCOVERAGECOLORNVPROC 
glad_glFragmentCoverageColorNV = NULL; -PFNGLBLITFRAMEBUFFERNVPROC glad_glBlitFramebufferNV = NULL; +PFNGLPROGRAMNAMEDPARAMETER4FNVPROC glad_glProgramNamedParameter4fNV = NULL; +PFNGLPROGRAMNAMEDPARAMETER4FVNVPROC glad_glProgramNamedParameter4fvNV = NULL; +PFNGLPROGRAMNAMEDPARAMETER4DNVPROC glad_glProgramNamedParameter4dNV = NULL; +PFNGLPROGRAMNAMEDPARAMETER4DVNVPROC glad_glProgramNamedParameter4dvNV = NULL; +PFNGLGETPROGRAMNAMEDPARAMETERFVNVPROC glad_glGetProgramNamedParameterfvNV = NULL; +PFNGLGETPROGRAMNAMEDPARAMETERDVNVPROC glad_glGetProgramNamedParameterdvNV = NULL; PFNGLCOVERAGEMODULATIONTABLENVPROC glad_glCoverageModulationTableNV = NULL; PFNGLGETCOVERAGEMODULATIONTABLENVPROC glad_glGetCoverageModulationTableNV = NULL; PFNGLCOVERAGEMODULATIONNVPROC glad_glCoverageModulationNV = NULL; -PFNGLRENDERBUFFERSTORAGEMULTISAMPLENVPROC glad_glRenderbufferStorageMultisampleNV = NULL; -PFNGLUNIFORM1I64NVPROC glad_glUniform1i64NV = NULL; -PFNGLUNIFORM2I64NVPROC glad_glUniform2i64NV = NULL; -PFNGLUNIFORM3I64NVPROC glad_glUniform3i64NV = NULL; -PFNGLUNIFORM4I64NVPROC glad_glUniform4i64NV = NULL; -PFNGLUNIFORM1I64VNVPROC glad_glUniform1i64vNV = NULL; -PFNGLUNIFORM2I64VNVPROC glad_glUniform2i64vNV = NULL; -PFNGLUNIFORM3I64VNVPROC glad_glUniform3i64vNV = NULL; -PFNGLUNIFORM4I64VNVPROC glad_glUniform4i64vNV = NULL; -PFNGLUNIFORM1UI64NVPROC glad_glUniform1ui64NV = NULL; -PFNGLUNIFORM2UI64NVPROC glad_glUniform2ui64NV = NULL; -PFNGLUNIFORM3UI64NVPROC glad_glUniform3ui64NV = NULL; -PFNGLUNIFORM4UI64NVPROC glad_glUniform4ui64NV = NULL; -PFNGLUNIFORM1UI64VNVPROC glad_glUniform1ui64vNV = NULL; -PFNGLUNIFORM2UI64VNVPROC glad_glUniform2ui64vNV = NULL; -PFNGLUNIFORM3UI64VNVPROC glad_glUniform3ui64vNV = NULL; -PFNGLUNIFORM4UI64VNVPROC glad_glUniform4ui64vNV = NULL; -PFNGLGETUNIFORMI64VNVPROC glad_glGetUniformi64vNV = NULL; -PFNGLPROGRAMUNIFORM1I64NVPROC glad_glProgramUniform1i64NV = NULL; -PFNGLPROGRAMUNIFORM2I64NVPROC glad_glProgramUniform2i64NV = NULL; -PFNGLPROGRAMUNIFORM3I64NVPROC 
glad_glProgramUniform3i64NV = NULL; -PFNGLPROGRAMUNIFORM4I64NVPROC glad_glProgramUniform4i64NV = NULL; -PFNGLPROGRAMUNIFORM1I64VNVPROC glad_glProgramUniform1i64vNV = NULL; -PFNGLPROGRAMUNIFORM2I64VNVPROC glad_glProgramUniform2i64vNV = NULL; -PFNGLPROGRAMUNIFORM3I64VNVPROC glad_glProgramUniform3i64vNV = NULL; -PFNGLPROGRAMUNIFORM4I64VNVPROC glad_glProgramUniform4i64vNV = NULL; -PFNGLPROGRAMUNIFORM1UI64NVPROC glad_glProgramUniform1ui64NV = NULL; -PFNGLPROGRAMUNIFORM2UI64NVPROC glad_glProgramUniform2ui64NV = NULL; -PFNGLPROGRAMUNIFORM3UI64NVPROC glad_glProgramUniform3ui64NV = NULL; -PFNGLPROGRAMUNIFORM4UI64NVPROC glad_glProgramUniform4ui64NV = NULL; -PFNGLPROGRAMUNIFORM1UI64VNVPROC glad_glProgramUniform1ui64vNV = NULL; -PFNGLPROGRAMUNIFORM2UI64VNVPROC glad_glProgramUniform2ui64vNV = NULL; -PFNGLPROGRAMUNIFORM3UI64VNVPROC glad_glProgramUniform3ui64vNV = NULL; -PFNGLPROGRAMUNIFORM4UI64VNVPROC glad_glProgramUniform4ui64vNV = NULL; -PFNGLVERTEXATTRIBDIVISORNVPROC glad_glVertexAttribDivisorNV = NULL; +PFNGLRENDERBUFFERSTORAGEMULTISAMPLECOVERAGENVPROC glad_glRenderbufferStorageMultisampleCoverageNV = NULL; +PFNGLPROGRAMVERTEXLIMITNVPROC glad_glProgramVertexLimitNV = NULL; +PFNGLFRAMEBUFFERTEXTUREEXTPROC glad_glFramebufferTextureEXT = NULL; +PFNGLFRAMEBUFFERTEXTUREFACEEXTPROC glad_glFramebufferTextureFaceEXT = NULL; +PFNGLRENDERGPUMASKNVPROC glad_glRenderGpuMaskNV = NULL; +PFNGLMULTICASTBUFFERSUBDATANVPROC glad_glMulticastBufferSubDataNV = NULL; +PFNGLMULTICASTCOPYBUFFERSUBDATANVPROC glad_glMulticastCopyBufferSubDataNV = NULL; +PFNGLMULTICASTCOPYIMAGESUBDATANVPROC glad_glMulticastCopyImageSubDataNV = NULL; +PFNGLMULTICASTBLITFRAMEBUFFERNVPROC glad_glMulticastBlitFramebufferNV = NULL; +PFNGLMULTICASTFRAMEBUFFERSAMPLELOCATIONSFVNVPROC glad_glMulticastFramebufferSampleLocationsfvNV = NULL; +PFNGLMULTICASTBARRIERNVPROC glad_glMulticastBarrierNV = NULL; +PFNGLMULTICASTWAITSYNCNVPROC glad_glMulticastWaitSyncNV = NULL; +PFNGLMULTICASTGETQUERYOBJECTIVNVPROC 
glad_glMulticastGetQueryObjectivNV = NULL; +PFNGLMULTICASTGETQUERYOBJECTUIVNVPROC glad_glMulticastGetQueryObjectuivNV = NULL; +PFNGLMULTICASTGETQUERYOBJECTI64VNVPROC glad_glMulticastGetQueryObjecti64vNV = NULL; +PFNGLMULTICASTGETQUERYOBJECTUI64VNVPROC glad_glMulticastGetQueryObjectui64vNV = NULL; +PFNGLPROGRAMLOCALPARAMETERI4INVPROC glad_glProgramLocalParameterI4iNV = NULL; +PFNGLPROGRAMLOCALPARAMETERI4IVNVPROC glad_glProgramLocalParameterI4ivNV = NULL; +PFNGLPROGRAMLOCALPARAMETERSI4IVNVPROC glad_glProgramLocalParametersI4ivNV = NULL; +PFNGLPROGRAMLOCALPARAMETERI4UINVPROC glad_glProgramLocalParameterI4uiNV = NULL; +PFNGLPROGRAMLOCALPARAMETERI4UIVNVPROC glad_glProgramLocalParameterI4uivNV = NULL; +PFNGLPROGRAMLOCALPARAMETERSI4UIVNVPROC glad_glProgramLocalParametersI4uivNV = NULL; +PFNGLPROGRAMENVPARAMETERI4INVPROC glad_glProgramEnvParameterI4iNV = NULL; +PFNGLPROGRAMENVPARAMETERI4IVNVPROC glad_glProgramEnvParameterI4ivNV = NULL; +PFNGLPROGRAMENVPARAMETERSI4IVNVPROC glad_glProgramEnvParametersI4ivNV = NULL; +PFNGLPROGRAMENVPARAMETERI4UINVPROC glad_glProgramEnvParameterI4uiNV = NULL; +PFNGLPROGRAMENVPARAMETERI4UIVNVPROC glad_glProgramEnvParameterI4uivNV = NULL; +PFNGLPROGRAMENVPARAMETERSI4UIVNVPROC glad_glProgramEnvParametersI4uivNV = NULL; +PFNGLGETPROGRAMLOCALPARAMETERIIVNVPROC glad_glGetProgramLocalParameterIivNV = NULL; +PFNGLGETPROGRAMLOCALPARAMETERIUIVNVPROC glad_glGetProgramLocalParameterIuivNV = NULL; +PFNGLGETPROGRAMENVPARAMETERIIVNVPROC glad_glGetProgramEnvParameterIivNV = NULL; +PFNGLGETPROGRAMENVPARAMETERIUIVNVPROC glad_glGetProgramEnvParameterIuivNV = NULL; +PFNGLPROGRAMSUBROUTINEPARAMETERSUIVNVPROC glad_glProgramSubroutineParametersuivNV = NULL; +PFNGLGETPROGRAMSUBROUTINEPARAMETERUIVNVPROC glad_glGetProgramSubroutineParameteruivNV = NULL; +PFNGLVERTEX2HNVPROC glad_glVertex2hNV = NULL; +PFNGLVERTEX2HVNVPROC glad_glVertex2hvNV = NULL; +PFNGLVERTEX3HNVPROC glad_glVertex3hNV = NULL; +PFNGLVERTEX3HVNVPROC glad_glVertex3hvNV = NULL; +PFNGLVERTEX4HNVPROC 
glad_glVertex4hNV = NULL; +PFNGLVERTEX4HVNVPROC glad_glVertex4hvNV = NULL; +PFNGLNORMAL3HNVPROC glad_glNormal3hNV = NULL; +PFNGLNORMAL3HVNVPROC glad_glNormal3hvNV = NULL; +PFNGLCOLOR3HNVPROC glad_glColor3hNV = NULL; +PFNGLCOLOR3HVNVPROC glad_glColor3hvNV = NULL; +PFNGLCOLOR4HNVPROC glad_glColor4hNV = NULL; +PFNGLCOLOR4HVNVPROC glad_glColor4hvNV = NULL; +PFNGLTEXCOORD1HNVPROC glad_glTexCoord1hNV = NULL; +PFNGLTEXCOORD1HVNVPROC glad_glTexCoord1hvNV = NULL; +PFNGLTEXCOORD2HNVPROC glad_glTexCoord2hNV = NULL; +PFNGLTEXCOORD2HVNVPROC glad_glTexCoord2hvNV = NULL; +PFNGLTEXCOORD3HNVPROC glad_glTexCoord3hNV = NULL; +PFNGLTEXCOORD3HVNVPROC glad_glTexCoord3hvNV = NULL; +PFNGLTEXCOORD4HNVPROC glad_glTexCoord4hNV = NULL; +PFNGLTEXCOORD4HVNVPROC glad_glTexCoord4hvNV = NULL; +PFNGLMULTITEXCOORD1HNVPROC glad_glMultiTexCoord1hNV = NULL; +PFNGLMULTITEXCOORD1HVNVPROC glad_glMultiTexCoord1hvNV = NULL; +PFNGLMULTITEXCOORD2HNVPROC glad_glMultiTexCoord2hNV = NULL; +PFNGLMULTITEXCOORD2HVNVPROC glad_glMultiTexCoord2hvNV = NULL; +PFNGLMULTITEXCOORD3HNVPROC glad_glMultiTexCoord3hNV = NULL; +PFNGLMULTITEXCOORD3HVNVPROC glad_glMultiTexCoord3hvNV = NULL; +PFNGLMULTITEXCOORD4HNVPROC glad_glMultiTexCoord4hNV = NULL; +PFNGLMULTITEXCOORD4HVNVPROC glad_glMultiTexCoord4hvNV = NULL; +PFNGLVERTEXATTRIB1HNVPROC glad_glVertexAttrib1hNV = NULL; +PFNGLVERTEXATTRIB1HVNVPROC glad_glVertexAttrib1hvNV = NULL; +PFNGLVERTEXATTRIB2HNVPROC glad_glVertexAttrib2hNV = NULL; +PFNGLVERTEXATTRIB2HVNVPROC glad_glVertexAttrib2hvNV = NULL; +PFNGLVERTEXATTRIB3HNVPROC glad_glVertexAttrib3hNV = NULL; +PFNGLVERTEXATTRIB3HVNVPROC glad_glVertexAttrib3hvNV = NULL; +PFNGLVERTEXATTRIB4HNVPROC glad_glVertexAttrib4hNV = NULL; +PFNGLVERTEXATTRIB4HVNVPROC glad_glVertexAttrib4hvNV = NULL; +PFNGLVERTEXATTRIBS1HVNVPROC glad_glVertexAttribs1hvNV = NULL; +PFNGLVERTEXATTRIBS2HVNVPROC glad_glVertexAttribs2hvNV = NULL; +PFNGLVERTEXATTRIBS3HVNVPROC glad_glVertexAttribs3hvNV = NULL; +PFNGLVERTEXATTRIBS4HVNVPROC glad_glVertexAttribs4hvNV = NULL; 
+PFNGLFOGCOORDHNVPROC glad_glFogCoordhNV = NULL; +PFNGLFOGCOORDHVNVPROC glad_glFogCoordhvNV = NULL; +PFNGLSECONDARYCOLOR3HNVPROC glad_glSecondaryColor3hNV = NULL; +PFNGLSECONDARYCOLOR3HVNVPROC glad_glSecondaryColor3hvNV = NULL; +PFNGLVERTEXWEIGHTHNVPROC glad_glVertexWeighthNV = NULL; +PFNGLVERTEXWEIGHTHVNVPROC glad_glVertexWeighthvNV = NULL; PFNGLGETINTERNALFORMATSAMPLEIVNVPROC glad_glGetInternalformatSampleivNV = NULL; PFNGLGETMEMORYOBJECTDETACHEDRESOURCESUIVNVPROC glad_glGetMemoryObjectDetachedResourcesuivNV = NULL; PFNGLRESETMEMORYOBJECTPARAMETERNVPROC glad_glResetMemoryObjectParameterNV = NULL; @@ -1489,12 +4501,16 @@ PFNGLDRAWMESHTASKSNVPROC glad_glDrawMeshTasksNV = NULL; PFNGLDRAWMESHTASKSINDIRECTNVPROC glad_glDrawMeshTasksIndirectNV = NULL; PFNGLMULTIDRAWMESHTASKSINDIRECTNVPROC glad_glMultiDrawMeshTasksIndirectNV = NULL; PFNGLMULTIDRAWMESHTASKSINDIRECTCOUNTNVPROC glad_glMultiDrawMeshTasksIndirectCountNV = NULL; -PFNGLUNIFORMMATRIX2X3FVNVPROC glad_glUniformMatrix2x3fvNV = NULL; -PFNGLUNIFORMMATRIX3X2FVNVPROC glad_glUniformMatrix3x2fvNV = NULL; -PFNGLUNIFORMMATRIX2X4FVNVPROC glad_glUniformMatrix2x4fvNV = NULL; -PFNGLUNIFORMMATRIX4X2FVNVPROC glad_glUniformMatrix4x2fvNV = NULL; -PFNGLUNIFORMMATRIX3X4FVNVPROC glad_glUniformMatrix3x4fvNV = NULL; -PFNGLUNIFORMMATRIX4X3FVNVPROC glad_glUniformMatrix4x3fvNV = NULL; +PFNGLGENOCCLUSIONQUERIESNVPROC glad_glGenOcclusionQueriesNV = NULL; +PFNGLDELETEOCCLUSIONQUERIESNVPROC glad_glDeleteOcclusionQueriesNV = NULL; +PFNGLISOCCLUSIONQUERYNVPROC glad_glIsOcclusionQueryNV = NULL; +PFNGLBEGINOCCLUSIONQUERYNVPROC glad_glBeginOcclusionQueryNV = NULL; +PFNGLENDOCCLUSIONQUERYNVPROC glad_glEndOcclusionQueryNV = NULL; +PFNGLGETOCCLUSIONQUERYIVNVPROC glad_glGetOcclusionQueryivNV = NULL; +PFNGLGETOCCLUSIONQUERYUIVNVPROC glad_glGetOcclusionQueryuivNV = NULL; +PFNGLPROGRAMBUFFERPARAMETERSFVNVPROC glad_glProgramBufferParametersfvNV = NULL; +PFNGLPROGRAMBUFFERPARAMETERSIIVNVPROC glad_glProgramBufferParametersIivNV = NULL; 
+PFNGLPROGRAMBUFFERPARAMETERSIUIVNVPROC glad_glProgramBufferParametersIuivNV = NULL; PFNGLGENPATHSNVPROC glad_glGenPathsNV = NULL; PFNGLDELETEPATHSNVPROC glad_glDeletePathsNV = NULL; PFNGLISPATHNVPROC glad_glIsPathNV = NULL; @@ -1559,32 +4575,55 @@ PFNGLGETPATHCOLORGENIVNVPROC glad_glGetPathColorGenivNV = NULL; PFNGLGETPATHCOLORGENFVNVPROC glad_glGetPathColorGenfvNV = NULL; PFNGLGETPATHTEXGENIVNVPROC glad_glGetPathTexGenivNV = NULL; PFNGLGETPATHTEXGENFVNVPROC glad_glGetPathTexGenfvNV = NULL; -PFNGLMATRIXFRUSTUMEXTPROC glad_glMatrixFrustumEXT = NULL; -PFNGLMATRIXLOADIDENTITYEXTPROC glad_glMatrixLoadIdentityEXT = NULL; -PFNGLMATRIXLOADTRANSPOSEFEXTPROC glad_glMatrixLoadTransposefEXT = NULL; -PFNGLMATRIXLOADTRANSPOSEDEXTPROC glad_glMatrixLoadTransposedEXT = NULL; -PFNGLMATRIXLOADFEXTPROC glad_glMatrixLoadfEXT = NULL; -PFNGLMATRIXLOADDEXTPROC glad_glMatrixLoaddEXT = NULL; -PFNGLMATRIXMULTTRANSPOSEFEXTPROC glad_glMatrixMultTransposefEXT = NULL; -PFNGLMATRIXMULTTRANSPOSEDEXTPROC glad_glMatrixMultTransposedEXT = NULL; -PFNGLMATRIXMULTFEXTPROC glad_glMatrixMultfEXT = NULL; -PFNGLMATRIXMULTDEXTPROC glad_glMatrixMultdEXT = NULL; -PFNGLMATRIXORTHOEXTPROC glad_glMatrixOrthoEXT = NULL; -PFNGLMATRIXPOPEXTPROC glad_glMatrixPopEXT = NULL; -PFNGLMATRIXPUSHEXTPROC glad_glMatrixPushEXT = NULL; -PFNGLMATRIXROTATEFEXTPROC glad_glMatrixRotatefEXT = NULL; -PFNGLMATRIXROTATEDEXTPROC glad_glMatrixRotatedEXT = NULL; -PFNGLMATRIXSCALEFEXTPROC glad_glMatrixScalefEXT = NULL; -PFNGLMATRIXSCALEDEXTPROC glad_glMatrixScaledEXT = NULL; -PFNGLMATRIXTRANSLATEFEXTPROC glad_glMatrixTranslatefEXT = NULL; -PFNGLMATRIXTRANSLATEDEXTPROC glad_glMatrixTranslatedEXT = NULL; -PFNGLPOLYGONMODENVPROC glad_glPolygonModeNV = NULL; -PFNGLREADBUFFERNVPROC glad_glReadBufferNV = NULL; +PFNGLPIXELDATARANGENVPROC glad_glPixelDataRangeNV = NULL; +PFNGLFLUSHPIXELDATARANGENVPROC glad_glFlushPixelDataRangeNV = NULL; +PFNGLPOINTPARAMETERINVPROC glad_glPointParameteriNV = NULL; +PFNGLPOINTPARAMETERIVNVPROC 
glad_glPointParameterivNV = NULL; +PFNGLPRESENTFRAMEKEYEDNVPROC glad_glPresentFrameKeyedNV = NULL; +PFNGLPRESENTFRAMEDUALFILLNVPROC glad_glPresentFrameDualFillNV = NULL; +PFNGLGETVIDEOIVNVPROC glad_glGetVideoivNV = NULL; +PFNGLGETVIDEOUIVNVPROC glad_glGetVideouivNV = NULL; +PFNGLGETVIDEOI64VNVPROC glad_glGetVideoi64vNV = NULL; +PFNGLGETVIDEOUI64VNVPROC glad_glGetVideoui64vNV = NULL; +PFNGLPRIMITIVERESTARTNVPROC glad_glPrimitiveRestartNV = NULL; +PFNGLPRIMITIVERESTARTINDEXNVPROC glad_glPrimitiveRestartIndexNV = NULL; +PFNGLQUERYRESOURCENVPROC glad_glQueryResourceNV = NULL; +PFNGLGENQUERYRESOURCETAGNVPROC glad_glGenQueryResourceTagNV = NULL; +PFNGLDELETEQUERYRESOURCETAGNVPROC glad_glDeleteQueryResourceTagNV = NULL; +PFNGLQUERYRESOURCETAGNVPROC glad_glQueryResourceTagNV = NULL; +PFNGLCOMBINERPARAMETERFVNVPROC glad_glCombinerParameterfvNV = NULL; +PFNGLCOMBINERPARAMETERFNVPROC glad_glCombinerParameterfNV = NULL; +PFNGLCOMBINERPARAMETERIVNVPROC glad_glCombinerParameterivNV = NULL; +PFNGLCOMBINERPARAMETERINVPROC glad_glCombinerParameteriNV = NULL; +PFNGLCOMBINERINPUTNVPROC glad_glCombinerInputNV = NULL; +PFNGLCOMBINEROUTPUTNVPROC glad_glCombinerOutputNV = NULL; +PFNGLFINALCOMBINERINPUTNVPROC glad_glFinalCombinerInputNV = NULL; +PFNGLGETCOMBINERINPUTPARAMETERFVNVPROC glad_glGetCombinerInputParameterfvNV = NULL; +PFNGLGETCOMBINERINPUTPARAMETERIVNVPROC glad_glGetCombinerInputParameterivNV = NULL; +PFNGLGETCOMBINEROUTPUTPARAMETERFVNVPROC glad_glGetCombinerOutputParameterfvNV = NULL; +PFNGLGETCOMBINEROUTPUTPARAMETERIVNVPROC glad_glGetCombinerOutputParameterivNV = NULL; +PFNGLGETFINALCOMBINERINPUTPARAMETERFVNVPROC glad_glGetFinalCombinerInputParameterfvNV = NULL; +PFNGLGETFINALCOMBINERINPUTPARAMETERIVNVPROC glad_glGetFinalCombinerInputParameterivNV = NULL; +PFNGLCOMBINERSTAGEPARAMETERFVNVPROC glad_glCombinerStageParameterfvNV = NULL; +PFNGLGETCOMBINERSTAGEPARAMETERFVNVPROC glad_glGetCombinerStageParameterfvNV = NULL; PFNGLFRAMEBUFFERSAMPLELOCATIONSFVNVPROC 
glad_glFramebufferSampleLocationsfvNV = NULL; PFNGLNAMEDFRAMEBUFFERSAMPLELOCATIONSFVNVPROC glad_glNamedFramebufferSampleLocationsfvNV = NULL; PFNGLRESOLVEDEPTHVALUESNVPROC glad_glResolveDepthValuesNV = NULL; PFNGLSCISSOREXCLUSIVENVPROC glad_glScissorExclusiveNV = NULL; PFNGLSCISSOREXCLUSIVEARRAYVNVPROC glad_glScissorExclusiveArrayvNV = NULL; +PFNGLMAKEBUFFERRESIDENTNVPROC glad_glMakeBufferResidentNV = NULL; +PFNGLMAKEBUFFERNONRESIDENTNVPROC glad_glMakeBufferNonResidentNV = NULL; +PFNGLISBUFFERRESIDENTNVPROC glad_glIsBufferResidentNV = NULL; +PFNGLMAKENAMEDBUFFERRESIDENTNVPROC glad_glMakeNamedBufferResidentNV = NULL; +PFNGLMAKENAMEDBUFFERNONRESIDENTNVPROC glad_glMakeNamedBufferNonResidentNV = NULL; +PFNGLISNAMEDBUFFERRESIDENTNVPROC glad_glIsNamedBufferResidentNV = NULL; +PFNGLGETBUFFERPARAMETERUI64VNVPROC glad_glGetBufferParameterui64vNV = NULL; +PFNGLGETNAMEDBUFFERPARAMETERUI64VNVPROC glad_glGetNamedBufferParameterui64vNV = NULL; +PFNGLGETINTEGERUI64VNVPROC glad_glGetIntegerui64vNV = NULL; +PFNGLUNIFORMUI64NVPROC glad_glUniformui64NV = NULL; +PFNGLUNIFORMUI64VNVPROC glad_glUniformui64vNV = NULL; +PFNGLPROGRAMUNIFORMUI64NVPROC glad_glProgramUniformui64NV = NULL; +PFNGLPROGRAMUNIFORMUI64VNVPROC glad_glProgramUniformui64vNV = NULL; PFNGLBINDSHADINGRATEIMAGENVPROC glad_glBindShadingRateImageNV = NULL; PFNGLGETSHADINGRATEIMAGEPALETTENVPROC glad_glGetShadingRateImagePaletteNV = NULL; PFNGLGETSHADINGRATESAMPLELOCATIONIVNVPROC glad_glGetShadingRateSampleLocationivNV = NULL; @@ -1593,9 +4632,603 @@ PFNGLSHADINGRATEIMAGEPALETTENVPROC glad_glShadingRateImagePaletteNV = NULL; PFNGLSHADINGRATESAMPLEORDERNVPROC glad_glShadingRateSampleOrderNV = NULL; PFNGLSHADINGRATESAMPLEORDERCUSTOMNVPROC glad_glShadingRateSampleOrderCustomNV = NULL; PFNGLTEXTUREBARRIERNVPROC glad_glTextureBarrierNV = NULL; +PFNGLTEXIMAGE2DMULTISAMPLECOVERAGENVPROC glad_glTexImage2DMultisampleCoverageNV = NULL; +PFNGLTEXIMAGE3DMULTISAMPLECOVERAGENVPROC glad_glTexImage3DMultisampleCoverageNV = NULL; 
+PFNGLTEXTUREIMAGE2DMULTISAMPLENVPROC glad_glTextureImage2DMultisampleNV = NULL; +PFNGLTEXTUREIMAGE3DMULTISAMPLENVPROC glad_glTextureImage3DMultisampleNV = NULL; +PFNGLTEXTUREIMAGE2DMULTISAMPLECOVERAGENVPROC glad_glTextureImage2DMultisampleCoverageNV = NULL; +PFNGLTEXTUREIMAGE3DMULTISAMPLECOVERAGENVPROC glad_glTextureImage3DMultisampleCoverageNV = NULL; PFNGLCREATESEMAPHORESNVPROC glad_glCreateSemaphoresNV = NULL; PFNGLSEMAPHOREPARAMETERIVNVPROC glad_glSemaphoreParameterivNV = NULL; PFNGLGETSEMAPHOREPARAMETERIVNVPROC glad_glGetSemaphoreParameterivNV = NULL; +PFNGLBEGINTRANSFORMFEEDBACKNVPROC glad_glBeginTransformFeedbackNV = NULL; +PFNGLENDTRANSFORMFEEDBACKNVPROC glad_glEndTransformFeedbackNV = NULL; +PFNGLTRANSFORMFEEDBACKATTRIBSNVPROC glad_glTransformFeedbackAttribsNV = NULL; +PFNGLBINDBUFFERRANGENVPROC glad_glBindBufferRangeNV = NULL; +PFNGLBINDBUFFEROFFSETNVPROC glad_glBindBufferOffsetNV = NULL; +PFNGLBINDBUFFERBASENVPROC glad_glBindBufferBaseNV = NULL; +PFNGLTRANSFORMFEEDBACKVARYINGSNVPROC glad_glTransformFeedbackVaryingsNV = NULL; +PFNGLACTIVEVARYINGNVPROC glad_glActiveVaryingNV = NULL; +PFNGLGETVARYINGLOCATIONNVPROC glad_glGetVaryingLocationNV = NULL; +PFNGLGETACTIVEVARYINGNVPROC glad_glGetActiveVaryingNV = NULL; +PFNGLGETTRANSFORMFEEDBACKVARYINGNVPROC glad_glGetTransformFeedbackVaryingNV = NULL; +PFNGLTRANSFORMFEEDBACKSTREAMATTRIBSNVPROC glad_glTransformFeedbackStreamAttribsNV = NULL; +PFNGLBINDTRANSFORMFEEDBACKNVPROC glad_glBindTransformFeedbackNV = NULL; +PFNGLDELETETRANSFORMFEEDBACKSNVPROC glad_glDeleteTransformFeedbacksNV = NULL; +PFNGLGENTRANSFORMFEEDBACKSNVPROC glad_glGenTransformFeedbacksNV = NULL; +PFNGLISTRANSFORMFEEDBACKNVPROC glad_glIsTransformFeedbackNV = NULL; +PFNGLPAUSETRANSFORMFEEDBACKNVPROC glad_glPauseTransformFeedbackNV = NULL; +PFNGLRESUMETRANSFORMFEEDBACKNVPROC glad_glResumeTransformFeedbackNV = NULL; +PFNGLDRAWTRANSFORMFEEDBACKNVPROC glad_glDrawTransformFeedbackNV = NULL; +PFNGLVDPAUINITNVPROC glad_glVDPAUInitNV = NULL; 
+PFNGLVDPAUFININVPROC glad_glVDPAUFiniNV = NULL; +PFNGLVDPAUREGISTERVIDEOSURFACENVPROC glad_glVDPAURegisterVideoSurfaceNV = NULL; +PFNGLVDPAUREGISTEROUTPUTSURFACENVPROC glad_glVDPAURegisterOutputSurfaceNV = NULL; +PFNGLVDPAUISSURFACENVPROC glad_glVDPAUIsSurfaceNV = NULL; +PFNGLVDPAUUNREGISTERSURFACENVPROC glad_glVDPAUUnregisterSurfaceNV = NULL; +PFNGLVDPAUGETSURFACEIVNVPROC glad_glVDPAUGetSurfaceivNV = NULL; +PFNGLVDPAUSURFACEACCESSNVPROC glad_glVDPAUSurfaceAccessNV = NULL; +PFNGLVDPAUMAPSURFACESNVPROC glad_glVDPAUMapSurfacesNV = NULL; +PFNGLVDPAUUNMAPSURFACESNVPROC glad_glVDPAUUnmapSurfacesNV = NULL; +PFNGLVDPAUREGISTERVIDEOSURFACEWITHPICTURESTRUCTURENVPROC glad_glVDPAURegisterVideoSurfaceWithPictureStructureNV = NULL; +PFNGLFLUSHVERTEXARRAYRANGENVPROC glad_glFlushVertexArrayRangeNV = NULL; +PFNGLVERTEXARRAYRANGENVPROC glad_glVertexArrayRangeNV = NULL; +PFNGLVERTEXATTRIBL1I64NVPROC glad_glVertexAttribL1i64NV = NULL; +PFNGLVERTEXATTRIBL2I64NVPROC glad_glVertexAttribL2i64NV = NULL; +PFNGLVERTEXATTRIBL3I64NVPROC glad_glVertexAttribL3i64NV = NULL; +PFNGLVERTEXATTRIBL4I64NVPROC glad_glVertexAttribL4i64NV = NULL; +PFNGLVERTEXATTRIBL1I64VNVPROC glad_glVertexAttribL1i64vNV = NULL; +PFNGLVERTEXATTRIBL2I64VNVPROC glad_glVertexAttribL2i64vNV = NULL; +PFNGLVERTEXATTRIBL3I64VNVPROC glad_glVertexAttribL3i64vNV = NULL; +PFNGLVERTEXATTRIBL4I64VNVPROC glad_glVertexAttribL4i64vNV = NULL; +PFNGLVERTEXATTRIBL1UI64NVPROC glad_glVertexAttribL1ui64NV = NULL; +PFNGLVERTEXATTRIBL2UI64NVPROC glad_glVertexAttribL2ui64NV = NULL; +PFNGLVERTEXATTRIBL3UI64NVPROC glad_glVertexAttribL3ui64NV = NULL; +PFNGLVERTEXATTRIBL4UI64NVPROC glad_glVertexAttribL4ui64NV = NULL; +PFNGLVERTEXATTRIBL1UI64VNVPROC glad_glVertexAttribL1ui64vNV = NULL; +PFNGLVERTEXATTRIBL2UI64VNVPROC glad_glVertexAttribL2ui64vNV = NULL; +PFNGLVERTEXATTRIBL3UI64VNVPROC glad_glVertexAttribL3ui64vNV = NULL; +PFNGLVERTEXATTRIBL4UI64VNVPROC glad_glVertexAttribL4ui64vNV = NULL; +PFNGLGETVERTEXATTRIBLI64VNVPROC 
glad_glGetVertexAttribLi64vNV = NULL; +PFNGLGETVERTEXATTRIBLUI64VNVPROC glad_glGetVertexAttribLui64vNV = NULL; +PFNGLVERTEXATTRIBLFORMATNVPROC glad_glVertexAttribLFormatNV = NULL; +PFNGLBUFFERADDRESSRANGENVPROC glad_glBufferAddressRangeNV = NULL; +PFNGLVERTEXFORMATNVPROC glad_glVertexFormatNV = NULL; +PFNGLNORMALFORMATNVPROC glad_glNormalFormatNV = NULL; +PFNGLCOLORFORMATNVPROC glad_glColorFormatNV = NULL; +PFNGLINDEXFORMATNVPROC glad_glIndexFormatNV = NULL; +PFNGLTEXCOORDFORMATNVPROC glad_glTexCoordFormatNV = NULL; +PFNGLEDGEFLAGFORMATNVPROC glad_glEdgeFlagFormatNV = NULL; +PFNGLSECONDARYCOLORFORMATNVPROC glad_glSecondaryColorFormatNV = NULL; +PFNGLFOGCOORDFORMATNVPROC glad_glFogCoordFormatNV = NULL; +PFNGLVERTEXATTRIBFORMATNVPROC glad_glVertexAttribFormatNV = NULL; +PFNGLVERTEXATTRIBIFORMATNVPROC glad_glVertexAttribIFormatNV = NULL; +PFNGLGETINTEGERUI64I_VNVPROC glad_glGetIntegerui64i_vNV = NULL; +PFNGLAREPROGRAMSRESIDENTNVPROC glad_glAreProgramsResidentNV = NULL; +PFNGLBINDPROGRAMNVPROC glad_glBindProgramNV = NULL; +PFNGLDELETEPROGRAMSNVPROC glad_glDeleteProgramsNV = NULL; +PFNGLEXECUTEPROGRAMNVPROC glad_glExecuteProgramNV = NULL; +PFNGLGENPROGRAMSNVPROC glad_glGenProgramsNV = NULL; +PFNGLGETPROGRAMPARAMETERDVNVPROC glad_glGetProgramParameterdvNV = NULL; +PFNGLGETPROGRAMPARAMETERFVNVPROC glad_glGetProgramParameterfvNV = NULL; +PFNGLGETPROGRAMIVNVPROC glad_glGetProgramivNV = NULL; +PFNGLGETPROGRAMSTRINGNVPROC glad_glGetProgramStringNV = NULL; +PFNGLGETTRACKMATRIXIVNVPROC glad_glGetTrackMatrixivNV = NULL; +PFNGLGETVERTEXATTRIBDVNVPROC glad_glGetVertexAttribdvNV = NULL; +PFNGLGETVERTEXATTRIBFVNVPROC glad_glGetVertexAttribfvNV = NULL; +PFNGLGETVERTEXATTRIBIVNVPROC glad_glGetVertexAttribivNV = NULL; +PFNGLGETVERTEXATTRIBPOINTERVNVPROC glad_glGetVertexAttribPointervNV = NULL; +PFNGLISPROGRAMNVPROC glad_glIsProgramNV = NULL; +PFNGLLOADPROGRAMNVPROC glad_glLoadProgramNV = NULL; +PFNGLPROGRAMPARAMETER4DNVPROC glad_glProgramParameter4dNV = NULL; 
+PFNGLPROGRAMPARAMETER4DVNVPROC glad_glProgramParameter4dvNV = NULL; +PFNGLPROGRAMPARAMETER4FNVPROC glad_glProgramParameter4fNV = NULL; +PFNGLPROGRAMPARAMETER4FVNVPROC glad_glProgramParameter4fvNV = NULL; +PFNGLPROGRAMPARAMETERS4DVNVPROC glad_glProgramParameters4dvNV = NULL; +PFNGLPROGRAMPARAMETERS4FVNVPROC glad_glProgramParameters4fvNV = NULL; +PFNGLREQUESTRESIDENTPROGRAMSNVPROC glad_glRequestResidentProgramsNV = NULL; +PFNGLTRACKMATRIXNVPROC glad_glTrackMatrixNV = NULL; +PFNGLVERTEXATTRIBPOINTERNVPROC glad_glVertexAttribPointerNV = NULL; +PFNGLVERTEXATTRIB1DNVPROC glad_glVertexAttrib1dNV = NULL; +PFNGLVERTEXATTRIB1DVNVPROC glad_glVertexAttrib1dvNV = NULL; +PFNGLVERTEXATTRIB1FNVPROC glad_glVertexAttrib1fNV = NULL; +PFNGLVERTEXATTRIB1FVNVPROC glad_glVertexAttrib1fvNV = NULL; +PFNGLVERTEXATTRIB1SNVPROC glad_glVertexAttrib1sNV = NULL; +PFNGLVERTEXATTRIB1SVNVPROC glad_glVertexAttrib1svNV = NULL; +PFNGLVERTEXATTRIB2DNVPROC glad_glVertexAttrib2dNV = NULL; +PFNGLVERTEXATTRIB2DVNVPROC glad_glVertexAttrib2dvNV = NULL; +PFNGLVERTEXATTRIB2FNVPROC glad_glVertexAttrib2fNV = NULL; +PFNGLVERTEXATTRIB2FVNVPROC glad_glVertexAttrib2fvNV = NULL; +PFNGLVERTEXATTRIB2SNVPROC glad_glVertexAttrib2sNV = NULL; +PFNGLVERTEXATTRIB2SVNVPROC glad_glVertexAttrib2svNV = NULL; +PFNGLVERTEXATTRIB3DNVPROC glad_glVertexAttrib3dNV = NULL; +PFNGLVERTEXATTRIB3DVNVPROC glad_glVertexAttrib3dvNV = NULL; +PFNGLVERTEXATTRIB3FNVPROC glad_glVertexAttrib3fNV = NULL; +PFNGLVERTEXATTRIB3FVNVPROC glad_glVertexAttrib3fvNV = NULL; +PFNGLVERTEXATTRIB3SNVPROC glad_glVertexAttrib3sNV = NULL; +PFNGLVERTEXATTRIB3SVNVPROC glad_glVertexAttrib3svNV = NULL; +PFNGLVERTEXATTRIB4DNVPROC glad_glVertexAttrib4dNV = NULL; +PFNGLVERTEXATTRIB4DVNVPROC glad_glVertexAttrib4dvNV = NULL; +PFNGLVERTEXATTRIB4FNVPROC glad_glVertexAttrib4fNV = NULL; +PFNGLVERTEXATTRIB4FVNVPROC glad_glVertexAttrib4fvNV = NULL; +PFNGLVERTEXATTRIB4SNVPROC glad_glVertexAttrib4sNV = NULL; +PFNGLVERTEXATTRIB4SVNVPROC glad_glVertexAttrib4svNV = NULL; 
+PFNGLVERTEXATTRIB4UBNVPROC glad_glVertexAttrib4ubNV = NULL; +PFNGLVERTEXATTRIB4UBVNVPROC glad_glVertexAttrib4ubvNV = NULL; +PFNGLVERTEXATTRIBS1DVNVPROC glad_glVertexAttribs1dvNV = NULL; +PFNGLVERTEXATTRIBS1FVNVPROC glad_glVertexAttribs1fvNV = NULL; +PFNGLVERTEXATTRIBS1SVNVPROC glad_glVertexAttribs1svNV = NULL; +PFNGLVERTEXATTRIBS2DVNVPROC glad_glVertexAttribs2dvNV = NULL; +PFNGLVERTEXATTRIBS2FVNVPROC glad_glVertexAttribs2fvNV = NULL; +PFNGLVERTEXATTRIBS2SVNVPROC glad_glVertexAttribs2svNV = NULL; +PFNGLVERTEXATTRIBS3DVNVPROC glad_glVertexAttribs3dvNV = NULL; +PFNGLVERTEXATTRIBS3FVNVPROC glad_glVertexAttribs3fvNV = NULL; +PFNGLVERTEXATTRIBS3SVNVPROC glad_glVertexAttribs3svNV = NULL; +PFNGLVERTEXATTRIBS4DVNVPROC glad_glVertexAttribs4dvNV = NULL; +PFNGLVERTEXATTRIBS4FVNVPROC glad_glVertexAttribs4fvNV = NULL; +PFNGLVERTEXATTRIBS4SVNVPROC glad_glVertexAttribs4svNV = NULL; +PFNGLVERTEXATTRIBS4UBVNVPROC glad_glVertexAttribs4ubvNV = NULL; +PFNGLBEGINVIDEOCAPTURENVPROC glad_glBeginVideoCaptureNV = NULL; +PFNGLBINDVIDEOCAPTURESTREAMBUFFERNVPROC glad_glBindVideoCaptureStreamBufferNV = NULL; +PFNGLBINDVIDEOCAPTURESTREAMTEXTURENVPROC glad_glBindVideoCaptureStreamTextureNV = NULL; +PFNGLENDVIDEOCAPTURENVPROC glad_glEndVideoCaptureNV = NULL; +PFNGLGETVIDEOCAPTUREIVNVPROC glad_glGetVideoCaptureivNV = NULL; +PFNGLGETVIDEOCAPTURESTREAMIVNVPROC glad_glGetVideoCaptureStreamivNV = NULL; +PFNGLGETVIDEOCAPTURESTREAMFVNVPROC glad_glGetVideoCaptureStreamfvNV = NULL; +PFNGLGETVIDEOCAPTURESTREAMDVNVPROC glad_glGetVideoCaptureStreamdvNV = NULL; +PFNGLVIDEOCAPTURENVPROC glad_glVideoCaptureNV = NULL; +PFNGLVIDEOCAPTURESTREAMPARAMETERIVNVPROC glad_glVideoCaptureStreamParameterivNV = NULL; +PFNGLVIDEOCAPTURESTREAMPARAMETERFVNVPROC glad_glVideoCaptureStreamParameterfvNV = NULL; +PFNGLVIDEOCAPTURESTREAMPARAMETERDVNVPROC glad_glVideoCaptureStreamParameterdvNV = NULL; +PFNGLVIEWPORTSWIZZLENVPROC glad_glViewportSwizzleNV = NULL; +PFNGLMULTITEXCOORD1BOESPROC glad_glMultiTexCoord1bOES = NULL; 
+PFNGLMULTITEXCOORD1BVOESPROC glad_glMultiTexCoord1bvOES = NULL; +PFNGLMULTITEXCOORD2BOESPROC glad_glMultiTexCoord2bOES = NULL; +PFNGLMULTITEXCOORD2BVOESPROC glad_glMultiTexCoord2bvOES = NULL; +PFNGLMULTITEXCOORD3BOESPROC glad_glMultiTexCoord3bOES = NULL; +PFNGLMULTITEXCOORD3BVOESPROC glad_glMultiTexCoord3bvOES = NULL; +PFNGLMULTITEXCOORD4BOESPROC glad_glMultiTexCoord4bOES = NULL; +PFNGLMULTITEXCOORD4BVOESPROC glad_glMultiTexCoord4bvOES = NULL; +PFNGLTEXCOORD1BOESPROC glad_glTexCoord1bOES = NULL; +PFNGLTEXCOORD1BVOESPROC glad_glTexCoord1bvOES = NULL; +PFNGLTEXCOORD2BOESPROC glad_glTexCoord2bOES = NULL; +PFNGLTEXCOORD2BVOESPROC glad_glTexCoord2bvOES = NULL; +PFNGLTEXCOORD3BOESPROC glad_glTexCoord3bOES = NULL; +PFNGLTEXCOORD3BVOESPROC glad_glTexCoord3bvOES = NULL; +PFNGLTEXCOORD4BOESPROC glad_glTexCoord4bOES = NULL; +PFNGLTEXCOORD4BVOESPROC glad_glTexCoord4bvOES = NULL; +PFNGLVERTEX2BOESPROC glad_glVertex2bOES = NULL; +PFNGLVERTEX2BVOESPROC glad_glVertex2bvOES = NULL; +PFNGLVERTEX3BOESPROC glad_glVertex3bOES = NULL; +PFNGLVERTEX3BVOESPROC glad_glVertex3bvOES = NULL; +PFNGLVERTEX4BOESPROC glad_glVertex4bOES = NULL; +PFNGLVERTEX4BVOESPROC glad_glVertex4bvOES = NULL; +PFNGLALPHAFUNCXOESPROC glad_glAlphaFuncxOES = NULL; +PFNGLCLEARCOLORXOESPROC glad_glClearColorxOES = NULL; +PFNGLCLEARDEPTHXOESPROC glad_glClearDepthxOES = NULL; +PFNGLCLIPPLANEXOESPROC glad_glClipPlanexOES = NULL; +PFNGLCOLOR4XOESPROC glad_glColor4xOES = NULL; +PFNGLDEPTHRANGEXOESPROC glad_glDepthRangexOES = NULL; +PFNGLFOGXOESPROC glad_glFogxOES = NULL; +PFNGLFOGXVOESPROC glad_glFogxvOES = NULL; +PFNGLFRUSTUMXOESPROC glad_glFrustumxOES = NULL; +PFNGLGETCLIPPLANEXOESPROC glad_glGetClipPlanexOES = NULL; +PFNGLGETFIXEDVOESPROC glad_glGetFixedvOES = NULL; +PFNGLGETTEXENVXVOESPROC glad_glGetTexEnvxvOES = NULL; +PFNGLGETTEXPARAMETERXVOESPROC glad_glGetTexParameterxvOES = NULL; +PFNGLLIGHTMODELXOESPROC glad_glLightModelxOES = NULL; +PFNGLLIGHTMODELXVOESPROC glad_glLightModelxvOES = NULL; +PFNGLLIGHTXOESPROC 
glad_glLightxOES = NULL; +PFNGLLIGHTXVOESPROC glad_glLightxvOES = NULL; +PFNGLLINEWIDTHXOESPROC glad_glLineWidthxOES = NULL; +PFNGLLOADMATRIXXOESPROC glad_glLoadMatrixxOES = NULL; +PFNGLMATERIALXOESPROC glad_glMaterialxOES = NULL; +PFNGLMATERIALXVOESPROC glad_glMaterialxvOES = NULL; +PFNGLMULTMATRIXXOESPROC glad_glMultMatrixxOES = NULL; +PFNGLMULTITEXCOORD4XOESPROC glad_glMultiTexCoord4xOES = NULL; +PFNGLNORMAL3XOESPROC glad_glNormal3xOES = NULL; +PFNGLORTHOXOESPROC glad_glOrthoxOES = NULL; +PFNGLPOINTPARAMETERXVOESPROC glad_glPointParameterxvOES = NULL; +PFNGLPOINTSIZEXOESPROC glad_glPointSizexOES = NULL; +PFNGLPOLYGONOFFSETXOESPROC glad_glPolygonOffsetxOES = NULL; +PFNGLROTATEXOESPROC glad_glRotatexOES = NULL; +PFNGLSCALEXOESPROC glad_glScalexOES = NULL; +PFNGLTEXENVXOESPROC glad_glTexEnvxOES = NULL; +PFNGLTEXENVXVOESPROC glad_glTexEnvxvOES = NULL; +PFNGLTEXPARAMETERXOESPROC glad_glTexParameterxOES = NULL; +PFNGLTEXPARAMETERXVOESPROC glad_glTexParameterxvOES = NULL; +PFNGLTRANSLATEXOESPROC glad_glTranslatexOES = NULL; +PFNGLGETLIGHTXVOESPROC glad_glGetLightxvOES = NULL; +PFNGLGETMATERIALXVOESPROC glad_glGetMaterialxvOES = NULL; +PFNGLPOINTPARAMETERXOESPROC glad_glPointParameterxOES = NULL; +PFNGLSAMPLECOVERAGEXOESPROC glad_glSampleCoveragexOES = NULL; +PFNGLACCUMXOESPROC glad_glAccumxOES = NULL; +PFNGLBITMAPXOESPROC glad_glBitmapxOES = NULL; +PFNGLBLENDCOLORXOESPROC glad_glBlendColorxOES = NULL; +PFNGLCLEARACCUMXOESPROC glad_glClearAccumxOES = NULL; +PFNGLCOLOR3XOESPROC glad_glColor3xOES = NULL; +PFNGLCOLOR3XVOESPROC glad_glColor3xvOES = NULL; +PFNGLCOLOR4XVOESPROC glad_glColor4xvOES = NULL; +PFNGLCONVOLUTIONPARAMETERXOESPROC glad_glConvolutionParameterxOES = NULL; +PFNGLCONVOLUTIONPARAMETERXVOESPROC glad_glConvolutionParameterxvOES = NULL; +PFNGLEVALCOORD1XOESPROC glad_glEvalCoord1xOES = NULL; +PFNGLEVALCOORD1XVOESPROC glad_glEvalCoord1xvOES = NULL; +PFNGLEVALCOORD2XOESPROC glad_glEvalCoord2xOES = NULL; +PFNGLEVALCOORD2XVOESPROC glad_glEvalCoord2xvOES = NULL; 
+PFNGLFEEDBACKBUFFERXOESPROC glad_glFeedbackBufferxOES = NULL; +PFNGLGETCONVOLUTIONPARAMETERXVOESPROC glad_glGetConvolutionParameterxvOES = NULL; +PFNGLGETHISTOGRAMPARAMETERXVOESPROC glad_glGetHistogramParameterxvOES = NULL; +PFNGLGETLIGHTXOESPROC glad_glGetLightxOES = NULL; +PFNGLGETMAPXVOESPROC glad_glGetMapxvOES = NULL; +PFNGLGETMATERIALXOESPROC glad_glGetMaterialxOES = NULL; +PFNGLGETPIXELMAPXVPROC glad_glGetPixelMapxv = NULL; +PFNGLGETTEXGENXVOESPROC glad_glGetTexGenxvOES = NULL; +PFNGLGETTEXLEVELPARAMETERXVOESPROC glad_glGetTexLevelParameterxvOES = NULL; +PFNGLINDEXXOESPROC glad_glIndexxOES = NULL; +PFNGLINDEXXVOESPROC glad_glIndexxvOES = NULL; +PFNGLLOADTRANSPOSEMATRIXXOESPROC glad_glLoadTransposeMatrixxOES = NULL; +PFNGLMAP1XOESPROC glad_glMap1xOES = NULL; +PFNGLMAP2XOESPROC glad_glMap2xOES = NULL; +PFNGLMAPGRID1XOESPROC glad_glMapGrid1xOES = NULL; +PFNGLMAPGRID2XOESPROC glad_glMapGrid2xOES = NULL; +PFNGLMULTTRANSPOSEMATRIXXOESPROC glad_glMultTransposeMatrixxOES = NULL; +PFNGLMULTITEXCOORD1XOESPROC glad_glMultiTexCoord1xOES = NULL; +PFNGLMULTITEXCOORD1XVOESPROC glad_glMultiTexCoord1xvOES = NULL; +PFNGLMULTITEXCOORD2XOESPROC glad_glMultiTexCoord2xOES = NULL; +PFNGLMULTITEXCOORD2XVOESPROC glad_glMultiTexCoord2xvOES = NULL; +PFNGLMULTITEXCOORD3XOESPROC glad_glMultiTexCoord3xOES = NULL; +PFNGLMULTITEXCOORD3XVOESPROC glad_glMultiTexCoord3xvOES = NULL; +PFNGLMULTITEXCOORD4XVOESPROC glad_glMultiTexCoord4xvOES = NULL; +PFNGLNORMAL3XVOESPROC glad_glNormal3xvOES = NULL; +PFNGLPASSTHROUGHXOESPROC glad_glPassThroughxOES = NULL; +PFNGLPIXELMAPXPROC glad_glPixelMapx = NULL; +PFNGLPIXELSTOREXPROC glad_glPixelStorex = NULL; +PFNGLPIXELTRANSFERXOESPROC glad_glPixelTransferxOES = NULL; +PFNGLPIXELZOOMXOESPROC glad_glPixelZoomxOES = NULL; +PFNGLPRIORITIZETEXTURESXOESPROC glad_glPrioritizeTexturesxOES = NULL; +PFNGLRASTERPOS2XOESPROC glad_glRasterPos2xOES = NULL; +PFNGLRASTERPOS2XVOESPROC glad_glRasterPos2xvOES = NULL; +PFNGLRASTERPOS3XOESPROC glad_glRasterPos3xOES = NULL; 
+PFNGLRASTERPOS3XVOESPROC glad_glRasterPos3xvOES = NULL; +PFNGLRASTERPOS4XOESPROC glad_glRasterPos4xOES = NULL; +PFNGLRASTERPOS4XVOESPROC glad_glRasterPos4xvOES = NULL; +PFNGLRECTXOESPROC glad_glRectxOES = NULL; +PFNGLRECTXVOESPROC glad_glRectxvOES = NULL; +PFNGLTEXCOORD1XOESPROC glad_glTexCoord1xOES = NULL; +PFNGLTEXCOORD1XVOESPROC glad_glTexCoord1xvOES = NULL; +PFNGLTEXCOORD2XOESPROC glad_glTexCoord2xOES = NULL; +PFNGLTEXCOORD2XVOESPROC glad_glTexCoord2xvOES = NULL; +PFNGLTEXCOORD3XOESPROC glad_glTexCoord3xOES = NULL; +PFNGLTEXCOORD3XVOESPROC glad_glTexCoord3xvOES = NULL; +PFNGLTEXCOORD4XOESPROC glad_glTexCoord4xOES = NULL; +PFNGLTEXCOORD4XVOESPROC glad_glTexCoord4xvOES = NULL; +PFNGLTEXGENXOESPROC glad_glTexGenxOES = NULL; +PFNGLTEXGENXVOESPROC glad_glTexGenxvOES = NULL; +PFNGLVERTEX2XOESPROC glad_glVertex2xOES = NULL; +PFNGLVERTEX2XVOESPROC glad_glVertex2xvOES = NULL; +PFNGLVERTEX3XOESPROC glad_glVertex3xOES = NULL; +PFNGLVERTEX3XVOESPROC glad_glVertex3xvOES = NULL; +PFNGLVERTEX4XOESPROC glad_glVertex4xOES = NULL; +PFNGLVERTEX4XVOESPROC glad_glVertex4xvOES = NULL; +PFNGLQUERYMATRIXXOESPROC glad_glQueryMatrixxOES = NULL; +PFNGLCLEARDEPTHFOESPROC glad_glClearDepthfOES = NULL; +PFNGLCLIPPLANEFOESPROC glad_glClipPlanefOES = NULL; +PFNGLDEPTHRANGEFOESPROC glad_glDepthRangefOES = NULL; +PFNGLFRUSTUMFOESPROC glad_glFrustumfOES = NULL; +PFNGLGETCLIPPLANEFOESPROC glad_glGetClipPlanefOES = NULL; +PFNGLORTHOFOESPROC glad_glOrthofOES = NULL; +PFNGLFRAMEBUFFERTEXTUREMULTIVIEWOVRPROC glad_glFramebufferTextureMultiviewOVR = NULL; +PFNGLNAMEDFRAMEBUFFERTEXTUREMULTIVIEWOVRPROC glad_glNamedFramebufferTextureMultiviewOVR = NULL; +PFNGLHINTPGIPROC glad_glHintPGI = NULL; +PFNGLDETAILTEXFUNCSGISPROC glad_glDetailTexFuncSGIS = NULL; +PFNGLGETDETAILTEXFUNCSGISPROC glad_glGetDetailTexFuncSGIS = NULL; +PFNGLFOGFUNCSGISPROC glad_glFogFuncSGIS = NULL; +PFNGLGETFOGFUNCSGISPROC glad_glGetFogFuncSGIS = NULL; +PFNGLSAMPLEMASKSGISPROC glad_glSampleMaskSGIS = NULL; +PFNGLSAMPLEPATTERNSGISPROC 
glad_glSamplePatternSGIS = NULL; +PFNGLPIXELTEXGENPARAMETERISGISPROC glad_glPixelTexGenParameteriSGIS = NULL; +PFNGLPIXELTEXGENPARAMETERIVSGISPROC glad_glPixelTexGenParameterivSGIS = NULL; +PFNGLPIXELTEXGENPARAMETERFSGISPROC glad_glPixelTexGenParameterfSGIS = NULL; +PFNGLPIXELTEXGENPARAMETERFVSGISPROC glad_glPixelTexGenParameterfvSGIS = NULL; +PFNGLGETPIXELTEXGENPARAMETERIVSGISPROC glad_glGetPixelTexGenParameterivSGIS = NULL; +PFNGLGETPIXELTEXGENPARAMETERFVSGISPROC glad_glGetPixelTexGenParameterfvSGIS = NULL; +PFNGLPOINTPARAMETERFSGISPROC glad_glPointParameterfSGIS = NULL; +PFNGLPOINTPARAMETERFVSGISPROC glad_glPointParameterfvSGIS = NULL; +PFNGLSHARPENTEXFUNCSGISPROC glad_glSharpenTexFuncSGIS = NULL; +PFNGLGETSHARPENTEXFUNCSGISPROC glad_glGetSharpenTexFuncSGIS = NULL; +PFNGLTEXIMAGE4DSGISPROC glad_glTexImage4DSGIS = NULL; +PFNGLTEXSUBIMAGE4DSGISPROC glad_glTexSubImage4DSGIS = NULL; +PFNGLTEXTURECOLORMASKSGISPROC glad_glTextureColorMaskSGIS = NULL; +PFNGLGETTEXFILTERFUNCSGISPROC glad_glGetTexFilterFuncSGIS = NULL; +PFNGLTEXFILTERFUNCSGISPROC glad_glTexFilterFuncSGIS = NULL; +PFNGLASYNCMARKERSGIXPROC glad_glAsyncMarkerSGIX = NULL; +PFNGLFINISHASYNCSGIXPROC glad_glFinishAsyncSGIX = NULL; +PFNGLPOLLASYNCSGIXPROC glad_glPollAsyncSGIX = NULL; +PFNGLGENASYNCMARKERSSGIXPROC glad_glGenAsyncMarkersSGIX = NULL; +PFNGLDELETEASYNCMARKERSSGIXPROC glad_glDeleteAsyncMarkersSGIX = NULL; +PFNGLISASYNCMARKERSGIXPROC glad_glIsAsyncMarkerSGIX = NULL; +PFNGLFLUSHRASTERSGIXPROC glad_glFlushRasterSGIX = NULL; +PFNGLFRAGMENTCOLORMATERIALSGIXPROC glad_glFragmentColorMaterialSGIX = NULL; +PFNGLFRAGMENTLIGHTFSGIXPROC glad_glFragmentLightfSGIX = NULL; +PFNGLFRAGMENTLIGHTFVSGIXPROC glad_glFragmentLightfvSGIX = NULL; +PFNGLFRAGMENTLIGHTISGIXPROC glad_glFragmentLightiSGIX = NULL; +PFNGLFRAGMENTLIGHTIVSGIXPROC glad_glFragmentLightivSGIX = NULL; +PFNGLFRAGMENTLIGHTMODELFSGIXPROC glad_glFragmentLightModelfSGIX = NULL; +PFNGLFRAGMENTLIGHTMODELFVSGIXPROC glad_glFragmentLightModelfvSGIX = NULL; 
+PFNGLFRAGMENTLIGHTMODELISGIXPROC glad_glFragmentLightModeliSGIX = NULL; +PFNGLFRAGMENTLIGHTMODELIVSGIXPROC glad_glFragmentLightModelivSGIX = NULL; +PFNGLFRAGMENTMATERIALFSGIXPROC glad_glFragmentMaterialfSGIX = NULL; +PFNGLFRAGMENTMATERIALFVSGIXPROC glad_glFragmentMaterialfvSGIX = NULL; +PFNGLFRAGMENTMATERIALISGIXPROC glad_glFragmentMaterialiSGIX = NULL; +PFNGLFRAGMENTMATERIALIVSGIXPROC glad_glFragmentMaterialivSGIX = NULL; +PFNGLGETFRAGMENTLIGHTFVSGIXPROC glad_glGetFragmentLightfvSGIX = NULL; +PFNGLGETFRAGMENTLIGHTIVSGIXPROC glad_glGetFragmentLightivSGIX = NULL; +PFNGLGETFRAGMENTMATERIALFVSGIXPROC glad_glGetFragmentMaterialfvSGIX = NULL; +PFNGLGETFRAGMENTMATERIALIVSGIXPROC glad_glGetFragmentMaterialivSGIX = NULL; +PFNGLLIGHTENVISGIXPROC glad_glLightEnviSGIX = NULL; +PFNGLFRAMEZOOMSGIXPROC glad_glFrameZoomSGIX = NULL; +PFNGLIGLOOINTERFACESGIXPROC glad_glIglooInterfaceSGIX = NULL; +PFNGLGETINSTRUMENTSSGIXPROC glad_glGetInstrumentsSGIX = NULL; +PFNGLINSTRUMENTSBUFFERSGIXPROC glad_glInstrumentsBufferSGIX = NULL; +PFNGLPOLLINSTRUMENTSSGIXPROC glad_glPollInstrumentsSGIX = NULL; +PFNGLREADINSTRUMENTSSGIXPROC glad_glReadInstrumentsSGIX = NULL; +PFNGLSTARTINSTRUMENTSSGIXPROC glad_glStartInstrumentsSGIX = NULL; +PFNGLSTOPINSTRUMENTSSGIXPROC glad_glStopInstrumentsSGIX = NULL; +PFNGLGETLISTPARAMETERFVSGIXPROC glad_glGetListParameterfvSGIX = NULL; +PFNGLGETLISTPARAMETERIVSGIXPROC glad_glGetListParameterivSGIX = NULL; +PFNGLLISTPARAMETERFSGIXPROC glad_glListParameterfSGIX = NULL; +PFNGLLISTPARAMETERFVSGIXPROC glad_glListParameterfvSGIX = NULL; +PFNGLLISTPARAMETERISGIXPROC glad_glListParameteriSGIX = NULL; +PFNGLLISTPARAMETERIVSGIXPROC glad_glListParameterivSGIX = NULL; +PFNGLPIXELTEXGENSGIXPROC glad_glPixelTexGenSGIX = NULL; +PFNGLDEFORMATIONMAP3DSGIXPROC glad_glDeformationMap3dSGIX = NULL; +PFNGLDEFORMATIONMAP3FSGIXPROC glad_glDeformationMap3fSGIX = NULL; +PFNGLDEFORMSGIXPROC glad_glDeformSGIX = NULL; +PFNGLLOADIDENTITYDEFORMATIONMAPSGIXPROC 
glad_glLoadIdentityDeformationMapSGIX = NULL; +PFNGLREFERENCEPLANESGIXPROC glad_glReferencePlaneSGIX = NULL; +PFNGLSPRITEPARAMETERFSGIXPROC glad_glSpriteParameterfSGIX = NULL; +PFNGLSPRITEPARAMETERFVSGIXPROC glad_glSpriteParameterfvSGIX = NULL; +PFNGLSPRITEPARAMETERISGIXPROC glad_glSpriteParameteriSGIX = NULL; +PFNGLSPRITEPARAMETERIVSGIXPROC glad_glSpriteParameterivSGIX = NULL; +PFNGLTAGSAMPLEBUFFERSGIXPROC glad_glTagSampleBufferSGIX = NULL; +PFNGLCOLORTABLESGIPROC glad_glColorTableSGI = NULL; +PFNGLCOLORTABLEPARAMETERFVSGIPROC glad_glColorTableParameterfvSGI = NULL; +PFNGLCOLORTABLEPARAMETERIVSGIPROC glad_glColorTableParameterivSGI = NULL; +PFNGLCOPYCOLORTABLESGIPROC glad_glCopyColorTableSGI = NULL; +PFNGLGETCOLORTABLESGIPROC glad_glGetColorTableSGI = NULL; +PFNGLGETCOLORTABLEPARAMETERFVSGIPROC glad_glGetColorTableParameterfvSGI = NULL; +PFNGLGETCOLORTABLEPARAMETERIVSGIPROC glad_glGetColorTableParameterivSGI = NULL; +PFNGLFINISHTEXTURESUNXPROC glad_glFinishTextureSUNX = NULL; +PFNGLGLOBALALPHAFACTORBSUNPROC glad_glGlobalAlphaFactorbSUN = NULL; +PFNGLGLOBALALPHAFACTORSSUNPROC glad_glGlobalAlphaFactorsSUN = NULL; +PFNGLGLOBALALPHAFACTORISUNPROC glad_glGlobalAlphaFactoriSUN = NULL; +PFNGLGLOBALALPHAFACTORFSUNPROC glad_glGlobalAlphaFactorfSUN = NULL; +PFNGLGLOBALALPHAFACTORDSUNPROC glad_glGlobalAlphaFactordSUN = NULL; +PFNGLGLOBALALPHAFACTORUBSUNPROC glad_glGlobalAlphaFactorubSUN = NULL; +PFNGLGLOBALALPHAFACTORUSSUNPROC glad_glGlobalAlphaFactorusSUN = NULL; +PFNGLGLOBALALPHAFACTORUISUNPROC glad_glGlobalAlphaFactoruiSUN = NULL; +PFNGLDRAWMESHARRAYSSUNPROC glad_glDrawMeshArraysSUN = NULL; +PFNGLREPLACEMENTCODEUISUNPROC glad_glReplacementCodeuiSUN = NULL; +PFNGLREPLACEMENTCODEUSSUNPROC glad_glReplacementCodeusSUN = NULL; +PFNGLREPLACEMENTCODEUBSUNPROC glad_glReplacementCodeubSUN = NULL; +PFNGLREPLACEMENTCODEUIVSUNPROC glad_glReplacementCodeuivSUN = NULL; +PFNGLREPLACEMENTCODEUSVSUNPROC glad_glReplacementCodeusvSUN = NULL; +PFNGLREPLACEMENTCODEUBVSUNPROC 
glad_glReplacementCodeubvSUN = NULL; +PFNGLREPLACEMENTCODEPOINTERSUNPROC glad_glReplacementCodePointerSUN = NULL; +PFNGLCOLOR4UBVERTEX2FSUNPROC glad_glColor4ubVertex2fSUN = NULL; +PFNGLCOLOR4UBVERTEX2FVSUNPROC glad_glColor4ubVertex2fvSUN = NULL; +PFNGLCOLOR4UBVERTEX3FSUNPROC glad_glColor4ubVertex3fSUN = NULL; +PFNGLCOLOR4UBVERTEX3FVSUNPROC glad_glColor4ubVertex3fvSUN = NULL; +PFNGLCOLOR3FVERTEX3FSUNPROC glad_glColor3fVertex3fSUN = NULL; +PFNGLCOLOR3FVERTEX3FVSUNPROC glad_glColor3fVertex3fvSUN = NULL; +PFNGLNORMAL3FVERTEX3FSUNPROC glad_glNormal3fVertex3fSUN = NULL; +PFNGLNORMAL3FVERTEX3FVSUNPROC glad_glNormal3fVertex3fvSUN = NULL; +PFNGLCOLOR4FNORMAL3FVERTEX3FSUNPROC glad_glColor4fNormal3fVertex3fSUN = NULL; +PFNGLCOLOR4FNORMAL3FVERTEX3FVSUNPROC glad_glColor4fNormal3fVertex3fvSUN = NULL; +PFNGLTEXCOORD2FVERTEX3FSUNPROC glad_glTexCoord2fVertex3fSUN = NULL; +PFNGLTEXCOORD2FVERTEX3FVSUNPROC glad_glTexCoord2fVertex3fvSUN = NULL; +PFNGLTEXCOORD4FVERTEX4FSUNPROC glad_glTexCoord4fVertex4fSUN = NULL; +PFNGLTEXCOORD4FVERTEX4FVSUNPROC glad_glTexCoord4fVertex4fvSUN = NULL; +PFNGLTEXCOORD2FCOLOR4UBVERTEX3FSUNPROC glad_glTexCoord2fColor4ubVertex3fSUN = NULL; +PFNGLTEXCOORD2FCOLOR4UBVERTEX3FVSUNPROC glad_glTexCoord2fColor4ubVertex3fvSUN = NULL; +PFNGLTEXCOORD2FCOLOR3FVERTEX3FSUNPROC glad_glTexCoord2fColor3fVertex3fSUN = NULL; +PFNGLTEXCOORD2FCOLOR3FVERTEX3FVSUNPROC glad_glTexCoord2fColor3fVertex3fvSUN = NULL; +PFNGLTEXCOORD2FNORMAL3FVERTEX3FSUNPROC glad_glTexCoord2fNormal3fVertex3fSUN = NULL; +PFNGLTEXCOORD2FNORMAL3FVERTEX3FVSUNPROC glad_glTexCoord2fNormal3fVertex3fvSUN = NULL; +PFNGLTEXCOORD2FCOLOR4FNORMAL3FVERTEX3FSUNPROC glad_glTexCoord2fColor4fNormal3fVertex3fSUN = NULL; +PFNGLTEXCOORD2FCOLOR4FNORMAL3FVERTEX3FVSUNPROC glad_glTexCoord2fColor4fNormal3fVertex3fvSUN = NULL; +PFNGLTEXCOORD4FCOLOR4FNORMAL3FVERTEX4FSUNPROC glad_glTexCoord4fColor4fNormal3fVertex4fSUN = NULL; +PFNGLTEXCOORD4FCOLOR4FNORMAL3FVERTEX4FVSUNPROC glad_glTexCoord4fColor4fNormal3fVertex4fvSUN = NULL; 
+PFNGLREPLACEMENTCODEUIVERTEX3FSUNPROC glad_glReplacementCodeuiVertex3fSUN = NULL; +PFNGLREPLACEMENTCODEUIVERTEX3FVSUNPROC glad_glReplacementCodeuiVertex3fvSUN = NULL; +PFNGLREPLACEMENTCODEUICOLOR4UBVERTEX3FSUNPROC glad_glReplacementCodeuiColor4ubVertex3fSUN = NULL; +PFNGLREPLACEMENTCODEUICOLOR4UBVERTEX3FVSUNPROC glad_glReplacementCodeuiColor4ubVertex3fvSUN = NULL; +PFNGLREPLACEMENTCODEUICOLOR3FVERTEX3FSUNPROC glad_glReplacementCodeuiColor3fVertex3fSUN = NULL; +PFNGLREPLACEMENTCODEUICOLOR3FVERTEX3FVSUNPROC glad_glReplacementCodeuiColor3fVertex3fvSUN = NULL; +PFNGLREPLACEMENTCODEUINORMAL3FVERTEX3FSUNPROC glad_glReplacementCodeuiNormal3fVertex3fSUN = NULL; +PFNGLREPLACEMENTCODEUINORMAL3FVERTEX3FVSUNPROC glad_glReplacementCodeuiNormal3fVertex3fvSUN = NULL; +PFNGLREPLACEMENTCODEUICOLOR4FNORMAL3FVERTEX3FSUNPROC glad_glReplacementCodeuiColor4fNormal3fVertex3fSUN = NULL; +PFNGLREPLACEMENTCODEUICOLOR4FNORMAL3FVERTEX3FVSUNPROC glad_glReplacementCodeuiColor4fNormal3fVertex3fvSUN = NULL; +PFNGLREPLACEMENTCODEUITEXCOORD2FVERTEX3FSUNPROC glad_glReplacementCodeuiTexCoord2fVertex3fSUN = NULL; +PFNGLREPLACEMENTCODEUITEXCOORD2FVERTEX3FVSUNPROC glad_glReplacementCodeuiTexCoord2fVertex3fvSUN = NULL; +PFNGLREPLACEMENTCODEUITEXCOORD2FNORMAL3FVERTEX3FSUNPROC glad_glReplacementCodeuiTexCoord2fNormal3fVertex3fSUN = NULL; +PFNGLREPLACEMENTCODEUITEXCOORD2FNORMAL3FVERTEX3FVSUNPROC glad_glReplacementCodeuiTexCoord2fNormal3fVertex3fvSUN = NULL; +PFNGLREPLACEMENTCODEUITEXCOORD2FCOLOR4FNORMAL3FVERTEX3FSUNPROC glad_glReplacementCodeuiTexCoord2fColor4fNormal3fVertex3fSUN = NULL; +PFNGLREPLACEMENTCODEUITEXCOORD2FCOLOR4FNORMAL3FVERTEX3FVSUNPROC glad_glReplacementCodeuiTexCoord2fColor4fNormal3fVertex3fvSUN = NULL; +PFNGLCOPYTEXTURELEVELSAPPLEPROC glad_glCopyTextureLevelsAPPLE = NULL; +PFNGLRENDERBUFFERSTORAGEMULTISAMPLEAPPLEPROC glad_glRenderbufferStorageMultisampleAPPLE = NULL; +PFNGLRESOLVEMULTISAMPLEFRAMEBUFFERAPPLEPROC glad_glResolveMultisampleFramebufferAPPLE = NULL; +PFNGLFENCESYNCAPPLEPROC 
glad_glFenceSyncAPPLE = NULL; +PFNGLISSYNCAPPLEPROC glad_glIsSyncAPPLE = NULL; +PFNGLDELETESYNCAPPLEPROC glad_glDeleteSyncAPPLE = NULL; +PFNGLCLIENTWAITSYNCAPPLEPROC glad_glClientWaitSyncAPPLE = NULL; +PFNGLWAITSYNCAPPLEPROC glad_glWaitSyncAPPLE = NULL; +PFNGLGETINTEGER64VAPPLEPROC glad_glGetInteger64vAPPLE = NULL; +PFNGLGETSYNCIVAPPLEPROC glad_glGetSyncivAPPLE = NULL; +PFNGLDISCARDFRAMEBUFFEREXTPROC glad_glDiscardFramebufferEXT = NULL; +PFNGLMAPBUFFERRANGEEXTPROC glad_glMapBufferRangeEXT = NULL; +PFNGLFLUSHMAPPEDBUFFERRANGEEXTPROC glad_glFlushMappedBufferRangeEXT = NULL; +PFNGLFRAMEBUFFERTEXTURE2DMULTISAMPLEEXTPROC glad_glFramebufferTexture2DMultisampleEXT = NULL; +PFNGLGETGRAPHICSRESETSTATUSEXTPROC glad_glGetGraphicsResetStatusEXT = NULL; +PFNGLREADNPIXELSEXTPROC glad_glReadnPixelsEXT = NULL; +PFNGLGETNUNIFORMFVEXTPROC glad_glGetnUniformfvEXT = NULL; +PFNGLGETNUNIFORMIVEXTPROC glad_glGetnUniformivEXT = NULL; +PFNGLRENDERBUFFERSTORAGEMULTISAMPLEIMGPROC glad_glRenderbufferStorageMultisampleIMG = NULL; +PFNGLFRAMEBUFFERTEXTURE2DMULTISAMPLEIMGPROC glad_glFramebufferTexture2DMultisampleIMG = NULL; +PFNGLCLIPPLANEFIMGPROC glad_glClipPlanefIMG = NULL; +PFNGLCLIPPLANEXIMGPROC glad_glClipPlanexIMG = NULL; +PFNGLEGLIMAGETARGETTEXTURE2DOESPROC glad_glEGLImageTargetTexture2DOES = NULL; +PFNGLEGLIMAGETARGETRENDERBUFFERSTORAGEOESPROC glad_glEGLImageTargetRenderbufferStorageOES = NULL; +PFNGLBLENDEQUATIONSEPARATEOESPROC glad_glBlendEquationSeparateOES = NULL; +PFNGLBLENDFUNCSEPARATEOESPROC glad_glBlendFuncSeparateOES = NULL; +PFNGLBLENDEQUATIONOESPROC glad_glBlendEquationOES = NULL; +PFNGLDRAWTEXSOESPROC glad_glDrawTexsOES = NULL; +PFNGLDRAWTEXIOESPROC glad_glDrawTexiOES = NULL; +PFNGLDRAWTEXXOESPROC glad_glDrawTexxOES = NULL; +PFNGLDRAWTEXSVOESPROC glad_glDrawTexsvOES = NULL; +PFNGLDRAWTEXIVOESPROC glad_glDrawTexivOES = NULL; +PFNGLDRAWTEXXVOESPROC glad_glDrawTexxvOES = NULL; +PFNGLDRAWTEXFOESPROC glad_glDrawTexfOES = NULL; +PFNGLDRAWTEXFVOESPROC glad_glDrawTexfvOES = NULL; 
+PFNGLISRENDERBUFFEROESPROC glad_glIsRenderbufferOES = NULL; +PFNGLBINDRENDERBUFFEROESPROC glad_glBindRenderbufferOES = NULL; +PFNGLDELETERENDERBUFFERSOESPROC glad_glDeleteRenderbuffersOES = NULL; +PFNGLGENRENDERBUFFERSOESPROC glad_glGenRenderbuffersOES = NULL; +PFNGLRENDERBUFFERSTORAGEOESPROC glad_glRenderbufferStorageOES = NULL; +PFNGLGETRENDERBUFFERPARAMETERIVOESPROC glad_glGetRenderbufferParameterivOES = NULL; +PFNGLISFRAMEBUFFEROESPROC glad_glIsFramebufferOES = NULL; +PFNGLBINDFRAMEBUFFEROESPROC glad_glBindFramebufferOES = NULL; +PFNGLDELETEFRAMEBUFFERSOESPROC glad_glDeleteFramebuffersOES = NULL; +PFNGLGENFRAMEBUFFERSOESPROC glad_glGenFramebuffersOES = NULL; +PFNGLCHECKFRAMEBUFFERSTATUSOESPROC glad_glCheckFramebufferStatusOES = NULL; +PFNGLFRAMEBUFFERRENDERBUFFEROESPROC glad_glFramebufferRenderbufferOES = NULL; +PFNGLFRAMEBUFFERTEXTURE2DOESPROC glad_glFramebufferTexture2DOES = NULL; +PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVOESPROC glad_glGetFramebufferAttachmentParameterivOES = NULL; +PFNGLGENERATEMIPMAPOESPROC glad_glGenerateMipmapOES = NULL; +PFNGLMAPBUFFEROESPROC glad_glMapBufferOES = NULL; +PFNGLUNMAPBUFFEROESPROC glad_glUnmapBufferOES = NULL; +PFNGLGETBUFFERPOINTERVOESPROC glad_glGetBufferPointervOES = NULL; +PFNGLCURRENTPALETTEMATRIXOESPROC glad_glCurrentPaletteMatrixOES = NULL; +PFNGLLOADPALETTEFROMMODELVIEWMATRIXOESPROC glad_glLoadPaletteFromModelViewMatrixOES = NULL; +PFNGLMATRIXINDEXPOINTEROESPROC glad_glMatrixIndexPointerOES = NULL; +PFNGLWEIGHTPOINTEROESPROC glad_glWeightPointerOES = NULL; +PFNGLPOINTSIZEPOINTEROESPROC glad_glPointSizePointerOES = NULL; +PFNGLTEXGENFOESPROC glad_glTexGenfOES = NULL; +PFNGLTEXGENFVOESPROC glad_glTexGenfvOES = NULL; +PFNGLTEXGENIOESPROC glad_glTexGeniOES = NULL; +PFNGLTEXGENIVOESPROC glad_glTexGenivOES = NULL; +PFNGLGETTEXGENFVOESPROC glad_glGetTexGenfvOES = NULL; +PFNGLGETTEXGENIVOESPROC glad_glGetTexGenivOES = NULL; +PFNGLBINDVERTEXARRAYOESPROC glad_glBindVertexArrayOES = NULL; +PFNGLDELETEVERTEXARRAYSOESPROC 
glad_glDeleteVertexArraysOES = NULL; +PFNGLGENVERTEXARRAYSOESPROC glad_glGenVertexArraysOES = NULL; +PFNGLISVERTEXARRAYOESPROC glad_glIsVertexArrayOES = NULL; +PFNGLGETDRIVERCONTROLSQCOMPROC glad_glGetDriverControlsQCOM = NULL; +PFNGLGETDRIVERCONTROLSTRINGQCOMPROC glad_glGetDriverControlStringQCOM = NULL; +PFNGLENABLEDRIVERCONTROLQCOMPROC glad_glEnableDriverControlQCOM = NULL; +PFNGLDISABLEDRIVERCONTROLQCOMPROC glad_glDisableDriverControlQCOM = NULL; +PFNGLEXTGETTEXTURESQCOMPROC glad_glExtGetTexturesQCOM = NULL; +PFNGLEXTGETBUFFERSQCOMPROC glad_glExtGetBuffersQCOM = NULL; +PFNGLEXTGETRENDERBUFFERSQCOMPROC glad_glExtGetRenderbuffersQCOM = NULL; +PFNGLEXTGETFRAMEBUFFERSQCOMPROC glad_glExtGetFramebuffersQCOM = NULL; +PFNGLEXTGETTEXLEVELPARAMETERIVQCOMPROC glad_glExtGetTexLevelParameterivQCOM = NULL; +PFNGLEXTTEXOBJECTSTATEOVERRIDEIQCOMPROC glad_glExtTexObjectStateOverrideiQCOM = NULL; +PFNGLEXTGETTEXSUBIMAGEQCOMPROC glad_glExtGetTexSubImageQCOM = NULL; +PFNGLEXTGETBUFFERPOINTERVQCOMPROC glad_glExtGetBufferPointervQCOM = NULL; +PFNGLEXTGETSHADERSQCOMPROC glad_glExtGetShadersQCOM = NULL; +PFNGLEXTGETPROGRAMSQCOMPROC glad_glExtGetProgramsQCOM = NULL; +PFNGLEXTISPROGRAMBINARYQCOMPROC glad_glExtIsProgramBinaryQCOM = NULL; +PFNGLEXTGETPROGRAMBINARYSOURCEQCOMPROC glad_glExtGetProgramBinarySourceQCOM = NULL; +PFNGLSTARTTILINGQCOMPROC glad_glStartTilingQCOM = NULL; +PFNGLENDTILINGQCOMPROC glad_glEndTilingQCOM = NULL; +PFNGLBLITFRAMEBUFFERANGLEPROC glad_glBlitFramebufferANGLE = NULL; +PFNGLRENDERBUFFERSTORAGEMULTISAMPLEANGLEPROC glad_glRenderbufferStorageMultisampleANGLE = NULL; +PFNGLDRAWARRAYSINSTANCEDANGLEPROC glad_glDrawArraysInstancedANGLE = NULL; +PFNGLDRAWELEMENTSINSTANCEDANGLEPROC glad_glDrawElementsInstancedANGLE = NULL; +PFNGLVERTEXATTRIBDIVISORANGLEPROC glad_glVertexAttribDivisorANGLE = NULL; +PFNGLGETTRANSLATEDSHADERSOURCEANGLEPROC glad_glGetTranslatedShaderSourceANGLE = NULL; +PFNGLMAXACTIVESHADERCORESARMPROC glad_glMaxActiveShaderCoresARM = NULL; 
+PFNGLDRAWARRAYSINSTANCEDBASEINSTANCEEXTPROC glad_glDrawArraysInstancedBaseInstanceEXT = NULL; +PFNGLDRAWELEMENTSINSTANCEDBASEINSTANCEEXTPROC glad_glDrawElementsInstancedBaseInstanceEXT = NULL; +PFNGLDRAWELEMENTSINSTANCEDBASEVERTEXBASEINSTANCEEXTPROC glad_glDrawElementsInstancedBaseVertexBaseInstanceEXT = NULL; +PFNGLBINDFRAGDATALOCATIONINDEXEDEXTPROC glad_glBindFragDataLocationIndexedEXT = NULL; +PFNGLGETPROGRAMRESOURCELOCATIONINDEXEXTPROC glad_glGetProgramResourceLocationIndexEXT = NULL; +PFNGLGETFRAGDATAINDEXEXTPROC glad_glGetFragDataIndexEXT = NULL; +PFNGLBUFFERSTORAGEEXTPROC glad_glBufferStorageEXT = NULL; +PFNGLCLEARTEXIMAGEEXTPROC glad_glClearTexImageEXT = NULL; +PFNGLCLEARTEXSUBIMAGEEXTPROC glad_glClearTexSubImageEXT = NULL; +PFNGLCLIPCONTROLEXTPROC glad_glClipControlEXT = NULL; +PFNGLCOPYIMAGESUBDATAEXTPROC glad_glCopyImageSubDataEXT = NULL; +PFNGLGENQUERIESEXTPROC glad_glGenQueriesEXT = NULL; +PFNGLDELETEQUERIESEXTPROC glad_glDeleteQueriesEXT = NULL; +PFNGLISQUERYEXTPROC glad_glIsQueryEXT = NULL; +PFNGLBEGINQUERYEXTPROC glad_glBeginQueryEXT = NULL; +PFNGLENDQUERYEXTPROC glad_glEndQueryEXT = NULL; +PFNGLQUERYCOUNTEREXTPROC glad_glQueryCounterEXT = NULL; +PFNGLGETQUERYIVEXTPROC glad_glGetQueryivEXT = NULL; +PFNGLGETQUERYOBJECTIVEXTPROC glad_glGetQueryObjectivEXT = NULL; +PFNGLGETQUERYOBJECTUIVEXTPROC glad_glGetQueryObjectuivEXT = NULL; +PFNGLGETINTEGER64VEXTPROC glad_glGetInteger64vEXT = NULL; +PFNGLDRAWBUFFERSEXTPROC glad_glDrawBuffersEXT = NULL; +PFNGLENABLEIEXTPROC glad_glEnableiEXT = NULL; +PFNGLDISABLEIEXTPROC glad_glDisableiEXT = NULL; +PFNGLBLENDEQUATIONIEXTPROC glad_glBlendEquationiEXT = NULL; +PFNGLBLENDEQUATIONSEPARATEIEXTPROC glad_glBlendEquationSeparateiEXT = NULL; +PFNGLBLENDFUNCIEXTPROC glad_glBlendFunciEXT = NULL; +PFNGLBLENDFUNCSEPARATEIEXTPROC glad_glBlendFuncSeparateiEXT = NULL; +PFNGLCOLORMASKIEXTPROC glad_glColorMaskiEXT = NULL; +PFNGLISENABLEDIEXTPROC glad_glIsEnablediEXT = NULL; +PFNGLDRAWELEMENTSBASEVERTEXEXTPROC 
glad_glDrawElementsBaseVertexEXT = NULL; +PFNGLDRAWRANGEELEMENTSBASEVERTEXEXTPROC glad_glDrawRangeElementsBaseVertexEXT = NULL; +PFNGLDRAWELEMENTSINSTANCEDBASEVERTEXEXTPROC glad_glDrawElementsInstancedBaseVertexEXT = NULL; +PFNGLMULTIDRAWELEMENTSBASEVERTEXEXTPROC glad_glMultiDrawElementsBaseVertexEXT = NULL; +PFNGLDRAWTRANSFORMFEEDBACKEXTPROC glad_glDrawTransformFeedbackEXT = NULL; +PFNGLDRAWTRANSFORMFEEDBACKINSTANCEDEXTPROC glad_glDrawTransformFeedbackInstancedEXT = NULL; +PFNGLVERTEXATTRIBDIVISOREXTPROC glad_glVertexAttribDivisorEXT = NULL; +PFNGLMULTIDRAWARRAYSINDIRECTEXTPROC glad_glMultiDrawArraysIndirectEXT = NULL; +PFNGLMULTIDRAWELEMENTSINDIRECTEXTPROC glad_glMultiDrawElementsIndirectEXT = NULL; +PFNGLREADBUFFERINDEXEDEXTPROC glad_glReadBufferIndexedEXT = NULL; +PFNGLDRAWBUFFERSINDEXEDEXTPROC glad_glDrawBuffersIndexedEXT = NULL; +PFNGLGETINTEGERI_VEXTPROC glad_glGetIntegeri_vEXT = NULL; +PFNGLPRIMITIVEBOUNDINGBOXEXTPROC glad_glPrimitiveBoundingBoxEXT = NULL; +PFNGLFRAMEBUFFERPIXELLOCALSTORAGESIZEEXTPROC glad_glFramebufferPixelLocalStorageSizeEXT = NULL; +PFNGLGETFRAMEBUFFERPIXELLOCALSTORAGESIZEEXTPROC glad_glGetFramebufferPixelLocalStorageSizeEXT = NULL; +PFNGLCLEARPIXELLOCALSTORAGEUIEXTPROC glad_glClearPixelLocalStorageuiEXT = NULL; +PFNGLTEXPAGECOMMITMENTEXTPROC glad_glTexPageCommitmentEXT = NULL; +PFNGLPATCHPARAMETERIEXTPROC glad_glPatchParameteriEXT = NULL; +PFNGLSAMPLERPARAMETERIIVEXTPROC glad_glSamplerParameterIivEXT = NULL; +PFNGLSAMPLERPARAMETERIUIVEXTPROC glad_glSamplerParameterIuivEXT = NULL; +PFNGLGETSAMPLERPARAMETERIIVEXTPROC glad_glGetSamplerParameterIivEXT = NULL; +PFNGLGETSAMPLERPARAMETERIUIVEXTPROC glad_glGetSamplerParameterIuivEXT = NULL; +PFNGLTEXBUFFERRANGEEXTPROC glad_glTexBufferRangeEXT = NULL; +PFNGLTEXSTORAGEATTRIBS2DEXTPROC glad_glTexStorageAttribs2DEXT = NULL; +PFNGLTEXSTORAGEATTRIBS3DEXTPROC glad_glTexStorageAttribs3DEXT = NULL; +PFNGLTEXTUREVIEWEXTPROC glad_glTextureViewEXT = NULL; +PFNGLGETTEXTUREHANDLEIMGPROC 
glad_glGetTextureHandleIMG = NULL; +PFNGLGETTEXTURESAMPLERHANDLEIMGPROC glad_glGetTextureSamplerHandleIMG = NULL; +PFNGLUNIFORMHANDLEUI64IMGPROC glad_glUniformHandleui64IMG = NULL; +PFNGLUNIFORMHANDLEUI64VIMGPROC glad_glUniformHandleui64vIMG = NULL; +PFNGLPROGRAMUNIFORMHANDLEUI64IMGPROC glad_glProgramUniformHandleui64IMG = NULL; +PFNGLPROGRAMUNIFORMHANDLEUI64VIMGPROC glad_glProgramUniformHandleui64vIMG = NULL; +PFNGLFRAMEBUFFERTEXTURE2DDOWNSAMPLEIMGPROC glad_glFramebufferTexture2DDownsampleIMG = NULL; +PFNGLFRAMEBUFFERTEXTURELAYERDOWNSAMPLEIMGPROC glad_glFramebufferTextureLayerDownsampleIMG = NULL; +PFNGLCOPYBUFFERSUBDATANVPROC glad_glCopyBufferSubDataNV = NULL; +PFNGLCOVERAGEMASKNVPROC glad_glCoverageMaskNV = NULL; +PFNGLCOVERAGEOPERATIONNVPROC glad_glCoverageOperationNV = NULL; +PFNGLDRAWBUFFERSNVPROC glad_glDrawBuffersNV = NULL; +PFNGLDRAWARRAYSINSTANCEDNVPROC glad_glDrawArraysInstancedNV = NULL; +PFNGLDRAWELEMENTSINSTANCEDNVPROC glad_glDrawElementsInstancedNV = NULL; +PFNGLBLITFRAMEBUFFERNVPROC glad_glBlitFramebufferNV = NULL; +PFNGLRENDERBUFFERSTORAGEMULTISAMPLENVPROC glad_glRenderbufferStorageMultisampleNV = NULL; +PFNGLVERTEXATTRIBDIVISORNVPROC glad_glVertexAttribDivisorNV = NULL; +PFNGLUNIFORMMATRIX2X3FVNVPROC glad_glUniformMatrix2x3fvNV = NULL; +PFNGLUNIFORMMATRIX3X2FVNVPROC glad_glUniformMatrix3x2fvNV = NULL; +PFNGLUNIFORMMATRIX2X4FVNVPROC glad_glUniformMatrix2x4fvNV = NULL; +PFNGLUNIFORMMATRIX4X2FVNVPROC glad_glUniformMatrix4x2fvNV = NULL; +PFNGLUNIFORMMATRIX3X4FVNVPROC glad_glUniformMatrix3x4fvNV = NULL; +PFNGLUNIFORMMATRIX4X3FVNVPROC glad_glUniformMatrix4x3fvNV = NULL; +PFNGLPOLYGONMODENVPROC glad_glPolygonModeNV = NULL; +PFNGLREADBUFFERNVPROC glad_glReadBufferNV = NULL; PFNGLVIEWPORTARRAYVNVPROC glad_glViewportArrayvNV = NULL; PFNGLVIEWPORTINDEXEDFNVPROC glad_glViewportIndexedfNV = NULL; PFNGLVIEWPORTINDEXEDFVNVPROC glad_glViewportIndexedfvNV = NULL; @@ -1608,9 +5241,6 @@ PFNGLGETFLOATI_VNVPROC glad_glGetFloati_vNV = NULL; PFNGLENABLEINVPROC 
glad_glEnableiNV = NULL; PFNGLDISABLEINVPROC glad_glDisableiNV = NULL; PFNGLISENABLEDINVPROC glad_glIsEnablediNV = NULL; -PFNGLVIEWPORTSWIZZLENVPROC glad_glViewportSwizzleNV = NULL; -PFNGLEGLIMAGETARGETTEXTURE2DOESPROC glad_glEGLImageTargetTexture2DOES = NULL; -PFNGLEGLIMAGETARGETRENDERBUFFERSTORAGEOESPROC glad_glEGLImageTargetRenderbufferStorageOES = NULL; PFNGLCOPYIMAGESUBDATAOESPROC glad_glCopyImageSubDataOES = NULL; PFNGLENABLEIOESPROC glad_glEnableiOES = NULL; PFNGLDISABLEIOESPROC glad_glDisableiOES = NULL; @@ -1626,9 +5256,6 @@ PFNGLDRAWELEMENTSINSTANCEDBASEVERTEXOESPROC glad_glDrawElementsInstancedBaseVert PFNGLFRAMEBUFFERTEXTUREOESPROC glad_glFramebufferTextureOES = NULL; PFNGLGETPROGRAMBINARYOESPROC glad_glGetProgramBinaryOES = NULL; PFNGLPROGRAMBINARYOESPROC glad_glProgramBinaryOES = NULL; -PFNGLMAPBUFFEROESPROC glad_glMapBufferOES = NULL; -PFNGLUNMAPBUFFEROESPROC glad_glUnmapBufferOES = NULL; -PFNGLGETBUFFERPOINTERVOESPROC glad_glGetBufferPointervOES = NULL; PFNGLPRIMITIVEBOUNDINGBOXOESPROC glad_glPrimitiveBoundingBoxOES = NULL; PFNGLMINSAMPLESHADINGOESPROC glad_glMinSampleShadingOES = NULL; PFNGLPATCHPARAMETERIOESPROC glad_glPatchParameteriOES = NULL; @@ -1650,10 +5277,6 @@ PFNGLTEXBUFFEROESPROC glad_glTexBufferOES = NULL; PFNGLTEXBUFFERRANGEOESPROC glad_glTexBufferRangeOES = NULL; PFNGLTEXSTORAGE3DMULTISAMPLEOESPROC glad_glTexStorage3DMultisampleOES = NULL; PFNGLTEXTUREVIEWOESPROC glad_glTextureViewOES = NULL; -PFNGLBINDVERTEXARRAYOESPROC glad_glBindVertexArrayOES = NULL; -PFNGLDELETEVERTEXARRAYSOESPROC glad_glDeleteVertexArraysOES = NULL; -PFNGLGENVERTEXARRAYSOESPROC glad_glGenVertexArraysOES = NULL; -PFNGLISVERTEXARRAYOESPROC glad_glIsVertexArrayOES = NULL; PFNGLVIEWPORTARRAYVOESPROC glad_glViewportArrayvOES = NULL; PFNGLVIEWPORTINDEXEDFOESPROC glad_glViewportIndexedfOES = NULL; PFNGLVIEWPORTINDEXEDFVOESPROC glad_glViewportIndexedfvOES = NULL; @@ -1663,26 +5286,8 @@ PFNGLSCISSORINDEXEDVOESPROC glad_glScissorIndexedvOES = NULL; 
PFNGLDEPTHRANGEARRAYFVOESPROC glad_glDepthRangeArrayfvOES = NULL; PFNGLDEPTHRANGEINDEXEDFOESPROC glad_glDepthRangeIndexedfOES = NULL; PFNGLGETFLOATI_VOESPROC glad_glGetFloati_vOES = NULL; -PFNGLFRAMEBUFFERTEXTUREMULTIVIEWOVRPROC glad_glFramebufferTextureMultiviewOVR = NULL; -PFNGLNAMEDFRAMEBUFFERTEXTUREMULTIVIEWOVRPROC glad_glNamedFramebufferTextureMultiviewOVR = NULL; PFNGLFRAMEBUFFERTEXTUREMULTISAMPLEMULTIVIEWOVRPROC glad_glFramebufferTextureMultisampleMultiviewOVR = NULL; PFNGLALPHAFUNCQCOMPROC glad_glAlphaFuncQCOM = NULL; -PFNGLGETDRIVERCONTROLSQCOMPROC glad_glGetDriverControlsQCOM = NULL; -PFNGLGETDRIVERCONTROLSTRINGQCOMPROC glad_glGetDriverControlStringQCOM = NULL; -PFNGLENABLEDRIVERCONTROLQCOMPROC glad_glEnableDriverControlQCOM = NULL; -PFNGLDISABLEDRIVERCONTROLQCOMPROC glad_glDisableDriverControlQCOM = NULL; -PFNGLEXTGETTEXTURESQCOMPROC glad_glExtGetTexturesQCOM = NULL; -PFNGLEXTGETBUFFERSQCOMPROC glad_glExtGetBuffersQCOM = NULL; -PFNGLEXTGETRENDERBUFFERSQCOMPROC glad_glExtGetRenderbuffersQCOM = NULL; -PFNGLEXTGETFRAMEBUFFERSQCOMPROC glad_glExtGetFramebuffersQCOM = NULL; -PFNGLEXTGETTEXLEVELPARAMETERIVQCOMPROC glad_glExtGetTexLevelParameterivQCOM = NULL; -PFNGLEXTTEXOBJECTSTATEOVERRIDEIQCOMPROC glad_glExtTexObjectStateOverrideiQCOM = NULL; -PFNGLEXTGETTEXSUBIMAGEQCOMPROC glad_glExtGetTexSubImageQCOM = NULL; -PFNGLEXTGETBUFFERPOINTERVQCOMPROC glad_glExtGetBufferPointervQCOM = NULL; -PFNGLEXTGETSHADERSQCOMPROC glad_glExtGetShadersQCOM = NULL; -PFNGLEXTGETPROGRAMSQCOMPROC glad_glExtGetProgramsQCOM = NULL; -PFNGLEXTISPROGRAMBINARYQCOMPROC glad_glExtIsProgramBinaryQCOM = NULL; -PFNGLEXTGETPROGRAMBINARYSOURCEQCOMPROC glad_glExtGetProgramBinarySourceQCOM = NULL; PFNGLEXTRAPOLATETEX2DQCOMPROC glad_glExtrapolateTex2DQCOM = NULL; PFNGLFRAMEBUFFERFOVEATIONCONFIGQCOMPROC glad_glFramebufferFoveationConfigQCOM = NULL; PFNGLFRAMEBUFFERFOVEATIONPARAMETERSQCOMPROC glad_glFramebufferFoveationParametersQCOM = NULL; @@ -1691,8 +5296,6113 @@ 
PFNGLTEXESTIMATEMOTIONREGIONSQCOMPROC glad_glTexEstimateMotionRegionsQCOM = NULL PFNGLFRAMEBUFFERFETCHBARRIERQCOMPROC glad_glFramebufferFetchBarrierQCOM = NULL; PFNGLSHADINGRATEQCOMPROC glad_glShadingRateQCOM = NULL; PFNGLTEXTUREFOVEATIONPARAMETERSQCOMPROC glad_glTextureFoveationParametersQCOM = NULL; -PFNGLSTARTTILINGQCOMPROC glad_glStartTilingQCOM = NULL; -PFNGLENDTILINGQCOMPROC glad_glEndTilingQCOM = NULL; +static void load_GL_VERSION_1_0(GLADloadproc load) { + if(!GLAD_GL_VERSION_1_0) return; + glad_glCullFace = (PFNGLCULLFACEPROC)load("glCullFace"); + glad_glFrontFace = (PFNGLFRONTFACEPROC)load("glFrontFace"); + glad_glHint = (PFNGLHINTPROC)load("glHint"); + glad_glLineWidth = (PFNGLLINEWIDTHPROC)load("glLineWidth"); + glad_glPointSize = (PFNGLPOINTSIZEPROC)load("glPointSize"); + glad_glPolygonMode = (PFNGLPOLYGONMODEPROC)load("glPolygonMode"); + glad_glScissor = (PFNGLSCISSORPROC)load("glScissor"); + glad_glTexParameterf = (PFNGLTEXPARAMETERFPROC)load("glTexParameterf"); + glad_glTexParameterfv = (PFNGLTEXPARAMETERFVPROC)load("glTexParameterfv"); + glad_glTexParameteri = (PFNGLTEXPARAMETERIPROC)load("glTexParameteri"); + glad_glTexParameteriv = (PFNGLTEXPARAMETERIVPROC)load("glTexParameteriv"); + glad_glTexImage1D = (PFNGLTEXIMAGE1DPROC)load("glTexImage1D"); + glad_glTexImage2D = (PFNGLTEXIMAGE2DPROC)load("glTexImage2D"); + glad_glDrawBuffer = (PFNGLDRAWBUFFERPROC)load("glDrawBuffer"); + glad_glClear = (PFNGLCLEARPROC)load("glClear"); + glad_glClearColor = (PFNGLCLEARCOLORPROC)load("glClearColor"); + glad_glClearStencil = (PFNGLCLEARSTENCILPROC)load("glClearStencil"); + glad_glClearDepth = (PFNGLCLEARDEPTHPROC)load("glClearDepth"); + glad_glStencilMask = (PFNGLSTENCILMASKPROC)load("glStencilMask"); + glad_glColorMask = (PFNGLCOLORMASKPROC)load("glColorMask"); + glad_glDepthMask = (PFNGLDEPTHMASKPROC)load("glDepthMask"); + glad_glDisable = (PFNGLDISABLEPROC)load("glDisable"); + glad_glEnable = (PFNGLENABLEPROC)load("glEnable"); + glad_glFinish = 
(PFNGLFINISHPROC)load("glFinish"); + glad_glFlush = (PFNGLFLUSHPROC)load("glFlush"); + glad_glBlendFunc = (PFNGLBLENDFUNCPROC)load("glBlendFunc"); + glad_glLogicOp = (PFNGLLOGICOPPROC)load("glLogicOp"); + glad_glStencilFunc = (PFNGLSTENCILFUNCPROC)load("glStencilFunc"); + glad_glStencilOp = (PFNGLSTENCILOPPROC)load("glStencilOp"); + glad_glDepthFunc = (PFNGLDEPTHFUNCPROC)load("glDepthFunc"); + glad_glPixelStoref = (PFNGLPIXELSTOREFPROC)load("glPixelStoref"); + glad_glPixelStorei = (PFNGLPIXELSTOREIPROC)load("glPixelStorei"); + glad_glReadBuffer = (PFNGLREADBUFFERPROC)load("glReadBuffer"); + glad_glReadPixels = (PFNGLREADPIXELSPROC)load("glReadPixels"); + glad_glGetBooleanv = (PFNGLGETBOOLEANVPROC)load("glGetBooleanv"); + glad_glGetDoublev = (PFNGLGETDOUBLEVPROC)load("glGetDoublev"); + glad_glGetError = (PFNGLGETERRORPROC)load("glGetError"); + glad_glGetFloatv = (PFNGLGETFLOATVPROC)load("glGetFloatv"); + glad_glGetIntegerv = (PFNGLGETINTEGERVPROC)load("glGetIntegerv"); + glad_glGetString = (PFNGLGETSTRINGPROC)load("glGetString"); + glad_glGetTexImage = (PFNGLGETTEXIMAGEPROC)load("glGetTexImage"); + glad_glGetTexParameterfv = (PFNGLGETTEXPARAMETERFVPROC)load("glGetTexParameterfv"); + glad_glGetTexParameteriv = (PFNGLGETTEXPARAMETERIVPROC)load("glGetTexParameteriv"); + glad_glGetTexLevelParameterfv = (PFNGLGETTEXLEVELPARAMETERFVPROC)load("glGetTexLevelParameterfv"); + glad_glGetTexLevelParameteriv = (PFNGLGETTEXLEVELPARAMETERIVPROC)load("glGetTexLevelParameteriv"); + glad_glIsEnabled = (PFNGLISENABLEDPROC)load("glIsEnabled"); + glad_glDepthRange = (PFNGLDEPTHRANGEPROC)load("glDepthRange"); + glad_glViewport = (PFNGLVIEWPORTPROC)load("glViewport"); + glad_glNewList = (PFNGLNEWLISTPROC)load("glNewList"); + glad_glEndList = (PFNGLENDLISTPROC)load("glEndList"); + glad_glCallList = (PFNGLCALLLISTPROC)load("glCallList"); + glad_glCallLists = (PFNGLCALLLISTSPROC)load("glCallLists"); + glad_glDeleteLists = (PFNGLDELETELISTSPROC)load("glDeleteLists"); + glad_glGenLists = 
(PFNGLGENLISTSPROC)load("glGenLists"); + glad_glListBase = (PFNGLLISTBASEPROC)load("glListBase"); + glad_glBegin = (PFNGLBEGINPROC)load("glBegin"); + glad_glBitmap = (PFNGLBITMAPPROC)load("glBitmap"); + glad_glColor3b = (PFNGLCOLOR3BPROC)load("glColor3b"); + glad_glColor3bv = (PFNGLCOLOR3BVPROC)load("glColor3bv"); + glad_glColor3d = (PFNGLCOLOR3DPROC)load("glColor3d"); + glad_glColor3dv = (PFNGLCOLOR3DVPROC)load("glColor3dv"); + glad_glColor3f = (PFNGLCOLOR3FPROC)load("glColor3f"); + glad_glColor3fv = (PFNGLCOLOR3FVPROC)load("glColor3fv"); + glad_glColor3i = (PFNGLCOLOR3IPROC)load("glColor3i"); + glad_glColor3iv = (PFNGLCOLOR3IVPROC)load("glColor3iv"); + glad_glColor3s = (PFNGLCOLOR3SPROC)load("glColor3s"); + glad_glColor3sv = (PFNGLCOLOR3SVPROC)load("glColor3sv"); + glad_glColor3ub = (PFNGLCOLOR3UBPROC)load("glColor3ub"); + glad_glColor3ubv = (PFNGLCOLOR3UBVPROC)load("glColor3ubv"); + glad_glColor3ui = (PFNGLCOLOR3UIPROC)load("glColor3ui"); + glad_glColor3uiv = (PFNGLCOLOR3UIVPROC)load("glColor3uiv"); + glad_glColor3us = (PFNGLCOLOR3USPROC)load("glColor3us"); + glad_glColor3usv = (PFNGLCOLOR3USVPROC)load("glColor3usv"); + glad_glColor4b = (PFNGLCOLOR4BPROC)load("glColor4b"); + glad_glColor4bv = (PFNGLCOLOR4BVPROC)load("glColor4bv"); + glad_glColor4d = (PFNGLCOLOR4DPROC)load("glColor4d"); + glad_glColor4dv = (PFNGLCOLOR4DVPROC)load("glColor4dv"); + glad_glColor4f = (PFNGLCOLOR4FPROC)load("glColor4f"); + glad_glColor4fv = (PFNGLCOLOR4FVPROC)load("glColor4fv"); + glad_glColor4i = (PFNGLCOLOR4IPROC)load("glColor4i"); + glad_glColor4iv = (PFNGLCOLOR4IVPROC)load("glColor4iv"); + glad_glColor4s = (PFNGLCOLOR4SPROC)load("glColor4s"); + glad_glColor4sv = (PFNGLCOLOR4SVPROC)load("glColor4sv"); + glad_glColor4ub = (PFNGLCOLOR4UBPROC)load("glColor4ub"); + glad_glColor4ubv = (PFNGLCOLOR4UBVPROC)load("glColor4ubv"); + glad_glColor4ui = (PFNGLCOLOR4UIPROC)load("glColor4ui"); + glad_glColor4uiv = (PFNGLCOLOR4UIVPROC)load("glColor4uiv"); + glad_glColor4us = 
(PFNGLCOLOR4USPROC)load("glColor4us"); + glad_glColor4usv = (PFNGLCOLOR4USVPROC)load("glColor4usv"); + glad_glEdgeFlag = (PFNGLEDGEFLAGPROC)load("glEdgeFlag"); + glad_glEdgeFlagv = (PFNGLEDGEFLAGVPROC)load("glEdgeFlagv"); + glad_glEnd = (PFNGLENDPROC)load("glEnd"); + glad_glIndexd = (PFNGLINDEXDPROC)load("glIndexd"); + glad_glIndexdv = (PFNGLINDEXDVPROC)load("glIndexdv"); + glad_glIndexf = (PFNGLINDEXFPROC)load("glIndexf"); + glad_glIndexfv = (PFNGLINDEXFVPROC)load("glIndexfv"); + glad_glIndexi = (PFNGLINDEXIPROC)load("glIndexi"); + glad_glIndexiv = (PFNGLINDEXIVPROC)load("glIndexiv"); + glad_glIndexs = (PFNGLINDEXSPROC)load("glIndexs"); + glad_glIndexsv = (PFNGLINDEXSVPROC)load("glIndexsv"); + glad_glNormal3b = (PFNGLNORMAL3BPROC)load("glNormal3b"); + glad_glNormal3bv = (PFNGLNORMAL3BVPROC)load("glNormal3bv"); + glad_glNormal3d = (PFNGLNORMAL3DPROC)load("glNormal3d"); + glad_glNormal3dv = (PFNGLNORMAL3DVPROC)load("glNormal3dv"); + glad_glNormal3f = (PFNGLNORMAL3FPROC)load("glNormal3f"); + glad_glNormal3fv = (PFNGLNORMAL3FVPROC)load("glNormal3fv"); + glad_glNormal3i = (PFNGLNORMAL3IPROC)load("glNormal3i"); + glad_glNormal3iv = (PFNGLNORMAL3IVPROC)load("glNormal3iv"); + glad_glNormal3s = (PFNGLNORMAL3SPROC)load("glNormal3s"); + glad_glNormal3sv = (PFNGLNORMAL3SVPROC)load("glNormal3sv"); + glad_glRasterPos2d = (PFNGLRASTERPOS2DPROC)load("glRasterPos2d"); + glad_glRasterPos2dv = (PFNGLRASTERPOS2DVPROC)load("glRasterPos2dv"); + glad_glRasterPos2f = (PFNGLRASTERPOS2FPROC)load("glRasterPos2f"); + glad_glRasterPos2fv = (PFNGLRASTERPOS2FVPROC)load("glRasterPos2fv"); + glad_glRasterPos2i = (PFNGLRASTERPOS2IPROC)load("glRasterPos2i"); + glad_glRasterPos2iv = (PFNGLRASTERPOS2IVPROC)load("glRasterPos2iv"); + glad_glRasterPos2s = (PFNGLRASTERPOS2SPROC)load("glRasterPos2s"); + glad_glRasterPos2sv = (PFNGLRASTERPOS2SVPROC)load("glRasterPos2sv"); + glad_glRasterPos3d = (PFNGLRASTERPOS3DPROC)load("glRasterPos3d"); + glad_glRasterPos3dv = 
(PFNGLRASTERPOS3DVPROC)load("glRasterPos3dv"); + glad_glRasterPos3f = (PFNGLRASTERPOS3FPROC)load("glRasterPos3f"); + glad_glRasterPos3fv = (PFNGLRASTERPOS3FVPROC)load("glRasterPos3fv"); + glad_glRasterPos3i = (PFNGLRASTERPOS3IPROC)load("glRasterPos3i"); + glad_glRasterPos3iv = (PFNGLRASTERPOS3IVPROC)load("glRasterPos3iv"); + glad_glRasterPos3s = (PFNGLRASTERPOS3SPROC)load("glRasterPos3s"); + glad_glRasterPos3sv = (PFNGLRASTERPOS3SVPROC)load("glRasterPos3sv"); + glad_glRasterPos4d = (PFNGLRASTERPOS4DPROC)load("glRasterPos4d"); + glad_glRasterPos4dv = (PFNGLRASTERPOS4DVPROC)load("glRasterPos4dv"); + glad_glRasterPos4f = (PFNGLRASTERPOS4FPROC)load("glRasterPos4f"); + glad_glRasterPos4fv = (PFNGLRASTERPOS4FVPROC)load("glRasterPos4fv"); + glad_glRasterPos4i = (PFNGLRASTERPOS4IPROC)load("glRasterPos4i"); + glad_glRasterPos4iv = (PFNGLRASTERPOS4IVPROC)load("glRasterPos4iv"); + glad_glRasterPos4s = (PFNGLRASTERPOS4SPROC)load("glRasterPos4s"); + glad_glRasterPos4sv = (PFNGLRASTERPOS4SVPROC)load("glRasterPos4sv"); + glad_glRectd = (PFNGLRECTDPROC)load("glRectd"); + glad_glRectdv = (PFNGLRECTDVPROC)load("glRectdv"); + glad_glRectf = (PFNGLRECTFPROC)load("glRectf"); + glad_glRectfv = (PFNGLRECTFVPROC)load("glRectfv"); + glad_glRecti = (PFNGLRECTIPROC)load("glRecti"); + glad_glRectiv = (PFNGLRECTIVPROC)load("glRectiv"); + glad_glRects = (PFNGLRECTSPROC)load("glRects"); + glad_glRectsv = (PFNGLRECTSVPROC)load("glRectsv"); + glad_glTexCoord1d = (PFNGLTEXCOORD1DPROC)load("glTexCoord1d"); + glad_glTexCoord1dv = (PFNGLTEXCOORD1DVPROC)load("glTexCoord1dv"); + glad_glTexCoord1f = (PFNGLTEXCOORD1FPROC)load("glTexCoord1f"); + glad_glTexCoord1fv = (PFNGLTEXCOORD1FVPROC)load("glTexCoord1fv"); + glad_glTexCoord1i = (PFNGLTEXCOORD1IPROC)load("glTexCoord1i"); + glad_glTexCoord1iv = (PFNGLTEXCOORD1IVPROC)load("glTexCoord1iv"); + glad_glTexCoord1s = (PFNGLTEXCOORD1SPROC)load("glTexCoord1s"); + glad_glTexCoord1sv = (PFNGLTEXCOORD1SVPROC)load("glTexCoord1sv"); + glad_glTexCoord2d = 
(PFNGLTEXCOORD2DPROC)load("glTexCoord2d"); + glad_glTexCoord2dv = (PFNGLTEXCOORD2DVPROC)load("glTexCoord2dv"); + glad_glTexCoord2f = (PFNGLTEXCOORD2FPROC)load("glTexCoord2f"); + glad_glTexCoord2fv = (PFNGLTEXCOORD2FVPROC)load("glTexCoord2fv"); + glad_glTexCoord2i = (PFNGLTEXCOORD2IPROC)load("glTexCoord2i"); + glad_glTexCoord2iv = (PFNGLTEXCOORD2IVPROC)load("glTexCoord2iv"); + glad_glTexCoord2s = (PFNGLTEXCOORD2SPROC)load("glTexCoord2s"); + glad_glTexCoord2sv = (PFNGLTEXCOORD2SVPROC)load("glTexCoord2sv"); + glad_glTexCoord3d = (PFNGLTEXCOORD3DPROC)load("glTexCoord3d"); + glad_glTexCoord3dv = (PFNGLTEXCOORD3DVPROC)load("glTexCoord3dv"); + glad_glTexCoord3f = (PFNGLTEXCOORD3FPROC)load("glTexCoord3f"); + glad_glTexCoord3fv = (PFNGLTEXCOORD3FVPROC)load("glTexCoord3fv"); + glad_glTexCoord3i = (PFNGLTEXCOORD3IPROC)load("glTexCoord3i"); + glad_glTexCoord3iv = (PFNGLTEXCOORD3IVPROC)load("glTexCoord3iv"); + glad_glTexCoord3s = (PFNGLTEXCOORD3SPROC)load("glTexCoord3s"); + glad_glTexCoord3sv = (PFNGLTEXCOORD3SVPROC)load("glTexCoord3sv"); + glad_glTexCoord4d = (PFNGLTEXCOORD4DPROC)load("glTexCoord4d"); + glad_glTexCoord4dv = (PFNGLTEXCOORD4DVPROC)load("glTexCoord4dv"); + glad_glTexCoord4f = (PFNGLTEXCOORD4FPROC)load("glTexCoord4f"); + glad_glTexCoord4fv = (PFNGLTEXCOORD4FVPROC)load("glTexCoord4fv"); + glad_glTexCoord4i = (PFNGLTEXCOORD4IPROC)load("glTexCoord4i"); + glad_glTexCoord4iv = (PFNGLTEXCOORD4IVPROC)load("glTexCoord4iv"); + glad_glTexCoord4s = (PFNGLTEXCOORD4SPROC)load("glTexCoord4s"); + glad_glTexCoord4sv = (PFNGLTEXCOORD4SVPROC)load("glTexCoord4sv"); + glad_glVertex2d = (PFNGLVERTEX2DPROC)load("glVertex2d"); + glad_glVertex2dv = (PFNGLVERTEX2DVPROC)load("glVertex2dv"); + glad_glVertex2f = (PFNGLVERTEX2FPROC)load("glVertex2f"); + glad_glVertex2fv = (PFNGLVERTEX2FVPROC)load("glVertex2fv"); + glad_glVertex2i = (PFNGLVERTEX2IPROC)load("glVertex2i"); + glad_glVertex2iv = (PFNGLVERTEX2IVPROC)load("glVertex2iv"); + glad_glVertex2s = (PFNGLVERTEX2SPROC)load("glVertex2s"); + 
glad_glVertex2sv = (PFNGLVERTEX2SVPROC)load("glVertex2sv"); + glad_glVertex3d = (PFNGLVERTEX3DPROC)load("glVertex3d"); + glad_glVertex3dv = (PFNGLVERTEX3DVPROC)load("glVertex3dv"); + glad_glVertex3f = (PFNGLVERTEX3FPROC)load("glVertex3f"); + glad_glVertex3fv = (PFNGLVERTEX3FVPROC)load("glVertex3fv"); + glad_glVertex3i = (PFNGLVERTEX3IPROC)load("glVertex3i"); + glad_glVertex3iv = (PFNGLVERTEX3IVPROC)load("glVertex3iv"); + glad_glVertex3s = (PFNGLVERTEX3SPROC)load("glVertex3s"); + glad_glVertex3sv = (PFNGLVERTEX3SVPROC)load("glVertex3sv"); + glad_glVertex4d = (PFNGLVERTEX4DPROC)load("glVertex4d"); + glad_glVertex4dv = (PFNGLVERTEX4DVPROC)load("glVertex4dv"); + glad_glVertex4f = (PFNGLVERTEX4FPROC)load("glVertex4f"); + glad_glVertex4fv = (PFNGLVERTEX4FVPROC)load("glVertex4fv"); + glad_glVertex4i = (PFNGLVERTEX4IPROC)load("glVertex4i"); + glad_glVertex4iv = (PFNGLVERTEX4IVPROC)load("glVertex4iv"); + glad_glVertex4s = (PFNGLVERTEX4SPROC)load("glVertex4s"); + glad_glVertex4sv = (PFNGLVERTEX4SVPROC)load("glVertex4sv"); + glad_glClipPlane = (PFNGLCLIPPLANEPROC)load("glClipPlane"); + glad_glColorMaterial = (PFNGLCOLORMATERIALPROC)load("glColorMaterial"); + glad_glFogf = (PFNGLFOGFPROC)load("glFogf"); + glad_glFogfv = (PFNGLFOGFVPROC)load("glFogfv"); + glad_glFogi = (PFNGLFOGIPROC)load("glFogi"); + glad_glFogiv = (PFNGLFOGIVPROC)load("glFogiv"); + glad_glLightf = (PFNGLLIGHTFPROC)load("glLightf"); + glad_glLightfv = (PFNGLLIGHTFVPROC)load("glLightfv"); + glad_glLighti = (PFNGLLIGHTIPROC)load("glLighti"); + glad_glLightiv = (PFNGLLIGHTIVPROC)load("glLightiv"); + glad_glLightModelf = (PFNGLLIGHTMODELFPROC)load("glLightModelf"); + glad_glLightModelfv = (PFNGLLIGHTMODELFVPROC)load("glLightModelfv"); + glad_glLightModeli = (PFNGLLIGHTMODELIPROC)load("glLightModeli"); + glad_glLightModeliv = (PFNGLLIGHTMODELIVPROC)load("glLightModeliv"); + glad_glLineStipple = (PFNGLLINESTIPPLEPROC)load("glLineStipple"); + glad_glMaterialf = (PFNGLMATERIALFPROC)load("glMaterialf"); + 
glad_glMaterialfv = (PFNGLMATERIALFVPROC)load("glMaterialfv"); + glad_glMateriali = (PFNGLMATERIALIPROC)load("glMateriali"); + glad_glMaterialiv = (PFNGLMATERIALIVPROC)load("glMaterialiv"); + glad_glPolygonStipple = (PFNGLPOLYGONSTIPPLEPROC)load("glPolygonStipple"); + glad_glShadeModel = (PFNGLSHADEMODELPROC)load("glShadeModel"); + glad_glTexEnvf = (PFNGLTEXENVFPROC)load("glTexEnvf"); + glad_glTexEnvfv = (PFNGLTEXENVFVPROC)load("glTexEnvfv"); + glad_glTexEnvi = (PFNGLTEXENVIPROC)load("glTexEnvi"); + glad_glTexEnviv = (PFNGLTEXENVIVPROC)load("glTexEnviv"); + glad_glTexGend = (PFNGLTEXGENDPROC)load("glTexGend"); + glad_glTexGendv = (PFNGLTEXGENDVPROC)load("glTexGendv"); + glad_glTexGenf = (PFNGLTEXGENFPROC)load("glTexGenf"); + glad_glTexGenfv = (PFNGLTEXGENFVPROC)load("glTexGenfv"); + glad_glTexGeni = (PFNGLTEXGENIPROC)load("glTexGeni"); + glad_glTexGeniv = (PFNGLTEXGENIVPROC)load("glTexGeniv"); + glad_glFeedbackBuffer = (PFNGLFEEDBACKBUFFERPROC)load("glFeedbackBuffer"); + glad_glSelectBuffer = (PFNGLSELECTBUFFERPROC)load("glSelectBuffer"); + glad_glRenderMode = (PFNGLRENDERMODEPROC)load("glRenderMode"); + glad_glInitNames = (PFNGLINITNAMESPROC)load("glInitNames"); + glad_glLoadName = (PFNGLLOADNAMEPROC)load("glLoadName"); + glad_glPassThrough = (PFNGLPASSTHROUGHPROC)load("glPassThrough"); + glad_glPopName = (PFNGLPOPNAMEPROC)load("glPopName"); + glad_glPushName = (PFNGLPUSHNAMEPROC)load("glPushName"); + glad_glClearAccum = (PFNGLCLEARACCUMPROC)load("glClearAccum"); + glad_glClearIndex = (PFNGLCLEARINDEXPROC)load("glClearIndex"); + glad_glIndexMask = (PFNGLINDEXMASKPROC)load("glIndexMask"); + glad_glAccum = (PFNGLACCUMPROC)load("glAccum"); + glad_glPopAttrib = (PFNGLPOPATTRIBPROC)load("glPopAttrib"); + glad_glPushAttrib = (PFNGLPUSHATTRIBPROC)load("glPushAttrib"); + glad_glMap1d = (PFNGLMAP1DPROC)load("glMap1d"); + glad_glMap1f = (PFNGLMAP1FPROC)load("glMap1f"); + glad_glMap2d = (PFNGLMAP2DPROC)load("glMap2d"); + glad_glMap2f = (PFNGLMAP2FPROC)load("glMap2f"); + 
glad_glMapGrid1d = (PFNGLMAPGRID1DPROC)load("glMapGrid1d"); + glad_glMapGrid1f = (PFNGLMAPGRID1FPROC)load("glMapGrid1f"); + glad_glMapGrid2d = (PFNGLMAPGRID2DPROC)load("glMapGrid2d"); + glad_glMapGrid2f = (PFNGLMAPGRID2FPROC)load("glMapGrid2f"); + glad_glEvalCoord1d = (PFNGLEVALCOORD1DPROC)load("glEvalCoord1d"); + glad_glEvalCoord1dv = (PFNGLEVALCOORD1DVPROC)load("glEvalCoord1dv"); + glad_glEvalCoord1f = (PFNGLEVALCOORD1FPROC)load("glEvalCoord1f"); + glad_glEvalCoord1fv = (PFNGLEVALCOORD1FVPROC)load("glEvalCoord1fv"); + glad_glEvalCoord2d = (PFNGLEVALCOORD2DPROC)load("glEvalCoord2d"); + glad_glEvalCoord2dv = (PFNGLEVALCOORD2DVPROC)load("glEvalCoord2dv"); + glad_glEvalCoord2f = (PFNGLEVALCOORD2FPROC)load("glEvalCoord2f"); + glad_glEvalCoord2fv = (PFNGLEVALCOORD2FVPROC)load("glEvalCoord2fv"); + glad_glEvalMesh1 = (PFNGLEVALMESH1PROC)load("glEvalMesh1"); + glad_glEvalPoint1 = (PFNGLEVALPOINT1PROC)load("glEvalPoint1"); + glad_glEvalMesh2 = (PFNGLEVALMESH2PROC)load("glEvalMesh2"); + glad_glEvalPoint2 = (PFNGLEVALPOINT2PROC)load("glEvalPoint2"); + glad_glAlphaFunc = (PFNGLALPHAFUNCPROC)load("glAlphaFunc"); + glad_glPixelZoom = (PFNGLPIXELZOOMPROC)load("glPixelZoom"); + glad_glPixelTransferf = (PFNGLPIXELTRANSFERFPROC)load("glPixelTransferf"); + glad_glPixelTransferi = (PFNGLPIXELTRANSFERIPROC)load("glPixelTransferi"); + glad_glPixelMapfv = (PFNGLPIXELMAPFVPROC)load("glPixelMapfv"); + glad_glPixelMapuiv = (PFNGLPIXELMAPUIVPROC)load("glPixelMapuiv"); + glad_glPixelMapusv = (PFNGLPIXELMAPUSVPROC)load("glPixelMapusv"); + glad_glCopyPixels = (PFNGLCOPYPIXELSPROC)load("glCopyPixels"); + glad_glDrawPixels = (PFNGLDRAWPIXELSPROC)load("glDrawPixels"); + glad_glGetClipPlane = (PFNGLGETCLIPPLANEPROC)load("glGetClipPlane"); + glad_glGetLightfv = (PFNGLGETLIGHTFVPROC)load("glGetLightfv"); + glad_glGetLightiv = (PFNGLGETLIGHTIVPROC)load("glGetLightiv"); + glad_glGetMapdv = (PFNGLGETMAPDVPROC)load("glGetMapdv"); + glad_glGetMapfv = (PFNGLGETMAPFVPROC)load("glGetMapfv"); + 
glad_glGetMapiv = (PFNGLGETMAPIVPROC)load("glGetMapiv"); + glad_glGetMaterialfv = (PFNGLGETMATERIALFVPROC)load("glGetMaterialfv"); + glad_glGetMaterialiv = (PFNGLGETMATERIALIVPROC)load("glGetMaterialiv"); + glad_glGetPixelMapfv = (PFNGLGETPIXELMAPFVPROC)load("glGetPixelMapfv"); + glad_glGetPixelMapuiv = (PFNGLGETPIXELMAPUIVPROC)load("glGetPixelMapuiv"); + glad_glGetPixelMapusv = (PFNGLGETPIXELMAPUSVPROC)load("glGetPixelMapusv"); + glad_glGetPolygonStipple = (PFNGLGETPOLYGONSTIPPLEPROC)load("glGetPolygonStipple"); + glad_glGetTexEnvfv = (PFNGLGETTEXENVFVPROC)load("glGetTexEnvfv"); + glad_glGetTexEnviv = (PFNGLGETTEXENVIVPROC)load("glGetTexEnviv"); + glad_glGetTexGendv = (PFNGLGETTEXGENDVPROC)load("glGetTexGendv"); + glad_glGetTexGenfv = (PFNGLGETTEXGENFVPROC)load("glGetTexGenfv"); + glad_glGetTexGeniv = (PFNGLGETTEXGENIVPROC)load("glGetTexGeniv"); + glad_glIsList = (PFNGLISLISTPROC)load("glIsList"); + glad_glFrustum = (PFNGLFRUSTUMPROC)load("glFrustum"); + glad_glLoadIdentity = (PFNGLLOADIDENTITYPROC)load("glLoadIdentity"); + glad_glLoadMatrixf = (PFNGLLOADMATRIXFPROC)load("glLoadMatrixf"); + glad_glLoadMatrixd = (PFNGLLOADMATRIXDPROC)load("glLoadMatrixd"); + glad_glMatrixMode = (PFNGLMATRIXMODEPROC)load("glMatrixMode"); + glad_glMultMatrixf = (PFNGLMULTMATRIXFPROC)load("glMultMatrixf"); + glad_glMultMatrixd = (PFNGLMULTMATRIXDPROC)load("glMultMatrixd"); + glad_glOrtho = (PFNGLORTHOPROC)load("glOrtho"); + glad_glPopMatrix = (PFNGLPOPMATRIXPROC)load("glPopMatrix"); + glad_glPushMatrix = (PFNGLPUSHMATRIXPROC)load("glPushMatrix"); + glad_glRotated = (PFNGLROTATEDPROC)load("glRotated"); + glad_glRotatef = (PFNGLROTATEFPROC)load("glRotatef"); + glad_glScaled = (PFNGLSCALEDPROC)load("glScaled"); + glad_glScalef = (PFNGLSCALEFPROC)load("glScalef"); + glad_glTranslated = (PFNGLTRANSLATEDPROC)load("glTranslated"); + glad_glTranslatef = (PFNGLTRANSLATEFPROC)load("glTranslatef"); +} +static void load_GL_VERSION_1_1(GLADloadproc load) { + if(!GLAD_GL_VERSION_1_1) return; + 
glad_glDrawArrays = (PFNGLDRAWARRAYSPROC)load("glDrawArrays"); + glad_glDrawElements = (PFNGLDRAWELEMENTSPROC)load("glDrawElements"); + glad_glGetPointerv = (PFNGLGETPOINTERVPROC)load("glGetPointerv"); + glad_glPolygonOffset = (PFNGLPOLYGONOFFSETPROC)load("glPolygonOffset"); + glad_glCopyTexImage1D = (PFNGLCOPYTEXIMAGE1DPROC)load("glCopyTexImage1D"); + glad_glCopyTexImage2D = (PFNGLCOPYTEXIMAGE2DPROC)load("glCopyTexImage2D"); + glad_glCopyTexSubImage1D = (PFNGLCOPYTEXSUBIMAGE1DPROC)load("glCopyTexSubImage1D"); + glad_glCopyTexSubImage2D = (PFNGLCOPYTEXSUBIMAGE2DPROC)load("glCopyTexSubImage2D"); + glad_glTexSubImage1D = (PFNGLTEXSUBIMAGE1DPROC)load("glTexSubImage1D"); + glad_glTexSubImage2D = (PFNGLTEXSUBIMAGE2DPROC)load("glTexSubImage2D"); + glad_glBindTexture = (PFNGLBINDTEXTUREPROC)load("glBindTexture"); + glad_glDeleteTextures = (PFNGLDELETETEXTURESPROC)load("glDeleteTextures"); + glad_glGenTextures = (PFNGLGENTEXTURESPROC)load("glGenTextures"); + glad_glIsTexture = (PFNGLISTEXTUREPROC)load("glIsTexture"); + glad_glArrayElement = (PFNGLARRAYELEMENTPROC)load("glArrayElement"); + glad_glColorPointer = (PFNGLCOLORPOINTERPROC)load("glColorPointer"); + glad_glDisableClientState = (PFNGLDISABLECLIENTSTATEPROC)load("glDisableClientState"); + glad_glEdgeFlagPointer = (PFNGLEDGEFLAGPOINTERPROC)load("glEdgeFlagPointer"); + glad_glEnableClientState = (PFNGLENABLECLIENTSTATEPROC)load("glEnableClientState"); + glad_glIndexPointer = (PFNGLINDEXPOINTERPROC)load("glIndexPointer"); + glad_glInterleavedArrays = (PFNGLINTERLEAVEDARRAYSPROC)load("glInterleavedArrays"); + glad_glNormalPointer = (PFNGLNORMALPOINTERPROC)load("glNormalPointer"); + glad_glTexCoordPointer = (PFNGLTEXCOORDPOINTERPROC)load("glTexCoordPointer"); + glad_glVertexPointer = (PFNGLVERTEXPOINTERPROC)load("glVertexPointer"); + glad_glAreTexturesResident = (PFNGLARETEXTURESRESIDENTPROC)load("glAreTexturesResident"); + glad_glPrioritizeTextures = (PFNGLPRIORITIZETEXTURESPROC)load("glPrioritizeTextures"); + 
glad_glIndexub = (PFNGLINDEXUBPROC)load("glIndexub"); + glad_glIndexubv = (PFNGLINDEXUBVPROC)load("glIndexubv"); + glad_glPopClientAttrib = (PFNGLPOPCLIENTATTRIBPROC)load("glPopClientAttrib"); + glad_glPushClientAttrib = (PFNGLPUSHCLIENTATTRIBPROC)load("glPushClientAttrib"); +} +static void load_GL_VERSION_1_2(GLADloadproc load) { + if(!GLAD_GL_VERSION_1_2) return; + glad_glDrawRangeElements = (PFNGLDRAWRANGEELEMENTSPROC)load("glDrawRangeElements"); + glad_glTexImage3D = (PFNGLTEXIMAGE3DPROC)load("glTexImage3D"); + glad_glTexSubImage3D = (PFNGLTEXSUBIMAGE3DPROC)load("glTexSubImage3D"); + glad_glCopyTexSubImage3D = (PFNGLCOPYTEXSUBIMAGE3DPROC)load("glCopyTexSubImage3D"); +} +static void load_GL_VERSION_1_3(GLADloadproc load) { + if(!GLAD_GL_VERSION_1_3) return; + glad_glActiveTexture = (PFNGLACTIVETEXTUREPROC)load("glActiveTexture"); + glad_glSampleCoverage = (PFNGLSAMPLECOVERAGEPROC)load("glSampleCoverage"); + glad_glCompressedTexImage3D = (PFNGLCOMPRESSEDTEXIMAGE3DPROC)load("glCompressedTexImage3D"); + glad_glCompressedTexImage2D = (PFNGLCOMPRESSEDTEXIMAGE2DPROC)load("glCompressedTexImage2D"); + glad_glCompressedTexImage1D = (PFNGLCOMPRESSEDTEXIMAGE1DPROC)load("glCompressedTexImage1D"); + glad_glCompressedTexSubImage3D = (PFNGLCOMPRESSEDTEXSUBIMAGE3DPROC)load("glCompressedTexSubImage3D"); + glad_glCompressedTexSubImage2D = (PFNGLCOMPRESSEDTEXSUBIMAGE2DPROC)load("glCompressedTexSubImage2D"); + glad_glCompressedTexSubImage1D = (PFNGLCOMPRESSEDTEXSUBIMAGE1DPROC)load("glCompressedTexSubImage1D"); + glad_glGetCompressedTexImage = (PFNGLGETCOMPRESSEDTEXIMAGEPROC)load("glGetCompressedTexImage"); + glad_glClientActiveTexture = (PFNGLCLIENTACTIVETEXTUREPROC)load("glClientActiveTexture"); + glad_glMultiTexCoord1d = (PFNGLMULTITEXCOORD1DPROC)load("glMultiTexCoord1d"); + glad_glMultiTexCoord1dv = (PFNGLMULTITEXCOORD1DVPROC)load("glMultiTexCoord1dv"); + glad_glMultiTexCoord1f = (PFNGLMULTITEXCOORD1FPROC)load("glMultiTexCoord1f"); + glad_glMultiTexCoord1fv = 
(PFNGLMULTITEXCOORD1FVPROC)load("glMultiTexCoord1fv"); + glad_glMultiTexCoord1i = (PFNGLMULTITEXCOORD1IPROC)load("glMultiTexCoord1i"); + glad_glMultiTexCoord1iv = (PFNGLMULTITEXCOORD1IVPROC)load("glMultiTexCoord1iv"); + glad_glMultiTexCoord1s = (PFNGLMULTITEXCOORD1SPROC)load("glMultiTexCoord1s"); + glad_glMultiTexCoord1sv = (PFNGLMULTITEXCOORD1SVPROC)load("glMultiTexCoord1sv"); + glad_glMultiTexCoord2d = (PFNGLMULTITEXCOORD2DPROC)load("glMultiTexCoord2d"); + glad_glMultiTexCoord2dv = (PFNGLMULTITEXCOORD2DVPROC)load("glMultiTexCoord2dv"); + glad_glMultiTexCoord2f = (PFNGLMULTITEXCOORD2FPROC)load("glMultiTexCoord2f"); + glad_glMultiTexCoord2fv = (PFNGLMULTITEXCOORD2FVPROC)load("glMultiTexCoord2fv"); + glad_glMultiTexCoord2i = (PFNGLMULTITEXCOORD2IPROC)load("glMultiTexCoord2i"); + glad_glMultiTexCoord2iv = (PFNGLMULTITEXCOORD2IVPROC)load("glMultiTexCoord2iv"); + glad_glMultiTexCoord2s = (PFNGLMULTITEXCOORD2SPROC)load("glMultiTexCoord2s"); + glad_glMultiTexCoord2sv = (PFNGLMULTITEXCOORD2SVPROC)load("glMultiTexCoord2sv"); + glad_glMultiTexCoord3d = (PFNGLMULTITEXCOORD3DPROC)load("glMultiTexCoord3d"); + glad_glMultiTexCoord3dv = (PFNGLMULTITEXCOORD3DVPROC)load("glMultiTexCoord3dv"); + glad_glMultiTexCoord3f = (PFNGLMULTITEXCOORD3FPROC)load("glMultiTexCoord3f"); + glad_glMultiTexCoord3fv = (PFNGLMULTITEXCOORD3FVPROC)load("glMultiTexCoord3fv"); + glad_glMultiTexCoord3i = (PFNGLMULTITEXCOORD3IPROC)load("glMultiTexCoord3i"); + glad_glMultiTexCoord3iv = (PFNGLMULTITEXCOORD3IVPROC)load("glMultiTexCoord3iv"); + glad_glMultiTexCoord3s = (PFNGLMULTITEXCOORD3SPROC)load("glMultiTexCoord3s"); + glad_glMultiTexCoord3sv = (PFNGLMULTITEXCOORD3SVPROC)load("glMultiTexCoord3sv"); + glad_glMultiTexCoord4d = (PFNGLMULTITEXCOORD4DPROC)load("glMultiTexCoord4d"); + glad_glMultiTexCoord4dv = (PFNGLMULTITEXCOORD4DVPROC)load("glMultiTexCoord4dv"); + glad_glMultiTexCoord4f = (PFNGLMULTITEXCOORD4FPROC)load("glMultiTexCoord4f"); + glad_glMultiTexCoord4fv = 
(PFNGLMULTITEXCOORD4FVPROC)load("glMultiTexCoord4fv"); + glad_glMultiTexCoord4i = (PFNGLMULTITEXCOORD4IPROC)load("glMultiTexCoord4i"); + glad_glMultiTexCoord4iv = (PFNGLMULTITEXCOORD4IVPROC)load("glMultiTexCoord4iv"); + glad_glMultiTexCoord4s = (PFNGLMULTITEXCOORD4SPROC)load("glMultiTexCoord4s"); + glad_glMultiTexCoord4sv = (PFNGLMULTITEXCOORD4SVPROC)load("glMultiTexCoord4sv"); + glad_glLoadTransposeMatrixf = (PFNGLLOADTRANSPOSEMATRIXFPROC)load("glLoadTransposeMatrixf"); + glad_glLoadTransposeMatrixd = (PFNGLLOADTRANSPOSEMATRIXDPROC)load("glLoadTransposeMatrixd"); + glad_glMultTransposeMatrixf = (PFNGLMULTTRANSPOSEMATRIXFPROC)load("glMultTransposeMatrixf"); + glad_glMultTransposeMatrixd = (PFNGLMULTTRANSPOSEMATRIXDPROC)load("glMultTransposeMatrixd"); +} +static void load_GL_VERSION_1_4(GLADloadproc load) { + if(!GLAD_GL_VERSION_1_4) return; + glad_glBlendFuncSeparate = (PFNGLBLENDFUNCSEPARATEPROC)load("glBlendFuncSeparate"); + glad_glMultiDrawArrays = (PFNGLMULTIDRAWARRAYSPROC)load("glMultiDrawArrays"); + glad_glMultiDrawElements = (PFNGLMULTIDRAWELEMENTSPROC)load("glMultiDrawElements"); + glad_glPointParameterf = (PFNGLPOINTPARAMETERFPROC)load("glPointParameterf"); + glad_glPointParameterfv = (PFNGLPOINTPARAMETERFVPROC)load("glPointParameterfv"); + glad_glPointParameteri = (PFNGLPOINTPARAMETERIPROC)load("glPointParameteri"); + glad_glPointParameteriv = (PFNGLPOINTPARAMETERIVPROC)load("glPointParameteriv"); + glad_glFogCoordf = (PFNGLFOGCOORDFPROC)load("glFogCoordf"); + glad_glFogCoordfv = (PFNGLFOGCOORDFVPROC)load("glFogCoordfv"); + glad_glFogCoordd = (PFNGLFOGCOORDDPROC)load("glFogCoordd"); + glad_glFogCoorddv = (PFNGLFOGCOORDDVPROC)load("glFogCoorddv"); + glad_glFogCoordPointer = (PFNGLFOGCOORDPOINTERPROC)load("glFogCoordPointer"); + glad_glSecondaryColor3b = (PFNGLSECONDARYCOLOR3BPROC)load("glSecondaryColor3b"); + glad_glSecondaryColor3bv = (PFNGLSECONDARYCOLOR3BVPROC)load("glSecondaryColor3bv"); + glad_glSecondaryColor3d = 
(PFNGLSECONDARYCOLOR3DPROC)load("glSecondaryColor3d"); + glad_glSecondaryColor3dv = (PFNGLSECONDARYCOLOR3DVPROC)load("glSecondaryColor3dv"); + glad_glSecondaryColor3f = (PFNGLSECONDARYCOLOR3FPROC)load("glSecondaryColor3f"); + glad_glSecondaryColor3fv = (PFNGLSECONDARYCOLOR3FVPROC)load("glSecondaryColor3fv"); + glad_glSecondaryColor3i = (PFNGLSECONDARYCOLOR3IPROC)load("glSecondaryColor3i"); + glad_glSecondaryColor3iv = (PFNGLSECONDARYCOLOR3IVPROC)load("glSecondaryColor3iv"); + glad_glSecondaryColor3s = (PFNGLSECONDARYCOLOR3SPROC)load("glSecondaryColor3s"); + glad_glSecondaryColor3sv = (PFNGLSECONDARYCOLOR3SVPROC)load("glSecondaryColor3sv"); + glad_glSecondaryColor3ub = (PFNGLSECONDARYCOLOR3UBPROC)load("glSecondaryColor3ub"); + glad_glSecondaryColor3ubv = (PFNGLSECONDARYCOLOR3UBVPROC)load("glSecondaryColor3ubv"); + glad_glSecondaryColor3ui = (PFNGLSECONDARYCOLOR3UIPROC)load("glSecondaryColor3ui"); + glad_glSecondaryColor3uiv = (PFNGLSECONDARYCOLOR3UIVPROC)load("glSecondaryColor3uiv"); + glad_glSecondaryColor3us = (PFNGLSECONDARYCOLOR3USPROC)load("glSecondaryColor3us"); + glad_glSecondaryColor3usv = (PFNGLSECONDARYCOLOR3USVPROC)load("glSecondaryColor3usv"); + glad_glSecondaryColorPointer = (PFNGLSECONDARYCOLORPOINTERPROC)load("glSecondaryColorPointer"); + glad_glWindowPos2d = (PFNGLWINDOWPOS2DPROC)load("glWindowPos2d"); + glad_glWindowPos2dv = (PFNGLWINDOWPOS2DVPROC)load("glWindowPos2dv"); + glad_glWindowPos2f = (PFNGLWINDOWPOS2FPROC)load("glWindowPos2f"); + glad_glWindowPos2fv = (PFNGLWINDOWPOS2FVPROC)load("glWindowPos2fv"); + glad_glWindowPos2i = (PFNGLWINDOWPOS2IPROC)load("glWindowPos2i"); + glad_glWindowPos2iv = (PFNGLWINDOWPOS2IVPROC)load("glWindowPos2iv"); + glad_glWindowPos2s = (PFNGLWINDOWPOS2SPROC)load("glWindowPos2s"); + glad_glWindowPos2sv = (PFNGLWINDOWPOS2SVPROC)load("glWindowPos2sv"); + glad_glWindowPos3d = (PFNGLWINDOWPOS3DPROC)load("glWindowPos3d"); + glad_glWindowPos3dv = (PFNGLWINDOWPOS3DVPROC)load("glWindowPos3dv"); + glad_glWindowPos3f = 
(PFNGLWINDOWPOS3FPROC)load("glWindowPos3f"); + glad_glWindowPos3fv = (PFNGLWINDOWPOS3FVPROC)load("glWindowPos3fv"); + glad_glWindowPos3i = (PFNGLWINDOWPOS3IPROC)load("glWindowPos3i"); + glad_glWindowPos3iv = (PFNGLWINDOWPOS3IVPROC)load("glWindowPos3iv"); + glad_glWindowPos3s = (PFNGLWINDOWPOS3SPROC)load("glWindowPos3s"); + glad_glWindowPos3sv = (PFNGLWINDOWPOS3SVPROC)load("glWindowPos3sv"); + glad_glBlendColor = (PFNGLBLENDCOLORPROC)load("glBlendColor"); + glad_glBlendEquation = (PFNGLBLENDEQUATIONPROC)load("glBlendEquation"); +} +static void load_GL_VERSION_1_5(GLADloadproc load) { + if(!GLAD_GL_VERSION_1_5) return; + glad_glGenQueries = (PFNGLGENQUERIESPROC)load("glGenQueries"); + glad_glDeleteQueries = (PFNGLDELETEQUERIESPROC)load("glDeleteQueries"); + glad_glIsQuery = (PFNGLISQUERYPROC)load("glIsQuery"); + glad_glBeginQuery = (PFNGLBEGINQUERYPROC)load("glBeginQuery"); + glad_glEndQuery = (PFNGLENDQUERYPROC)load("glEndQuery"); + glad_glGetQueryiv = (PFNGLGETQUERYIVPROC)load("glGetQueryiv"); + glad_glGetQueryObjectiv = (PFNGLGETQUERYOBJECTIVPROC)load("glGetQueryObjectiv"); + glad_glGetQueryObjectuiv = (PFNGLGETQUERYOBJECTUIVPROC)load("glGetQueryObjectuiv"); + glad_glBindBuffer = (PFNGLBINDBUFFERPROC)load("glBindBuffer"); + glad_glDeleteBuffers = (PFNGLDELETEBUFFERSPROC)load("glDeleteBuffers"); + glad_glGenBuffers = (PFNGLGENBUFFERSPROC)load("glGenBuffers"); + glad_glIsBuffer = (PFNGLISBUFFERPROC)load("glIsBuffer"); + glad_glBufferData = (PFNGLBUFFERDATAPROC)load("glBufferData"); + glad_glBufferSubData = (PFNGLBUFFERSUBDATAPROC)load("glBufferSubData"); + glad_glGetBufferSubData = (PFNGLGETBUFFERSUBDATAPROC)load("glGetBufferSubData"); + glad_glMapBuffer = (PFNGLMAPBUFFERPROC)load("glMapBuffer"); + glad_glUnmapBuffer = (PFNGLUNMAPBUFFERPROC)load("glUnmapBuffer"); + glad_glGetBufferParameteriv = (PFNGLGETBUFFERPARAMETERIVPROC)load("glGetBufferParameteriv"); + glad_glGetBufferPointerv = (PFNGLGETBUFFERPOINTERVPROC)load("glGetBufferPointerv"); +} +static void 
load_GL_VERSION_2_0(GLADloadproc load) { + if(!GLAD_GL_VERSION_2_0) return; + glad_glBlendEquationSeparate = (PFNGLBLENDEQUATIONSEPARATEPROC)load("glBlendEquationSeparate"); + glad_glDrawBuffers = (PFNGLDRAWBUFFERSPROC)load("glDrawBuffers"); + glad_glStencilOpSeparate = (PFNGLSTENCILOPSEPARATEPROC)load("glStencilOpSeparate"); + glad_glStencilFuncSeparate = (PFNGLSTENCILFUNCSEPARATEPROC)load("glStencilFuncSeparate"); + glad_glStencilMaskSeparate = (PFNGLSTENCILMASKSEPARATEPROC)load("glStencilMaskSeparate"); + glad_glAttachShader = (PFNGLATTACHSHADERPROC)load("glAttachShader"); + glad_glBindAttribLocation = (PFNGLBINDATTRIBLOCATIONPROC)load("glBindAttribLocation"); + glad_glCompileShader = (PFNGLCOMPILESHADERPROC)load("glCompileShader"); + glad_glCreateProgram = (PFNGLCREATEPROGRAMPROC)load("glCreateProgram"); + glad_glCreateShader = (PFNGLCREATESHADERPROC)load("glCreateShader"); + glad_glDeleteProgram = (PFNGLDELETEPROGRAMPROC)load("glDeleteProgram"); + glad_glDeleteShader = (PFNGLDELETESHADERPROC)load("glDeleteShader"); + glad_glDetachShader = (PFNGLDETACHSHADERPROC)load("glDetachShader"); + glad_glDisableVertexAttribArray = (PFNGLDISABLEVERTEXATTRIBARRAYPROC)load("glDisableVertexAttribArray"); + glad_glEnableVertexAttribArray = (PFNGLENABLEVERTEXATTRIBARRAYPROC)load("glEnableVertexAttribArray"); + glad_glGetActiveAttrib = (PFNGLGETACTIVEATTRIBPROC)load("glGetActiveAttrib"); + glad_glGetActiveUniform = (PFNGLGETACTIVEUNIFORMPROC)load("glGetActiveUniform"); + glad_glGetAttachedShaders = (PFNGLGETATTACHEDSHADERSPROC)load("glGetAttachedShaders"); + glad_glGetAttribLocation = (PFNGLGETATTRIBLOCATIONPROC)load("glGetAttribLocation"); + glad_glGetProgramiv = (PFNGLGETPROGRAMIVPROC)load("glGetProgramiv"); + glad_glGetProgramInfoLog = (PFNGLGETPROGRAMINFOLOGPROC)load("glGetProgramInfoLog"); + glad_glGetShaderiv = (PFNGLGETSHADERIVPROC)load("glGetShaderiv"); + glad_glGetShaderInfoLog = (PFNGLGETSHADERINFOLOGPROC)load("glGetShaderInfoLog"); + glad_glGetShaderSource = 
(PFNGLGETSHADERSOURCEPROC)load("glGetShaderSource"); + glad_glGetUniformLocation = (PFNGLGETUNIFORMLOCATIONPROC)load("glGetUniformLocation"); + glad_glGetUniformfv = (PFNGLGETUNIFORMFVPROC)load("glGetUniformfv"); + glad_glGetUniformiv = (PFNGLGETUNIFORMIVPROC)load("glGetUniformiv"); + glad_glGetVertexAttribdv = (PFNGLGETVERTEXATTRIBDVPROC)load("glGetVertexAttribdv"); + glad_glGetVertexAttribfv = (PFNGLGETVERTEXATTRIBFVPROC)load("glGetVertexAttribfv"); + glad_glGetVertexAttribiv = (PFNGLGETVERTEXATTRIBIVPROC)load("glGetVertexAttribiv"); + glad_glGetVertexAttribPointerv = (PFNGLGETVERTEXATTRIBPOINTERVPROC)load("glGetVertexAttribPointerv"); + glad_glIsProgram = (PFNGLISPROGRAMPROC)load("glIsProgram"); + glad_glIsShader = (PFNGLISSHADERPROC)load("glIsShader"); + glad_glLinkProgram = (PFNGLLINKPROGRAMPROC)load("glLinkProgram"); + glad_glShaderSource = (PFNGLSHADERSOURCEPROC)load("glShaderSource"); + glad_glUseProgram = (PFNGLUSEPROGRAMPROC)load("glUseProgram"); + glad_glUniform1f = (PFNGLUNIFORM1FPROC)load("glUniform1f"); + glad_glUniform2f = (PFNGLUNIFORM2FPROC)load("glUniform2f"); + glad_glUniform3f = (PFNGLUNIFORM3FPROC)load("glUniform3f"); + glad_glUniform4f = (PFNGLUNIFORM4FPROC)load("glUniform4f"); + glad_glUniform1i = (PFNGLUNIFORM1IPROC)load("glUniform1i"); + glad_glUniform2i = (PFNGLUNIFORM2IPROC)load("glUniform2i"); + glad_glUniform3i = (PFNGLUNIFORM3IPROC)load("glUniform3i"); + glad_glUniform4i = (PFNGLUNIFORM4IPROC)load("glUniform4i"); + glad_glUniform1fv = (PFNGLUNIFORM1FVPROC)load("glUniform1fv"); + glad_glUniform2fv = (PFNGLUNIFORM2FVPROC)load("glUniform2fv"); + glad_glUniform3fv = (PFNGLUNIFORM3FVPROC)load("glUniform3fv"); + glad_glUniform4fv = (PFNGLUNIFORM4FVPROC)load("glUniform4fv"); + glad_glUniform1iv = (PFNGLUNIFORM1IVPROC)load("glUniform1iv"); + glad_glUniform2iv = (PFNGLUNIFORM2IVPROC)load("glUniform2iv"); + glad_glUniform3iv = (PFNGLUNIFORM3IVPROC)load("glUniform3iv"); + glad_glUniform4iv = (PFNGLUNIFORM4IVPROC)load("glUniform4iv"); + 
glad_glUniformMatrix2fv = (PFNGLUNIFORMMATRIX2FVPROC)load("glUniformMatrix2fv"); + glad_glUniformMatrix3fv = (PFNGLUNIFORMMATRIX3FVPROC)load("glUniformMatrix3fv"); + glad_glUniformMatrix4fv = (PFNGLUNIFORMMATRIX4FVPROC)load("glUniformMatrix4fv"); + glad_glValidateProgram = (PFNGLVALIDATEPROGRAMPROC)load("glValidateProgram"); + glad_glVertexAttrib1d = (PFNGLVERTEXATTRIB1DPROC)load("glVertexAttrib1d"); + glad_glVertexAttrib1dv = (PFNGLVERTEXATTRIB1DVPROC)load("glVertexAttrib1dv"); + glad_glVertexAttrib1f = (PFNGLVERTEXATTRIB1FPROC)load("glVertexAttrib1f"); + glad_glVertexAttrib1fv = (PFNGLVERTEXATTRIB1FVPROC)load("glVertexAttrib1fv"); + glad_glVertexAttrib1s = (PFNGLVERTEXATTRIB1SPROC)load("glVertexAttrib1s"); + glad_glVertexAttrib1sv = (PFNGLVERTEXATTRIB1SVPROC)load("glVertexAttrib1sv"); + glad_glVertexAttrib2d = (PFNGLVERTEXATTRIB2DPROC)load("glVertexAttrib2d"); + glad_glVertexAttrib2dv = (PFNGLVERTEXATTRIB2DVPROC)load("glVertexAttrib2dv"); + glad_glVertexAttrib2f = (PFNGLVERTEXATTRIB2FPROC)load("glVertexAttrib2f"); + glad_glVertexAttrib2fv = (PFNGLVERTEXATTRIB2FVPROC)load("glVertexAttrib2fv"); + glad_glVertexAttrib2s = (PFNGLVERTEXATTRIB2SPROC)load("glVertexAttrib2s"); + glad_glVertexAttrib2sv = (PFNGLVERTEXATTRIB2SVPROC)load("glVertexAttrib2sv"); + glad_glVertexAttrib3d = (PFNGLVERTEXATTRIB3DPROC)load("glVertexAttrib3d"); + glad_glVertexAttrib3dv = (PFNGLVERTEXATTRIB3DVPROC)load("glVertexAttrib3dv"); + glad_glVertexAttrib3f = (PFNGLVERTEXATTRIB3FPROC)load("glVertexAttrib3f"); + glad_glVertexAttrib3fv = (PFNGLVERTEXATTRIB3FVPROC)load("glVertexAttrib3fv"); + glad_glVertexAttrib3s = (PFNGLVERTEXATTRIB3SPROC)load("glVertexAttrib3s"); + glad_glVertexAttrib3sv = (PFNGLVERTEXATTRIB3SVPROC)load("glVertexAttrib3sv"); + glad_glVertexAttrib4Nbv = (PFNGLVERTEXATTRIB4NBVPROC)load("glVertexAttrib4Nbv"); + glad_glVertexAttrib4Niv = (PFNGLVERTEXATTRIB4NIVPROC)load("glVertexAttrib4Niv"); + glad_glVertexAttrib4Nsv = (PFNGLVERTEXATTRIB4NSVPROC)load("glVertexAttrib4Nsv"); + 
glad_glVertexAttrib4Nub = (PFNGLVERTEXATTRIB4NUBPROC)load("glVertexAttrib4Nub"); + glad_glVertexAttrib4Nubv = (PFNGLVERTEXATTRIB4NUBVPROC)load("glVertexAttrib4Nubv"); + glad_glVertexAttrib4Nuiv = (PFNGLVERTEXATTRIB4NUIVPROC)load("glVertexAttrib4Nuiv"); + glad_glVertexAttrib4Nusv = (PFNGLVERTEXATTRIB4NUSVPROC)load("glVertexAttrib4Nusv"); + glad_glVertexAttrib4bv = (PFNGLVERTEXATTRIB4BVPROC)load("glVertexAttrib4bv"); + glad_glVertexAttrib4d = (PFNGLVERTEXATTRIB4DPROC)load("glVertexAttrib4d"); + glad_glVertexAttrib4dv = (PFNGLVERTEXATTRIB4DVPROC)load("glVertexAttrib4dv"); + glad_glVertexAttrib4f = (PFNGLVERTEXATTRIB4FPROC)load("glVertexAttrib4f"); + glad_glVertexAttrib4fv = (PFNGLVERTEXATTRIB4FVPROC)load("glVertexAttrib4fv"); + glad_glVertexAttrib4iv = (PFNGLVERTEXATTRIB4IVPROC)load("glVertexAttrib4iv"); + glad_glVertexAttrib4s = (PFNGLVERTEXATTRIB4SPROC)load("glVertexAttrib4s"); + glad_glVertexAttrib4sv = (PFNGLVERTEXATTRIB4SVPROC)load("glVertexAttrib4sv"); + glad_glVertexAttrib4ubv = (PFNGLVERTEXATTRIB4UBVPROC)load("glVertexAttrib4ubv"); + glad_glVertexAttrib4uiv = (PFNGLVERTEXATTRIB4UIVPROC)load("glVertexAttrib4uiv"); + glad_glVertexAttrib4usv = (PFNGLVERTEXATTRIB4USVPROC)load("glVertexAttrib4usv"); + glad_glVertexAttribPointer = (PFNGLVERTEXATTRIBPOINTERPROC)load("glVertexAttribPointer"); +} +static void load_GL_VERSION_2_1(GLADloadproc load) { + if(!GLAD_GL_VERSION_2_1) return; + glad_glUniformMatrix2x3fv = (PFNGLUNIFORMMATRIX2X3FVPROC)load("glUniformMatrix2x3fv"); + glad_glUniformMatrix3x2fv = (PFNGLUNIFORMMATRIX3X2FVPROC)load("glUniformMatrix3x2fv"); + glad_glUniformMatrix2x4fv = (PFNGLUNIFORMMATRIX2X4FVPROC)load("glUniformMatrix2x4fv"); + glad_glUniformMatrix4x2fv = (PFNGLUNIFORMMATRIX4X2FVPROC)load("glUniformMatrix4x2fv"); + glad_glUniformMatrix3x4fv = (PFNGLUNIFORMMATRIX3X4FVPROC)load("glUniformMatrix3x4fv"); + glad_glUniformMatrix4x3fv = (PFNGLUNIFORMMATRIX4X3FVPROC)load("glUniformMatrix4x3fv"); +} +static void load_GL_VERSION_3_0(GLADloadproc load) { + 
if(!GLAD_GL_VERSION_3_0) return; + glad_glColorMaski = (PFNGLCOLORMASKIPROC)load("glColorMaski"); + glad_glGetBooleani_v = (PFNGLGETBOOLEANI_VPROC)load("glGetBooleani_v"); + glad_glGetIntegeri_v = (PFNGLGETINTEGERI_VPROC)load("glGetIntegeri_v"); + glad_glEnablei = (PFNGLENABLEIPROC)load("glEnablei"); + glad_glDisablei = (PFNGLDISABLEIPROC)load("glDisablei"); + glad_glIsEnabledi = (PFNGLISENABLEDIPROC)load("glIsEnabledi"); + glad_glBeginTransformFeedback = (PFNGLBEGINTRANSFORMFEEDBACKPROC)load("glBeginTransformFeedback"); + glad_glEndTransformFeedback = (PFNGLENDTRANSFORMFEEDBACKPROC)load("glEndTransformFeedback"); + glad_glBindBufferRange = (PFNGLBINDBUFFERRANGEPROC)load("glBindBufferRange"); + glad_glBindBufferBase = (PFNGLBINDBUFFERBASEPROC)load("glBindBufferBase"); + glad_glTransformFeedbackVaryings = (PFNGLTRANSFORMFEEDBACKVARYINGSPROC)load("glTransformFeedbackVaryings"); + glad_glGetTransformFeedbackVarying = (PFNGLGETTRANSFORMFEEDBACKVARYINGPROC)load("glGetTransformFeedbackVarying"); + glad_glClampColor = (PFNGLCLAMPCOLORPROC)load("glClampColor"); + glad_glBeginConditionalRender = (PFNGLBEGINCONDITIONALRENDERPROC)load("glBeginConditionalRender"); + glad_glEndConditionalRender = (PFNGLENDCONDITIONALRENDERPROC)load("glEndConditionalRender"); + glad_glVertexAttribIPointer = (PFNGLVERTEXATTRIBIPOINTERPROC)load("glVertexAttribIPointer"); + glad_glGetVertexAttribIiv = (PFNGLGETVERTEXATTRIBIIVPROC)load("glGetVertexAttribIiv"); + glad_glGetVertexAttribIuiv = (PFNGLGETVERTEXATTRIBIUIVPROC)load("glGetVertexAttribIuiv"); + glad_glVertexAttribI1i = (PFNGLVERTEXATTRIBI1IPROC)load("glVertexAttribI1i"); + glad_glVertexAttribI2i = (PFNGLVERTEXATTRIBI2IPROC)load("glVertexAttribI2i"); + glad_glVertexAttribI3i = (PFNGLVERTEXATTRIBI3IPROC)load("glVertexAttribI3i"); + glad_glVertexAttribI4i = (PFNGLVERTEXATTRIBI4IPROC)load("glVertexAttribI4i"); + glad_glVertexAttribI1ui = (PFNGLVERTEXATTRIBI1UIPROC)load("glVertexAttribI1ui"); + glad_glVertexAttribI2ui = 
(PFNGLVERTEXATTRIBI2UIPROC)load("glVertexAttribI2ui"); + glad_glVertexAttribI3ui = (PFNGLVERTEXATTRIBI3UIPROC)load("glVertexAttribI3ui"); + glad_glVertexAttribI4ui = (PFNGLVERTEXATTRIBI4UIPROC)load("glVertexAttribI4ui"); + glad_glVertexAttribI1iv = (PFNGLVERTEXATTRIBI1IVPROC)load("glVertexAttribI1iv"); + glad_glVertexAttribI2iv = (PFNGLVERTEXATTRIBI2IVPROC)load("glVertexAttribI2iv"); + glad_glVertexAttribI3iv = (PFNGLVERTEXATTRIBI3IVPROC)load("glVertexAttribI3iv"); + glad_glVertexAttribI4iv = (PFNGLVERTEXATTRIBI4IVPROC)load("glVertexAttribI4iv"); + glad_glVertexAttribI1uiv = (PFNGLVERTEXATTRIBI1UIVPROC)load("glVertexAttribI1uiv"); + glad_glVertexAttribI2uiv = (PFNGLVERTEXATTRIBI2UIVPROC)load("glVertexAttribI2uiv"); + glad_glVertexAttribI3uiv = (PFNGLVERTEXATTRIBI3UIVPROC)load("glVertexAttribI3uiv"); + glad_glVertexAttribI4uiv = (PFNGLVERTEXATTRIBI4UIVPROC)load("glVertexAttribI4uiv"); + glad_glVertexAttribI4bv = (PFNGLVERTEXATTRIBI4BVPROC)load("glVertexAttribI4bv"); + glad_glVertexAttribI4sv = (PFNGLVERTEXATTRIBI4SVPROC)load("glVertexAttribI4sv"); + glad_glVertexAttribI4ubv = (PFNGLVERTEXATTRIBI4UBVPROC)load("glVertexAttribI4ubv"); + glad_glVertexAttribI4usv = (PFNGLVERTEXATTRIBI4USVPROC)load("glVertexAttribI4usv"); + glad_glGetUniformuiv = (PFNGLGETUNIFORMUIVPROC)load("glGetUniformuiv"); + glad_glBindFragDataLocation = (PFNGLBINDFRAGDATALOCATIONPROC)load("glBindFragDataLocation"); + glad_glGetFragDataLocation = (PFNGLGETFRAGDATALOCATIONPROC)load("glGetFragDataLocation"); + glad_glUniform1ui = (PFNGLUNIFORM1UIPROC)load("glUniform1ui"); + glad_glUniform2ui = (PFNGLUNIFORM2UIPROC)load("glUniform2ui"); + glad_glUniform3ui = (PFNGLUNIFORM3UIPROC)load("glUniform3ui"); + glad_glUniform4ui = (PFNGLUNIFORM4UIPROC)load("glUniform4ui"); + glad_glUniform1uiv = (PFNGLUNIFORM1UIVPROC)load("glUniform1uiv"); + glad_glUniform2uiv = (PFNGLUNIFORM2UIVPROC)load("glUniform2uiv"); + glad_glUniform3uiv = (PFNGLUNIFORM3UIVPROC)load("glUniform3uiv"); + glad_glUniform4uiv = 
(PFNGLUNIFORM4UIVPROC)load("glUniform4uiv"); + glad_glTexParameterIiv = (PFNGLTEXPARAMETERIIVPROC)load("glTexParameterIiv"); + glad_glTexParameterIuiv = (PFNGLTEXPARAMETERIUIVPROC)load("glTexParameterIuiv"); + glad_glGetTexParameterIiv = (PFNGLGETTEXPARAMETERIIVPROC)load("glGetTexParameterIiv"); + glad_glGetTexParameterIuiv = (PFNGLGETTEXPARAMETERIUIVPROC)load("glGetTexParameterIuiv"); + glad_glClearBufferiv = (PFNGLCLEARBUFFERIVPROC)load("glClearBufferiv"); + glad_glClearBufferuiv = (PFNGLCLEARBUFFERUIVPROC)load("glClearBufferuiv"); + glad_glClearBufferfv = (PFNGLCLEARBUFFERFVPROC)load("glClearBufferfv"); + glad_glClearBufferfi = (PFNGLCLEARBUFFERFIPROC)load("glClearBufferfi"); + glad_glGetStringi = (PFNGLGETSTRINGIPROC)load("glGetStringi"); + glad_glIsRenderbuffer = (PFNGLISRENDERBUFFERPROC)load("glIsRenderbuffer"); + glad_glBindRenderbuffer = (PFNGLBINDRENDERBUFFERPROC)load("glBindRenderbuffer"); + glad_glDeleteRenderbuffers = (PFNGLDELETERENDERBUFFERSPROC)load("glDeleteRenderbuffers"); + glad_glGenRenderbuffers = (PFNGLGENRENDERBUFFERSPROC)load("glGenRenderbuffers"); + glad_glRenderbufferStorage = (PFNGLRENDERBUFFERSTORAGEPROC)load("glRenderbufferStorage"); + glad_glGetRenderbufferParameteriv = (PFNGLGETRENDERBUFFERPARAMETERIVPROC)load("glGetRenderbufferParameteriv"); + glad_glIsFramebuffer = (PFNGLISFRAMEBUFFERPROC)load("glIsFramebuffer"); + glad_glBindFramebuffer = (PFNGLBINDFRAMEBUFFERPROC)load("glBindFramebuffer"); + glad_glDeleteFramebuffers = (PFNGLDELETEFRAMEBUFFERSPROC)load("glDeleteFramebuffers"); + glad_glGenFramebuffers = (PFNGLGENFRAMEBUFFERSPROC)load("glGenFramebuffers"); + glad_glCheckFramebufferStatus = (PFNGLCHECKFRAMEBUFFERSTATUSPROC)load("glCheckFramebufferStatus"); + glad_glFramebufferTexture1D = (PFNGLFRAMEBUFFERTEXTURE1DPROC)load("glFramebufferTexture1D"); + glad_glFramebufferTexture2D = (PFNGLFRAMEBUFFERTEXTURE2DPROC)load("glFramebufferTexture2D"); + glad_glFramebufferTexture3D = 
(PFNGLFRAMEBUFFERTEXTURE3DPROC)load("glFramebufferTexture3D"); + glad_glFramebufferRenderbuffer = (PFNGLFRAMEBUFFERRENDERBUFFERPROC)load("glFramebufferRenderbuffer"); + glad_glGetFramebufferAttachmentParameteriv = (PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVPROC)load("glGetFramebufferAttachmentParameteriv"); + glad_glGenerateMipmap = (PFNGLGENERATEMIPMAPPROC)load("glGenerateMipmap"); + glad_glBlitFramebuffer = (PFNGLBLITFRAMEBUFFERPROC)load("glBlitFramebuffer"); + glad_glRenderbufferStorageMultisample = (PFNGLRENDERBUFFERSTORAGEMULTISAMPLEPROC)load("glRenderbufferStorageMultisample"); + glad_glFramebufferTextureLayer = (PFNGLFRAMEBUFFERTEXTURELAYERPROC)load("glFramebufferTextureLayer"); + glad_glMapBufferRange = (PFNGLMAPBUFFERRANGEPROC)load("glMapBufferRange"); + glad_glFlushMappedBufferRange = (PFNGLFLUSHMAPPEDBUFFERRANGEPROC)load("glFlushMappedBufferRange"); + glad_glBindVertexArray = (PFNGLBINDVERTEXARRAYPROC)load("glBindVertexArray"); + glad_glDeleteVertexArrays = (PFNGLDELETEVERTEXARRAYSPROC)load("glDeleteVertexArrays"); + glad_glGenVertexArrays = (PFNGLGENVERTEXARRAYSPROC)load("glGenVertexArrays"); + glad_glIsVertexArray = (PFNGLISVERTEXARRAYPROC)load("glIsVertexArray"); +} +static void load_GL_VERSION_3_1(GLADloadproc load) { + if(!GLAD_GL_VERSION_3_1) return; + glad_glDrawArraysInstanced = (PFNGLDRAWARRAYSINSTANCEDPROC)load("glDrawArraysInstanced"); + glad_glDrawElementsInstanced = (PFNGLDRAWELEMENTSINSTANCEDPROC)load("glDrawElementsInstanced"); + glad_glTexBuffer = (PFNGLTEXBUFFERPROC)load("glTexBuffer"); + glad_glPrimitiveRestartIndex = (PFNGLPRIMITIVERESTARTINDEXPROC)load("glPrimitiveRestartIndex"); + glad_glCopyBufferSubData = (PFNGLCOPYBUFFERSUBDATAPROC)load("glCopyBufferSubData"); + glad_glGetUniformIndices = (PFNGLGETUNIFORMINDICESPROC)load("glGetUniformIndices"); + glad_glGetActiveUniformsiv = (PFNGLGETACTIVEUNIFORMSIVPROC)load("glGetActiveUniformsiv"); + glad_glGetActiveUniformName = (PFNGLGETACTIVEUNIFORMNAMEPROC)load("glGetActiveUniformName"); +
/*
 * NOTE(review): this region appears to be unified-diff text ('+' marks an added line)
 * whose newlines were collapsed, so every physical line packs many statements and the
 * loader functions start and end in the middle of a line.
 *
 * Pattern shared by load_GL_VERSION_3_1 through load_GL_VERSION_4_4: return immediately
 * when the matching GLAD_GL_VERSION_x_y flag is zero; otherwise call load("<glName>") for
 * each listed entry point and store the result, cast to that entry point's PFN...PROC
 * type, in the corresponding glad_gl pointer. The value returned by load is not checked
 * in these functions.
 *
 * load_GL_VERSION_3_1 (opened on the previous physical line) continues here with the
 * uniform-block queries. Its last three assignments (glBindBufferRange, glBindBufferBase,
 * glGetIntegeri_v) repeat lookups that the GLAD_GL_VERSION_3_0-guarded loader above also
 * performs. After its closing brace, load_GL_VERSION_3_2 begins: base-vertex draws,
 * glProvokingVertex, the sync-object calls, 64-bit integer queries, glFramebufferTexture.
 */
glad_glGetUniformBlockIndex = (PFNGLGETUNIFORMBLOCKINDEXPROC)load("glGetUniformBlockIndex"); + glad_glGetActiveUniformBlockiv = (PFNGLGETACTIVEUNIFORMBLOCKIVPROC)load("glGetActiveUniformBlockiv"); + glad_glGetActiveUniformBlockName = (PFNGLGETACTIVEUNIFORMBLOCKNAMEPROC)load("glGetActiveUniformBlockName"); + glad_glUniformBlockBinding = (PFNGLUNIFORMBLOCKBINDINGPROC)load("glUniformBlockBinding"); + glad_glBindBufferRange = (PFNGLBINDBUFFERRANGEPROC)load("glBindBufferRange"); + glad_glBindBufferBase = (PFNGLBINDBUFFERBASEPROC)load("glBindBufferBase"); + glad_glGetIntegeri_v = (PFNGLGETINTEGERI_VPROC)load("glGetIntegeri_v"); +} +static void load_GL_VERSION_3_2(GLADloadproc load) { + if(!GLAD_GL_VERSION_3_2) return; + glad_glDrawElementsBaseVertex = (PFNGLDRAWELEMENTSBASEVERTEXPROC)load("glDrawElementsBaseVertex"); + glad_glDrawRangeElementsBaseVertex = (PFNGLDRAWRANGEELEMENTSBASEVERTEXPROC)load("glDrawRangeElementsBaseVertex"); + glad_glDrawElementsInstancedBaseVertex = (PFNGLDRAWELEMENTSINSTANCEDBASEVERTEXPROC)load("glDrawElementsInstancedBaseVertex"); + glad_glMultiDrawElementsBaseVertex = (PFNGLMULTIDRAWELEMENTSBASEVERTEXPROC)load("glMultiDrawElementsBaseVertex"); + glad_glProvokingVertex = (PFNGLPROVOKINGVERTEXPROC)load("glProvokingVertex"); + glad_glFenceSync = (PFNGLFENCESYNCPROC)load("glFenceSync"); + glad_glIsSync = (PFNGLISSYNCPROC)load("glIsSync"); + glad_glDeleteSync = (PFNGLDELETESYNCPROC)load("glDeleteSync"); + glad_glClientWaitSync = (PFNGLCLIENTWAITSYNCPROC)load("glClientWaitSync"); + glad_glWaitSync = (PFNGLWAITSYNCPROC)load("glWaitSync"); + glad_glGetInteger64v = (PFNGLGETINTEGER64VPROC)load("glGetInteger64v"); + glad_glGetSynciv = (PFNGLGETSYNCIVPROC)load("glGetSynciv"); + glad_glGetInteger64i_v = (PFNGLGETINTEGER64I_VPROC)load("glGetInteger64i_v"); + glad_glGetBufferParameteri64v = (PFNGLGETBUFFERPARAMETERI64VPROC)load("glGetBufferParameteri64v"); + glad_glFramebufferTexture = (PFNGLFRAMEBUFFERTEXTUREPROC)load("glFramebufferTexture"); +
/*
 * Tail of load_GL_VERSION_3_2 (glTexImage2DMultisample, glTexImage3DMultisample,
 * glGetMultisamplefv, glSampleMaski), then load_GL_VERSION_3_3: indexed frag-data
 * binding, the sampler-object calls, glQueryCounter and the 64-bit query-object getters,
 * glVertexAttribDivisor, and the gl...P1ui to gl...P4ui[v] families (glVertexAttribP,
 * glVertexP, glTexCoordP, glMultiTexCoordP), which run on through the next physical line.
 */
glad_glTexImage2DMultisample = (PFNGLTEXIMAGE2DMULTISAMPLEPROC)load("glTexImage2DMultisample"); + glad_glTexImage3DMultisample = (PFNGLTEXIMAGE3DMULTISAMPLEPROC)load("glTexImage3DMultisample"); + glad_glGetMultisamplefv = (PFNGLGETMULTISAMPLEFVPROC)load("glGetMultisamplefv"); + glad_glSampleMaski = (PFNGLSAMPLEMASKIPROC)load("glSampleMaski"); +} +static void load_GL_VERSION_3_3(GLADloadproc load) { + if(!GLAD_GL_VERSION_3_3) return; + glad_glBindFragDataLocationIndexed = (PFNGLBINDFRAGDATALOCATIONINDEXEDPROC)load("glBindFragDataLocationIndexed"); + glad_glGetFragDataIndex = (PFNGLGETFRAGDATAINDEXPROC)load("glGetFragDataIndex"); + glad_glGenSamplers = (PFNGLGENSAMPLERSPROC)load("glGenSamplers"); + glad_glDeleteSamplers = (PFNGLDELETESAMPLERSPROC)load("glDeleteSamplers"); + glad_glIsSampler = (PFNGLISSAMPLERPROC)load("glIsSampler"); + glad_glBindSampler = (PFNGLBINDSAMPLERPROC)load("glBindSampler"); + glad_glSamplerParameteri = (PFNGLSAMPLERPARAMETERIPROC)load("glSamplerParameteri"); + glad_glSamplerParameteriv = (PFNGLSAMPLERPARAMETERIVPROC)load("glSamplerParameteriv"); + glad_glSamplerParameterf = (PFNGLSAMPLERPARAMETERFPROC)load("glSamplerParameterf"); + glad_glSamplerParameterfv = (PFNGLSAMPLERPARAMETERFVPROC)load("glSamplerParameterfv"); + glad_glSamplerParameterIiv = (PFNGLSAMPLERPARAMETERIIVPROC)load("glSamplerParameterIiv"); + glad_glSamplerParameterIuiv = (PFNGLSAMPLERPARAMETERIUIVPROC)load("glSamplerParameterIuiv"); + glad_glGetSamplerParameteriv = (PFNGLGETSAMPLERPARAMETERIVPROC)load("glGetSamplerParameteriv"); + glad_glGetSamplerParameterIiv = (PFNGLGETSAMPLERPARAMETERIIVPROC)load("glGetSamplerParameterIiv"); + glad_glGetSamplerParameterfv = (PFNGLGETSAMPLERPARAMETERFVPROC)load("glGetSamplerParameterfv"); + glad_glGetSamplerParameterIuiv = (PFNGLGETSAMPLERPARAMETERIUIVPROC)load("glGetSamplerParameterIuiv"); + glad_glQueryCounter = (PFNGLQUERYCOUNTERPROC)load("glQueryCounter"); + glad_glGetQueryObjecti64v =
(PFNGLGETQUERYOBJECTI64VPROC)load("glGetQueryObjecti64v"); + glad_glGetQueryObjectui64v = (PFNGLGETQUERYOBJECTUI64VPROC)load("glGetQueryObjectui64v"); + glad_glVertexAttribDivisor = (PFNGLVERTEXATTRIBDIVISORPROC)load("glVertexAttribDivisor"); + glad_glVertexAttribP1ui = (PFNGLVERTEXATTRIBP1UIPROC)load("glVertexAttribP1ui"); + glad_glVertexAttribP1uiv = (PFNGLVERTEXATTRIBP1UIVPROC)load("glVertexAttribP1uiv"); + glad_glVertexAttribP2ui = (PFNGLVERTEXATTRIBP2UIPROC)load("glVertexAttribP2ui"); + glad_glVertexAttribP2uiv = (PFNGLVERTEXATTRIBP2UIVPROC)load("glVertexAttribP2uiv"); + glad_glVertexAttribP3ui = (PFNGLVERTEXATTRIBP3UIPROC)load("glVertexAttribP3ui"); + glad_glVertexAttribP3uiv = (PFNGLVERTEXATTRIBP3UIVPROC)load("glVertexAttribP3uiv"); + glad_glVertexAttribP4ui = (PFNGLVERTEXATTRIBP4UIPROC)load("glVertexAttribP4ui"); + glad_glVertexAttribP4uiv = (PFNGLVERTEXATTRIBP4UIVPROC)load("glVertexAttribP4uiv"); + glad_glVertexP2ui = (PFNGLVERTEXP2UIPROC)load("glVertexP2ui"); + glad_glVertexP2uiv = (PFNGLVERTEXP2UIVPROC)load("glVertexP2uiv"); + glad_glVertexP3ui = (PFNGLVERTEXP3UIPROC)load("glVertexP3ui"); + glad_glVertexP3uiv = (PFNGLVERTEXP3UIVPROC)load("glVertexP3uiv"); + glad_glVertexP4ui = (PFNGLVERTEXP4UIPROC)load("glVertexP4ui"); + glad_glVertexP4uiv = (PFNGLVERTEXP4UIVPROC)load("glVertexP4uiv"); + glad_glTexCoordP1ui = (PFNGLTEXCOORDP1UIPROC)load("glTexCoordP1ui"); + glad_glTexCoordP1uiv = (PFNGLTEXCOORDP1UIVPROC)load("glTexCoordP1uiv"); + glad_glTexCoordP2ui = (PFNGLTEXCOORDP2UIPROC)load("glTexCoordP2ui"); + glad_glTexCoordP2uiv = (PFNGLTEXCOORDP2UIVPROC)load("glTexCoordP2uiv"); + glad_glTexCoordP3ui = (PFNGLTEXCOORDP3UIPROC)load("glTexCoordP3ui"); + glad_glTexCoordP3uiv = (PFNGLTEXCOORDP3UIVPROC)load("glTexCoordP3uiv"); + glad_glTexCoordP4ui = (PFNGLTEXCOORDP4UIPROC)load("glTexCoordP4ui"); + glad_glTexCoordP4uiv = (PFNGLTEXCOORDP4UIVPROC)load("glTexCoordP4uiv"); + glad_glMultiTexCoordP1ui = (PFNGLMULTITEXCOORDP1UIPROC)load("glMultiTexCoordP1ui"); +
/*
 * load_GL_VERSION_3_3 finishes on this line (remaining glMultiTexCoordP entries, then
 * glNormalP3ui[v], glColorP3ui[v], glColorP4ui[v], glSecondaryColorP3ui[v]).
 * load_GL_VERSION_4_0 starts after the closing brace: glMinSampleShading, the
 * glBlendEquation[Separate]i and glBlendFunc[Separate]i setters, the two indirect draws,
 * and the first glUniform1d, glUniform2d assignments.
 */
glad_glMultiTexCoordP1uiv = (PFNGLMULTITEXCOORDP1UIVPROC)load("glMultiTexCoordP1uiv"); + glad_glMultiTexCoordP2ui = (PFNGLMULTITEXCOORDP2UIPROC)load("glMultiTexCoordP2ui"); + glad_glMultiTexCoordP2uiv = (PFNGLMULTITEXCOORDP2UIVPROC)load("glMultiTexCoordP2uiv"); + glad_glMultiTexCoordP3ui = (PFNGLMULTITEXCOORDP3UIPROC)load("glMultiTexCoordP3ui"); + glad_glMultiTexCoordP3uiv = (PFNGLMULTITEXCOORDP3UIVPROC)load("glMultiTexCoordP3uiv"); + glad_glMultiTexCoordP4ui = (PFNGLMULTITEXCOORDP4UIPROC)load("glMultiTexCoordP4ui"); + glad_glMultiTexCoordP4uiv = (PFNGLMULTITEXCOORDP4UIVPROC)load("glMultiTexCoordP4uiv"); + glad_glNormalP3ui = (PFNGLNORMALP3UIPROC)load("glNormalP3ui"); + glad_glNormalP3uiv = (PFNGLNORMALP3UIVPROC)load("glNormalP3uiv"); + glad_glColorP3ui = (PFNGLCOLORP3UIPROC)load("glColorP3ui"); + glad_glColorP3uiv = (PFNGLCOLORP3UIVPROC)load("glColorP3uiv"); + glad_glColorP4ui = (PFNGLCOLORP4UIPROC)load("glColorP4ui"); + glad_glColorP4uiv = (PFNGLCOLORP4UIVPROC)load("glColorP4uiv"); + glad_glSecondaryColorP3ui = (PFNGLSECONDARYCOLORP3UIPROC)load("glSecondaryColorP3ui"); + glad_glSecondaryColorP3uiv = (PFNGLSECONDARYCOLORP3UIVPROC)load("glSecondaryColorP3uiv"); +} +static void load_GL_VERSION_4_0(GLADloadproc load) { + if(!GLAD_GL_VERSION_4_0) return; + glad_glMinSampleShading = (PFNGLMINSAMPLESHADINGPROC)load("glMinSampleShading"); + glad_glBlendEquationi = (PFNGLBLENDEQUATIONIPROC)load("glBlendEquationi"); + glad_glBlendEquationSeparatei = (PFNGLBLENDEQUATIONSEPARATEIPROC)load("glBlendEquationSeparatei"); + glad_glBlendFunci = (PFNGLBLENDFUNCIPROC)load("glBlendFunci"); + glad_glBlendFuncSeparatei = (PFNGLBLENDFUNCSEPARATEIPROC)load("glBlendFuncSeparatei"); + glad_glDrawArraysIndirect = (PFNGLDRAWARRAYSINDIRECTPROC)load("glDrawArraysIndirect"); + glad_glDrawElementsIndirect = (PFNGLDRAWELEMENTSINDIRECTPROC)load("glDrawElementsIndirect"); + glad_glUniform1d = (PFNGLUNIFORM1DPROC)load("glUniform1d"); + glad_glUniform2d = (PFNGLUNIFORM2DPROC)load("glUniform2d"); +
/*
 * load_GL_VERSION_4_0, continued: the rest of the glUniform1d to glUniform4d[v] and
 * glUniformMatrix...dv families, glGetUniformdv, and the subroutine query and setter
 * entry points.
 */
glad_glUniform3d = (PFNGLUNIFORM3DPROC)load("glUniform3d"); + glad_glUniform4d = (PFNGLUNIFORM4DPROC)load("glUniform4d"); + glad_glUniform1dv = (PFNGLUNIFORM1DVPROC)load("glUniform1dv"); + glad_glUniform2dv = (PFNGLUNIFORM2DVPROC)load("glUniform2dv"); + glad_glUniform3dv = (PFNGLUNIFORM3DVPROC)load("glUniform3dv"); + glad_glUniform4dv = (PFNGLUNIFORM4DVPROC)load("glUniform4dv"); + glad_glUniformMatrix2dv = (PFNGLUNIFORMMATRIX2DVPROC)load("glUniformMatrix2dv"); + glad_glUniformMatrix3dv = (PFNGLUNIFORMMATRIX3DVPROC)load("glUniformMatrix3dv"); + glad_glUniformMatrix4dv = (PFNGLUNIFORMMATRIX4DVPROC)load("glUniformMatrix4dv"); + glad_glUniformMatrix2x3dv = (PFNGLUNIFORMMATRIX2X3DVPROC)load("glUniformMatrix2x3dv"); + glad_glUniformMatrix2x4dv = (PFNGLUNIFORMMATRIX2X4DVPROC)load("glUniformMatrix2x4dv"); + glad_glUniformMatrix3x2dv = (PFNGLUNIFORMMATRIX3X2DVPROC)load("glUniformMatrix3x2dv"); + glad_glUniformMatrix3x4dv = (PFNGLUNIFORMMATRIX3X4DVPROC)load("glUniformMatrix3x4dv"); + glad_glUniformMatrix4x2dv = (PFNGLUNIFORMMATRIX4X2DVPROC)load("glUniformMatrix4x2dv"); + glad_glUniformMatrix4x3dv = (PFNGLUNIFORMMATRIX4X3DVPROC)load("glUniformMatrix4x3dv"); + glad_glGetUniformdv = (PFNGLGETUNIFORMDVPROC)load("glGetUniformdv"); + glad_glGetSubroutineUniformLocation = (PFNGLGETSUBROUTINEUNIFORMLOCATIONPROC)load("glGetSubroutineUniformLocation"); + glad_glGetSubroutineIndex = (PFNGLGETSUBROUTINEINDEXPROC)load("glGetSubroutineIndex"); + glad_glGetActiveSubroutineUniformiv = (PFNGLGETACTIVESUBROUTINEUNIFORMIVPROC)load("glGetActiveSubroutineUniformiv"); + glad_glGetActiveSubroutineUniformName = (PFNGLGETACTIVESUBROUTINEUNIFORMNAMEPROC)load("glGetActiveSubroutineUniformName"); + glad_glGetActiveSubroutineName = (PFNGLGETACTIVESUBROUTINENAMEPROC)load("glGetActiveSubroutineName"); + glad_glUniformSubroutinesuiv = (PFNGLUNIFORMSUBROUTINESUIVPROC)load("glUniformSubroutinesuiv"); + glad_glGetUniformSubroutineuiv = (PFNGLGETUNIFORMSUBROUTINEUIVPROC)load("glGetUniformSubroutineuiv"); +
/*
 * End of load_GL_VERSION_4_0: glGetProgramStageiv, glPatchParameteri and
 * glPatchParameterfv, the transform-feedback object calls and draws, and the indexed
 * query calls. load_GL_VERSION_4_1 then begins: glReleaseShaderCompiler, glShaderBinary,
 * glGetShaderPrecisionFormat, glDepthRangef, glClearDepthf, glGetProgramBinary,
 * glProgramBinary.
 */
glad_glGetProgramStageiv = (PFNGLGETPROGRAMSTAGEIVPROC)load("glGetProgramStageiv"); + glad_glPatchParameteri = (PFNGLPATCHPARAMETERIPROC)load("glPatchParameteri"); + glad_glPatchParameterfv = (PFNGLPATCHPARAMETERFVPROC)load("glPatchParameterfv"); + glad_glBindTransformFeedback = (PFNGLBINDTRANSFORMFEEDBACKPROC)load("glBindTransformFeedback"); + glad_glDeleteTransformFeedbacks = (PFNGLDELETETRANSFORMFEEDBACKSPROC)load("glDeleteTransformFeedbacks"); + glad_glGenTransformFeedbacks = (PFNGLGENTRANSFORMFEEDBACKSPROC)load("glGenTransformFeedbacks"); + glad_glIsTransformFeedback = (PFNGLISTRANSFORMFEEDBACKPROC)load("glIsTransformFeedback"); + glad_glPauseTransformFeedback = (PFNGLPAUSETRANSFORMFEEDBACKPROC)load("glPauseTransformFeedback"); + glad_glResumeTransformFeedback = (PFNGLRESUMETRANSFORMFEEDBACKPROC)load("glResumeTransformFeedback"); + glad_glDrawTransformFeedback = (PFNGLDRAWTRANSFORMFEEDBACKPROC)load("glDrawTransformFeedback"); + glad_glDrawTransformFeedbackStream = (PFNGLDRAWTRANSFORMFEEDBACKSTREAMPROC)load("glDrawTransformFeedbackStream"); + glad_glBeginQueryIndexed = (PFNGLBEGINQUERYINDEXEDPROC)load("glBeginQueryIndexed"); + glad_glEndQueryIndexed = (PFNGLENDQUERYINDEXEDPROC)load("glEndQueryIndexed"); + glad_glGetQueryIndexediv = (PFNGLGETQUERYINDEXEDIVPROC)load("glGetQueryIndexediv"); +} +static void load_GL_VERSION_4_1(GLADloadproc load) { + if(!GLAD_GL_VERSION_4_1) return; + glad_glReleaseShaderCompiler = (PFNGLRELEASESHADERCOMPILERPROC)load("glReleaseShaderCompiler"); + glad_glShaderBinary = (PFNGLSHADERBINARYPROC)load("glShaderBinary"); + glad_glGetShaderPrecisionFormat = (PFNGLGETSHADERPRECISIONFORMATPROC)load("glGetShaderPrecisionFormat"); + glad_glDepthRangef = (PFNGLDEPTHRANGEFPROC)load("glDepthRangef"); + glad_glClearDepthf = (PFNGLCLEARDEPTHFPROC)load("glClearDepthf"); + glad_glGetProgramBinary = (PFNGLGETPROGRAMBINARYPROC)load("glGetProgramBinary"); + glad_glProgramBinary = (PFNGLPROGRAMBINARYPROC)load("glProgramBinary"); +
/*
 * load_GL_VERSION_4_1, continued: program-pipeline entry points and the glProgramUniform
 * family, which runs on through the next two physical lines.
 * NOTE(review): glad_glProgramParameteri is assigned twice on this line, both times from
 * load("glProgramParameteri"); the second assignment repeats the first lookup.
 */
glad_glProgramParameteri = (PFNGLPROGRAMPARAMETERIPROC)load("glProgramParameteri"); + glad_glUseProgramStages = (PFNGLUSEPROGRAMSTAGESPROC)load("glUseProgramStages"); + glad_glActiveShaderProgram = (PFNGLACTIVESHADERPROGRAMPROC)load("glActiveShaderProgram"); + glad_glCreateShaderProgramv = (PFNGLCREATESHADERPROGRAMVPROC)load("glCreateShaderProgramv"); + glad_glBindProgramPipeline = (PFNGLBINDPROGRAMPIPELINEPROC)load("glBindProgramPipeline"); + glad_glDeleteProgramPipelines = (PFNGLDELETEPROGRAMPIPELINESPROC)load("glDeleteProgramPipelines"); + glad_glGenProgramPipelines = (PFNGLGENPROGRAMPIPELINESPROC)load("glGenProgramPipelines"); + glad_glIsProgramPipeline = (PFNGLISPROGRAMPIPELINEPROC)load("glIsProgramPipeline"); + glad_glGetProgramPipelineiv = (PFNGLGETPROGRAMPIPELINEIVPROC)load("glGetProgramPipelineiv"); + glad_glProgramParameteri = (PFNGLPROGRAMPARAMETERIPROC)load("glProgramParameteri"); + glad_glProgramUniform1i = (PFNGLPROGRAMUNIFORM1IPROC)load("glProgramUniform1i"); + glad_glProgramUniform1iv = (PFNGLPROGRAMUNIFORM1IVPROC)load("glProgramUniform1iv"); + glad_glProgramUniform1f = (PFNGLPROGRAMUNIFORM1FPROC)load("glProgramUniform1f"); + glad_glProgramUniform1fv = (PFNGLPROGRAMUNIFORM1FVPROC)load("glProgramUniform1fv"); + glad_glProgramUniform1d = (PFNGLPROGRAMUNIFORM1DPROC)load("glProgramUniform1d"); + glad_glProgramUniform1dv = (PFNGLPROGRAMUNIFORM1DVPROC)load("glProgramUniform1dv"); + glad_glProgramUniform1ui = (PFNGLPROGRAMUNIFORM1UIPROC)load("glProgramUniform1ui"); + glad_glProgramUniform1uiv = (PFNGLPROGRAMUNIFORM1UIVPROC)load("glProgramUniform1uiv"); + glad_glProgramUniform2i = (PFNGLPROGRAMUNIFORM2IPROC)load("glProgramUniform2i"); + glad_glProgramUniform2iv = (PFNGLPROGRAMUNIFORM2IVPROC)load("glProgramUniform2iv"); + glad_glProgramUniform2f = (PFNGLPROGRAMUNIFORM2FPROC)load("glProgramUniform2f"); + glad_glProgramUniform2fv = (PFNGLPROGRAMUNIFORM2FVPROC)load("glProgramUniform2fv"); + glad_glProgramUniform2d =
(PFNGLPROGRAMUNIFORM2DPROC)load("glProgramUniform2d"); + glad_glProgramUniform2dv = (PFNGLPROGRAMUNIFORM2DVPROC)load("glProgramUniform2dv"); + glad_glProgramUniform2ui = (PFNGLPROGRAMUNIFORM2UIPROC)load("glProgramUniform2ui"); + glad_glProgramUniform2uiv = (PFNGLPROGRAMUNIFORM2UIVPROC)load("glProgramUniform2uiv"); + glad_glProgramUniform3i = (PFNGLPROGRAMUNIFORM3IPROC)load("glProgramUniform3i"); + glad_glProgramUniform3iv = (PFNGLPROGRAMUNIFORM3IVPROC)load("glProgramUniform3iv"); + glad_glProgramUniform3f = (PFNGLPROGRAMUNIFORM3FPROC)load("glProgramUniform3f"); + glad_glProgramUniform3fv = (PFNGLPROGRAMUNIFORM3FVPROC)load("glProgramUniform3fv"); + glad_glProgramUniform3d = (PFNGLPROGRAMUNIFORM3DPROC)load("glProgramUniform3d"); + glad_glProgramUniform3dv = (PFNGLPROGRAMUNIFORM3DVPROC)load("glProgramUniform3dv"); + glad_glProgramUniform3ui = (PFNGLPROGRAMUNIFORM3UIPROC)load("glProgramUniform3ui"); + glad_glProgramUniform3uiv = (PFNGLPROGRAMUNIFORM3UIVPROC)load("glProgramUniform3uiv"); + glad_glProgramUniform4i = (PFNGLPROGRAMUNIFORM4IPROC)load("glProgramUniform4i"); + glad_glProgramUniform4iv = (PFNGLPROGRAMUNIFORM4IVPROC)load("glProgramUniform4iv"); + glad_glProgramUniform4f = (PFNGLPROGRAMUNIFORM4FPROC)load("glProgramUniform4f"); + glad_glProgramUniform4fv = (PFNGLPROGRAMUNIFORM4FVPROC)load("glProgramUniform4fv"); + glad_glProgramUniform4d = (PFNGLPROGRAMUNIFORM4DPROC)load("glProgramUniform4d"); + glad_glProgramUniform4dv = (PFNGLPROGRAMUNIFORM4DVPROC)load("glProgramUniform4dv"); + glad_glProgramUniform4ui = (PFNGLPROGRAMUNIFORM4UIPROC)load("glProgramUniform4ui"); + glad_glProgramUniform4uiv = (PFNGLPROGRAMUNIFORM4UIVPROC)load("glProgramUniform4uiv"); + glad_glProgramUniformMatrix2fv = (PFNGLPROGRAMUNIFORMMATRIX2FVPROC)load("glProgramUniformMatrix2fv"); + glad_glProgramUniformMatrix3fv = (PFNGLPROGRAMUNIFORMMATRIX3FVPROC)load("glProgramUniformMatrix3fv"); + glad_glProgramUniformMatrix4fv = (PFNGLPROGRAMUNIFORMMATRIX4FVPROC)load("glProgramUniformMatrix4fv"); +
/*
 * load_GL_VERSION_4_1, continued: the remaining glProgramUniformMatrix variants,
 * glValidateProgramPipeline, glGetProgramPipelineInfoLog, then the start of the
 * glVertexAttribL family.
 * The next physical line completes load_GL_VERSION_4_1 (glVertexAttribL, the
 * glViewport, glScissor and glDepthRange array and indexed calls, glGetFloati_v,
 * glGetDoublei_v) and opens load_GL_VERSION_4_2 (base-instance draws).
 * The physical line after that finishes load_GL_VERSION_4_2 (glGetInternalformativ,
 * glGetActiveAtomicCounterBufferiv, glBindImageTexture, glMemoryBarrier,
 * glTexStorage1D, glTexStorage2D, glTexStorage3D, instanced transform-feedback draws)
 * and opens load_GL_VERSION_4_3 (glClearBufferData, glClearBufferSubData,
 * glDispatchCompute and glDispatchComputeIndirect, glCopyImageSubData, framebuffer
 * parameters, glGetInternalformati64v, texture invalidation).
 */
glad_glProgramUniformMatrix2dv = (PFNGLPROGRAMUNIFORMMATRIX2DVPROC)load("glProgramUniformMatrix2dv"); + glad_glProgramUniformMatrix3dv = (PFNGLPROGRAMUNIFORMMATRIX3DVPROC)load("glProgramUniformMatrix3dv"); + glad_glProgramUniformMatrix4dv = (PFNGLPROGRAMUNIFORMMATRIX4DVPROC)load("glProgramUniformMatrix4dv"); + glad_glProgramUniformMatrix2x3fv = (PFNGLPROGRAMUNIFORMMATRIX2X3FVPROC)load("glProgramUniformMatrix2x3fv"); + glad_glProgramUniformMatrix3x2fv = (PFNGLPROGRAMUNIFORMMATRIX3X2FVPROC)load("glProgramUniformMatrix3x2fv"); + glad_glProgramUniformMatrix2x4fv = (PFNGLPROGRAMUNIFORMMATRIX2X4FVPROC)load("glProgramUniformMatrix2x4fv"); + glad_glProgramUniformMatrix4x2fv = (PFNGLPROGRAMUNIFORMMATRIX4X2FVPROC)load("glProgramUniformMatrix4x2fv"); + glad_glProgramUniformMatrix3x4fv = (PFNGLPROGRAMUNIFORMMATRIX3X4FVPROC)load("glProgramUniformMatrix3x4fv"); + glad_glProgramUniformMatrix4x3fv = (PFNGLPROGRAMUNIFORMMATRIX4X3FVPROC)load("glProgramUniformMatrix4x3fv"); + glad_glProgramUniformMatrix2x3dv = (PFNGLPROGRAMUNIFORMMATRIX2X3DVPROC)load("glProgramUniformMatrix2x3dv"); + glad_glProgramUniformMatrix3x2dv = (PFNGLPROGRAMUNIFORMMATRIX3X2DVPROC)load("glProgramUniformMatrix3x2dv"); + glad_glProgramUniformMatrix2x4dv = (PFNGLPROGRAMUNIFORMMATRIX2X4DVPROC)load("glProgramUniformMatrix2x4dv"); + glad_glProgramUniformMatrix4x2dv = (PFNGLPROGRAMUNIFORMMATRIX4X2DVPROC)load("glProgramUniformMatrix4x2dv"); + glad_glProgramUniformMatrix3x4dv = (PFNGLPROGRAMUNIFORMMATRIX3X4DVPROC)load("glProgramUniformMatrix3x4dv"); + glad_glProgramUniformMatrix4x3dv = (PFNGLPROGRAMUNIFORMMATRIX4X3DVPROC)load("glProgramUniformMatrix4x3dv"); + glad_glValidateProgramPipeline = (PFNGLVALIDATEPROGRAMPIPELINEPROC)load("glValidateProgramPipeline"); + glad_glGetProgramPipelineInfoLog = (PFNGLGETPROGRAMPIPELINEINFOLOGPROC)load("glGetProgramPipelineInfoLog"); + glad_glVertexAttribL1d = (PFNGLVERTEXATTRIBL1DPROC)load("glVertexAttribL1d"); + glad_glVertexAttribL2d =
(PFNGLVERTEXATTRIBL2DPROC)load("glVertexAttribL2d"); + glad_glVertexAttribL3d = (PFNGLVERTEXATTRIBL3DPROC)load("glVertexAttribL3d"); + glad_glVertexAttribL4d = (PFNGLVERTEXATTRIBL4DPROC)load("glVertexAttribL4d"); + glad_glVertexAttribL1dv = (PFNGLVERTEXATTRIBL1DVPROC)load("glVertexAttribL1dv"); + glad_glVertexAttribL2dv = (PFNGLVERTEXATTRIBL2DVPROC)load("glVertexAttribL2dv"); + glad_glVertexAttribL3dv = (PFNGLVERTEXATTRIBL3DVPROC)load("glVertexAttribL3dv"); + glad_glVertexAttribL4dv = (PFNGLVERTEXATTRIBL4DVPROC)load("glVertexAttribL4dv"); + glad_glVertexAttribLPointer = (PFNGLVERTEXATTRIBLPOINTERPROC)load("glVertexAttribLPointer"); + glad_glGetVertexAttribLdv = (PFNGLGETVERTEXATTRIBLDVPROC)load("glGetVertexAttribLdv"); + glad_glViewportArrayv = (PFNGLVIEWPORTARRAYVPROC)load("glViewportArrayv"); + glad_glViewportIndexedf = (PFNGLVIEWPORTINDEXEDFPROC)load("glViewportIndexedf"); + glad_glViewportIndexedfv = (PFNGLVIEWPORTINDEXEDFVPROC)load("glViewportIndexedfv"); + glad_glScissorArrayv = (PFNGLSCISSORARRAYVPROC)load("glScissorArrayv"); + glad_glScissorIndexed = (PFNGLSCISSORINDEXEDPROC)load("glScissorIndexed"); + glad_glScissorIndexedv = (PFNGLSCISSORINDEXEDVPROC)load("glScissorIndexedv"); + glad_glDepthRangeArrayv = (PFNGLDEPTHRANGEARRAYVPROC)load("glDepthRangeArrayv"); + glad_glDepthRangeIndexed = (PFNGLDEPTHRANGEINDEXEDPROC)load("glDepthRangeIndexed"); + glad_glGetFloati_v = (PFNGLGETFLOATI_VPROC)load("glGetFloati_v"); + glad_glGetDoublei_v = (PFNGLGETDOUBLEI_VPROC)load("glGetDoublei_v"); +} +static void load_GL_VERSION_4_2(GLADloadproc load) { + if(!GLAD_GL_VERSION_4_2) return; + glad_glDrawArraysInstancedBaseInstance = (PFNGLDRAWARRAYSINSTANCEDBASEINSTANCEPROC)load("glDrawArraysInstancedBaseInstance"); + glad_glDrawElementsInstancedBaseInstance = (PFNGLDRAWELEMENTSINSTANCEDBASEINSTANCEPROC)load("glDrawElementsInstancedBaseInstance"); + glad_glDrawElementsInstancedBaseVertexBaseInstance =
(PFNGLDRAWELEMENTSINSTANCEDBASEVERTEXBASEINSTANCEPROC)load("glDrawElementsInstancedBaseVertexBaseInstance"); + glad_glGetInternalformativ = (PFNGLGETINTERNALFORMATIVPROC)load("glGetInternalformativ"); + glad_glGetActiveAtomicCounterBufferiv = (PFNGLGETACTIVEATOMICCOUNTERBUFFERIVPROC)load("glGetActiveAtomicCounterBufferiv"); + glad_glBindImageTexture = (PFNGLBINDIMAGETEXTUREPROC)load("glBindImageTexture"); + glad_glMemoryBarrier = (PFNGLMEMORYBARRIERPROC)load("glMemoryBarrier"); + glad_glTexStorage1D = (PFNGLTEXSTORAGE1DPROC)load("glTexStorage1D"); + glad_glTexStorage2D = (PFNGLTEXSTORAGE2DPROC)load("glTexStorage2D"); + glad_glTexStorage3D = (PFNGLTEXSTORAGE3DPROC)load("glTexStorage3D"); + glad_glDrawTransformFeedbackInstanced = (PFNGLDRAWTRANSFORMFEEDBACKINSTANCEDPROC)load("glDrawTransformFeedbackInstanced"); + glad_glDrawTransformFeedbackStreamInstanced = (PFNGLDRAWTRANSFORMFEEDBACKSTREAMINSTANCEDPROC)load("glDrawTransformFeedbackStreamInstanced"); +} +static void load_GL_VERSION_4_3(GLADloadproc load) { + if(!GLAD_GL_VERSION_4_3) return; + glad_glClearBufferData = (PFNGLCLEARBUFFERDATAPROC)load("glClearBufferData"); + glad_glClearBufferSubData = (PFNGLCLEARBUFFERSUBDATAPROC)load("glClearBufferSubData"); + glad_glDispatchCompute = (PFNGLDISPATCHCOMPUTEPROC)load("glDispatchCompute"); + glad_glDispatchComputeIndirect = (PFNGLDISPATCHCOMPUTEINDIRECTPROC)load("glDispatchComputeIndirect"); + glad_glCopyImageSubData = (PFNGLCOPYIMAGESUBDATAPROC)load("glCopyImageSubData"); + glad_glFramebufferParameteri = (PFNGLFRAMEBUFFERPARAMETERIPROC)load("glFramebufferParameteri"); + glad_glGetFramebufferParameteriv = (PFNGLGETFRAMEBUFFERPARAMETERIVPROC)load("glGetFramebufferParameteriv"); + glad_glGetInternalformati64v = (PFNGLGETINTERNALFORMATI64VPROC)load("glGetInternalformati64v"); + glad_glInvalidateTexSubImage = (PFNGLINVALIDATETEXSUBIMAGEPROC)load("glInvalidateTexSubImage"); + glad_glInvalidateTexImage = (PFNGLINVALIDATETEXIMAGEPROC)load("glInvalidateTexImage"); +
/*
 * load_GL_VERSION_4_3, continued: buffer and framebuffer invalidation, the multi-draw
 * indirect calls, program interface and resource queries, glShaderStorageBlockBinding,
 * glTexBufferRange, multisample texture storage, glTextureView, glBindVertexBuffer and
 * the first glVertexAttrib[I]Format assignments.
 */
glad_glInvalidateBufferSubData = (PFNGLINVALIDATEBUFFERSUBDATAPROC)load("glInvalidateBufferSubData"); + glad_glInvalidateBufferData = (PFNGLINVALIDATEBUFFERDATAPROC)load("glInvalidateBufferData"); + glad_glInvalidateFramebuffer = (PFNGLINVALIDATEFRAMEBUFFERPROC)load("glInvalidateFramebuffer"); + glad_glInvalidateSubFramebuffer = (PFNGLINVALIDATESUBFRAMEBUFFERPROC)load("glInvalidateSubFramebuffer"); + glad_glMultiDrawArraysIndirect = (PFNGLMULTIDRAWARRAYSINDIRECTPROC)load("glMultiDrawArraysIndirect"); + glad_glMultiDrawElementsIndirect = (PFNGLMULTIDRAWELEMENTSINDIRECTPROC)load("glMultiDrawElementsIndirect"); + glad_glGetProgramInterfaceiv = (PFNGLGETPROGRAMINTERFACEIVPROC)load("glGetProgramInterfaceiv"); + glad_glGetProgramResourceIndex = (PFNGLGETPROGRAMRESOURCEINDEXPROC)load("glGetProgramResourceIndex"); + glad_glGetProgramResourceName = (PFNGLGETPROGRAMRESOURCENAMEPROC)load("glGetProgramResourceName"); + glad_glGetProgramResourceiv = (PFNGLGETPROGRAMRESOURCEIVPROC)load("glGetProgramResourceiv"); + glad_glGetProgramResourceLocation = (PFNGLGETPROGRAMRESOURCELOCATIONPROC)load("glGetProgramResourceLocation"); + glad_glGetProgramResourceLocationIndex = (PFNGLGETPROGRAMRESOURCELOCATIONINDEXPROC)load("glGetProgramResourceLocationIndex"); + glad_glShaderStorageBlockBinding = (PFNGLSHADERSTORAGEBLOCKBINDINGPROC)load("glShaderStorageBlockBinding"); + glad_glTexBufferRange = (PFNGLTEXBUFFERRANGEPROC)load("glTexBufferRange"); + glad_glTexStorage2DMultisample = (PFNGLTEXSTORAGE2DMULTISAMPLEPROC)load("glTexStorage2DMultisample"); + glad_glTexStorage3DMultisample = (PFNGLTEXSTORAGE3DMULTISAMPLEPROC)load("glTexStorage3DMultisample"); + glad_glTextureView = (PFNGLTEXTUREVIEWPROC)load("glTextureView"); + glad_glBindVertexBuffer = (PFNGLBINDVERTEXBUFFERPROC)load("glBindVertexBuffer"); + glad_glVertexAttribFormat = (PFNGLVERTEXATTRIBFORMATPROC)load("glVertexAttribFormat"); + glad_glVertexAttribIFormat = (PFNGLVERTEXATTRIBIFORMATPROC)load("glVertexAttribIFormat"); +
/*
 * End of load_GL_VERSION_4_3: glVertexAttribLFormat, glVertexAttribBinding,
 * glVertexBindingDivisor, the debug-message and debug-group calls, the object-label
 * calls, and glGetPointerv.
 * The whole of load_GL_VERSION_4_4 follows on the same physical line: no-op unless
 * GLAD_GL_VERSION_4_4 is set, otherwise loads glBufferStorage, glClearTexImage,
 * glClearTexSubImage, glBindBuffersBase, glBindBuffersRange, glBindTextures,
 * glBindSamplers, glBindImageTextures and glBindVertexBuffers.
 * The trailing "static void" is the start of the next loader, whose name and body are
 * on later lines.
 */
glad_glVertexAttribLFormat = (PFNGLVERTEXATTRIBLFORMATPROC)load("glVertexAttribLFormat"); + glad_glVertexAttribBinding = (PFNGLVERTEXATTRIBBINDINGPROC)load("glVertexAttribBinding"); + glad_glVertexBindingDivisor = (PFNGLVERTEXBINDINGDIVISORPROC)load("glVertexBindingDivisor"); + glad_glDebugMessageControl = (PFNGLDEBUGMESSAGECONTROLPROC)load("glDebugMessageControl"); + glad_glDebugMessageInsert = (PFNGLDEBUGMESSAGEINSERTPROC)load("glDebugMessageInsert"); + glad_glDebugMessageCallback = (PFNGLDEBUGMESSAGECALLBACKPROC)load("glDebugMessageCallback"); + glad_glGetDebugMessageLog = (PFNGLGETDEBUGMESSAGELOGPROC)load("glGetDebugMessageLog"); + glad_glPushDebugGroup = (PFNGLPUSHDEBUGGROUPPROC)load("glPushDebugGroup"); + glad_glPopDebugGroup = (PFNGLPOPDEBUGGROUPPROC)load("glPopDebugGroup"); + glad_glObjectLabel = (PFNGLOBJECTLABELPROC)load("glObjectLabel"); + glad_glGetObjectLabel = (PFNGLGETOBJECTLABELPROC)load("glGetObjectLabel"); + glad_glObjectPtrLabel = (PFNGLOBJECTPTRLABELPROC)load("glObjectPtrLabel"); + glad_glGetObjectPtrLabel = (PFNGLGETOBJECTPTRLABELPROC)load("glGetObjectPtrLabel"); + glad_glGetPointerv = (PFNGLGETPOINTERVPROC)load("glGetPointerv"); +} +static void load_GL_VERSION_4_4(GLADloadproc load) { + if(!GLAD_GL_VERSION_4_4) return; + glad_glBufferStorage = (PFNGLBUFFERSTORAGEPROC)load("glBufferStorage"); + glad_glClearTexImage = (PFNGLCLEARTEXIMAGEPROC)load("glClearTexImage"); + glad_glClearTexSubImage = (PFNGLCLEARTEXSUBIMAGEPROC)load("glClearTexSubImage"); + glad_glBindBuffersBase = (PFNGLBINDBUFFERSBASEPROC)load("glBindBuffersBase"); + glad_glBindBuffersRange = (PFNGLBINDBUFFERSRANGEPROC)load("glBindBuffersRange"); + glad_glBindTextures = (PFNGLBINDTEXTURESPROC)load("glBindTextures"); + glad_glBindSamplers = (PFNGLBINDSAMPLERSPROC)load("glBindSamplers"); + glad_glBindImageTextures = (PFNGLBINDIMAGETEXTURESPROC)load("glBindImageTextures"); + glad_glBindVertexBuffers = (PFNGLBINDVERTEXBUFFERSPROC)load("glBindVertexBuffers"); +} +static void
load_GL_VERSION_4_5(GLADloadproc load) { + if(!GLAD_GL_VERSION_4_5) return; + glad_glClipControl = (PFNGLCLIPCONTROLPROC)load("glClipControl"); + glad_glCreateTransformFeedbacks = (PFNGLCREATETRANSFORMFEEDBACKSPROC)load("glCreateTransformFeedbacks"); + glad_glTransformFeedbackBufferBase = (PFNGLTRANSFORMFEEDBACKBUFFERBASEPROC)load("glTransformFeedbackBufferBase"); + glad_glTransformFeedbackBufferRange = (PFNGLTRANSFORMFEEDBACKBUFFERRANGEPROC)load("glTransformFeedbackBufferRange"); + glad_glGetTransformFeedbackiv = (PFNGLGETTRANSFORMFEEDBACKIVPROC)load("glGetTransformFeedbackiv"); + glad_glGetTransformFeedbacki_v = (PFNGLGETTRANSFORMFEEDBACKI_VPROC)load("glGetTransformFeedbacki_v"); + glad_glGetTransformFeedbacki64_v = (PFNGLGETTRANSFORMFEEDBACKI64_VPROC)load("glGetTransformFeedbacki64_v"); + glad_glCreateBuffers = (PFNGLCREATEBUFFERSPROC)load("glCreateBuffers"); + glad_glNamedBufferStorage = (PFNGLNAMEDBUFFERSTORAGEPROC)load("glNamedBufferStorage"); + glad_glNamedBufferData = (PFNGLNAMEDBUFFERDATAPROC)load("glNamedBufferData"); + glad_glNamedBufferSubData = (PFNGLNAMEDBUFFERSUBDATAPROC)load("glNamedBufferSubData"); + glad_glCopyNamedBufferSubData = (PFNGLCOPYNAMEDBUFFERSUBDATAPROC)load("glCopyNamedBufferSubData"); + glad_glClearNamedBufferData = (PFNGLCLEARNAMEDBUFFERDATAPROC)load("glClearNamedBufferData"); + glad_glClearNamedBufferSubData = (PFNGLCLEARNAMEDBUFFERSUBDATAPROC)load("glClearNamedBufferSubData"); + glad_glMapNamedBuffer = (PFNGLMAPNAMEDBUFFERPROC)load("glMapNamedBuffer"); + glad_glMapNamedBufferRange = (PFNGLMAPNAMEDBUFFERRANGEPROC)load("glMapNamedBufferRange"); + glad_glUnmapNamedBuffer = (PFNGLUNMAPNAMEDBUFFERPROC)load("glUnmapNamedBuffer"); + glad_glFlushMappedNamedBufferRange = (PFNGLFLUSHMAPPEDNAMEDBUFFERRANGEPROC)load("glFlushMappedNamedBufferRange"); + glad_glGetNamedBufferParameteriv = (PFNGLGETNAMEDBUFFERPARAMETERIVPROC)load("glGetNamedBufferParameteriv"); + glad_glGetNamedBufferParameteri64v = 
(PFNGLGETNAMEDBUFFERPARAMETERI64VPROC)load("glGetNamedBufferParameteri64v"); + glad_glGetNamedBufferPointerv = (PFNGLGETNAMEDBUFFERPOINTERVPROC)load("glGetNamedBufferPointerv"); + glad_glGetNamedBufferSubData = (PFNGLGETNAMEDBUFFERSUBDATAPROC)load("glGetNamedBufferSubData"); + glad_glCreateFramebuffers = (PFNGLCREATEFRAMEBUFFERSPROC)load("glCreateFramebuffers"); + glad_glNamedFramebufferRenderbuffer = (PFNGLNAMEDFRAMEBUFFERRENDERBUFFERPROC)load("glNamedFramebufferRenderbuffer"); + glad_glNamedFramebufferParameteri = (PFNGLNAMEDFRAMEBUFFERPARAMETERIPROC)load("glNamedFramebufferParameteri"); + glad_glNamedFramebufferTexture = (PFNGLNAMEDFRAMEBUFFERTEXTUREPROC)load("glNamedFramebufferTexture"); + glad_glNamedFramebufferTextureLayer = (PFNGLNAMEDFRAMEBUFFERTEXTURELAYERPROC)load("glNamedFramebufferTextureLayer"); + glad_glNamedFramebufferDrawBuffer = (PFNGLNAMEDFRAMEBUFFERDRAWBUFFERPROC)load("glNamedFramebufferDrawBuffer"); + glad_glNamedFramebufferDrawBuffers = (PFNGLNAMEDFRAMEBUFFERDRAWBUFFERSPROC)load("glNamedFramebufferDrawBuffers"); + glad_glNamedFramebufferReadBuffer = (PFNGLNAMEDFRAMEBUFFERREADBUFFERPROC)load("glNamedFramebufferReadBuffer"); + glad_glInvalidateNamedFramebufferData = (PFNGLINVALIDATENAMEDFRAMEBUFFERDATAPROC)load("glInvalidateNamedFramebufferData"); + glad_glInvalidateNamedFramebufferSubData = (PFNGLINVALIDATENAMEDFRAMEBUFFERSUBDATAPROC)load("glInvalidateNamedFramebufferSubData"); + glad_glClearNamedFramebufferiv = (PFNGLCLEARNAMEDFRAMEBUFFERIVPROC)load("glClearNamedFramebufferiv"); + glad_glClearNamedFramebufferuiv = (PFNGLCLEARNAMEDFRAMEBUFFERUIVPROC)load("glClearNamedFramebufferuiv"); + glad_glClearNamedFramebufferfv = (PFNGLCLEARNAMEDFRAMEBUFFERFVPROC)load("glClearNamedFramebufferfv"); + glad_glClearNamedFramebufferfi = (PFNGLCLEARNAMEDFRAMEBUFFERFIPROC)load("glClearNamedFramebufferfi"); + glad_glBlitNamedFramebuffer = (PFNGLBLITNAMEDFRAMEBUFFERPROC)load("glBlitNamedFramebuffer"); + glad_glCheckNamedFramebufferStatus = 
(PFNGLCHECKNAMEDFRAMEBUFFERSTATUSPROC)load("glCheckNamedFramebufferStatus"); + glad_glGetNamedFramebufferParameteriv = (PFNGLGETNAMEDFRAMEBUFFERPARAMETERIVPROC)load("glGetNamedFramebufferParameteriv"); + glad_glGetNamedFramebufferAttachmentParameteriv = (PFNGLGETNAMEDFRAMEBUFFERATTACHMENTPARAMETERIVPROC)load("glGetNamedFramebufferAttachmentParameteriv"); + glad_glCreateRenderbuffers = (PFNGLCREATERENDERBUFFERSPROC)load("glCreateRenderbuffers"); + glad_glNamedRenderbufferStorage = (PFNGLNAMEDRENDERBUFFERSTORAGEPROC)load("glNamedRenderbufferStorage"); + glad_glNamedRenderbufferStorageMultisample = (PFNGLNAMEDRENDERBUFFERSTORAGEMULTISAMPLEPROC)load("glNamedRenderbufferStorageMultisample"); + glad_glGetNamedRenderbufferParameteriv = (PFNGLGETNAMEDRENDERBUFFERPARAMETERIVPROC)load("glGetNamedRenderbufferParameteriv"); + glad_glCreateTextures = (PFNGLCREATETEXTURESPROC)load("glCreateTextures"); + glad_glTextureBuffer = (PFNGLTEXTUREBUFFERPROC)load("glTextureBuffer"); + glad_glTextureBufferRange = (PFNGLTEXTUREBUFFERRANGEPROC)load("glTextureBufferRange"); + glad_glTextureStorage1D = (PFNGLTEXTURESTORAGE1DPROC)load("glTextureStorage1D"); + glad_glTextureStorage2D = (PFNGLTEXTURESTORAGE2DPROC)load("glTextureStorage2D"); + glad_glTextureStorage3D = (PFNGLTEXTURESTORAGE3DPROC)load("glTextureStorage3D"); + glad_glTextureStorage2DMultisample = (PFNGLTEXTURESTORAGE2DMULTISAMPLEPROC)load("glTextureStorage2DMultisample"); + glad_glTextureStorage3DMultisample = (PFNGLTEXTURESTORAGE3DMULTISAMPLEPROC)load("glTextureStorage3DMultisample"); + glad_glTextureSubImage1D = (PFNGLTEXTURESUBIMAGE1DPROC)load("glTextureSubImage1D"); + glad_glTextureSubImage2D = (PFNGLTEXTURESUBIMAGE2DPROC)load("glTextureSubImage2D"); + glad_glTextureSubImage3D = (PFNGLTEXTURESUBIMAGE3DPROC)load("glTextureSubImage3D"); + glad_glCompressedTextureSubImage1D = (PFNGLCOMPRESSEDTEXTURESUBIMAGE1DPROC)load("glCompressedTextureSubImage1D"); + glad_glCompressedTextureSubImage2D = 
(PFNGLCOMPRESSEDTEXTURESUBIMAGE2DPROC)load("glCompressedTextureSubImage2D"); + glad_glCompressedTextureSubImage3D = (PFNGLCOMPRESSEDTEXTURESUBIMAGE3DPROC)load("glCompressedTextureSubImage3D"); + glad_glCopyTextureSubImage1D = (PFNGLCOPYTEXTURESUBIMAGE1DPROC)load("glCopyTextureSubImage1D"); + glad_glCopyTextureSubImage2D = (PFNGLCOPYTEXTURESUBIMAGE2DPROC)load("glCopyTextureSubImage2D"); + glad_glCopyTextureSubImage3D = (PFNGLCOPYTEXTURESUBIMAGE3DPROC)load("glCopyTextureSubImage3D"); + glad_glTextureParameterf = (PFNGLTEXTUREPARAMETERFPROC)load("glTextureParameterf"); + glad_glTextureParameterfv = (PFNGLTEXTUREPARAMETERFVPROC)load("glTextureParameterfv"); + glad_glTextureParameteri = (PFNGLTEXTUREPARAMETERIPROC)load("glTextureParameteri"); + glad_glTextureParameterIiv = (PFNGLTEXTUREPARAMETERIIVPROC)load("glTextureParameterIiv"); + glad_glTextureParameterIuiv = (PFNGLTEXTUREPARAMETERIUIVPROC)load("glTextureParameterIuiv"); + glad_glTextureParameteriv = (PFNGLTEXTUREPARAMETERIVPROC)load("glTextureParameteriv"); + glad_glGenerateTextureMipmap = (PFNGLGENERATETEXTUREMIPMAPPROC)load("glGenerateTextureMipmap"); + glad_glBindTextureUnit = (PFNGLBINDTEXTUREUNITPROC)load("glBindTextureUnit"); + glad_glGetTextureImage = (PFNGLGETTEXTUREIMAGEPROC)load("glGetTextureImage"); + glad_glGetCompressedTextureImage = (PFNGLGETCOMPRESSEDTEXTUREIMAGEPROC)load("glGetCompressedTextureImage"); + glad_glGetTextureLevelParameterfv = (PFNGLGETTEXTURELEVELPARAMETERFVPROC)load("glGetTextureLevelParameterfv"); + glad_glGetTextureLevelParameteriv = (PFNGLGETTEXTURELEVELPARAMETERIVPROC)load("glGetTextureLevelParameteriv"); + glad_glGetTextureParameterfv = (PFNGLGETTEXTUREPARAMETERFVPROC)load("glGetTextureParameterfv"); + glad_glGetTextureParameterIiv = (PFNGLGETTEXTUREPARAMETERIIVPROC)load("glGetTextureParameterIiv"); + glad_glGetTextureParameterIuiv = (PFNGLGETTEXTUREPARAMETERIUIVPROC)load("glGetTextureParameterIuiv"); + glad_glGetTextureParameteriv = 
(PFNGLGETTEXTUREPARAMETERIVPROC)load("glGetTextureParameteriv"); + glad_glCreateVertexArrays = (PFNGLCREATEVERTEXARRAYSPROC)load("glCreateVertexArrays"); + glad_glDisableVertexArrayAttrib = (PFNGLDISABLEVERTEXARRAYATTRIBPROC)load("glDisableVertexArrayAttrib"); + glad_glEnableVertexArrayAttrib = (PFNGLENABLEVERTEXARRAYATTRIBPROC)load("glEnableVertexArrayAttrib"); + glad_glVertexArrayElementBuffer = (PFNGLVERTEXARRAYELEMENTBUFFERPROC)load("glVertexArrayElementBuffer"); + glad_glVertexArrayVertexBuffer = (PFNGLVERTEXARRAYVERTEXBUFFERPROC)load("glVertexArrayVertexBuffer"); + glad_glVertexArrayVertexBuffers = (PFNGLVERTEXARRAYVERTEXBUFFERSPROC)load("glVertexArrayVertexBuffers"); + glad_glVertexArrayAttribBinding = (PFNGLVERTEXARRAYATTRIBBINDINGPROC)load("glVertexArrayAttribBinding"); + glad_glVertexArrayAttribFormat = (PFNGLVERTEXARRAYATTRIBFORMATPROC)load("glVertexArrayAttribFormat"); + glad_glVertexArrayAttribIFormat = (PFNGLVERTEXARRAYATTRIBIFORMATPROC)load("glVertexArrayAttribIFormat"); + glad_glVertexArrayAttribLFormat = (PFNGLVERTEXARRAYATTRIBLFORMATPROC)load("glVertexArrayAttribLFormat"); + glad_glVertexArrayBindingDivisor = (PFNGLVERTEXARRAYBINDINGDIVISORPROC)load("glVertexArrayBindingDivisor"); + glad_glGetVertexArrayiv = (PFNGLGETVERTEXARRAYIVPROC)load("glGetVertexArrayiv"); + glad_glGetVertexArrayIndexediv = (PFNGLGETVERTEXARRAYINDEXEDIVPROC)load("glGetVertexArrayIndexediv"); + glad_glGetVertexArrayIndexed64iv = (PFNGLGETVERTEXARRAYINDEXED64IVPROC)load("glGetVertexArrayIndexed64iv"); + glad_glCreateSamplers = (PFNGLCREATESAMPLERSPROC)load("glCreateSamplers"); + glad_glCreateProgramPipelines = (PFNGLCREATEPROGRAMPIPELINESPROC)load("glCreateProgramPipelines"); + glad_glCreateQueries = (PFNGLCREATEQUERIESPROC)load("glCreateQueries"); + glad_glGetQueryBufferObjecti64v = (PFNGLGETQUERYBUFFEROBJECTI64VPROC)load("glGetQueryBufferObjecti64v"); + glad_glGetQueryBufferObjectiv = (PFNGLGETQUERYBUFFEROBJECTIVPROC)load("glGetQueryBufferObjectiv"); + 
glad_glGetQueryBufferObjectui64v = (PFNGLGETQUERYBUFFEROBJECTUI64VPROC)load("glGetQueryBufferObjectui64v"); + glad_glGetQueryBufferObjectuiv = (PFNGLGETQUERYBUFFEROBJECTUIVPROC)load("glGetQueryBufferObjectuiv"); + glad_glMemoryBarrierByRegion = (PFNGLMEMORYBARRIERBYREGIONPROC)load("glMemoryBarrierByRegion"); + glad_glGetTextureSubImage = (PFNGLGETTEXTURESUBIMAGEPROC)load("glGetTextureSubImage"); + glad_glGetCompressedTextureSubImage = (PFNGLGETCOMPRESSEDTEXTURESUBIMAGEPROC)load("glGetCompressedTextureSubImage"); + glad_glGetGraphicsResetStatus = (PFNGLGETGRAPHICSRESETSTATUSPROC)load("glGetGraphicsResetStatus"); + glad_glGetnCompressedTexImage = (PFNGLGETNCOMPRESSEDTEXIMAGEPROC)load("glGetnCompressedTexImage"); + glad_glGetnTexImage = (PFNGLGETNTEXIMAGEPROC)load("glGetnTexImage"); + glad_glGetnUniformdv = (PFNGLGETNUNIFORMDVPROC)load("glGetnUniformdv"); + glad_glGetnUniformfv = (PFNGLGETNUNIFORMFVPROC)load("glGetnUniformfv"); + glad_glGetnUniformiv = (PFNGLGETNUNIFORMIVPROC)load("glGetnUniformiv"); + glad_glGetnUniformuiv = (PFNGLGETNUNIFORMUIVPROC)load("glGetnUniformuiv"); + glad_glReadnPixels = (PFNGLREADNPIXELSPROC)load("glReadnPixels"); + glad_glGetnMapdv = (PFNGLGETNMAPDVPROC)load("glGetnMapdv"); + glad_glGetnMapfv = (PFNGLGETNMAPFVPROC)load("glGetnMapfv"); + glad_glGetnMapiv = (PFNGLGETNMAPIVPROC)load("glGetnMapiv"); + glad_glGetnPixelMapfv = (PFNGLGETNPIXELMAPFVPROC)load("glGetnPixelMapfv"); + glad_glGetnPixelMapuiv = (PFNGLGETNPIXELMAPUIVPROC)load("glGetnPixelMapuiv"); + glad_glGetnPixelMapusv = (PFNGLGETNPIXELMAPUSVPROC)load("glGetnPixelMapusv"); + glad_glGetnPolygonStipple = (PFNGLGETNPOLYGONSTIPPLEPROC)load("glGetnPolygonStipple"); + glad_glGetnColorTable = (PFNGLGETNCOLORTABLEPROC)load("glGetnColorTable"); + glad_glGetnConvolutionFilter = (PFNGLGETNCONVOLUTIONFILTERPROC)load("glGetnConvolutionFilter"); + glad_glGetnSeparableFilter = (PFNGLGETNSEPARABLEFILTERPROC)load("glGetnSeparableFilter"); + glad_glGetnHistogram = 
(PFNGLGETNHISTOGRAMPROC)load("glGetnHistogram"); + glad_glGetnMinmax = (PFNGLGETNMINMAXPROC)load("glGetnMinmax"); + glad_glTextureBarrier = (PFNGLTEXTUREBARRIERPROC)load("glTextureBarrier"); +} +static void load_GL_3DFX_tbuffer(GLADloadproc load) { + if(!GLAD_GL_3DFX_tbuffer) return; + glad_glTbufferMask3DFX = (PFNGLTBUFFERMASK3DFXPROC)load("glTbufferMask3DFX"); +} +static void load_GL_AMD_debug_output(GLADloadproc load) { + if(!GLAD_GL_AMD_debug_output) return; + glad_glDebugMessageEnableAMD = (PFNGLDEBUGMESSAGEENABLEAMDPROC)load("glDebugMessageEnableAMD"); + glad_glDebugMessageInsertAMD = (PFNGLDEBUGMESSAGEINSERTAMDPROC)load("glDebugMessageInsertAMD"); + glad_glDebugMessageCallbackAMD = (PFNGLDEBUGMESSAGECALLBACKAMDPROC)load("glDebugMessageCallbackAMD"); + glad_glGetDebugMessageLogAMD = (PFNGLGETDEBUGMESSAGELOGAMDPROC)load("glGetDebugMessageLogAMD"); +} +static void load_GL_AMD_draw_buffers_blend(GLADloadproc load) { + if(!GLAD_GL_AMD_draw_buffers_blend) return; + glad_glBlendFuncIndexedAMD = (PFNGLBLENDFUNCINDEXEDAMDPROC)load("glBlendFuncIndexedAMD"); + glad_glBlendFuncSeparateIndexedAMD = (PFNGLBLENDFUNCSEPARATEINDEXEDAMDPROC)load("glBlendFuncSeparateIndexedAMD"); + glad_glBlendEquationIndexedAMD = (PFNGLBLENDEQUATIONINDEXEDAMDPROC)load("glBlendEquationIndexedAMD"); + glad_glBlendEquationSeparateIndexedAMD = (PFNGLBLENDEQUATIONSEPARATEINDEXEDAMDPROC)load("glBlendEquationSeparateIndexedAMD"); +} +static void load_GL_AMD_framebuffer_multisample_advanced(GLADloadproc load) { + if(!GLAD_GL_AMD_framebuffer_multisample_advanced) return; + glad_glRenderbufferStorageMultisampleAdvancedAMD = (PFNGLRENDERBUFFERSTORAGEMULTISAMPLEADVANCEDAMDPROC)load("glRenderbufferStorageMultisampleAdvancedAMD"); + glad_glNamedRenderbufferStorageMultisampleAdvancedAMD = (PFNGLNAMEDRENDERBUFFERSTORAGEMULTISAMPLEADVANCEDAMDPROC)load("glNamedRenderbufferStorageMultisampleAdvancedAMD"); +} +static void load_GL_AMD_framebuffer_sample_positions(GLADloadproc load) { + 
if(!GLAD_GL_AMD_framebuffer_sample_positions) return; + glad_glFramebufferSamplePositionsfvAMD = (PFNGLFRAMEBUFFERSAMPLEPOSITIONSFVAMDPROC)load("glFramebufferSamplePositionsfvAMD"); + glad_glNamedFramebufferSamplePositionsfvAMD = (PFNGLNAMEDFRAMEBUFFERSAMPLEPOSITIONSFVAMDPROC)load("glNamedFramebufferSamplePositionsfvAMD"); + glad_glGetFramebufferParameterfvAMD = (PFNGLGETFRAMEBUFFERPARAMETERFVAMDPROC)load("glGetFramebufferParameterfvAMD"); + glad_glGetNamedFramebufferParameterfvAMD = (PFNGLGETNAMEDFRAMEBUFFERPARAMETERFVAMDPROC)load("glGetNamedFramebufferParameterfvAMD"); +} +static void load_GL_AMD_gpu_shader_int64(GLADloadproc load) { + if(!GLAD_GL_AMD_gpu_shader_int64) return; + glad_glUniform1i64NV = (PFNGLUNIFORM1I64NVPROC)load("glUniform1i64NV"); + glad_glUniform2i64NV = (PFNGLUNIFORM2I64NVPROC)load("glUniform2i64NV"); + glad_glUniform3i64NV = (PFNGLUNIFORM3I64NVPROC)load("glUniform3i64NV"); + glad_glUniform4i64NV = (PFNGLUNIFORM4I64NVPROC)load("glUniform4i64NV"); + glad_glUniform1i64vNV = (PFNGLUNIFORM1I64VNVPROC)load("glUniform1i64vNV"); + glad_glUniform2i64vNV = (PFNGLUNIFORM2I64VNVPROC)load("glUniform2i64vNV"); + glad_glUniform3i64vNV = (PFNGLUNIFORM3I64VNVPROC)load("glUniform3i64vNV"); + glad_glUniform4i64vNV = (PFNGLUNIFORM4I64VNVPROC)load("glUniform4i64vNV"); + glad_glUniform1ui64NV = (PFNGLUNIFORM1UI64NVPROC)load("glUniform1ui64NV"); + glad_glUniform2ui64NV = (PFNGLUNIFORM2UI64NVPROC)load("glUniform2ui64NV"); + glad_glUniform3ui64NV = (PFNGLUNIFORM3UI64NVPROC)load("glUniform3ui64NV"); + glad_glUniform4ui64NV = (PFNGLUNIFORM4UI64NVPROC)load("glUniform4ui64NV"); + glad_glUniform1ui64vNV = (PFNGLUNIFORM1UI64VNVPROC)load("glUniform1ui64vNV"); + glad_glUniform2ui64vNV = (PFNGLUNIFORM2UI64VNVPROC)load("glUniform2ui64vNV"); + glad_glUniform3ui64vNV = (PFNGLUNIFORM3UI64VNVPROC)load("glUniform3ui64vNV"); + glad_glUniform4ui64vNV = (PFNGLUNIFORM4UI64VNVPROC)load("glUniform4ui64vNV"); + glad_glGetUniformi64vNV = 
(PFNGLGETUNIFORMI64VNVPROC)load("glGetUniformi64vNV"); + glad_glGetUniformui64vNV = (PFNGLGETUNIFORMUI64VNVPROC)load("glGetUniformui64vNV"); + glad_glProgramUniform1i64NV = (PFNGLPROGRAMUNIFORM1I64NVPROC)load("glProgramUniform1i64NV"); + glad_glProgramUniform2i64NV = (PFNGLPROGRAMUNIFORM2I64NVPROC)load("glProgramUniform2i64NV"); + glad_glProgramUniform3i64NV = (PFNGLPROGRAMUNIFORM3I64NVPROC)load("glProgramUniform3i64NV"); + glad_glProgramUniform4i64NV = (PFNGLPROGRAMUNIFORM4I64NVPROC)load("glProgramUniform4i64NV"); + glad_glProgramUniform1i64vNV = (PFNGLPROGRAMUNIFORM1I64VNVPROC)load("glProgramUniform1i64vNV"); + glad_glProgramUniform2i64vNV = (PFNGLPROGRAMUNIFORM2I64VNVPROC)load("glProgramUniform2i64vNV"); + glad_glProgramUniform3i64vNV = (PFNGLPROGRAMUNIFORM3I64VNVPROC)load("glProgramUniform3i64vNV"); + glad_glProgramUniform4i64vNV = (PFNGLPROGRAMUNIFORM4I64VNVPROC)load("glProgramUniform4i64vNV"); + glad_glProgramUniform1ui64NV = (PFNGLPROGRAMUNIFORM1UI64NVPROC)load("glProgramUniform1ui64NV"); + glad_glProgramUniform2ui64NV = (PFNGLPROGRAMUNIFORM2UI64NVPROC)load("glProgramUniform2ui64NV"); + glad_glProgramUniform3ui64NV = (PFNGLPROGRAMUNIFORM3UI64NVPROC)load("glProgramUniform3ui64NV"); + glad_glProgramUniform4ui64NV = (PFNGLPROGRAMUNIFORM4UI64NVPROC)load("glProgramUniform4ui64NV"); + glad_glProgramUniform1ui64vNV = (PFNGLPROGRAMUNIFORM1UI64VNVPROC)load("glProgramUniform1ui64vNV"); + glad_glProgramUniform2ui64vNV = (PFNGLPROGRAMUNIFORM2UI64VNVPROC)load("glProgramUniform2ui64vNV"); + glad_glProgramUniform3ui64vNV = (PFNGLPROGRAMUNIFORM3UI64VNVPROC)load("glProgramUniform3ui64vNV"); + glad_glProgramUniform4ui64vNV = (PFNGLPROGRAMUNIFORM4UI64VNVPROC)load("glProgramUniform4ui64vNV"); +} +static void load_GL_AMD_interleaved_elements(GLADloadproc load) { + if(!GLAD_GL_AMD_interleaved_elements) return; + glad_glVertexAttribParameteriAMD = (PFNGLVERTEXATTRIBPARAMETERIAMDPROC)load("glVertexAttribParameteriAMD"); +} +static void load_GL_AMD_multi_draw_indirect(GLADloadproc 
load) { + if(!GLAD_GL_AMD_multi_draw_indirect) return; + glad_glMultiDrawArraysIndirectAMD = (PFNGLMULTIDRAWARRAYSINDIRECTAMDPROC)load("glMultiDrawArraysIndirectAMD"); + glad_glMultiDrawElementsIndirectAMD = (PFNGLMULTIDRAWELEMENTSINDIRECTAMDPROC)load("glMultiDrawElementsIndirectAMD"); +} +static void load_GL_AMD_name_gen_delete(GLADloadproc load) { + if(!GLAD_GL_AMD_name_gen_delete) return; + glad_glGenNamesAMD = (PFNGLGENNAMESAMDPROC)load("glGenNamesAMD"); + glad_glDeleteNamesAMD = (PFNGLDELETENAMESAMDPROC)load("glDeleteNamesAMD"); + glad_glIsNameAMD = (PFNGLISNAMEAMDPROC)load("glIsNameAMD"); +} +static void load_GL_AMD_occlusion_query_event(GLADloadproc load) { + if(!GLAD_GL_AMD_occlusion_query_event) return; + glad_glQueryObjectParameteruiAMD = (PFNGLQUERYOBJECTPARAMETERUIAMDPROC)load("glQueryObjectParameteruiAMD"); +} +static void load_GL_AMD_performance_monitor(GLADloadproc load) { + if(!GLAD_GL_AMD_performance_monitor) return; + glad_glGetPerfMonitorGroupsAMD = (PFNGLGETPERFMONITORGROUPSAMDPROC)load("glGetPerfMonitorGroupsAMD"); + glad_glGetPerfMonitorCountersAMD = (PFNGLGETPERFMONITORCOUNTERSAMDPROC)load("glGetPerfMonitorCountersAMD"); + glad_glGetPerfMonitorGroupStringAMD = (PFNGLGETPERFMONITORGROUPSTRINGAMDPROC)load("glGetPerfMonitorGroupStringAMD"); + glad_glGetPerfMonitorCounterStringAMD = (PFNGLGETPERFMONITORCOUNTERSTRINGAMDPROC)load("glGetPerfMonitorCounterStringAMD"); + glad_glGetPerfMonitorCounterInfoAMD = (PFNGLGETPERFMONITORCOUNTERINFOAMDPROC)load("glGetPerfMonitorCounterInfoAMD"); + glad_glGenPerfMonitorsAMD = (PFNGLGENPERFMONITORSAMDPROC)load("glGenPerfMonitorsAMD"); + glad_glDeletePerfMonitorsAMD = (PFNGLDELETEPERFMONITORSAMDPROC)load("glDeletePerfMonitorsAMD"); + glad_glSelectPerfMonitorCountersAMD = (PFNGLSELECTPERFMONITORCOUNTERSAMDPROC)load("glSelectPerfMonitorCountersAMD"); + glad_glBeginPerfMonitorAMD = (PFNGLBEGINPERFMONITORAMDPROC)load("glBeginPerfMonitorAMD"); + glad_glEndPerfMonitorAMD = 
(PFNGLENDPERFMONITORAMDPROC)load("glEndPerfMonitorAMD"); + glad_glGetPerfMonitorCounterDataAMD = (PFNGLGETPERFMONITORCOUNTERDATAAMDPROC)load("glGetPerfMonitorCounterDataAMD"); +} +static void load_GL_AMD_sample_positions(GLADloadproc load) { + if(!GLAD_GL_AMD_sample_positions) return; + glad_glSetMultisamplefvAMD = (PFNGLSETMULTISAMPLEFVAMDPROC)load("glSetMultisamplefvAMD"); +} +static void load_GL_AMD_sparse_texture(GLADloadproc load) { + if(!GLAD_GL_AMD_sparse_texture) return; + glad_glTexStorageSparseAMD = (PFNGLTEXSTORAGESPARSEAMDPROC)load("glTexStorageSparseAMD"); + glad_glTextureStorageSparseAMD = (PFNGLTEXTURESTORAGESPARSEAMDPROC)load("glTextureStorageSparseAMD"); +} +static void load_GL_AMD_stencil_operation_extended(GLADloadproc load) { + if(!GLAD_GL_AMD_stencil_operation_extended) return; + glad_glStencilOpValueAMD = (PFNGLSTENCILOPVALUEAMDPROC)load("glStencilOpValueAMD"); +} +static void load_GL_AMD_vertex_shader_tessellator(GLADloadproc load) { + if(!GLAD_GL_AMD_vertex_shader_tessellator) return; + glad_glTessellationFactorAMD = (PFNGLTESSELLATIONFACTORAMDPROC)load("glTessellationFactorAMD"); + glad_glTessellationModeAMD = (PFNGLTESSELLATIONMODEAMDPROC)load("glTessellationModeAMD"); +} +static void load_GL_APPLE_element_array(GLADloadproc load) { + if(!GLAD_GL_APPLE_element_array) return; + glad_glElementPointerAPPLE = (PFNGLELEMENTPOINTERAPPLEPROC)load("glElementPointerAPPLE"); + glad_glDrawElementArrayAPPLE = (PFNGLDRAWELEMENTARRAYAPPLEPROC)load("glDrawElementArrayAPPLE"); + glad_glDrawRangeElementArrayAPPLE = (PFNGLDRAWRANGEELEMENTARRAYAPPLEPROC)load("glDrawRangeElementArrayAPPLE"); + glad_glMultiDrawElementArrayAPPLE = (PFNGLMULTIDRAWELEMENTARRAYAPPLEPROC)load("glMultiDrawElementArrayAPPLE"); + glad_glMultiDrawRangeElementArrayAPPLE = (PFNGLMULTIDRAWRANGEELEMENTARRAYAPPLEPROC)load("glMultiDrawRangeElementArrayAPPLE"); +} +static void load_GL_APPLE_fence(GLADloadproc load) { + if(!GLAD_GL_APPLE_fence) return; + glad_glGenFencesAPPLE = 
(PFNGLGENFENCESAPPLEPROC)load("glGenFencesAPPLE"); + glad_glDeleteFencesAPPLE = (PFNGLDELETEFENCESAPPLEPROC)load("glDeleteFencesAPPLE"); + glad_glSetFenceAPPLE = (PFNGLSETFENCEAPPLEPROC)load("glSetFenceAPPLE"); + glad_glIsFenceAPPLE = (PFNGLISFENCEAPPLEPROC)load("glIsFenceAPPLE"); + glad_glTestFenceAPPLE = (PFNGLTESTFENCEAPPLEPROC)load("glTestFenceAPPLE"); + glad_glFinishFenceAPPLE = (PFNGLFINISHFENCEAPPLEPROC)load("glFinishFenceAPPLE"); + glad_glTestObjectAPPLE = (PFNGLTESTOBJECTAPPLEPROC)load("glTestObjectAPPLE"); + glad_glFinishObjectAPPLE = (PFNGLFINISHOBJECTAPPLEPROC)load("glFinishObjectAPPLE"); +} +static void load_GL_APPLE_flush_buffer_range(GLADloadproc load) { + if(!GLAD_GL_APPLE_flush_buffer_range) return; + glad_glBufferParameteriAPPLE = (PFNGLBUFFERPARAMETERIAPPLEPROC)load("glBufferParameteriAPPLE"); + glad_glFlushMappedBufferRangeAPPLE = (PFNGLFLUSHMAPPEDBUFFERRANGEAPPLEPROC)load("glFlushMappedBufferRangeAPPLE"); +} +static void load_GL_APPLE_object_purgeable(GLADloadproc load) { + if(!GLAD_GL_APPLE_object_purgeable) return; + glad_glObjectPurgeableAPPLE = (PFNGLOBJECTPURGEABLEAPPLEPROC)load("glObjectPurgeableAPPLE"); + glad_glObjectUnpurgeableAPPLE = (PFNGLOBJECTUNPURGEABLEAPPLEPROC)load("glObjectUnpurgeableAPPLE"); + glad_glGetObjectParameterivAPPLE = (PFNGLGETOBJECTPARAMETERIVAPPLEPROC)load("glGetObjectParameterivAPPLE"); +} +static void load_GL_APPLE_texture_range(GLADloadproc load) { + if(!GLAD_GL_APPLE_texture_range) return; + glad_glTextureRangeAPPLE = (PFNGLTEXTURERANGEAPPLEPROC)load("glTextureRangeAPPLE"); + glad_glGetTexParameterPointervAPPLE = (PFNGLGETTEXPARAMETERPOINTERVAPPLEPROC)load("glGetTexParameterPointervAPPLE"); +} +static void load_GL_APPLE_vertex_array_object(GLADloadproc load) { + if(!GLAD_GL_APPLE_vertex_array_object) return; + glad_glBindVertexArrayAPPLE = (PFNGLBINDVERTEXARRAYAPPLEPROC)load("glBindVertexArrayAPPLE"); + glad_glDeleteVertexArraysAPPLE = (PFNGLDELETEVERTEXARRAYSAPPLEPROC)load("glDeleteVertexArraysAPPLE"); + 
glad_glGenVertexArraysAPPLE = (PFNGLGENVERTEXARRAYSAPPLEPROC)load("glGenVertexArraysAPPLE"); + glad_glIsVertexArrayAPPLE = (PFNGLISVERTEXARRAYAPPLEPROC)load("glIsVertexArrayAPPLE"); +} +static void load_GL_APPLE_vertex_array_range(GLADloadproc load) { + if(!GLAD_GL_APPLE_vertex_array_range) return; + glad_glVertexArrayRangeAPPLE = (PFNGLVERTEXARRAYRANGEAPPLEPROC)load("glVertexArrayRangeAPPLE"); + glad_glFlushVertexArrayRangeAPPLE = (PFNGLFLUSHVERTEXARRAYRANGEAPPLEPROC)load("glFlushVertexArrayRangeAPPLE"); + glad_glVertexArrayParameteriAPPLE = (PFNGLVERTEXARRAYPARAMETERIAPPLEPROC)load("glVertexArrayParameteriAPPLE"); +} +static void load_GL_APPLE_vertex_program_evaluators(GLADloadproc load) { + if(!GLAD_GL_APPLE_vertex_program_evaluators) return; + glad_glEnableVertexAttribAPPLE = (PFNGLENABLEVERTEXATTRIBAPPLEPROC)load("glEnableVertexAttribAPPLE"); + glad_glDisableVertexAttribAPPLE = (PFNGLDISABLEVERTEXATTRIBAPPLEPROC)load("glDisableVertexAttribAPPLE"); + glad_glIsVertexAttribEnabledAPPLE = (PFNGLISVERTEXATTRIBENABLEDAPPLEPROC)load("glIsVertexAttribEnabledAPPLE"); + glad_glMapVertexAttrib1dAPPLE = (PFNGLMAPVERTEXATTRIB1DAPPLEPROC)load("glMapVertexAttrib1dAPPLE"); + glad_glMapVertexAttrib1fAPPLE = (PFNGLMAPVERTEXATTRIB1FAPPLEPROC)load("glMapVertexAttrib1fAPPLE"); + glad_glMapVertexAttrib2dAPPLE = (PFNGLMAPVERTEXATTRIB2DAPPLEPROC)load("glMapVertexAttrib2dAPPLE"); + glad_glMapVertexAttrib2fAPPLE = (PFNGLMAPVERTEXATTRIB2FAPPLEPROC)load("glMapVertexAttrib2fAPPLE"); +} +static void load_GL_ARB_ES2_compatibility(GLADloadproc load) { + if(!GLAD_GL_ARB_ES2_compatibility) return; + glad_glReleaseShaderCompiler = (PFNGLRELEASESHADERCOMPILERPROC)load("glReleaseShaderCompiler"); + glad_glShaderBinary = (PFNGLSHADERBINARYPROC)load("glShaderBinary"); + glad_glGetShaderPrecisionFormat = (PFNGLGETSHADERPRECISIONFORMATPROC)load("glGetShaderPrecisionFormat"); + glad_glDepthRangef = (PFNGLDEPTHRANGEFPROC)load("glDepthRangef"); + glad_glClearDepthf = 
(PFNGLCLEARDEPTHFPROC)load("glClearDepthf"); +} +static void load_GL_ARB_ES3_1_compatibility(GLADloadproc load) { + if(!GLAD_GL_ARB_ES3_1_compatibility) return; + glad_glMemoryBarrierByRegion = (PFNGLMEMORYBARRIERBYREGIONPROC)load("glMemoryBarrierByRegion"); +} +static void load_GL_ARB_ES3_2_compatibility(GLADloadproc load) { + if(!GLAD_GL_ARB_ES3_2_compatibility) return; + glad_glPrimitiveBoundingBoxARB = (PFNGLPRIMITIVEBOUNDINGBOXARBPROC)load("glPrimitiveBoundingBoxARB"); +} +static void load_GL_ARB_base_instance(GLADloadproc load) { + if(!GLAD_GL_ARB_base_instance) return; + glad_glDrawArraysInstancedBaseInstance = (PFNGLDRAWARRAYSINSTANCEDBASEINSTANCEPROC)load("glDrawArraysInstancedBaseInstance"); + glad_glDrawElementsInstancedBaseInstance = (PFNGLDRAWELEMENTSINSTANCEDBASEINSTANCEPROC)load("glDrawElementsInstancedBaseInstance"); + glad_glDrawElementsInstancedBaseVertexBaseInstance = (PFNGLDRAWELEMENTSINSTANCEDBASEVERTEXBASEINSTANCEPROC)load("glDrawElementsInstancedBaseVertexBaseInstance"); +} +static void load_GL_ARB_bindless_texture(GLADloadproc load) { + if(!GLAD_GL_ARB_bindless_texture) return; + glad_glGetTextureHandleARB = (PFNGLGETTEXTUREHANDLEARBPROC)load("glGetTextureHandleARB"); + glad_glGetTextureSamplerHandleARB = (PFNGLGETTEXTURESAMPLERHANDLEARBPROC)load("glGetTextureSamplerHandleARB"); + glad_glMakeTextureHandleResidentARB = (PFNGLMAKETEXTUREHANDLERESIDENTARBPROC)load("glMakeTextureHandleResidentARB"); + glad_glMakeTextureHandleNonResidentARB = (PFNGLMAKETEXTUREHANDLENONRESIDENTARBPROC)load("glMakeTextureHandleNonResidentARB"); + glad_glGetImageHandleARB = (PFNGLGETIMAGEHANDLEARBPROC)load("glGetImageHandleARB"); + glad_glMakeImageHandleResidentARB = (PFNGLMAKEIMAGEHANDLERESIDENTARBPROC)load("glMakeImageHandleResidentARB"); + glad_glMakeImageHandleNonResidentARB = (PFNGLMAKEIMAGEHANDLENONRESIDENTARBPROC)load("glMakeImageHandleNonResidentARB"); + glad_glUniformHandleui64ARB = (PFNGLUNIFORMHANDLEUI64ARBPROC)load("glUniformHandleui64ARB"); + 
glad_glUniformHandleui64vARB = (PFNGLUNIFORMHANDLEUI64VARBPROC)load("glUniformHandleui64vARB"); + glad_glProgramUniformHandleui64ARB = (PFNGLPROGRAMUNIFORMHANDLEUI64ARBPROC)load("glProgramUniformHandleui64ARB"); + glad_glProgramUniformHandleui64vARB = (PFNGLPROGRAMUNIFORMHANDLEUI64VARBPROC)load("glProgramUniformHandleui64vARB"); + glad_glIsTextureHandleResidentARB = (PFNGLISTEXTUREHANDLERESIDENTARBPROC)load("glIsTextureHandleResidentARB"); + glad_glIsImageHandleResidentARB = (PFNGLISIMAGEHANDLERESIDENTARBPROC)load("glIsImageHandleResidentARB"); + glad_glVertexAttribL1ui64ARB = (PFNGLVERTEXATTRIBL1UI64ARBPROC)load("glVertexAttribL1ui64ARB"); + glad_glVertexAttribL1ui64vARB = (PFNGLVERTEXATTRIBL1UI64VARBPROC)load("glVertexAttribL1ui64vARB"); + glad_glGetVertexAttribLui64vARB = (PFNGLGETVERTEXATTRIBLUI64VARBPROC)load("glGetVertexAttribLui64vARB"); +} +static void load_GL_ARB_blend_func_extended(GLADloadproc load) { + if(!GLAD_GL_ARB_blend_func_extended) return; + glad_glBindFragDataLocationIndexed = (PFNGLBINDFRAGDATALOCATIONINDEXEDPROC)load("glBindFragDataLocationIndexed"); + glad_glGetFragDataIndex = (PFNGLGETFRAGDATAINDEXPROC)load("glGetFragDataIndex"); +} +static void load_GL_ARB_buffer_storage(GLADloadproc load) { + if(!GLAD_GL_ARB_buffer_storage) return; + glad_glBufferStorage = (PFNGLBUFFERSTORAGEPROC)load("glBufferStorage"); +} +static void load_GL_ARB_cl_event(GLADloadproc load) { + if(!GLAD_GL_ARB_cl_event) return; + glad_glCreateSyncFromCLeventARB = (PFNGLCREATESYNCFROMCLEVENTARBPROC)load("glCreateSyncFromCLeventARB"); +} +static void load_GL_ARB_clear_buffer_object(GLADloadproc load) { + if(!GLAD_GL_ARB_clear_buffer_object) return; + glad_glClearBufferData = (PFNGLCLEARBUFFERDATAPROC)load("glClearBufferData"); + glad_glClearBufferSubData = (PFNGLCLEARBUFFERSUBDATAPROC)load("glClearBufferSubData"); +} +static void load_GL_ARB_clear_texture(GLADloadproc load) { + if(!GLAD_GL_ARB_clear_texture) return; + glad_glClearTexImage = 
(PFNGLCLEARTEXIMAGEPROC)load("glClearTexImage"); + glad_glClearTexSubImage = (PFNGLCLEARTEXSUBIMAGEPROC)load("glClearTexSubImage"); +} +static void load_GL_ARB_clip_control(GLADloadproc load) { + if(!GLAD_GL_ARB_clip_control) return; + glad_glClipControl = (PFNGLCLIPCONTROLPROC)load("glClipControl"); +} +static void load_GL_ARB_color_buffer_float(GLADloadproc load) { + if(!GLAD_GL_ARB_color_buffer_float) return; + glad_glClampColorARB = (PFNGLCLAMPCOLORARBPROC)load("glClampColorARB"); +} +static void load_GL_ARB_compute_shader(GLADloadproc load) { + if(!GLAD_GL_ARB_compute_shader) return; + glad_glDispatchCompute = (PFNGLDISPATCHCOMPUTEPROC)load("glDispatchCompute"); + glad_glDispatchComputeIndirect = (PFNGLDISPATCHCOMPUTEINDIRECTPROC)load("glDispatchComputeIndirect"); +} +static void load_GL_ARB_compute_variable_group_size(GLADloadproc load) { + if(!GLAD_GL_ARB_compute_variable_group_size) return; + glad_glDispatchComputeGroupSizeARB = (PFNGLDISPATCHCOMPUTEGROUPSIZEARBPROC)load("glDispatchComputeGroupSizeARB"); +} +static void load_GL_ARB_copy_buffer(GLADloadproc load) { + if(!GLAD_GL_ARB_copy_buffer) return; + glad_glCopyBufferSubData = (PFNGLCOPYBUFFERSUBDATAPROC)load("glCopyBufferSubData"); +} +static void load_GL_ARB_copy_image(GLADloadproc load) { + if(!GLAD_GL_ARB_copy_image) return; + glad_glCopyImageSubData = (PFNGLCOPYIMAGESUBDATAPROC)load("glCopyImageSubData"); +} +static void load_GL_ARB_debug_output(GLADloadproc load) { + if(!GLAD_GL_ARB_debug_output) return; + glad_glDebugMessageControlARB = (PFNGLDEBUGMESSAGECONTROLARBPROC)load("glDebugMessageControlARB"); + glad_glDebugMessageInsertARB = (PFNGLDEBUGMESSAGEINSERTARBPROC)load("glDebugMessageInsertARB"); + glad_glDebugMessageCallbackARB = (PFNGLDEBUGMESSAGECALLBACKARBPROC)load("glDebugMessageCallbackARB"); + glad_glGetDebugMessageLogARB = (PFNGLGETDEBUGMESSAGELOGARBPROC)load("glGetDebugMessageLogARB"); +} +static void load_GL_ARB_direct_state_access(GLADloadproc load) { + 
if(!GLAD_GL_ARB_direct_state_access) return; + glad_glCreateTransformFeedbacks = (PFNGLCREATETRANSFORMFEEDBACKSPROC)load("glCreateTransformFeedbacks"); + glad_glTransformFeedbackBufferBase = (PFNGLTRANSFORMFEEDBACKBUFFERBASEPROC)load("glTransformFeedbackBufferBase"); + glad_glTransformFeedbackBufferRange = (PFNGLTRANSFORMFEEDBACKBUFFERRANGEPROC)load("glTransformFeedbackBufferRange"); + glad_glGetTransformFeedbackiv = (PFNGLGETTRANSFORMFEEDBACKIVPROC)load("glGetTransformFeedbackiv"); + glad_glGetTransformFeedbacki_v = (PFNGLGETTRANSFORMFEEDBACKI_VPROC)load("glGetTransformFeedbacki_v"); + glad_glGetTransformFeedbacki64_v = (PFNGLGETTRANSFORMFEEDBACKI64_VPROC)load("glGetTransformFeedbacki64_v"); + glad_glCreateBuffers = (PFNGLCREATEBUFFERSPROC)load("glCreateBuffers"); + glad_glNamedBufferStorage = (PFNGLNAMEDBUFFERSTORAGEPROC)load("glNamedBufferStorage"); + glad_glNamedBufferData = (PFNGLNAMEDBUFFERDATAPROC)load("glNamedBufferData"); + glad_glNamedBufferSubData = (PFNGLNAMEDBUFFERSUBDATAPROC)load("glNamedBufferSubData"); + glad_glCopyNamedBufferSubData = (PFNGLCOPYNAMEDBUFFERSUBDATAPROC)load("glCopyNamedBufferSubData"); + glad_glClearNamedBufferData = (PFNGLCLEARNAMEDBUFFERDATAPROC)load("glClearNamedBufferData"); + glad_glClearNamedBufferSubData = (PFNGLCLEARNAMEDBUFFERSUBDATAPROC)load("glClearNamedBufferSubData"); + glad_glMapNamedBuffer = (PFNGLMAPNAMEDBUFFERPROC)load("glMapNamedBuffer"); + glad_glMapNamedBufferRange = (PFNGLMAPNAMEDBUFFERRANGEPROC)load("glMapNamedBufferRange"); + glad_glUnmapNamedBuffer = (PFNGLUNMAPNAMEDBUFFERPROC)load("glUnmapNamedBuffer"); + glad_glFlushMappedNamedBufferRange = (PFNGLFLUSHMAPPEDNAMEDBUFFERRANGEPROC)load("glFlushMappedNamedBufferRange"); + glad_glGetNamedBufferParameteriv = (PFNGLGETNAMEDBUFFERPARAMETERIVPROC)load("glGetNamedBufferParameteriv"); + glad_glGetNamedBufferParameteri64v = (PFNGLGETNAMEDBUFFERPARAMETERI64VPROC)load("glGetNamedBufferParameteri64v"); + glad_glGetNamedBufferPointerv = 
(PFNGLGETNAMEDBUFFERPOINTERVPROC)load("glGetNamedBufferPointerv"); + glad_glGetNamedBufferSubData = (PFNGLGETNAMEDBUFFERSUBDATAPROC)load("glGetNamedBufferSubData"); + glad_glCreateFramebuffers = (PFNGLCREATEFRAMEBUFFERSPROC)load("glCreateFramebuffers"); + glad_glNamedFramebufferRenderbuffer = (PFNGLNAMEDFRAMEBUFFERRENDERBUFFERPROC)load("glNamedFramebufferRenderbuffer"); + glad_glNamedFramebufferParameteri = (PFNGLNAMEDFRAMEBUFFERPARAMETERIPROC)load("glNamedFramebufferParameteri"); + glad_glNamedFramebufferTexture = (PFNGLNAMEDFRAMEBUFFERTEXTUREPROC)load("glNamedFramebufferTexture"); + glad_glNamedFramebufferTextureLayer = (PFNGLNAMEDFRAMEBUFFERTEXTURELAYERPROC)load("glNamedFramebufferTextureLayer"); + glad_glNamedFramebufferDrawBuffer = (PFNGLNAMEDFRAMEBUFFERDRAWBUFFERPROC)load("glNamedFramebufferDrawBuffer"); + glad_glNamedFramebufferDrawBuffers = (PFNGLNAMEDFRAMEBUFFERDRAWBUFFERSPROC)load("glNamedFramebufferDrawBuffers"); + glad_glNamedFramebufferReadBuffer = (PFNGLNAMEDFRAMEBUFFERREADBUFFERPROC)load("glNamedFramebufferReadBuffer"); + glad_glInvalidateNamedFramebufferData = (PFNGLINVALIDATENAMEDFRAMEBUFFERDATAPROC)load("glInvalidateNamedFramebufferData"); + glad_glInvalidateNamedFramebufferSubData = (PFNGLINVALIDATENAMEDFRAMEBUFFERSUBDATAPROC)load("glInvalidateNamedFramebufferSubData"); + glad_glClearNamedFramebufferiv = (PFNGLCLEARNAMEDFRAMEBUFFERIVPROC)load("glClearNamedFramebufferiv"); + glad_glClearNamedFramebufferuiv = (PFNGLCLEARNAMEDFRAMEBUFFERUIVPROC)load("glClearNamedFramebufferuiv"); + glad_glClearNamedFramebufferfv = (PFNGLCLEARNAMEDFRAMEBUFFERFVPROC)load("glClearNamedFramebufferfv"); + glad_glClearNamedFramebufferfi = (PFNGLCLEARNAMEDFRAMEBUFFERFIPROC)load("glClearNamedFramebufferfi"); + glad_glBlitNamedFramebuffer = (PFNGLBLITNAMEDFRAMEBUFFERPROC)load("glBlitNamedFramebuffer"); + glad_glCheckNamedFramebufferStatus = (PFNGLCHECKNAMEDFRAMEBUFFERSTATUSPROC)load("glCheckNamedFramebufferStatus"); + glad_glGetNamedFramebufferParameteriv = 
(PFNGLGETNAMEDFRAMEBUFFERPARAMETERIVPROC)load("glGetNamedFramebufferParameteriv"); + glad_glGetNamedFramebufferAttachmentParameteriv = (PFNGLGETNAMEDFRAMEBUFFERATTACHMENTPARAMETERIVPROC)load("glGetNamedFramebufferAttachmentParameteriv"); + glad_glCreateRenderbuffers = (PFNGLCREATERENDERBUFFERSPROC)load("glCreateRenderbuffers"); + glad_glNamedRenderbufferStorage = (PFNGLNAMEDRENDERBUFFERSTORAGEPROC)load("glNamedRenderbufferStorage"); + glad_glNamedRenderbufferStorageMultisample = (PFNGLNAMEDRENDERBUFFERSTORAGEMULTISAMPLEPROC)load("glNamedRenderbufferStorageMultisample"); + glad_glGetNamedRenderbufferParameteriv = (PFNGLGETNAMEDRENDERBUFFERPARAMETERIVPROC)load("glGetNamedRenderbufferParameteriv"); + glad_glCreateTextures = (PFNGLCREATETEXTURESPROC)load("glCreateTextures"); + glad_glTextureBuffer = (PFNGLTEXTUREBUFFERPROC)load("glTextureBuffer"); + glad_glTextureBufferRange = (PFNGLTEXTUREBUFFERRANGEPROC)load("glTextureBufferRange"); + glad_glTextureStorage1D = (PFNGLTEXTURESTORAGE1DPROC)load("glTextureStorage1D"); + glad_glTextureStorage2D = (PFNGLTEXTURESTORAGE2DPROC)load("glTextureStorage2D"); + glad_glTextureStorage3D = (PFNGLTEXTURESTORAGE3DPROC)load("glTextureStorage3D"); + glad_glTextureStorage2DMultisample = (PFNGLTEXTURESTORAGE2DMULTISAMPLEPROC)load("glTextureStorage2DMultisample"); + glad_glTextureStorage3DMultisample = (PFNGLTEXTURESTORAGE3DMULTISAMPLEPROC)load("glTextureStorage3DMultisample"); + glad_glTextureSubImage1D = (PFNGLTEXTURESUBIMAGE1DPROC)load("glTextureSubImage1D"); + glad_glTextureSubImage2D = (PFNGLTEXTURESUBIMAGE2DPROC)load("glTextureSubImage2D"); + glad_glTextureSubImage3D = (PFNGLTEXTURESUBIMAGE3DPROC)load("glTextureSubImage3D"); + glad_glCompressedTextureSubImage1D = (PFNGLCOMPRESSEDTEXTURESUBIMAGE1DPROC)load("glCompressedTextureSubImage1D"); + glad_glCompressedTextureSubImage2D = (PFNGLCOMPRESSEDTEXTURESUBIMAGE2DPROC)load("glCompressedTextureSubImage2D"); + glad_glCompressedTextureSubImage3D = 
(PFNGLCOMPRESSEDTEXTURESUBIMAGE3DPROC)load("glCompressedTextureSubImage3D"); + glad_glCopyTextureSubImage1D = (PFNGLCOPYTEXTURESUBIMAGE1DPROC)load("glCopyTextureSubImage1D"); + glad_glCopyTextureSubImage2D = (PFNGLCOPYTEXTURESUBIMAGE2DPROC)load("glCopyTextureSubImage2D"); + glad_glCopyTextureSubImage3D = (PFNGLCOPYTEXTURESUBIMAGE3DPROC)load("glCopyTextureSubImage3D"); + glad_glTextureParameterf = (PFNGLTEXTUREPARAMETERFPROC)load("glTextureParameterf"); + glad_glTextureParameterfv = (PFNGLTEXTUREPARAMETERFVPROC)load("glTextureParameterfv"); + glad_glTextureParameteri = (PFNGLTEXTUREPARAMETERIPROC)load("glTextureParameteri"); + glad_glTextureParameterIiv = (PFNGLTEXTUREPARAMETERIIVPROC)load("glTextureParameterIiv"); + glad_glTextureParameterIuiv = (PFNGLTEXTUREPARAMETERIUIVPROC)load("glTextureParameterIuiv"); + glad_glTextureParameteriv = (PFNGLTEXTUREPARAMETERIVPROC)load("glTextureParameteriv"); + glad_glGenerateTextureMipmap = (PFNGLGENERATETEXTUREMIPMAPPROC)load("glGenerateTextureMipmap"); + glad_glBindTextureUnit = (PFNGLBINDTEXTUREUNITPROC)load("glBindTextureUnit"); + glad_glGetTextureImage = (PFNGLGETTEXTUREIMAGEPROC)load("glGetTextureImage"); + glad_glGetCompressedTextureImage = (PFNGLGETCOMPRESSEDTEXTUREIMAGEPROC)load("glGetCompressedTextureImage"); + glad_glGetTextureLevelParameterfv = (PFNGLGETTEXTURELEVELPARAMETERFVPROC)load("glGetTextureLevelParameterfv"); + glad_glGetTextureLevelParameteriv = (PFNGLGETTEXTURELEVELPARAMETERIVPROC)load("glGetTextureLevelParameteriv"); + glad_glGetTextureParameterfv = (PFNGLGETTEXTUREPARAMETERFVPROC)load("glGetTextureParameterfv"); + glad_glGetTextureParameterIiv = (PFNGLGETTEXTUREPARAMETERIIVPROC)load("glGetTextureParameterIiv"); + glad_glGetTextureParameterIuiv = (PFNGLGETTEXTUREPARAMETERIUIVPROC)load("glGetTextureParameterIuiv"); + glad_glGetTextureParameteriv = (PFNGLGETTEXTUREPARAMETERIVPROC)load("glGetTextureParameteriv"); + glad_glCreateVertexArrays = (PFNGLCREATEVERTEXARRAYSPROC)load("glCreateVertexArrays"); + 
glad_glDisableVertexArrayAttrib = (PFNGLDISABLEVERTEXARRAYATTRIBPROC)load("glDisableVertexArrayAttrib"); + glad_glEnableVertexArrayAttrib = (PFNGLENABLEVERTEXARRAYATTRIBPROC)load("glEnableVertexArrayAttrib"); + glad_glVertexArrayElementBuffer = (PFNGLVERTEXARRAYELEMENTBUFFERPROC)load("glVertexArrayElementBuffer"); + glad_glVertexArrayVertexBuffer = (PFNGLVERTEXARRAYVERTEXBUFFERPROC)load("glVertexArrayVertexBuffer"); + glad_glVertexArrayVertexBuffers = (PFNGLVERTEXARRAYVERTEXBUFFERSPROC)load("glVertexArrayVertexBuffers"); + glad_glVertexArrayAttribBinding = (PFNGLVERTEXARRAYATTRIBBINDINGPROC)load("glVertexArrayAttribBinding"); + glad_glVertexArrayAttribFormat = (PFNGLVERTEXARRAYATTRIBFORMATPROC)load("glVertexArrayAttribFormat"); + glad_glVertexArrayAttribIFormat = (PFNGLVERTEXARRAYATTRIBIFORMATPROC)load("glVertexArrayAttribIFormat"); + glad_glVertexArrayAttribLFormat = (PFNGLVERTEXARRAYATTRIBLFORMATPROC)load("glVertexArrayAttribLFormat"); + glad_glVertexArrayBindingDivisor = (PFNGLVERTEXARRAYBINDINGDIVISORPROC)load("glVertexArrayBindingDivisor"); + glad_glGetVertexArrayiv = (PFNGLGETVERTEXARRAYIVPROC)load("glGetVertexArrayiv"); + glad_glGetVertexArrayIndexediv = (PFNGLGETVERTEXARRAYINDEXEDIVPROC)load("glGetVertexArrayIndexediv"); + glad_glGetVertexArrayIndexed64iv = (PFNGLGETVERTEXARRAYINDEXED64IVPROC)load("glGetVertexArrayIndexed64iv"); + glad_glCreateSamplers = (PFNGLCREATESAMPLERSPROC)load("glCreateSamplers"); + glad_glCreateProgramPipelines = (PFNGLCREATEPROGRAMPIPELINESPROC)load("glCreateProgramPipelines"); + glad_glCreateQueries = (PFNGLCREATEQUERIESPROC)load("glCreateQueries"); + glad_glGetQueryBufferObjecti64v = (PFNGLGETQUERYBUFFEROBJECTI64VPROC)load("glGetQueryBufferObjecti64v"); + glad_glGetQueryBufferObjectiv = (PFNGLGETQUERYBUFFEROBJECTIVPROC)load("glGetQueryBufferObjectiv"); + glad_glGetQueryBufferObjectui64v = (PFNGLGETQUERYBUFFEROBJECTUI64VPROC)load("glGetQueryBufferObjectui64v"); + glad_glGetQueryBufferObjectuiv = 
(PFNGLGETQUERYBUFFEROBJECTUIVPROC)load("glGetQueryBufferObjectuiv"); +} +static void load_GL_ARB_draw_buffers(GLADloadproc load) { + if(!GLAD_GL_ARB_draw_buffers) return; + glad_glDrawBuffersARB = (PFNGLDRAWBUFFERSARBPROC)load("glDrawBuffersARB"); +} +static void load_GL_ARB_draw_buffers_blend(GLADloadproc load) { + if(!GLAD_GL_ARB_draw_buffers_blend) return; + glad_glBlendEquationiARB = (PFNGLBLENDEQUATIONIARBPROC)load("glBlendEquationiARB"); + glad_glBlendEquationSeparateiARB = (PFNGLBLENDEQUATIONSEPARATEIARBPROC)load("glBlendEquationSeparateiARB"); + glad_glBlendFunciARB = (PFNGLBLENDFUNCIARBPROC)load("glBlendFunciARB"); + glad_glBlendFuncSeparateiARB = (PFNGLBLENDFUNCSEPARATEIARBPROC)load("glBlendFuncSeparateiARB"); +} +static void load_GL_ARB_draw_elements_base_vertex(GLADloadproc load) { + if(!GLAD_GL_ARB_draw_elements_base_vertex) return; + glad_glDrawElementsBaseVertex = (PFNGLDRAWELEMENTSBASEVERTEXPROC)load("glDrawElementsBaseVertex"); + glad_glDrawRangeElementsBaseVertex = (PFNGLDRAWRANGEELEMENTSBASEVERTEXPROC)load("glDrawRangeElementsBaseVertex"); + glad_glDrawElementsInstancedBaseVertex = (PFNGLDRAWELEMENTSINSTANCEDBASEVERTEXPROC)load("glDrawElementsInstancedBaseVertex"); + glad_glMultiDrawElementsBaseVertex = (PFNGLMULTIDRAWELEMENTSBASEVERTEXPROC)load("glMultiDrawElementsBaseVertex"); +} +static void load_GL_ARB_draw_indirect(GLADloadproc load) { + if(!GLAD_GL_ARB_draw_indirect) return; + glad_glDrawArraysIndirect = (PFNGLDRAWARRAYSINDIRECTPROC)load("glDrawArraysIndirect"); + glad_glDrawElementsIndirect = (PFNGLDRAWELEMENTSINDIRECTPROC)load("glDrawElementsIndirect"); +} +static void load_GL_ARB_draw_instanced(GLADloadproc load) { + if(!GLAD_GL_ARB_draw_instanced) return; + glad_glDrawArraysInstancedARB = (PFNGLDRAWARRAYSINSTANCEDARBPROC)load("glDrawArraysInstancedARB"); + glad_glDrawElementsInstancedARB = (PFNGLDRAWELEMENTSINSTANCEDARBPROC)load("glDrawElementsInstancedARB"); +} +static void load_GL_ARB_fragment_program(GLADloadproc load) { + 
if(!GLAD_GL_ARB_fragment_program) return; + glad_glProgramStringARB = (PFNGLPROGRAMSTRINGARBPROC)load("glProgramStringARB"); + glad_glBindProgramARB = (PFNGLBINDPROGRAMARBPROC)load("glBindProgramARB"); + glad_glDeleteProgramsARB = (PFNGLDELETEPROGRAMSARBPROC)load("glDeleteProgramsARB"); + glad_glGenProgramsARB = (PFNGLGENPROGRAMSARBPROC)load("glGenProgramsARB"); + glad_glProgramEnvParameter4dARB = (PFNGLPROGRAMENVPARAMETER4DARBPROC)load("glProgramEnvParameter4dARB"); + glad_glProgramEnvParameter4dvARB = (PFNGLPROGRAMENVPARAMETER4DVARBPROC)load("glProgramEnvParameter4dvARB"); + glad_glProgramEnvParameter4fARB = (PFNGLPROGRAMENVPARAMETER4FARBPROC)load("glProgramEnvParameter4fARB"); + glad_glProgramEnvParameter4fvARB = (PFNGLPROGRAMENVPARAMETER4FVARBPROC)load("glProgramEnvParameter4fvARB"); + glad_glProgramLocalParameter4dARB = (PFNGLPROGRAMLOCALPARAMETER4DARBPROC)load("glProgramLocalParameter4dARB"); + glad_glProgramLocalParameter4dvARB = (PFNGLPROGRAMLOCALPARAMETER4DVARBPROC)load("glProgramLocalParameter4dvARB"); + glad_glProgramLocalParameter4fARB = (PFNGLPROGRAMLOCALPARAMETER4FARBPROC)load("glProgramLocalParameter4fARB"); + glad_glProgramLocalParameter4fvARB = (PFNGLPROGRAMLOCALPARAMETER4FVARBPROC)load("glProgramLocalParameter4fvARB"); + glad_glGetProgramEnvParameterdvARB = (PFNGLGETPROGRAMENVPARAMETERDVARBPROC)load("glGetProgramEnvParameterdvARB"); + glad_glGetProgramEnvParameterfvARB = (PFNGLGETPROGRAMENVPARAMETERFVARBPROC)load("glGetProgramEnvParameterfvARB"); + glad_glGetProgramLocalParameterdvARB = (PFNGLGETPROGRAMLOCALPARAMETERDVARBPROC)load("glGetProgramLocalParameterdvARB"); + glad_glGetProgramLocalParameterfvARB = (PFNGLGETPROGRAMLOCALPARAMETERFVARBPROC)load("glGetProgramLocalParameterfvARB"); + glad_glGetProgramivARB = (PFNGLGETPROGRAMIVARBPROC)load("glGetProgramivARB"); + glad_glGetProgramStringARB = (PFNGLGETPROGRAMSTRINGARBPROC)load("glGetProgramStringARB"); + glad_glIsProgramARB = (PFNGLISPROGRAMARBPROC)load("glIsProgramARB"); +} +static void 
load_GL_ARB_framebuffer_no_attachments(GLADloadproc load) { + if(!GLAD_GL_ARB_framebuffer_no_attachments) return; + glad_glFramebufferParameteri = (PFNGLFRAMEBUFFERPARAMETERIPROC)load("glFramebufferParameteri"); + glad_glGetFramebufferParameteriv = (PFNGLGETFRAMEBUFFERPARAMETERIVPROC)load("glGetFramebufferParameteriv"); +} +static void load_GL_ARB_framebuffer_object(GLADloadproc load) { + if(!GLAD_GL_ARB_framebuffer_object) return; + glad_glIsRenderbuffer = (PFNGLISRENDERBUFFERPROC)load("glIsRenderbuffer"); + glad_glBindRenderbuffer = (PFNGLBINDRENDERBUFFERPROC)load("glBindRenderbuffer"); + glad_glDeleteRenderbuffers = (PFNGLDELETERENDERBUFFERSPROC)load("glDeleteRenderbuffers"); + glad_glGenRenderbuffers = (PFNGLGENRENDERBUFFERSPROC)load("glGenRenderbuffers"); + glad_glRenderbufferStorage = (PFNGLRENDERBUFFERSTORAGEPROC)load("glRenderbufferStorage"); + glad_glGetRenderbufferParameteriv = (PFNGLGETRENDERBUFFERPARAMETERIVPROC)load("glGetRenderbufferParameteriv"); + glad_glIsFramebuffer = (PFNGLISFRAMEBUFFERPROC)load("glIsFramebuffer"); + glad_glBindFramebuffer = (PFNGLBINDFRAMEBUFFERPROC)load("glBindFramebuffer"); + glad_glDeleteFramebuffers = (PFNGLDELETEFRAMEBUFFERSPROC)load("glDeleteFramebuffers"); + glad_glGenFramebuffers = (PFNGLGENFRAMEBUFFERSPROC)load("glGenFramebuffers"); + glad_glCheckFramebufferStatus = (PFNGLCHECKFRAMEBUFFERSTATUSPROC)load("glCheckFramebufferStatus"); + glad_glFramebufferTexture1D = (PFNGLFRAMEBUFFERTEXTURE1DPROC)load("glFramebufferTexture1D"); + glad_glFramebufferTexture2D = (PFNGLFRAMEBUFFERTEXTURE2DPROC)load("glFramebufferTexture2D"); + glad_glFramebufferTexture3D = (PFNGLFRAMEBUFFERTEXTURE3DPROC)load("glFramebufferTexture3D"); + glad_glFramebufferRenderbuffer = (PFNGLFRAMEBUFFERRENDERBUFFERPROC)load("glFramebufferRenderbuffer"); + glad_glGetFramebufferAttachmentParameteriv = (PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVPROC)load("glGetFramebufferAttachmentParameteriv"); + glad_glGenerateMipmap = 
(PFNGLGENERATEMIPMAPPROC)load("glGenerateMipmap"); + glad_glBlitFramebuffer = (PFNGLBLITFRAMEBUFFERPROC)load("glBlitFramebuffer"); + glad_glRenderbufferStorageMultisample = (PFNGLRENDERBUFFERSTORAGEMULTISAMPLEPROC)load("glRenderbufferStorageMultisample"); + glad_glFramebufferTextureLayer = (PFNGLFRAMEBUFFERTEXTURELAYERPROC)load("glFramebufferTextureLayer"); +} +static void load_GL_ARB_geometry_shader4(GLADloadproc load) { + if(!GLAD_GL_ARB_geometry_shader4) return; + glad_glProgramParameteriARB = (PFNGLPROGRAMPARAMETERIARBPROC)load("glProgramParameteriARB"); + glad_glFramebufferTextureARB = (PFNGLFRAMEBUFFERTEXTUREARBPROC)load("glFramebufferTextureARB"); + glad_glFramebufferTextureLayerARB = (PFNGLFRAMEBUFFERTEXTURELAYERARBPROC)load("glFramebufferTextureLayerARB"); + glad_glFramebufferTextureFaceARB = (PFNGLFRAMEBUFFERTEXTUREFACEARBPROC)load("glFramebufferTextureFaceARB"); +} +static void load_GL_ARB_get_program_binary(GLADloadproc load) { + if(!GLAD_GL_ARB_get_program_binary) return; + glad_glGetProgramBinary = (PFNGLGETPROGRAMBINARYPROC)load("glGetProgramBinary"); + glad_glProgramBinary = (PFNGLPROGRAMBINARYPROC)load("glProgramBinary"); + glad_glProgramParameteri = (PFNGLPROGRAMPARAMETERIPROC)load("glProgramParameteri"); +} +static void load_GL_ARB_get_texture_sub_image(GLADloadproc load) { + if(!GLAD_GL_ARB_get_texture_sub_image) return; + glad_glGetTextureSubImage = (PFNGLGETTEXTURESUBIMAGEPROC)load("glGetTextureSubImage"); + glad_glGetCompressedTextureSubImage = (PFNGLGETCOMPRESSEDTEXTURESUBIMAGEPROC)load("glGetCompressedTextureSubImage"); +} +static void load_GL_ARB_gl_spirv(GLADloadproc load) { + if(!GLAD_GL_ARB_gl_spirv) return; + glad_glSpecializeShaderARB = (PFNGLSPECIALIZESHADERARBPROC)load("glSpecializeShaderARB"); +} +static void load_GL_ARB_gpu_shader_fp64(GLADloadproc load) { + if(!GLAD_GL_ARB_gpu_shader_fp64) return; + glad_glUniform1d = (PFNGLUNIFORM1DPROC)load("glUniform1d"); + glad_glUniform2d = (PFNGLUNIFORM2DPROC)load("glUniform2d"); + 
glad_glUniform3d = (PFNGLUNIFORM3DPROC)load("glUniform3d"); + glad_glUniform4d = (PFNGLUNIFORM4DPROC)load("glUniform4d"); + glad_glUniform1dv = (PFNGLUNIFORM1DVPROC)load("glUniform1dv"); + glad_glUniform2dv = (PFNGLUNIFORM2DVPROC)load("glUniform2dv"); + glad_glUniform3dv = (PFNGLUNIFORM3DVPROC)load("glUniform3dv"); + glad_glUniform4dv = (PFNGLUNIFORM4DVPROC)load("glUniform4dv"); + glad_glUniformMatrix2dv = (PFNGLUNIFORMMATRIX2DVPROC)load("glUniformMatrix2dv"); + glad_glUniformMatrix3dv = (PFNGLUNIFORMMATRIX3DVPROC)load("glUniformMatrix3dv"); + glad_glUniformMatrix4dv = (PFNGLUNIFORMMATRIX4DVPROC)load("glUniformMatrix4dv"); + glad_glUniformMatrix2x3dv = (PFNGLUNIFORMMATRIX2X3DVPROC)load("glUniformMatrix2x3dv"); + glad_glUniformMatrix2x4dv = (PFNGLUNIFORMMATRIX2X4DVPROC)load("glUniformMatrix2x4dv"); + glad_glUniformMatrix3x2dv = (PFNGLUNIFORMMATRIX3X2DVPROC)load("glUniformMatrix3x2dv"); + glad_glUniformMatrix3x4dv = (PFNGLUNIFORMMATRIX3X4DVPROC)load("glUniformMatrix3x4dv"); + glad_glUniformMatrix4x2dv = (PFNGLUNIFORMMATRIX4X2DVPROC)load("glUniformMatrix4x2dv"); + glad_glUniformMatrix4x3dv = (PFNGLUNIFORMMATRIX4X3DVPROC)load("glUniformMatrix4x3dv"); + glad_glGetUniformdv = (PFNGLGETUNIFORMDVPROC)load("glGetUniformdv"); +} +static void load_GL_ARB_gpu_shader_int64(GLADloadproc load) { + if(!GLAD_GL_ARB_gpu_shader_int64) return; + glad_glUniform1i64ARB = (PFNGLUNIFORM1I64ARBPROC)load("glUniform1i64ARB"); + glad_glUniform2i64ARB = (PFNGLUNIFORM2I64ARBPROC)load("glUniform2i64ARB"); + glad_glUniform3i64ARB = (PFNGLUNIFORM3I64ARBPROC)load("glUniform3i64ARB"); + glad_glUniform4i64ARB = (PFNGLUNIFORM4I64ARBPROC)load("glUniform4i64ARB"); + glad_glUniform1i64vARB = (PFNGLUNIFORM1I64VARBPROC)load("glUniform1i64vARB"); + glad_glUniform2i64vARB = (PFNGLUNIFORM2I64VARBPROC)load("glUniform2i64vARB"); + glad_glUniform3i64vARB = (PFNGLUNIFORM3I64VARBPROC)load("glUniform3i64vARB"); + glad_glUniform4i64vARB = (PFNGLUNIFORM4I64VARBPROC)load("glUniform4i64vARB"); + glad_glUniform1ui64ARB 
= (PFNGLUNIFORM1UI64ARBPROC)load("glUniform1ui64ARB"); + glad_glUniform2ui64ARB = (PFNGLUNIFORM2UI64ARBPROC)load("glUniform2ui64ARB"); + glad_glUniform3ui64ARB = (PFNGLUNIFORM3UI64ARBPROC)load("glUniform3ui64ARB"); + glad_glUniform4ui64ARB = (PFNGLUNIFORM4UI64ARBPROC)load("glUniform4ui64ARB"); + glad_glUniform1ui64vARB = (PFNGLUNIFORM1UI64VARBPROC)load("glUniform1ui64vARB"); + glad_glUniform2ui64vARB = (PFNGLUNIFORM2UI64VARBPROC)load("glUniform2ui64vARB"); + glad_glUniform3ui64vARB = (PFNGLUNIFORM3UI64VARBPROC)load("glUniform3ui64vARB"); + glad_glUniform4ui64vARB = (PFNGLUNIFORM4UI64VARBPROC)load("glUniform4ui64vARB"); + glad_glGetUniformi64vARB = (PFNGLGETUNIFORMI64VARBPROC)load("glGetUniformi64vARB"); + glad_glGetUniformui64vARB = (PFNGLGETUNIFORMUI64VARBPROC)load("glGetUniformui64vARB"); + glad_glGetnUniformi64vARB = (PFNGLGETNUNIFORMI64VARBPROC)load("glGetnUniformi64vARB"); + glad_glGetnUniformui64vARB = (PFNGLGETNUNIFORMUI64VARBPROC)load("glGetnUniformui64vARB"); + glad_glProgramUniform1i64ARB = (PFNGLPROGRAMUNIFORM1I64ARBPROC)load("glProgramUniform1i64ARB"); + glad_glProgramUniform2i64ARB = (PFNGLPROGRAMUNIFORM2I64ARBPROC)load("glProgramUniform2i64ARB"); + glad_glProgramUniform3i64ARB = (PFNGLPROGRAMUNIFORM3I64ARBPROC)load("glProgramUniform3i64ARB"); + glad_glProgramUniform4i64ARB = (PFNGLPROGRAMUNIFORM4I64ARBPROC)load("glProgramUniform4i64ARB"); + glad_glProgramUniform1i64vARB = (PFNGLPROGRAMUNIFORM1I64VARBPROC)load("glProgramUniform1i64vARB"); + glad_glProgramUniform2i64vARB = (PFNGLPROGRAMUNIFORM2I64VARBPROC)load("glProgramUniform2i64vARB"); + glad_glProgramUniform3i64vARB = (PFNGLPROGRAMUNIFORM3I64VARBPROC)load("glProgramUniform3i64vARB"); + glad_glProgramUniform4i64vARB = (PFNGLPROGRAMUNIFORM4I64VARBPROC)load("glProgramUniform4i64vARB"); + glad_glProgramUniform1ui64ARB = (PFNGLPROGRAMUNIFORM1UI64ARBPROC)load("glProgramUniform1ui64ARB"); + glad_glProgramUniform2ui64ARB = (PFNGLPROGRAMUNIFORM2UI64ARBPROC)load("glProgramUniform2ui64ARB"); + 
glad_glProgramUniform3ui64ARB = (PFNGLPROGRAMUNIFORM3UI64ARBPROC)load("glProgramUniform3ui64ARB"); + glad_glProgramUniform4ui64ARB = (PFNGLPROGRAMUNIFORM4UI64ARBPROC)load("glProgramUniform4ui64ARB"); + glad_glProgramUniform1ui64vARB = (PFNGLPROGRAMUNIFORM1UI64VARBPROC)load("glProgramUniform1ui64vARB"); + glad_glProgramUniform2ui64vARB = (PFNGLPROGRAMUNIFORM2UI64VARBPROC)load("glProgramUniform2ui64vARB"); + glad_glProgramUniform3ui64vARB = (PFNGLPROGRAMUNIFORM3UI64VARBPROC)load("glProgramUniform3ui64vARB"); + glad_glProgramUniform4ui64vARB = (PFNGLPROGRAMUNIFORM4UI64VARBPROC)load("glProgramUniform4ui64vARB"); +} +static void load_GL_ARB_imaging(GLADloadproc load) { + if(!GLAD_GL_ARB_imaging) return; + glad_glBlendColor = (PFNGLBLENDCOLORPROC)load("glBlendColor"); + glad_glBlendEquation = (PFNGLBLENDEQUATIONPROC)load("glBlendEquation"); + glad_glColorTable = (PFNGLCOLORTABLEPROC)load("glColorTable"); + glad_glColorTableParameterfv = (PFNGLCOLORTABLEPARAMETERFVPROC)load("glColorTableParameterfv"); + glad_glColorTableParameteriv = (PFNGLCOLORTABLEPARAMETERIVPROC)load("glColorTableParameteriv"); + glad_glCopyColorTable = (PFNGLCOPYCOLORTABLEPROC)load("glCopyColorTable"); + glad_glGetColorTable = (PFNGLGETCOLORTABLEPROC)load("glGetColorTable"); + glad_glGetColorTableParameterfv = (PFNGLGETCOLORTABLEPARAMETERFVPROC)load("glGetColorTableParameterfv"); + glad_glGetColorTableParameteriv = (PFNGLGETCOLORTABLEPARAMETERIVPROC)load("glGetColorTableParameteriv"); + glad_glColorSubTable = (PFNGLCOLORSUBTABLEPROC)load("glColorSubTable"); + glad_glCopyColorSubTable = (PFNGLCOPYCOLORSUBTABLEPROC)load("glCopyColorSubTable"); + glad_glConvolutionFilter1D = (PFNGLCONVOLUTIONFILTER1DPROC)load("glConvolutionFilter1D"); + glad_glConvolutionFilter2D = (PFNGLCONVOLUTIONFILTER2DPROC)load("glConvolutionFilter2D"); + glad_glConvolutionParameterf = (PFNGLCONVOLUTIONPARAMETERFPROC)load("glConvolutionParameterf"); + glad_glConvolutionParameterfv = 
(PFNGLCONVOLUTIONPARAMETERFVPROC)load("glConvolutionParameterfv"); + glad_glConvolutionParameteri = (PFNGLCONVOLUTIONPARAMETERIPROC)load("glConvolutionParameteri"); + glad_glConvolutionParameteriv = (PFNGLCONVOLUTIONPARAMETERIVPROC)load("glConvolutionParameteriv"); + glad_glCopyConvolutionFilter1D = (PFNGLCOPYCONVOLUTIONFILTER1DPROC)load("glCopyConvolutionFilter1D"); + glad_glCopyConvolutionFilter2D = (PFNGLCOPYCONVOLUTIONFILTER2DPROC)load("glCopyConvolutionFilter2D"); + glad_glGetConvolutionFilter = (PFNGLGETCONVOLUTIONFILTERPROC)load("glGetConvolutionFilter"); + glad_glGetConvolutionParameterfv = (PFNGLGETCONVOLUTIONPARAMETERFVPROC)load("glGetConvolutionParameterfv"); + glad_glGetConvolutionParameteriv = (PFNGLGETCONVOLUTIONPARAMETERIVPROC)load("glGetConvolutionParameteriv"); + glad_glGetSeparableFilter = (PFNGLGETSEPARABLEFILTERPROC)load("glGetSeparableFilter"); + glad_glSeparableFilter2D = (PFNGLSEPARABLEFILTER2DPROC)load("glSeparableFilter2D"); + glad_glGetHistogram = (PFNGLGETHISTOGRAMPROC)load("glGetHistogram"); + glad_glGetHistogramParameterfv = (PFNGLGETHISTOGRAMPARAMETERFVPROC)load("glGetHistogramParameterfv"); + glad_glGetHistogramParameteriv = (PFNGLGETHISTOGRAMPARAMETERIVPROC)load("glGetHistogramParameteriv"); + glad_glGetMinmax = (PFNGLGETMINMAXPROC)load("glGetMinmax"); + glad_glGetMinmaxParameterfv = (PFNGLGETMINMAXPARAMETERFVPROC)load("glGetMinmaxParameterfv"); + glad_glGetMinmaxParameteriv = (PFNGLGETMINMAXPARAMETERIVPROC)load("glGetMinmaxParameteriv"); + glad_glHistogram = (PFNGLHISTOGRAMPROC)load("glHistogram"); + glad_glMinmax = (PFNGLMINMAXPROC)load("glMinmax"); + glad_glResetHistogram = (PFNGLRESETHISTOGRAMPROC)load("glResetHistogram"); + glad_glResetMinmax = (PFNGLRESETMINMAXPROC)load("glResetMinmax"); +} +static void load_GL_ARB_indirect_parameters(GLADloadproc load) { + if(!GLAD_GL_ARB_indirect_parameters) return; + glad_glMultiDrawArraysIndirectCountARB = (PFNGLMULTIDRAWARRAYSINDIRECTCOUNTARBPROC)load("glMultiDrawArraysIndirectCountARB"); 
+ glad_glMultiDrawElementsIndirectCountARB = (PFNGLMULTIDRAWELEMENTSINDIRECTCOUNTARBPROC)load("glMultiDrawElementsIndirectCountARB"); +} +static void load_GL_ARB_instanced_arrays(GLADloadproc load) { + if(!GLAD_GL_ARB_instanced_arrays) return; + glad_glVertexAttribDivisorARB = (PFNGLVERTEXATTRIBDIVISORARBPROC)load("glVertexAttribDivisorARB"); +} +static void load_GL_ARB_internalformat_query(GLADloadproc load) { + if(!GLAD_GL_ARB_internalformat_query) return; + glad_glGetInternalformativ = (PFNGLGETINTERNALFORMATIVPROC)load("glGetInternalformativ"); +} +static void load_GL_ARB_internalformat_query2(GLADloadproc load) { + if(!GLAD_GL_ARB_internalformat_query2) return; + glad_glGetInternalformati64v = (PFNGLGETINTERNALFORMATI64VPROC)load("glGetInternalformati64v"); +} +static void load_GL_ARB_invalidate_subdata(GLADloadproc load) { + if(!GLAD_GL_ARB_invalidate_subdata) return; + glad_glInvalidateTexSubImage = (PFNGLINVALIDATETEXSUBIMAGEPROC)load("glInvalidateTexSubImage"); + glad_glInvalidateTexImage = (PFNGLINVALIDATETEXIMAGEPROC)load("glInvalidateTexImage"); + glad_glInvalidateBufferSubData = (PFNGLINVALIDATEBUFFERSUBDATAPROC)load("glInvalidateBufferSubData"); + glad_glInvalidateBufferData = (PFNGLINVALIDATEBUFFERDATAPROC)load("glInvalidateBufferData"); + glad_glInvalidateFramebuffer = (PFNGLINVALIDATEFRAMEBUFFERPROC)load("glInvalidateFramebuffer"); + glad_glInvalidateSubFramebuffer = (PFNGLINVALIDATESUBFRAMEBUFFERPROC)load("glInvalidateSubFramebuffer"); +} +static void load_GL_ARB_map_buffer_range(GLADloadproc load) { + if(!GLAD_GL_ARB_map_buffer_range) return; + glad_glMapBufferRange = (PFNGLMAPBUFFERRANGEPROC)load("glMapBufferRange"); + glad_glFlushMappedBufferRange = (PFNGLFLUSHMAPPEDBUFFERRANGEPROC)load("glFlushMappedBufferRange"); +} +static void load_GL_ARB_matrix_palette(GLADloadproc load) { + if(!GLAD_GL_ARB_matrix_palette) return; + glad_glCurrentPaletteMatrixARB = (PFNGLCURRENTPALETTEMATRIXARBPROC)load("glCurrentPaletteMatrixARB"); + 
glad_glMatrixIndexubvARB = (PFNGLMATRIXINDEXUBVARBPROC)load("glMatrixIndexubvARB"); + glad_glMatrixIndexusvARB = (PFNGLMATRIXINDEXUSVARBPROC)load("glMatrixIndexusvARB"); + glad_glMatrixIndexuivARB = (PFNGLMATRIXINDEXUIVARBPROC)load("glMatrixIndexuivARB"); + glad_glMatrixIndexPointerARB = (PFNGLMATRIXINDEXPOINTERARBPROC)load("glMatrixIndexPointerARB"); +} +static void load_GL_ARB_multi_bind(GLADloadproc load) { + if(!GLAD_GL_ARB_multi_bind) return; + glad_glBindBuffersBase = (PFNGLBINDBUFFERSBASEPROC)load("glBindBuffersBase"); + glad_glBindBuffersRange = (PFNGLBINDBUFFERSRANGEPROC)load("glBindBuffersRange"); + glad_glBindTextures = (PFNGLBINDTEXTURESPROC)load("glBindTextures"); + glad_glBindSamplers = (PFNGLBINDSAMPLERSPROC)load("glBindSamplers"); + glad_glBindImageTextures = (PFNGLBINDIMAGETEXTURESPROC)load("glBindImageTextures"); + glad_glBindVertexBuffers = (PFNGLBINDVERTEXBUFFERSPROC)load("glBindVertexBuffers"); +} +static void load_GL_ARB_multi_draw_indirect(GLADloadproc load) { + if(!GLAD_GL_ARB_multi_draw_indirect) return; + glad_glMultiDrawArraysIndirect = (PFNGLMULTIDRAWARRAYSINDIRECTPROC)load("glMultiDrawArraysIndirect"); + glad_glMultiDrawElementsIndirect = (PFNGLMULTIDRAWELEMENTSINDIRECTPROC)load("glMultiDrawElementsIndirect"); +} +static void load_GL_ARB_multisample(GLADloadproc load) { + if(!GLAD_GL_ARB_multisample) return; + glad_glSampleCoverageARB = (PFNGLSAMPLECOVERAGEARBPROC)load("glSampleCoverageARB"); +} +static void load_GL_ARB_multitexture(GLADloadproc load) { + if(!GLAD_GL_ARB_multitexture) return; + glad_glActiveTextureARB = (PFNGLACTIVETEXTUREARBPROC)load("glActiveTextureARB"); + glad_glClientActiveTextureARB = (PFNGLCLIENTACTIVETEXTUREARBPROC)load("glClientActiveTextureARB"); + glad_glMultiTexCoord1dARB = (PFNGLMULTITEXCOORD1DARBPROC)load("glMultiTexCoord1dARB"); + glad_glMultiTexCoord1dvARB = (PFNGLMULTITEXCOORD1DVARBPROC)load("glMultiTexCoord1dvARB"); + glad_glMultiTexCoord1fARB = 
(PFNGLMULTITEXCOORD1FARBPROC)load("glMultiTexCoord1fARB"); + glad_glMultiTexCoord1fvARB = (PFNGLMULTITEXCOORD1FVARBPROC)load("glMultiTexCoord1fvARB"); + glad_glMultiTexCoord1iARB = (PFNGLMULTITEXCOORD1IARBPROC)load("glMultiTexCoord1iARB"); + glad_glMultiTexCoord1ivARB = (PFNGLMULTITEXCOORD1IVARBPROC)load("glMultiTexCoord1ivARB"); + glad_glMultiTexCoord1sARB = (PFNGLMULTITEXCOORD1SARBPROC)load("glMultiTexCoord1sARB"); + glad_glMultiTexCoord1svARB = (PFNGLMULTITEXCOORD1SVARBPROC)load("glMultiTexCoord1svARB"); + glad_glMultiTexCoord2dARB = (PFNGLMULTITEXCOORD2DARBPROC)load("glMultiTexCoord2dARB"); + glad_glMultiTexCoord2dvARB = (PFNGLMULTITEXCOORD2DVARBPROC)load("glMultiTexCoord2dvARB"); + glad_glMultiTexCoord2fARB = (PFNGLMULTITEXCOORD2FARBPROC)load("glMultiTexCoord2fARB"); + glad_glMultiTexCoord2fvARB = (PFNGLMULTITEXCOORD2FVARBPROC)load("glMultiTexCoord2fvARB"); + glad_glMultiTexCoord2iARB = (PFNGLMULTITEXCOORD2IARBPROC)load("glMultiTexCoord2iARB"); + glad_glMultiTexCoord2ivARB = (PFNGLMULTITEXCOORD2IVARBPROC)load("glMultiTexCoord2ivARB"); + glad_glMultiTexCoord2sARB = (PFNGLMULTITEXCOORD2SARBPROC)load("glMultiTexCoord2sARB"); + glad_glMultiTexCoord2svARB = (PFNGLMULTITEXCOORD2SVARBPROC)load("glMultiTexCoord2svARB"); + glad_glMultiTexCoord3dARB = (PFNGLMULTITEXCOORD3DARBPROC)load("glMultiTexCoord3dARB"); + glad_glMultiTexCoord3dvARB = (PFNGLMULTITEXCOORD3DVARBPROC)load("glMultiTexCoord3dvARB"); + glad_glMultiTexCoord3fARB = (PFNGLMULTITEXCOORD3FARBPROC)load("glMultiTexCoord3fARB"); + glad_glMultiTexCoord3fvARB = (PFNGLMULTITEXCOORD3FVARBPROC)load("glMultiTexCoord3fvARB"); + glad_glMultiTexCoord3iARB = (PFNGLMULTITEXCOORD3IARBPROC)load("glMultiTexCoord3iARB"); + glad_glMultiTexCoord3ivARB = (PFNGLMULTITEXCOORD3IVARBPROC)load("glMultiTexCoord3ivARB"); + glad_glMultiTexCoord3sARB = (PFNGLMULTITEXCOORD3SARBPROC)load("glMultiTexCoord3sARB"); + glad_glMultiTexCoord3svARB = (PFNGLMULTITEXCOORD3SVARBPROC)load("glMultiTexCoord3svARB"); + glad_glMultiTexCoord4dARB = 
(PFNGLMULTITEXCOORD4DARBPROC)load("glMultiTexCoord4dARB"); + glad_glMultiTexCoord4dvARB = (PFNGLMULTITEXCOORD4DVARBPROC)load("glMultiTexCoord4dvARB"); + glad_glMultiTexCoord4fARB = (PFNGLMULTITEXCOORD4FARBPROC)load("glMultiTexCoord4fARB"); + glad_glMultiTexCoord4fvARB = (PFNGLMULTITEXCOORD4FVARBPROC)load("glMultiTexCoord4fvARB"); + glad_glMultiTexCoord4iARB = (PFNGLMULTITEXCOORD4IARBPROC)load("glMultiTexCoord4iARB"); + glad_glMultiTexCoord4ivARB = (PFNGLMULTITEXCOORD4IVARBPROC)load("glMultiTexCoord4ivARB"); + glad_glMultiTexCoord4sARB = (PFNGLMULTITEXCOORD4SARBPROC)load("glMultiTexCoord4sARB"); + glad_glMultiTexCoord4svARB = (PFNGLMULTITEXCOORD4SVARBPROC)load("glMultiTexCoord4svARB"); +} +static void load_GL_ARB_occlusion_query(GLADloadproc load) { + if(!GLAD_GL_ARB_occlusion_query) return; + glad_glGenQueriesARB = (PFNGLGENQUERIESARBPROC)load("glGenQueriesARB"); + glad_glDeleteQueriesARB = (PFNGLDELETEQUERIESARBPROC)load("glDeleteQueriesARB"); + glad_glIsQueryARB = (PFNGLISQUERYARBPROC)load("glIsQueryARB"); + glad_glBeginQueryARB = (PFNGLBEGINQUERYARBPROC)load("glBeginQueryARB"); + glad_glEndQueryARB = (PFNGLENDQUERYARBPROC)load("glEndQueryARB"); + glad_glGetQueryivARB = (PFNGLGETQUERYIVARBPROC)load("glGetQueryivARB"); + glad_glGetQueryObjectivARB = (PFNGLGETQUERYOBJECTIVARBPROC)load("glGetQueryObjectivARB"); + glad_glGetQueryObjectuivARB = (PFNGLGETQUERYOBJECTUIVARBPROC)load("glGetQueryObjectuivARB"); +} +static void load_GL_ARB_parallel_shader_compile(GLADloadproc load) { + if(!GLAD_GL_ARB_parallel_shader_compile) return; + glad_glMaxShaderCompilerThreadsARB = (PFNGLMAXSHADERCOMPILERTHREADSARBPROC)load("glMaxShaderCompilerThreadsARB"); +} +static void load_GL_ARB_point_parameters(GLADloadproc load) { + if(!GLAD_GL_ARB_point_parameters) return; + glad_glPointParameterfARB = (PFNGLPOINTPARAMETERFARBPROC)load("glPointParameterfARB"); + glad_glPointParameterfvARB = (PFNGLPOINTPARAMETERFVARBPROC)load("glPointParameterfvARB"); +} +static void 
load_GL_ARB_polygon_offset_clamp(GLADloadproc load) { + if(!GLAD_GL_ARB_polygon_offset_clamp) return; + glad_glPolygonOffsetClamp = (PFNGLPOLYGONOFFSETCLAMPPROC)load("glPolygonOffsetClamp"); +} +static void load_GL_ARB_program_interface_query(GLADloadproc load) { + if(!GLAD_GL_ARB_program_interface_query) return; + glad_glGetProgramInterfaceiv = (PFNGLGETPROGRAMINTERFACEIVPROC)load("glGetProgramInterfaceiv"); + glad_glGetProgramResourceIndex = (PFNGLGETPROGRAMRESOURCEINDEXPROC)load("glGetProgramResourceIndex"); + glad_glGetProgramResourceName = (PFNGLGETPROGRAMRESOURCENAMEPROC)load("glGetProgramResourceName"); + glad_glGetProgramResourceiv = (PFNGLGETPROGRAMRESOURCEIVPROC)load("glGetProgramResourceiv"); + glad_glGetProgramResourceLocation = (PFNGLGETPROGRAMRESOURCELOCATIONPROC)load("glGetProgramResourceLocation"); + glad_glGetProgramResourceLocationIndex = (PFNGLGETPROGRAMRESOURCELOCATIONINDEXPROC)load("glGetProgramResourceLocationIndex"); +} +static void load_GL_ARB_provoking_vertex(GLADloadproc load) { + if(!GLAD_GL_ARB_provoking_vertex) return; + glad_glProvokingVertex = (PFNGLPROVOKINGVERTEXPROC)load("glProvokingVertex"); +} +static void load_GL_ARB_robustness(GLADloadproc load) { + if(!GLAD_GL_ARB_robustness) return; + glad_glGetGraphicsResetStatusARB = (PFNGLGETGRAPHICSRESETSTATUSARBPROC)load("glGetGraphicsResetStatusARB"); + glad_glGetnTexImageARB = (PFNGLGETNTEXIMAGEARBPROC)load("glGetnTexImageARB"); + glad_glReadnPixelsARB = (PFNGLREADNPIXELSARBPROC)load("glReadnPixelsARB"); + glad_glGetnCompressedTexImageARB = (PFNGLGETNCOMPRESSEDTEXIMAGEARBPROC)load("glGetnCompressedTexImageARB"); + glad_glGetnUniformfvARB = (PFNGLGETNUNIFORMFVARBPROC)load("glGetnUniformfvARB"); + glad_glGetnUniformivARB = (PFNGLGETNUNIFORMIVARBPROC)load("glGetnUniformivARB"); + glad_glGetnUniformuivARB = (PFNGLGETNUNIFORMUIVARBPROC)load("glGetnUniformuivARB"); + glad_glGetnUniformdvARB = (PFNGLGETNUNIFORMDVARBPROC)load("glGetnUniformdvARB"); + glad_glGetnMapdvARB = 
(PFNGLGETNMAPDVARBPROC)load("glGetnMapdvARB"); + glad_glGetnMapfvARB = (PFNGLGETNMAPFVARBPROC)load("glGetnMapfvARB"); + glad_glGetnMapivARB = (PFNGLGETNMAPIVARBPROC)load("glGetnMapivARB"); + glad_glGetnPixelMapfvARB = (PFNGLGETNPIXELMAPFVARBPROC)load("glGetnPixelMapfvARB"); + glad_glGetnPixelMapuivARB = (PFNGLGETNPIXELMAPUIVARBPROC)load("glGetnPixelMapuivARB"); + glad_glGetnPixelMapusvARB = (PFNGLGETNPIXELMAPUSVARBPROC)load("glGetnPixelMapusvARB"); + glad_glGetnPolygonStippleARB = (PFNGLGETNPOLYGONSTIPPLEARBPROC)load("glGetnPolygonStippleARB"); + glad_glGetnColorTableARB = (PFNGLGETNCOLORTABLEARBPROC)load("glGetnColorTableARB"); + glad_glGetnConvolutionFilterARB = (PFNGLGETNCONVOLUTIONFILTERARBPROC)load("glGetnConvolutionFilterARB"); + glad_glGetnSeparableFilterARB = (PFNGLGETNSEPARABLEFILTERARBPROC)load("glGetnSeparableFilterARB"); + glad_glGetnHistogramARB = (PFNGLGETNHISTOGRAMARBPROC)load("glGetnHistogramARB"); + glad_glGetnMinmaxARB = (PFNGLGETNMINMAXARBPROC)load("glGetnMinmaxARB"); +} +static void load_GL_ARB_sample_locations(GLADloadproc load) { + if(!GLAD_GL_ARB_sample_locations) return; + glad_glFramebufferSampleLocationsfvARB = (PFNGLFRAMEBUFFERSAMPLELOCATIONSFVARBPROC)load("glFramebufferSampleLocationsfvARB"); + glad_glNamedFramebufferSampleLocationsfvARB = (PFNGLNAMEDFRAMEBUFFERSAMPLELOCATIONSFVARBPROC)load("glNamedFramebufferSampleLocationsfvARB"); + glad_glEvaluateDepthValuesARB = (PFNGLEVALUATEDEPTHVALUESARBPROC)load("glEvaluateDepthValuesARB"); +} +static void load_GL_ARB_sample_shading(GLADloadproc load) { + if(!GLAD_GL_ARB_sample_shading) return; + glad_glMinSampleShadingARB = (PFNGLMINSAMPLESHADINGARBPROC)load("glMinSampleShadingARB"); +} +static void load_GL_ARB_sampler_objects(GLADloadproc load) { + if(!GLAD_GL_ARB_sampler_objects) return; + glad_glGenSamplers = (PFNGLGENSAMPLERSPROC)load("glGenSamplers"); + glad_glDeleteSamplers = (PFNGLDELETESAMPLERSPROC)load("glDeleteSamplers"); + glad_glIsSampler = (PFNGLISSAMPLERPROC)load("glIsSampler"); + 
glad_glBindSampler = (PFNGLBINDSAMPLERPROC)load("glBindSampler"); + glad_glSamplerParameteri = (PFNGLSAMPLERPARAMETERIPROC)load("glSamplerParameteri"); + glad_glSamplerParameteriv = (PFNGLSAMPLERPARAMETERIVPROC)load("glSamplerParameteriv"); + glad_glSamplerParameterf = (PFNGLSAMPLERPARAMETERFPROC)load("glSamplerParameterf"); + glad_glSamplerParameterfv = (PFNGLSAMPLERPARAMETERFVPROC)load("glSamplerParameterfv"); + glad_glSamplerParameterIiv = (PFNGLSAMPLERPARAMETERIIVPROC)load("glSamplerParameterIiv"); + glad_glSamplerParameterIuiv = (PFNGLSAMPLERPARAMETERIUIVPROC)load("glSamplerParameterIuiv"); + glad_glGetSamplerParameteriv = (PFNGLGETSAMPLERPARAMETERIVPROC)load("glGetSamplerParameteriv"); + glad_glGetSamplerParameterIiv = (PFNGLGETSAMPLERPARAMETERIIVPROC)load("glGetSamplerParameterIiv"); + glad_glGetSamplerParameterfv = (PFNGLGETSAMPLERPARAMETERFVPROC)load("glGetSamplerParameterfv"); + glad_glGetSamplerParameterIuiv = (PFNGLGETSAMPLERPARAMETERIUIVPROC)load("glGetSamplerParameterIuiv"); +} +static void load_GL_ARB_separate_shader_objects(GLADloadproc load) { + if(!GLAD_GL_ARB_separate_shader_objects) return; + glad_glUseProgramStages = (PFNGLUSEPROGRAMSTAGESPROC)load("glUseProgramStages"); + glad_glActiveShaderProgram = (PFNGLACTIVESHADERPROGRAMPROC)load("glActiveShaderProgram"); + glad_glCreateShaderProgramv = (PFNGLCREATESHADERPROGRAMVPROC)load("glCreateShaderProgramv"); + glad_glBindProgramPipeline = (PFNGLBINDPROGRAMPIPELINEPROC)load("glBindProgramPipeline"); + glad_glDeleteProgramPipelines = (PFNGLDELETEPROGRAMPIPELINESPROC)load("glDeleteProgramPipelines"); + glad_glGenProgramPipelines = (PFNGLGENPROGRAMPIPELINESPROC)load("glGenProgramPipelines"); + glad_glIsProgramPipeline = (PFNGLISPROGRAMPIPELINEPROC)load("glIsProgramPipeline"); + glad_glGetProgramPipelineiv = (PFNGLGETPROGRAMPIPELINEIVPROC)load("glGetProgramPipelineiv"); + glad_glProgramParameteri = (PFNGLPROGRAMPARAMETERIPROC)load("glProgramParameteri"); + glad_glProgramUniform1i = 
(PFNGLPROGRAMUNIFORM1IPROC)load("glProgramUniform1i"); + glad_glProgramUniform1iv = (PFNGLPROGRAMUNIFORM1IVPROC)load("glProgramUniform1iv"); + glad_glProgramUniform1f = (PFNGLPROGRAMUNIFORM1FPROC)load("glProgramUniform1f"); + glad_glProgramUniform1fv = (PFNGLPROGRAMUNIFORM1FVPROC)load("glProgramUniform1fv"); + glad_glProgramUniform1d = (PFNGLPROGRAMUNIFORM1DPROC)load("glProgramUniform1d"); + glad_glProgramUniform1dv = (PFNGLPROGRAMUNIFORM1DVPROC)load("glProgramUniform1dv"); + glad_glProgramUniform1ui = (PFNGLPROGRAMUNIFORM1UIPROC)load("glProgramUniform1ui"); + glad_glProgramUniform1uiv = (PFNGLPROGRAMUNIFORM1UIVPROC)load("glProgramUniform1uiv"); + glad_glProgramUniform2i = (PFNGLPROGRAMUNIFORM2IPROC)load("glProgramUniform2i"); + glad_glProgramUniform2iv = (PFNGLPROGRAMUNIFORM2IVPROC)load("glProgramUniform2iv"); + glad_glProgramUniform2f = (PFNGLPROGRAMUNIFORM2FPROC)load("glProgramUniform2f"); + glad_glProgramUniform2fv = (PFNGLPROGRAMUNIFORM2FVPROC)load("glProgramUniform2fv"); + glad_glProgramUniform2d = (PFNGLPROGRAMUNIFORM2DPROC)load("glProgramUniform2d"); + glad_glProgramUniform2dv = (PFNGLPROGRAMUNIFORM2DVPROC)load("glProgramUniform2dv"); + glad_glProgramUniform2ui = (PFNGLPROGRAMUNIFORM2UIPROC)load("glProgramUniform2ui"); + glad_glProgramUniform2uiv = (PFNGLPROGRAMUNIFORM2UIVPROC)load("glProgramUniform2uiv"); + glad_glProgramUniform3i = (PFNGLPROGRAMUNIFORM3IPROC)load("glProgramUniform3i"); + glad_glProgramUniform3iv = (PFNGLPROGRAMUNIFORM3IVPROC)load("glProgramUniform3iv"); + glad_glProgramUniform3f = (PFNGLPROGRAMUNIFORM3FPROC)load("glProgramUniform3f"); + glad_glProgramUniform3fv = (PFNGLPROGRAMUNIFORM3FVPROC)load("glProgramUniform3fv"); + glad_glProgramUniform3d = (PFNGLPROGRAMUNIFORM3DPROC)load("glProgramUniform3d"); + glad_glProgramUniform3dv = (PFNGLPROGRAMUNIFORM3DVPROC)load("glProgramUniform3dv"); + glad_glProgramUniform3ui = (PFNGLPROGRAMUNIFORM3UIPROC)load("glProgramUniform3ui"); + glad_glProgramUniform3uiv = 
(PFNGLPROGRAMUNIFORM3UIVPROC)load("glProgramUniform3uiv"); + glad_glProgramUniform4i = (PFNGLPROGRAMUNIFORM4IPROC)load("glProgramUniform4i"); + glad_glProgramUniform4iv = (PFNGLPROGRAMUNIFORM4IVPROC)load("glProgramUniform4iv"); + glad_glProgramUniform4f = (PFNGLPROGRAMUNIFORM4FPROC)load("glProgramUniform4f"); + glad_glProgramUniform4fv = (PFNGLPROGRAMUNIFORM4FVPROC)load("glProgramUniform4fv"); + glad_glProgramUniform4d = (PFNGLPROGRAMUNIFORM4DPROC)load("glProgramUniform4d"); + glad_glProgramUniform4dv = (PFNGLPROGRAMUNIFORM4DVPROC)load("glProgramUniform4dv"); + glad_glProgramUniform4ui = (PFNGLPROGRAMUNIFORM4UIPROC)load("glProgramUniform4ui"); + glad_glProgramUniform4uiv = (PFNGLPROGRAMUNIFORM4UIVPROC)load("glProgramUniform4uiv"); + glad_glProgramUniformMatrix2fv = (PFNGLPROGRAMUNIFORMMATRIX2FVPROC)load("glProgramUniformMatrix2fv"); + glad_glProgramUniformMatrix3fv = (PFNGLPROGRAMUNIFORMMATRIX3FVPROC)load("glProgramUniformMatrix3fv"); + glad_glProgramUniformMatrix4fv = (PFNGLPROGRAMUNIFORMMATRIX4FVPROC)load("glProgramUniformMatrix4fv"); + glad_glProgramUniformMatrix2dv = (PFNGLPROGRAMUNIFORMMATRIX2DVPROC)load("glProgramUniformMatrix2dv"); + glad_glProgramUniformMatrix3dv = (PFNGLPROGRAMUNIFORMMATRIX3DVPROC)load("glProgramUniformMatrix3dv"); + glad_glProgramUniformMatrix4dv = (PFNGLPROGRAMUNIFORMMATRIX4DVPROC)load("glProgramUniformMatrix4dv"); + glad_glProgramUniformMatrix2x3fv = (PFNGLPROGRAMUNIFORMMATRIX2X3FVPROC)load("glProgramUniformMatrix2x3fv"); + glad_glProgramUniformMatrix3x2fv = (PFNGLPROGRAMUNIFORMMATRIX3X2FVPROC)load("glProgramUniformMatrix3x2fv"); + glad_glProgramUniformMatrix2x4fv = (PFNGLPROGRAMUNIFORMMATRIX2X4FVPROC)load("glProgramUniformMatrix2x4fv"); + glad_glProgramUniformMatrix4x2fv = (PFNGLPROGRAMUNIFORMMATRIX4X2FVPROC)load("glProgramUniformMatrix4x2fv"); + glad_glProgramUniformMatrix3x4fv = (PFNGLPROGRAMUNIFORMMATRIX3X4FVPROC)load("glProgramUniformMatrix3x4fv"); + glad_glProgramUniformMatrix4x3fv = 
(PFNGLPROGRAMUNIFORMMATRIX4X3FVPROC)load("glProgramUniformMatrix4x3fv"); + glad_glProgramUniformMatrix2x3dv = (PFNGLPROGRAMUNIFORMMATRIX2X3DVPROC)load("glProgramUniformMatrix2x3dv"); + glad_glProgramUniformMatrix3x2dv = (PFNGLPROGRAMUNIFORMMATRIX3X2DVPROC)load("glProgramUniformMatrix3x2dv"); + glad_glProgramUniformMatrix2x4dv = (PFNGLPROGRAMUNIFORMMATRIX2X4DVPROC)load("glProgramUniformMatrix2x4dv"); + glad_glProgramUniformMatrix4x2dv = (PFNGLPROGRAMUNIFORMMATRIX4X2DVPROC)load("glProgramUniformMatrix4x2dv"); + glad_glProgramUniformMatrix3x4dv = (PFNGLPROGRAMUNIFORMMATRIX3X4DVPROC)load("glProgramUniformMatrix3x4dv"); + glad_glProgramUniformMatrix4x3dv = (PFNGLPROGRAMUNIFORMMATRIX4X3DVPROC)load("glProgramUniformMatrix4x3dv"); + glad_glValidateProgramPipeline = (PFNGLVALIDATEPROGRAMPIPELINEPROC)load("glValidateProgramPipeline"); + glad_glGetProgramPipelineInfoLog = (PFNGLGETPROGRAMPIPELINEINFOLOGPROC)load("glGetProgramPipelineInfoLog"); +} +static void load_GL_ARB_shader_atomic_counters(GLADloadproc load) { + if(!GLAD_GL_ARB_shader_atomic_counters) return; + glad_glGetActiveAtomicCounterBufferiv = (PFNGLGETACTIVEATOMICCOUNTERBUFFERIVPROC)load("glGetActiveAtomicCounterBufferiv"); +} +static void load_GL_ARB_shader_image_load_store(GLADloadproc load) { + if(!GLAD_GL_ARB_shader_image_load_store) return; + glad_glBindImageTexture = (PFNGLBINDIMAGETEXTUREPROC)load("glBindImageTexture"); + glad_glMemoryBarrier = (PFNGLMEMORYBARRIERPROC)load("glMemoryBarrier"); +} +static void load_GL_ARB_shader_objects(GLADloadproc load) { + if(!GLAD_GL_ARB_shader_objects) return; + glad_glDeleteObjectARB = (PFNGLDELETEOBJECTARBPROC)load("glDeleteObjectARB"); + glad_glGetHandleARB = (PFNGLGETHANDLEARBPROC)load("glGetHandleARB"); + glad_glDetachObjectARB = (PFNGLDETACHOBJECTARBPROC)load("glDetachObjectARB"); + glad_glCreateShaderObjectARB = (PFNGLCREATESHADEROBJECTARBPROC)load("glCreateShaderObjectARB"); + glad_glShaderSourceARB = (PFNGLSHADERSOURCEARBPROC)load("glShaderSourceARB"); + 
glad_glCompileShaderARB = (PFNGLCOMPILESHADERARBPROC)load("glCompileShaderARB"); + glad_glCreateProgramObjectARB = (PFNGLCREATEPROGRAMOBJECTARBPROC)load("glCreateProgramObjectARB"); + glad_glAttachObjectARB = (PFNGLATTACHOBJECTARBPROC)load("glAttachObjectARB"); + glad_glLinkProgramARB = (PFNGLLINKPROGRAMARBPROC)load("glLinkProgramARB"); + glad_glUseProgramObjectARB = (PFNGLUSEPROGRAMOBJECTARBPROC)load("glUseProgramObjectARB"); + glad_glValidateProgramARB = (PFNGLVALIDATEPROGRAMARBPROC)load("glValidateProgramARB"); + glad_glUniform1fARB = (PFNGLUNIFORM1FARBPROC)load("glUniform1fARB"); + glad_glUniform2fARB = (PFNGLUNIFORM2FARBPROC)load("glUniform2fARB"); + glad_glUniform3fARB = (PFNGLUNIFORM3FARBPROC)load("glUniform3fARB"); + glad_glUniform4fARB = (PFNGLUNIFORM4FARBPROC)load("glUniform4fARB"); + glad_glUniform1iARB = (PFNGLUNIFORM1IARBPROC)load("glUniform1iARB"); + glad_glUniform2iARB = (PFNGLUNIFORM2IARBPROC)load("glUniform2iARB"); + glad_glUniform3iARB = (PFNGLUNIFORM3IARBPROC)load("glUniform3iARB"); + glad_glUniform4iARB = (PFNGLUNIFORM4IARBPROC)load("glUniform4iARB"); + glad_glUniform1fvARB = (PFNGLUNIFORM1FVARBPROC)load("glUniform1fvARB"); + glad_glUniform2fvARB = (PFNGLUNIFORM2FVARBPROC)load("glUniform2fvARB"); + glad_glUniform3fvARB = (PFNGLUNIFORM3FVARBPROC)load("glUniform3fvARB"); + glad_glUniform4fvARB = (PFNGLUNIFORM4FVARBPROC)load("glUniform4fvARB"); + glad_glUniform1ivARB = (PFNGLUNIFORM1IVARBPROC)load("glUniform1ivARB"); + glad_glUniform2ivARB = (PFNGLUNIFORM2IVARBPROC)load("glUniform2ivARB"); + glad_glUniform3ivARB = (PFNGLUNIFORM3IVARBPROC)load("glUniform3ivARB"); + glad_glUniform4ivARB = (PFNGLUNIFORM4IVARBPROC)load("glUniform4ivARB"); + glad_glUniformMatrix2fvARB = (PFNGLUNIFORMMATRIX2FVARBPROC)load("glUniformMatrix2fvARB"); + glad_glUniformMatrix3fvARB = (PFNGLUNIFORMMATRIX3FVARBPROC)load("glUniformMatrix3fvARB"); + glad_glUniformMatrix4fvARB = (PFNGLUNIFORMMATRIX4FVARBPROC)load("glUniformMatrix4fvARB"); + glad_glGetObjectParameterfvARB = 
(PFNGLGETOBJECTPARAMETERFVARBPROC)load("glGetObjectParameterfvARB"); + glad_glGetObjectParameterivARB = (PFNGLGETOBJECTPARAMETERIVARBPROC)load("glGetObjectParameterivARB"); + glad_glGetInfoLogARB = (PFNGLGETINFOLOGARBPROC)load("glGetInfoLogARB"); + glad_glGetAttachedObjectsARB = (PFNGLGETATTACHEDOBJECTSARBPROC)load("glGetAttachedObjectsARB"); + glad_glGetUniformLocationARB = (PFNGLGETUNIFORMLOCATIONARBPROC)load("glGetUniformLocationARB"); + glad_glGetActiveUniformARB = (PFNGLGETACTIVEUNIFORMARBPROC)load("glGetActiveUniformARB"); + glad_glGetUniformfvARB = (PFNGLGETUNIFORMFVARBPROC)load("glGetUniformfvARB"); + glad_glGetUniformivARB = (PFNGLGETUNIFORMIVARBPROC)load("glGetUniformivARB"); + glad_glGetShaderSourceARB = (PFNGLGETSHADERSOURCEARBPROC)load("glGetShaderSourceARB"); +} +static void load_GL_ARB_shader_storage_buffer_object(GLADloadproc load) { + if(!GLAD_GL_ARB_shader_storage_buffer_object) return; + glad_glShaderStorageBlockBinding = (PFNGLSHADERSTORAGEBLOCKBINDINGPROC)load("glShaderStorageBlockBinding"); +} +static void load_GL_ARB_shader_subroutine(GLADloadproc load) { + if(!GLAD_GL_ARB_shader_subroutine) return; + glad_glGetSubroutineUniformLocation = (PFNGLGETSUBROUTINEUNIFORMLOCATIONPROC)load("glGetSubroutineUniformLocation"); + glad_glGetSubroutineIndex = (PFNGLGETSUBROUTINEINDEXPROC)load("glGetSubroutineIndex"); + glad_glGetActiveSubroutineUniformiv = (PFNGLGETACTIVESUBROUTINEUNIFORMIVPROC)load("glGetActiveSubroutineUniformiv"); + glad_glGetActiveSubroutineUniformName = (PFNGLGETACTIVESUBROUTINEUNIFORMNAMEPROC)load("glGetActiveSubroutineUniformName"); + glad_glGetActiveSubroutineName = (PFNGLGETACTIVESUBROUTINENAMEPROC)load("glGetActiveSubroutineName"); + glad_glUniformSubroutinesuiv = (PFNGLUNIFORMSUBROUTINESUIVPROC)load("glUniformSubroutinesuiv"); + glad_glGetUniformSubroutineuiv = (PFNGLGETUNIFORMSUBROUTINEUIVPROC)load("glGetUniformSubroutineuiv"); + glad_glGetProgramStageiv = (PFNGLGETPROGRAMSTAGEIVPROC)load("glGetProgramStageiv"); +} +static void 
load_GL_ARB_shading_language_include(GLADloadproc load) { + if(!GLAD_GL_ARB_shading_language_include) return; + glad_glNamedStringARB = (PFNGLNAMEDSTRINGARBPROC)load("glNamedStringARB"); + glad_glDeleteNamedStringARB = (PFNGLDELETENAMEDSTRINGARBPROC)load("glDeleteNamedStringARB"); + glad_glCompileShaderIncludeARB = (PFNGLCOMPILESHADERINCLUDEARBPROC)load("glCompileShaderIncludeARB"); + glad_glIsNamedStringARB = (PFNGLISNAMEDSTRINGARBPROC)load("glIsNamedStringARB"); + glad_glGetNamedStringARB = (PFNGLGETNAMEDSTRINGARBPROC)load("glGetNamedStringARB"); + glad_glGetNamedStringivARB = (PFNGLGETNAMEDSTRINGIVARBPROC)load("glGetNamedStringivARB"); +} +static void load_GL_ARB_sparse_buffer(GLADloadproc load) { + if(!GLAD_GL_ARB_sparse_buffer) return; + glad_glBufferPageCommitmentARB = (PFNGLBUFFERPAGECOMMITMENTARBPROC)load("glBufferPageCommitmentARB"); + glad_glNamedBufferPageCommitmentEXT = (PFNGLNAMEDBUFFERPAGECOMMITMENTEXTPROC)load("glNamedBufferPageCommitmentEXT"); + glad_glNamedBufferPageCommitmentARB = (PFNGLNAMEDBUFFERPAGECOMMITMENTARBPROC)load("glNamedBufferPageCommitmentARB"); +} +static void load_GL_ARB_sparse_texture(GLADloadproc load) { + if(!GLAD_GL_ARB_sparse_texture) return; + glad_glTexPageCommitmentARB = (PFNGLTEXPAGECOMMITMENTARBPROC)load("glTexPageCommitmentARB"); +} +static void load_GL_ARB_sync(GLADloadproc load) { + if(!GLAD_GL_ARB_sync) return; + glad_glFenceSync = (PFNGLFENCESYNCPROC)load("glFenceSync"); + glad_glIsSync = (PFNGLISSYNCPROC)load("glIsSync"); + glad_glDeleteSync = (PFNGLDELETESYNCPROC)load("glDeleteSync"); + glad_glClientWaitSync = (PFNGLCLIENTWAITSYNCPROC)load("glClientWaitSync"); + glad_glWaitSync = (PFNGLWAITSYNCPROC)load("glWaitSync"); + glad_glGetInteger64v = (PFNGLGETINTEGER64VPROC)load("glGetInteger64v"); + glad_glGetSynciv = (PFNGLGETSYNCIVPROC)load("glGetSynciv"); +} +static void load_GL_ARB_tessellation_shader(GLADloadproc load) { + if(!GLAD_GL_ARB_tessellation_shader) return; + glad_glPatchParameteri = 
(PFNGLPATCHPARAMETERIPROC)load("glPatchParameteri"); + glad_glPatchParameterfv = (PFNGLPATCHPARAMETERFVPROC)load("glPatchParameterfv"); +} +static void load_GL_ARB_texture_barrier(GLADloadproc load) { + if(!GLAD_GL_ARB_texture_barrier) return; + glad_glTextureBarrier = (PFNGLTEXTUREBARRIERPROC)load("glTextureBarrier"); +} +static void load_GL_ARB_texture_buffer_object(GLADloadproc load) { + if(!GLAD_GL_ARB_texture_buffer_object) return; + glad_glTexBufferARB = (PFNGLTEXBUFFERARBPROC)load("glTexBufferARB"); +} +static void load_GL_ARB_texture_buffer_range(GLADloadproc load) { + if(!GLAD_GL_ARB_texture_buffer_range) return; + glad_glTexBufferRange = (PFNGLTEXBUFFERRANGEPROC)load("glTexBufferRange"); +} +static void load_GL_ARB_texture_compression(GLADloadproc load) { + if(!GLAD_GL_ARB_texture_compression) return; + glad_glCompressedTexImage3DARB = (PFNGLCOMPRESSEDTEXIMAGE3DARBPROC)load("glCompressedTexImage3DARB"); + glad_glCompressedTexImage2DARB = (PFNGLCOMPRESSEDTEXIMAGE2DARBPROC)load("glCompressedTexImage2DARB"); + glad_glCompressedTexImage1DARB = (PFNGLCOMPRESSEDTEXIMAGE1DARBPROC)load("glCompressedTexImage1DARB"); + glad_glCompressedTexSubImage3DARB = (PFNGLCOMPRESSEDTEXSUBIMAGE3DARBPROC)load("glCompressedTexSubImage3DARB"); + glad_glCompressedTexSubImage2DARB = (PFNGLCOMPRESSEDTEXSUBIMAGE2DARBPROC)load("glCompressedTexSubImage2DARB"); + glad_glCompressedTexSubImage1DARB = (PFNGLCOMPRESSEDTEXSUBIMAGE1DARBPROC)load("glCompressedTexSubImage1DARB"); + glad_glGetCompressedTexImageARB = (PFNGLGETCOMPRESSEDTEXIMAGEARBPROC)load("glGetCompressedTexImageARB"); +} +static void load_GL_ARB_texture_multisample(GLADloadproc load) { + if(!GLAD_GL_ARB_texture_multisample) return; + glad_glTexImage2DMultisample = (PFNGLTEXIMAGE2DMULTISAMPLEPROC)load("glTexImage2DMultisample"); + glad_glTexImage3DMultisample = (PFNGLTEXIMAGE3DMULTISAMPLEPROC)load("glTexImage3DMultisample"); + glad_glGetMultisamplefv = (PFNGLGETMULTISAMPLEFVPROC)load("glGetMultisamplefv"); + glad_glSampleMaski = 
(PFNGLSAMPLEMASKIPROC)load("glSampleMaski"); +} +static void load_GL_ARB_texture_storage(GLADloadproc load) { + if(!GLAD_GL_ARB_texture_storage) return; + glad_glTexStorage1D = (PFNGLTEXSTORAGE1DPROC)load("glTexStorage1D"); + glad_glTexStorage2D = (PFNGLTEXSTORAGE2DPROC)load("glTexStorage2D"); + glad_glTexStorage3D = (PFNGLTEXSTORAGE3DPROC)load("glTexStorage3D"); +} +static void load_GL_ARB_texture_storage_multisample(GLADloadproc load) { + if(!GLAD_GL_ARB_texture_storage_multisample) return; + glad_glTexStorage2DMultisample = (PFNGLTEXSTORAGE2DMULTISAMPLEPROC)load("glTexStorage2DMultisample"); + glad_glTexStorage3DMultisample = (PFNGLTEXSTORAGE3DMULTISAMPLEPROC)load("glTexStorage3DMultisample"); +} +static void load_GL_ARB_texture_view(GLADloadproc load) { + if(!GLAD_GL_ARB_texture_view) return; + glad_glTextureView = (PFNGLTEXTUREVIEWPROC)load("glTextureView"); +} +static void load_GL_ARB_timer_query(GLADloadproc load) { + if(!GLAD_GL_ARB_timer_query) return; + glad_glQueryCounter = (PFNGLQUERYCOUNTERPROC)load("glQueryCounter"); + glad_glGetQueryObjecti64v = (PFNGLGETQUERYOBJECTI64VPROC)load("glGetQueryObjecti64v"); + glad_glGetQueryObjectui64v = (PFNGLGETQUERYOBJECTUI64VPROC)load("glGetQueryObjectui64v"); +} +static void load_GL_ARB_transform_feedback2(GLADloadproc load) { + if(!GLAD_GL_ARB_transform_feedback2) return; + glad_glBindTransformFeedback = (PFNGLBINDTRANSFORMFEEDBACKPROC)load("glBindTransformFeedback"); + glad_glDeleteTransformFeedbacks = (PFNGLDELETETRANSFORMFEEDBACKSPROC)load("glDeleteTransformFeedbacks"); + glad_glGenTransformFeedbacks = (PFNGLGENTRANSFORMFEEDBACKSPROC)load("glGenTransformFeedbacks"); + glad_glIsTransformFeedback = (PFNGLISTRANSFORMFEEDBACKPROC)load("glIsTransformFeedback"); + glad_glPauseTransformFeedback = (PFNGLPAUSETRANSFORMFEEDBACKPROC)load("glPauseTransformFeedback"); + glad_glResumeTransformFeedback = (PFNGLRESUMETRANSFORMFEEDBACKPROC)load("glResumeTransformFeedback"); + glad_glDrawTransformFeedback = 
(PFNGLDRAWTRANSFORMFEEDBACKPROC)load("glDrawTransformFeedback"); +} +static void load_GL_ARB_transform_feedback3(GLADloadproc load) { + if(!GLAD_GL_ARB_transform_feedback3) return; + glad_glDrawTransformFeedbackStream = (PFNGLDRAWTRANSFORMFEEDBACKSTREAMPROC)load("glDrawTransformFeedbackStream"); + glad_glBeginQueryIndexed = (PFNGLBEGINQUERYINDEXEDPROC)load("glBeginQueryIndexed"); + glad_glEndQueryIndexed = (PFNGLENDQUERYINDEXEDPROC)load("glEndQueryIndexed"); + glad_glGetQueryIndexediv = (PFNGLGETQUERYINDEXEDIVPROC)load("glGetQueryIndexediv"); +} +static void load_GL_ARB_transform_feedback_instanced(GLADloadproc load) { + if(!GLAD_GL_ARB_transform_feedback_instanced) return; + glad_glDrawTransformFeedbackInstanced = (PFNGLDRAWTRANSFORMFEEDBACKINSTANCEDPROC)load("glDrawTransformFeedbackInstanced"); + glad_glDrawTransformFeedbackStreamInstanced = (PFNGLDRAWTRANSFORMFEEDBACKSTREAMINSTANCEDPROC)load("glDrawTransformFeedbackStreamInstanced"); +} +static void load_GL_ARB_transpose_matrix(GLADloadproc load) { + if(!GLAD_GL_ARB_transpose_matrix) return; + glad_glLoadTransposeMatrixfARB = (PFNGLLOADTRANSPOSEMATRIXFARBPROC)load("glLoadTransposeMatrixfARB"); + glad_glLoadTransposeMatrixdARB = (PFNGLLOADTRANSPOSEMATRIXDARBPROC)load("glLoadTransposeMatrixdARB"); + glad_glMultTransposeMatrixfARB = (PFNGLMULTTRANSPOSEMATRIXFARBPROC)load("glMultTransposeMatrixfARB"); + glad_glMultTransposeMatrixdARB = (PFNGLMULTTRANSPOSEMATRIXDARBPROC)load("glMultTransposeMatrixdARB"); +} +static void load_GL_ARB_uniform_buffer_object(GLADloadproc load) { + if(!GLAD_GL_ARB_uniform_buffer_object) return; + glad_glGetUniformIndices = (PFNGLGETUNIFORMINDICESPROC)load("glGetUniformIndices"); + glad_glGetActiveUniformsiv = (PFNGLGETACTIVEUNIFORMSIVPROC)load("glGetActiveUniformsiv"); + glad_glGetActiveUniformName = (PFNGLGETACTIVEUNIFORMNAMEPROC)load("glGetActiveUniformName"); + glad_glGetUniformBlockIndex = (PFNGLGETUNIFORMBLOCKINDEXPROC)load("glGetUniformBlockIndex"); + glad_glGetActiveUniformBlockiv = 
(PFNGLGETACTIVEUNIFORMBLOCKIVPROC)load("glGetActiveUniformBlockiv"); + glad_glGetActiveUniformBlockName = (PFNGLGETACTIVEUNIFORMBLOCKNAMEPROC)load("glGetActiveUniformBlockName"); + glad_glUniformBlockBinding = (PFNGLUNIFORMBLOCKBINDINGPROC)load("glUniformBlockBinding"); + glad_glBindBufferRange = (PFNGLBINDBUFFERRANGEPROC)load("glBindBufferRange"); + glad_glBindBufferBase = (PFNGLBINDBUFFERBASEPROC)load("glBindBufferBase"); + glad_glGetIntegeri_v = (PFNGLGETINTEGERI_VPROC)load("glGetIntegeri_v"); +} +static void load_GL_ARB_vertex_array_object(GLADloadproc load) { + if(!GLAD_GL_ARB_vertex_array_object) return; + glad_glBindVertexArray = (PFNGLBINDVERTEXARRAYPROC)load("glBindVertexArray"); + glad_glDeleteVertexArrays = (PFNGLDELETEVERTEXARRAYSPROC)load("glDeleteVertexArrays"); + glad_glGenVertexArrays = (PFNGLGENVERTEXARRAYSPROC)load("glGenVertexArrays"); + glad_glIsVertexArray = (PFNGLISVERTEXARRAYPROC)load("glIsVertexArray"); +} +static void load_GL_ARB_vertex_attrib_64bit(GLADloadproc load) { + if(!GLAD_GL_ARB_vertex_attrib_64bit) return; + glad_glVertexAttribL1d = (PFNGLVERTEXATTRIBL1DPROC)load("glVertexAttribL1d"); + glad_glVertexAttribL2d = (PFNGLVERTEXATTRIBL2DPROC)load("glVertexAttribL2d"); + glad_glVertexAttribL3d = (PFNGLVERTEXATTRIBL3DPROC)load("glVertexAttribL3d"); + glad_glVertexAttribL4d = (PFNGLVERTEXATTRIBL4DPROC)load("glVertexAttribL4d"); + glad_glVertexAttribL1dv = (PFNGLVERTEXATTRIBL1DVPROC)load("glVertexAttribL1dv"); + glad_glVertexAttribL2dv = (PFNGLVERTEXATTRIBL2DVPROC)load("glVertexAttribL2dv"); + glad_glVertexAttribL3dv = (PFNGLVERTEXATTRIBL3DVPROC)load("glVertexAttribL3dv"); + glad_glVertexAttribL4dv = (PFNGLVERTEXATTRIBL4DVPROC)load("glVertexAttribL4dv"); + glad_glVertexAttribLPointer = (PFNGLVERTEXATTRIBLPOINTERPROC)load("glVertexAttribLPointer"); + glad_glGetVertexAttribLdv = (PFNGLGETVERTEXATTRIBLDVPROC)load("glGetVertexAttribLdv"); +} +static void load_GL_ARB_vertex_attrib_binding(GLADloadproc load) { + 
if(!GLAD_GL_ARB_vertex_attrib_binding) return; + glad_glBindVertexBuffer = (PFNGLBINDVERTEXBUFFERPROC)load("glBindVertexBuffer"); + glad_glVertexAttribFormat = (PFNGLVERTEXATTRIBFORMATPROC)load("glVertexAttribFormat"); + glad_glVertexAttribIFormat = (PFNGLVERTEXATTRIBIFORMATPROC)load("glVertexAttribIFormat"); + glad_glVertexAttribLFormat = (PFNGLVERTEXATTRIBLFORMATPROC)load("glVertexAttribLFormat"); + glad_glVertexAttribBinding = (PFNGLVERTEXATTRIBBINDINGPROC)load("glVertexAttribBinding"); + glad_glVertexBindingDivisor = (PFNGLVERTEXBINDINGDIVISORPROC)load("glVertexBindingDivisor"); +} +static void load_GL_ARB_vertex_blend(GLADloadproc load) { + if(!GLAD_GL_ARB_vertex_blend) return; + glad_glWeightbvARB = (PFNGLWEIGHTBVARBPROC)load("glWeightbvARB"); + glad_glWeightsvARB = (PFNGLWEIGHTSVARBPROC)load("glWeightsvARB"); + glad_glWeightivARB = (PFNGLWEIGHTIVARBPROC)load("glWeightivARB"); + glad_glWeightfvARB = (PFNGLWEIGHTFVARBPROC)load("glWeightfvARB"); + glad_glWeightdvARB = (PFNGLWEIGHTDVARBPROC)load("glWeightdvARB"); + glad_glWeightubvARB = (PFNGLWEIGHTUBVARBPROC)load("glWeightubvARB"); + glad_glWeightusvARB = (PFNGLWEIGHTUSVARBPROC)load("glWeightusvARB"); + glad_glWeightuivARB = (PFNGLWEIGHTUIVARBPROC)load("glWeightuivARB"); + glad_glWeightPointerARB = (PFNGLWEIGHTPOINTERARBPROC)load("glWeightPointerARB"); + glad_glVertexBlendARB = (PFNGLVERTEXBLENDARBPROC)load("glVertexBlendARB"); +} +static void load_GL_ARB_vertex_buffer_object(GLADloadproc load) { + if(!GLAD_GL_ARB_vertex_buffer_object) return; + glad_glBindBufferARB = (PFNGLBINDBUFFERARBPROC)load("glBindBufferARB"); + glad_glDeleteBuffersARB = (PFNGLDELETEBUFFERSARBPROC)load("glDeleteBuffersARB"); + glad_glGenBuffersARB = (PFNGLGENBUFFERSARBPROC)load("glGenBuffersARB"); + glad_glIsBufferARB = (PFNGLISBUFFERARBPROC)load("glIsBufferARB"); + glad_glBufferDataARB = (PFNGLBUFFERDATAARBPROC)load("glBufferDataARB"); + glad_glBufferSubDataARB = (PFNGLBUFFERSUBDATAARBPROC)load("glBufferSubDataARB"); + 
glad_glGetBufferSubDataARB = (PFNGLGETBUFFERSUBDATAARBPROC)load("glGetBufferSubDataARB"); + glad_glMapBufferARB = (PFNGLMAPBUFFERARBPROC)load("glMapBufferARB"); + glad_glUnmapBufferARB = (PFNGLUNMAPBUFFERARBPROC)load("glUnmapBufferARB"); + glad_glGetBufferParameterivARB = (PFNGLGETBUFFERPARAMETERIVARBPROC)load("glGetBufferParameterivARB"); + glad_glGetBufferPointervARB = (PFNGLGETBUFFERPOINTERVARBPROC)load("glGetBufferPointervARB"); +} +static void load_GL_ARB_vertex_program(GLADloadproc load) { + if(!GLAD_GL_ARB_vertex_program) return; + glad_glVertexAttrib1dARB = (PFNGLVERTEXATTRIB1DARBPROC)load("glVertexAttrib1dARB"); + glad_glVertexAttrib1dvARB = (PFNGLVERTEXATTRIB1DVARBPROC)load("glVertexAttrib1dvARB"); + glad_glVertexAttrib1fARB = (PFNGLVERTEXATTRIB1FARBPROC)load("glVertexAttrib1fARB"); + glad_glVertexAttrib1fvARB = (PFNGLVERTEXATTRIB1FVARBPROC)load("glVertexAttrib1fvARB"); + glad_glVertexAttrib1sARB = (PFNGLVERTEXATTRIB1SARBPROC)load("glVertexAttrib1sARB"); + glad_glVertexAttrib1svARB = (PFNGLVERTEXATTRIB1SVARBPROC)load("glVertexAttrib1svARB"); + glad_glVertexAttrib2dARB = (PFNGLVERTEXATTRIB2DARBPROC)load("glVertexAttrib2dARB"); + glad_glVertexAttrib2dvARB = (PFNGLVERTEXATTRIB2DVARBPROC)load("glVertexAttrib2dvARB"); + glad_glVertexAttrib2fARB = (PFNGLVERTEXATTRIB2FARBPROC)load("glVertexAttrib2fARB"); + glad_glVertexAttrib2fvARB = (PFNGLVERTEXATTRIB2FVARBPROC)load("glVertexAttrib2fvARB"); + glad_glVertexAttrib2sARB = (PFNGLVERTEXATTRIB2SARBPROC)load("glVertexAttrib2sARB"); + glad_glVertexAttrib2svARB = (PFNGLVERTEXATTRIB2SVARBPROC)load("glVertexAttrib2svARB"); + glad_glVertexAttrib3dARB = (PFNGLVERTEXATTRIB3DARBPROC)load("glVertexAttrib3dARB"); + glad_glVertexAttrib3dvARB = (PFNGLVERTEXATTRIB3DVARBPROC)load("glVertexAttrib3dvARB"); + glad_glVertexAttrib3fARB = (PFNGLVERTEXATTRIB3FARBPROC)load("glVertexAttrib3fARB"); + glad_glVertexAttrib3fvARB = (PFNGLVERTEXATTRIB3FVARBPROC)load("glVertexAttrib3fvARB"); + glad_glVertexAttrib3sARB = 
(PFNGLVERTEXATTRIB3SARBPROC)load("glVertexAttrib3sARB"); + glad_glVertexAttrib3svARB = (PFNGLVERTEXATTRIB3SVARBPROC)load("glVertexAttrib3svARB"); + glad_glVertexAttrib4NbvARB = (PFNGLVERTEXATTRIB4NBVARBPROC)load("glVertexAttrib4NbvARB"); + glad_glVertexAttrib4NivARB = (PFNGLVERTEXATTRIB4NIVARBPROC)load("glVertexAttrib4NivARB"); + glad_glVertexAttrib4NsvARB = (PFNGLVERTEXATTRIB4NSVARBPROC)load("glVertexAttrib4NsvARB"); + glad_glVertexAttrib4NubARB = (PFNGLVERTEXATTRIB4NUBARBPROC)load("glVertexAttrib4NubARB"); + glad_glVertexAttrib4NubvARB = (PFNGLVERTEXATTRIB4NUBVARBPROC)load("glVertexAttrib4NubvARB"); + glad_glVertexAttrib4NuivARB = (PFNGLVERTEXATTRIB4NUIVARBPROC)load("glVertexAttrib4NuivARB"); + glad_glVertexAttrib4NusvARB = (PFNGLVERTEXATTRIB4NUSVARBPROC)load("glVertexAttrib4NusvARB"); + glad_glVertexAttrib4bvARB = (PFNGLVERTEXATTRIB4BVARBPROC)load("glVertexAttrib4bvARB"); + glad_glVertexAttrib4dARB = (PFNGLVERTEXATTRIB4DARBPROC)load("glVertexAttrib4dARB"); + glad_glVertexAttrib4dvARB = (PFNGLVERTEXATTRIB4DVARBPROC)load("glVertexAttrib4dvARB"); + glad_glVertexAttrib4fARB = (PFNGLVERTEXATTRIB4FARBPROC)load("glVertexAttrib4fARB"); + glad_glVertexAttrib4fvARB = (PFNGLVERTEXATTRIB4FVARBPROC)load("glVertexAttrib4fvARB"); + glad_glVertexAttrib4ivARB = (PFNGLVERTEXATTRIB4IVARBPROC)load("glVertexAttrib4ivARB"); + glad_glVertexAttrib4sARB = (PFNGLVERTEXATTRIB4SARBPROC)load("glVertexAttrib4sARB"); + glad_glVertexAttrib4svARB = (PFNGLVERTEXATTRIB4SVARBPROC)load("glVertexAttrib4svARB"); + glad_glVertexAttrib4ubvARB = (PFNGLVERTEXATTRIB4UBVARBPROC)load("glVertexAttrib4ubvARB"); + glad_glVertexAttrib4uivARB = (PFNGLVERTEXATTRIB4UIVARBPROC)load("glVertexAttrib4uivARB"); + glad_glVertexAttrib4usvARB = (PFNGLVERTEXATTRIB4USVARBPROC)load("glVertexAttrib4usvARB"); + glad_glVertexAttribPointerARB = (PFNGLVERTEXATTRIBPOINTERARBPROC)load("glVertexAttribPointerARB"); + glad_glEnableVertexAttribArrayARB = (PFNGLENABLEVERTEXATTRIBARRAYARBPROC)load("glEnableVertexAttribArrayARB"); + 
glad_glDisableVertexAttribArrayARB = (PFNGLDISABLEVERTEXATTRIBARRAYARBPROC)load("glDisableVertexAttribArrayARB"); + glad_glProgramStringARB = (PFNGLPROGRAMSTRINGARBPROC)load("glProgramStringARB"); + glad_glBindProgramARB = (PFNGLBINDPROGRAMARBPROC)load("glBindProgramARB"); + glad_glDeleteProgramsARB = (PFNGLDELETEPROGRAMSARBPROC)load("glDeleteProgramsARB"); + glad_glGenProgramsARB = (PFNGLGENPROGRAMSARBPROC)load("glGenProgramsARB"); + glad_glProgramEnvParameter4dARB = (PFNGLPROGRAMENVPARAMETER4DARBPROC)load("glProgramEnvParameter4dARB"); + glad_glProgramEnvParameter4dvARB = (PFNGLPROGRAMENVPARAMETER4DVARBPROC)load("glProgramEnvParameter4dvARB"); + glad_glProgramEnvParameter4fARB = (PFNGLPROGRAMENVPARAMETER4FARBPROC)load("glProgramEnvParameter4fARB"); + glad_glProgramEnvParameter4fvARB = (PFNGLPROGRAMENVPARAMETER4FVARBPROC)load("glProgramEnvParameter4fvARB"); + glad_glProgramLocalParameter4dARB = (PFNGLPROGRAMLOCALPARAMETER4DARBPROC)load("glProgramLocalParameter4dARB"); + glad_glProgramLocalParameter4dvARB = (PFNGLPROGRAMLOCALPARAMETER4DVARBPROC)load("glProgramLocalParameter4dvARB"); + glad_glProgramLocalParameter4fARB = (PFNGLPROGRAMLOCALPARAMETER4FARBPROC)load("glProgramLocalParameter4fARB"); + glad_glProgramLocalParameter4fvARB = (PFNGLPROGRAMLOCALPARAMETER4FVARBPROC)load("glProgramLocalParameter4fvARB"); + glad_glGetProgramEnvParameterdvARB = (PFNGLGETPROGRAMENVPARAMETERDVARBPROC)load("glGetProgramEnvParameterdvARB"); + glad_glGetProgramEnvParameterfvARB = (PFNGLGETPROGRAMENVPARAMETERFVARBPROC)load("glGetProgramEnvParameterfvARB"); + glad_glGetProgramLocalParameterdvARB = (PFNGLGETPROGRAMLOCALPARAMETERDVARBPROC)load("glGetProgramLocalParameterdvARB"); + glad_glGetProgramLocalParameterfvARB = (PFNGLGETPROGRAMLOCALPARAMETERFVARBPROC)load("glGetProgramLocalParameterfvARB"); + glad_glGetProgramivARB = (PFNGLGETPROGRAMIVARBPROC)load("glGetProgramivARB"); + glad_glGetProgramStringARB = (PFNGLGETPROGRAMSTRINGARBPROC)load("glGetProgramStringARB"); + 
glad_glGetVertexAttribdvARB = (PFNGLGETVERTEXATTRIBDVARBPROC)load("glGetVertexAttribdvARB"); + glad_glGetVertexAttribfvARB = (PFNGLGETVERTEXATTRIBFVARBPROC)load("glGetVertexAttribfvARB"); + glad_glGetVertexAttribivARB = (PFNGLGETVERTEXATTRIBIVARBPROC)load("glGetVertexAttribivARB"); + glad_glGetVertexAttribPointervARB = (PFNGLGETVERTEXATTRIBPOINTERVARBPROC)load("glGetVertexAttribPointervARB"); + glad_glIsProgramARB = (PFNGLISPROGRAMARBPROC)load("glIsProgramARB"); +} +static void load_GL_ARB_vertex_shader(GLADloadproc load) { + if(!GLAD_GL_ARB_vertex_shader) return; + glad_glVertexAttrib1fARB = (PFNGLVERTEXATTRIB1FARBPROC)load("glVertexAttrib1fARB"); + glad_glVertexAttrib1sARB = (PFNGLVERTEXATTRIB1SARBPROC)load("glVertexAttrib1sARB"); + glad_glVertexAttrib1dARB = (PFNGLVERTEXATTRIB1DARBPROC)load("glVertexAttrib1dARB"); + glad_glVertexAttrib2fARB = (PFNGLVERTEXATTRIB2FARBPROC)load("glVertexAttrib2fARB"); + glad_glVertexAttrib2sARB = (PFNGLVERTEXATTRIB2SARBPROC)load("glVertexAttrib2sARB"); + glad_glVertexAttrib2dARB = (PFNGLVERTEXATTRIB2DARBPROC)load("glVertexAttrib2dARB"); + glad_glVertexAttrib3fARB = (PFNGLVERTEXATTRIB3FARBPROC)load("glVertexAttrib3fARB"); + glad_glVertexAttrib3sARB = (PFNGLVERTEXATTRIB3SARBPROC)load("glVertexAttrib3sARB"); + glad_glVertexAttrib3dARB = (PFNGLVERTEXATTRIB3DARBPROC)load("glVertexAttrib3dARB"); + glad_glVertexAttrib4fARB = (PFNGLVERTEXATTRIB4FARBPROC)load("glVertexAttrib4fARB"); + glad_glVertexAttrib4sARB = (PFNGLVERTEXATTRIB4SARBPROC)load("glVertexAttrib4sARB"); + glad_glVertexAttrib4dARB = (PFNGLVERTEXATTRIB4DARBPROC)load("glVertexAttrib4dARB"); + glad_glVertexAttrib4NubARB = (PFNGLVERTEXATTRIB4NUBARBPROC)load("glVertexAttrib4NubARB"); + glad_glVertexAttrib1fvARB = (PFNGLVERTEXATTRIB1FVARBPROC)load("glVertexAttrib1fvARB"); + glad_glVertexAttrib1svARB = (PFNGLVERTEXATTRIB1SVARBPROC)load("glVertexAttrib1svARB"); + glad_glVertexAttrib1dvARB = (PFNGLVERTEXATTRIB1DVARBPROC)load("glVertexAttrib1dvARB"); + glad_glVertexAttrib2fvARB = 
(PFNGLVERTEXATTRIB2FVARBPROC)load("glVertexAttrib2fvARB"); + glad_glVertexAttrib2svARB = (PFNGLVERTEXATTRIB2SVARBPROC)load("glVertexAttrib2svARB"); + glad_glVertexAttrib2dvARB = (PFNGLVERTEXATTRIB2DVARBPROC)load("glVertexAttrib2dvARB"); + glad_glVertexAttrib3fvARB = (PFNGLVERTEXATTRIB3FVARBPROC)load("glVertexAttrib3fvARB"); + glad_glVertexAttrib3svARB = (PFNGLVERTEXATTRIB3SVARBPROC)load("glVertexAttrib3svARB"); + glad_glVertexAttrib3dvARB = (PFNGLVERTEXATTRIB3DVARBPROC)load("glVertexAttrib3dvARB"); + glad_glVertexAttrib4fvARB = (PFNGLVERTEXATTRIB4FVARBPROC)load("glVertexAttrib4fvARB"); + glad_glVertexAttrib4svARB = (PFNGLVERTEXATTRIB4SVARBPROC)load("glVertexAttrib4svARB"); + glad_glVertexAttrib4dvARB = (PFNGLVERTEXATTRIB4DVARBPROC)load("glVertexAttrib4dvARB"); + glad_glVertexAttrib4ivARB = (PFNGLVERTEXATTRIB4IVARBPROC)load("glVertexAttrib4ivARB"); + glad_glVertexAttrib4bvARB = (PFNGLVERTEXATTRIB4BVARBPROC)load("glVertexAttrib4bvARB"); + glad_glVertexAttrib4ubvARB = (PFNGLVERTEXATTRIB4UBVARBPROC)load("glVertexAttrib4ubvARB"); + glad_glVertexAttrib4usvARB = (PFNGLVERTEXATTRIB4USVARBPROC)load("glVertexAttrib4usvARB"); + glad_glVertexAttrib4uivARB = (PFNGLVERTEXATTRIB4UIVARBPROC)load("glVertexAttrib4uivARB"); + glad_glVertexAttrib4NbvARB = (PFNGLVERTEXATTRIB4NBVARBPROC)load("glVertexAttrib4NbvARB"); + glad_glVertexAttrib4NsvARB = (PFNGLVERTEXATTRIB4NSVARBPROC)load("glVertexAttrib4NsvARB"); + glad_glVertexAttrib4NivARB = (PFNGLVERTEXATTRIB4NIVARBPROC)load("glVertexAttrib4NivARB"); + glad_glVertexAttrib4NubvARB = (PFNGLVERTEXATTRIB4NUBVARBPROC)load("glVertexAttrib4NubvARB"); + glad_glVertexAttrib4NusvARB = (PFNGLVERTEXATTRIB4NUSVARBPROC)load("glVertexAttrib4NusvARB"); + glad_glVertexAttrib4NuivARB = (PFNGLVERTEXATTRIB4NUIVARBPROC)load("glVertexAttrib4NuivARB"); + glad_glVertexAttribPointerARB = (PFNGLVERTEXATTRIBPOINTERARBPROC)load("glVertexAttribPointerARB"); + glad_glEnableVertexAttribArrayARB = (PFNGLENABLEVERTEXATTRIBARRAYARBPROC)load("glEnableVertexAttribArrayARB"); 
+ glad_glDisableVertexAttribArrayARB = (PFNGLDISABLEVERTEXATTRIBARRAYARBPROC)load("glDisableVertexAttribArrayARB"); + glad_glBindAttribLocationARB = (PFNGLBINDATTRIBLOCATIONARBPROC)load("glBindAttribLocationARB"); + glad_glGetActiveAttribARB = (PFNGLGETACTIVEATTRIBARBPROC)load("glGetActiveAttribARB"); + glad_glGetAttribLocationARB = (PFNGLGETATTRIBLOCATIONARBPROC)load("glGetAttribLocationARB"); + glad_glGetVertexAttribdvARB = (PFNGLGETVERTEXATTRIBDVARBPROC)load("glGetVertexAttribdvARB"); + glad_glGetVertexAttribfvARB = (PFNGLGETVERTEXATTRIBFVARBPROC)load("glGetVertexAttribfvARB"); + glad_glGetVertexAttribivARB = (PFNGLGETVERTEXATTRIBIVARBPROC)load("glGetVertexAttribivARB"); + glad_glGetVertexAttribPointervARB = (PFNGLGETVERTEXATTRIBPOINTERVARBPROC)load("glGetVertexAttribPointervARB"); +} +static void load_GL_ARB_vertex_type_2_10_10_10_rev(GLADloadproc load) { + if(!GLAD_GL_ARB_vertex_type_2_10_10_10_rev) return; + glad_glVertexAttribP1ui = (PFNGLVERTEXATTRIBP1UIPROC)load("glVertexAttribP1ui"); + glad_glVertexAttribP1uiv = (PFNGLVERTEXATTRIBP1UIVPROC)load("glVertexAttribP1uiv"); + glad_glVertexAttribP2ui = (PFNGLVERTEXATTRIBP2UIPROC)load("glVertexAttribP2ui"); + glad_glVertexAttribP2uiv = (PFNGLVERTEXATTRIBP2UIVPROC)load("glVertexAttribP2uiv"); + glad_glVertexAttribP3ui = (PFNGLVERTEXATTRIBP3UIPROC)load("glVertexAttribP3ui"); + glad_glVertexAttribP3uiv = (PFNGLVERTEXATTRIBP3UIVPROC)load("glVertexAttribP3uiv"); + glad_glVertexAttribP4ui = (PFNGLVERTEXATTRIBP4UIPROC)load("glVertexAttribP4ui"); + glad_glVertexAttribP4uiv = (PFNGLVERTEXATTRIBP4UIVPROC)load("glVertexAttribP4uiv"); + glad_glVertexP2ui = (PFNGLVERTEXP2UIPROC)load("glVertexP2ui"); + glad_glVertexP2uiv = (PFNGLVERTEXP2UIVPROC)load("glVertexP2uiv"); + glad_glVertexP3ui = (PFNGLVERTEXP3UIPROC)load("glVertexP3ui"); + glad_glVertexP3uiv = (PFNGLVERTEXP3UIVPROC)load("glVertexP3uiv"); + glad_glVertexP4ui = (PFNGLVERTEXP4UIPROC)load("glVertexP4ui"); + glad_glVertexP4uiv = 
(PFNGLVERTEXP4UIVPROC)load("glVertexP4uiv"); + glad_glTexCoordP1ui = (PFNGLTEXCOORDP1UIPROC)load("glTexCoordP1ui"); + glad_glTexCoordP1uiv = (PFNGLTEXCOORDP1UIVPROC)load("glTexCoordP1uiv"); + glad_glTexCoordP2ui = (PFNGLTEXCOORDP2UIPROC)load("glTexCoordP2ui"); + glad_glTexCoordP2uiv = (PFNGLTEXCOORDP2UIVPROC)load("glTexCoordP2uiv"); + glad_glTexCoordP3ui = (PFNGLTEXCOORDP3UIPROC)load("glTexCoordP3ui"); + glad_glTexCoordP3uiv = (PFNGLTEXCOORDP3UIVPROC)load("glTexCoordP3uiv"); + glad_glTexCoordP4ui = (PFNGLTEXCOORDP4UIPROC)load("glTexCoordP4ui"); + glad_glTexCoordP4uiv = (PFNGLTEXCOORDP4UIVPROC)load("glTexCoordP4uiv"); + glad_glMultiTexCoordP1ui = (PFNGLMULTITEXCOORDP1UIPROC)load("glMultiTexCoordP1ui"); + glad_glMultiTexCoordP1uiv = (PFNGLMULTITEXCOORDP1UIVPROC)load("glMultiTexCoordP1uiv"); + glad_glMultiTexCoordP2ui = (PFNGLMULTITEXCOORDP2UIPROC)load("glMultiTexCoordP2ui"); + glad_glMultiTexCoordP2uiv = (PFNGLMULTITEXCOORDP2UIVPROC)load("glMultiTexCoordP2uiv"); + glad_glMultiTexCoordP3ui = (PFNGLMULTITEXCOORDP3UIPROC)load("glMultiTexCoordP3ui"); + glad_glMultiTexCoordP3uiv = (PFNGLMULTITEXCOORDP3UIVPROC)load("glMultiTexCoordP3uiv"); + glad_glMultiTexCoordP4ui = (PFNGLMULTITEXCOORDP4UIPROC)load("glMultiTexCoordP4ui"); + glad_glMultiTexCoordP4uiv = (PFNGLMULTITEXCOORDP4UIVPROC)load("glMultiTexCoordP4uiv"); + glad_glNormalP3ui = (PFNGLNORMALP3UIPROC)load("glNormalP3ui"); + glad_glNormalP3uiv = (PFNGLNORMALP3UIVPROC)load("glNormalP3uiv"); + glad_glColorP3ui = (PFNGLCOLORP3UIPROC)load("glColorP3ui"); + glad_glColorP3uiv = (PFNGLCOLORP3UIVPROC)load("glColorP3uiv"); + glad_glColorP4ui = (PFNGLCOLORP4UIPROC)load("glColorP4ui"); + glad_glColorP4uiv = (PFNGLCOLORP4UIVPROC)load("glColorP4uiv"); + glad_glSecondaryColorP3ui = (PFNGLSECONDARYCOLORP3UIPROC)load("glSecondaryColorP3ui"); + glad_glSecondaryColorP3uiv = (PFNGLSECONDARYCOLORP3UIVPROC)load("glSecondaryColorP3uiv"); +} +static void load_GL_ARB_viewport_array(GLADloadproc load) { + if(!GLAD_GL_ARB_viewport_array) return; 
+ glad_glViewportArrayv = (PFNGLVIEWPORTARRAYVPROC)load("glViewportArrayv"); + glad_glViewportIndexedf = (PFNGLVIEWPORTINDEXEDFPROC)load("glViewportIndexedf"); + glad_glViewportIndexedfv = (PFNGLVIEWPORTINDEXEDFVPROC)load("glViewportIndexedfv"); + glad_glScissorArrayv = (PFNGLSCISSORARRAYVPROC)load("glScissorArrayv"); + glad_glScissorIndexed = (PFNGLSCISSORINDEXEDPROC)load("glScissorIndexed"); + glad_glScissorIndexedv = (PFNGLSCISSORINDEXEDVPROC)load("glScissorIndexedv"); + glad_glDepthRangeArrayv = (PFNGLDEPTHRANGEARRAYVPROC)load("glDepthRangeArrayv"); + glad_glDepthRangeIndexed = (PFNGLDEPTHRANGEINDEXEDPROC)load("glDepthRangeIndexed"); + glad_glGetFloati_v = (PFNGLGETFLOATI_VPROC)load("glGetFloati_v"); + glad_glGetDoublei_v = (PFNGLGETDOUBLEI_VPROC)load("glGetDoublei_v"); + glad_glDepthRangeArraydvNV = (PFNGLDEPTHRANGEARRAYDVNVPROC)load("glDepthRangeArraydvNV"); + glad_glDepthRangeIndexeddNV = (PFNGLDEPTHRANGEINDEXEDDNVPROC)load("glDepthRangeIndexeddNV"); +} +static void load_GL_ARB_window_pos(GLADloadproc load) { + if(!GLAD_GL_ARB_window_pos) return; + glad_glWindowPos2dARB = (PFNGLWINDOWPOS2DARBPROC)load("glWindowPos2dARB"); + glad_glWindowPos2dvARB = (PFNGLWINDOWPOS2DVARBPROC)load("glWindowPos2dvARB"); + glad_glWindowPos2fARB = (PFNGLWINDOWPOS2FARBPROC)load("glWindowPos2fARB"); + glad_glWindowPos2fvARB = (PFNGLWINDOWPOS2FVARBPROC)load("glWindowPos2fvARB"); + glad_glWindowPos2iARB = (PFNGLWINDOWPOS2IARBPROC)load("glWindowPos2iARB"); + glad_glWindowPos2ivARB = (PFNGLWINDOWPOS2IVARBPROC)load("glWindowPos2ivARB"); + glad_glWindowPos2sARB = (PFNGLWINDOWPOS2SARBPROC)load("glWindowPos2sARB"); + glad_glWindowPos2svARB = (PFNGLWINDOWPOS2SVARBPROC)load("glWindowPos2svARB"); + glad_glWindowPos3dARB = (PFNGLWINDOWPOS3DARBPROC)load("glWindowPos3dARB"); + glad_glWindowPos3dvARB = (PFNGLWINDOWPOS3DVARBPROC)load("glWindowPos3dvARB"); + glad_glWindowPos3fARB = (PFNGLWINDOWPOS3FARBPROC)load("glWindowPos3fARB"); + glad_glWindowPos3fvARB = 
(PFNGLWINDOWPOS3FVARBPROC)load("glWindowPos3fvARB"); + glad_glWindowPos3iARB = (PFNGLWINDOWPOS3IARBPROC)load("glWindowPos3iARB"); + glad_glWindowPos3ivARB = (PFNGLWINDOWPOS3IVARBPROC)load("glWindowPos3ivARB"); + glad_glWindowPos3sARB = (PFNGLWINDOWPOS3SARBPROC)load("glWindowPos3sARB"); + glad_glWindowPos3svARB = (PFNGLWINDOWPOS3SVARBPROC)load("glWindowPos3svARB"); +} +static void load_GL_ATI_draw_buffers(GLADloadproc load) { + if(!GLAD_GL_ATI_draw_buffers) return; + glad_glDrawBuffersATI = (PFNGLDRAWBUFFERSATIPROC)load("glDrawBuffersATI"); +} +static void load_GL_ATI_element_array(GLADloadproc load) { + if(!GLAD_GL_ATI_element_array) return; + glad_glElementPointerATI = (PFNGLELEMENTPOINTERATIPROC)load("glElementPointerATI"); + glad_glDrawElementArrayATI = (PFNGLDRAWELEMENTARRAYATIPROC)load("glDrawElementArrayATI"); + glad_glDrawRangeElementArrayATI = (PFNGLDRAWRANGEELEMENTARRAYATIPROC)load("glDrawRangeElementArrayATI"); +} +static void load_GL_ATI_envmap_bumpmap(GLADloadproc load) { + if(!GLAD_GL_ATI_envmap_bumpmap) return; + glad_glTexBumpParameterivATI = (PFNGLTEXBUMPPARAMETERIVATIPROC)load("glTexBumpParameterivATI"); + glad_glTexBumpParameterfvATI = (PFNGLTEXBUMPPARAMETERFVATIPROC)load("glTexBumpParameterfvATI"); + glad_glGetTexBumpParameterivATI = (PFNGLGETTEXBUMPPARAMETERIVATIPROC)load("glGetTexBumpParameterivATI"); + glad_glGetTexBumpParameterfvATI = (PFNGLGETTEXBUMPPARAMETERFVATIPROC)load("glGetTexBumpParameterfvATI"); +} +static void load_GL_ATI_fragment_shader(GLADloadproc load) { + if(!GLAD_GL_ATI_fragment_shader) return; + glad_glGenFragmentShadersATI = (PFNGLGENFRAGMENTSHADERSATIPROC)load("glGenFragmentShadersATI"); + glad_glBindFragmentShaderATI = (PFNGLBINDFRAGMENTSHADERATIPROC)load("glBindFragmentShaderATI"); + glad_glDeleteFragmentShaderATI = (PFNGLDELETEFRAGMENTSHADERATIPROC)load("glDeleteFragmentShaderATI"); + glad_glBeginFragmentShaderATI = (PFNGLBEGINFRAGMENTSHADERATIPROC)load("glBeginFragmentShaderATI"); + glad_glEndFragmentShaderATI = 
(PFNGLENDFRAGMENTSHADERATIPROC)load("glEndFragmentShaderATI"); + glad_glPassTexCoordATI = (PFNGLPASSTEXCOORDATIPROC)load("glPassTexCoordATI"); + glad_glSampleMapATI = (PFNGLSAMPLEMAPATIPROC)load("glSampleMapATI"); + glad_glColorFragmentOp1ATI = (PFNGLCOLORFRAGMENTOP1ATIPROC)load("glColorFragmentOp1ATI"); + glad_glColorFragmentOp2ATI = (PFNGLCOLORFRAGMENTOP2ATIPROC)load("glColorFragmentOp2ATI"); + glad_glColorFragmentOp3ATI = (PFNGLCOLORFRAGMENTOP3ATIPROC)load("glColorFragmentOp3ATI"); + glad_glAlphaFragmentOp1ATI = (PFNGLALPHAFRAGMENTOP1ATIPROC)load("glAlphaFragmentOp1ATI"); + glad_glAlphaFragmentOp2ATI = (PFNGLALPHAFRAGMENTOP2ATIPROC)load("glAlphaFragmentOp2ATI"); + glad_glAlphaFragmentOp3ATI = (PFNGLALPHAFRAGMENTOP3ATIPROC)load("glAlphaFragmentOp3ATI"); + glad_glSetFragmentShaderConstantATI = (PFNGLSETFRAGMENTSHADERCONSTANTATIPROC)load("glSetFragmentShaderConstantATI"); +} +static void load_GL_ATI_map_object_buffer(GLADloadproc load) { + if(!GLAD_GL_ATI_map_object_buffer) return; + glad_glMapObjectBufferATI = (PFNGLMAPOBJECTBUFFERATIPROC)load("glMapObjectBufferATI"); + glad_glUnmapObjectBufferATI = (PFNGLUNMAPOBJECTBUFFERATIPROC)load("glUnmapObjectBufferATI"); +} +static void load_GL_ATI_pn_triangles(GLADloadproc load) { + if(!GLAD_GL_ATI_pn_triangles) return; + glad_glPNTrianglesiATI = (PFNGLPNTRIANGLESIATIPROC)load("glPNTrianglesiATI"); + glad_glPNTrianglesfATI = (PFNGLPNTRIANGLESFATIPROC)load("glPNTrianglesfATI"); +} +static void load_GL_ATI_separate_stencil(GLADloadproc load) { + if(!GLAD_GL_ATI_separate_stencil) return; + glad_glStencilOpSeparateATI = (PFNGLSTENCILOPSEPARATEATIPROC)load("glStencilOpSeparateATI"); + glad_glStencilFuncSeparateATI = (PFNGLSTENCILFUNCSEPARATEATIPROC)load("glStencilFuncSeparateATI"); +} +static void load_GL_ATI_vertex_array_object(GLADloadproc load) { + if(!GLAD_GL_ATI_vertex_array_object) return; + glad_glNewObjectBufferATI = (PFNGLNEWOBJECTBUFFERATIPROC)load("glNewObjectBufferATI"); + glad_glIsObjectBufferATI = 
(PFNGLISOBJECTBUFFERATIPROC)load("glIsObjectBufferATI"); + glad_glUpdateObjectBufferATI = (PFNGLUPDATEOBJECTBUFFERATIPROC)load("glUpdateObjectBufferATI"); + glad_glGetObjectBufferfvATI = (PFNGLGETOBJECTBUFFERFVATIPROC)load("glGetObjectBufferfvATI"); + glad_glGetObjectBufferivATI = (PFNGLGETOBJECTBUFFERIVATIPROC)load("glGetObjectBufferivATI"); + glad_glFreeObjectBufferATI = (PFNGLFREEOBJECTBUFFERATIPROC)load("glFreeObjectBufferATI"); + glad_glArrayObjectATI = (PFNGLARRAYOBJECTATIPROC)load("glArrayObjectATI"); + glad_glGetArrayObjectfvATI = (PFNGLGETARRAYOBJECTFVATIPROC)load("glGetArrayObjectfvATI"); + glad_glGetArrayObjectivATI = (PFNGLGETARRAYOBJECTIVATIPROC)load("glGetArrayObjectivATI"); + glad_glVariantArrayObjectATI = (PFNGLVARIANTARRAYOBJECTATIPROC)load("glVariantArrayObjectATI"); + glad_glGetVariantArrayObjectfvATI = (PFNGLGETVARIANTARRAYOBJECTFVATIPROC)load("glGetVariantArrayObjectfvATI"); + glad_glGetVariantArrayObjectivATI = (PFNGLGETVARIANTARRAYOBJECTIVATIPROC)load("glGetVariantArrayObjectivATI"); +} +static void load_GL_ATI_vertex_attrib_array_object(GLADloadproc load) { + if(!GLAD_GL_ATI_vertex_attrib_array_object) return; + glad_glVertexAttribArrayObjectATI = (PFNGLVERTEXATTRIBARRAYOBJECTATIPROC)load("glVertexAttribArrayObjectATI"); + glad_glGetVertexAttribArrayObjectfvATI = (PFNGLGETVERTEXATTRIBARRAYOBJECTFVATIPROC)load("glGetVertexAttribArrayObjectfvATI"); + glad_glGetVertexAttribArrayObjectivATI = (PFNGLGETVERTEXATTRIBARRAYOBJECTIVATIPROC)load("glGetVertexAttribArrayObjectivATI"); +} +static void load_GL_ATI_vertex_streams(GLADloadproc load) { + if(!GLAD_GL_ATI_vertex_streams) return; + glad_glVertexStream1sATI = (PFNGLVERTEXSTREAM1SATIPROC)load("glVertexStream1sATI"); + glad_glVertexStream1svATI = (PFNGLVERTEXSTREAM1SVATIPROC)load("glVertexStream1svATI"); + glad_glVertexStream1iATI = (PFNGLVERTEXSTREAM1IATIPROC)load("glVertexStream1iATI"); + glad_glVertexStream1ivATI = (PFNGLVERTEXSTREAM1IVATIPROC)load("glVertexStream1ivATI"); + 
glad_glVertexStream1fATI = (PFNGLVERTEXSTREAM1FATIPROC)load("glVertexStream1fATI"); + glad_glVertexStream1fvATI = (PFNGLVERTEXSTREAM1FVATIPROC)load("glVertexStream1fvATI"); + glad_glVertexStream1dATI = (PFNGLVERTEXSTREAM1DATIPROC)load("glVertexStream1dATI"); + glad_glVertexStream1dvATI = (PFNGLVERTEXSTREAM1DVATIPROC)load("glVertexStream1dvATI"); + glad_glVertexStream2sATI = (PFNGLVERTEXSTREAM2SATIPROC)load("glVertexStream2sATI"); + glad_glVertexStream2svATI = (PFNGLVERTEXSTREAM2SVATIPROC)load("glVertexStream2svATI"); + glad_glVertexStream2iATI = (PFNGLVERTEXSTREAM2IATIPROC)load("glVertexStream2iATI"); + glad_glVertexStream2ivATI = (PFNGLVERTEXSTREAM2IVATIPROC)load("glVertexStream2ivATI"); + glad_glVertexStream2fATI = (PFNGLVERTEXSTREAM2FATIPROC)load("glVertexStream2fATI"); + glad_glVertexStream2fvATI = (PFNGLVERTEXSTREAM2FVATIPROC)load("glVertexStream2fvATI"); + glad_glVertexStream2dATI = (PFNGLVERTEXSTREAM2DATIPROC)load("glVertexStream2dATI"); + glad_glVertexStream2dvATI = (PFNGLVERTEXSTREAM2DVATIPROC)load("glVertexStream2dvATI"); + glad_glVertexStream3sATI = (PFNGLVERTEXSTREAM3SATIPROC)load("glVertexStream3sATI"); + glad_glVertexStream3svATI = (PFNGLVERTEXSTREAM3SVATIPROC)load("glVertexStream3svATI"); + glad_glVertexStream3iATI = (PFNGLVERTEXSTREAM3IATIPROC)load("glVertexStream3iATI"); + glad_glVertexStream3ivATI = (PFNGLVERTEXSTREAM3IVATIPROC)load("glVertexStream3ivATI"); + glad_glVertexStream3fATI = (PFNGLVERTEXSTREAM3FATIPROC)load("glVertexStream3fATI"); + glad_glVertexStream3fvATI = (PFNGLVERTEXSTREAM3FVATIPROC)load("glVertexStream3fvATI"); + glad_glVertexStream3dATI = (PFNGLVERTEXSTREAM3DATIPROC)load("glVertexStream3dATI"); + glad_glVertexStream3dvATI = (PFNGLVERTEXSTREAM3DVATIPROC)load("glVertexStream3dvATI"); + glad_glVertexStream4sATI = (PFNGLVERTEXSTREAM4SATIPROC)load("glVertexStream4sATI"); + glad_glVertexStream4svATI = (PFNGLVERTEXSTREAM4SVATIPROC)load("glVertexStream4svATI"); + glad_glVertexStream4iATI = 
(PFNGLVERTEXSTREAM4IATIPROC)load("glVertexStream4iATI"); + glad_glVertexStream4ivATI = (PFNGLVERTEXSTREAM4IVATIPROC)load("glVertexStream4ivATI"); + glad_glVertexStream4fATI = (PFNGLVERTEXSTREAM4FATIPROC)load("glVertexStream4fATI"); + glad_glVertexStream4fvATI = (PFNGLVERTEXSTREAM4FVATIPROC)load("glVertexStream4fvATI"); + glad_glVertexStream4dATI = (PFNGLVERTEXSTREAM4DATIPROC)load("glVertexStream4dATI"); + glad_glVertexStream4dvATI = (PFNGLVERTEXSTREAM4DVATIPROC)load("glVertexStream4dvATI"); + glad_glNormalStream3bATI = (PFNGLNORMALSTREAM3BATIPROC)load("glNormalStream3bATI"); + glad_glNormalStream3bvATI = (PFNGLNORMALSTREAM3BVATIPROC)load("glNormalStream3bvATI"); + glad_glNormalStream3sATI = (PFNGLNORMALSTREAM3SATIPROC)load("glNormalStream3sATI"); + glad_glNormalStream3svATI = (PFNGLNORMALSTREAM3SVATIPROC)load("glNormalStream3svATI"); + glad_glNormalStream3iATI = (PFNGLNORMALSTREAM3IATIPROC)load("glNormalStream3iATI"); + glad_glNormalStream3ivATI = (PFNGLNORMALSTREAM3IVATIPROC)load("glNormalStream3ivATI"); + glad_glNormalStream3fATI = (PFNGLNORMALSTREAM3FATIPROC)load("glNormalStream3fATI"); + glad_glNormalStream3fvATI = (PFNGLNORMALSTREAM3FVATIPROC)load("glNormalStream3fvATI"); + glad_glNormalStream3dATI = (PFNGLNORMALSTREAM3DATIPROC)load("glNormalStream3dATI"); + glad_glNormalStream3dvATI = (PFNGLNORMALSTREAM3DVATIPROC)load("glNormalStream3dvATI"); + glad_glClientActiveVertexStreamATI = (PFNGLCLIENTACTIVEVERTEXSTREAMATIPROC)load("glClientActiveVertexStreamATI"); + glad_glVertexBlendEnviATI = (PFNGLVERTEXBLENDENVIATIPROC)load("glVertexBlendEnviATI"); + glad_glVertexBlendEnvfATI = (PFNGLVERTEXBLENDENVFATIPROC)load("glVertexBlendEnvfATI"); +} +static void load_GL_EXT_EGL_image_storage(GLADloadproc load) { + if(!GLAD_GL_EXT_EGL_image_storage) return; + glad_glEGLImageTargetTexStorageEXT = (PFNGLEGLIMAGETARGETTEXSTORAGEEXTPROC)load("glEGLImageTargetTexStorageEXT"); + glad_glEGLImageTargetTextureStorageEXT = 
(PFNGLEGLIMAGETARGETTEXTURESTORAGEEXTPROC)load("glEGLImageTargetTextureStorageEXT"); +} +static void load_GL_EXT_bindable_uniform(GLADloadproc load) { + if(!GLAD_GL_EXT_bindable_uniform) return; + glad_glUniformBufferEXT = (PFNGLUNIFORMBUFFEREXTPROC)load("glUniformBufferEXT"); + glad_glGetUniformBufferSizeEXT = (PFNGLGETUNIFORMBUFFERSIZEEXTPROC)load("glGetUniformBufferSizeEXT"); + glad_glGetUniformOffsetEXT = (PFNGLGETUNIFORMOFFSETEXTPROC)load("glGetUniformOffsetEXT"); +} +static void load_GL_EXT_blend_color(GLADloadproc load) { + if(!GLAD_GL_EXT_blend_color) return; + glad_glBlendColorEXT = (PFNGLBLENDCOLOREXTPROC)load("glBlendColorEXT"); +} +static void load_GL_EXT_blend_equation_separate(GLADloadproc load) { + if(!GLAD_GL_EXT_blend_equation_separate) return; + glad_glBlendEquationSeparateEXT = (PFNGLBLENDEQUATIONSEPARATEEXTPROC)load("glBlendEquationSeparateEXT"); +} +static void load_GL_EXT_blend_func_separate(GLADloadproc load) { + if(!GLAD_GL_EXT_blend_func_separate) return; + glad_glBlendFuncSeparateEXT = (PFNGLBLENDFUNCSEPARATEEXTPROC)load("glBlendFuncSeparateEXT"); +} +static void load_GL_EXT_blend_minmax(GLADloadproc load) { + if(!GLAD_GL_EXT_blend_minmax) return; + glad_glBlendEquationEXT = (PFNGLBLENDEQUATIONEXTPROC)load("glBlendEquationEXT"); +} +static void load_GL_EXT_color_subtable(GLADloadproc load) { + if(!GLAD_GL_EXT_color_subtable) return; + glad_glColorSubTableEXT = (PFNGLCOLORSUBTABLEEXTPROC)load("glColorSubTableEXT"); + glad_glCopyColorSubTableEXT = (PFNGLCOPYCOLORSUBTABLEEXTPROC)load("glCopyColorSubTableEXT"); +} +static void load_GL_EXT_compiled_vertex_array(GLADloadproc load) { + if(!GLAD_GL_EXT_compiled_vertex_array) return; + glad_glLockArraysEXT = (PFNGLLOCKARRAYSEXTPROC)load("glLockArraysEXT"); + glad_glUnlockArraysEXT = (PFNGLUNLOCKARRAYSEXTPROC)load("glUnlockArraysEXT"); +} +static void load_GL_EXT_convolution(GLADloadproc load) { + if(!GLAD_GL_EXT_convolution) return; + glad_glConvolutionFilter1DEXT = 
(PFNGLCONVOLUTIONFILTER1DEXTPROC)load("glConvolutionFilter1DEXT"); + glad_glConvolutionFilter2DEXT = (PFNGLCONVOLUTIONFILTER2DEXTPROC)load("glConvolutionFilter2DEXT"); + glad_glConvolutionParameterfEXT = (PFNGLCONVOLUTIONPARAMETERFEXTPROC)load("glConvolutionParameterfEXT"); + glad_glConvolutionParameterfvEXT = (PFNGLCONVOLUTIONPARAMETERFVEXTPROC)load("glConvolutionParameterfvEXT"); + glad_glConvolutionParameteriEXT = (PFNGLCONVOLUTIONPARAMETERIEXTPROC)load("glConvolutionParameteriEXT"); + glad_glConvolutionParameterivEXT = (PFNGLCONVOLUTIONPARAMETERIVEXTPROC)load("glConvolutionParameterivEXT"); + glad_glCopyConvolutionFilter1DEXT = (PFNGLCOPYCONVOLUTIONFILTER1DEXTPROC)load("glCopyConvolutionFilter1DEXT"); + glad_glCopyConvolutionFilter2DEXT = (PFNGLCOPYCONVOLUTIONFILTER2DEXTPROC)load("glCopyConvolutionFilter2DEXT"); + glad_glGetConvolutionFilterEXT = (PFNGLGETCONVOLUTIONFILTEREXTPROC)load("glGetConvolutionFilterEXT"); + glad_glGetConvolutionParameterfvEXT = (PFNGLGETCONVOLUTIONPARAMETERFVEXTPROC)load("glGetConvolutionParameterfvEXT"); + glad_glGetConvolutionParameterivEXT = (PFNGLGETCONVOLUTIONPARAMETERIVEXTPROC)load("glGetConvolutionParameterivEXT"); + glad_glGetSeparableFilterEXT = (PFNGLGETSEPARABLEFILTEREXTPROC)load("glGetSeparableFilterEXT"); + glad_glSeparableFilter2DEXT = (PFNGLSEPARABLEFILTER2DEXTPROC)load("glSeparableFilter2DEXT"); +} +static void load_GL_EXT_coordinate_frame(GLADloadproc load) { + if(!GLAD_GL_EXT_coordinate_frame) return; + glad_glTangent3bEXT = (PFNGLTANGENT3BEXTPROC)load("glTangent3bEXT"); + glad_glTangent3bvEXT = (PFNGLTANGENT3BVEXTPROC)load("glTangent3bvEXT"); + glad_glTangent3dEXT = (PFNGLTANGENT3DEXTPROC)load("glTangent3dEXT"); + glad_glTangent3dvEXT = (PFNGLTANGENT3DVEXTPROC)load("glTangent3dvEXT"); + glad_glTangent3fEXT = (PFNGLTANGENT3FEXTPROC)load("glTangent3fEXT"); + glad_glTangent3fvEXT = (PFNGLTANGENT3FVEXTPROC)load("glTangent3fvEXT"); + glad_glTangent3iEXT = (PFNGLTANGENT3IEXTPROC)load("glTangent3iEXT"); + 
glad_glTangent3ivEXT = (PFNGLTANGENT3IVEXTPROC)load("glTangent3ivEXT"); + glad_glTangent3sEXT = (PFNGLTANGENT3SEXTPROC)load("glTangent3sEXT"); + glad_glTangent3svEXT = (PFNGLTANGENT3SVEXTPROC)load("glTangent3svEXT"); + glad_glBinormal3bEXT = (PFNGLBINORMAL3BEXTPROC)load("glBinormal3bEXT"); + glad_glBinormal3bvEXT = (PFNGLBINORMAL3BVEXTPROC)load("glBinormal3bvEXT"); + glad_glBinormal3dEXT = (PFNGLBINORMAL3DEXTPROC)load("glBinormal3dEXT"); + glad_glBinormal3dvEXT = (PFNGLBINORMAL3DVEXTPROC)load("glBinormal3dvEXT"); + glad_glBinormal3fEXT = (PFNGLBINORMAL3FEXTPROC)load("glBinormal3fEXT"); + glad_glBinormal3fvEXT = (PFNGLBINORMAL3FVEXTPROC)load("glBinormal3fvEXT"); + glad_glBinormal3iEXT = (PFNGLBINORMAL3IEXTPROC)load("glBinormal3iEXT"); + glad_glBinormal3ivEXT = (PFNGLBINORMAL3IVEXTPROC)load("glBinormal3ivEXT"); + glad_glBinormal3sEXT = (PFNGLBINORMAL3SEXTPROC)load("glBinormal3sEXT"); + glad_glBinormal3svEXT = (PFNGLBINORMAL3SVEXTPROC)load("glBinormal3svEXT"); + glad_glTangentPointerEXT = (PFNGLTANGENTPOINTEREXTPROC)load("glTangentPointerEXT"); + glad_glBinormalPointerEXT = (PFNGLBINORMALPOINTEREXTPROC)load("glBinormalPointerEXT"); +} +static void load_GL_EXT_copy_texture(GLADloadproc load) { + if(!GLAD_GL_EXT_copy_texture) return; + glad_glCopyTexImage1DEXT = (PFNGLCOPYTEXIMAGE1DEXTPROC)load("glCopyTexImage1DEXT"); + glad_glCopyTexImage2DEXT = (PFNGLCOPYTEXIMAGE2DEXTPROC)load("glCopyTexImage2DEXT"); + glad_glCopyTexSubImage1DEXT = (PFNGLCOPYTEXSUBIMAGE1DEXTPROC)load("glCopyTexSubImage1DEXT"); + glad_glCopyTexSubImage2DEXT = (PFNGLCOPYTEXSUBIMAGE2DEXTPROC)load("glCopyTexSubImage2DEXT"); + glad_glCopyTexSubImage3DEXT = (PFNGLCOPYTEXSUBIMAGE3DEXTPROC)load("glCopyTexSubImage3DEXT"); +} +static void load_GL_EXT_cull_vertex(GLADloadproc load) { + if(!GLAD_GL_EXT_cull_vertex) return; + glad_glCullParameterdvEXT = (PFNGLCULLPARAMETERDVEXTPROC)load("glCullParameterdvEXT"); + glad_glCullParameterfvEXT = (PFNGLCULLPARAMETERFVEXTPROC)load("glCullParameterfvEXT"); +} +static void 
load_GL_EXT_debug_label(GLADloadproc load) { + if(!GLAD_GL_EXT_debug_label) return; + glad_glLabelObjectEXT = (PFNGLLABELOBJECTEXTPROC)load("glLabelObjectEXT"); + glad_glGetObjectLabelEXT = (PFNGLGETOBJECTLABELEXTPROC)load("glGetObjectLabelEXT"); +} +static void load_GL_EXT_debug_marker(GLADloadproc load) { + if(!GLAD_GL_EXT_debug_marker) return; + glad_glInsertEventMarkerEXT = (PFNGLINSERTEVENTMARKEREXTPROC)load("glInsertEventMarkerEXT"); + glad_glPushGroupMarkerEXT = (PFNGLPUSHGROUPMARKEREXTPROC)load("glPushGroupMarkerEXT"); + glad_glPopGroupMarkerEXT = (PFNGLPOPGROUPMARKEREXTPROC)load("glPopGroupMarkerEXT"); +} +static void load_GL_EXT_depth_bounds_test(GLADloadproc load) { + if(!GLAD_GL_EXT_depth_bounds_test) return; + glad_glDepthBoundsEXT = (PFNGLDEPTHBOUNDSEXTPROC)load("glDepthBoundsEXT"); +} +static void load_GL_EXT_direct_state_access(GLADloadproc load) { + if(!GLAD_GL_EXT_direct_state_access) return; + glad_glMatrixLoadfEXT = (PFNGLMATRIXLOADFEXTPROC)load("glMatrixLoadfEXT"); + glad_glMatrixLoaddEXT = (PFNGLMATRIXLOADDEXTPROC)load("glMatrixLoaddEXT"); + glad_glMatrixMultfEXT = (PFNGLMATRIXMULTFEXTPROC)load("glMatrixMultfEXT"); + glad_glMatrixMultdEXT = (PFNGLMATRIXMULTDEXTPROC)load("glMatrixMultdEXT"); + glad_glMatrixLoadIdentityEXT = (PFNGLMATRIXLOADIDENTITYEXTPROC)load("glMatrixLoadIdentityEXT"); + glad_glMatrixRotatefEXT = (PFNGLMATRIXROTATEFEXTPROC)load("glMatrixRotatefEXT"); + glad_glMatrixRotatedEXT = (PFNGLMATRIXROTATEDEXTPROC)load("glMatrixRotatedEXT"); + glad_glMatrixScalefEXT = (PFNGLMATRIXSCALEFEXTPROC)load("glMatrixScalefEXT"); + glad_glMatrixScaledEXT = (PFNGLMATRIXSCALEDEXTPROC)load("glMatrixScaledEXT"); + glad_glMatrixTranslatefEXT = (PFNGLMATRIXTRANSLATEFEXTPROC)load("glMatrixTranslatefEXT"); + glad_glMatrixTranslatedEXT = (PFNGLMATRIXTRANSLATEDEXTPROC)load("glMatrixTranslatedEXT"); + glad_glMatrixFrustumEXT = (PFNGLMATRIXFRUSTUMEXTPROC)load("glMatrixFrustumEXT"); + glad_glMatrixOrthoEXT = (PFNGLMATRIXORTHOEXTPROC)load("glMatrixOrthoEXT"); 
+ glad_glMatrixPopEXT = (PFNGLMATRIXPOPEXTPROC)load("glMatrixPopEXT"); + glad_glMatrixPushEXT = (PFNGLMATRIXPUSHEXTPROC)load("glMatrixPushEXT"); + glad_glClientAttribDefaultEXT = (PFNGLCLIENTATTRIBDEFAULTEXTPROC)load("glClientAttribDefaultEXT"); + glad_glPushClientAttribDefaultEXT = (PFNGLPUSHCLIENTATTRIBDEFAULTEXTPROC)load("glPushClientAttribDefaultEXT"); + glad_glTextureParameterfEXT = (PFNGLTEXTUREPARAMETERFEXTPROC)load("glTextureParameterfEXT"); + glad_glTextureParameterfvEXT = (PFNGLTEXTUREPARAMETERFVEXTPROC)load("glTextureParameterfvEXT"); + glad_glTextureParameteriEXT = (PFNGLTEXTUREPARAMETERIEXTPROC)load("glTextureParameteriEXT"); + glad_glTextureParameterivEXT = (PFNGLTEXTUREPARAMETERIVEXTPROC)load("glTextureParameterivEXT"); + glad_glTextureImage1DEXT = (PFNGLTEXTUREIMAGE1DEXTPROC)load("glTextureImage1DEXT"); + glad_glTextureImage2DEXT = (PFNGLTEXTUREIMAGE2DEXTPROC)load("glTextureImage2DEXT"); + glad_glTextureSubImage1DEXT = (PFNGLTEXTURESUBIMAGE1DEXTPROC)load("glTextureSubImage1DEXT"); + glad_glTextureSubImage2DEXT = (PFNGLTEXTURESUBIMAGE2DEXTPROC)load("glTextureSubImage2DEXT"); + glad_glCopyTextureImage1DEXT = (PFNGLCOPYTEXTUREIMAGE1DEXTPROC)load("glCopyTextureImage1DEXT"); + glad_glCopyTextureImage2DEXT = (PFNGLCOPYTEXTUREIMAGE2DEXTPROC)load("glCopyTextureImage2DEXT"); + glad_glCopyTextureSubImage1DEXT = (PFNGLCOPYTEXTURESUBIMAGE1DEXTPROC)load("glCopyTextureSubImage1DEXT"); + glad_glCopyTextureSubImage2DEXT = (PFNGLCOPYTEXTURESUBIMAGE2DEXTPROC)load("glCopyTextureSubImage2DEXT"); + glad_glGetTextureImageEXT = (PFNGLGETTEXTUREIMAGEEXTPROC)load("glGetTextureImageEXT"); + glad_glGetTextureParameterfvEXT = (PFNGLGETTEXTUREPARAMETERFVEXTPROC)load("glGetTextureParameterfvEXT"); + glad_glGetTextureParameterivEXT = (PFNGLGETTEXTUREPARAMETERIVEXTPROC)load("glGetTextureParameterivEXT"); + glad_glGetTextureLevelParameterfvEXT = (PFNGLGETTEXTURELEVELPARAMETERFVEXTPROC)load("glGetTextureLevelParameterfvEXT"); + glad_glGetTextureLevelParameterivEXT = 
(PFNGLGETTEXTURELEVELPARAMETERIVEXTPROC)load("glGetTextureLevelParameterivEXT"); + glad_glTextureImage3DEXT = (PFNGLTEXTUREIMAGE3DEXTPROC)load("glTextureImage3DEXT"); + glad_glTextureSubImage3DEXT = (PFNGLTEXTURESUBIMAGE3DEXTPROC)load("glTextureSubImage3DEXT"); + glad_glCopyTextureSubImage3DEXT = (PFNGLCOPYTEXTURESUBIMAGE3DEXTPROC)load("glCopyTextureSubImage3DEXT"); + glad_glBindMultiTextureEXT = (PFNGLBINDMULTITEXTUREEXTPROC)load("glBindMultiTextureEXT"); + glad_glMultiTexCoordPointerEXT = (PFNGLMULTITEXCOORDPOINTEREXTPROC)load("glMultiTexCoordPointerEXT"); + glad_glMultiTexEnvfEXT = (PFNGLMULTITEXENVFEXTPROC)load("glMultiTexEnvfEXT"); + glad_glMultiTexEnvfvEXT = (PFNGLMULTITEXENVFVEXTPROC)load("glMultiTexEnvfvEXT"); + glad_glMultiTexEnviEXT = (PFNGLMULTITEXENVIEXTPROC)load("glMultiTexEnviEXT"); + glad_glMultiTexEnvivEXT = (PFNGLMULTITEXENVIVEXTPROC)load("glMultiTexEnvivEXT"); + glad_glMultiTexGendEXT = (PFNGLMULTITEXGENDEXTPROC)load("glMultiTexGendEXT"); + glad_glMultiTexGendvEXT = (PFNGLMULTITEXGENDVEXTPROC)load("glMultiTexGendvEXT"); + glad_glMultiTexGenfEXT = (PFNGLMULTITEXGENFEXTPROC)load("glMultiTexGenfEXT"); + glad_glMultiTexGenfvEXT = (PFNGLMULTITEXGENFVEXTPROC)load("glMultiTexGenfvEXT"); + glad_glMultiTexGeniEXT = (PFNGLMULTITEXGENIEXTPROC)load("glMultiTexGeniEXT"); + glad_glMultiTexGenivEXT = (PFNGLMULTITEXGENIVEXTPROC)load("glMultiTexGenivEXT"); + glad_glGetMultiTexEnvfvEXT = (PFNGLGETMULTITEXENVFVEXTPROC)load("glGetMultiTexEnvfvEXT"); + glad_glGetMultiTexEnvivEXT = (PFNGLGETMULTITEXENVIVEXTPROC)load("glGetMultiTexEnvivEXT"); + glad_glGetMultiTexGendvEXT = (PFNGLGETMULTITEXGENDVEXTPROC)load("glGetMultiTexGendvEXT"); + glad_glGetMultiTexGenfvEXT = (PFNGLGETMULTITEXGENFVEXTPROC)load("glGetMultiTexGenfvEXT"); + glad_glGetMultiTexGenivEXT = (PFNGLGETMULTITEXGENIVEXTPROC)load("glGetMultiTexGenivEXT"); + glad_glMultiTexParameteriEXT = (PFNGLMULTITEXPARAMETERIEXTPROC)load("glMultiTexParameteriEXT"); + glad_glMultiTexParameterivEXT = 
(PFNGLMULTITEXPARAMETERIVEXTPROC)load("glMultiTexParameterivEXT"); + glad_glMultiTexParameterfEXT = (PFNGLMULTITEXPARAMETERFEXTPROC)load("glMultiTexParameterfEXT"); + glad_glMultiTexParameterfvEXT = (PFNGLMULTITEXPARAMETERFVEXTPROC)load("glMultiTexParameterfvEXT"); + glad_glMultiTexImage1DEXT = (PFNGLMULTITEXIMAGE1DEXTPROC)load("glMultiTexImage1DEXT"); + glad_glMultiTexImage2DEXT = (PFNGLMULTITEXIMAGE2DEXTPROC)load("glMultiTexImage2DEXT"); + glad_glMultiTexSubImage1DEXT = (PFNGLMULTITEXSUBIMAGE1DEXTPROC)load("glMultiTexSubImage1DEXT"); + glad_glMultiTexSubImage2DEXT = (PFNGLMULTITEXSUBIMAGE2DEXTPROC)load("glMultiTexSubImage2DEXT"); + glad_glCopyMultiTexImage1DEXT = (PFNGLCOPYMULTITEXIMAGE1DEXTPROC)load("glCopyMultiTexImage1DEXT"); + glad_glCopyMultiTexImage2DEXT = (PFNGLCOPYMULTITEXIMAGE2DEXTPROC)load("glCopyMultiTexImage2DEXT"); + glad_glCopyMultiTexSubImage1DEXT = (PFNGLCOPYMULTITEXSUBIMAGE1DEXTPROC)load("glCopyMultiTexSubImage1DEXT"); + glad_glCopyMultiTexSubImage2DEXT = (PFNGLCOPYMULTITEXSUBIMAGE2DEXTPROC)load("glCopyMultiTexSubImage2DEXT"); + glad_glGetMultiTexImageEXT = (PFNGLGETMULTITEXIMAGEEXTPROC)load("glGetMultiTexImageEXT"); + glad_glGetMultiTexParameterfvEXT = (PFNGLGETMULTITEXPARAMETERFVEXTPROC)load("glGetMultiTexParameterfvEXT"); + glad_glGetMultiTexParameterivEXT = (PFNGLGETMULTITEXPARAMETERIVEXTPROC)load("glGetMultiTexParameterivEXT"); + glad_glGetMultiTexLevelParameterfvEXT = (PFNGLGETMULTITEXLEVELPARAMETERFVEXTPROC)load("glGetMultiTexLevelParameterfvEXT"); + glad_glGetMultiTexLevelParameterivEXT = (PFNGLGETMULTITEXLEVELPARAMETERIVEXTPROC)load("glGetMultiTexLevelParameterivEXT"); + glad_glMultiTexImage3DEXT = (PFNGLMULTITEXIMAGE3DEXTPROC)load("glMultiTexImage3DEXT"); + glad_glMultiTexSubImage3DEXT = (PFNGLMULTITEXSUBIMAGE3DEXTPROC)load("glMultiTexSubImage3DEXT"); + glad_glCopyMultiTexSubImage3DEXT = (PFNGLCOPYMULTITEXSUBIMAGE3DEXTPROC)load("glCopyMultiTexSubImage3DEXT"); + glad_glEnableClientStateIndexedEXT = 
(PFNGLENABLECLIENTSTATEINDEXEDEXTPROC)load("glEnableClientStateIndexedEXT"); + glad_glDisableClientStateIndexedEXT = (PFNGLDISABLECLIENTSTATEINDEXEDEXTPROC)load("glDisableClientStateIndexedEXT"); + glad_glGetFloatIndexedvEXT = (PFNGLGETFLOATINDEXEDVEXTPROC)load("glGetFloatIndexedvEXT"); + glad_glGetDoubleIndexedvEXT = (PFNGLGETDOUBLEINDEXEDVEXTPROC)load("glGetDoubleIndexedvEXT"); + glad_glGetPointerIndexedvEXT = (PFNGLGETPOINTERINDEXEDVEXTPROC)load("glGetPointerIndexedvEXT"); + glad_glEnableIndexedEXT = (PFNGLENABLEINDEXEDEXTPROC)load("glEnableIndexedEXT"); + glad_glDisableIndexedEXT = (PFNGLDISABLEINDEXEDEXTPROC)load("glDisableIndexedEXT"); + glad_glIsEnabledIndexedEXT = (PFNGLISENABLEDINDEXEDEXTPROC)load("glIsEnabledIndexedEXT"); + glad_glGetIntegerIndexedvEXT = (PFNGLGETINTEGERINDEXEDVEXTPROC)load("glGetIntegerIndexedvEXT"); + glad_glGetBooleanIndexedvEXT = (PFNGLGETBOOLEANINDEXEDVEXTPROC)load("glGetBooleanIndexedvEXT"); + glad_glCompressedTextureImage3DEXT = (PFNGLCOMPRESSEDTEXTUREIMAGE3DEXTPROC)load("glCompressedTextureImage3DEXT"); + glad_glCompressedTextureImage2DEXT = (PFNGLCOMPRESSEDTEXTUREIMAGE2DEXTPROC)load("glCompressedTextureImage2DEXT"); + glad_glCompressedTextureImage1DEXT = (PFNGLCOMPRESSEDTEXTUREIMAGE1DEXTPROC)load("glCompressedTextureImage1DEXT"); + glad_glCompressedTextureSubImage3DEXT = (PFNGLCOMPRESSEDTEXTURESUBIMAGE3DEXTPROC)load("glCompressedTextureSubImage3DEXT"); + glad_glCompressedTextureSubImage2DEXT = (PFNGLCOMPRESSEDTEXTURESUBIMAGE2DEXTPROC)load("glCompressedTextureSubImage2DEXT"); + glad_glCompressedTextureSubImage1DEXT = (PFNGLCOMPRESSEDTEXTURESUBIMAGE1DEXTPROC)load("glCompressedTextureSubImage1DEXT"); + glad_glGetCompressedTextureImageEXT = (PFNGLGETCOMPRESSEDTEXTUREIMAGEEXTPROC)load("glGetCompressedTextureImageEXT"); + glad_glCompressedMultiTexImage3DEXT = (PFNGLCOMPRESSEDMULTITEXIMAGE3DEXTPROC)load("glCompressedMultiTexImage3DEXT"); + glad_glCompressedMultiTexImage2DEXT = 
(PFNGLCOMPRESSEDMULTITEXIMAGE2DEXTPROC)load("glCompressedMultiTexImage2DEXT"); + glad_glCompressedMultiTexImage1DEXT = (PFNGLCOMPRESSEDMULTITEXIMAGE1DEXTPROC)load("glCompressedMultiTexImage1DEXT"); + glad_glCompressedMultiTexSubImage3DEXT = (PFNGLCOMPRESSEDMULTITEXSUBIMAGE3DEXTPROC)load("glCompressedMultiTexSubImage3DEXT"); + glad_glCompressedMultiTexSubImage2DEXT = (PFNGLCOMPRESSEDMULTITEXSUBIMAGE2DEXTPROC)load("glCompressedMultiTexSubImage2DEXT"); + glad_glCompressedMultiTexSubImage1DEXT = (PFNGLCOMPRESSEDMULTITEXSUBIMAGE1DEXTPROC)load("glCompressedMultiTexSubImage1DEXT"); + glad_glGetCompressedMultiTexImageEXT = (PFNGLGETCOMPRESSEDMULTITEXIMAGEEXTPROC)load("glGetCompressedMultiTexImageEXT"); + glad_glMatrixLoadTransposefEXT = (PFNGLMATRIXLOADTRANSPOSEFEXTPROC)load("glMatrixLoadTransposefEXT"); + glad_glMatrixLoadTransposedEXT = (PFNGLMATRIXLOADTRANSPOSEDEXTPROC)load("glMatrixLoadTransposedEXT"); + glad_glMatrixMultTransposefEXT = (PFNGLMATRIXMULTTRANSPOSEFEXTPROC)load("glMatrixMultTransposefEXT"); + glad_glMatrixMultTransposedEXT = (PFNGLMATRIXMULTTRANSPOSEDEXTPROC)load("glMatrixMultTransposedEXT"); + glad_glNamedBufferDataEXT = (PFNGLNAMEDBUFFERDATAEXTPROC)load("glNamedBufferDataEXT"); + glad_glNamedBufferSubDataEXT = (PFNGLNAMEDBUFFERSUBDATAEXTPROC)load("glNamedBufferSubDataEXT"); + glad_glMapNamedBufferEXT = (PFNGLMAPNAMEDBUFFEREXTPROC)load("glMapNamedBufferEXT"); + glad_glUnmapNamedBufferEXT = (PFNGLUNMAPNAMEDBUFFEREXTPROC)load("glUnmapNamedBufferEXT"); + glad_glGetNamedBufferParameterivEXT = (PFNGLGETNAMEDBUFFERPARAMETERIVEXTPROC)load("glGetNamedBufferParameterivEXT"); + glad_glGetNamedBufferPointervEXT = (PFNGLGETNAMEDBUFFERPOINTERVEXTPROC)load("glGetNamedBufferPointervEXT"); + glad_glGetNamedBufferSubDataEXT = (PFNGLGETNAMEDBUFFERSUBDATAEXTPROC)load("glGetNamedBufferSubDataEXT"); + glad_glProgramUniform1fEXT = (PFNGLPROGRAMUNIFORM1FEXTPROC)load("glProgramUniform1fEXT"); + glad_glProgramUniform2fEXT = 
(PFNGLPROGRAMUNIFORM2FEXTPROC)load("glProgramUniform2fEXT"); + glad_glProgramUniform3fEXT = (PFNGLPROGRAMUNIFORM3FEXTPROC)load("glProgramUniform3fEXT"); + glad_glProgramUniform4fEXT = (PFNGLPROGRAMUNIFORM4FEXTPROC)load("glProgramUniform4fEXT"); + glad_glProgramUniform1iEXT = (PFNGLPROGRAMUNIFORM1IEXTPROC)load("glProgramUniform1iEXT"); + glad_glProgramUniform2iEXT = (PFNGLPROGRAMUNIFORM2IEXTPROC)load("glProgramUniform2iEXT"); + glad_glProgramUniform3iEXT = (PFNGLPROGRAMUNIFORM3IEXTPROC)load("glProgramUniform3iEXT"); + glad_glProgramUniform4iEXT = (PFNGLPROGRAMUNIFORM4IEXTPROC)load("glProgramUniform4iEXT"); + glad_glProgramUniform1fvEXT = (PFNGLPROGRAMUNIFORM1FVEXTPROC)load("glProgramUniform1fvEXT"); + glad_glProgramUniform2fvEXT = (PFNGLPROGRAMUNIFORM2FVEXTPROC)load("glProgramUniform2fvEXT"); + glad_glProgramUniform3fvEXT = (PFNGLPROGRAMUNIFORM3FVEXTPROC)load("glProgramUniform3fvEXT"); + glad_glProgramUniform4fvEXT = (PFNGLPROGRAMUNIFORM4FVEXTPROC)load("glProgramUniform4fvEXT"); + glad_glProgramUniform1ivEXT = (PFNGLPROGRAMUNIFORM1IVEXTPROC)load("glProgramUniform1ivEXT"); + glad_glProgramUniform2ivEXT = (PFNGLPROGRAMUNIFORM2IVEXTPROC)load("glProgramUniform2ivEXT"); + glad_glProgramUniform3ivEXT = (PFNGLPROGRAMUNIFORM3IVEXTPROC)load("glProgramUniform3ivEXT"); + glad_glProgramUniform4ivEXT = (PFNGLPROGRAMUNIFORM4IVEXTPROC)load("glProgramUniform4ivEXT"); + glad_glProgramUniformMatrix2fvEXT = (PFNGLPROGRAMUNIFORMMATRIX2FVEXTPROC)load("glProgramUniformMatrix2fvEXT"); + glad_glProgramUniformMatrix3fvEXT = (PFNGLPROGRAMUNIFORMMATRIX3FVEXTPROC)load("glProgramUniformMatrix3fvEXT"); + glad_glProgramUniformMatrix4fvEXT = (PFNGLPROGRAMUNIFORMMATRIX4FVEXTPROC)load("glProgramUniformMatrix4fvEXT"); + glad_glProgramUniformMatrix2x3fvEXT = (PFNGLPROGRAMUNIFORMMATRIX2X3FVEXTPROC)load("glProgramUniformMatrix2x3fvEXT"); + glad_glProgramUniformMatrix3x2fvEXT = (PFNGLPROGRAMUNIFORMMATRIX3X2FVEXTPROC)load("glProgramUniformMatrix3x2fvEXT"); + glad_glProgramUniformMatrix2x4fvEXT = 
(PFNGLPROGRAMUNIFORMMATRIX2X4FVEXTPROC)load("glProgramUniformMatrix2x4fvEXT"); + glad_glProgramUniformMatrix4x2fvEXT = (PFNGLPROGRAMUNIFORMMATRIX4X2FVEXTPROC)load("glProgramUniformMatrix4x2fvEXT"); + glad_glProgramUniformMatrix3x4fvEXT = (PFNGLPROGRAMUNIFORMMATRIX3X4FVEXTPROC)load("glProgramUniformMatrix3x4fvEXT"); + glad_glProgramUniformMatrix4x3fvEXT = (PFNGLPROGRAMUNIFORMMATRIX4X3FVEXTPROC)load("glProgramUniformMatrix4x3fvEXT"); + glad_glTextureBufferEXT = (PFNGLTEXTUREBUFFEREXTPROC)load("glTextureBufferEXT"); + glad_glMultiTexBufferEXT = (PFNGLMULTITEXBUFFEREXTPROC)load("glMultiTexBufferEXT"); + glad_glTextureParameterIivEXT = (PFNGLTEXTUREPARAMETERIIVEXTPROC)load("glTextureParameterIivEXT"); + glad_glTextureParameterIuivEXT = (PFNGLTEXTUREPARAMETERIUIVEXTPROC)load("glTextureParameterIuivEXT"); + glad_glGetTextureParameterIivEXT = (PFNGLGETTEXTUREPARAMETERIIVEXTPROC)load("glGetTextureParameterIivEXT"); + glad_glGetTextureParameterIuivEXT = (PFNGLGETTEXTUREPARAMETERIUIVEXTPROC)load("glGetTextureParameterIuivEXT"); + glad_glMultiTexParameterIivEXT = (PFNGLMULTITEXPARAMETERIIVEXTPROC)load("glMultiTexParameterIivEXT"); + glad_glMultiTexParameterIuivEXT = (PFNGLMULTITEXPARAMETERIUIVEXTPROC)load("glMultiTexParameterIuivEXT"); + glad_glGetMultiTexParameterIivEXT = (PFNGLGETMULTITEXPARAMETERIIVEXTPROC)load("glGetMultiTexParameterIivEXT"); + glad_glGetMultiTexParameterIuivEXT = (PFNGLGETMULTITEXPARAMETERIUIVEXTPROC)load("glGetMultiTexParameterIuivEXT"); + glad_glProgramUniform1uiEXT = (PFNGLPROGRAMUNIFORM1UIEXTPROC)load("glProgramUniform1uiEXT"); + glad_glProgramUniform2uiEXT = (PFNGLPROGRAMUNIFORM2UIEXTPROC)load("glProgramUniform2uiEXT"); + glad_glProgramUniform3uiEXT = (PFNGLPROGRAMUNIFORM3UIEXTPROC)load("glProgramUniform3uiEXT"); + glad_glProgramUniform4uiEXT = (PFNGLPROGRAMUNIFORM4UIEXTPROC)load("glProgramUniform4uiEXT"); + glad_glProgramUniform1uivEXT = (PFNGLPROGRAMUNIFORM1UIVEXTPROC)load("glProgramUniform1uivEXT"); + glad_glProgramUniform2uivEXT = 
(PFNGLPROGRAMUNIFORM2UIVEXTPROC)load("glProgramUniform2uivEXT"); + glad_glProgramUniform3uivEXT = (PFNGLPROGRAMUNIFORM3UIVEXTPROC)load("glProgramUniform3uivEXT"); + glad_glProgramUniform4uivEXT = (PFNGLPROGRAMUNIFORM4UIVEXTPROC)load("glProgramUniform4uivEXT"); + glad_glNamedProgramLocalParameters4fvEXT = (PFNGLNAMEDPROGRAMLOCALPARAMETERS4FVEXTPROC)load("glNamedProgramLocalParameters4fvEXT"); + glad_glNamedProgramLocalParameterI4iEXT = (PFNGLNAMEDPROGRAMLOCALPARAMETERI4IEXTPROC)load("glNamedProgramLocalParameterI4iEXT"); + glad_glNamedProgramLocalParameterI4ivEXT = (PFNGLNAMEDPROGRAMLOCALPARAMETERI4IVEXTPROC)load("glNamedProgramLocalParameterI4ivEXT"); + glad_glNamedProgramLocalParametersI4ivEXT = (PFNGLNAMEDPROGRAMLOCALPARAMETERSI4IVEXTPROC)load("glNamedProgramLocalParametersI4ivEXT"); + glad_glNamedProgramLocalParameterI4uiEXT = (PFNGLNAMEDPROGRAMLOCALPARAMETERI4UIEXTPROC)load("glNamedProgramLocalParameterI4uiEXT"); + glad_glNamedProgramLocalParameterI4uivEXT = (PFNGLNAMEDPROGRAMLOCALPARAMETERI4UIVEXTPROC)load("glNamedProgramLocalParameterI4uivEXT"); + glad_glNamedProgramLocalParametersI4uivEXT = (PFNGLNAMEDPROGRAMLOCALPARAMETERSI4UIVEXTPROC)load("glNamedProgramLocalParametersI4uivEXT"); + glad_glGetNamedProgramLocalParameterIivEXT = (PFNGLGETNAMEDPROGRAMLOCALPARAMETERIIVEXTPROC)load("glGetNamedProgramLocalParameterIivEXT"); + glad_glGetNamedProgramLocalParameterIuivEXT = (PFNGLGETNAMEDPROGRAMLOCALPARAMETERIUIVEXTPROC)load("glGetNamedProgramLocalParameterIuivEXT"); + glad_glEnableClientStateiEXT = (PFNGLENABLECLIENTSTATEIEXTPROC)load("glEnableClientStateiEXT"); + glad_glDisableClientStateiEXT = (PFNGLDISABLECLIENTSTATEIEXTPROC)load("glDisableClientStateiEXT"); + glad_glGetFloati_vEXT = (PFNGLGETFLOATI_VEXTPROC)load("glGetFloati_vEXT"); + glad_glGetDoublei_vEXT = (PFNGLGETDOUBLEI_VEXTPROC)load("glGetDoublei_vEXT"); + glad_glGetPointeri_vEXT = (PFNGLGETPOINTERI_VEXTPROC)load("glGetPointeri_vEXT"); + glad_glNamedProgramStringEXT = 
(PFNGLNAMEDPROGRAMSTRINGEXTPROC)load("glNamedProgramStringEXT"); + glad_glNamedProgramLocalParameter4dEXT = (PFNGLNAMEDPROGRAMLOCALPARAMETER4DEXTPROC)load("glNamedProgramLocalParameter4dEXT"); + glad_glNamedProgramLocalParameter4dvEXT = (PFNGLNAMEDPROGRAMLOCALPARAMETER4DVEXTPROC)load("glNamedProgramLocalParameter4dvEXT"); + glad_glNamedProgramLocalParameter4fEXT = (PFNGLNAMEDPROGRAMLOCALPARAMETER4FEXTPROC)load("glNamedProgramLocalParameter4fEXT"); + glad_glNamedProgramLocalParameter4fvEXT = (PFNGLNAMEDPROGRAMLOCALPARAMETER4FVEXTPROC)load("glNamedProgramLocalParameter4fvEXT"); + glad_glGetNamedProgramLocalParameterdvEXT = (PFNGLGETNAMEDPROGRAMLOCALPARAMETERDVEXTPROC)load("glGetNamedProgramLocalParameterdvEXT"); + glad_glGetNamedProgramLocalParameterfvEXT = (PFNGLGETNAMEDPROGRAMLOCALPARAMETERFVEXTPROC)load("glGetNamedProgramLocalParameterfvEXT"); + glad_glGetNamedProgramivEXT = (PFNGLGETNAMEDPROGRAMIVEXTPROC)load("glGetNamedProgramivEXT"); + glad_glGetNamedProgramStringEXT = (PFNGLGETNAMEDPROGRAMSTRINGEXTPROC)load("glGetNamedProgramStringEXT"); + glad_glNamedRenderbufferStorageEXT = (PFNGLNAMEDRENDERBUFFERSTORAGEEXTPROC)load("glNamedRenderbufferStorageEXT"); + glad_glGetNamedRenderbufferParameterivEXT = (PFNGLGETNAMEDRENDERBUFFERPARAMETERIVEXTPROC)load("glGetNamedRenderbufferParameterivEXT"); + glad_glNamedRenderbufferStorageMultisampleEXT = (PFNGLNAMEDRENDERBUFFERSTORAGEMULTISAMPLEEXTPROC)load("glNamedRenderbufferStorageMultisampleEXT"); + glad_glNamedRenderbufferStorageMultisampleCoverageEXT = (PFNGLNAMEDRENDERBUFFERSTORAGEMULTISAMPLECOVERAGEEXTPROC)load("glNamedRenderbufferStorageMultisampleCoverageEXT"); + glad_glCheckNamedFramebufferStatusEXT = (PFNGLCHECKNAMEDFRAMEBUFFERSTATUSEXTPROC)load("glCheckNamedFramebufferStatusEXT"); + glad_glNamedFramebufferTexture1DEXT = (PFNGLNAMEDFRAMEBUFFERTEXTURE1DEXTPROC)load("glNamedFramebufferTexture1DEXT"); + glad_glNamedFramebufferTexture2DEXT = (PFNGLNAMEDFRAMEBUFFERTEXTURE2DEXTPROC)load("glNamedFramebufferTexture2DEXT"); + 
glad_glNamedFramebufferTexture3DEXT = (PFNGLNAMEDFRAMEBUFFERTEXTURE3DEXTPROC)load("glNamedFramebufferTexture3DEXT"); + glad_glNamedFramebufferRenderbufferEXT = (PFNGLNAMEDFRAMEBUFFERRENDERBUFFEREXTPROC)load("glNamedFramebufferRenderbufferEXT"); + glad_glGetNamedFramebufferAttachmentParameterivEXT = (PFNGLGETNAMEDFRAMEBUFFERATTACHMENTPARAMETERIVEXTPROC)load("glGetNamedFramebufferAttachmentParameterivEXT"); + glad_glGenerateTextureMipmapEXT = (PFNGLGENERATETEXTUREMIPMAPEXTPROC)load("glGenerateTextureMipmapEXT"); + glad_glGenerateMultiTexMipmapEXT = (PFNGLGENERATEMULTITEXMIPMAPEXTPROC)load("glGenerateMultiTexMipmapEXT"); + glad_glFramebufferDrawBufferEXT = (PFNGLFRAMEBUFFERDRAWBUFFEREXTPROC)load("glFramebufferDrawBufferEXT"); + glad_glFramebufferDrawBuffersEXT = (PFNGLFRAMEBUFFERDRAWBUFFERSEXTPROC)load("glFramebufferDrawBuffersEXT"); + glad_glFramebufferReadBufferEXT = (PFNGLFRAMEBUFFERREADBUFFEREXTPROC)load("glFramebufferReadBufferEXT"); + glad_glGetFramebufferParameterivEXT = (PFNGLGETFRAMEBUFFERPARAMETERIVEXTPROC)load("glGetFramebufferParameterivEXT"); + glad_glNamedCopyBufferSubDataEXT = (PFNGLNAMEDCOPYBUFFERSUBDATAEXTPROC)load("glNamedCopyBufferSubDataEXT"); + glad_glNamedFramebufferTextureEXT = (PFNGLNAMEDFRAMEBUFFERTEXTUREEXTPROC)load("glNamedFramebufferTextureEXT"); + glad_glNamedFramebufferTextureLayerEXT = (PFNGLNAMEDFRAMEBUFFERTEXTURELAYEREXTPROC)load("glNamedFramebufferTextureLayerEXT"); + glad_glNamedFramebufferTextureFaceEXT = (PFNGLNAMEDFRAMEBUFFERTEXTUREFACEEXTPROC)load("glNamedFramebufferTextureFaceEXT"); + glad_glTextureRenderbufferEXT = (PFNGLTEXTURERENDERBUFFEREXTPROC)load("glTextureRenderbufferEXT"); + glad_glMultiTexRenderbufferEXT = (PFNGLMULTITEXRENDERBUFFEREXTPROC)load("glMultiTexRenderbufferEXT"); + glad_glVertexArrayVertexOffsetEXT = (PFNGLVERTEXARRAYVERTEXOFFSETEXTPROC)load("glVertexArrayVertexOffsetEXT"); + glad_glVertexArrayColorOffsetEXT = (PFNGLVERTEXARRAYCOLOROFFSETEXTPROC)load("glVertexArrayColorOffsetEXT"); + 
glad_glVertexArrayEdgeFlagOffsetEXT = (PFNGLVERTEXARRAYEDGEFLAGOFFSETEXTPROC)load("glVertexArrayEdgeFlagOffsetEXT"); + glad_glVertexArrayIndexOffsetEXT = (PFNGLVERTEXARRAYINDEXOFFSETEXTPROC)load("glVertexArrayIndexOffsetEXT"); + glad_glVertexArrayNormalOffsetEXT = (PFNGLVERTEXARRAYNORMALOFFSETEXTPROC)load("glVertexArrayNormalOffsetEXT"); + glad_glVertexArrayTexCoordOffsetEXT = (PFNGLVERTEXARRAYTEXCOORDOFFSETEXTPROC)load("glVertexArrayTexCoordOffsetEXT"); + glad_glVertexArrayMultiTexCoordOffsetEXT = (PFNGLVERTEXARRAYMULTITEXCOORDOFFSETEXTPROC)load("glVertexArrayMultiTexCoordOffsetEXT"); + glad_glVertexArrayFogCoordOffsetEXT = (PFNGLVERTEXARRAYFOGCOORDOFFSETEXTPROC)load("glVertexArrayFogCoordOffsetEXT"); + glad_glVertexArraySecondaryColorOffsetEXT = (PFNGLVERTEXARRAYSECONDARYCOLOROFFSETEXTPROC)load("glVertexArraySecondaryColorOffsetEXT"); + glad_glVertexArrayVertexAttribOffsetEXT = (PFNGLVERTEXARRAYVERTEXATTRIBOFFSETEXTPROC)load("glVertexArrayVertexAttribOffsetEXT"); + glad_glVertexArrayVertexAttribIOffsetEXT = (PFNGLVERTEXARRAYVERTEXATTRIBIOFFSETEXTPROC)load("glVertexArrayVertexAttribIOffsetEXT"); + glad_glEnableVertexArrayEXT = (PFNGLENABLEVERTEXARRAYEXTPROC)load("glEnableVertexArrayEXT"); + glad_glDisableVertexArrayEXT = (PFNGLDISABLEVERTEXARRAYEXTPROC)load("glDisableVertexArrayEXT"); + glad_glEnableVertexArrayAttribEXT = (PFNGLENABLEVERTEXARRAYATTRIBEXTPROC)load("glEnableVertexArrayAttribEXT"); + glad_glDisableVertexArrayAttribEXT = (PFNGLDISABLEVERTEXARRAYATTRIBEXTPROC)load("glDisableVertexArrayAttribEXT"); + glad_glGetVertexArrayIntegervEXT = (PFNGLGETVERTEXARRAYINTEGERVEXTPROC)load("glGetVertexArrayIntegervEXT"); + glad_glGetVertexArrayPointervEXT = (PFNGLGETVERTEXARRAYPOINTERVEXTPROC)load("glGetVertexArrayPointervEXT"); + glad_glGetVertexArrayIntegeri_vEXT = (PFNGLGETVERTEXARRAYINTEGERI_VEXTPROC)load("glGetVertexArrayIntegeri_vEXT"); + glad_glGetVertexArrayPointeri_vEXT = (PFNGLGETVERTEXARRAYPOINTERI_VEXTPROC)load("glGetVertexArrayPointeri_vEXT"); + 
glad_glMapNamedBufferRangeEXT = (PFNGLMAPNAMEDBUFFERRANGEEXTPROC)load("glMapNamedBufferRangeEXT"); + glad_glFlushMappedNamedBufferRangeEXT = (PFNGLFLUSHMAPPEDNAMEDBUFFERRANGEEXTPROC)load("glFlushMappedNamedBufferRangeEXT"); + glad_glNamedBufferStorageEXT = (PFNGLNAMEDBUFFERSTORAGEEXTPROC)load("glNamedBufferStorageEXT"); + glad_glClearNamedBufferDataEXT = (PFNGLCLEARNAMEDBUFFERDATAEXTPROC)load("glClearNamedBufferDataEXT"); + glad_glClearNamedBufferSubDataEXT = (PFNGLCLEARNAMEDBUFFERSUBDATAEXTPROC)load("glClearNamedBufferSubDataEXT"); + glad_glNamedFramebufferParameteriEXT = (PFNGLNAMEDFRAMEBUFFERPARAMETERIEXTPROC)load("glNamedFramebufferParameteriEXT"); + glad_glGetNamedFramebufferParameterivEXT = (PFNGLGETNAMEDFRAMEBUFFERPARAMETERIVEXTPROC)load("glGetNamedFramebufferParameterivEXT"); + glad_glProgramUniform1dEXT = (PFNGLPROGRAMUNIFORM1DEXTPROC)load("glProgramUniform1dEXT"); + glad_glProgramUniform2dEXT = (PFNGLPROGRAMUNIFORM2DEXTPROC)load("glProgramUniform2dEXT"); + glad_glProgramUniform3dEXT = (PFNGLPROGRAMUNIFORM3DEXTPROC)load("glProgramUniform3dEXT"); + glad_glProgramUniform4dEXT = (PFNGLPROGRAMUNIFORM4DEXTPROC)load("glProgramUniform4dEXT"); + glad_glProgramUniform1dvEXT = (PFNGLPROGRAMUNIFORM1DVEXTPROC)load("glProgramUniform1dvEXT"); + glad_glProgramUniform2dvEXT = (PFNGLPROGRAMUNIFORM2DVEXTPROC)load("glProgramUniform2dvEXT"); + glad_glProgramUniform3dvEXT = (PFNGLPROGRAMUNIFORM3DVEXTPROC)load("glProgramUniform3dvEXT"); + glad_glProgramUniform4dvEXT = (PFNGLPROGRAMUNIFORM4DVEXTPROC)load("glProgramUniform4dvEXT"); + glad_glProgramUniformMatrix2dvEXT = (PFNGLPROGRAMUNIFORMMATRIX2DVEXTPROC)load("glProgramUniformMatrix2dvEXT"); + glad_glProgramUniformMatrix3dvEXT = (PFNGLPROGRAMUNIFORMMATRIX3DVEXTPROC)load("glProgramUniformMatrix3dvEXT"); + glad_glProgramUniformMatrix4dvEXT = (PFNGLPROGRAMUNIFORMMATRIX4DVEXTPROC)load("glProgramUniformMatrix4dvEXT"); + glad_glProgramUniformMatrix2x3dvEXT = 
(PFNGLPROGRAMUNIFORMMATRIX2X3DVEXTPROC)load("glProgramUniformMatrix2x3dvEXT"); + glad_glProgramUniformMatrix2x4dvEXT = (PFNGLPROGRAMUNIFORMMATRIX2X4DVEXTPROC)load("glProgramUniformMatrix2x4dvEXT"); + glad_glProgramUniformMatrix3x2dvEXT = (PFNGLPROGRAMUNIFORMMATRIX3X2DVEXTPROC)load("glProgramUniformMatrix3x2dvEXT"); + glad_glProgramUniformMatrix3x4dvEXT = (PFNGLPROGRAMUNIFORMMATRIX3X4DVEXTPROC)load("glProgramUniformMatrix3x4dvEXT"); + glad_glProgramUniformMatrix4x2dvEXT = (PFNGLPROGRAMUNIFORMMATRIX4X2DVEXTPROC)load("glProgramUniformMatrix4x2dvEXT"); + glad_glProgramUniformMatrix4x3dvEXT = (PFNGLPROGRAMUNIFORMMATRIX4X3DVEXTPROC)load("glProgramUniformMatrix4x3dvEXT"); + glad_glTextureBufferRangeEXT = (PFNGLTEXTUREBUFFERRANGEEXTPROC)load("glTextureBufferRangeEXT"); + glad_glTextureStorage1DEXT = (PFNGLTEXTURESTORAGE1DEXTPROC)load("glTextureStorage1DEXT"); + glad_glTextureStorage2DEXT = (PFNGLTEXTURESTORAGE2DEXTPROC)load("glTextureStorage2DEXT"); + glad_glTextureStorage3DEXT = (PFNGLTEXTURESTORAGE3DEXTPROC)load("glTextureStorage3DEXT"); + glad_glTextureStorage2DMultisampleEXT = (PFNGLTEXTURESTORAGE2DMULTISAMPLEEXTPROC)load("glTextureStorage2DMultisampleEXT"); + glad_glTextureStorage3DMultisampleEXT = (PFNGLTEXTURESTORAGE3DMULTISAMPLEEXTPROC)load("glTextureStorage3DMultisampleEXT"); + glad_glVertexArrayBindVertexBufferEXT = (PFNGLVERTEXARRAYBINDVERTEXBUFFEREXTPROC)load("glVertexArrayBindVertexBufferEXT"); + glad_glVertexArrayVertexAttribFormatEXT = (PFNGLVERTEXARRAYVERTEXATTRIBFORMATEXTPROC)load("glVertexArrayVertexAttribFormatEXT"); + glad_glVertexArrayVertexAttribIFormatEXT = (PFNGLVERTEXARRAYVERTEXATTRIBIFORMATEXTPROC)load("glVertexArrayVertexAttribIFormatEXT"); + glad_glVertexArrayVertexAttribLFormatEXT = (PFNGLVERTEXARRAYVERTEXATTRIBLFORMATEXTPROC)load("glVertexArrayVertexAttribLFormatEXT"); + glad_glVertexArrayVertexAttribBindingEXT = (PFNGLVERTEXARRAYVERTEXATTRIBBINDINGEXTPROC)load("glVertexArrayVertexAttribBindingEXT"); + glad_glVertexArrayVertexBindingDivisorEXT 
= (PFNGLVERTEXARRAYVERTEXBINDINGDIVISOREXTPROC)load("glVertexArrayVertexBindingDivisorEXT"); + glad_glVertexArrayVertexAttribLOffsetEXT = (PFNGLVERTEXARRAYVERTEXATTRIBLOFFSETEXTPROC)load("glVertexArrayVertexAttribLOffsetEXT"); + glad_glTexturePageCommitmentEXT = (PFNGLTEXTUREPAGECOMMITMENTEXTPROC)load("glTexturePageCommitmentEXT"); + glad_glVertexArrayVertexAttribDivisorEXT = (PFNGLVERTEXARRAYVERTEXATTRIBDIVISOREXTPROC)load("glVertexArrayVertexAttribDivisorEXT"); +} +static void load_GL_EXT_draw_buffers2(GLADloadproc load) { + if(!GLAD_GL_EXT_draw_buffers2) return; + glad_glColorMaskIndexedEXT = (PFNGLCOLORMASKINDEXEDEXTPROC)load("glColorMaskIndexedEXT"); + glad_glGetBooleanIndexedvEXT = (PFNGLGETBOOLEANINDEXEDVEXTPROC)load("glGetBooleanIndexedvEXT"); + glad_glGetIntegerIndexedvEXT = (PFNGLGETINTEGERINDEXEDVEXTPROC)load("glGetIntegerIndexedvEXT"); + glad_glEnableIndexedEXT = (PFNGLENABLEINDEXEDEXTPROC)load("glEnableIndexedEXT"); + glad_glDisableIndexedEXT = (PFNGLDISABLEINDEXEDEXTPROC)load("glDisableIndexedEXT"); + glad_glIsEnabledIndexedEXT = (PFNGLISENABLEDINDEXEDEXTPROC)load("glIsEnabledIndexedEXT"); +} +static void load_GL_EXT_draw_instanced(GLADloadproc load) { + if(!GLAD_GL_EXT_draw_instanced) return; + glad_glDrawArraysInstancedEXT = (PFNGLDRAWARRAYSINSTANCEDEXTPROC)load("glDrawArraysInstancedEXT"); + glad_glDrawElementsInstancedEXT = (PFNGLDRAWELEMENTSINSTANCEDEXTPROC)load("glDrawElementsInstancedEXT"); +} +static void load_GL_EXT_draw_range_elements(GLADloadproc load) { + if(!GLAD_GL_EXT_draw_range_elements) return; + glad_glDrawRangeElementsEXT = (PFNGLDRAWRANGEELEMENTSEXTPROC)load("glDrawRangeElementsEXT"); +} +static void load_GL_EXT_external_buffer(GLADloadproc load) { + if(!GLAD_GL_EXT_external_buffer) return; + glad_glBufferStorageExternalEXT = (PFNGLBUFFERSTORAGEEXTERNALEXTPROC)load("glBufferStorageExternalEXT"); + glad_glNamedBufferStorageExternalEXT = (PFNGLNAMEDBUFFERSTORAGEEXTERNALEXTPROC)load("glNamedBufferStorageExternalEXT"); +} +static void 
load_GL_EXT_fog_coord(GLADloadproc load) { + if(!GLAD_GL_EXT_fog_coord) return; + glad_glFogCoordfEXT = (PFNGLFOGCOORDFEXTPROC)load("glFogCoordfEXT"); + glad_glFogCoordfvEXT = (PFNGLFOGCOORDFVEXTPROC)load("glFogCoordfvEXT"); + glad_glFogCoorddEXT = (PFNGLFOGCOORDDEXTPROC)load("glFogCoorddEXT"); + glad_glFogCoorddvEXT = (PFNGLFOGCOORDDVEXTPROC)load("glFogCoorddvEXT"); + glad_glFogCoordPointerEXT = (PFNGLFOGCOORDPOINTEREXTPROC)load("glFogCoordPointerEXT"); +} +static void load_GL_EXT_fragment_shading_rate(GLADloadproc load) { + if(!GLAD_GL_EXT_fragment_shading_rate) return; + glad_glGetFragmentShadingRatesEXT = (PFNGLGETFRAGMENTSHADINGRATESEXTPROC)load("glGetFragmentShadingRatesEXT"); + glad_glShadingRateEXT = (PFNGLSHADINGRATEEXTPROC)load("glShadingRateEXT"); + glad_glShadingRateCombinerOpsEXT = (PFNGLSHADINGRATECOMBINEROPSEXTPROC)load("glShadingRateCombinerOpsEXT"); + glad_glFramebufferShadingRateEXT = (PFNGLFRAMEBUFFERSHADINGRATEEXTPROC)load("glFramebufferShadingRateEXT"); +} +static void load_GL_EXT_framebuffer_blit(GLADloadproc load) { + if(!GLAD_GL_EXT_framebuffer_blit) return; + glad_glBlitFramebufferEXT = (PFNGLBLITFRAMEBUFFEREXTPROC)load("glBlitFramebufferEXT"); +} +static void load_GL_EXT_framebuffer_blit_layers(GLADloadproc load) { + if(!GLAD_GL_EXT_framebuffer_blit_layers) return; + glad_glBlitFramebufferLayersEXT = (PFNGLBLITFRAMEBUFFERLAYERSEXTPROC)load("glBlitFramebufferLayersEXT"); + glad_glBlitFramebufferLayerEXT = (PFNGLBLITFRAMEBUFFERLAYEREXTPROC)load("glBlitFramebufferLayerEXT"); +} +static void load_GL_EXT_framebuffer_multisample(GLADloadproc load) { + if(!GLAD_GL_EXT_framebuffer_multisample) return; + glad_glRenderbufferStorageMultisampleEXT = (PFNGLRENDERBUFFERSTORAGEMULTISAMPLEEXTPROC)load("glRenderbufferStorageMultisampleEXT"); +} +static void load_GL_EXT_framebuffer_object(GLADloadproc load) { + if(!GLAD_GL_EXT_framebuffer_object) return; + glad_glIsRenderbufferEXT = (PFNGLISRENDERBUFFEREXTPROC)load("glIsRenderbufferEXT"); + 
glad_glBindRenderbufferEXT = (PFNGLBINDRENDERBUFFEREXTPROC)load("glBindRenderbufferEXT"); + glad_glDeleteRenderbuffersEXT = (PFNGLDELETERENDERBUFFERSEXTPROC)load("glDeleteRenderbuffersEXT"); + glad_glGenRenderbuffersEXT = (PFNGLGENRENDERBUFFERSEXTPROC)load("glGenRenderbuffersEXT"); + glad_glRenderbufferStorageEXT = (PFNGLRENDERBUFFERSTORAGEEXTPROC)load("glRenderbufferStorageEXT"); + glad_glGetRenderbufferParameterivEXT = (PFNGLGETRENDERBUFFERPARAMETERIVEXTPROC)load("glGetRenderbufferParameterivEXT"); + glad_glIsFramebufferEXT = (PFNGLISFRAMEBUFFEREXTPROC)load("glIsFramebufferEXT"); + glad_glBindFramebufferEXT = (PFNGLBINDFRAMEBUFFEREXTPROC)load("glBindFramebufferEXT"); + glad_glDeleteFramebuffersEXT = (PFNGLDELETEFRAMEBUFFERSEXTPROC)load("glDeleteFramebuffersEXT"); + glad_glGenFramebuffersEXT = (PFNGLGENFRAMEBUFFERSEXTPROC)load("glGenFramebuffersEXT"); + glad_glCheckFramebufferStatusEXT = (PFNGLCHECKFRAMEBUFFERSTATUSEXTPROC)load("glCheckFramebufferStatusEXT"); + glad_glFramebufferTexture1DEXT = (PFNGLFRAMEBUFFERTEXTURE1DEXTPROC)load("glFramebufferTexture1DEXT"); + glad_glFramebufferTexture2DEXT = (PFNGLFRAMEBUFFERTEXTURE2DEXTPROC)load("glFramebufferTexture2DEXT"); + glad_glFramebufferTexture3DEXT = (PFNGLFRAMEBUFFERTEXTURE3DEXTPROC)load("glFramebufferTexture3DEXT"); + glad_glFramebufferRenderbufferEXT = (PFNGLFRAMEBUFFERRENDERBUFFEREXTPROC)load("glFramebufferRenderbufferEXT"); + glad_glGetFramebufferAttachmentParameterivEXT = (PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVEXTPROC)load("glGetFramebufferAttachmentParameterivEXT"); + glad_glGenerateMipmapEXT = (PFNGLGENERATEMIPMAPEXTPROC)load("glGenerateMipmapEXT"); +} +static void load_GL_EXT_geometry_shader4(GLADloadproc load) { + if(!GLAD_GL_EXT_geometry_shader4) return; + glad_glProgramParameteriEXT = (PFNGLPROGRAMPARAMETERIEXTPROC)load("glProgramParameteriEXT"); +} +static void load_GL_EXT_gpu_program_parameters(GLADloadproc load) { + if(!GLAD_GL_EXT_gpu_program_parameters) return; + glad_glProgramEnvParameters4fvEXT = 
(PFNGLPROGRAMENVPARAMETERS4FVEXTPROC)load("glProgramEnvParameters4fvEXT"); + glad_glProgramLocalParameters4fvEXT = (PFNGLPROGRAMLOCALPARAMETERS4FVEXTPROC)load("glProgramLocalParameters4fvEXT"); +} +static void load_GL_EXT_gpu_shader4(GLADloadproc load) { + if(!GLAD_GL_EXT_gpu_shader4) return; + glad_glGetUniformuivEXT = (PFNGLGETUNIFORMUIVEXTPROC)load("glGetUniformuivEXT"); + glad_glBindFragDataLocationEXT = (PFNGLBINDFRAGDATALOCATIONEXTPROC)load("glBindFragDataLocationEXT"); + glad_glGetFragDataLocationEXT = (PFNGLGETFRAGDATALOCATIONEXTPROC)load("glGetFragDataLocationEXT"); + glad_glUniform1uiEXT = (PFNGLUNIFORM1UIEXTPROC)load("glUniform1uiEXT"); + glad_glUniform2uiEXT = (PFNGLUNIFORM2UIEXTPROC)load("glUniform2uiEXT"); + glad_glUniform3uiEXT = (PFNGLUNIFORM3UIEXTPROC)load("glUniform3uiEXT"); + glad_glUniform4uiEXT = (PFNGLUNIFORM4UIEXTPROC)load("glUniform4uiEXT"); + glad_glUniform1uivEXT = (PFNGLUNIFORM1UIVEXTPROC)load("glUniform1uivEXT"); + glad_glUniform2uivEXT = (PFNGLUNIFORM2UIVEXTPROC)load("glUniform2uivEXT"); + glad_glUniform3uivEXT = (PFNGLUNIFORM3UIVEXTPROC)load("glUniform3uivEXT"); + glad_glUniform4uivEXT = (PFNGLUNIFORM4UIVEXTPROC)load("glUniform4uivEXT"); + glad_glVertexAttribI1iEXT = (PFNGLVERTEXATTRIBI1IEXTPROC)load("glVertexAttribI1iEXT"); + glad_glVertexAttribI2iEXT = (PFNGLVERTEXATTRIBI2IEXTPROC)load("glVertexAttribI2iEXT"); + glad_glVertexAttribI3iEXT = (PFNGLVERTEXATTRIBI3IEXTPROC)load("glVertexAttribI3iEXT"); + glad_glVertexAttribI4iEXT = (PFNGLVERTEXATTRIBI4IEXTPROC)load("glVertexAttribI4iEXT"); + glad_glVertexAttribI1uiEXT = (PFNGLVERTEXATTRIBI1UIEXTPROC)load("glVertexAttribI1uiEXT"); + glad_glVertexAttribI2uiEXT = (PFNGLVERTEXATTRIBI2UIEXTPROC)load("glVertexAttribI2uiEXT"); + glad_glVertexAttribI3uiEXT = (PFNGLVERTEXATTRIBI3UIEXTPROC)load("glVertexAttribI3uiEXT"); + glad_glVertexAttribI4uiEXT = (PFNGLVERTEXATTRIBI4UIEXTPROC)load("glVertexAttribI4uiEXT"); + glad_glVertexAttribI1ivEXT = 
(PFNGLVERTEXATTRIBI1IVEXTPROC)load("glVertexAttribI1ivEXT"); + glad_glVertexAttribI2ivEXT = (PFNGLVERTEXATTRIBI2IVEXTPROC)load("glVertexAttribI2ivEXT"); + glad_glVertexAttribI3ivEXT = (PFNGLVERTEXATTRIBI3IVEXTPROC)load("glVertexAttribI3ivEXT"); + glad_glVertexAttribI4ivEXT = (PFNGLVERTEXATTRIBI4IVEXTPROC)load("glVertexAttribI4ivEXT"); + glad_glVertexAttribI1uivEXT = (PFNGLVERTEXATTRIBI1UIVEXTPROC)load("glVertexAttribI1uivEXT"); + glad_glVertexAttribI2uivEXT = (PFNGLVERTEXATTRIBI2UIVEXTPROC)load("glVertexAttribI2uivEXT"); + glad_glVertexAttribI3uivEXT = (PFNGLVERTEXATTRIBI3UIVEXTPROC)load("glVertexAttribI3uivEXT"); + glad_glVertexAttribI4uivEXT = (PFNGLVERTEXATTRIBI4UIVEXTPROC)load("glVertexAttribI4uivEXT"); + glad_glVertexAttribI4bvEXT = (PFNGLVERTEXATTRIBI4BVEXTPROC)load("glVertexAttribI4bvEXT"); + glad_glVertexAttribI4svEXT = (PFNGLVERTEXATTRIBI4SVEXTPROC)load("glVertexAttribI4svEXT"); + glad_glVertexAttribI4ubvEXT = (PFNGLVERTEXATTRIBI4UBVEXTPROC)load("glVertexAttribI4ubvEXT"); + glad_glVertexAttribI4usvEXT = (PFNGLVERTEXATTRIBI4USVEXTPROC)load("glVertexAttribI4usvEXT"); + glad_glVertexAttribIPointerEXT = (PFNGLVERTEXATTRIBIPOINTEREXTPROC)load("glVertexAttribIPointerEXT"); + glad_glGetVertexAttribIivEXT = (PFNGLGETVERTEXATTRIBIIVEXTPROC)load("glGetVertexAttribIivEXT"); + glad_glGetVertexAttribIuivEXT = (PFNGLGETVERTEXATTRIBIUIVEXTPROC)load("glGetVertexAttribIuivEXT"); +} +static void load_GL_EXT_histogram(GLADloadproc load) { + if(!GLAD_GL_EXT_histogram) return; + glad_glGetHistogramEXT = (PFNGLGETHISTOGRAMEXTPROC)load("glGetHistogramEXT"); + glad_glGetHistogramParameterfvEXT = (PFNGLGETHISTOGRAMPARAMETERFVEXTPROC)load("glGetHistogramParameterfvEXT"); + glad_glGetHistogramParameterivEXT = (PFNGLGETHISTOGRAMPARAMETERIVEXTPROC)load("glGetHistogramParameterivEXT"); + glad_glGetMinmaxEXT = (PFNGLGETMINMAXEXTPROC)load("glGetMinmaxEXT"); + glad_glGetMinmaxParameterfvEXT = (PFNGLGETMINMAXPARAMETERFVEXTPROC)load("glGetMinmaxParameterfvEXT"); + 
glad_glGetMinmaxParameterivEXT = (PFNGLGETMINMAXPARAMETERIVEXTPROC)load("glGetMinmaxParameterivEXT"); + glad_glHistogramEXT = (PFNGLHISTOGRAMEXTPROC)load("glHistogramEXT"); + glad_glMinmaxEXT = (PFNGLMINMAXEXTPROC)load("glMinmaxEXT"); + glad_glResetHistogramEXT = (PFNGLRESETHISTOGRAMEXTPROC)load("glResetHistogramEXT"); + glad_glResetMinmaxEXT = (PFNGLRESETMINMAXEXTPROC)load("glResetMinmaxEXT"); +} +static void load_GL_EXT_index_func(GLADloadproc load) { + if(!GLAD_GL_EXT_index_func) return; + glad_glIndexFuncEXT = (PFNGLINDEXFUNCEXTPROC)load("glIndexFuncEXT"); +} +static void load_GL_EXT_index_material(GLADloadproc load) { + if(!GLAD_GL_EXT_index_material) return; + glad_glIndexMaterialEXT = (PFNGLINDEXMATERIALEXTPROC)load("glIndexMaterialEXT"); +} +static void load_GL_EXT_light_texture(GLADloadproc load) { + if(!GLAD_GL_EXT_light_texture) return; + glad_glApplyTextureEXT = (PFNGLAPPLYTEXTUREEXTPROC)load("glApplyTextureEXT"); + glad_glTextureLightEXT = (PFNGLTEXTURELIGHTEXTPROC)load("glTextureLightEXT"); + glad_glTextureMaterialEXT = (PFNGLTEXTUREMATERIALEXTPROC)load("glTextureMaterialEXT"); +} +static void load_GL_EXT_memory_object(GLADloadproc load) { + if(!GLAD_GL_EXT_memory_object) return; + glad_glGetUnsignedBytevEXT = (PFNGLGETUNSIGNEDBYTEVEXTPROC)load("glGetUnsignedBytevEXT"); + glad_glGetUnsignedBytei_vEXT = (PFNGLGETUNSIGNEDBYTEI_VEXTPROC)load("glGetUnsignedBytei_vEXT"); + glad_glDeleteMemoryObjectsEXT = (PFNGLDELETEMEMORYOBJECTSEXTPROC)load("glDeleteMemoryObjectsEXT"); + glad_glIsMemoryObjectEXT = (PFNGLISMEMORYOBJECTEXTPROC)load("glIsMemoryObjectEXT"); + glad_glCreateMemoryObjectsEXT = (PFNGLCREATEMEMORYOBJECTSEXTPROC)load("glCreateMemoryObjectsEXT"); + glad_glMemoryObjectParameterivEXT = (PFNGLMEMORYOBJECTPARAMETERIVEXTPROC)load("glMemoryObjectParameterivEXT"); + glad_glGetMemoryObjectParameterivEXT = (PFNGLGETMEMORYOBJECTPARAMETERIVEXTPROC)load("glGetMemoryObjectParameterivEXT"); + glad_glTexStorageMem2DEXT = 
(PFNGLTEXSTORAGEMEM2DEXTPROC)load("glTexStorageMem2DEXT"); + glad_glTexStorageMem2DMultisampleEXT = (PFNGLTEXSTORAGEMEM2DMULTISAMPLEEXTPROC)load("glTexStorageMem2DMultisampleEXT"); + glad_glTexStorageMem3DEXT = (PFNGLTEXSTORAGEMEM3DEXTPROC)load("glTexStorageMem3DEXT"); + glad_glTexStorageMem3DMultisampleEXT = (PFNGLTEXSTORAGEMEM3DMULTISAMPLEEXTPROC)load("glTexStorageMem3DMultisampleEXT"); + glad_glBufferStorageMemEXT = (PFNGLBUFFERSTORAGEMEMEXTPROC)load("glBufferStorageMemEXT"); + glad_glTextureStorageMem2DEXT = (PFNGLTEXTURESTORAGEMEM2DEXTPROC)load("glTextureStorageMem2DEXT"); + glad_glTextureStorageMem2DMultisampleEXT = (PFNGLTEXTURESTORAGEMEM2DMULTISAMPLEEXTPROC)load("glTextureStorageMem2DMultisampleEXT"); + glad_glTextureStorageMem3DEXT = (PFNGLTEXTURESTORAGEMEM3DEXTPROC)load("glTextureStorageMem3DEXT"); + glad_glTextureStorageMem3DMultisampleEXT = (PFNGLTEXTURESTORAGEMEM3DMULTISAMPLEEXTPROC)load("glTextureStorageMem3DMultisampleEXT"); + glad_glNamedBufferStorageMemEXT = (PFNGLNAMEDBUFFERSTORAGEMEMEXTPROC)load("glNamedBufferStorageMemEXT"); + glad_glTexStorageMem1DEXT = (PFNGLTEXSTORAGEMEM1DEXTPROC)load("glTexStorageMem1DEXT"); + glad_glTextureStorageMem1DEXT = (PFNGLTEXTURESTORAGEMEM1DEXTPROC)load("glTextureStorageMem1DEXT"); +} +static void load_GL_EXT_memory_object_fd(GLADloadproc load) { + if(!GLAD_GL_EXT_memory_object_fd) return; + glad_glImportMemoryFdEXT = (PFNGLIMPORTMEMORYFDEXTPROC)load("glImportMemoryFdEXT"); +} +static void load_GL_EXT_memory_object_win32(GLADloadproc load) { + if(!GLAD_GL_EXT_memory_object_win32) return; + glad_glImportMemoryWin32HandleEXT = (PFNGLIMPORTMEMORYWIN32HANDLEEXTPROC)load("glImportMemoryWin32HandleEXT"); + glad_glImportMemoryWin32NameEXT = (PFNGLIMPORTMEMORYWIN32NAMEEXTPROC)load("glImportMemoryWin32NameEXT"); +} +static void load_GL_EXT_mesh_shader(GLADloadproc load) { + if(!GLAD_GL_EXT_mesh_shader) return; + glad_glDrawMeshTasksEXT = (PFNGLDRAWMESHTASKSEXTPROC)load("glDrawMeshTasksEXT"); + glad_glDrawMeshTasksIndirectEXT 
= (PFNGLDRAWMESHTASKSINDIRECTEXTPROC)load("glDrawMeshTasksIndirectEXT"); + glad_glMultiDrawMeshTasksIndirectEXT = (PFNGLMULTIDRAWMESHTASKSINDIRECTEXTPROC)load("glMultiDrawMeshTasksIndirectEXT"); + glad_glMultiDrawMeshTasksIndirectCountEXT = (PFNGLMULTIDRAWMESHTASKSINDIRECTCOUNTEXTPROC)load("glMultiDrawMeshTasksIndirectCountEXT"); +} +static void load_GL_EXT_multi_draw_arrays(GLADloadproc load) { + if(!GLAD_GL_EXT_multi_draw_arrays) return; + glad_glMultiDrawArraysEXT = (PFNGLMULTIDRAWARRAYSEXTPROC)load("glMultiDrawArraysEXT"); + glad_glMultiDrawElementsEXT = (PFNGLMULTIDRAWELEMENTSEXTPROC)load("glMultiDrawElementsEXT"); +} +static void load_GL_EXT_multisample(GLADloadproc load) { + if(!GLAD_GL_EXT_multisample) return; + glad_glSampleMaskEXT = (PFNGLSAMPLEMASKEXTPROC)load("glSampleMaskEXT"); + glad_glSamplePatternEXT = (PFNGLSAMPLEPATTERNEXTPROC)load("glSamplePatternEXT"); +} +static void load_GL_EXT_paletted_texture(GLADloadproc load) { + if(!GLAD_GL_EXT_paletted_texture) return; + glad_glColorTableEXT = (PFNGLCOLORTABLEEXTPROC)load("glColorTableEXT"); + glad_glGetColorTableEXT = (PFNGLGETCOLORTABLEEXTPROC)load("glGetColorTableEXT"); + glad_glGetColorTableParameterivEXT = (PFNGLGETCOLORTABLEPARAMETERIVEXTPROC)load("glGetColorTableParameterivEXT"); + glad_glGetColorTableParameterfvEXT = (PFNGLGETCOLORTABLEPARAMETERFVEXTPROC)load("glGetColorTableParameterfvEXT"); +} +static void load_GL_EXT_pixel_transform(GLADloadproc load) { + if(!GLAD_GL_EXT_pixel_transform) return; + glad_glPixelTransformParameteriEXT = (PFNGLPIXELTRANSFORMPARAMETERIEXTPROC)load("glPixelTransformParameteriEXT"); + glad_glPixelTransformParameterfEXT = (PFNGLPIXELTRANSFORMPARAMETERFEXTPROC)load("glPixelTransformParameterfEXT"); + glad_glPixelTransformParameterivEXT = (PFNGLPIXELTRANSFORMPARAMETERIVEXTPROC)load("glPixelTransformParameterivEXT"); + glad_glPixelTransformParameterfvEXT = (PFNGLPIXELTRANSFORMPARAMETERFVEXTPROC)load("glPixelTransformParameterfvEXT"); + 
glad_glGetPixelTransformParameterivEXT = (PFNGLGETPIXELTRANSFORMPARAMETERIVEXTPROC)load("glGetPixelTransformParameterivEXT"); + glad_glGetPixelTransformParameterfvEXT = (PFNGLGETPIXELTRANSFORMPARAMETERFVEXTPROC)load("glGetPixelTransformParameterfvEXT"); +} +static void load_GL_EXT_point_parameters(GLADloadproc load) { + if(!GLAD_GL_EXT_point_parameters) return; + glad_glPointParameterfEXT = (PFNGLPOINTPARAMETERFEXTPROC)load("glPointParameterfEXT"); + glad_glPointParameterfvEXT = (PFNGLPOINTPARAMETERFVEXTPROC)load("glPointParameterfvEXT"); +} +static void load_GL_EXT_polygon_offset(GLADloadproc load) { + if(!GLAD_GL_EXT_polygon_offset) return; + glad_glPolygonOffsetEXT = (PFNGLPOLYGONOFFSETEXTPROC)load("glPolygonOffsetEXT"); +} +static void load_GL_EXT_polygon_offset_clamp(GLADloadproc load) { + if(!GLAD_GL_EXT_polygon_offset_clamp) return; + glad_glPolygonOffsetClampEXT = (PFNGLPOLYGONOFFSETCLAMPEXTPROC)load("glPolygonOffsetClampEXT"); +} +static void load_GL_EXT_provoking_vertex(GLADloadproc load) { + if(!GLAD_GL_EXT_provoking_vertex) return; + glad_glProvokingVertexEXT = (PFNGLPROVOKINGVERTEXEXTPROC)load("glProvokingVertexEXT"); +} +static void load_GL_EXT_raster_multisample(GLADloadproc load) { + if(!GLAD_GL_EXT_raster_multisample) return; + glad_glRasterSamplesEXT = (PFNGLRASTERSAMPLESEXTPROC)load("glRasterSamplesEXT"); +} +static void load_GL_EXT_secondary_color(GLADloadproc load) { + if(!GLAD_GL_EXT_secondary_color) return; + glad_glSecondaryColor3bEXT = (PFNGLSECONDARYCOLOR3BEXTPROC)load("glSecondaryColor3bEXT"); + glad_glSecondaryColor3bvEXT = (PFNGLSECONDARYCOLOR3BVEXTPROC)load("glSecondaryColor3bvEXT"); + glad_glSecondaryColor3dEXT = (PFNGLSECONDARYCOLOR3DEXTPROC)load("glSecondaryColor3dEXT"); + glad_glSecondaryColor3dvEXT = (PFNGLSECONDARYCOLOR3DVEXTPROC)load("glSecondaryColor3dvEXT"); + glad_glSecondaryColor3fEXT = (PFNGLSECONDARYCOLOR3FEXTPROC)load("glSecondaryColor3fEXT"); + glad_glSecondaryColor3fvEXT = 
(PFNGLSECONDARYCOLOR3FVEXTPROC)load("glSecondaryColor3fvEXT"); + glad_glSecondaryColor3iEXT = (PFNGLSECONDARYCOLOR3IEXTPROC)load("glSecondaryColor3iEXT"); + glad_glSecondaryColor3ivEXT = (PFNGLSECONDARYCOLOR3IVEXTPROC)load("glSecondaryColor3ivEXT"); + glad_glSecondaryColor3sEXT = (PFNGLSECONDARYCOLOR3SEXTPROC)load("glSecondaryColor3sEXT"); + glad_glSecondaryColor3svEXT = (PFNGLSECONDARYCOLOR3SVEXTPROC)load("glSecondaryColor3svEXT"); + glad_glSecondaryColor3ubEXT = (PFNGLSECONDARYCOLOR3UBEXTPROC)load("glSecondaryColor3ubEXT"); + glad_glSecondaryColor3ubvEXT = (PFNGLSECONDARYCOLOR3UBVEXTPROC)load("glSecondaryColor3ubvEXT"); + glad_glSecondaryColor3uiEXT = (PFNGLSECONDARYCOLOR3UIEXTPROC)load("glSecondaryColor3uiEXT"); + glad_glSecondaryColor3uivEXT = (PFNGLSECONDARYCOLOR3UIVEXTPROC)load("glSecondaryColor3uivEXT"); + glad_glSecondaryColor3usEXT = (PFNGLSECONDARYCOLOR3USEXTPROC)load("glSecondaryColor3usEXT"); + glad_glSecondaryColor3usvEXT = (PFNGLSECONDARYCOLOR3USVEXTPROC)load("glSecondaryColor3usvEXT"); + glad_glSecondaryColorPointerEXT = (PFNGLSECONDARYCOLORPOINTEREXTPROC)load("glSecondaryColorPointerEXT"); +} +static void load_GL_EXT_semaphore(GLADloadproc load) { + if(!GLAD_GL_EXT_semaphore) return; + glad_glGetUnsignedBytevEXT = (PFNGLGETUNSIGNEDBYTEVEXTPROC)load("glGetUnsignedBytevEXT"); + glad_glGetUnsignedBytei_vEXT = (PFNGLGETUNSIGNEDBYTEI_VEXTPROC)load("glGetUnsignedBytei_vEXT"); + glad_glGenSemaphoresEXT = (PFNGLGENSEMAPHORESEXTPROC)load("glGenSemaphoresEXT"); + glad_glDeleteSemaphoresEXT = (PFNGLDELETESEMAPHORESEXTPROC)load("glDeleteSemaphoresEXT"); + glad_glIsSemaphoreEXT = (PFNGLISSEMAPHOREEXTPROC)load("glIsSemaphoreEXT"); + glad_glSemaphoreParameterui64vEXT = (PFNGLSEMAPHOREPARAMETERUI64VEXTPROC)load("glSemaphoreParameterui64vEXT"); + glad_glGetSemaphoreParameterui64vEXT = (PFNGLGETSEMAPHOREPARAMETERUI64VEXTPROC)load("glGetSemaphoreParameterui64vEXT"); + glad_glWaitSemaphoreEXT = (PFNGLWAITSEMAPHOREEXTPROC)load("glWaitSemaphoreEXT"); + 
glad_glSignalSemaphoreEXT = (PFNGLSIGNALSEMAPHOREEXTPROC)load("glSignalSemaphoreEXT"); +} +static void load_GL_EXT_semaphore_fd(GLADloadproc load) { + if(!GLAD_GL_EXT_semaphore_fd) return; + glad_glImportSemaphoreFdEXT = (PFNGLIMPORTSEMAPHOREFDEXTPROC)load("glImportSemaphoreFdEXT"); +} +static void load_GL_EXT_semaphore_win32(GLADloadproc load) { + if(!GLAD_GL_EXT_semaphore_win32) return; + glad_glImportSemaphoreWin32HandleEXT = (PFNGLIMPORTSEMAPHOREWIN32HANDLEEXTPROC)load("glImportSemaphoreWin32HandleEXT"); + glad_glImportSemaphoreWin32NameEXT = (PFNGLIMPORTSEMAPHOREWIN32NAMEEXTPROC)load("glImportSemaphoreWin32NameEXT"); +} +static void load_GL_EXT_separate_shader_objects(GLADloadproc load) { + if(!GLAD_GL_EXT_separate_shader_objects) return; + glad_glUseShaderProgramEXT = (PFNGLUSESHADERPROGRAMEXTPROC)load("glUseShaderProgramEXT"); + glad_glActiveProgramEXT = (PFNGLACTIVEPROGRAMEXTPROC)load("glActiveProgramEXT"); + glad_glCreateShaderProgramEXT = (PFNGLCREATESHADERPROGRAMEXTPROC)load("glCreateShaderProgramEXT"); + glad_glActiveShaderProgramEXT = (PFNGLACTIVESHADERPROGRAMEXTPROC)load("glActiveShaderProgramEXT"); + glad_glBindProgramPipelineEXT = (PFNGLBINDPROGRAMPIPELINEEXTPROC)load("glBindProgramPipelineEXT"); + glad_glCreateShaderProgramvEXT = (PFNGLCREATESHADERPROGRAMVEXTPROC)load("glCreateShaderProgramvEXT"); + glad_glDeleteProgramPipelinesEXT = (PFNGLDELETEPROGRAMPIPELINESEXTPROC)load("glDeleteProgramPipelinesEXT"); + glad_glGenProgramPipelinesEXT = (PFNGLGENPROGRAMPIPELINESEXTPROC)load("glGenProgramPipelinesEXT"); + glad_glGetProgramPipelineInfoLogEXT = (PFNGLGETPROGRAMPIPELINEINFOLOGEXTPROC)load("glGetProgramPipelineInfoLogEXT"); + glad_glGetProgramPipelineivEXT = (PFNGLGETPROGRAMPIPELINEIVEXTPROC)load("glGetProgramPipelineivEXT"); + glad_glIsProgramPipelineEXT = (PFNGLISPROGRAMPIPELINEEXTPROC)load("glIsProgramPipelineEXT"); + glad_glProgramParameteriEXT = (PFNGLPROGRAMPARAMETERIEXTPROC)load("glProgramParameteriEXT"); + glad_glProgramUniform1fEXT = 
(PFNGLPROGRAMUNIFORM1FEXTPROC)load("glProgramUniform1fEXT"); + glad_glProgramUniform1fvEXT = (PFNGLPROGRAMUNIFORM1FVEXTPROC)load("glProgramUniform1fvEXT"); + glad_glProgramUniform1iEXT = (PFNGLPROGRAMUNIFORM1IEXTPROC)load("glProgramUniform1iEXT"); + glad_glProgramUniform1ivEXT = (PFNGLPROGRAMUNIFORM1IVEXTPROC)load("glProgramUniform1ivEXT"); + glad_glProgramUniform2fEXT = (PFNGLPROGRAMUNIFORM2FEXTPROC)load("glProgramUniform2fEXT"); + glad_glProgramUniform2fvEXT = (PFNGLPROGRAMUNIFORM2FVEXTPROC)load("glProgramUniform2fvEXT"); + glad_glProgramUniform2iEXT = (PFNGLPROGRAMUNIFORM2IEXTPROC)load("glProgramUniform2iEXT"); + glad_glProgramUniform2ivEXT = (PFNGLPROGRAMUNIFORM2IVEXTPROC)load("glProgramUniform2ivEXT"); + glad_glProgramUniform3fEXT = (PFNGLPROGRAMUNIFORM3FEXTPROC)load("glProgramUniform3fEXT"); + glad_glProgramUniform3fvEXT = (PFNGLPROGRAMUNIFORM3FVEXTPROC)load("glProgramUniform3fvEXT"); + glad_glProgramUniform3iEXT = (PFNGLPROGRAMUNIFORM3IEXTPROC)load("glProgramUniform3iEXT"); + glad_glProgramUniform3ivEXT = (PFNGLPROGRAMUNIFORM3IVEXTPROC)load("glProgramUniform3ivEXT"); + glad_glProgramUniform4fEXT = (PFNGLPROGRAMUNIFORM4FEXTPROC)load("glProgramUniform4fEXT"); + glad_glProgramUniform4fvEXT = (PFNGLPROGRAMUNIFORM4FVEXTPROC)load("glProgramUniform4fvEXT"); + glad_glProgramUniform4iEXT = (PFNGLPROGRAMUNIFORM4IEXTPROC)load("glProgramUniform4iEXT"); + glad_glProgramUniform4ivEXT = (PFNGLPROGRAMUNIFORM4IVEXTPROC)load("glProgramUniform4ivEXT"); + glad_glProgramUniformMatrix2fvEXT = (PFNGLPROGRAMUNIFORMMATRIX2FVEXTPROC)load("glProgramUniformMatrix2fvEXT"); + glad_glProgramUniformMatrix3fvEXT = (PFNGLPROGRAMUNIFORMMATRIX3FVEXTPROC)load("glProgramUniformMatrix3fvEXT"); + glad_glProgramUniformMatrix4fvEXT = (PFNGLPROGRAMUNIFORMMATRIX4FVEXTPROC)load("glProgramUniformMatrix4fvEXT"); + glad_glUseProgramStagesEXT = (PFNGLUSEPROGRAMSTAGESEXTPROC)load("glUseProgramStagesEXT"); + glad_glValidateProgramPipelineEXT = 
(PFNGLVALIDATEPROGRAMPIPELINEEXTPROC)load("glValidateProgramPipelineEXT"); + glad_glProgramUniform1uiEXT = (PFNGLPROGRAMUNIFORM1UIEXTPROC)load("glProgramUniform1uiEXT"); + glad_glProgramUniform2uiEXT = (PFNGLPROGRAMUNIFORM2UIEXTPROC)load("glProgramUniform2uiEXT"); + glad_glProgramUniform3uiEXT = (PFNGLPROGRAMUNIFORM3UIEXTPROC)load("glProgramUniform3uiEXT"); + glad_glProgramUniform4uiEXT = (PFNGLPROGRAMUNIFORM4UIEXTPROC)load("glProgramUniform4uiEXT"); + glad_glProgramUniform1uivEXT = (PFNGLPROGRAMUNIFORM1UIVEXTPROC)load("glProgramUniform1uivEXT"); + glad_glProgramUniform2uivEXT = (PFNGLPROGRAMUNIFORM2UIVEXTPROC)load("glProgramUniform2uivEXT"); + glad_glProgramUniform3uivEXT = (PFNGLPROGRAMUNIFORM3UIVEXTPROC)load("glProgramUniform3uivEXT"); + glad_glProgramUniform4uivEXT = (PFNGLPROGRAMUNIFORM4UIVEXTPROC)load("glProgramUniform4uivEXT"); + glad_glProgramUniformMatrix2x3fvEXT = (PFNGLPROGRAMUNIFORMMATRIX2X3FVEXTPROC)load("glProgramUniformMatrix2x3fvEXT"); + glad_glProgramUniformMatrix3x2fvEXT = (PFNGLPROGRAMUNIFORMMATRIX3X2FVEXTPROC)load("glProgramUniformMatrix3x2fvEXT"); + glad_glProgramUniformMatrix2x4fvEXT = (PFNGLPROGRAMUNIFORMMATRIX2X4FVEXTPROC)load("glProgramUniformMatrix2x4fvEXT"); + glad_glProgramUniformMatrix4x2fvEXT = (PFNGLPROGRAMUNIFORMMATRIX4X2FVEXTPROC)load("glProgramUniformMatrix4x2fvEXT"); + glad_glProgramUniformMatrix3x4fvEXT = (PFNGLPROGRAMUNIFORMMATRIX3X4FVEXTPROC)load("glProgramUniformMatrix3x4fvEXT"); + glad_glProgramUniformMatrix4x3fvEXT = (PFNGLPROGRAMUNIFORMMATRIX4X3FVEXTPROC)load("glProgramUniformMatrix4x3fvEXT"); +} +static void load_GL_EXT_shader_framebuffer_fetch_non_coherent(GLADloadproc load) { + if(!GLAD_GL_EXT_shader_framebuffer_fetch_non_coherent) return; + glad_glFramebufferFetchBarrierEXT = (PFNGLFRAMEBUFFERFETCHBARRIEREXTPROC)load("glFramebufferFetchBarrierEXT"); +} +static void load_GL_EXT_shader_image_load_store(GLADloadproc load) { + if(!GLAD_GL_EXT_shader_image_load_store) return; + glad_glBindImageTextureEXT = 
(PFNGLBINDIMAGETEXTUREEXTPROC)load("glBindImageTextureEXT"); + glad_glMemoryBarrierEXT = (PFNGLMEMORYBARRIEREXTPROC)load("glMemoryBarrierEXT"); +} +static void load_GL_EXT_stencil_clear_tag(GLADloadproc load) { + if(!GLAD_GL_EXT_stencil_clear_tag) return; + glad_glStencilClearTagEXT = (PFNGLSTENCILCLEARTAGEXTPROC)load("glStencilClearTagEXT"); +} +static void load_GL_EXT_stencil_two_side(GLADloadproc load) { + if(!GLAD_GL_EXT_stencil_two_side) return; + glad_glActiveStencilFaceEXT = (PFNGLACTIVESTENCILFACEEXTPROC)load("glActiveStencilFaceEXT"); +} +static void load_GL_EXT_subtexture(GLADloadproc load) { + if(!GLAD_GL_EXT_subtexture) return; + glad_glTexSubImage1DEXT = (PFNGLTEXSUBIMAGE1DEXTPROC)load("glTexSubImage1DEXT"); + glad_glTexSubImage2DEXT = (PFNGLTEXSUBIMAGE2DEXTPROC)load("glTexSubImage2DEXT"); +} +static void load_GL_EXT_texture3D(GLADloadproc load) { + if(!GLAD_GL_EXT_texture3D) return; + glad_glTexImage3DEXT = (PFNGLTEXIMAGE3DEXTPROC)load("glTexImage3DEXT"); + glad_glTexSubImage3DEXT = (PFNGLTEXSUBIMAGE3DEXTPROC)load("glTexSubImage3DEXT"); +} +static void load_GL_EXT_texture_array(GLADloadproc load) { + if(!GLAD_GL_EXT_texture_array) return; + glad_glFramebufferTextureLayerEXT = (PFNGLFRAMEBUFFERTEXTURELAYEREXTPROC)load("glFramebufferTextureLayerEXT"); +} +static void load_GL_EXT_texture_buffer_object(GLADloadproc load) { + if(!GLAD_GL_EXT_texture_buffer_object) return; + glad_glTexBufferEXT = (PFNGLTEXBUFFEREXTPROC)load("glTexBufferEXT"); +} +static void load_GL_EXT_texture_integer(GLADloadproc load) { + if(!GLAD_GL_EXT_texture_integer) return; + glad_glTexParameterIivEXT = (PFNGLTEXPARAMETERIIVEXTPROC)load("glTexParameterIivEXT"); + glad_glTexParameterIuivEXT = (PFNGLTEXPARAMETERIUIVEXTPROC)load("glTexParameterIuivEXT"); + glad_glGetTexParameterIivEXT = (PFNGLGETTEXPARAMETERIIVEXTPROC)load("glGetTexParameterIivEXT"); + glad_glGetTexParameterIuivEXT = (PFNGLGETTEXPARAMETERIUIVEXTPROC)load("glGetTexParameterIuivEXT"); + glad_glClearColorIiEXT = 
(PFNGLCLEARCOLORIIEXTPROC)load("glClearColorIiEXT"); + glad_glClearColorIuiEXT = (PFNGLCLEARCOLORIUIEXTPROC)load("glClearColorIuiEXT"); +} +static void load_GL_EXT_texture_object(GLADloadproc load) { + if(!GLAD_GL_EXT_texture_object) return; + glad_glAreTexturesResidentEXT = (PFNGLARETEXTURESRESIDENTEXTPROC)load("glAreTexturesResidentEXT"); + glad_glBindTextureEXT = (PFNGLBINDTEXTUREEXTPROC)load("glBindTextureEXT"); + glad_glDeleteTexturesEXT = (PFNGLDELETETEXTURESEXTPROC)load("glDeleteTexturesEXT"); + glad_glGenTexturesEXT = (PFNGLGENTEXTURESEXTPROC)load("glGenTexturesEXT"); + glad_glIsTextureEXT = (PFNGLISTEXTUREEXTPROC)load("glIsTextureEXT"); + glad_glPrioritizeTexturesEXT = (PFNGLPRIORITIZETEXTURESEXTPROC)load("glPrioritizeTexturesEXT"); +} +static void load_GL_EXT_texture_perturb_normal(GLADloadproc load) { + if(!GLAD_GL_EXT_texture_perturb_normal) return; + glad_glTextureNormalEXT = (PFNGLTEXTURENORMALEXTPROC)load("glTextureNormalEXT"); +} +static void load_GL_EXT_texture_storage(GLADloadproc load) { + if(!GLAD_GL_EXT_texture_storage) return; + glad_glTexStorage1DEXT = (PFNGLTEXSTORAGE1DEXTPROC)load("glTexStorage1DEXT"); + glad_glTexStorage2DEXT = (PFNGLTEXSTORAGE2DEXTPROC)load("glTexStorage2DEXT"); + glad_glTexStorage3DEXT = (PFNGLTEXSTORAGE3DEXTPROC)load("glTexStorage3DEXT"); + glad_glTextureStorage1DEXT = (PFNGLTEXTURESTORAGE1DEXTPROC)load("glTextureStorage1DEXT"); + glad_glTextureStorage2DEXT = (PFNGLTEXTURESTORAGE2DEXTPROC)load("glTextureStorage2DEXT"); + glad_glTextureStorage3DEXT = (PFNGLTEXTURESTORAGE3DEXTPROC)load("glTextureStorage3DEXT"); +} +static void load_GL_EXT_timer_query(GLADloadproc load) { + if(!GLAD_GL_EXT_timer_query) return; + glad_glGetQueryObjecti64vEXT = (PFNGLGETQUERYOBJECTI64VEXTPROC)load("glGetQueryObjecti64vEXT"); + glad_glGetQueryObjectui64vEXT = (PFNGLGETQUERYOBJECTUI64VEXTPROC)load("glGetQueryObjectui64vEXT"); +} +static void load_GL_EXT_transform_feedback(GLADloadproc load) { + if(!GLAD_GL_EXT_transform_feedback) return; + 
glad_glBeginTransformFeedbackEXT = (PFNGLBEGINTRANSFORMFEEDBACKEXTPROC)load("glBeginTransformFeedbackEXT"); + glad_glEndTransformFeedbackEXT = (PFNGLENDTRANSFORMFEEDBACKEXTPROC)load("glEndTransformFeedbackEXT"); + glad_glBindBufferRangeEXT = (PFNGLBINDBUFFERRANGEEXTPROC)load("glBindBufferRangeEXT"); + glad_glBindBufferOffsetEXT = (PFNGLBINDBUFFEROFFSETEXTPROC)load("glBindBufferOffsetEXT"); + glad_glBindBufferBaseEXT = (PFNGLBINDBUFFERBASEEXTPROC)load("glBindBufferBaseEXT"); + glad_glTransformFeedbackVaryingsEXT = (PFNGLTRANSFORMFEEDBACKVARYINGSEXTPROC)load("glTransformFeedbackVaryingsEXT"); + glad_glGetTransformFeedbackVaryingEXT = (PFNGLGETTRANSFORMFEEDBACKVARYINGEXTPROC)load("glGetTransformFeedbackVaryingEXT"); +} +static void load_GL_EXT_vertex_array(GLADloadproc load) { + if(!GLAD_GL_EXT_vertex_array) return; + glad_glArrayElementEXT = (PFNGLARRAYELEMENTEXTPROC)load("glArrayElementEXT"); + glad_glColorPointerEXT = (PFNGLCOLORPOINTEREXTPROC)load("glColorPointerEXT"); + glad_glDrawArraysEXT = (PFNGLDRAWARRAYSEXTPROC)load("glDrawArraysEXT"); + glad_glEdgeFlagPointerEXT = (PFNGLEDGEFLAGPOINTEREXTPROC)load("glEdgeFlagPointerEXT"); + glad_glGetPointervEXT = (PFNGLGETPOINTERVEXTPROC)load("glGetPointervEXT"); + glad_glIndexPointerEXT = (PFNGLINDEXPOINTEREXTPROC)load("glIndexPointerEXT"); + glad_glNormalPointerEXT = (PFNGLNORMALPOINTEREXTPROC)load("glNormalPointerEXT"); + glad_glTexCoordPointerEXT = (PFNGLTEXCOORDPOINTEREXTPROC)load("glTexCoordPointerEXT"); + glad_glVertexPointerEXT = (PFNGLVERTEXPOINTEREXTPROC)load("glVertexPointerEXT"); +} +static void load_GL_EXT_vertex_attrib_64bit(GLADloadproc load) { + if(!GLAD_GL_EXT_vertex_attrib_64bit) return; + glad_glVertexAttribL1dEXT = (PFNGLVERTEXATTRIBL1DEXTPROC)load("glVertexAttribL1dEXT"); + glad_glVertexAttribL2dEXT = (PFNGLVERTEXATTRIBL2DEXTPROC)load("glVertexAttribL2dEXT"); + glad_glVertexAttribL3dEXT = (PFNGLVERTEXATTRIBL3DEXTPROC)load("glVertexAttribL3dEXT"); + glad_glVertexAttribL4dEXT = 
(PFNGLVERTEXATTRIBL4DEXTPROC)load("glVertexAttribL4dEXT"); + glad_glVertexAttribL1dvEXT = (PFNGLVERTEXATTRIBL1DVEXTPROC)load("glVertexAttribL1dvEXT"); + glad_glVertexAttribL2dvEXT = (PFNGLVERTEXATTRIBL2DVEXTPROC)load("glVertexAttribL2dvEXT"); + glad_glVertexAttribL3dvEXT = (PFNGLVERTEXATTRIBL3DVEXTPROC)load("glVertexAttribL3dvEXT"); + glad_glVertexAttribL4dvEXT = (PFNGLVERTEXATTRIBL4DVEXTPROC)load("glVertexAttribL4dvEXT"); + glad_glVertexAttribLPointerEXT = (PFNGLVERTEXATTRIBLPOINTEREXTPROC)load("glVertexAttribLPointerEXT"); + glad_glGetVertexAttribLdvEXT = (PFNGLGETVERTEXATTRIBLDVEXTPROC)load("glGetVertexAttribLdvEXT"); +} +static void load_GL_EXT_vertex_shader(GLADloadproc load) { + if(!GLAD_GL_EXT_vertex_shader) return; + glad_glBeginVertexShaderEXT = (PFNGLBEGINVERTEXSHADEREXTPROC)load("glBeginVertexShaderEXT"); + glad_glEndVertexShaderEXT = (PFNGLENDVERTEXSHADEREXTPROC)load("glEndVertexShaderEXT"); + glad_glBindVertexShaderEXT = (PFNGLBINDVERTEXSHADEREXTPROC)load("glBindVertexShaderEXT"); + glad_glGenVertexShadersEXT = (PFNGLGENVERTEXSHADERSEXTPROC)load("glGenVertexShadersEXT"); + glad_glDeleteVertexShaderEXT = (PFNGLDELETEVERTEXSHADEREXTPROC)load("glDeleteVertexShaderEXT"); + glad_glShaderOp1EXT = (PFNGLSHADEROP1EXTPROC)load("glShaderOp1EXT"); + glad_glShaderOp2EXT = (PFNGLSHADEROP2EXTPROC)load("glShaderOp2EXT"); + glad_glShaderOp3EXT = (PFNGLSHADEROP3EXTPROC)load("glShaderOp3EXT"); + glad_glSwizzleEXT = (PFNGLSWIZZLEEXTPROC)load("glSwizzleEXT"); + glad_glWriteMaskEXT = (PFNGLWRITEMASKEXTPROC)load("glWriteMaskEXT"); + glad_glInsertComponentEXT = (PFNGLINSERTCOMPONENTEXTPROC)load("glInsertComponentEXT"); + glad_glExtractComponentEXT = (PFNGLEXTRACTCOMPONENTEXTPROC)load("glExtractComponentEXT"); + glad_glGenSymbolsEXT = (PFNGLGENSYMBOLSEXTPROC)load("glGenSymbolsEXT"); + glad_glSetInvariantEXT = (PFNGLSETINVARIANTEXTPROC)load("glSetInvariantEXT"); + glad_glSetLocalConstantEXT = (PFNGLSETLOCALCONSTANTEXTPROC)load("glSetLocalConstantEXT"); + glad_glVariantbvEXT = 
(PFNGLVARIANTBVEXTPROC)load("glVariantbvEXT"); + glad_glVariantsvEXT = (PFNGLVARIANTSVEXTPROC)load("glVariantsvEXT"); + glad_glVariantivEXT = (PFNGLVARIANTIVEXTPROC)load("glVariantivEXT"); + glad_glVariantfvEXT = (PFNGLVARIANTFVEXTPROC)load("glVariantfvEXT"); + glad_glVariantdvEXT = (PFNGLVARIANTDVEXTPROC)load("glVariantdvEXT"); + glad_glVariantubvEXT = (PFNGLVARIANTUBVEXTPROC)load("glVariantubvEXT"); + glad_glVariantusvEXT = (PFNGLVARIANTUSVEXTPROC)load("glVariantusvEXT"); + glad_glVariantuivEXT = (PFNGLVARIANTUIVEXTPROC)load("glVariantuivEXT"); + glad_glVariantPointerEXT = (PFNGLVARIANTPOINTEREXTPROC)load("glVariantPointerEXT"); + glad_glEnableVariantClientStateEXT = (PFNGLENABLEVARIANTCLIENTSTATEEXTPROC)load("glEnableVariantClientStateEXT"); + glad_glDisableVariantClientStateEXT = (PFNGLDISABLEVARIANTCLIENTSTATEEXTPROC)load("glDisableVariantClientStateEXT"); + glad_glBindLightParameterEXT = (PFNGLBINDLIGHTPARAMETEREXTPROC)load("glBindLightParameterEXT"); + glad_glBindMaterialParameterEXT = (PFNGLBINDMATERIALPARAMETEREXTPROC)load("glBindMaterialParameterEXT"); + glad_glBindTexGenParameterEXT = (PFNGLBINDTEXGENPARAMETEREXTPROC)load("glBindTexGenParameterEXT"); + glad_glBindTextureUnitParameterEXT = (PFNGLBINDTEXTUREUNITPARAMETEREXTPROC)load("glBindTextureUnitParameterEXT"); + glad_glBindParameterEXT = (PFNGLBINDPARAMETEREXTPROC)load("glBindParameterEXT"); + glad_glIsVariantEnabledEXT = (PFNGLISVARIANTENABLEDEXTPROC)load("glIsVariantEnabledEXT"); + glad_glGetVariantBooleanvEXT = (PFNGLGETVARIANTBOOLEANVEXTPROC)load("glGetVariantBooleanvEXT"); + glad_glGetVariantIntegervEXT = (PFNGLGETVARIANTINTEGERVEXTPROC)load("glGetVariantIntegervEXT"); + glad_glGetVariantFloatvEXT = (PFNGLGETVARIANTFLOATVEXTPROC)load("glGetVariantFloatvEXT"); + glad_glGetVariantPointervEXT = (PFNGLGETVARIANTPOINTERVEXTPROC)load("glGetVariantPointervEXT"); + glad_glGetInvariantBooleanvEXT = (PFNGLGETINVARIANTBOOLEANVEXTPROC)load("glGetInvariantBooleanvEXT"); + glad_glGetInvariantIntegervEXT = 
(PFNGLGETINVARIANTINTEGERVEXTPROC)load("glGetInvariantIntegervEXT"); + glad_glGetInvariantFloatvEXT = (PFNGLGETINVARIANTFLOATVEXTPROC)load("glGetInvariantFloatvEXT"); + glad_glGetLocalConstantBooleanvEXT = (PFNGLGETLOCALCONSTANTBOOLEANVEXTPROC)load("glGetLocalConstantBooleanvEXT"); + glad_glGetLocalConstantIntegervEXT = (PFNGLGETLOCALCONSTANTINTEGERVEXTPROC)load("glGetLocalConstantIntegervEXT"); + glad_glGetLocalConstantFloatvEXT = (PFNGLGETLOCALCONSTANTFLOATVEXTPROC)load("glGetLocalConstantFloatvEXT"); +} +static void load_GL_EXT_vertex_weighting(GLADloadproc load) { + if(!GLAD_GL_EXT_vertex_weighting) return; + glad_glVertexWeightfEXT = (PFNGLVERTEXWEIGHTFEXTPROC)load("glVertexWeightfEXT"); + glad_glVertexWeightfvEXT = (PFNGLVERTEXWEIGHTFVEXTPROC)load("glVertexWeightfvEXT"); + glad_glVertexWeightPointerEXT = (PFNGLVERTEXWEIGHTPOINTEREXTPROC)load("glVertexWeightPointerEXT"); +} +static void load_GL_EXT_win32_keyed_mutex(GLADloadproc load) { + if(!GLAD_GL_EXT_win32_keyed_mutex) return; + glad_glAcquireKeyedMutexWin32EXT = (PFNGLACQUIREKEYEDMUTEXWIN32EXTPROC)load("glAcquireKeyedMutexWin32EXT"); + glad_glReleaseKeyedMutexWin32EXT = (PFNGLRELEASEKEYEDMUTEXWIN32EXTPROC)load("glReleaseKeyedMutexWin32EXT"); +} +static void load_GL_EXT_window_rectangles(GLADloadproc load) { + if(!GLAD_GL_EXT_window_rectangles) return; + glad_glWindowRectanglesEXT = (PFNGLWINDOWRECTANGLESEXTPROC)load("glWindowRectanglesEXT"); +} +static void load_GL_EXT_x11_sync_object(GLADloadproc load) { + if(!GLAD_GL_EXT_x11_sync_object) return; + glad_glImportSyncEXT = (PFNGLIMPORTSYNCEXTPROC)load("glImportSyncEXT"); +} +static void load_GL_GREMEDY_frame_terminator(GLADloadproc load) { + if(!GLAD_GL_GREMEDY_frame_terminator) return; + glad_glFrameTerminatorGREMEDY = (PFNGLFRAMETERMINATORGREMEDYPROC)load("glFrameTerminatorGREMEDY"); +} +static void load_GL_GREMEDY_string_marker(GLADloadproc load) { + if(!GLAD_GL_GREMEDY_string_marker) return; + glad_glStringMarkerGREMEDY = 
(PFNGLSTRINGMARKERGREMEDYPROC)load("glStringMarkerGREMEDY"); +} +static void load_GL_HP_image_transform(GLADloadproc load) { + if(!GLAD_GL_HP_image_transform) return; + glad_glImageTransformParameteriHP = (PFNGLIMAGETRANSFORMPARAMETERIHPPROC)load("glImageTransformParameteriHP"); + glad_glImageTransformParameterfHP = (PFNGLIMAGETRANSFORMPARAMETERFHPPROC)load("glImageTransformParameterfHP"); + glad_glImageTransformParameterivHP = (PFNGLIMAGETRANSFORMPARAMETERIVHPPROC)load("glImageTransformParameterivHP"); + glad_glImageTransformParameterfvHP = (PFNGLIMAGETRANSFORMPARAMETERFVHPPROC)load("glImageTransformParameterfvHP"); + glad_glGetImageTransformParameterivHP = (PFNGLGETIMAGETRANSFORMPARAMETERIVHPPROC)load("glGetImageTransformParameterivHP"); + glad_glGetImageTransformParameterfvHP = (PFNGLGETIMAGETRANSFORMPARAMETERFVHPPROC)load("glGetImageTransformParameterfvHP"); +} +static void load_GL_IBM_multimode_draw_arrays(GLADloadproc load) { + if(!GLAD_GL_IBM_multimode_draw_arrays) return; + glad_glMultiModeDrawArraysIBM = (PFNGLMULTIMODEDRAWARRAYSIBMPROC)load("glMultiModeDrawArraysIBM"); + glad_glMultiModeDrawElementsIBM = (PFNGLMULTIMODEDRAWELEMENTSIBMPROC)load("glMultiModeDrawElementsIBM"); +} +static void load_GL_IBM_static_data(GLADloadproc load) { + if(!GLAD_GL_IBM_static_data) return; + glad_glFlushStaticDataIBM = (PFNGLFLUSHSTATICDATAIBMPROC)load("glFlushStaticDataIBM"); +} +static void load_GL_IBM_vertex_array_lists(GLADloadproc load) { + if(!GLAD_GL_IBM_vertex_array_lists) return; + glad_glColorPointerListIBM = (PFNGLCOLORPOINTERLISTIBMPROC)load("glColorPointerListIBM"); + glad_glSecondaryColorPointerListIBM = (PFNGLSECONDARYCOLORPOINTERLISTIBMPROC)load("glSecondaryColorPointerListIBM"); + glad_glEdgeFlagPointerListIBM = (PFNGLEDGEFLAGPOINTERLISTIBMPROC)load("glEdgeFlagPointerListIBM"); + glad_glFogCoordPointerListIBM = (PFNGLFOGCOORDPOINTERLISTIBMPROC)load("glFogCoordPointerListIBM"); + glad_glIndexPointerListIBM = 
(PFNGLINDEXPOINTERLISTIBMPROC)load("glIndexPointerListIBM"); + glad_glNormalPointerListIBM = (PFNGLNORMALPOINTERLISTIBMPROC)load("glNormalPointerListIBM"); + glad_glTexCoordPointerListIBM = (PFNGLTEXCOORDPOINTERLISTIBMPROC)load("glTexCoordPointerListIBM"); + glad_glVertexPointerListIBM = (PFNGLVERTEXPOINTERLISTIBMPROC)load("glVertexPointerListIBM"); +} +static void load_GL_INGR_blend_func_separate(GLADloadproc load) { + if(!GLAD_GL_INGR_blend_func_separate) return; + glad_glBlendFuncSeparateINGR = (PFNGLBLENDFUNCSEPARATEINGRPROC)load("glBlendFuncSeparateINGR"); +} +static void load_GL_INTEL_framebuffer_CMAA(GLADloadproc load) { + if(!GLAD_GL_INTEL_framebuffer_CMAA) return; + glad_glApplyFramebufferAttachmentCMAAINTEL = (PFNGLAPPLYFRAMEBUFFERATTACHMENTCMAAINTELPROC)load("glApplyFramebufferAttachmentCMAAINTEL"); +} +static void load_GL_INTEL_map_texture(GLADloadproc load) { + if(!GLAD_GL_INTEL_map_texture) return; + glad_glSyncTextureINTEL = (PFNGLSYNCTEXTUREINTELPROC)load("glSyncTextureINTEL"); + glad_glUnmapTexture2DINTEL = (PFNGLUNMAPTEXTURE2DINTELPROC)load("glUnmapTexture2DINTEL"); + glad_glMapTexture2DINTEL = (PFNGLMAPTEXTURE2DINTELPROC)load("glMapTexture2DINTEL"); +} +static void load_GL_INTEL_parallel_arrays(GLADloadproc load) { + if(!GLAD_GL_INTEL_parallel_arrays) return; + glad_glVertexPointervINTEL = (PFNGLVERTEXPOINTERVINTELPROC)load("glVertexPointervINTEL"); + glad_glNormalPointervINTEL = (PFNGLNORMALPOINTERVINTELPROC)load("glNormalPointervINTEL"); + glad_glColorPointervINTEL = (PFNGLCOLORPOINTERVINTELPROC)load("glColorPointervINTEL"); + glad_glTexCoordPointervINTEL = (PFNGLTEXCOORDPOINTERVINTELPROC)load("glTexCoordPointervINTEL"); +} +static void load_GL_INTEL_performance_query(GLADloadproc load) { + if(!GLAD_GL_INTEL_performance_query) return; + glad_glBeginPerfQueryINTEL = (PFNGLBEGINPERFQUERYINTELPROC)load("glBeginPerfQueryINTEL"); + glad_glCreatePerfQueryINTEL = (PFNGLCREATEPERFQUERYINTELPROC)load("glCreatePerfQueryINTEL"); + 
glad_glDeletePerfQueryINTEL = (PFNGLDELETEPERFQUERYINTELPROC)load("glDeletePerfQueryINTEL"); + glad_glEndPerfQueryINTEL = (PFNGLENDPERFQUERYINTELPROC)load("glEndPerfQueryINTEL"); + glad_glGetFirstPerfQueryIdINTEL = (PFNGLGETFIRSTPERFQUERYIDINTELPROC)load("glGetFirstPerfQueryIdINTEL"); + glad_glGetNextPerfQueryIdINTEL = (PFNGLGETNEXTPERFQUERYIDINTELPROC)load("glGetNextPerfQueryIdINTEL"); + glad_glGetPerfCounterInfoINTEL = (PFNGLGETPERFCOUNTERINFOINTELPROC)load("glGetPerfCounterInfoINTEL"); + glad_glGetPerfQueryDataINTEL = (PFNGLGETPERFQUERYDATAINTELPROC)load("glGetPerfQueryDataINTEL"); + glad_glGetPerfQueryIdByNameINTEL = (PFNGLGETPERFQUERYIDBYNAMEINTELPROC)load("glGetPerfQueryIdByNameINTEL"); + glad_glGetPerfQueryInfoINTEL = (PFNGLGETPERFQUERYINFOINTELPROC)load("glGetPerfQueryInfoINTEL"); +} +static void load_GL_KHR_blend_equation_advanced(GLADloadproc load) { + if(!GLAD_GL_KHR_blend_equation_advanced) return; + glad_glBlendBarrierKHR = (PFNGLBLENDBARRIERKHRPROC)load("glBlendBarrierKHR"); +} +static void load_GL_KHR_debug(GLADloadproc load) { + if(!GLAD_GL_KHR_debug) return; + glad_glDebugMessageControl = (PFNGLDEBUGMESSAGECONTROLPROC)load("glDebugMessageControl"); + glad_glDebugMessageInsert = (PFNGLDEBUGMESSAGEINSERTPROC)load("glDebugMessageInsert"); + glad_glDebugMessageCallback = (PFNGLDEBUGMESSAGECALLBACKPROC)load("glDebugMessageCallback"); + glad_glGetDebugMessageLog = (PFNGLGETDEBUGMESSAGELOGPROC)load("glGetDebugMessageLog"); + glad_glPushDebugGroup = (PFNGLPUSHDEBUGGROUPPROC)load("glPushDebugGroup"); + glad_glPopDebugGroup = (PFNGLPOPDEBUGGROUPPROC)load("glPopDebugGroup"); + glad_glObjectLabel = (PFNGLOBJECTLABELPROC)load("glObjectLabel"); + glad_glGetObjectLabel = (PFNGLGETOBJECTLABELPROC)load("glGetObjectLabel"); + glad_glObjectPtrLabel = (PFNGLOBJECTPTRLABELPROC)load("glObjectPtrLabel"); + glad_glGetObjectPtrLabel = (PFNGLGETOBJECTPTRLABELPROC)load("glGetObjectPtrLabel"); + glad_glGetPointerv = (PFNGLGETPOINTERVPROC)load("glGetPointerv"); + 
glad_glDebugMessageControlKHR = (PFNGLDEBUGMESSAGECONTROLKHRPROC)load("glDebugMessageControlKHR"); + glad_glDebugMessageInsertKHR = (PFNGLDEBUGMESSAGEINSERTKHRPROC)load("glDebugMessageInsertKHR"); + glad_glDebugMessageCallbackKHR = (PFNGLDEBUGMESSAGECALLBACKKHRPROC)load("glDebugMessageCallbackKHR"); + glad_glGetDebugMessageLogKHR = (PFNGLGETDEBUGMESSAGELOGKHRPROC)load("glGetDebugMessageLogKHR"); + glad_glPushDebugGroupKHR = (PFNGLPUSHDEBUGGROUPKHRPROC)load("glPushDebugGroupKHR"); + glad_glPopDebugGroupKHR = (PFNGLPOPDEBUGGROUPKHRPROC)load("glPopDebugGroupKHR"); + glad_glObjectLabelKHR = (PFNGLOBJECTLABELKHRPROC)load("glObjectLabelKHR"); + glad_glGetObjectLabelKHR = (PFNGLGETOBJECTLABELKHRPROC)load("glGetObjectLabelKHR"); + glad_glObjectPtrLabelKHR = (PFNGLOBJECTPTRLABELKHRPROC)load("glObjectPtrLabelKHR"); + glad_glGetObjectPtrLabelKHR = (PFNGLGETOBJECTPTRLABELKHRPROC)load("glGetObjectPtrLabelKHR"); + glad_glGetPointervKHR = (PFNGLGETPOINTERVKHRPROC)load("glGetPointervKHR"); +} +static void load_GL_KHR_parallel_shader_compile(GLADloadproc load) { + if(!GLAD_GL_KHR_parallel_shader_compile) return; + glad_glMaxShaderCompilerThreadsKHR = (PFNGLMAXSHADERCOMPILERTHREADSKHRPROC)load("glMaxShaderCompilerThreadsKHR"); +} +static void load_GL_KHR_robustness(GLADloadproc load) { + if(!GLAD_GL_KHR_robustness) return; + glad_glGetGraphicsResetStatus = (PFNGLGETGRAPHICSRESETSTATUSPROC)load("glGetGraphicsResetStatus"); + glad_glReadnPixels = (PFNGLREADNPIXELSPROC)load("glReadnPixels"); + glad_glGetnUniformfv = (PFNGLGETNUNIFORMFVPROC)load("glGetnUniformfv"); + glad_glGetnUniformiv = (PFNGLGETNUNIFORMIVPROC)load("glGetnUniformiv"); + glad_glGetnUniformuiv = (PFNGLGETNUNIFORMUIVPROC)load("glGetnUniformuiv"); + glad_glGetGraphicsResetStatusKHR = (PFNGLGETGRAPHICSRESETSTATUSKHRPROC)load("glGetGraphicsResetStatusKHR"); + glad_glReadnPixelsKHR = (PFNGLREADNPIXELSKHRPROC)load("glReadnPixelsKHR"); + glad_glGetnUniformfvKHR = (PFNGLGETNUNIFORMFVKHRPROC)load("glGetnUniformfvKHR"); + 
glad_glGetnUniformivKHR = (PFNGLGETNUNIFORMIVKHRPROC)load("glGetnUniformivKHR"); + glad_glGetnUniformuivKHR = (PFNGLGETNUNIFORMUIVKHRPROC)load("glGetnUniformuivKHR"); +} +static void load_GL_MESA_framebuffer_flip_y(GLADloadproc load) { + if(!GLAD_GL_MESA_framebuffer_flip_y) return; + glad_glFramebufferParameteriMESA = (PFNGLFRAMEBUFFERPARAMETERIMESAPROC)load("glFramebufferParameteriMESA"); + glad_glGetFramebufferParameterivMESA = (PFNGLGETFRAMEBUFFERPARAMETERIVMESAPROC)load("glGetFramebufferParameterivMESA"); +} +static void load_GL_MESA_resize_buffers(GLADloadproc load) { + if(!GLAD_GL_MESA_resize_buffers) return; + glad_glResizeBuffersMESA = (PFNGLRESIZEBUFFERSMESAPROC)load("glResizeBuffersMESA"); +} +static void load_GL_MESA_window_pos(GLADloadproc load) { + if(!GLAD_GL_MESA_window_pos) return; + glad_glWindowPos2dMESA = (PFNGLWINDOWPOS2DMESAPROC)load("glWindowPos2dMESA"); + glad_glWindowPos2dvMESA = (PFNGLWINDOWPOS2DVMESAPROC)load("glWindowPos2dvMESA"); + glad_glWindowPos2fMESA = (PFNGLWINDOWPOS2FMESAPROC)load("glWindowPos2fMESA"); + glad_glWindowPos2fvMESA = (PFNGLWINDOWPOS2FVMESAPROC)load("glWindowPos2fvMESA"); + glad_glWindowPos2iMESA = (PFNGLWINDOWPOS2IMESAPROC)load("glWindowPos2iMESA"); + glad_glWindowPos2ivMESA = (PFNGLWINDOWPOS2IVMESAPROC)load("glWindowPos2ivMESA"); + glad_glWindowPos2sMESA = (PFNGLWINDOWPOS2SMESAPROC)load("glWindowPos2sMESA"); + glad_glWindowPos2svMESA = (PFNGLWINDOWPOS2SVMESAPROC)load("glWindowPos2svMESA"); + glad_glWindowPos3dMESA = (PFNGLWINDOWPOS3DMESAPROC)load("glWindowPos3dMESA"); + glad_glWindowPos3dvMESA = (PFNGLWINDOWPOS3DVMESAPROC)load("glWindowPos3dvMESA"); + glad_glWindowPos3fMESA = (PFNGLWINDOWPOS3FMESAPROC)load("glWindowPos3fMESA"); + glad_glWindowPos3fvMESA = (PFNGLWINDOWPOS3FVMESAPROC)load("glWindowPos3fvMESA"); + glad_glWindowPos3iMESA = (PFNGLWINDOWPOS3IMESAPROC)load("glWindowPos3iMESA"); + glad_glWindowPos3ivMESA = (PFNGLWINDOWPOS3IVMESAPROC)load("glWindowPos3ivMESA"); + glad_glWindowPos3sMESA = 
(PFNGLWINDOWPOS3SMESAPROC)load("glWindowPos3sMESA"); + glad_glWindowPos3svMESA = (PFNGLWINDOWPOS3SVMESAPROC)load("glWindowPos3svMESA"); + glad_glWindowPos4dMESA = (PFNGLWINDOWPOS4DMESAPROC)load("glWindowPos4dMESA"); + glad_glWindowPos4dvMESA = (PFNGLWINDOWPOS4DVMESAPROC)load("glWindowPos4dvMESA"); + glad_glWindowPos4fMESA = (PFNGLWINDOWPOS4FMESAPROC)load("glWindowPos4fMESA"); + glad_glWindowPos4fvMESA = (PFNGLWINDOWPOS4FVMESAPROC)load("glWindowPos4fvMESA"); + glad_glWindowPos4iMESA = (PFNGLWINDOWPOS4IMESAPROC)load("glWindowPos4iMESA"); + glad_glWindowPos4ivMESA = (PFNGLWINDOWPOS4IVMESAPROC)load("glWindowPos4ivMESA"); + glad_glWindowPos4sMESA = (PFNGLWINDOWPOS4SMESAPROC)load("glWindowPos4sMESA"); + glad_glWindowPos4svMESA = (PFNGLWINDOWPOS4SVMESAPROC)load("glWindowPos4svMESA"); +} +static void load_GL_NVX_conditional_render(GLADloadproc load) { + if(!GLAD_GL_NVX_conditional_render) return; + glad_glBeginConditionalRenderNVX = (PFNGLBEGINCONDITIONALRENDERNVXPROC)load("glBeginConditionalRenderNVX"); + glad_glEndConditionalRenderNVX = (PFNGLENDCONDITIONALRENDERNVXPROC)load("glEndConditionalRenderNVX"); +} +static void load_GL_NVX_gpu_multicast2(GLADloadproc load) { + if(!GLAD_GL_NVX_gpu_multicast2) return; + glad_glUploadGpuMaskNVX = (PFNGLUPLOADGPUMASKNVXPROC)load("glUploadGpuMaskNVX"); + glad_glMulticastViewportArrayvNVX = (PFNGLMULTICASTVIEWPORTARRAYVNVXPROC)load("glMulticastViewportArrayvNVX"); + glad_glMulticastViewportPositionWScaleNVX = (PFNGLMULTICASTVIEWPORTPOSITIONWSCALENVXPROC)load("glMulticastViewportPositionWScaleNVX"); + glad_glMulticastScissorArrayvNVX = (PFNGLMULTICASTSCISSORARRAYVNVXPROC)load("glMulticastScissorArrayvNVX"); + glad_glAsyncCopyBufferSubDataNVX = (PFNGLASYNCCOPYBUFFERSUBDATANVXPROC)load("glAsyncCopyBufferSubDataNVX"); + glad_glAsyncCopyImageSubDataNVX = (PFNGLASYNCCOPYIMAGESUBDATANVXPROC)load("glAsyncCopyImageSubDataNVX"); +} +static void load_GL_NVX_linked_gpu_multicast(GLADloadproc load) { + if(!GLAD_GL_NVX_linked_gpu_multicast) return; 
+ glad_glLGPUNamedBufferSubDataNVX = (PFNGLLGPUNAMEDBUFFERSUBDATANVXPROC)load("glLGPUNamedBufferSubDataNVX"); + glad_glLGPUCopyImageSubDataNVX = (PFNGLLGPUCOPYIMAGESUBDATANVXPROC)load("glLGPUCopyImageSubDataNVX"); + glad_glLGPUInterlockNVX = (PFNGLLGPUINTERLOCKNVXPROC)load("glLGPUInterlockNVX"); +} +static void load_GL_NVX_progress_fence(GLADloadproc load) { /* Resolves the GL_NVX_progress_fence entry points by name through load; does nothing when GLAD_GL_NVX_progress_fence is 0. */ + if(!GLAD_GL_NVX_progress_fence) return; + glad_glCreateProgressFenceNVX = (PFNGLCREATEPROGRESSFENCENVXPROC)load("glCreateProgressFenceNVX"); + glad_glSignalSemaphoreui64NVX = (PFNGLSIGNALSEMAPHOREUI64NVXPROC)load("glSignalSemaphoreui64NVX"); + glad_glWaitSemaphoreui64NVX = (PFNGLWAITSEMAPHOREUI64NVXPROC)load("glWaitSemaphoreui64NVX"); + glad_glClientWaitSemaphoreui64NVX = (PFNGLCLIENTWAITSEMAPHOREUI64NVXPROC)load("glClientWaitSemaphoreui64NVX"); +} +static void load_GL_NV_alpha_to_coverage_dither_control(GLADloadproc load) { /* Resolves the single GL_NV_alpha_to_coverage_dither_control entry point through load; does nothing when GLAD_GL_NV_alpha_to_coverage_dither_control is 0. */ + if(!GLAD_GL_NV_alpha_to_coverage_dither_control) return; + glad_glAlphaToCoverageDitherControlNV = (PFNGLALPHATOCOVERAGEDITHERCONTROLNVPROC)load("glAlphaToCoverageDitherControlNV"); +} +static void load_GL_NV_bindless_multi_draw_indirect(GLADloadproc load) { /* Resolves the GL_NV_bindless_multi_draw_indirect entry points through load; does nothing when GLAD_GL_NV_bindless_multi_draw_indirect is 0. */ + if(!GLAD_GL_NV_bindless_multi_draw_indirect) return; + glad_glMultiDrawArraysIndirectBindlessNV = (PFNGLMULTIDRAWARRAYSINDIRECTBINDLESSNVPROC)load("glMultiDrawArraysIndirectBindlessNV"); + glad_glMultiDrawElementsIndirectBindlessNV = (PFNGLMULTIDRAWELEMENTSINDIRECTBINDLESSNVPROC)load("glMultiDrawElementsIndirectBindlessNV"); +} +static void load_GL_NV_bindless_multi_draw_indirect_count(GLADloadproc load) { /* Resolves the GL_NV_bindless_multi_draw_indirect_count entry points through load; does nothing when GLAD_GL_NV_bindless_multi_draw_indirect_count is 0. */ + if(!GLAD_GL_NV_bindless_multi_draw_indirect_count) return; + glad_glMultiDrawArraysIndirectBindlessCountNV = (PFNGLMULTIDRAWARRAYSINDIRECTBINDLESSCOUNTNVPROC)load("glMultiDrawArraysIndirectBindlessCountNV"); + glad_glMultiDrawElementsIndirectBindlessCountNV = (PFNGLMULTIDRAWELEMENTSINDIRECTBINDLESSCOUNTNVPROC)load("glMultiDrawElementsIndirectBindlessCountNV"); +} +static void load_GL_NV_bindless_texture(GLADloadproc load) { /* Resolves the GL_NV_bindless_texture entry points through load; does nothing when GLAD_GL_NV_bindless_texture is 0. */ +
if(!GLAD_GL_NV_bindless_texture) return; + glad_glGetTextureHandleNV = (PFNGLGETTEXTUREHANDLENVPROC)load("glGetTextureHandleNV"); + glad_glGetTextureSamplerHandleNV = (PFNGLGETTEXTURESAMPLERHANDLENVPROC)load("glGetTextureSamplerHandleNV"); + glad_glMakeTextureHandleResidentNV = (PFNGLMAKETEXTUREHANDLERESIDENTNVPROC)load("glMakeTextureHandleResidentNV"); + glad_glMakeTextureHandleNonResidentNV = (PFNGLMAKETEXTUREHANDLENONRESIDENTNVPROC)load("glMakeTextureHandleNonResidentNV"); + glad_glGetImageHandleNV = (PFNGLGETIMAGEHANDLENVPROC)load("glGetImageHandleNV"); + glad_glMakeImageHandleResidentNV = (PFNGLMAKEIMAGEHANDLERESIDENTNVPROC)load("glMakeImageHandleResidentNV"); + glad_glMakeImageHandleNonResidentNV = (PFNGLMAKEIMAGEHANDLENONRESIDENTNVPROC)load("glMakeImageHandleNonResidentNV"); + glad_glUniformHandleui64NV = (PFNGLUNIFORMHANDLEUI64NVPROC)load("glUniformHandleui64NV"); + glad_glUniformHandleui64vNV = (PFNGLUNIFORMHANDLEUI64VNVPROC)load("glUniformHandleui64vNV"); + glad_glProgramUniformHandleui64NV = (PFNGLPROGRAMUNIFORMHANDLEUI64NVPROC)load("glProgramUniformHandleui64NV"); + glad_glProgramUniformHandleui64vNV = (PFNGLPROGRAMUNIFORMHANDLEUI64VNVPROC)load("glProgramUniformHandleui64vNV"); + glad_glIsTextureHandleResidentNV = (PFNGLISTEXTUREHANDLERESIDENTNVPROC)load("glIsTextureHandleResidentNV"); + glad_glIsImageHandleResidentNV = (PFNGLISIMAGEHANDLERESIDENTNVPROC)load("glIsImageHandleResidentNV"); +} +static void load_GL_NV_blend_equation_advanced(GLADloadproc load) { /* Resolves the GL_NV_blend_equation_advanced entry points through load; does nothing when GLAD_GL_NV_blend_equation_advanced is 0. */ + if(!GLAD_GL_NV_blend_equation_advanced) return; + glad_glBlendParameteriNV = (PFNGLBLENDPARAMETERINVPROC)load("glBlendParameteriNV"); + glad_glBlendBarrierNV = (PFNGLBLENDBARRIERNVPROC)load("glBlendBarrierNV"); +} +static void load_GL_NV_clip_space_w_scaling(GLADloadproc load) { /* Resolves the single GL_NV_clip_space_w_scaling entry point through load; does nothing when GLAD_GL_NV_clip_space_w_scaling is 0. */ + if(!GLAD_GL_NV_clip_space_w_scaling) return; + glad_glViewportPositionWScaleNV = (PFNGLVIEWPORTPOSITIONWSCALENVPROC)load("glViewportPositionWScaleNV"); +} +static void load_GL_NV_command_list(GLADloadproc load) { /* Resolves the GL_NV_command_list entry points through load; does nothing when GLAD_GL_NV_command_list is 0. */ +
if(!GLAD_GL_NV_command_list) return; + glad_glCreateStatesNV = (PFNGLCREATESTATESNVPROC)load("glCreateStatesNV"); + glad_glDeleteStatesNV = (PFNGLDELETESTATESNVPROC)load("glDeleteStatesNV"); + glad_glIsStateNV = (PFNGLISSTATENVPROC)load("glIsStateNV"); + glad_glStateCaptureNV = (PFNGLSTATECAPTURENVPROC)load("glStateCaptureNV"); + glad_glGetCommandHeaderNV = (PFNGLGETCOMMANDHEADERNVPROC)load("glGetCommandHeaderNV"); + glad_glGetStageIndexNV = (PFNGLGETSTAGEINDEXNVPROC)load("glGetStageIndexNV"); + glad_glDrawCommandsNV = (PFNGLDRAWCOMMANDSNVPROC)load("glDrawCommandsNV"); + glad_glDrawCommandsAddressNV = (PFNGLDRAWCOMMANDSADDRESSNVPROC)load("glDrawCommandsAddressNV"); + glad_glDrawCommandsStatesNV = (PFNGLDRAWCOMMANDSSTATESNVPROC)load("glDrawCommandsStatesNV"); + glad_glDrawCommandsStatesAddressNV = (PFNGLDRAWCOMMANDSSTATESADDRESSNVPROC)load("glDrawCommandsStatesAddressNV"); + glad_glCreateCommandListsNV = (PFNGLCREATECOMMANDLISTSNVPROC)load("glCreateCommandListsNV"); + glad_glDeleteCommandListsNV = (PFNGLDELETECOMMANDLISTSNVPROC)load("glDeleteCommandListsNV"); + glad_glIsCommandListNV = (PFNGLISCOMMANDLISTNVPROC)load("glIsCommandListNV"); + glad_glListDrawCommandsStatesClientNV = (PFNGLLISTDRAWCOMMANDSSTATESCLIENTNVPROC)load("glListDrawCommandsStatesClientNV"); + glad_glCommandListSegmentsNV = (PFNGLCOMMANDLISTSEGMENTSNVPROC)load("glCommandListSegmentsNV"); + glad_glCompileCommandListNV = (PFNGLCOMPILECOMMANDLISTNVPROC)load("glCompileCommandListNV"); + glad_glCallCommandListNV = (PFNGLCALLCOMMANDLISTNVPROC)load("glCallCommandListNV"); +} +static void load_GL_NV_conditional_render(GLADloadproc load) { /* Resolves the GL_NV_conditional_render entry points through load; does nothing when GLAD_GL_NV_conditional_render is 0. */ + if(!GLAD_GL_NV_conditional_render) return; + glad_glBeginConditionalRenderNV = (PFNGLBEGINCONDITIONALRENDERNVPROC)load("glBeginConditionalRenderNV"); + glad_glEndConditionalRenderNV = (PFNGLENDCONDITIONALRENDERNVPROC)load("glEndConditionalRenderNV"); +} +static void load_GL_NV_conservative_raster(GLADloadproc load) { /* Resolves the single GL_NV_conservative_raster entry point through load; does nothing when GLAD_GL_NV_conservative_raster is 0. */ + if(!GLAD_GL_NV_conservative_raster) return; +
glad_glSubpixelPrecisionBiasNV = (PFNGLSUBPIXELPRECISIONBIASNVPROC)load("glSubpixelPrecisionBiasNV"); +} +static void load_GL_NV_conservative_raster_dilate(GLADloadproc load) { /* Resolves the single GL_NV_conservative_raster_dilate entry point through load; does nothing when GLAD_GL_NV_conservative_raster_dilate is 0. */ + if(!GLAD_GL_NV_conservative_raster_dilate) return; + glad_glConservativeRasterParameterfNV = (PFNGLCONSERVATIVERASTERPARAMETERFNVPROC)load("glConservativeRasterParameterfNV"); +} +static void load_GL_NV_conservative_raster_pre_snap_triangles(GLADloadproc load) { /* Resolves the single GL_NV_conservative_raster_pre_snap_triangles entry point through load; does nothing when GLAD_GL_NV_conservative_raster_pre_snap_triangles is 0. */ + if(!GLAD_GL_NV_conservative_raster_pre_snap_triangles) return; + glad_glConservativeRasterParameteriNV = (PFNGLCONSERVATIVERASTERPARAMETERINVPROC)load("glConservativeRasterParameteriNV"); +} +static void load_GL_NV_copy_image(GLADloadproc load) { /* Resolves the single GL_NV_copy_image entry point through load; does nothing when GLAD_GL_NV_copy_image is 0. */ + if(!GLAD_GL_NV_copy_image) return; + glad_glCopyImageSubDataNV = (PFNGLCOPYIMAGESUBDATANVPROC)load("glCopyImageSubDataNV"); +} +static void load_GL_NV_depth_buffer_float(GLADloadproc load) { /* Resolves the GL_NV_depth_buffer_float entry points through load; does nothing when GLAD_GL_NV_depth_buffer_float is 0. */ + if(!GLAD_GL_NV_depth_buffer_float) return; + glad_glDepthRangedNV = (PFNGLDEPTHRANGEDNVPROC)load("glDepthRangedNV"); + glad_glClearDepthdNV = (PFNGLCLEARDEPTHDNVPROC)load("glClearDepthdNV"); + glad_glDepthBoundsdNV = (PFNGLDEPTHBOUNDSDNVPROC)load("glDepthBoundsdNV"); +} +static void load_GL_NV_draw_texture(GLADloadproc load) { /* Resolves the single GL_NV_draw_texture entry point through load; does nothing when GLAD_GL_NV_draw_texture is 0. */ + if(!GLAD_GL_NV_draw_texture) return; + glad_glDrawTextureNV = (PFNGLDRAWTEXTURENVPROC)load("glDrawTextureNV"); +} +static void load_GL_NV_draw_vulkan_image(GLADloadproc load) { /* Resolves the GL_NV_draw_vulkan_image entry points through load; does nothing when GLAD_GL_NV_draw_vulkan_image is 0. */ + if(!GLAD_GL_NV_draw_vulkan_image) return; + glad_glDrawVkImageNV = (PFNGLDRAWVKIMAGENVPROC)load("glDrawVkImageNV"); + glad_glGetVkProcAddrNV = (PFNGLGETVKPROCADDRNVPROC)load("glGetVkProcAddrNV"); + glad_glWaitVkSemaphoreNV = (PFNGLWAITVKSEMAPHORENVPROC)load("glWaitVkSemaphoreNV"); + glad_glSignalVkSemaphoreNV = (PFNGLSIGNALVKSEMAPHORENVPROC)load("glSignalVkSemaphoreNV"); + glad_glSignalVkFenceNV = (PFNGLSIGNALVKFENCENVPROC)load("glSignalVkFenceNV"); +} +static void load_GL_NV_evaluators(GLADloadproc load) { /* Resolves the GL_NV_evaluators entry points through load; does nothing when GLAD_GL_NV_evaluators is 0. */ + if(!GLAD_GL_NV_evaluators) return; + glad_glMapControlPointsNV =
(PFNGLMAPCONTROLPOINTSNVPROC)load("glMapControlPointsNV"); + glad_glMapParameterivNV = (PFNGLMAPPARAMETERIVNVPROC)load("glMapParameterivNV"); + glad_glMapParameterfvNV = (PFNGLMAPPARAMETERFVNVPROC)load("glMapParameterfvNV"); + glad_glGetMapControlPointsNV = (PFNGLGETMAPCONTROLPOINTSNVPROC)load("glGetMapControlPointsNV"); + glad_glGetMapParameterivNV = (PFNGLGETMAPPARAMETERIVNVPROC)load("glGetMapParameterivNV"); + glad_glGetMapParameterfvNV = (PFNGLGETMAPPARAMETERFVNVPROC)load("glGetMapParameterfvNV"); + glad_glGetMapAttribParameterivNV = (PFNGLGETMAPATTRIBPARAMETERIVNVPROC)load("glGetMapAttribParameterivNV"); + glad_glGetMapAttribParameterfvNV = (PFNGLGETMAPATTRIBPARAMETERFVNVPROC)load("glGetMapAttribParameterfvNV"); + glad_glEvalMapsNV = (PFNGLEVALMAPSNVPROC)load("glEvalMapsNV"); +} +static void load_GL_NV_explicit_multisample(GLADloadproc load) { /* Resolves the GL_NV_explicit_multisample entry points through load; does nothing when GLAD_GL_NV_explicit_multisample is 0. */ + if(!GLAD_GL_NV_explicit_multisample) return; + glad_glGetMultisamplefvNV = (PFNGLGETMULTISAMPLEFVNVPROC)load("glGetMultisamplefvNV"); + glad_glSampleMaskIndexedNV = (PFNGLSAMPLEMASKINDEXEDNVPROC)load("glSampleMaskIndexedNV"); + glad_glTexRenderbufferNV = (PFNGLTEXRENDERBUFFERNVPROC)load("glTexRenderbufferNV"); +} +static void load_GL_NV_fence(GLADloadproc load) { /* Resolves the GL_NV_fence entry points through load; does nothing when GLAD_GL_NV_fence is 0. */ + if(!GLAD_GL_NV_fence) return; + glad_glDeleteFencesNV = (PFNGLDELETEFENCESNVPROC)load("glDeleteFencesNV"); + glad_glGenFencesNV = (PFNGLGENFENCESNVPROC)load("glGenFencesNV"); + glad_glIsFenceNV = (PFNGLISFENCENVPROC)load("glIsFenceNV"); + glad_glTestFenceNV = (PFNGLTESTFENCENVPROC)load("glTestFenceNV"); + glad_glGetFenceivNV = (PFNGLGETFENCEIVNVPROC)load("glGetFenceivNV"); + glad_glFinishFenceNV = (PFNGLFINISHFENCENVPROC)load("glFinishFenceNV"); + glad_glSetFenceNV = (PFNGLSETFENCENVPROC)load("glSetFenceNV"); +} +static void load_GL_NV_fragment_coverage_to_color(GLADloadproc load) { /* Resolves the single GL_NV_fragment_coverage_to_color entry point through load; does nothing when GLAD_GL_NV_fragment_coverage_to_color is 0. */ + if(!GLAD_GL_NV_fragment_coverage_to_color) return; + glad_glFragmentCoverageColorNV = (PFNGLFRAGMENTCOVERAGECOLORNVPROC)load("glFragmentCoverageColorNV"); +} +static void
load_GL_NV_fragment_program(GLADloadproc load) { /* Resolves the GL_NV_fragment_program entry points through load; does nothing when GLAD_GL_NV_fragment_program is 0. */ + if(!GLAD_GL_NV_fragment_program) return; + glad_glProgramNamedParameter4fNV = (PFNGLPROGRAMNAMEDPARAMETER4FNVPROC)load("glProgramNamedParameter4fNV"); + glad_glProgramNamedParameter4fvNV = (PFNGLPROGRAMNAMEDPARAMETER4FVNVPROC)load("glProgramNamedParameter4fvNV"); + glad_glProgramNamedParameter4dNV = (PFNGLPROGRAMNAMEDPARAMETER4DNVPROC)load("glProgramNamedParameter4dNV"); + glad_glProgramNamedParameter4dvNV = (PFNGLPROGRAMNAMEDPARAMETER4DVNVPROC)load("glProgramNamedParameter4dvNV"); + glad_glGetProgramNamedParameterfvNV = (PFNGLGETPROGRAMNAMEDPARAMETERFVNVPROC)load("glGetProgramNamedParameterfvNV"); + glad_glGetProgramNamedParameterdvNV = (PFNGLGETPROGRAMNAMEDPARAMETERDVNVPROC)load("glGetProgramNamedParameterdvNV"); +} +static void load_GL_NV_framebuffer_mixed_samples(GLADloadproc load) { /* Resolves the GL_NV_framebuffer_mixed_samples entry points through load; does nothing when GLAD_GL_NV_framebuffer_mixed_samples is 0. */ + if(!GLAD_GL_NV_framebuffer_mixed_samples) return; + glad_glRasterSamplesEXT = (PFNGLRASTERSAMPLESEXTPROC)load("glRasterSamplesEXT"); /* EXT-suffixed pointer assigned by this NV loader; presumably also set by an EXT loader elsewhere in the file - verify */ + glad_glCoverageModulationTableNV = (PFNGLCOVERAGEMODULATIONTABLENVPROC)load("glCoverageModulationTableNV"); + glad_glGetCoverageModulationTableNV = (PFNGLGETCOVERAGEMODULATIONTABLENVPROC)load("glGetCoverageModulationTableNV"); + glad_glCoverageModulationNV = (PFNGLCOVERAGEMODULATIONNVPROC)load("glCoverageModulationNV"); +} +static void load_GL_NV_framebuffer_multisample_coverage(GLADloadproc load) { /* Resolves the single GL_NV_framebuffer_multisample_coverage entry point through load; does nothing when GLAD_GL_NV_framebuffer_multisample_coverage is 0. */ + if(!GLAD_GL_NV_framebuffer_multisample_coverage) return; + glad_glRenderbufferStorageMultisampleCoverageNV = (PFNGLRENDERBUFFERSTORAGEMULTISAMPLECOVERAGENVPROC)load("glRenderbufferStorageMultisampleCoverageNV"); +} +static void load_GL_NV_geometry_program4(GLADloadproc load) { /* Resolves the GL_NV_geometry_program4 entry points through load; does nothing when GLAD_GL_NV_geometry_program4 is 0. */ + if(!GLAD_GL_NV_geometry_program4) return; + glad_glProgramVertexLimitNV = (PFNGLPROGRAMVERTEXLIMITNVPROC)load("glProgramVertexLimitNV"); + glad_glFramebufferTextureEXT = (PFNGLFRAMEBUFFERTEXTUREEXTPROC)load("glFramebufferTextureEXT"); + glad_glFramebufferTextureLayerEXT =
(PFNGLFRAMEBUFFERTEXTURELAYEREXTPROC)load("glFramebufferTextureLayerEXT"); + glad_glFramebufferTextureFaceEXT = (PFNGLFRAMEBUFFERTEXTUREFACEEXTPROC)load("glFramebufferTextureFaceEXT"); /* the three glFramebufferTexture*EXT pointers above carry the EXT suffix but are assigned by this NV loader */ +} +static void load_GL_NV_gpu_multicast(GLADloadproc load) { /* Resolves the GL_NV_gpu_multicast entry points through load; does nothing when GLAD_GL_NV_gpu_multicast is 0. */ + if(!GLAD_GL_NV_gpu_multicast) return; + glad_glRenderGpuMaskNV = (PFNGLRENDERGPUMASKNVPROC)load("glRenderGpuMaskNV"); + glad_glMulticastBufferSubDataNV = (PFNGLMULTICASTBUFFERSUBDATANVPROC)load("glMulticastBufferSubDataNV"); + glad_glMulticastCopyBufferSubDataNV = (PFNGLMULTICASTCOPYBUFFERSUBDATANVPROC)load("glMulticastCopyBufferSubDataNV"); + glad_glMulticastCopyImageSubDataNV = (PFNGLMULTICASTCOPYIMAGESUBDATANVPROC)load("glMulticastCopyImageSubDataNV"); + glad_glMulticastBlitFramebufferNV = (PFNGLMULTICASTBLITFRAMEBUFFERNVPROC)load("glMulticastBlitFramebufferNV"); + glad_glMulticastFramebufferSampleLocationsfvNV = (PFNGLMULTICASTFRAMEBUFFERSAMPLELOCATIONSFVNVPROC)load("glMulticastFramebufferSampleLocationsfvNV"); + glad_glMulticastBarrierNV = (PFNGLMULTICASTBARRIERNVPROC)load("glMulticastBarrierNV"); + glad_glMulticastWaitSyncNV = (PFNGLMULTICASTWAITSYNCNVPROC)load("glMulticastWaitSyncNV"); + glad_glMulticastGetQueryObjectivNV = (PFNGLMULTICASTGETQUERYOBJECTIVNVPROC)load("glMulticastGetQueryObjectivNV"); + glad_glMulticastGetQueryObjectuivNV = (PFNGLMULTICASTGETQUERYOBJECTUIVNVPROC)load("glMulticastGetQueryObjectuivNV"); + glad_glMulticastGetQueryObjecti64vNV = (PFNGLMULTICASTGETQUERYOBJECTI64VNVPROC)load("glMulticastGetQueryObjecti64vNV"); + glad_glMulticastGetQueryObjectui64vNV = (PFNGLMULTICASTGETQUERYOBJECTUI64VNVPROC)load("glMulticastGetQueryObjectui64vNV"); +} +static void load_GL_NV_gpu_program4(GLADloadproc load) { /* Resolves the GL_NV_gpu_program4 entry points through load; does nothing when GLAD_GL_NV_gpu_program4 is 0. */ + if(!GLAD_GL_NV_gpu_program4) return; + glad_glProgramLocalParameterI4iNV = (PFNGLPROGRAMLOCALPARAMETERI4INVPROC)load("glProgramLocalParameterI4iNV"); + glad_glProgramLocalParameterI4ivNV = (PFNGLPROGRAMLOCALPARAMETERI4IVNVPROC)load("glProgramLocalParameterI4ivNV"); +
glad_glProgramLocalParametersI4ivNV = (PFNGLPROGRAMLOCALPARAMETERSI4IVNVPROC)load("glProgramLocalParametersI4ivNV"); + glad_glProgramLocalParameterI4uiNV = (PFNGLPROGRAMLOCALPARAMETERI4UINVPROC)load("glProgramLocalParameterI4uiNV"); + glad_glProgramLocalParameterI4uivNV = (PFNGLPROGRAMLOCALPARAMETERI4UIVNVPROC)load("glProgramLocalParameterI4uivNV"); + glad_glProgramLocalParametersI4uivNV = (PFNGLPROGRAMLOCALPARAMETERSI4UIVNVPROC)load("glProgramLocalParametersI4uivNV"); + glad_glProgramEnvParameterI4iNV = (PFNGLPROGRAMENVPARAMETERI4INVPROC)load("glProgramEnvParameterI4iNV"); + glad_glProgramEnvParameterI4ivNV = (PFNGLPROGRAMENVPARAMETERI4IVNVPROC)load("glProgramEnvParameterI4ivNV"); + glad_glProgramEnvParametersI4ivNV = (PFNGLPROGRAMENVPARAMETERSI4IVNVPROC)load("glProgramEnvParametersI4ivNV"); + glad_glProgramEnvParameterI4uiNV = (PFNGLPROGRAMENVPARAMETERI4UINVPROC)load("glProgramEnvParameterI4uiNV"); + glad_glProgramEnvParameterI4uivNV = (PFNGLPROGRAMENVPARAMETERI4UIVNVPROC)load("glProgramEnvParameterI4uivNV"); + glad_glProgramEnvParametersI4uivNV = (PFNGLPROGRAMENVPARAMETERSI4UIVNVPROC)load("glProgramEnvParametersI4uivNV"); + glad_glGetProgramLocalParameterIivNV = (PFNGLGETPROGRAMLOCALPARAMETERIIVNVPROC)load("glGetProgramLocalParameterIivNV"); + glad_glGetProgramLocalParameterIuivNV = (PFNGLGETPROGRAMLOCALPARAMETERIUIVNVPROC)load("glGetProgramLocalParameterIuivNV"); + glad_glGetProgramEnvParameterIivNV = (PFNGLGETPROGRAMENVPARAMETERIIVNVPROC)load("glGetProgramEnvParameterIivNV"); + glad_glGetProgramEnvParameterIuivNV = (PFNGLGETPROGRAMENVPARAMETERIUIVNVPROC)load("glGetProgramEnvParameterIuivNV"); +} +static void load_GL_NV_gpu_program5(GLADloadproc load) { /* Resolves the GL_NV_gpu_program5 entry points through load; does nothing when GLAD_GL_NV_gpu_program5 is 0. */ + if(!GLAD_GL_NV_gpu_program5) return; + glad_glProgramSubroutineParametersuivNV = (PFNGLPROGRAMSUBROUTINEPARAMETERSUIVNVPROC)load("glProgramSubroutineParametersuivNV"); + glad_glGetProgramSubroutineParameteruivNV = (PFNGLGETPROGRAMSUBROUTINEPARAMETERUIVNVPROC)load("glGetProgramSubroutineParameteruivNV"); +}
+static void load_GL_NV_gpu_shader5(GLADloadproc load) { /* Resolves the GL_NV_gpu_shader5 entry points through load; does nothing when GLAD_GL_NV_gpu_shader5 is 0. */ + if(!GLAD_GL_NV_gpu_shader5) return; + glad_glUniform1i64NV = (PFNGLUNIFORM1I64NVPROC)load("glUniform1i64NV"); + glad_glUniform2i64NV = (PFNGLUNIFORM2I64NVPROC)load("glUniform2i64NV"); + glad_glUniform3i64NV = (PFNGLUNIFORM3I64NVPROC)load("glUniform3i64NV"); + glad_glUniform4i64NV = (PFNGLUNIFORM4I64NVPROC)load("glUniform4i64NV"); + glad_glUniform1i64vNV = (PFNGLUNIFORM1I64VNVPROC)load("glUniform1i64vNV"); + glad_glUniform2i64vNV = (PFNGLUNIFORM2I64VNVPROC)load("glUniform2i64vNV"); + glad_glUniform3i64vNV = (PFNGLUNIFORM3I64VNVPROC)load("glUniform3i64vNV"); + glad_glUniform4i64vNV = (PFNGLUNIFORM4I64VNVPROC)load("glUniform4i64vNV"); + glad_glUniform1ui64NV = (PFNGLUNIFORM1UI64NVPROC)load("glUniform1ui64NV"); + glad_glUniform2ui64NV = (PFNGLUNIFORM2UI64NVPROC)load("glUniform2ui64NV"); + glad_glUniform3ui64NV = (PFNGLUNIFORM3UI64NVPROC)load("glUniform3ui64NV"); + glad_glUniform4ui64NV = (PFNGLUNIFORM4UI64NVPROC)load("glUniform4ui64NV"); + glad_glUniform1ui64vNV = (PFNGLUNIFORM1UI64VNVPROC)load("glUniform1ui64vNV"); + glad_glUniform2ui64vNV = (PFNGLUNIFORM2UI64VNVPROC)load("glUniform2ui64vNV"); + glad_glUniform3ui64vNV = (PFNGLUNIFORM3UI64VNVPROC)load("glUniform3ui64vNV"); + glad_glUniform4ui64vNV = (PFNGLUNIFORM4UI64VNVPROC)load("glUniform4ui64vNV"); + glad_glGetUniformi64vNV = (PFNGLGETUNIFORMI64VNVPROC)load("glGetUniformi64vNV"); + glad_glProgramUniform1i64NV = (PFNGLPROGRAMUNIFORM1I64NVPROC)load("glProgramUniform1i64NV"); + glad_glProgramUniform2i64NV = (PFNGLPROGRAMUNIFORM2I64NVPROC)load("glProgramUniform2i64NV"); + glad_glProgramUniform3i64NV = (PFNGLPROGRAMUNIFORM3I64NVPROC)load("glProgramUniform3i64NV"); + glad_glProgramUniform4i64NV = (PFNGLPROGRAMUNIFORM4I64NVPROC)load("glProgramUniform4i64NV"); + glad_glProgramUniform1i64vNV = (PFNGLPROGRAMUNIFORM1I64VNVPROC)load("glProgramUniform1i64vNV"); + glad_glProgramUniform2i64vNV = (PFNGLPROGRAMUNIFORM2I64VNVPROC)load("glProgramUniform2i64vNV"); +
glad_glProgramUniform3i64vNV = (PFNGLPROGRAMUNIFORM3I64VNVPROC)load("glProgramUniform3i64vNV"); + glad_glProgramUniform4i64vNV = (PFNGLPROGRAMUNIFORM4I64VNVPROC)load("glProgramUniform4i64vNV"); + glad_glProgramUniform1ui64NV = (PFNGLPROGRAMUNIFORM1UI64NVPROC)load("glProgramUniform1ui64NV"); + glad_glProgramUniform2ui64NV = (PFNGLPROGRAMUNIFORM2UI64NVPROC)load("glProgramUniform2ui64NV"); + glad_glProgramUniform3ui64NV = (PFNGLPROGRAMUNIFORM3UI64NVPROC)load("glProgramUniform3ui64NV"); + glad_glProgramUniform4ui64NV = (PFNGLPROGRAMUNIFORM4UI64NVPROC)load("glProgramUniform4ui64NV"); + glad_glProgramUniform1ui64vNV = (PFNGLPROGRAMUNIFORM1UI64VNVPROC)load("glProgramUniform1ui64vNV"); + glad_glProgramUniform2ui64vNV = (PFNGLPROGRAMUNIFORM2UI64VNVPROC)load("glProgramUniform2ui64vNV"); + glad_glProgramUniform3ui64vNV = (PFNGLPROGRAMUNIFORM3UI64VNVPROC)load("glProgramUniform3ui64vNV"); + glad_glProgramUniform4ui64vNV = (PFNGLPROGRAMUNIFORM4UI64VNVPROC)load("glProgramUniform4ui64vNV"); +} +static void load_GL_NV_half_float(GLADloadproc load) { /* Resolves the GL_NV_half_float entry points through load; does nothing when GLAD_GL_NV_half_float is 0. */ + if(!GLAD_GL_NV_half_float) return; + glad_glVertex2hNV = (PFNGLVERTEX2HNVPROC)load("glVertex2hNV"); + glad_glVertex2hvNV = (PFNGLVERTEX2HVNVPROC)load("glVertex2hvNV"); + glad_glVertex3hNV = (PFNGLVERTEX3HNVPROC)load("glVertex3hNV"); + glad_glVertex3hvNV = (PFNGLVERTEX3HVNVPROC)load("glVertex3hvNV"); + glad_glVertex4hNV = (PFNGLVERTEX4HNVPROC)load("glVertex4hNV"); + glad_glVertex4hvNV = (PFNGLVERTEX4HVNVPROC)load("glVertex4hvNV"); + glad_glNormal3hNV = (PFNGLNORMAL3HNVPROC)load("glNormal3hNV"); + glad_glNormal3hvNV = (PFNGLNORMAL3HVNVPROC)load("glNormal3hvNV"); + glad_glColor3hNV = (PFNGLCOLOR3HNVPROC)load("glColor3hNV"); + glad_glColor3hvNV = (PFNGLCOLOR3HVNVPROC)load("glColor3hvNV"); + glad_glColor4hNV = (PFNGLCOLOR4HNVPROC)load("glColor4hNV"); + glad_glColor4hvNV = (PFNGLCOLOR4HVNVPROC)load("glColor4hvNV"); + glad_glTexCoord1hNV = (PFNGLTEXCOORD1HNVPROC)load("glTexCoord1hNV"); + glad_glTexCoord1hvNV =
(PFNGLTEXCOORD1HVNVPROC)load("glTexCoord1hvNV"); + glad_glTexCoord2hNV = (PFNGLTEXCOORD2HNVPROC)load("glTexCoord2hNV"); + glad_glTexCoord2hvNV = (PFNGLTEXCOORD2HVNVPROC)load("glTexCoord2hvNV"); + glad_glTexCoord3hNV = (PFNGLTEXCOORD3HNVPROC)load("glTexCoord3hNV"); + glad_glTexCoord3hvNV = (PFNGLTEXCOORD3HVNVPROC)load("glTexCoord3hvNV"); + glad_glTexCoord4hNV = (PFNGLTEXCOORD4HNVPROC)load("glTexCoord4hNV"); + glad_glTexCoord4hvNV = (PFNGLTEXCOORD4HVNVPROC)load("glTexCoord4hvNV"); + glad_glMultiTexCoord1hNV = (PFNGLMULTITEXCOORD1HNVPROC)load("glMultiTexCoord1hNV"); + glad_glMultiTexCoord1hvNV = (PFNGLMULTITEXCOORD1HVNVPROC)load("glMultiTexCoord1hvNV"); + glad_glMultiTexCoord2hNV = (PFNGLMULTITEXCOORD2HNVPROC)load("glMultiTexCoord2hNV"); + glad_glMultiTexCoord2hvNV = (PFNGLMULTITEXCOORD2HVNVPROC)load("glMultiTexCoord2hvNV"); + glad_glMultiTexCoord3hNV = (PFNGLMULTITEXCOORD3HNVPROC)load("glMultiTexCoord3hNV"); + glad_glMultiTexCoord3hvNV = (PFNGLMULTITEXCOORD3HVNVPROC)load("glMultiTexCoord3hvNV"); + glad_glMultiTexCoord4hNV = (PFNGLMULTITEXCOORD4HNVPROC)load("glMultiTexCoord4hNV"); + glad_glMultiTexCoord4hvNV = (PFNGLMULTITEXCOORD4HVNVPROC)load("glMultiTexCoord4hvNV"); + glad_glVertexAttrib1hNV = (PFNGLVERTEXATTRIB1HNVPROC)load("glVertexAttrib1hNV"); + glad_glVertexAttrib1hvNV = (PFNGLVERTEXATTRIB1HVNVPROC)load("glVertexAttrib1hvNV"); + glad_glVertexAttrib2hNV = (PFNGLVERTEXATTRIB2HNVPROC)load("glVertexAttrib2hNV"); + glad_glVertexAttrib2hvNV = (PFNGLVERTEXATTRIB2HVNVPROC)load("glVertexAttrib2hvNV"); + glad_glVertexAttrib3hNV = (PFNGLVERTEXATTRIB3HNVPROC)load("glVertexAttrib3hNV"); + glad_glVertexAttrib3hvNV = (PFNGLVERTEXATTRIB3HVNVPROC)load("glVertexAttrib3hvNV"); + glad_glVertexAttrib4hNV = (PFNGLVERTEXATTRIB4HNVPROC)load("glVertexAttrib4hNV"); + glad_glVertexAttrib4hvNV = (PFNGLVERTEXATTRIB4HVNVPROC)load("glVertexAttrib4hvNV"); + glad_glVertexAttribs1hvNV = (PFNGLVERTEXATTRIBS1HVNVPROC)load("glVertexAttribs1hvNV"); + glad_glVertexAttribs2hvNV =
(PFNGLVERTEXATTRIBS2HVNVPROC)load("glVertexAttribs2hvNV"); + glad_glVertexAttribs3hvNV = (PFNGLVERTEXATTRIBS3HVNVPROC)load("glVertexAttribs3hvNV"); + glad_glVertexAttribs4hvNV = (PFNGLVERTEXATTRIBS4HVNVPROC)load("glVertexAttribs4hvNV"); + glad_glFogCoordhNV = (PFNGLFOGCOORDHNVPROC)load("glFogCoordhNV"); + glad_glFogCoordhvNV = (PFNGLFOGCOORDHVNVPROC)load("glFogCoordhvNV"); + glad_glSecondaryColor3hNV = (PFNGLSECONDARYCOLOR3HNVPROC)load("glSecondaryColor3hNV"); + glad_glSecondaryColor3hvNV = (PFNGLSECONDARYCOLOR3HVNVPROC)load("glSecondaryColor3hvNV"); + glad_glVertexWeighthNV = (PFNGLVERTEXWEIGHTHNVPROC)load("glVertexWeighthNV"); + glad_glVertexWeighthvNV = (PFNGLVERTEXWEIGHTHVNVPROC)load("glVertexWeighthvNV"); +} +static void load_GL_NV_internalformat_sample_query(GLADloadproc load) { /* Resolves the single GL_NV_internalformat_sample_query entry point through load; does nothing when GLAD_GL_NV_internalformat_sample_query is 0. */ + if(!GLAD_GL_NV_internalformat_sample_query) return; + glad_glGetInternalformatSampleivNV = (PFNGLGETINTERNALFORMATSAMPLEIVNVPROC)load("glGetInternalformatSampleivNV"); +} +static void load_GL_NV_memory_attachment(GLADloadproc load) { /* Resolves the GL_NV_memory_attachment entry points through load; does nothing when GLAD_GL_NV_memory_attachment is 0. */ + if(!GLAD_GL_NV_memory_attachment) return; + glad_glGetMemoryObjectDetachedResourcesuivNV = (PFNGLGETMEMORYOBJECTDETACHEDRESOURCESUIVNVPROC)load("glGetMemoryObjectDetachedResourcesuivNV"); + glad_glResetMemoryObjectParameterNV = (PFNGLRESETMEMORYOBJECTPARAMETERNVPROC)load("glResetMemoryObjectParameterNV"); + glad_glTexAttachMemoryNV = (PFNGLTEXATTACHMEMORYNVPROC)load("glTexAttachMemoryNV"); + glad_glBufferAttachMemoryNV = (PFNGLBUFFERATTACHMEMORYNVPROC)load("glBufferAttachMemoryNV"); + glad_glTextureAttachMemoryNV = (PFNGLTEXTUREATTACHMEMORYNVPROC)load("glTextureAttachMemoryNV"); + glad_glNamedBufferAttachMemoryNV = (PFNGLNAMEDBUFFERATTACHMEMORYNVPROC)load("glNamedBufferAttachMemoryNV"); +} +static void load_GL_NV_memory_object_sparse(GLADloadproc load) { /* Resolves the GL_NV_memory_object_sparse entry points through load; does nothing when GLAD_GL_NV_memory_object_sparse is 0. */ + if(!GLAD_GL_NV_memory_object_sparse) return; + glad_glBufferPageCommitmentMemNV = (PFNGLBUFFERPAGECOMMITMENTMEMNVPROC)load("glBufferPageCommitmentMemNV"); + glad_glTexPageCommitmentMemNV =
(PFNGLTEXPAGECOMMITMENTMEMNVPROC)load("glTexPageCommitmentMemNV"); + glad_glNamedBufferPageCommitmentMemNV = (PFNGLNAMEDBUFFERPAGECOMMITMENTMEMNVPROC)load("glNamedBufferPageCommitmentMemNV"); + glad_glTexturePageCommitmentMemNV = (PFNGLTEXTUREPAGECOMMITMENTMEMNVPROC)load("glTexturePageCommitmentMemNV"); +} +static void load_GL_NV_mesh_shader(GLADloadproc load) { /* Resolves the GL_NV_mesh_shader entry points through load; does nothing when GLAD_GL_NV_mesh_shader is 0. */ + if(!GLAD_GL_NV_mesh_shader) return; + glad_glDrawMeshTasksNV = (PFNGLDRAWMESHTASKSNVPROC)load("glDrawMeshTasksNV"); + glad_glDrawMeshTasksIndirectNV = (PFNGLDRAWMESHTASKSINDIRECTNVPROC)load("glDrawMeshTasksIndirectNV"); + glad_glMultiDrawMeshTasksIndirectNV = (PFNGLMULTIDRAWMESHTASKSINDIRECTNVPROC)load("glMultiDrawMeshTasksIndirectNV"); + glad_glMultiDrawMeshTasksIndirectCountNV = (PFNGLMULTIDRAWMESHTASKSINDIRECTCOUNTNVPROC)load("glMultiDrawMeshTasksIndirectCountNV"); +} +static void load_GL_NV_occlusion_query(GLADloadproc load) { /* Resolves the GL_NV_occlusion_query entry points through load; does nothing when GLAD_GL_NV_occlusion_query is 0. */ + if(!GLAD_GL_NV_occlusion_query) return; + glad_glGenOcclusionQueriesNV = (PFNGLGENOCCLUSIONQUERIESNVPROC)load("glGenOcclusionQueriesNV"); + glad_glDeleteOcclusionQueriesNV = (PFNGLDELETEOCCLUSIONQUERIESNVPROC)load("glDeleteOcclusionQueriesNV"); + glad_glIsOcclusionQueryNV = (PFNGLISOCCLUSIONQUERYNVPROC)load("glIsOcclusionQueryNV"); + glad_glBeginOcclusionQueryNV = (PFNGLBEGINOCCLUSIONQUERYNVPROC)load("glBeginOcclusionQueryNV"); + glad_glEndOcclusionQueryNV = (PFNGLENDOCCLUSIONQUERYNVPROC)load("glEndOcclusionQueryNV"); + glad_glGetOcclusionQueryivNV = (PFNGLGETOCCLUSIONQUERYIVNVPROC)load("glGetOcclusionQueryivNV"); + glad_glGetOcclusionQueryuivNV = (PFNGLGETOCCLUSIONQUERYUIVNVPROC)load("glGetOcclusionQueryuivNV"); +} +static void load_GL_NV_parameter_buffer_object(GLADloadproc load) { /* Resolves the GL_NV_parameter_buffer_object entry points through load; does nothing when GLAD_GL_NV_parameter_buffer_object is 0. */ + if(!GLAD_GL_NV_parameter_buffer_object) return; + glad_glProgramBufferParametersfvNV = (PFNGLPROGRAMBUFFERPARAMETERSFVNVPROC)load("glProgramBufferParametersfvNV"); + glad_glProgramBufferParametersIivNV = (PFNGLPROGRAMBUFFERPARAMETERSIIVNVPROC)load("glProgramBufferParametersIivNV"); +
glad_glProgramBufferParametersIuivNV = (PFNGLPROGRAMBUFFERPARAMETERSIUIVNVPROC)load("glProgramBufferParametersIuivNV"); +} +static void load_GL_NV_path_rendering(GLADloadproc load) { /* Resolves the GL_NV_path_rendering entry points through load; does nothing when GLAD_GL_NV_path_rendering is 0. */ + if(!GLAD_GL_NV_path_rendering) return; + glad_glGenPathsNV = (PFNGLGENPATHSNVPROC)load("glGenPathsNV"); + glad_glDeletePathsNV = (PFNGLDELETEPATHSNVPROC)load("glDeletePathsNV"); + glad_glIsPathNV = (PFNGLISPATHNVPROC)load("glIsPathNV"); + glad_glPathCommandsNV = (PFNGLPATHCOMMANDSNVPROC)load("glPathCommandsNV"); + glad_glPathCoordsNV = (PFNGLPATHCOORDSNVPROC)load("glPathCoordsNV"); + glad_glPathSubCommandsNV = (PFNGLPATHSUBCOMMANDSNVPROC)load("glPathSubCommandsNV"); + glad_glPathSubCoordsNV = (PFNGLPATHSUBCOORDSNVPROC)load("glPathSubCoordsNV"); + glad_glPathStringNV = (PFNGLPATHSTRINGNVPROC)load("glPathStringNV"); + glad_glPathGlyphsNV = (PFNGLPATHGLYPHSNVPROC)load("glPathGlyphsNV"); + glad_glPathGlyphRangeNV = (PFNGLPATHGLYPHRANGENVPROC)load("glPathGlyphRangeNV"); + glad_glWeightPathsNV = (PFNGLWEIGHTPATHSNVPROC)load("glWeightPathsNV"); + glad_glCopyPathNV = (PFNGLCOPYPATHNVPROC)load("glCopyPathNV"); + glad_glInterpolatePathsNV = (PFNGLINTERPOLATEPATHSNVPROC)load("glInterpolatePathsNV"); + glad_glTransformPathNV = (PFNGLTRANSFORMPATHNVPROC)load("glTransformPathNV"); + glad_glPathParameterivNV = (PFNGLPATHPARAMETERIVNVPROC)load("glPathParameterivNV"); + glad_glPathParameteriNV = (PFNGLPATHPARAMETERINVPROC)load("glPathParameteriNV"); + glad_glPathParameterfvNV = (PFNGLPATHPARAMETERFVNVPROC)load("glPathParameterfvNV"); + glad_glPathParameterfNV = (PFNGLPATHPARAMETERFNVPROC)load("glPathParameterfNV"); + glad_glPathDashArrayNV = (PFNGLPATHDASHARRAYNVPROC)load("glPathDashArrayNV"); + glad_glPathStencilFuncNV = (PFNGLPATHSTENCILFUNCNVPROC)load("glPathStencilFuncNV"); + glad_glPathStencilDepthOffsetNV = (PFNGLPATHSTENCILDEPTHOFFSETNVPROC)load("glPathStencilDepthOffsetNV"); + glad_glStencilFillPathNV = (PFNGLSTENCILFILLPATHNVPROC)load("glStencilFillPathNV"); + glad_glStencilStrokePathNV =
(PFNGLSTENCILSTROKEPATHNVPROC)load("glStencilStrokePathNV"); + glad_glStencilFillPathInstancedNV = (PFNGLSTENCILFILLPATHINSTANCEDNVPROC)load("glStencilFillPathInstancedNV"); + glad_glStencilStrokePathInstancedNV = (PFNGLSTENCILSTROKEPATHINSTANCEDNVPROC)load("glStencilStrokePathInstancedNV"); + glad_glPathCoverDepthFuncNV = (PFNGLPATHCOVERDEPTHFUNCNVPROC)load("glPathCoverDepthFuncNV"); + glad_glCoverFillPathNV = (PFNGLCOVERFILLPATHNVPROC)load("glCoverFillPathNV"); + glad_glCoverStrokePathNV = (PFNGLCOVERSTROKEPATHNVPROC)load("glCoverStrokePathNV"); + glad_glCoverFillPathInstancedNV = (PFNGLCOVERFILLPATHINSTANCEDNVPROC)load("glCoverFillPathInstancedNV"); + glad_glCoverStrokePathInstancedNV = (PFNGLCOVERSTROKEPATHINSTANCEDNVPROC)load("glCoverStrokePathInstancedNV"); + glad_glGetPathParameterivNV = (PFNGLGETPATHPARAMETERIVNVPROC)load("glGetPathParameterivNV"); + glad_glGetPathParameterfvNV = (PFNGLGETPATHPARAMETERFVNVPROC)load("glGetPathParameterfvNV"); + glad_glGetPathCommandsNV = (PFNGLGETPATHCOMMANDSNVPROC)load("glGetPathCommandsNV"); + glad_glGetPathCoordsNV = (PFNGLGETPATHCOORDSNVPROC)load("glGetPathCoordsNV"); + glad_glGetPathDashArrayNV = (PFNGLGETPATHDASHARRAYNVPROC)load("glGetPathDashArrayNV"); + glad_glGetPathMetricsNV = (PFNGLGETPATHMETRICSNVPROC)load("glGetPathMetricsNV"); + glad_glGetPathMetricRangeNV = (PFNGLGETPATHMETRICRANGENVPROC)load("glGetPathMetricRangeNV"); + glad_glGetPathSpacingNV = (PFNGLGETPATHSPACINGNVPROC)load("glGetPathSpacingNV"); + glad_glIsPointInFillPathNV = (PFNGLISPOINTINFILLPATHNVPROC)load("glIsPointInFillPathNV"); + glad_glIsPointInStrokePathNV = (PFNGLISPOINTINSTROKEPATHNVPROC)load("glIsPointInStrokePathNV"); + glad_glGetPathLengthNV = (PFNGLGETPATHLENGTHNVPROC)load("glGetPathLengthNV"); + glad_glPointAlongPathNV = (PFNGLPOINTALONGPATHNVPROC)load("glPointAlongPathNV"); + glad_glMatrixLoad3x2fNV = (PFNGLMATRIXLOAD3X2FNVPROC)load("glMatrixLoad3x2fNV"); + glad_glMatrixLoad3x3fNV = (PFNGLMATRIXLOAD3X3FNVPROC)load("glMatrixLoad3x3fNV");
+ glad_glMatrixLoadTranspose3x3fNV = (PFNGLMATRIXLOADTRANSPOSE3X3FNVPROC)load("glMatrixLoadTranspose3x3fNV"); + glad_glMatrixMult3x2fNV = (PFNGLMATRIXMULT3X2FNVPROC)load("glMatrixMult3x2fNV"); + glad_glMatrixMult3x3fNV = (PFNGLMATRIXMULT3X3FNVPROC)load("glMatrixMult3x3fNV"); + glad_glMatrixMultTranspose3x3fNV = (PFNGLMATRIXMULTTRANSPOSE3X3FNVPROC)load("glMatrixMultTranspose3x3fNV"); + glad_glStencilThenCoverFillPathNV = (PFNGLSTENCILTHENCOVERFILLPATHNVPROC)load("glStencilThenCoverFillPathNV"); + glad_glStencilThenCoverStrokePathNV = (PFNGLSTENCILTHENCOVERSTROKEPATHNVPROC)load("glStencilThenCoverStrokePathNV"); + glad_glStencilThenCoverFillPathInstancedNV = (PFNGLSTENCILTHENCOVERFILLPATHINSTANCEDNVPROC)load("glStencilThenCoverFillPathInstancedNV"); + glad_glStencilThenCoverStrokePathInstancedNV = (PFNGLSTENCILTHENCOVERSTROKEPATHINSTANCEDNVPROC)load("glStencilThenCoverStrokePathInstancedNV"); + glad_glPathGlyphIndexRangeNV = (PFNGLPATHGLYPHINDEXRANGENVPROC)load("glPathGlyphIndexRangeNV"); + glad_glPathGlyphIndexArrayNV = (PFNGLPATHGLYPHINDEXARRAYNVPROC)load("glPathGlyphIndexArrayNV"); + glad_glPathMemoryGlyphIndexArrayNV = (PFNGLPATHMEMORYGLYPHINDEXARRAYNVPROC)load("glPathMemoryGlyphIndexArrayNV"); + glad_glProgramPathFragmentInputGenNV = (PFNGLPROGRAMPATHFRAGMENTINPUTGENNVPROC)load("glProgramPathFragmentInputGenNV"); + glad_glGetProgramResourcefvNV = (PFNGLGETPROGRAMRESOURCEFVNVPROC)load("glGetProgramResourcefvNV"); + glad_glPathColorGenNV = (PFNGLPATHCOLORGENNVPROC)load("glPathColorGenNV"); + glad_glPathTexGenNV = (PFNGLPATHTEXGENNVPROC)load("glPathTexGenNV"); + glad_glPathFogGenNV = (PFNGLPATHFOGGENNVPROC)load("glPathFogGenNV"); + glad_glGetPathColorGenivNV = (PFNGLGETPATHCOLORGENIVNVPROC)load("glGetPathColorGenivNV"); + glad_glGetPathColorGenfvNV = (PFNGLGETPATHCOLORGENFVNVPROC)load("glGetPathColorGenfvNV"); + glad_glGetPathTexGenivNV = (PFNGLGETPATHTEXGENIVNVPROC)load("glGetPathTexGenivNV"); + glad_glGetPathTexGenfvNV =
(PFNGLGETPATHTEXGENFVNVPROC)load("glGetPathTexGenfvNV"); /* every pointer assigned after this one in the loader is a glMatrix*EXT name; presumably these are shared with an EXT loader elsewhere in the file - verify */ + glad_glMatrixFrustumEXT = (PFNGLMATRIXFRUSTUMEXTPROC)load("glMatrixFrustumEXT"); + glad_glMatrixLoadIdentityEXT = (PFNGLMATRIXLOADIDENTITYEXTPROC)load("glMatrixLoadIdentityEXT"); + glad_glMatrixLoadTransposefEXT = (PFNGLMATRIXLOADTRANSPOSEFEXTPROC)load("glMatrixLoadTransposefEXT"); + glad_glMatrixLoadTransposedEXT = (PFNGLMATRIXLOADTRANSPOSEDEXTPROC)load("glMatrixLoadTransposedEXT"); + glad_glMatrixLoadfEXT = (PFNGLMATRIXLOADFEXTPROC)load("glMatrixLoadfEXT"); + glad_glMatrixLoaddEXT = (PFNGLMATRIXLOADDEXTPROC)load("glMatrixLoaddEXT"); + glad_glMatrixMultTransposefEXT = (PFNGLMATRIXMULTTRANSPOSEFEXTPROC)load("glMatrixMultTransposefEXT"); + glad_glMatrixMultTransposedEXT = (PFNGLMATRIXMULTTRANSPOSEDEXTPROC)load("glMatrixMultTransposedEXT"); + glad_glMatrixMultfEXT = (PFNGLMATRIXMULTFEXTPROC)load("glMatrixMultfEXT"); + glad_glMatrixMultdEXT = (PFNGLMATRIXMULTDEXTPROC)load("glMatrixMultdEXT"); + glad_glMatrixOrthoEXT = (PFNGLMATRIXORTHOEXTPROC)load("glMatrixOrthoEXT"); + glad_glMatrixPopEXT = (PFNGLMATRIXPOPEXTPROC)load("glMatrixPopEXT"); + glad_glMatrixPushEXT = (PFNGLMATRIXPUSHEXTPROC)load("glMatrixPushEXT"); + glad_glMatrixRotatefEXT = (PFNGLMATRIXROTATEFEXTPROC)load("glMatrixRotatefEXT"); + glad_glMatrixRotatedEXT = (PFNGLMATRIXROTATEDEXTPROC)load("glMatrixRotatedEXT"); + glad_glMatrixScalefEXT = (PFNGLMATRIXSCALEFEXTPROC)load("glMatrixScalefEXT"); + glad_glMatrixScaledEXT = (PFNGLMATRIXSCALEDEXTPROC)load("glMatrixScaledEXT"); + glad_glMatrixTranslatefEXT = (PFNGLMATRIXTRANSLATEFEXTPROC)load("glMatrixTranslatefEXT"); + glad_glMatrixTranslatedEXT = (PFNGLMATRIXTRANSLATEDEXTPROC)load("glMatrixTranslatedEXT"); +} +static void load_GL_NV_pixel_data_range(GLADloadproc load) { /* Resolves the GL_NV_pixel_data_range entry points through load; does nothing when GLAD_GL_NV_pixel_data_range is 0. */ + if(!GLAD_GL_NV_pixel_data_range) return; + glad_glPixelDataRangeNV = (PFNGLPIXELDATARANGENVPROC)load("glPixelDataRangeNV"); + glad_glFlushPixelDataRangeNV = (PFNGLFLUSHPIXELDATARANGENVPROC)load("glFlushPixelDataRangeNV"); +} +static void
load_GL_NV_point_sprite(GLADloadproc load) { /* Resolves the GL_NV_point_sprite entry points through load; does nothing when GLAD_GL_NV_point_sprite is 0. */ + if(!GLAD_GL_NV_point_sprite) return; + glad_glPointParameteriNV = (PFNGLPOINTPARAMETERINVPROC)load("glPointParameteriNV"); + glad_glPointParameterivNV = (PFNGLPOINTPARAMETERIVNVPROC)load("glPointParameterivNV"); +} +static void load_GL_NV_present_video(GLADloadproc load) { /* Resolves the GL_NV_present_video entry points through load; does nothing when GLAD_GL_NV_present_video is 0. */ + if(!GLAD_GL_NV_present_video) return; + glad_glPresentFrameKeyedNV = (PFNGLPRESENTFRAMEKEYEDNVPROC)load("glPresentFrameKeyedNV"); + glad_glPresentFrameDualFillNV = (PFNGLPRESENTFRAMEDUALFILLNVPROC)load("glPresentFrameDualFillNV"); + glad_glGetVideoivNV = (PFNGLGETVIDEOIVNVPROC)load("glGetVideoivNV"); + glad_glGetVideouivNV = (PFNGLGETVIDEOUIVNVPROC)load("glGetVideouivNV"); + glad_glGetVideoi64vNV = (PFNGLGETVIDEOI64VNVPROC)load("glGetVideoi64vNV"); + glad_glGetVideoui64vNV = (PFNGLGETVIDEOUI64VNVPROC)load("glGetVideoui64vNV"); +} +static void load_GL_NV_primitive_restart(GLADloadproc load) { /* Resolves the GL_NV_primitive_restart entry points through load; does nothing when GLAD_GL_NV_primitive_restart is 0. */ + if(!GLAD_GL_NV_primitive_restart) return; + glad_glPrimitiveRestartNV = (PFNGLPRIMITIVERESTARTNVPROC)load("glPrimitiveRestartNV"); + glad_glPrimitiveRestartIndexNV = (PFNGLPRIMITIVERESTARTINDEXNVPROC)load("glPrimitiveRestartIndexNV"); +} +static void load_GL_NV_query_resource(GLADloadproc load) { /* Resolves the single GL_NV_query_resource entry point through load; does nothing when GLAD_GL_NV_query_resource is 0. */ + if(!GLAD_GL_NV_query_resource) return; + glad_glQueryResourceNV = (PFNGLQUERYRESOURCENVPROC)load("glQueryResourceNV"); +} +static void load_GL_NV_query_resource_tag(GLADloadproc load) { /* Resolves the GL_NV_query_resource_tag entry points through load; does nothing when GLAD_GL_NV_query_resource_tag is 0. */ + if(!GLAD_GL_NV_query_resource_tag) return; + glad_glGenQueryResourceTagNV = (PFNGLGENQUERYRESOURCETAGNVPROC)load("glGenQueryResourceTagNV"); + glad_glDeleteQueryResourceTagNV = (PFNGLDELETEQUERYRESOURCETAGNVPROC)load("glDeleteQueryResourceTagNV"); + glad_glQueryResourceTagNV = (PFNGLQUERYRESOURCETAGNVPROC)load("glQueryResourceTagNV"); +} +static void load_GL_NV_register_combiners(GLADloadproc load) { /* Resolves the GL_NV_register_combiners entry points through load; does nothing when GLAD_GL_NV_register_combiners is 0. */ + if(!GLAD_GL_NV_register_combiners) return; + glad_glCombinerParameterfvNV = (PFNGLCOMBINERPARAMETERFVNVPROC)load("glCombinerParameterfvNV"); + glad_glCombinerParameterfNV =
(PFNGLCOMBINERPARAMETERFNVPROC)load("glCombinerParameterfNV"); + glad_glCombinerParameterivNV = (PFNGLCOMBINERPARAMETERIVNVPROC)load("glCombinerParameterivNV"); + glad_glCombinerParameteriNV = (PFNGLCOMBINERPARAMETERINVPROC)load("glCombinerParameteriNV"); + glad_glCombinerInputNV = (PFNGLCOMBINERINPUTNVPROC)load("glCombinerInputNV"); + glad_glCombinerOutputNV = (PFNGLCOMBINEROUTPUTNVPROC)load("glCombinerOutputNV"); + glad_glFinalCombinerInputNV = (PFNGLFINALCOMBINERINPUTNVPROC)load("glFinalCombinerInputNV"); + glad_glGetCombinerInputParameterfvNV = (PFNGLGETCOMBINERINPUTPARAMETERFVNVPROC)load("glGetCombinerInputParameterfvNV"); + glad_glGetCombinerInputParameterivNV = (PFNGLGETCOMBINERINPUTPARAMETERIVNVPROC)load("glGetCombinerInputParameterivNV"); + glad_glGetCombinerOutputParameterfvNV = (PFNGLGETCOMBINEROUTPUTPARAMETERFVNVPROC)load("glGetCombinerOutputParameterfvNV"); + glad_glGetCombinerOutputParameterivNV = (PFNGLGETCOMBINEROUTPUTPARAMETERIVNVPROC)load("glGetCombinerOutputParameterivNV"); + glad_glGetFinalCombinerInputParameterfvNV = (PFNGLGETFINALCOMBINERINPUTPARAMETERFVNVPROC)load("glGetFinalCombinerInputParameterfvNV"); + glad_glGetFinalCombinerInputParameterivNV = (PFNGLGETFINALCOMBINERINPUTPARAMETERIVNVPROC)load("glGetFinalCombinerInputParameterivNV"); +} +static void load_GL_NV_register_combiners2(GLADloadproc load) { /* Resolves the GL_NV_register_combiners2 entry points through load; does nothing when GLAD_GL_NV_register_combiners2 is 0. */ + if(!GLAD_GL_NV_register_combiners2) return; + glad_glCombinerStageParameterfvNV = (PFNGLCOMBINERSTAGEPARAMETERFVNVPROC)load("glCombinerStageParameterfvNV"); + glad_glGetCombinerStageParameterfvNV = (PFNGLGETCOMBINERSTAGEPARAMETERFVNVPROC)load("glGetCombinerStageParameterfvNV"); +} +static void load_GL_NV_sample_locations(GLADloadproc load) { /* Resolves the GL_NV_sample_locations entry points through load; does nothing when GLAD_GL_NV_sample_locations is 0. */ + if(!GLAD_GL_NV_sample_locations) return; + glad_glFramebufferSampleLocationsfvNV = (PFNGLFRAMEBUFFERSAMPLELOCATIONSFVNVPROC)load("glFramebufferSampleLocationsfvNV"); + glad_glNamedFramebufferSampleLocationsfvNV =
(PFNGLNAMEDFRAMEBUFFERSAMPLELOCATIONSFVNVPROC)load("glNamedFramebufferSampleLocationsfvNV"); + glad_glResolveDepthValuesNV = (PFNGLRESOLVEDEPTHVALUESNVPROC)load("glResolveDepthValuesNV"); +} +static void load_GL_NV_scissor_exclusive(GLADloadproc load) { /* Resolves the GL_NV_scissor_exclusive entry points through load; does nothing when GLAD_GL_NV_scissor_exclusive is 0. */ + if(!GLAD_GL_NV_scissor_exclusive) return; + glad_glScissorExclusiveNV = (PFNGLSCISSOREXCLUSIVENVPROC)load("glScissorExclusiveNV"); + glad_glScissorExclusiveArrayvNV = (PFNGLSCISSOREXCLUSIVEARRAYVNVPROC)load("glScissorExclusiveArrayvNV"); +} +static void load_GL_NV_shader_buffer_load(GLADloadproc load) { /* Resolves the GL_NV_shader_buffer_load entry points through load; does nothing when GLAD_GL_NV_shader_buffer_load is 0. */ + if(!GLAD_GL_NV_shader_buffer_load) return; + glad_glMakeBufferResidentNV = (PFNGLMAKEBUFFERRESIDENTNVPROC)load("glMakeBufferResidentNV"); + glad_glMakeBufferNonResidentNV = (PFNGLMAKEBUFFERNONRESIDENTNVPROC)load("glMakeBufferNonResidentNV"); + glad_glIsBufferResidentNV = (PFNGLISBUFFERRESIDENTNVPROC)load("glIsBufferResidentNV"); + glad_glMakeNamedBufferResidentNV = (PFNGLMAKENAMEDBUFFERRESIDENTNVPROC)load("glMakeNamedBufferResidentNV"); + glad_glMakeNamedBufferNonResidentNV = (PFNGLMAKENAMEDBUFFERNONRESIDENTNVPROC)load("glMakeNamedBufferNonResidentNV"); + glad_glIsNamedBufferResidentNV = (PFNGLISNAMEDBUFFERRESIDENTNVPROC)load("glIsNamedBufferResidentNV"); + glad_glGetBufferParameterui64vNV = (PFNGLGETBUFFERPARAMETERUI64VNVPROC)load("glGetBufferParameterui64vNV"); + glad_glGetNamedBufferParameterui64vNV = (PFNGLGETNAMEDBUFFERPARAMETERUI64VNVPROC)load("glGetNamedBufferParameterui64vNV"); + glad_glGetIntegerui64vNV = (PFNGLGETINTEGERUI64VNVPROC)load("glGetIntegerui64vNV"); + glad_glUniformui64NV = (PFNGLUNIFORMUI64NVPROC)load("glUniformui64NV"); + glad_glUniformui64vNV = (PFNGLUNIFORMUI64VNVPROC)load("glUniformui64vNV"); + glad_glGetUniformui64vNV = (PFNGLGETUNIFORMUI64VNVPROC)load("glGetUniformui64vNV"); + glad_glProgramUniformui64NV = (PFNGLPROGRAMUNIFORMUI64NVPROC)load("glProgramUniformui64NV"); + glad_glProgramUniformui64vNV = (PFNGLPROGRAMUNIFORMUI64VNVPROC)load("glProgramUniformui64vNV"); +} +static void
load_GL_NV_shading_rate_image(GLADloadproc load) { /* Resolves the GL_NV_shading_rate_image entry points through load; does nothing when GLAD_GL_NV_shading_rate_image is 0. */ + if(!GLAD_GL_NV_shading_rate_image) return; + glad_glBindShadingRateImageNV = (PFNGLBINDSHADINGRATEIMAGENVPROC)load("glBindShadingRateImageNV"); + glad_glGetShadingRateImagePaletteNV = (PFNGLGETSHADINGRATEIMAGEPALETTENVPROC)load("glGetShadingRateImagePaletteNV"); + glad_glGetShadingRateSampleLocationivNV = (PFNGLGETSHADINGRATESAMPLELOCATIONIVNVPROC)load("glGetShadingRateSampleLocationivNV"); + glad_glShadingRateImageBarrierNV = (PFNGLSHADINGRATEIMAGEBARRIERNVPROC)load("glShadingRateImageBarrierNV"); + glad_glShadingRateImagePaletteNV = (PFNGLSHADINGRATEIMAGEPALETTENVPROC)load("glShadingRateImagePaletteNV"); + glad_glShadingRateSampleOrderNV = (PFNGLSHADINGRATESAMPLEORDERNVPROC)load("glShadingRateSampleOrderNV"); + glad_glShadingRateSampleOrderCustomNV = (PFNGLSHADINGRATESAMPLEORDERCUSTOMNVPROC)load("glShadingRateSampleOrderCustomNV"); +} +static void load_GL_NV_texture_barrier(GLADloadproc load) { /* Resolves the single GL_NV_texture_barrier entry point through load; does nothing when GLAD_GL_NV_texture_barrier is 0. */ + if(!GLAD_GL_NV_texture_barrier) return; + glad_glTextureBarrierNV = (PFNGLTEXTUREBARRIERNVPROC)load("glTextureBarrierNV"); +} +static void load_GL_NV_texture_multisample(GLADloadproc load) { /* Resolves the GL_NV_texture_multisample entry points through load; does nothing when GLAD_GL_NV_texture_multisample is 0. */ + if(!GLAD_GL_NV_texture_multisample) return; + glad_glTexImage2DMultisampleCoverageNV = (PFNGLTEXIMAGE2DMULTISAMPLECOVERAGENVPROC)load("glTexImage2DMultisampleCoverageNV"); + glad_glTexImage3DMultisampleCoverageNV = (PFNGLTEXIMAGE3DMULTISAMPLECOVERAGENVPROC)load("glTexImage3DMultisampleCoverageNV"); + glad_glTextureImage2DMultisampleNV = (PFNGLTEXTUREIMAGE2DMULTISAMPLENVPROC)load("glTextureImage2DMultisampleNV"); + glad_glTextureImage3DMultisampleNV = (PFNGLTEXTUREIMAGE3DMULTISAMPLENVPROC)load("glTextureImage3DMultisampleNV"); + glad_glTextureImage2DMultisampleCoverageNV = (PFNGLTEXTUREIMAGE2DMULTISAMPLECOVERAGENVPROC)load("glTextureImage2DMultisampleCoverageNV"); + glad_glTextureImage3DMultisampleCoverageNV = (PFNGLTEXTUREIMAGE3DMULTISAMPLECOVERAGENVPROC)load("glTextureImage3DMultisampleCoverageNV"); +} +static void
load_GL_NV_timeline_semaphore(GLADloadproc load) { + if(!GLAD_GL_NV_timeline_semaphore) return; + glad_glCreateSemaphoresNV = (PFNGLCREATESEMAPHORESNVPROC)load("glCreateSemaphoresNV"); + glad_glSemaphoreParameterivNV = (PFNGLSEMAPHOREPARAMETERIVNVPROC)load("glSemaphoreParameterivNV"); + glad_glGetSemaphoreParameterivNV = (PFNGLGETSEMAPHOREPARAMETERIVNVPROC)load("glGetSemaphoreParameterivNV"); +} +static void load_GL_NV_transform_feedback(GLADloadproc load) { + if(!GLAD_GL_NV_transform_feedback) return; + glad_glBeginTransformFeedbackNV = (PFNGLBEGINTRANSFORMFEEDBACKNVPROC)load("glBeginTransformFeedbackNV"); + glad_glEndTransformFeedbackNV = (PFNGLENDTRANSFORMFEEDBACKNVPROC)load("glEndTransformFeedbackNV"); + glad_glTransformFeedbackAttribsNV = (PFNGLTRANSFORMFEEDBACKATTRIBSNVPROC)load("glTransformFeedbackAttribsNV"); + glad_glBindBufferRangeNV = (PFNGLBINDBUFFERRANGENVPROC)load("glBindBufferRangeNV"); + glad_glBindBufferOffsetNV = (PFNGLBINDBUFFEROFFSETNVPROC)load("glBindBufferOffsetNV"); + glad_glBindBufferBaseNV = (PFNGLBINDBUFFERBASENVPROC)load("glBindBufferBaseNV"); + glad_glTransformFeedbackVaryingsNV = (PFNGLTRANSFORMFEEDBACKVARYINGSNVPROC)load("glTransformFeedbackVaryingsNV"); + glad_glActiveVaryingNV = (PFNGLACTIVEVARYINGNVPROC)load("glActiveVaryingNV"); + glad_glGetVaryingLocationNV = (PFNGLGETVARYINGLOCATIONNVPROC)load("glGetVaryingLocationNV"); + glad_glGetActiveVaryingNV = (PFNGLGETACTIVEVARYINGNVPROC)load("glGetActiveVaryingNV"); + glad_glGetTransformFeedbackVaryingNV = (PFNGLGETTRANSFORMFEEDBACKVARYINGNVPROC)load("glGetTransformFeedbackVaryingNV"); + glad_glTransformFeedbackStreamAttribsNV = (PFNGLTRANSFORMFEEDBACKSTREAMATTRIBSNVPROC)load("glTransformFeedbackStreamAttribsNV"); +} +static void load_GL_NV_transform_feedback2(GLADloadproc load) { + if(!GLAD_GL_NV_transform_feedback2) return; + glad_glBindTransformFeedbackNV = (PFNGLBINDTRANSFORMFEEDBACKNVPROC)load("glBindTransformFeedbackNV"); + glad_glDeleteTransformFeedbacksNV = 
(PFNGLDELETETRANSFORMFEEDBACKSNVPROC)load("glDeleteTransformFeedbacksNV"); + glad_glGenTransformFeedbacksNV = (PFNGLGENTRANSFORMFEEDBACKSNVPROC)load("glGenTransformFeedbacksNV"); + glad_glIsTransformFeedbackNV = (PFNGLISTRANSFORMFEEDBACKNVPROC)load("glIsTransformFeedbackNV"); + glad_glPauseTransformFeedbackNV = (PFNGLPAUSETRANSFORMFEEDBACKNVPROC)load("glPauseTransformFeedbackNV"); + glad_glResumeTransformFeedbackNV = (PFNGLRESUMETRANSFORMFEEDBACKNVPROC)load("glResumeTransformFeedbackNV"); + glad_glDrawTransformFeedbackNV = (PFNGLDRAWTRANSFORMFEEDBACKNVPROC)load("glDrawTransformFeedbackNV"); +} +static void load_GL_NV_vdpau_interop(GLADloadproc load) { + if(!GLAD_GL_NV_vdpau_interop) return; + glad_glVDPAUInitNV = (PFNGLVDPAUINITNVPROC)load("glVDPAUInitNV"); + glad_glVDPAUFiniNV = (PFNGLVDPAUFININVPROC)load("glVDPAUFiniNV"); + glad_glVDPAURegisterVideoSurfaceNV = (PFNGLVDPAUREGISTERVIDEOSURFACENVPROC)load("glVDPAURegisterVideoSurfaceNV"); + glad_glVDPAURegisterOutputSurfaceNV = (PFNGLVDPAUREGISTEROUTPUTSURFACENVPROC)load("glVDPAURegisterOutputSurfaceNV"); + glad_glVDPAUIsSurfaceNV = (PFNGLVDPAUISSURFACENVPROC)load("glVDPAUIsSurfaceNV"); + glad_glVDPAUUnregisterSurfaceNV = (PFNGLVDPAUUNREGISTERSURFACENVPROC)load("glVDPAUUnregisterSurfaceNV"); + glad_glVDPAUGetSurfaceivNV = (PFNGLVDPAUGETSURFACEIVNVPROC)load("glVDPAUGetSurfaceivNV"); + glad_glVDPAUSurfaceAccessNV = (PFNGLVDPAUSURFACEACCESSNVPROC)load("glVDPAUSurfaceAccessNV"); + glad_glVDPAUMapSurfacesNV = (PFNGLVDPAUMAPSURFACESNVPROC)load("glVDPAUMapSurfacesNV"); + glad_glVDPAUUnmapSurfacesNV = (PFNGLVDPAUUNMAPSURFACESNVPROC)load("glVDPAUUnmapSurfacesNV"); +} +static void load_GL_NV_vdpau_interop2(GLADloadproc load) { + if(!GLAD_GL_NV_vdpau_interop2) return; + glad_glVDPAURegisterVideoSurfaceWithPictureStructureNV = (PFNGLVDPAUREGISTERVIDEOSURFACEWITHPICTURESTRUCTURENVPROC)load("glVDPAURegisterVideoSurfaceWithPictureStructureNV"); +} +static void load_GL_NV_vertex_array_range(GLADloadproc load) { + 
if(!GLAD_GL_NV_vertex_array_range) return; + glad_glFlushVertexArrayRangeNV = (PFNGLFLUSHVERTEXARRAYRANGENVPROC)load("glFlushVertexArrayRangeNV"); + glad_glVertexArrayRangeNV = (PFNGLVERTEXARRAYRANGENVPROC)load("glVertexArrayRangeNV"); +} +static void load_GL_NV_vertex_attrib_integer_64bit(GLADloadproc load) { + if(!GLAD_GL_NV_vertex_attrib_integer_64bit) return; + glad_glVertexAttribL1i64NV = (PFNGLVERTEXATTRIBL1I64NVPROC)load("glVertexAttribL1i64NV"); + glad_glVertexAttribL2i64NV = (PFNGLVERTEXATTRIBL2I64NVPROC)load("glVertexAttribL2i64NV"); + glad_glVertexAttribL3i64NV = (PFNGLVERTEXATTRIBL3I64NVPROC)load("glVertexAttribL3i64NV"); + glad_glVertexAttribL4i64NV = (PFNGLVERTEXATTRIBL4I64NVPROC)load("glVertexAttribL4i64NV"); + glad_glVertexAttribL1i64vNV = (PFNGLVERTEXATTRIBL1I64VNVPROC)load("glVertexAttribL1i64vNV"); + glad_glVertexAttribL2i64vNV = (PFNGLVERTEXATTRIBL2I64VNVPROC)load("glVertexAttribL2i64vNV"); + glad_glVertexAttribL3i64vNV = (PFNGLVERTEXATTRIBL3I64VNVPROC)load("glVertexAttribL3i64vNV"); + glad_glVertexAttribL4i64vNV = (PFNGLVERTEXATTRIBL4I64VNVPROC)load("glVertexAttribL4i64vNV"); + glad_glVertexAttribL1ui64NV = (PFNGLVERTEXATTRIBL1UI64NVPROC)load("glVertexAttribL1ui64NV"); + glad_glVertexAttribL2ui64NV = (PFNGLVERTEXATTRIBL2UI64NVPROC)load("glVertexAttribL2ui64NV"); + glad_glVertexAttribL3ui64NV = (PFNGLVERTEXATTRIBL3UI64NVPROC)load("glVertexAttribL3ui64NV"); + glad_glVertexAttribL4ui64NV = (PFNGLVERTEXATTRIBL4UI64NVPROC)load("glVertexAttribL4ui64NV"); + glad_glVertexAttribL1ui64vNV = (PFNGLVERTEXATTRIBL1UI64VNVPROC)load("glVertexAttribL1ui64vNV"); + glad_glVertexAttribL2ui64vNV = (PFNGLVERTEXATTRIBL2UI64VNVPROC)load("glVertexAttribL2ui64vNV"); + glad_glVertexAttribL3ui64vNV = (PFNGLVERTEXATTRIBL3UI64VNVPROC)load("glVertexAttribL3ui64vNV"); + glad_glVertexAttribL4ui64vNV = (PFNGLVERTEXATTRIBL4UI64VNVPROC)load("glVertexAttribL4ui64vNV"); + glad_glGetVertexAttribLi64vNV = (PFNGLGETVERTEXATTRIBLI64VNVPROC)load("glGetVertexAttribLi64vNV"); + 
glad_glGetVertexAttribLui64vNV = (PFNGLGETVERTEXATTRIBLUI64VNVPROC)load("glGetVertexAttribLui64vNV"); + glad_glVertexAttribLFormatNV = (PFNGLVERTEXATTRIBLFORMATNVPROC)load("glVertexAttribLFormatNV"); +} +static void load_GL_NV_vertex_buffer_unified_memory(GLADloadproc load) { + if(!GLAD_GL_NV_vertex_buffer_unified_memory) return; + glad_glBufferAddressRangeNV = (PFNGLBUFFERADDRESSRANGENVPROC)load("glBufferAddressRangeNV"); + glad_glVertexFormatNV = (PFNGLVERTEXFORMATNVPROC)load("glVertexFormatNV"); + glad_glNormalFormatNV = (PFNGLNORMALFORMATNVPROC)load("glNormalFormatNV"); + glad_glColorFormatNV = (PFNGLCOLORFORMATNVPROC)load("glColorFormatNV"); + glad_glIndexFormatNV = (PFNGLINDEXFORMATNVPROC)load("glIndexFormatNV"); + glad_glTexCoordFormatNV = (PFNGLTEXCOORDFORMATNVPROC)load("glTexCoordFormatNV"); + glad_glEdgeFlagFormatNV = (PFNGLEDGEFLAGFORMATNVPROC)load("glEdgeFlagFormatNV"); + glad_glSecondaryColorFormatNV = (PFNGLSECONDARYCOLORFORMATNVPROC)load("glSecondaryColorFormatNV"); + glad_glFogCoordFormatNV = (PFNGLFOGCOORDFORMATNVPROC)load("glFogCoordFormatNV"); + glad_glVertexAttribFormatNV = (PFNGLVERTEXATTRIBFORMATNVPROC)load("glVertexAttribFormatNV"); + glad_glVertexAttribIFormatNV = (PFNGLVERTEXATTRIBIFORMATNVPROC)load("glVertexAttribIFormatNV"); + glad_glGetIntegerui64i_vNV = (PFNGLGETINTEGERUI64I_VNVPROC)load("glGetIntegerui64i_vNV"); +} +static void load_GL_NV_vertex_program(GLADloadproc load) { + if(!GLAD_GL_NV_vertex_program) return; + glad_glAreProgramsResidentNV = (PFNGLAREPROGRAMSRESIDENTNVPROC)load("glAreProgramsResidentNV"); + glad_glBindProgramNV = (PFNGLBINDPROGRAMNVPROC)load("glBindProgramNV"); + glad_glDeleteProgramsNV = (PFNGLDELETEPROGRAMSNVPROC)load("glDeleteProgramsNV"); + glad_glExecuteProgramNV = (PFNGLEXECUTEPROGRAMNVPROC)load("glExecuteProgramNV"); + glad_glGenProgramsNV = (PFNGLGENPROGRAMSNVPROC)load("glGenProgramsNV"); + glad_glGetProgramParameterdvNV = (PFNGLGETPROGRAMPARAMETERDVNVPROC)load("glGetProgramParameterdvNV"); + 
glad_glGetProgramParameterfvNV = (PFNGLGETPROGRAMPARAMETERFVNVPROC)load("glGetProgramParameterfvNV"); + glad_glGetProgramivNV = (PFNGLGETPROGRAMIVNVPROC)load("glGetProgramivNV"); + glad_glGetProgramStringNV = (PFNGLGETPROGRAMSTRINGNVPROC)load("glGetProgramStringNV"); + glad_glGetTrackMatrixivNV = (PFNGLGETTRACKMATRIXIVNVPROC)load("glGetTrackMatrixivNV"); + glad_glGetVertexAttribdvNV = (PFNGLGETVERTEXATTRIBDVNVPROC)load("glGetVertexAttribdvNV"); + glad_glGetVertexAttribfvNV = (PFNGLGETVERTEXATTRIBFVNVPROC)load("glGetVertexAttribfvNV"); + glad_glGetVertexAttribivNV = (PFNGLGETVERTEXATTRIBIVNVPROC)load("glGetVertexAttribivNV"); + glad_glGetVertexAttribPointervNV = (PFNGLGETVERTEXATTRIBPOINTERVNVPROC)load("glGetVertexAttribPointervNV"); + glad_glIsProgramNV = (PFNGLISPROGRAMNVPROC)load("glIsProgramNV"); + glad_glLoadProgramNV = (PFNGLLOADPROGRAMNVPROC)load("glLoadProgramNV"); + glad_glProgramParameter4dNV = (PFNGLPROGRAMPARAMETER4DNVPROC)load("glProgramParameter4dNV"); + glad_glProgramParameter4dvNV = (PFNGLPROGRAMPARAMETER4DVNVPROC)load("glProgramParameter4dvNV"); + glad_glProgramParameter4fNV = (PFNGLPROGRAMPARAMETER4FNVPROC)load("glProgramParameter4fNV"); + glad_glProgramParameter4fvNV = (PFNGLPROGRAMPARAMETER4FVNVPROC)load("glProgramParameter4fvNV"); + glad_glProgramParameters4dvNV = (PFNGLPROGRAMPARAMETERS4DVNVPROC)load("glProgramParameters4dvNV"); + glad_glProgramParameters4fvNV = (PFNGLPROGRAMPARAMETERS4FVNVPROC)load("glProgramParameters4fvNV"); + glad_glRequestResidentProgramsNV = (PFNGLREQUESTRESIDENTPROGRAMSNVPROC)load("glRequestResidentProgramsNV"); + glad_glTrackMatrixNV = (PFNGLTRACKMATRIXNVPROC)load("glTrackMatrixNV"); + glad_glVertexAttribPointerNV = (PFNGLVERTEXATTRIBPOINTERNVPROC)load("glVertexAttribPointerNV"); + glad_glVertexAttrib1dNV = (PFNGLVERTEXATTRIB1DNVPROC)load("glVertexAttrib1dNV"); + glad_glVertexAttrib1dvNV = (PFNGLVERTEXATTRIB1DVNVPROC)load("glVertexAttrib1dvNV"); + glad_glVertexAttrib1fNV = 
(PFNGLVERTEXATTRIB1FNVPROC)load("glVertexAttrib1fNV"); + glad_glVertexAttrib1fvNV = (PFNGLVERTEXATTRIB1FVNVPROC)load("glVertexAttrib1fvNV"); + glad_glVertexAttrib1sNV = (PFNGLVERTEXATTRIB1SNVPROC)load("glVertexAttrib1sNV"); + glad_glVertexAttrib1svNV = (PFNGLVERTEXATTRIB1SVNVPROC)load("glVertexAttrib1svNV"); + glad_glVertexAttrib2dNV = (PFNGLVERTEXATTRIB2DNVPROC)load("glVertexAttrib2dNV"); + glad_glVertexAttrib2dvNV = (PFNGLVERTEXATTRIB2DVNVPROC)load("glVertexAttrib2dvNV"); + glad_glVertexAttrib2fNV = (PFNGLVERTEXATTRIB2FNVPROC)load("glVertexAttrib2fNV"); + glad_glVertexAttrib2fvNV = (PFNGLVERTEXATTRIB2FVNVPROC)load("glVertexAttrib2fvNV"); + glad_glVertexAttrib2sNV = (PFNGLVERTEXATTRIB2SNVPROC)load("glVertexAttrib2sNV"); + glad_glVertexAttrib2svNV = (PFNGLVERTEXATTRIB2SVNVPROC)load("glVertexAttrib2svNV"); + glad_glVertexAttrib3dNV = (PFNGLVERTEXATTRIB3DNVPROC)load("glVertexAttrib3dNV"); + glad_glVertexAttrib3dvNV = (PFNGLVERTEXATTRIB3DVNVPROC)load("glVertexAttrib3dvNV"); + glad_glVertexAttrib3fNV = (PFNGLVERTEXATTRIB3FNVPROC)load("glVertexAttrib3fNV"); + glad_glVertexAttrib3fvNV = (PFNGLVERTEXATTRIB3FVNVPROC)load("glVertexAttrib3fvNV"); + glad_glVertexAttrib3sNV = (PFNGLVERTEXATTRIB3SNVPROC)load("glVertexAttrib3sNV"); + glad_glVertexAttrib3svNV = (PFNGLVERTEXATTRIB3SVNVPROC)load("glVertexAttrib3svNV"); + glad_glVertexAttrib4dNV = (PFNGLVERTEXATTRIB4DNVPROC)load("glVertexAttrib4dNV"); + glad_glVertexAttrib4dvNV = (PFNGLVERTEXATTRIB4DVNVPROC)load("glVertexAttrib4dvNV"); + glad_glVertexAttrib4fNV = (PFNGLVERTEXATTRIB4FNVPROC)load("glVertexAttrib4fNV"); + glad_glVertexAttrib4fvNV = (PFNGLVERTEXATTRIB4FVNVPROC)load("glVertexAttrib4fvNV"); + glad_glVertexAttrib4sNV = (PFNGLVERTEXATTRIB4SNVPROC)load("glVertexAttrib4sNV"); + glad_glVertexAttrib4svNV = (PFNGLVERTEXATTRIB4SVNVPROC)load("glVertexAttrib4svNV"); + glad_glVertexAttrib4ubNV = (PFNGLVERTEXATTRIB4UBNVPROC)load("glVertexAttrib4ubNV"); + glad_glVertexAttrib4ubvNV = 
(PFNGLVERTEXATTRIB4UBVNVPROC)load("glVertexAttrib4ubvNV"); + glad_glVertexAttribs1dvNV = (PFNGLVERTEXATTRIBS1DVNVPROC)load("glVertexAttribs1dvNV"); + glad_glVertexAttribs1fvNV = (PFNGLVERTEXATTRIBS1FVNVPROC)load("glVertexAttribs1fvNV"); + glad_glVertexAttribs1svNV = (PFNGLVERTEXATTRIBS1SVNVPROC)load("glVertexAttribs1svNV"); + glad_glVertexAttribs2dvNV = (PFNGLVERTEXATTRIBS2DVNVPROC)load("glVertexAttribs2dvNV"); + glad_glVertexAttribs2fvNV = (PFNGLVERTEXATTRIBS2FVNVPROC)load("glVertexAttribs2fvNV"); + glad_glVertexAttribs2svNV = (PFNGLVERTEXATTRIBS2SVNVPROC)load("glVertexAttribs2svNV"); + glad_glVertexAttribs3dvNV = (PFNGLVERTEXATTRIBS3DVNVPROC)load("glVertexAttribs3dvNV"); + glad_glVertexAttribs3fvNV = (PFNGLVERTEXATTRIBS3FVNVPROC)load("glVertexAttribs3fvNV"); + glad_glVertexAttribs3svNV = (PFNGLVERTEXATTRIBS3SVNVPROC)load("glVertexAttribs3svNV"); + glad_glVertexAttribs4dvNV = (PFNGLVERTEXATTRIBS4DVNVPROC)load("glVertexAttribs4dvNV"); + glad_glVertexAttribs4fvNV = (PFNGLVERTEXATTRIBS4FVNVPROC)load("glVertexAttribs4fvNV"); + glad_glVertexAttribs4svNV = (PFNGLVERTEXATTRIBS4SVNVPROC)load("glVertexAttribs4svNV"); + glad_glVertexAttribs4ubvNV = (PFNGLVERTEXATTRIBS4UBVNVPROC)load("glVertexAttribs4ubvNV"); +} +static void load_GL_NV_vertex_program4(GLADloadproc load) { + if(!GLAD_GL_NV_vertex_program4) return; + glad_glVertexAttribI1iEXT = (PFNGLVERTEXATTRIBI1IEXTPROC)load("glVertexAttribI1iEXT"); + glad_glVertexAttribI2iEXT = (PFNGLVERTEXATTRIBI2IEXTPROC)load("glVertexAttribI2iEXT"); + glad_glVertexAttribI3iEXT = (PFNGLVERTEXATTRIBI3IEXTPROC)load("glVertexAttribI3iEXT"); + glad_glVertexAttribI4iEXT = (PFNGLVERTEXATTRIBI4IEXTPROC)load("glVertexAttribI4iEXT"); + glad_glVertexAttribI1uiEXT = (PFNGLVERTEXATTRIBI1UIEXTPROC)load("glVertexAttribI1uiEXT"); + glad_glVertexAttribI2uiEXT = (PFNGLVERTEXATTRIBI2UIEXTPROC)load("glVertexAttribI2uiEXT"); + glad_glVertexAttribI3uiEXT = (PFNGLVERTEXATTRIBI3UIEXTPROC)load("glVertexAttribI3uiEXT"); + glad_glVertexAttribI4uiEXT = 
(PFNGLVERTEXATTRIBI4UIEXTPROC)load("glVertexAttribI4uiEXT"); + glad_glVertexAttribI1ivEXT = (PFNGLVERTEXATTRIBI1IVEXTPROC)load("glVertexAttribI1ivEXT"); + glad_glVertexAttribI2ivEXT = (PFNGLVERTEXATTRIBI2IVEXTPROC)load("glVertexAttribI2ivEXT"); + glad_glVertexAttribI3ivEXT = (PFNGLVERTEXATTRIBI3IVEXTPROC)load("glVertexAttribI3ivEXT"); + glad_glVertexAttribI4ivEXT = (PFNGLVERTEXATTRIBI4IVEXTPROC)load("glVertexAttribI4ivEXT"); + glad_glVertexAttribI1uivEXT = (PFNGLVERTEXATTRIBI1UIVEXTPROC)load("glVertexAttribI1uivEXT"); + glad_glVertexAttribI2uivEXT = (PFNGLVERTEXATTRIBI2UIVEXTPROC)load("glVertexAttribI2uivEXT"); + glad_glVertexAttribI3uivEXT = (PFNGLVERTEXATTRIBI3UIVEXTPROC)load("glVertexAttribI3uivEXT"); + glad_glVertexAttribI4uivEXT = (PFNGLVERTEXATTRIBI4UIVEXTPROC)load("glVertexAttribI4uivEXT"); + glad_glVertexAttribI4bvEXT = (PFNGLVERTEXATTRIBI4BVEXTPROC)load("glVertexAttribI4bvEXT"); + glad_glVertexAttribI4svEXT = (PFNGLVERTEXATTRIBI4SVEXTPROC)load("glVertexAttribI4svEXT"); + glad_glVertexAttribI4ubvEXT = (PFNGLVERTEXATTRIBI4UBVEXTPROC)load("glVertexAttribI4ubvEXT"); + glad_glVertexAttribI4usvEXT = (PFNGLVERTEXATTRIBI4USVEXTPROC)load("glVertexAttribI4usvEXT"); + glad_glVertexAttribIPointerEXT = (PFNGLVERTEXATTRIBIPOINTEREXTPROC)load("glVertexAttribIPointerEXT"); + glad_glGetVertexAttribIivEXT = (PFNGLGETVERTEXATTRIBIIVEXTPROC)load("glGetVertexAttribIivEXT"); + glad_glGetVertexAttribIuivEXT = (PFNGLGETVERTEXATTRIBIUIVEXTPROC)load("glGetVertexAttribIuivEXT"); +} +static void load_GL_NV_video_capture(GLADloadproc load) { + if(!GLAD_GL_NV_video_capture) return; + glad_glBeginVideoCaptureNV = (PFNGLBEGINVIDEOCAPTURENVPROC)load("glBeginVideoCaptureNV"); + glad_glBindVideoCaptureStreamBufferNV = (PFNGLBINDVIDEOCAPTURESTREAMBUFFERNVPROC)load("glBindVideoCaptureStreamBufferNV"); + glad_glBindVideoCaptureStreamTextureNV = (PFNGLBINDVIDEOCAPTURESTREAMTEXTURENVPROC)load("glBindVideoCaptureStreamTextureNV"); + glad_glEndVideoCaptureNV = 
(PFNGLENDVIDEOCAPTURENVPROC)load("glEndVideoCaptureNV"); + glad_glGetVideoCaptureivNV = (PFNGLGETVIDEOCAPTUREIVNVPROC)load("glGetVideoCaptureivNV"); + glad_glGetVideoCaptureStreamivNV = (PFNGLGETVIDEOCAPTURESTREAMIVNVPROC)load("glGetVideoCaptureStreamivNV"); + glad_glGetVideoCaptureStreamfvNV = (PFNGLGETVIDEOCAPTURESTREAMFVNVPROC)load("glGetVideoCaptureStreamfvNV"); + glad_glGetVideoCaptureStreamdvNV = (PFNGLGETVIDEOCAPTURESTREAMDVNVPROC)load("glGetVideoCaptureStreamdvNV"); + glad_glVideoCaptureNV = (PFNGLVIDEOCAPTURENVPROC)load("glVideoCaptureNV"); + glad_glVideoCaptureStreamParameterivNV = (PFNGLVIDEOCAPTURESTREAMPARAMETERIVNVPROC)load("glVideoCaptureStreamParameterivNV"); + glad_glVideoCaptureStreamParameterfvNV = (PFNGLVIDEOCAPTURESTREAMPARAMETERFVNVPROC)load("glVideoCaptureStreamParameterfvNV"); + glad_glVideoCaptureStreamParameterdvNV = (PFNGLVIDEOCAPTURESTREAMPARAMETERDVNVPROC)load("glVideoCaptureStreamParameterdvNV"); +} +static void load_GL_NV_viewport_swizzle(GLADloadproc load) { + if(!GLAD_GL_NV_viewport_swizzle) return; + glad_glViewportSwizzleNV = (PFNGLVIEWPORTSWIZZLENVPROC)load("glViewportSwizzleNV"); +} +static void load_GL_OES_byte_coordinates(GLADloadproc load) { + if(!GLAD_GL_OES_byte_coordinates) return; + glad_glMultiTexCoord1bOES = (PFNGLMULTITEXCOORD1BOESPROC)load("glMultiTexCoord1bOES"); + glad_glMultiTexCoord1bvOES = (PFNGLMULTITEXCOORD1BVOESPROC)load("glMultiTexCoord1bvOES"); + glad_glMultiTexCoord2bOES = (PFNGLMULTITEXCOORD2BOESPROC)load("glMultiTexCoord2bOES"); + glad_glMultiTexCoord2bvOES = (PFNGLMULTITEXCOORD2BVOESPROC)load("glMultiTexCoord2bvOES"); + glad_glMultiTexCoord3bOES = (PFNGLMULTITEXCOORD3BOESPROC)load("glMultiTexCoord3bOES"); + glad_glMultiTexCoord3bvOES = (PFNGLMULTITEXCOORD3BVOESPROC)load("glMultiTexCoord3bvOES"); + glad_glMultiTexCoord4bOES = (PFNGLMULTITEXCOORD4BOESPROC)load("glMultiTexCoord4bOES"); + glad_glMultiTexCoord4bvOES = (PFNGLMULTITEXCOORD4BVOESPROC)load("glMultiTexCoord4bvOES"); + glad_glTexCoord1bOES = 
(PFNGLTEXCOORD1BOESPROC)load("glTexCoord1bOES"); + glad_glTexCoord1bvOES = (PFNGLTEXCOORD1BVOESPROC)load("glTexCoord1bvOES"); + glad_glTexCoord2bOES = (PFNGLTEXCOORD2BOESPROC)load("glTexCoord2bOES"); + glad_glTexCoord2bvOES = (PFNGLTEXCOORD2BVOESPROC)load("glTexCoord2bvOES"); + glad_glTexCoord3bOES = (PFNGLTEXCOORD3BOESPROC)load("glTexCoord3bOES"); + glad_glTexCoord3bvOES = (PFNGLTEXCOORD3BVOESPROC)load("glTexCoord3bvOES"); + glad_glTexCoord4bOES = (PFNGLTEXCOORD4BOESPROC)load("glTexCoord4bOES"); + glad_glTexCoord4bvOES = (PFNGLTEXCOORD4BVOESPROC)load("glTexCoord4bvOES"); + glad_glVertex2bOES = (PFNGLVERTEX2BOESPROC)load("glVertex2bOES"); + glad_glVertex2bvOES = (PFNGLVERTEX2BVOESPROC)load("glVertex2bvOES"); + glad_glVertex3bOES = (PFNGLVERTEX3BOESPROC)load("glVertex3bOES"); + glad_glVertex3bvOES = (PFNGLVERTEX3BVOESPROC)load("glVertex3bvOES"); + glad_glVertex4bOES = (PFNGLVERTEX4BOESPROC)load("glVertex4bOES"); + glad_glVertex4bvOES = (PFNGLVERTEX4BVOESPROC)load("glVertex4bvOES"); +} +static void load_GL_OES_fixed_point(GLADloadproc load) { + if(!GLAD_GL_OES_fixed_point) return; + glad_glAlphaFuncxOES = (PFNGLALPHAFUNCXOESPROC)load("glAlphaFuncxOES"); + glad_glClearColorxOES = (PFNGLCLEARCOLORXOESPROC)load("glClearColorxOES"); + glad_glClearDepthxOES = (PFNGLCLEARDEPTHXOESPROC)load("glClearDepthxOES"); + glad_glClipPlanexOES = (PFNGLCLIPPLANEXOESPROC)load("glClipPlanexOES"); + glad_glColor4xOES = (PFNGLCOLOR4XOESPROC)load("glColor4xOES"); + glad_glDepthRangexOES = (PFNGLDEPTHRANGEXOESPROC)load("glDepthRangexOES"); + glad_glFogxOES = (PFNGLFOGXOESPROC)load("glFogxOES"); + glad_glFogxvOES = (PFNGLFOGXVOESPROC)load("glFogxvOES"); + glad_glFrustumxOES = (PFNGLFRUSTUMXOESPROC)load("glFrustumxOES"); + glad_glGetClipPlanexOES = (PFNGLGETCLIPPLANEXOESPROC)load("glGetClipPlanexOES"); + glad_glGetFixedvOES = (PFNGLGETFIXEDVOESPROC)load("glGetFixedvOES"); + glad_glGetTexEnvxvOES = (PFNGLGETTEXENVXVOESPROC)load("glGetTexEnvxvOES"); + glad_glGetTexParameterxvOES = 
(PFNGLGETTEXPARAMETERXVOESPROC)load("glGetTexParameterxvOES"); + glad_glLightModelxOES = (PFNGLLIGHTMODELXOESPROC)load("glLightModelxOES"); + glad_glLightModelxvOES = (PFNGLLIGHTMODELXVOESPROC)load("glLightModelxvOES"); + glad_glLightxOES = (PFNGLLIGHTXOESPROC)load("glLightxOES"); + glad_glLightxvOES = (PFNGLLIGHTXVOESPROC)load("glLightxvOES"); + glad_glLineWidthxOES = (PFNGLLINEWIDTHXOESPROC)load("glLineWidthxOES"); + glad_glLoadMatrixxOES = (PFNGLLOADMATRIXXOESPROC)load("glLoadMatrixxOES"); + glad_glMaterialxOES = (PFNGLMATERIALXOESPROC)load("glMaterialxOES"); + glad_glMaterialxvOES = (PFNGLMATERIALXVOESPROC)load("glMaterialxvOES"); + glad_glMultMatrixxOES = (PFNGLMULTMATRIXXOESPROC)load("glMultMatrixxOES"); + glad_glMultiTexCoord4xOES = (PFNGLMULTITEXCOORD4XOESPROC)load("glMultiTexCoord4xOES"); + glad_glNormal3xOES = (PFNGLNORMAL3XOESPROC)load("glNormal3xOES"); + glad_glOrthoxOES = (PFNGLORTHOXOESPROC)load("glOrthoxOES"); + glad_glPointParameterxvOES = (PFNGLPOINTPARAMETERXVOESPROC)load("glPointParameterxvOES"); + glad_glPointSizexOES = (PFNGLPOINTSIZEXOESPROC)load("glPointSizexOES"); + glad_glPolygonOffsetxOES = (PFNGLPOLYGONOFFSETXOESPROC)load("glPolygonOffsetxOES"); + glad_glRotatexOES = (PFNGLROTATEXOESPROC)load("glRotatexOES"); + glad_glScalexOES = (PFNGLSCALEXOESPROC)load("glScalexOES"); + glad_glTexEnvxOES = (PFNGLTEXENVXOESPROC)load("glTexEnvxOES"); + glad_glTexEnvxvOES = (PFNGLTEXENVXVOESPROC)load("glTexEnvxvOES"); + glad_glTexParameterxOES = (PFNGLTEXPARAMETERXOESPROC)load("glTexParameterxOES"); + glad_glTexParameterxvOES = (PFNGLTEXPARAMETERXVOESPROC)load("glTexParameterxvOES"); + glad_glTranslatexOES = (PFNGLTRANSLATEXOESPROC)load("glTranslatexOES"); + glad_glGetLightxvOES = (PFNGLGETLIGHTXVOESPROC)load("glGetLightxvOES"); + glad_glGetMaterialxvOES = (PFNGLGETMATERIALXVOESPROC)load("glGetMaterialxvOES"); + glad_glPointParameterxOES = (PFNGLPOINTPARAMETERXOESPROC)load("glPointParameterxOES"); + glad_glSampleCoveragexOES = 
(PFNGLSAMPLECOVERAGEXOESPROC)load("glSampleCoveragexOES"); + glad_glAccumxOES = (PFNGLACCUMXOESPROC)load("glAccumxOES"); + glad_glBitmapxOES = (PFNGLBITMAPXOESPROC)load("glBitmapxOES"); + glad_glBlendColorxOES = (PFNGLBLENDCOLORXOESPROC)load("glBlendColorxOES"); + glad_glClearAccumxOES = (PFNGLCLEARACCUMXOESPROC)load("glClearAccumxOES"); + glad_glColor3xOES = (PFNGLCOLOR3XOESPROC)load("glColor3xOES"); + glad_glColor3xvOES = (PFNGLCOLOR3XVOESPROC)load("glColor3xvOES"); + glad_glColor4xvOES = (PFNGLCOLOR4XVOESPROC)load("glColor4xvOES"); + glad_glConvolutionParameterxOES = (PFNGLCONVOLUTIONPARAMETERXOESPROC)load("glConvolutionParameterxOES"); + glad_glConvolutionParameterxvOES = (PFNGLCONVOLUTIONPARAMETERXVOESPROC)load("glConvolutionParameterxvOES"); + glad_glEvalCoord1xOES = (PFNGLEVALCOORD1XOESPROC)load("glEvalCoord1xOES"); + glad_glEvalCoord1xvOES = (PFNGLEVALCOORD1XVOESPROC)load("glEvalCoord1xvOES"); + glad_glEvalCoord2xOES = (PFNGLEVALCOORD2XOESPROC)load("glEvalCoord2xOES"); + glad_glEvalCoord2xvOES = (PFNGLEVALCOORD2XVOESPROC)load("glEvalCoord2xvOES"); + glad_glFeedbackBufferxOES = (PFNGLFEEDBACKBUFFERXOESPROC)load("glFeedbackBufferxOES"); + glad_glGetConvolutionParameterxvOES = (PFNGLGETCONVOLUTIONPARAMETERXVOESPROC)load("glGetConvolutionParameterxvOES"); + glad_glGetHistogramParameterxvOES = (PFNGLGETHISTOGRAMPARAMETERXVOESPROC)load("glGetHistogramParameterxvOES"); + glad_glGetLightxOES = (PFNGLGETLIGHTXOESPROC)load("glGetLightxOES"); + glad_glGetMapxvOES = (PFNGLGETMAPXVOESPROC)load("glGetMapxvOES"); + glad_glGetMaterialxOES = (PFNGLGETMATERIALXOESPROC)load("glGetMaterialxOES"); + glad_glGetPixelMapxv = (PFNGLGETPIXELMAPXVPROC)load("glGetPixelMapxv"); + glad_glGetTexGenxvOES = (PFNGLGETTEXGENXVOESPROC)load("glGetTexGenxvOES"); + glad_glGetTexLevelParameterxvOES = (PFNGLGETTEXLEVELPARAMETERXVOESPROC)load("glGetTexLevelParameterxvOES"); + glad_glIndexxOES = (PFNGLINDEXXOESPROC)load("glIndexxOES"); + glad_glIndexxvOES = (PFNGLINDEXXVOESPROC)load("glIndexxvOES"); 
+ glad_glLoadTransposeMatrixxOES = (PFNGLLOADTRANSPOSEMATRIXXOESPROC)load("glLoadTransposeMatrixxOES"); + glad_glMap1xOES = (PFNGLMAP1XOESPROC)load("glMap1xOES"); + glad_glMap2xOES = (PFNGLMAP2XOESPROC)load("glMap2xOES"); + glad_glMapGrid1xOES = (PFNGLMAPGRID1XOESPROC)load("glMapGrid1xOES"); + glad_glMapGrid2xOES = (PFNGLMAPGRID2XOESPROC)load("glMapGrid2xOES"); + glad_glMultTransposeMatrixxOES = (PFNGLMULTTRANSPOSEMATRIXXOESPROC)load("glMultTransposeMatrixxOES"); + glad_glMultiTexCoord1xOES = (PFNGLMULTITEXCOORD1XOESPROC)load("glMultiTexCoord1xOES"); + glad_glMultiTexCoord1xvOES = (PFNGLMULTITEXCOORD1XVOESPROC)load("glMultiTexCoord1xvOES"); + glad_glMultiTexCoord2xOES = (PFNGLMULTITEXCOORD2XOESPROC)load("glMultiTexCoord2xOES"); + glad_glMultiTexCoord2xvOES = (PFNGLMULTITEXCOORD2XVOESPROC)load("glMultiTexCoord2xvOES"); + glad_glMultiTexCoord3xOES = (PFNGLMULTITEXCOORD3XOESPROC)load("glMultiTexCoord3xOES"); + glad_glMultiTexCoord3xvOES = (PFNGLMULTITEXCOORD3XVOESPROC)load("glMultiTexCoord3xvOES"); + glad_glMultiTexCoord4xvOES = (PFNGLMULTITEXCOORD4XVOESPROC)load("glMultiTexCoord4xvOES"); + glad_glNormal3xvOES = (PFNGLNORMAL3XVOESPROC)load("glNormal3xvOES"); + glad_glPassThroughxOES = (PFNGLPASSTHROUGHXOESPROC)load("glPassThroughxOES"); + glad_glPixelMapx = (PFNGLPIXELMAPXPROC)load("glPixelMapx"); + glad_glPixelStorex = (PFNGLPIXELSTOREXPROC)load("glPixelStorex"); + glad_glPixelTransferxOES = (PFNGLPIXELTRANSFERXOESPROC)load("glPixelTransferxOES"); + glad_glPixelZoomxOES = (PFNGLPIXELZOOMXOESPROC)load("glPixelZoomxOES"); + glad_glPrioritizeTexturesxOES = (PFNGLPRIORITIZETEXTURESXOESPROC)load("glPrioritizeTexturesxOES"); + glad_glRasterPos2xOES = (PFNGLRASTERPOS2XOESPROC)load("glRasterPos2xOES"); + glad_glRasterPos2xvOES = (PFNGLRASTERPOS2XVOESPROC)load("glRasterPos2xvOES"); + glad_glRasterPos3xOES = (PFNGLRASTERPOS3XOESPROC)load("glRasterPos3xOES"); + glad_glRasterPos3xvOES = (PFNGLRASTERPOS3XVOESPROC)load("glRasterPos3xvOES"); + glad_glRasterPos4xOES = 
(PFNGLRASTERPOS4XOESPROC)load("glRasterPos4xOES"); + glad_glRasterPos4xvOES = (PFNGLRASTERPOS4XVOESPROC)load("glRasterPos4xvOES"); + glad_glRectxOES = (PFNGLRECTXOESPROC)load("glRectxOES"); + glad_glRectxvOES = (PFNGLRECTXVOESPROC)load("glRectxvOES"); + glad_glTexCoord1xOES = (PFNGLTEXCOORD1XOESPROC)load("glTexCoord1xOES"); + glad_glTexCoord1xvOES = (PFNGLTEXCOORD1XVOESPROC)load("glTexCoord1xvOES"); + glad_glTexCoord2xOES = (PFNGLTEXCOORD2XOESPROC)load("glTexCoord2xOES"); + glad_glTexCoord2xvOES = (PFNGLTEXCOORD2XVOESPROC)load("glTexCoord2xvOES"); + glad_glTexCoord3xOES = (PFNGLTEXCOORD3XOESPROC)load("glTexCoord3xOES"); + glad_glTexCoord3xvOES = (PFNGLTEXCOORD3XVOESPROC)load("glTexCoord3xvOES"); + glad_glTexCoord4xOES = (PFNGLTEXCOORD4XOESPROC)load("glTexCoord4xOES"); + glad_glTexCoord4xvOES = (PFNGLTEXCOORD4XVOESPROC)load("glTexCoord4xvOES"); + glad_glTexGenxOES = (PFNGLTEXGENXOESPROC)load("glTexGenxOES"); + glad_glTexGenxvOES = (PFNGLTEXGENXVOESPROC)load("glTexGenxvOES"); + glad_glVertex2xOES = (PFNGLVERTEX2XOESPROC)load("glVertex2xOES"); + glad_glVertex2xvOES = (PFNGLVERTEX2XVOESPROC)load("glVertex2xvOES"); + glad_glVertex3xOES = (PFNGLVERTEX3XOESPROC)load("glVertex3xOES"); + glad_glVertex3xvOES = (PFNGLVERTEX3XVOESPROC)load("glVertex3xvOES"); + glad_glVertex4xOES = (PFNGLVERTEX4XOESPROC)load("glVertex4xOES"); + glad_glVertex4xvOES = (PFNGLVERTEX4XVOESPROC)load("glVertex4xvOES"); +} +static void load_GL_OES_query_matrix(GLADloadproc load) { + if(!GLAD_GL_OES_query_matrix) return; + glad_glQueryMatrixxOES = (PFNGLQUERYMATRIXXOESPROC)load("glQueryMatrixxOES"); +} +static void load_GL_OES_single_precision(GLADloadproc load) { + if(!GLAD_GL_OES_single_precision) return; + glad_glClearDepthfOES = (PFNGLCLEARDEPTHFOESPROC)load("glClearDepthfOES"); + glad_glClipPlanefOES = (PFNGLCLIPPLANEFOESPROC)load("glClipPlanefOES"); + glad_glDepthRangefOES = (PFNGLDEPTHRANGEFOESPROC)load("glDepthRangefOES"); + glad_glFrustumfOES = (PFNGLFRUSTUMFOESPROC)load("glFrustumfOES"); + 
glad_glGetClipPlanefOES = (PFNGLGETCLIPPLANEFOESPROC)load("glGetClipPlanefOES"); + glad_glOrthofOES = (PFNGLORTHOFOESPROC)load("glOrthofOES"); +} +static void load_GL_OVR_multiview(GLADloadproc load) { + if(!GLAD_GL_OVR_multiview) return; + glad_glFramebufferTextureMultiviewOVR = (PFNGLFRAMEBUFFERTEXTUREMULTIVIEWOVRPROC)load("glFramebufferTextureMultiviewOVR"); + glad_glNamedFramebufferTextureMultiviewOVR = (PFNGLNAMEDFRAMEBUFFERTEXTUREMULTIVIEWOVRPROC)load("glNamedFramebufferTextureMultiviewOVR"); +} +static void load_GL_PGI_misc_hints(GLADloadproc load) { + if(!GLAD_GL_PGI_misc_hints) return; + glad_glHintPGI = (PFNGLHINTPGIPROC)load("glHintPGI"); +} +static void load_GL_SGIS_detail_texture(GLADloadproc load) { + if(!GLAD_GL_SGIS_detail_texture) return; + glad_glDetailTexFuncSGIS = (PFNGLDETAILTEXFUNCSGISPROC)load("glDetailTexFuncSGIS"); + glad_glGetDetailTexFuncSGIS = (PFNGLGETDETAILTEXFUNCSGISPROC)load("glGetDetailTexFuncSGIS"); +} +static void load_GL_SGIS_fog_function(GLADloadproc load) { + if(!GLAD_GL_SGIS_fog_function) return; + glad_glFogFuncSGIS = (PFNGLFOGFUNCSGISPROC)load("glFogFuncSGIS"); + glad_glGetFogFuncSGIS = (PFNGLGETFOGFUNCSGISPROC)load("glGetFogFuncSGIS"); +} +static void load_GL_SGIS_multisample(GLADloadproc load) { + if(!GLAD_GL_SGIS_multisample) return; + glad_glSampleMaskSGIS = (PFNGLSAMPLEMASKSGISPROC)load("glSampleMaskSGIS"); + glad_glSamplePatternSGIS = (PFNGLSAMPLEPATTERNSGISPROC)load("glSamplePatternSGIS"); +} +static void load_GL_SGIS_pixel_texture(GLADloadproc load) { + if(!GLAD_GL_SGIS_pixel_texture) return; + glad_glPixelTexGenParameteriSGIS = (PFNGLPIXELTEXGENPARAMETERISGISPROC)load("glPixelTexGenParameteriSGIS"); + glad_glPixelTexGenParameterivSGIS = (PFNGLPIXELTEXGENPARAMETERIVSGISPROC)load("glPixelTexGenParameterivSGIS"); + glad_glPixelTexGenParameterfSGIS = (PFNGLPIXELTEXGENPARAMETERFSGISPROC)load("glPixelTexGenParameterfSGIS"); + glad_glPixelTexGenParameterfvSGIS = 
(PFNGLPIXELTEXGENPARAMETERFVSGISPROC)load("glPixelTexGenParameterfvSGIS"); + glad_glGetPixelTexGenParameterivSGIS = (PFNGLGETPIXELTEXGENPARAMETERIVSGISPROC)load("glGetPixelTexGenParameterivSGIS"); + glad_glGetPixelTexGenParameterfvSGIS = (PFNGLGETPIXELTEXGENPARAMETERFVSGISPROC)load("glGetPixelTexGenParameterfvSGIS"); +} +static void load_GL_SGIS_point_parameters(GLADloadproc load) { + if(!GLAD_GL_SGIS_point_parameters) return; + glad_glPointParameterfSGIS = (PFNGLPOINTPARAMETERFSGISPROC)load("glPointParameterfSGIS"); + glad_glPointParameterfvSGIS = (PFNGLPOINTPARAMETERFVSGISPROC)load("glPointParameterfvSGIS"); +} +static void load_GL_SGIS_sharpen_texture(GLADloadproc load) { + if(!GLAD_GL_SGIS_sharpen_texture) return; + glad_glSharpenTexFuncSGIS = (PFNGLSHARPENTEXFUNCSGISPROC)load("glSharpenTexFuncSGIS"); + glad_glGetSharpenTexFuncSGIS = (PFNGLGETSHARPENTEXFUNCSGISPROC)load("glGetSharpenTexFuncSGIS"); +} +static void load_GL_SGIS_texture4D(GLADloadproc load) { + if(!GLAD_GL_SGIS_texture4D) return; + glad_glTexImage4DSGIS = (PFNGLTEXIMAGE4DSGISPROC)load("glTexImage4DSGIS"); + glad_glTexSubImage4DSGIS = (PFNGLTEXSUBIMAGE4DSGISPROC)load("glTexSubImage4DSGIS"); +} +static void load_GL_SGIS_texture_color_mask(GLADloadproc load) { + if(!GLAD_GL_SGIS_texture_color_mask) return; + glad_glTextureColorMaskSGIS = (PFNGLTEXTURECOLORMASKSGISPROC)load("glTextureColorMaskSGIS"); +} +static void load_GL_SGIS_texture_filter4(GLADloadproc load) { + if(!GLAD_GL_SGIS_texture_filter4) return; + glad_glGetTexFilterFuncSGIS = (PFNGLGETTEXFILTERFUNCSGISPROC)load("glGetTexFilterFuncSGIS"); + glad_glTexFilterFuncSGIS = (PFNGLTEXFILTERFUNCSGISPROC)load("glTexFilterFuncSGIS"); +} +static void load_GL_SGIX_async(GLADloadproc load) { + if(!GLAD_GL_SGIX_async) return; + glad_glAsyncMarkerSGIX = (PFNGLASYNCMARKERSGIXPROC)load("glAsyncMarkerSGIX"); + glad_glFinishAsyncSGIX = (PFNGLFINISHASYNCSGIXPROC)load("glFinishAsyncSGIX"); + glad_glPollAsyncSGIX = 
(PFNGLPOLLASYNCSGIXPROC)load("glPollAsyncSGIX"); + glad_glGenAsyncMarkersSGIX = (PFNGLGENASYNCMARKERSSGIXPROC)load("glGenAsyncMarkersSGIX"); + glad_glDeleteAsyncMarkersSGIX = (PFNGLDELETEASYNCMARKERSSGIXPROC)load("glDeleteAsyncMarkersSGIX"); + glad_glIsAsyncMarkerSGIX = (PFNGLISASYNCMARKERSGIXPROC)load("glIsAsyncMarkerSGIX"); +} +static void load_GL_SGIX_flush_raster(GLADloadproc load) { + if(!GLAD_GL_SGIX_flush_raster) return; + glad_glFlushRasterSGIX = (PFNGLFLUSHRASTERSGIXPROC)load("glFlushRasterSGIX"); +} +static void load_GL_SGIX_fragment_lighting(GLADloadproc load) { + if(!GLAD_GL_SGIX_fragment_lighting) return; + glad_glFragmentColorMaterialSGIX = (PFNGLFRAGMENTCOLORMATERIALSGIXPROC)load("glFragmentColorMaterialSGIX"); + glad_glFragmentLightfSGIX = (PFNGLFRAGMENTLIGHTFSGIXPROC)load("glFragmentLightfSGIX"); + glad_glFragmentLightfvSGIX = (PFNGLFRAGMENTLIGHTFVSGIXPROC)load("glFragmentLightfvSGIX"); + glad_glFragmentLightiSGIX = (PFNGLFRAGMENTLIGHTISGIXPROC)load("glFragmentLightiSGIX"); + glad_glFragmentLightivSGIX = (PFNGLFRAGMENTLIGHTIVSGIXPROC)load("glFragmentLightivSGIX"); + glad_glFragmentLightModelfSGIX = (PFNGLFRAGMENTLIGHTMODELFSGIXPROC)load("glFragmentLightModelfSGIX"); + glad_glFragmentLightModelfvSGIX = (PFNGLFRAGMENTLIGHTMODELFVSGIXPROC)load("glFragmentLightModelfvSGIX"); + glad_glFragmentLightModeliSGIX = (PFNGLFRAGMENTLIGHTMODELISGIXPROC)load("glFragmentLightModeliSGIX"); + glad_glFragmentLightModelivSGIX = (PFNGLFRAGMENTLIGHTMODELIVSGIXPROC)load("glFragmentLightModelivSGIX"); + glad_glFragmentMaterialfSGIX = (PFNGLFRAGMENTMATERIALFSGIXPROC)load("glFragmentMaterialfSGIX"); + glad_glFragmentMaterialfvSGIX = (PFNGLFRAGMENTMATERIALFVSGIXPROC)load("glFragmentMaterialfvSGIX"); + glad_glFragmentMaterialiSGIX = (PFNGLFRAGMENTMATERIALISGIXPROC)load("glFragmentMaterialiSGIX"); + glad_glFragmentMaterialivSGIX = (PFNGLFRAGMENTMATERIALIVSGIXPROC)load("glFragmentMaterialivSGIX"); + glad_glGetFragmentLightfvSGIX = 
(PFNGLGETFRAGMENTLIGHTFVSGIXPROC)load("glGetFragmentLightfvSGIX"); + glad_glGetFragmentLightivSGIX = (PFNGLGETFRAGMENTLIGHTIVSGIXPROC)load("glGetFragmentLightivSGIX"); + glad_glGetFragmentMaterialfvSGIX = (PFNGLGETFRAGMENTMATERIALFVSGIXPROC)load("glGetFragmentMaterialfvSGIX"); + glad_glGetFragmentMaterialivSGIX = (PFNGLGETFRAGMENTMATERIALIVSGIXPROC)load("glGetFragmentMaterialivSGIX"); + glad_glLightEnviSGIX = (PFNGLLIGHTENVISGIXPROC)load("glLightEnviSGIX"); +} +static void load_GL_SGIX_framezoom(GLADloadproc load) { + if(!GLAD_GL_SGIX_framezoom) return; + glad_glFrameZoomSGIX = (PFNGLFRAMEZOOMSGIXPROC)load("glFrameZoomSGIX"); +} +static void load_GL_SGIX_igloo_interface(GLADloadproc load) { + if(!GLAD_GL_SGIX_igloo_interface) return; + glad_glIglooInterfaceSGIX = (PFNGLIGLOOINTERFACESGIXPROC)load("glIglooInterfaceSGIX"); +} +static void load_GL_SGIX_instruments(GLADloadproc load) { + if(!GLAD_GL_SGIX_instruments) return; + glad_glGetInstrumentsSGIX = (PFNGLGETINSTRUMENTSSGIXPROC)load("glGetInstrumentsSGIX"); + glad_glInstrumentsBufferSGIX = (PFNGLINSTRUMENTSBUFFERSGIXPROC)load("glInstrumentsBufferSGIX"); + glad_glPollInstrumentsSGIX = (PFNGLPOLLINSTRUMENTSSGIXPROC)load("glPollInstrumentsSGIX"); + glad_glReadInstrumentsSGIX = (PFNGLREADINSTRUMENTSSGIXPROC)load("glReadInstrumentsSGIX"); + glad_glStartInstrumentsSGIX = (PFNGLSTARTINSTRUMENTSSGIXPROC)load("glStartInstrumentsSGIX"); + glad_glStopInstrumentsSGIX = (PFNGLSTOPINSTRUMENTSSGIXPROC)load("glStopInstrumentsSGIX"); +} +static void load_GL_SGIX_list_priority(GLADloadproc load) { + if(!GLAD_GL_SGIX_list_priority) return; + glad_glGetListParameterfvSGIX = (PFNGLGETLISTPARAMETERFVSGIXPROC)load("glGetListParameterfvSGIX"); + glad_glGetListParameterivSGIX = (PFNGLGETLISTPARAMETERIVSGIXPROC)load("glGetListParameterivSGIX"); + glad_glListParameterfSGIX = (PFNGLLISTPARAMETERFSGIXPROC)load("glListParameterfSGIX"); + glad_glListParameterfvSGIX = (PFNGLLISTPARAMETERFVSGIXPROC)load("glListParameterfvSGIX"); + 
glad_glListParameteriSGIX = (PFNGLLISTPARAMETERISGIXPROC)load("glListParameteriSGIX"); + glad_glListParameterivSGIX = (PFNGLLISTPARAMETERIVSGIXPROC)load("glListParameterivSGIX"); +} +static void load_GL_SGIX_pixel_texture(GLADloadproc load) { + if(!GLAD_GL_SGIX_pixel_texture) return; + glad_glPixelTexGenSGIX = (PFNGLPIXELTEXGENSGIXPROC)load("glPixelTexGenSGIX"); +} +static void load_GL_SGIX_polynomial_ffd(GLADloadproc load) { + if(!GLAD_GL_SGIX_polynomial_ffd) return; + glad_glDeformationMap3dSGIX = (PFNGLDEFORMATIONMAP3DSGIXPROC)load("glDeformationMap3dSGIX"); + glad_glDeformationMap3fSGIX = (PFNGLDEFORMATIONMAP3FSGIXPROC)load("glDeformationMap3fSGIX"); + glad_glDeformSGIX = (PFNGLDEFORMSGIXPROC)load("glDeformSGIX"); + glad_glLoadIdentityDeformationMapSGIX = (PFNGLLOADIDENTITYDEFORMATIONMAPSGIXPROC)load("glLoadIdentityDeformationMapSGIX"); +} +static void load_GL_SGIX_reference_plane(GLADloadproc load) { + if(!GLAD_GL_SGIX_reference_plane) return; + glad_glReferencePlaneSGIX = (PFNGLREFERENCEPLANESGIXPROC)load("glReferencePlaneSGIX"); +} +static void load_GL_SGIX_sprite(GLADloadproc load) { + if(!GLAD_GL_SGIX_sprite) return; + glad_glSpriteParameterfSGIX = (PFNGLSPRITEPARAMETERFSGIXPROC)load("glSpriteParameterfSGIX"); + glad_glSpriteParameterfvSGIX = (PFNGLSPRITEPARAMETERFVSGIXPROC)load("glSpriteParameterfvSGIX"); + glad_glSpriteParameteriSGIX = (PFNGLSPRITEPARAMETERISGIXPROC)load("glSpriteParameteriSGIX"); + glad_glSpriteParameterivSGIX = (PFNGLSPRITEPARAMETERIVSGIXPROC)load("glSpriteParameterivSGIX"); +} +static void load_GL_SGIX_tag_sample_buffer(GLADloadproc load) { + if(!GLAD_GL_SGIX_tag_sample_buffer) return; + glad_glTagSampleBufferSGIX = (PFNGLTAGSAMPLEBUFFERSGIXPROC)load("glTagSampleBufferSGIX"); +} +static void load_GL_SGI_color_table(GLADloadproc load) { + if(!GLAD_GL_SGI_color_table) return; + glad_glColorTableSGI = (PFNGLCOLORTABLESGIPROC)load("glColorTableSGI"); + glad_glColorTableParameterfvSGI = 
(PFNGLCOLORTABLEPARAMETERFVSGIPROC)load("glColorTableParameterfvSGI"); + glad_glColorTableParameterivSGI = (PFNGLCOLORTABLEPARAMETERIVSGIPROC)load("glColorTableParameterivSGI"); + glad_glCopyColorTableSGI = (PFNGLCOPYCOLORTABLESGIPROC)load("glCopyColorTableSGI"); + glad_glGetColorTableSGI = (PFNGLGETCOLORTABLESGIPROC)load("glGetColorTableSGI"); + glad_glGetColorTableParameterfvSGI = (PFNGLGETCOLORTABLEPARAMETERFVSGIPROC)load("glGetColorTableParameterfvSGI"); + glad_glGetColorTableParameterivSGI = (PFNGLGETCOLORTABLEPARAMETERIVSGIPROC)load("glGetColorTableParameterivSGI"); +} +static void load_GL_SUNX_constant_data(GLADloadproc load) { + if(!GLAD_GL_SUNX_constant_data) return; + glad_glFinishTextureSUNX = (PFNGLFINISHTEXTURESUNXPROC)load("glFinishTextureSUNX"); +} +static void load_GL_SUN_global_alpha(GLADloadproc load) { + if(!GLAD_GL_SUN_global_alpha) return; + glad_glGlobalAlphaFactorbSUN = (PFNGLGLOBALALPHAFACTORBSUNPROC)load("glGlobalAlphaFactorbSUN"); + glad_glGlobalAlphaFactorsSUN = (PFNGLGLOBALALPHAFACTORSSUNPROC)load("glGlobalAlphaFactorsSUN"); + glad_glGlobalAlphaFactoriSUN = (PFNGLGLOBALALPHAFACTORISUNPROC)load("glGlobalAlphaFactoriSUN"); + glad_glGlobalAlphaFactorfSUN = (PFNGLGLOBALALPHAFACTORFSUNPROC)load("glGlobalAlphaFactorfSUN"); + glad_glGlobalAlphaFactordSUN = (PFNGLGLOBALALPHAFACTORDSUNPROC)load("glGlobalAlphaFactordSUN"); + glad_glGlobalAlphaFactorubSUN = (PFNGLGLOBALALPHAFACTORUBSUNPROC)load("glGlobalAlphaFactorubSUN"); + glad_glGlobalAlphaFactorusSUN = (PFNGLGLOBALALPHAFACTORUSSUNPROC)load("glGlobalAlphaFactorusSUN"); + glad_glGlobalAlphaFactoruiSUN = (PFNGLGLOBALALPHAFACTORUISUNPROC)load("glGlobalAlphaFactoruiSUN"); +} +static void load_GL_SUN_mesh_array(GLADloadproc load) { + if(!GLAD_GL_SUN_mesh_array) return; + glad_glDrawMeshArraysSUN = (PFNGLDRAWMESHARRAYSSUNPROC)load("glDrawMeshArraysSUN"); +} +static void load_GL_SUN_triangle_list(GLADloadproc load) { + if(!GLAD_GL_SUN_triangle_list) return; + glad_glReplacementCodeuiSUN = 
(PFNGLREPLACEMENTCODEUISUNPROC)load("glReplacementCodeuiSUN"); + glad_glReplacementCodeusSUN = (PFNGLREPLACEMENTCODEUSSUNPROC)load("glReplacementCodeusSUN"); + glad_glReplacementCodeubSUN = (PFNGLREPLACEMENTCODEUBSUNPROC)load("glReplacementCodeubSUN"); + glad_glReplacementCodeuivSUN = (PFNGLREPLACEMENTCODEUIVSUNPROC)load("glReplacementCodeuivSUN"); + glad_glReplacementCodeusvSUN = (PFNGLREPLACEMENTCODEUSVSUNPROC)load("glReplacementCodeusvSUN"); + glad_glReplacementCodeubvSUN = (PFNGLREPLACEMENTCODEUBVSUNPROC)load("glReplacementCodeubvSUN"); + glad_glReplacementCodePointerSUN = (PFNGLREPLACEMENTCODEPOINTERSUNPROC)load("glReplacementCodePointerSUN"); +} +static void load_GL_SUN_vertex(GLADloadproc load) { + if(!GLAD_GL_SUN_vertex) return; + glad_glColor4ubVertex2fSUN = (PFNGLCOLOR4UBVERTEX2FSUNPROC)load("glColor4ubVertex2fSUN"); + glad_glColor4ubVertex2fvSUN = (PFNGLCOLOR4UBVERTEX2FVSUNPROC)load("glColor4ubVertex2fvSUN"); + glad_glColor4ubVertex3fSUN = (PFNGLCOLOR4UBVERTEX3FSUNPROC)load("glColor4ubVertex3fSUN"); + glad_glColor4ubVertex3fvSUN = (PFNGLCOLOR4UBVERTEX3FVSUNPROC)load("glColor4ubVertex3fvSUN"); + glad_glColor3fVertex3fSUN = (PFNGLCOLOR3FVERTEX3FSUNPROC)load("glColor3fVertex3fSUN"); + glad_glColor3fVertex3fvSUN = (PFNGLCOLOR3FVERTEX3FVSUNPROC)load("glColor3fVertex3fvSUN"); + glad_glNormal3fVertex3fSUN = (PFNGLNORMAL3FVERTEX3FSUNPROC)load("glNormal3fVertex3fSUN"); + glad_glNormal3fVertex3fvSUN = (PFNGLNORMAL3FVERTEX3FVSUNPROC)load("glNormal3fVertex3fvSUN"); + glad_glColor4fNormal3fVertex3fSUN = (PFNGLCOLOR4FNORMAL3FVERTEX3FSUNPROC)load("glColor4fNormal3fVertex3fSUN"); + glad_glColor4fNormal3fVertex3fvSUN = (PFNGLCOLOR4FNORMAL3FVERTEX3FVSUNPROC)load("glColor4fNormal3fVertex3fvSUN"); + glad_glTexCoord2fVertex3fSUN = (PFNGLTEXCOORD2FVERTEX3FSUNPROC)load("glTexCoord2fVertex3fSUN"); + glad_glTexCoord2fVertex3fvSUN = (PFNGLTEXCOORD2FVERTEX3FVSUNPROC)load("glTexCoord2fVertex3fvSUN"); + glad_glTexCoord4fVertex4fSUN = 
(PFNGLTEXCOORD4FVERTEX4FSUNPROC)load("glTexCoord4fVertex4fSUN"); + glad_glTexCoord4fVertex4fvSUN = (PFNGLTEXCOORD4FVERTEX4FVSUNPROC)load("glTexCoord4fVertex4fvSUN"); + glad_glTexCoord2fColor4ubVertex3fSUN = (PFNGLTEXCOORD2FCOLOR4UBVERTEX3FSUNPROC)load("glTexCoord2fColor4ubVertex3fSUN"); + glad_glTexCoord2fColor4ubVertex3fvSUN = (PFNGLTEXCOORD2FCOLOR4UBVERTEX3FVSUNPROC)load("glTexCoord2fColor4ubVertex3fvSUN"); + glad_glTexCoord2fColor3fVertex3fSUN = (PFNGLTEXCOORD2FCOLOR3FVERTEX3FSUNPROC)load("glTexCoord2fColor3fVertex3fSUN"); + glad_glTexCoord2fColor3fVertex3fvSUN = (PFNGLTEXCOORD2FCOLOR3FVERTEX3FVSUNPROC)load("glTexCoord2fColor3fVertex3fvSUN"); + glad_glTexCoord2fNormal3fVertex3fSUN = (PFNGLTEXCOORD2FNORMAL3FVERTEX3FSUNPROC)load("glTexCoord2fNormal3fVertex3fSUN"); + glad_glTexCoord2fNormal3fVertex3fvSUN = (PFNGLTEXCOORD2FNORMAL3FVERTEX3FVSUNPROC)load("glTexCoord2fNormal3fVertex3fvSUN"); + glad_glTexCoord2fColor4fNormal3fVertex3fSUN = (PFNGLTEXCOORD2FCOLOR4FNORMAL3FVERTEX3FSUNPROC)load("glTexCoord2fColor4fNormal3fVertex3fSUN"); + glad_glTexCoord2fColor4fNormal3fVertex3fvSUN = (PFNGLTEXCOORD2FCOLOR4FNORMAL3FVERTEX3FVSUNPROC)load("glTexCoord2fColor4fNormal3fVertex3fvSUN"); + glad_glTexCoord4fColor4fNormal3fVertex4fSUN = (PFNGLTEXCOORD4FCOLOR4FNORMAL3FVERTEX4FSUNPROC)load("glTexCoord4fColor4fNormal3fVertex4fSUN"); + glad_glTexCoord4fColor4fNormal3fVertex4fvSUN = (PFNGLTEXCOORD4FCOLOR4FNORMAL3FVERTEX4FVSUNPROC)load("glTexCoord4fColor4fNormal3fVertex4fvSUN"); + glad_glReplacementCodeuiVertex3fSUN = (PFNGLREPLACEMENTCODEUIVERTEX3FSUNPROC)load("glReplacementCodeuiVertex3fSUN"); + glad_glReplacementCodeuiVertex3fvSUN = (PFNGLREPLACEMENTCODEUIVERTEX3FVSUNPROC)load("glReplacementCodeuiVertex3fvSUN"); + glad_glReplacementCodeuiColor4ubVertex3fSUN = (PFNGLREPLACEMENTCODEUICOLOR4UBVERTEX3FSUNPROC)load("glReplacementCodeuiColor4ubVertex3fSUN"); + glad_glReplacementCodeuiColor4ubVertex3fvSUN = 
(PFNGLREPLACEMENTCODEUICOLOR4UBVERTEX3FVSUNPROC)load("glReplacementCodeuiColor4ubVertex3fvSUN"); + glad_glReplacementCodeuiColor3fVertex3fSUN = (PFNGLREPLACEMENTCODEUICOLOR3FVERTEX3FSUNPROC)load("glReplacementCodeuiColor3fVertex3fSUN"); + glad_glReplacementCodeuiColor3fVertex3fvSUN = (PFNGLREPLACEMENTCODEUICOLOR3FVERTEX3FVSUNPROC)load("glReplacementCodeuiColor3fVertex3fvSUN"); + glad_glReplacementCodeuiNormal3fVertex3fSUN = (PFNGLREPLACEMENTCODEUINORMAL3FVERTEX3FSUNPROC)load("glReplacementCodeuiNormal3fVertex3fSUN"); + glad_glReplacementCodeuiNormal3fVertex3fvSUN = (PFNGLREPLACEMENTCODEUINORMAL3FVERTEX3FVSUNPROC)load("glReplacementCodeuiNormal3fVertex3fvSUN"); + glad_glReplacementCodeuiColor4fNormal3fVertex3fSUN = (PFNGLREPLACEMENTCODEUICOLOR4FNORMAL3FVERTEX3FSUNPROC)load("glReplacementCodeuiColor4fNormal3fVertex3fSUN"); + glad_glReplacementCodeuiColor4fNormal3fVertex3fvSUN = (PFNGLREPLACEMENTCODEUICOLOR4FNORMAL3FVERTEX3FVSUNPROC)load("glReplacementCodeuiColor4fNormal3fVertex3fvSUN"); + glad_glReplacementCodeuiTexCoord2fVertex3fSUN = (PFNGLREPLACEMENTCODEUITEXCOORD2FVERTEX3FSUNPROC)load("glReplacementCodeuiTexCoord2fVertex3fSUN"); + glad_glReplacementCodeuiTexCoord2fVertex3fvSUN = (PFNGLREPLACEMENTCODEUITEXCOORD2FVERTEX3FVSUNPROC)load("glReplacementCodeuiTexCoord2fVertex3fvSUN"); + glad_glReplacementCodeuiTexCoord2fNormal3fVertex3fSUN = (PFNGLREPLACEMENTCODEUITEXCOORD2FNORMAL3FVERTEX3FSUNPROC)load("glReplacementCodeuiTexCoord2fNormal3fVertex3fSUN"); + glad_glReplacementCodeuiTexCoord2fNormal3fVertex3fvSUN = (PFNGLREPLACEMENTCODEUITEXCOORD2FNORMAL3FVERTEX3FVSUNPROC)load("glReplacementCodeuiTexCoord2fNormal3fVertex3fvSUN"); + glad_glReplacementCodeuiTexCoord2fColor4fNormal3fVertex3fSUN = (PFNGLREPLACEMENTCODEUITEXCOORD2FCOLOR4FNORMAL3FVERTEX3FSUNPROC)load("glReplacementCodeuiTexCoord2fColor4fNormal3fVertex3fSUN"); + glad_glReplacementCodeuiTexCoord2fColor4fNormal3fVertex3fvSUN = 
(PFNGLREPLACEMENTCODEUITEXCOORD2FCOLOR4FNORMAL3FVERTEX3FVSUNPROC)load("glReplacementCodeuiTexCoord2fColor4fNormal3fVertex3fvSUN"); +} +static int find_extensionsGL(void) { + if (!get_exts()) return 0; + GLAD_GL_3DFX_multisample = has_ext("GL_3DFX_multisample"); + GLAD_GL_3DFX_tbuffer = has_ext("GL_3DFX_tbuffer"); + GLAD_GL_3DFX_texture_compression_FXT1 = has_ext("GL_3DFX_texture_compression_FXT1"); + GLAD_GL_AMD_blend_minmax_factor = has_ext("GL_AMD_blend_minmax_factor"); + GLAD_GL_AMD_conservative_depth = has_ext("GL_AMD_conservative_depth"); + GLAD_GL_AMD_debug_output = has_ext("GL_AMD_debug_output"); + GLAD_GL_AMD_depth_clamp_separate = has_ext("GL_AMD_depth_clamp_separate"); + GLAD_GL_AMD_draw_buffers_blend = has_ext("GL_AMD_draw_buffers_blend"); + GLAD_GL_AMD_framebuffer_multisample_advanced = has_ext("GL_AMD_framebuffer_multisample_advanced"); + GLAD_GL_AMD_framebuffer_sample_positions = has_ext("GL_AMD_framebuffer_sample_positions"); + GLAD_GL_AMD_gcn_shader = has_ext("GL_AMD_gcn_shader"); + GLAD_GL_AMD_gpu_shader_half_float = has_ext("GL_AMD_gpu_shader_half_float"); + GLAD_GL_AMD_gpu_shader_int16 = has_ext("GL_AMD_gpu_shader_int16"); + GLAD_GL_AMD_gpu_shader_int64 = has_ext("GL_AMD_gpu_shader_int64"); + GLAD_GL_AMD_interleaved_elements = has_ext("GL_AMD_interleaved_elements"); + GLAD_GL_AMD_multi_draw_indirect = has_ext("GL_AMD_multi_draw_indirect"); + GLAD_GL_AMD_name_gen_delete = has_ext("GL_AMD_name_gen_delete"); + GLAD_GL_AMD_occlusion_query_event = has_ext("GL_AMD_occlusion_query_event"); + GLAD_GL_AMD_performance_monitor = has_ext("GL_AMD_performance_monitor"); + GLAD_GL_AMD_pinned_memory = has_ext("GL_AMD_pinned_memory"); + GLAD_GL_AMD_query_buffer_object = has_ext("GL_AMD_query_buffer_object"); + GLAD_GL_AMD_sample_positions = has_ext("GL_AMD_sample_positions"); + GLAD_GL_AMD_seamless_cubemap_per_texture = has_ext("GL_AMD_seamless_cubemap_per_texture"); + GLAD_GL_AMD_shader_atomic_counter_ops = has_ext("GL_AMD_shader_atomic_counter_ops"); + 
GLAD_GL_AMD_shader_ballot = has_ext("GL_AMD_shader_ballot"); + GLAD_GL_AMD_shader_explicit_vertex_parameter = has_ext("GL_AMD_shader_explicit_vertex_parameter"); + GLAD_GL_AMD_shader_gpu_shader_half_float_fetch = has_ext("GL_AMD_shader_gpu_shader_half_float_fetch"); + GLAD_GL_AMD_shader_image_load_store_lod = has_ext("GL_AMD_shader_image_load_store_lod"); + GLAD_GL_AMD_shader_stencil_export = has_ext("GL_AMD_shader_stencil_export"); + GLAD_GL_AMD_shader_trinary_minmax = has_ext("GL_AMD_shader_trinary_minmax"); + GLAD_GL_AMD_sparse_texture = has_ext("GL_AMD_sparse_texture"); + GLAD_GL_AMD_stencil_operation_extended = has_ext("GL_AMD_stencil_operation_extended"); + GLAD_GL_AMD_texture_gather_bias_lod = has_ext("GL_AMD_texture_gather_bias_lod"); + GLAD_GL_AMD_texture_texture4 = has_ext("GL_AMD_texture_texture4"); + GLAD_GL_AMD_transform_feedback3_lines_triangles = has_ext("GL_AMD_transform_feedback3_lines_triangles"); + GLAD_GL_AMD_transform_feedback4 = has_ext("GL_AMD_transform_feedback4"); + GLAD_GL_AMD_vertex_shader_layer = has_ext("GL_AMD_vertex_shader_layer"); + GLAD_GL_AMD_vertex_shader_tessellator = has_ext("GL_AMD_vertex_shader_tessellator"); + GLAD_GL_AMD_vertex_shader_viewport_index = has_ext("GL_AMD_vertex_shader_viewport_index"); + GLAD_GL_APPLE_aux_depth_stencil = has_ext("GL_APPLE_aux_depth_stencil"); + GLAD_GL_APPLE_client_storage = has_ext("GL_APPLE_client_storage"); + GLAD_GL_APPLE_element_array = has_ext("GL_APPLE_element_array"); + GLAD_GL_APPLE_fence = has_ext("GL_APPLE_fence"); + GLAD_GL_APPLE_float_pixels = has_ext("GL_APPLE_float_pixels"); + GLAD_GL_APPLE_flush_buffer_range = has_ext("GL_APPLE_flush_buffer_range"); + GLAD_GL_APPLE_object_purgeable = has_ext("GL_APPLE_object_purgeable"); + GLAD_GL_APPLE_rgb_422 = has_ext("GL_APPLE_rgb_422"); + GLAD_GL_APPLE_row_bytes = has_ext("GL_APPLE_row_bytes"); + GLAD_GL_APPLE_specular_vector = has_ext("GL_APPLE_specular_vector"); + GLAD_GL_APPLE_texture_range = has_ext("GL_APPLE_texture_range"); + 
GLAD_GL_APPLE_transform_hint = has_ext("GL_APPLE_transform_hint"); + GLAD_GL_APPLE_vertex_array_object = has_ext("GL_APPLE_vertex_array_object"); + GLAD_GL_APPLE_vertex_array_range = has_ext("GL_APPLE_vertex_array_range"); + GLAD_GL_APPLE_vertex_program_evaluators = has_ext("GL_APPLE_vertex_program_evaluators"); + GLAD_GL_APPLE_ycbcr_422 = has_ext("GL_APPLE_ycbcr_422"); + GLAD_GL_ARB_ES2_compatibility = has_ext("GL_ARB_ES2_compatibility"); + GLAD_GL_ARB_ES3_1_compatibility = has_ext("GL_ARB_ES3_1_compatibility"); + GLAD_GL_ARB_ES3_2_compatibility = has_ext("GL_ARB_ES3_2_compatibility"); + GLAD_GL_ARB_ES3_compatibility = has_ext("GL_ARB_ES3_compatibility"); + GLAD_GL_ARB_arrays_of_arrays = has_ext("GL_ARB_arrays_of_arrays"); + GLAD_GL_ARB_base_instance = has_ext("GL_ARB_base_instance"); + GLAD_GL_ARB_bindless_texture = has_ext("GL_ARB_bindless_texture"); + GLAD_GL_ARB_blend_func_extended = has_ext("GL_ARB_blend_func_extended"); + GLAD_GL_ARB_buffer_storage = has_ext("GL_ARB_buffer_storage"); + GLAD_GL_ARB_cl_event = has_ext("GL_ARB_cl_event"); + GLAD_GL_ARB_clear_buffer_object = has_ext("GL_ARB_clear_buffer_object"); + GLAD_GL_ARB_clear_texture = has_ext("GL_ARB_clear_texture"); + GLAD_GL_ARB_clip_control = has_ext("GL_ARB_clip_control"); + GLAD_GL_ARB_color_buffer_float = has_ext("GL_ARB_color_buffer_float"); + GLAD_GL_ARB_compatibility = has_ext("GL_ARB_compatibility"); + GLAD_GL_ARB_compressed_texture_pixel_storage = has_ext("GL_ARB_compressed_texture_pixel_storage"); + GLAD_GL_ARB_compute_shader = has_ext("GL_ARB_compute_shader"); + GLAD_GL_ARB_compute_variable_group_size = has_ext("GL_ARB_compute_variable_group_size"); + GLAD_GL_ARB_conditional_render_inverted = has_ext("GL_ARB_conditional_render_inverted"); + GLAD_GL_ARB_conservative_depth = has_ext("GL_ARB_conservative_depth"); + GLAD_GL_ARB_copy_buffer = has_ext("GL_ARB_copy_buffer"); + GLAD_GL_ARB_copy_image = has_ext("GL_ARB_copy_image"); + GLAD_GL_ARB_cull_distance = has_ext("GL_ARB_cull_distance"); + 
GLAD_GL_ARB_debug_output = has_ext("GL_ARB_debug_output"); + GLAD_GL_ARB_depth_buffer_float = has_ext("GL_ARB_depth_buffer_float"); + GLAD_GL_ARB_depth_clamp = has_ext("GL_ARB_depth_clamp"); + GLAD_GL_ARB_depth_texture = has_ext("GL_ARB_depth_texture"); + GLAD_GL_ARB_derivative_control = has_ext("GL_ARB_derivative_control"); + GLAD_GL_ARB_direct_state_access = has_ext("GL_ARB_direct_state_access"); + GLAD_GL_ARB_draw_buffers = has_ext("GL_ARB_draw_buffers"); + GLAD_GL_ARB_draw_buffers_blend = has_ext("GL_ARB_draw_buffers_blend"); + GLAD_GL_ARB_draw_elements_base_vertex = has_ext("GL_ARB_draw_elements_base_vertex"); + GLAD_GL_ARB_draw_indirect = has_ext("GL_ARB_draw_indirect"); + GLAD_GL_ARB_draw_instanced = has_ext("GL_ARB_draw_instanced"); + GLAD_GL_ARB_enhanced_layouts = has_ext("GL_ARB_enhanced_layouts"); + GLAD_GL_ARB_explicit_attrib_location = has_ext("GL_ARB_explicit_attrib_location"); + GLAD_GL_ARB_explicit_uniform_location = has_ext("GL_ARB_explicit_uniform_location"); + GLAD_GL_ARB_fragment_coord_conventions = has_ext("GL_ARB_fragment_coord_conventions"); + GLAD_GL_ARB_fragment_layer_viewport = has_ext("GL_ARB_fragment_layer_viewport"); + GLAD_GL_ARB_fragment_program = has_ext("GL_ARB_fragment_program"); + GLAD_GL_ARB_fragment_program_shadow = has_ext("GL_ARB_fragment_program_shadow"); + GLAD_GL_ARB_fragment_shader = has_ext("GL_ARB_fragment_shader"); + GLAD_GL_ARB_fragment_shader_interlock = has_ext("GL_ARB_fragment_shader_interlock"); + GLAD_GL_ARB_framebuffer_no_attachments = has_ext("GL_ARB_framebuffer_no_attachments"); + GLAD_GL_ARB_framebuffer_object = has_ext("GL_ARB_framebuffer_object"); + GLAD_GL_ARB_framebuffer_sRGB = has_ext("GL_ARB_framebuffer_sRGB"); + GLAD_GL_ARB_geometry_shader4 = has_ext("GL_ARB_geometry_shader4"); + GLAD_GL_ARB_get_program_binary = has_ext("GL_ARB_get_program_binary"); + GLAD_GL_ARB_get_texture_sub_image = has_ext("GL_ARB_get_texture_sub_image"); + GLAD_GL_ARB_gl_spirv = has_ext("GL_ARB_gl_spirv"); + 
GLAD_GL_ARB_gpu_shader5 = has_ext("GL_ARB_gpu_shader5"); + GLAD_GL_ARB_gpu_shader_fp64 = has_ext("GL_ARB_gpu_shader_fp64"); + GLAD_GL_ARB_gpu_shader_int64 = has_ext("GL_ARB_gpu_shader_int64"); + GLAD_GL_ARB_half_float_pixel = has_ext("GL_ARB_half_float_pixel"); + GLAD_GL_ARB_half_float_vertex = has_ext("GL_ARB_half_float_vertex"); + GLAD_GL_ARB_imaging = has_ext("GL_ARB_imaging"); + GLAD_GL_ARB_indirect_parameters = has_ext("GL_ARB_indirect_parameters"); + GLAD_GL_ARB_instanced_arrays = has_ext("GL_ARB_instanced_arrays"); + GLAD_GL_ARB_internalformat_query = has_ext("GL_ARB_internalformat_query"); + GLAD_GL_ARB_internalformat_query2 = has_ext("GL_ARB_internalformat_query2"); + GLAD_GL_ARB_invalidate_subdata = has_ext("GL_ARB_invalidate_subdata"); + GLAD_GL_ARB_map_buffer_alignment = has_ext("GL_ARB_map_buffer_alignment"); + GLAD_GL_ARB_map_buffer_range = has_ext("GL_ARB_map_buffer_range"); + GLAD_GL_ARB_matrix_palette = has_ext("GL_ARB_matrix_palette"); + GLAD_GL_ARB_multi_bind = has_ext("GL_ARB_multi_bind"); + GLAD_GL_ARB_multi_draw_indirect = has_ext("GL_ARB_multi_draw_indirect"); + GLAD_GL_ARB_multisample = has_ext("GL_ARB_multisample"); + GLAD_GL_ARB_multitexture = has_ext("GL_ARB_multitexture"); + GLAD_GL_ARB_occlusion_query = has_ext("GL_ARB_occlusion_query"); + GLAD_GL_ARB_occlusion_query2 = has_ext("GL_ARB_occlusion_query2"); + GLAD_GL_ARB_parallel_shader_compile = has_ext("GL_ARB_parallel_shader_compile"); + GLAD_GL_ARB_pipeline_statistics_query = has_ext("GL_ARB_pipeline_statistics_query"); + GLAD_GL_ARB_pixel_buffer_object = has_ext("GL_ARB_pixel_buffer_object"); + GLAD_GL_ARB_point_parameters = has_ext("GL_ARB_point_parameters"); + GLAD_GL_ARB_point_sprite = has_ext("GL_ARB_point_sprite"); + GLAD_GL_ARB_polygon_offset_clamp = has_ext("GL_ARB_polygon_offset_clamp"); + GLAD_GL_ARB_post_depth_coverage = has_ext("GL_ARB_post_depth_coverage"); + GLAD_GL_ARB_program_interface_query = has_ext("GL_ARB_program_interface_query"); + GLAD_GL_ARB_provoking_vertex = 
has_ext("GL_ARB_provoking_vertex"); + GLAD_GL_ARB_query_buffer_object = has_ext("GL_ARB_query_buffer_object"); + GLAD_GL_ARB_robust_buffer_access_behavior = has_ext("GL_ARB_robust_buffer_access_behavior"); + GLAD_GL_ARB_robustness = has_ext("GL_ARB_robustness"); + GLAD_GL_ARB_robustness_isolation = has_ext("GL_ARB_robustness_isolation"); + GLAD_GL_ARB_sample_locations = has_ext("GL_ARB_sample_locations"); + GLAD_GL_ARB_sample_shading = has_ext("GL_ARB_sample_shading"); + GLAD_GL_ARB_sampler_objects = has_ext("GL_ARB_sampler_objects"); + GLAD_GL_ARB_seamless_cube_map = has_ext("GL_ARB_seamless_cube_map"); + GLAD_GL_ARB_seamless_cubemap_per_texture = has_ext("GL_ARB_seamless_cubemap_per_texture"); + GLAD_GL_ARB_separate_shader_objects = has_ext("GL_ARB_separate_shader_objects"); + GLAD_GL_ARB_shader_atomic_counter_ops = has_ext("GL_ARB_shader_atomic_counter_ops"); + GLAD_GL_ARB_shader_atomic_counters = has_ext("GL_ARB_shader_atomic_counters"); + GLAD_GL_ARB_shader_ballot = has_ext("GL_ARB_shader_ballot"); + GLAD_GL_ARB_shader_bit_encoding = has_ext("GL_ARB_shader_bit_encoding"); + GLAD_GL_ARB_shader_clock = has_ext("GL_ARB_shader_clock"); + GLAD_GL_ARB_shader_draw_parameters = has_ext("GL_ARB_shader_draw_parameters"); + GLAD_GL_ARB_shader_group_vote = has_ext("GL_ARB_shader_group_vote"); + GLAD_GL_ARB_shader_image_load_store = has_ext("GL_ARB_shader_image_load_store"); + GLAD_GL_ARB_shader_image_size = has_ext("GL_ARB_shader_image_size"); + GLAD_GL_ARB_shader_objects = has_ext("GL_ARB_shader_objects"); + GLAD_GL_ARB_shader_precision = has_ext("GL_ARB_shader_precision"); + GLAD_GL_ARB_shader_stencil_export = has_ext("GL_ARB_shader_stencil_export"); + GLAD_GL_ARB_shader_storage_buffer_object = has_ext("GL_ARB_shader_storage_buffer_object"); + GLAD_GL_ARB_shader_subroutine = has_ext("GL_ARB_shader_subroutine"); + GLAD_GL_ARB_shader_texture_image_samples = has_ext("GL_ARB_shader_texture_image_samples"); + GLAD_GL_ARB_shader_texture_lod = 
has_ext("GL_ARB_shader_texture_lod"); + GLAD_GL_ARB_shader_viewport_layer_array = has_ext("GL_ARB_shader_viewport_layer_array"); + GLAD_GL_ARB_shading_language_100 = has_ext("GL_ARB_shading_language_100"); + GLAD_GL_ARB_shading_language_420pack = has_ext("GL_ARB_shading_language_420pack"); + GLAD_GL_ARB_shading_language_include = has_ext("GL_ARB_shading_language_include"); + GLAD_GL_ARB_shading_language_packing = has_ext("GL_ARB_shading_language_packing"); + GLAD_GL_ARB_shadow = has_ext("GL_ARB_shadow"); + GLAD_GL_ARB_shadow_ambient = has_ext("GL_ARB_shadow_ambient"); + GLAD_GL_ARB_sparse_buffer = has_ext("GL_ARB_sparse_buffer"); + GLAD_GL_ARB_sparse_texture = has_ext("GL_ARB_sparse_texture"); + GLAD_GL_ARB_sparse_texture2 = has_ext("GL_ARB_sparse_texture2"); + GLAD_GL_ARB_sparse_texture_clamp = has_ext("GL_ARB_sparse_texture_clamp"); + GLAD_GL_ARB_spirv_extensions = has_ext("GL_ARB_spirv_extensions"); + GLAD_GL_ARB_stencil_texturing = has_ext("GL_ARB_stencil_texturing"); + GLAD_GL_ARB_sync = has_ext("GL_ARB_sync"); + GLAD_GL_ARB_tessellation_shader = has_ext("GL_ARB_tessellation_shader"); + GLAD_GL_ARB_texture_barrier = has_ext("GL_ARB_texture_barrier"); + GLAD_GL_ARB_texture_border_clamp = has_ext("GL_ARB_texture_border_clamp"); + GLAD_GL_ARB_texture_buffer_object = has_ext("GL_ARB_texture_buffer_object"); + GLAD_GL_ARB_texture_buffer_object_rgb32 = has_ext("GL_ARB_texture_buffer_object_rgb32"); + GLAD_GL_ARB_texture_buffer_range = has_ext("GL_ARB_texture_buffer_range"); + GLAD_GL_ARB_texture_compression = has_ext("GL_ARB_texture_compression"); + GLAD_GL_ARB_texture_compression_bptc = has_ext("GL_ARB_texture_compression_bptc"); + GLAD_GL_ARB_texture_compression_rgtc = has_ext("GL_ARB_texture_compression_rgtc"); + GLAD_GL_ARB_texture_cube_map = has_ext("GL_ARB_texture_cube_map"); + GLAD_GL_ARB_texture_cube_map_array = has_ext("GL_ARB_texture_cube_map_array"); + GLAD_GL_ARB_texture_env_add = has_ext("GL_ARB_texture_env_add"); + GLAD_GL_ARB_texture_env_combine = 
has_ext("GL_ARB_texture_env_combine"); + GLAD_GL_ARB_texture_env_crossbar = has_ext("GL_ARB_texture_env_crossbar"); + GLAD_GL_ARB_texture_env_dot3 = has_ext("GL_ARB_texture_env_dot3"); + GLAD_GL_ARB_texture_filter_anisotropic = has_ext("GL_ARB_texture_filter_anisotropic"); + GLAD_GL_ARB_texture_filter_minmax = has_ext("GL_ARB_texture_filter_minmax"); + GLAD_GL_ARB_texture_float = has_ext("GL_ARB_texture_float"); + GLAD_GL_ARB_texture_gather = has_ext("GL_ARB_texture_gather"); + GLAD_GL_ARB_texture_mirror_clamp_to_edge = has_ext("GL_ARB_texture_mirror_clamp_to_edge"); + GLAD_GL_ARB_texture_mirrored_repeat = has_ext("GL_ARB_texture_mirrored_repeat"); + GLAD_GL_ARB_texture_multisample = has_ext("GL_ARB_texture_multisample"); + GLAD_GL_ARB_texture_non_power_of_two = has_ext("GL_ARB_texture_non_power_of_two"); + GLAD_GL_ARB_texture_query_levels = has_ext("GL_ARB_texture_query_levels"); + GLAD_GL_ARB_texture_query_lod = has_ext("GL_ARB_texture_query_lod"); + GLAD_GL_ARB_texture_rectangle = has_ext("GL_ARB_texture_rectangle"); + GLAD_GL_ARB_texture_rg = has_ext("GL_ARB_texture_rg"); + GLAD_GL_ARB_texture_rgb10_a2ui = has_ext("GL_ARB_texture_rgb10_a2ui"); + GLAD_GL_ARB_texture_stencil8 = has_ext("GL_ARB_texture_stencil8"); + GLAD_GL_ARB_texture_storage = has_ext("GL_ARB_texture_storage"); + GLAD_GL_ARB_texture_storage_multisample = has_ext("GL_ARB_texture_storage_multisample"); + GLAD_GL_ARB_texture_swizzle = has_ext("GL_ARB_texture_swizzle"); + GLAD_GL_ARB_texture_view = has_ext("GL_ARB_texture_view"); + GLAD_GL_ARB_timer_query = has_ext("GL_ARB_timer_query"); + GLAD_GL_ARB_transform_feedback2 = has_ext("GL_ARB_transform_feedback2"); + GLAD_GL_ARB_transform_feedback3 = has_ext("GL_ARB_transform_feedback3"); + GLAD_GL_ARB_transform_feedback_instanced = has_ext("GL_ARB_transform_feedback_instanced"); + GLAD_GL_ARB_transform_feedback_overflow_query = has_ext("GL_ARB_transform_feedback_overflow_query"); + GLAD_GL_ARB_transpose_matrix = has_ext("GL_ARB_transpose_matrix"); + 
GLAD_GL_ARB_uniform_buffer_object = has_ext("GL_ARB_uniform_buffer_object"); + GLAD_GL_ARB_vertex_array_bgra = has_ext("GL_ARB_vertex_array_bgra"); + GLAD_GL_ARB_vertex_array_object = has_ext("GL_ARB_vertex_array_object"); + GLAD_GL_ARB_vertex_attrib_64bit = has_ext("GL_ARB_vertex_attrib_64bit"); + GLAD_GL_ARB_vertex_attrib_binding = has_ext("GL_ARB_vertex_attrib_binding"); + GLAD_GL_ARB_vertex_blend = has_ext("GL_ARB_vertex_blend"); + GLAD_GL_ARB_vertex_buffer_object = has_ext("GL_ARB_vertex_buffer_object"); + GLAD_GL_ARB_vertex_program = has_ext("GL_ARB_vertex_program"); + GLAD_GL_ARB_vertex_shader = has_ext("GL_ARB_vertex_shader"); + GLAD_GL_ARB_vertex_type_10f_11f_11f_rev = has_ext("GL_ARB_vertex_type_10f_11f_11f_rev"); + GLAD_GL_ARB_vertex_type_2_10_10_10_rev = has_ext("GL_ARB_vertex_type_2_10_10_10_rev"); + GLAD_GL_ARB_viewport_array = has_ext("GL_ARB_viewport_array"); + GLAD_GL_ARB_window_pos = has_ext("GL_ARB_window_pos"); + GLAD_GL_ATI_draw_buffers = has_ext("GL_ATI_draw_buffers"); + GLAD_GL_ATI_element_array = has_ext("GL_ATI_element_array"); + GLAD_GL_ATI_envmap_bumpmap = has_ext("GL_ATI_envmap_bumpmap"); + GLAD_GL_ATI_fragment_shader = has_ext("GL_ATI_fragment_shader"); + GLAD_GL_ATI_map_object_buffer = has_ext("GL_ATI_map_object_buffer"); + GLAD_GL_ATI_meminfo = has_ext("GL_ATI_meminfo"); + GLAD_GL_ATI_pixel_format_float = has_ext("GL_ATI_pixel_format_float"); + GLAD_GL_ATI_pn_triangles = has_ext("GL_ATI_pn_triangles"); + GLAD_GL_ATI_separate_stencil = has_ext("GL_ATI_separate_stencil"); + GLAD_GL_ATI_text_fragment_shader = has_ext("GL_ATI_text_fragment_shader"); + GLAD_GL_ATI_texture_env_combine3 = has_ext("GL_ATI_texture_env_combine3"); + GLAD_GL_ATI_texture_float = has_ext("GL_ATI_texture_float"); + GLAD_GL_ATI_texture_mirror_once = has_ext("GL_ATI_texture_mirror_once"); + GLAD_GL_ATI_vertex_array_object = has_ext("GL_ATI_vertex_array_object"); + GLAD_GL_ATI_vertex_attrib_array_object = has_ext("GL_ATI_vertex_attrib_array_object"); + 
GLAD_GL_ATI_vertex_streams = has_ext("GL_ATI_vertex_streams"); + GLAD_GL_EXT_422_pixels = has_ext("GL_EXT_422_pixels"); + GLAD_GL_EXT_EGL_image_storage = has_ext("GL_EXT_EGL_image_storage"); + GLAD_GL_EXT_EGL_sync = has_ext("GL_EXT_EGL_sync"); + GLAD_GL_EXT_abgr = has_ext("GL_EXT_abgr"); + GLAD_GL_EXT_bgra = has_ext("GL_EXT_bgra"); + GLAD_GL_EXT_bindable_uniform = has_ext("GL_EXT_bindable_uniform"); + GLAD_GL_EXT_blend_color = has_ext("GL_EXT_blend_color"); + GLAD_GL_EXT_blend_equation_separate = has_ext("GL_EXT_blend_equation_separate"); + GLAD_GL_EXT_blend_func_separate = has_ext("GL_EXT_blend_func_separate"); + GLAD_GL_EXT_blend_logic_op = has_ext("GL_EXT_blend_logic_op"); + GLAD_GL_EXT_blend_minmax = has_ext("GL_EXT_blend_minmax"); + GLAD_GL_EXT_blend_subtract = has_ext("GL_EXT_blend_subtract"); + GLAD_GL_EXT_clip_volume_hint = has_ext("GL_EXT_clip_volume_hint"); + GLAD_GL_EXT_cmyka = has_ext("GL_EXT_cmyka"); + GLAD_GL_EXT_color_subtable = has_ext("GL_EXT_color_subtable"); + GLAD_GL_EXT_compiled_vertex_array = has_ext("GL_EXT_compiled_vertex_array"); + GLAD_GL_EXT_convolution = has_ext("GL_EXT_convolution"); + GLAD_GL_EXT_coordinate_frame = has_ext("GL_EXT_coordinate_frame"); + GLAD_GL_EXT_copy_texture = has_ext("GL_EXT_copy_texture"); + GLAD_GL_EXT_cull_vertex = has_ext("GL_EXT_cull_vertex"); + GLAD_GL_EXT_debug_label = has_ext("GL_EXT_debug_label"); + GLAD_GL_EXT_debug_marker = has_ext("GL_EXT_debug_marker"); + GLAD_GL_EXT_depth_bounds_test = has_ext("GL_EXT_depth_bounds_test"); + GLAD_GL_EXT_direct_state_access = has_ext("GL_EXT_direct_state_access"); + GLAD_GL_EXT_draw_buffers2 = has_ext("GL_EXT_draw_buffers2"); + GLAD_GL_EXT_draw_instanced = has_ext("GL_EXT_draw_instanced"); + GLAD_GL_EXT_draw_range_elements = has_ext("GL_EXT_draw_range_elements"); + GLAD_GL_EXT_external_buffer = has_ext("GL_EXT_external_buffer"); + GLAD_GL_EXT_fog_coord = has_ext("GL_EXT_fog_coord"); + GLAD_GL_EXT_fragment_shading_rate = has_ext("GL_EXT_fragment_shading_rate"); + 
GLAD_GL_EXT_framebuffer_blit = has_ext("GL_EXT_framebuffer_blit"); + GLAD_GL_EXT_framebuffer_blit_layers = has_ext("GL_EXT_framebuffer_blit_layers"); + GLAD_GL_EXT_framebuffer_multisample = has_ext("GL_EXT_framebuffer_multisample"); + GLAD_GL_EXT_framebuffer_multisample_blit_scaled = has_ext("GL_EXT_framebuffer_multisample_blit_scaled"); + GLAD_GL_EXT_framebuffer_object = has_ext("GL_EXT_framebuffer_object"); + GLAD_GL_EXT_framebuffer_sRGB = has_ext("GL_EXT_framebuffer_sRGB"); + GLAD_GL_EXT_geometry_shader4 = has_ext("GL_EXT_geometry_shader4"); + GLAD_GL_EXT_gpu_program_parameters = has_ext("GL_EXT_gpu_program_parameters"); + GLAD_GL_EXT_gpu_shader4 = has_ext("GL_EXT_gpu_shader4"); + GLAD_GL_EXT_histogram = has_ext("GL_EXT_histogram"); + GLAD_GL_EXT_index_array_formats = has_ext("GL_EXT_index_array_formats"); + GLAD_GL_EXT_index_func = has_ext("GL_EXT_index_func"); + GLAD_GL_EXT_index_material = has_ext("GL_EXT_index_material"); + GLAD_GL_EXT_index_texture = has_ext("GL_EXT_index_texture"); + GLAD_GL_EXT_light_texture = has_ext("GL_EXT_light_texture"); + GLAD_GL_EXT_memory_object = has_ext("GL_EXT_memory_object"); + GLAD_GL_EXT_memory_object_fd = has_ext("GL_EXT_memory_object_fd"); + GLAD_GL_EXT_memory_object_win32 = has_ext("GL_EXT_memory_object_win32"); + GLAD_GL_EXT_mesh_shader = has_ext("GL_EXT_mesh_shader"); + GLAD_GL_EXT_misc_attribute = has_ext("GL_EXT_misc_attribute"); + GLAD_GL_EXT_multi_draw_arrays = has_ext("GL_EXT_multi_draw_arrays"); + GLAD_GL_EXT_multisample = has_ext("GL_EXT_multisample"); + GLAD_GL_EXT_multiview_tessellation_geometry_shader = has_ext("GL_EXT_multiview_tessellation_geometry_shader"); + GLAD_GL_EXT_multiview_texture_multisample = has_ext("GL_EXT_multiview_texture_multisample"); + GLAD_GL_EXT_multiview_timer_query = has_ext("GL_EXT_multiview_timer_query"); + GLAD_GL_EXT_packed_depth_stencil = has_ext("GL_EXT_packed_depth_stencil"); + GLAD_GL_EXT_packed_float = has_ext("GL_EXT_packed_float"); + GLAD_GL_EXT_packed_pixels = 
has_ext("GL_EXT_packed_pixels"); + GLAD_GL_EXT_paletted_texture = has_ext("GL_EXT_paletted_texture"); + GLAD_GL_EXT_pixel_buffer_object = has_ext("GL_EXT_pixel_buffer_object"); + GLAD_GL_EXT_pixel_transform = has_ext("GL_EXT_pixel_transform"); + GLAD_GL_EXT_pixel_transform_color_table = has_ext("GL_EXT_pixel_transform_color_table"); + GLAD_GL_EXT_point_parameters = has_ext("GL_EXT_point_parameters"); + GLAD_GL_EXT_polygon_offset = has_ext("GL_EXT_polygon_offset"); + GLAD_GL_EXT_polygon_offset_clamp = has_ext("GL_EXT_polygon_offset_clamp"); + GLAD_GL_EXT_post_depth_coverage = has_ext("GL_EXT_post_depth_coverage"); + GLAD_GL_EXT_provoking_vertex = has_ext("GL_EXT_provoking_vertex"); + GLAD_GL_EXT_raster_multisample = has_ext("GL_EXT_raster_multisample"); + GLAD_GL_EXT_rescale_normal = has_ext("GL_EXT_rescale_normal"); + GLAD_GL_EXT_secondary_color = has_ext("GL_EXT_secondary_color"); + GLAD_GL_EXT_semaphore = has_ext("GL_EXT_semaphore"); + GLAD_GL_EXT_semaphore_fd = has_ext("GL_EXT_semaphore_fd"); + GLAD_GL_EXT_semaphore_win32 = has_ext("GL_EXT_semaphore_win32"); + GLAD_GL_EXT_separate_shader_objects = has_ext("GL_EXT_separate_shader_objects"); + GLAD_GL_EXT_separate_specular_color = has_ext("GL_EXT_separate_specular_color"); + GLAD_GL_EXT_shader_framebuffer_fetch = has_ext("GL_EXT_shader_framebuffer_fetch"); + GLAD_GL_EXT_shader_framebuffer_fetch_non_coherent = has_ext("GL_EXT_shader_framebuffer_fetch_non_coherent"); + GLAD_GL_EXT_shader_image_load_formatted = has_ext("GL_EXT_shader_image_load_formatted"); + GLAD_GL_EXT_shader_image_load_store = has_ext("GL_EXT_shader_image_load_store"); + GLAD_GL_EXT_shader_integer_mix = has_ext("GL_EXT_shader_integer_mix"); + GLAD_GL_EXT_shader_samples_identical = has_ext("GL_EXT_shader_samples_identical"); + GLAD_GL_EXT_shadow_funcs = has_ext("GL_EXT_shadow_funcs"); + GLAD_GL_EXT_shared_texture_palette = has_ext("GL_EXT_shared_texture_palette"); + GLAD_GL_EXT_sparse_texture2 = has_ext("GL_EXT_sparse_texture2"); + 
GLAD_GL_EXT_stencil_clear_tag = has_ext("GL_EXT_stencil_clear_tag"); + GLAD_GL_EXT_stencil_two_side = has_ext("GL_EXT_stencil_two_side"); + GLAD_GL_EXT_stencil_wrap = has_ext("GL_EXT_stencil_wrap"); + GLAD_GL_EXT_subtexture = has_ext("GL_EXT_subtexture"); + GLAD_GL_EXT_texture = has_ext("GL_EXT_texture"); + GLAD_GL_EXT_texture3D = has_ext("GL_EXT_texture3D"); + GLAD_GL_EXT_texture_array = has_ext("GL_EXT_texture_array"); + GLAD_GL_EXT_texture_buffer_object = has_ext("GL_EXT_texture_buffer_object"); + GLAD_GL_EXT_texture_compression_latc = has_ext("GL_EXT_texture_compression_latc"); + GLAD_GL_EXT_texture_compression_rgtc = has_ext("GL_EXT_texture_compression_rgtc"); + GLAD_GL_EXT_texture_compression_s3tc = has_ext("GL_EXT_texture_compression_s3tc"); + GLAD_GL_EXT_texture_cube_map = has_ext("GL_EXT_texture_cube_map"); + GLAD_GL_EXT_texture_env_add = has_ext("GL_EXT_texture_env_add"); + GLAD_GL_EXT_texture_env_combine = has_ext("GL_EXT_texture_env_combine"); + GLAD_GL_EXT_texture_env_dot3 = has_ext("GL_EXT_texture_env_dot3"); + GLAD_GL_EXT_texture_filter_anisotropic = has_ext("GL_EXT_texture_filter_anisotropic"); + GLAD_GL_EXT_texture_filter_minmax = has_ext("GL_EXT_texture_filter_minmax"); + GLAD_GL_EXT_texture_integer = has_ext("GL_EXT_texture_integer"); + GLAD_GL_EXT_texture_lod_bias = has_ext("GL_EXT_texture_lod_bias"); + GLAD_GL_EXT_texture_mirror_clamp = has_ext("GL_EXT_texture_mirror_clamp"); + GLAD_GL_EXT_texture_object = has_ext("GL_EXT_texture_object"); + GLAD_GL_EXT_texture_perturb_normal = has_ext("GL_EXT_texture_perturb_normal"); + GLAD_GL_EXT_texture_sRGB = has_ext("GL_EXT_texture_sRGB"); + GLAD_GL_EXT_texture_sRGB_R8 = has_ext("GL_EXT_texture_sRGB_R8"); + GLAD_GL_EXT_texture_sRGB_RG8 = has_ext("GL_EXT_texture_sRGB_RG8"); + GLAD_GL_EXT_texture_sRGB_decode = has_ext("GL_EXT_texture_sRGB_decode"); + GLAD_GL_EXT_texture_shadow_lod = has_ext("GL_EXT_texture_shadow_lod"); + GLAD_GL_EXT_texture_shared_exponent = has_ext("GL_EXT_texture_shared_exponent"); + 
GLAD_GL_EXT_texture_snorm = has_ext("GL_EXT_texture_snorm"); + GLAD_GL_EXT_texture_storage = has_ext("GL_EXT_texture_storage"); + GLAD_GL_EXT_texture_swizzle = has_ext("GL_EXT_texture_swizzle"); + GLAD_GL_EXT_timer_query = has_ext("GL_EXT_timer_query"); + GLAD_GL_EXT_transform_feedback = has_ext("GL_EXT_transform_feedback"); + GLAD_GL_EXT_vertex_array = has_ext("GL_EXT_vertex_array"); + GLAD_GL_EXT_vertex_array_bgra = has_ext("GL_EXT_vertex_array_bgra"); + GLAD_GL_EXT_vertex_attrib_64bit = has_ext("GL_EXT_vertex_attrib_64bit"); + GLAD_GL_EXT_vertex_shader = has_ext("GL_EXT_vertex_shader"); + GLAD_GL_EXT_vertex_weighting = has_ext("GL_EXT_vertex_weighting"); + GLAD_GL_EXT_win32_keyed_mutex = has_ext("GL_EXT_win32_keyed_mutex"); + GLAD_GL_EXT_window_rectangles = has_ext("GL_EXT_window_rectangles"); + GLAD_GL_EXT_x11_sync_object = has_ext("GL_EXT_x11_sync_object"); + GLAD_GL_GREMEDY_frame_terminator = has_ext("GL_GREMEDY_frame_terminator"); + GLAD_GL_GREMEDY_string_marker = has_ext("GL_GREMEDY_string_marker"); + GLAD_GL_HP_convolution_border_modes = has_ext("GL_HP_convolution_border_modes"); + GLAD_GL_HP_image_transform = has_ext("GL_HP_image_transform"); + GLAD_GL_HP_occlusion_test = has_ext("GL_HP_occlusion_test"); + GLAD_GL_HP_texture_lighting = has_ext("GL_HP_texture_lighting"); + GLAD_GL_IBM_cull_vertex = has_ext("GL_IBM_cull_vertex"); + GLAD_GL_IBM_multimode_draw_arrays = has_ext("GL_IBM_multimode_draw_arrays"); + GLAD_GL_IBM_rasterpos_clip = has_ext("GL_IBM_rasterpos_clip"); + GLAD_GL_IBM_static_data = has_ext("GL_IBM_static_data"); + GLAD_GL_IBM_texture_mirrored_repeat = has_ext("GL_IBM_texture_mirrored_repeat"); + GLAD_GL_IBM_vertex_array_lists = has_ext("GL_IBM_vertex_array_lists"); + GLAD_GL_INGR_blend_func_separate = has_ext("GL_INGR_blend_func_separate"); + GLAD_GL_INGR_color_clamp = has_ext("GL_INGR_color_clamp"); + GLAD_GL_INGR_interlace_read = has_ext("GL_INGR_interlace_read"); + GLAD_GL_INTEL_blackhole_render = has_ext("GL_INTEL_blackhole_render"); + 
GLAD_GL_INTEL_conservative_rasterization = has_ext("GL_INTEL_conservative_rasterization"); + GLAD_GL_INTEL_fragment_shader_ordering = has_ext("GL_INTEL_fragment_shader_ordering"); + GLAD_GL_INTEL_framebuffer_CMAA = has_ext("GL_INTEL_framebuffer_CMAA"); + GLAD_GL_INTEL_map_texture = has_ext("GL_INTEL_map_texture"); + GLAD_GL_INTEL_parallel_arrays = has_ext("GL_INTEL_parallel_arrays"); + GLAD_GL_INTEL_performance_query = has_ext("GL_INTEL_performance_query"); + GLAD_GL_KHR_blend_equation_advanced = has_ext("GL_KHR_blend_equation_advanced"); + GLAD_GL_KHR_blend_equation_advanced_coherent = has_ext("GL_KHR_blend_equation_advanced_coherent"); + GLAD_GL_KHR_context_flush_control = has_ext("GL_KHR_context_flush_control"); + GLAD_GL_KHR_debug = has_ext("GL_KHR_debug"); + GLAD_GL_KHR_no_error = has_ext("GL_KHR_no_error"); + GLAD_GL_KHR_parallel_shader_compile = has_ext("GL_KHR_parallel_shader_compile"); + GLAD_GL_KHR_robust_buffer_access_behavior = has_ext("GL_KHR_robust_buffer_access_behavior"); + GLAD_GL_KHR_robustness = has_ext("GL_KHR_robustness"); + GLAD_GL_KHR_shader_subgroup = has_ext("GL_KHR_shader_subgroup"); + GLAD_GL_KHR_texture_compression_astc_hdr = has_ext("GL_KHR_texture_compression_astc_hdr"); + GLAD_GL_KHR_texture_compression_astc_ldr = has_ext("GL_KHR_texture_compression_astc_ldr"); + GLAD_GL_KHR_texture_compression_astc_sliced_3d = has_ext("GL_KHR_texture_compression_astc_sliced_3d"); + GLAD_GL_MESAX_texture_stack = has_ext("GL_MESAX_texture_stack"); + GLAD_GL_MESA_framebuffer_flip_x = has_ext("GL_MESA_framebuffer_flip_x"); + GLAD_GL_MESA_framebuffer_flip_y = has_ext("GL_MESA_framebuffer_flip_y"); + GLAD_GL_MESA_framebuffer_swap_xy = has_ext("GL_MESA_framebuffer_swap_xy"); + GLAD_GL_MESA_pack_invert = has_ext("GL_MESA_pack_invert"); + GLAD_GL_MESA_program_binary_formats = has_ext("GL_MESA_program_binary_formats"); + GLAD_GL_MESA_resize_buffers = has_ext("GL_MESA_resize_buffers"); + GLAD_GL_MESA_shader_integer_functions = 
has_ext("GL_MESA_shader_integer_functions"); + GLAD_GL_MESA_texture_const_bandwidth = has_ext("GL_MESA_texture_const_bandwidth"); + GLAD_GL_MESA_tile_raster_order = has_ext("GL_MESA_tile_raster_order"); + GLAD_GL_MESA_window_pos = has_ext("GL_MESA_window_pos"); + GLAD_GL_MESA_ycbcr_texture = has_ext("GL_MESA_ycbcr_texture"); + GLAD_GL_NVX_blend_equation_advanced_multi_draw_buffers = has_ext("GL_NVX_blend_equation_advanced_multi_draw_buffers"); + GLAD_GL_NVX_conditional_render = has_ext("GL_NVX_conditional_render"); + GLAD_GL_NVX_gpu_memory_info = has_ext("GL_NVX_gpu_memory_info"); + GLAD_GL_NVX_gpu_multicast2 = has_ext("GL_NVX_gpu_multicast2"); + GLAD_GL_NVX_linked_gpu_multicast = has_ext("GL_NVX_linked_gpu_multicast"); + GLAD_GL_NVX_progress_fence = has_ext("GL_NVX_progress_fence"); + GLAD_GL_NV_alpha_to_coverage_dither_control = has_ext("GL_NV_alpha_to_coverage_dither_control"); + GLAD_GL_NV_bindless_multi_draw_indirect = has_ext("GL_NV_bindless_multi_draw_indirect"); + GLAD_GL_NV_bindless_multi_draw_indirect_count = has_ext("GL_NV_bindless_multi_draw_indirect_count"); + GLAD_GL_NV_bindless_texture = has_ext("GL_NV_bindless_texture"); + GLAD_GL_NV_blend_equation_advanced = has_ext("GL_NV_blend_equation_advanced"); + GLAD_GL_NV_blend_equation_advanced_coherent = has_ext("GL_NV_blend_equation_advanced_coherent"); + GLAD_GL_NV_blend_minmax_factor = has_ext("GL_NV_blend_minmax_factor"); + GLAD_GL_NV_blend_square = has_ext("GL_NV_blend_square"); + GLAD_GL_NV_clip_space_w_scaling = has_ext("GL_NV_clip_space_w_scaling"); + GLAD_GL_NV_command_list = has_ext("GL_NV_command_list"); + GLAD_GL_NV_compute_program5 = has_ext("GL_NV_compute_program5"); + GLAD_GL_NV_compute_shader_derivatives = has_ext("GL_NV_compute_shader_derivatives"); + GLAD_GL_NV_conditional_render = has_ext("GL_NV_conditional_render"); + GLAD_GL_NV_conservative_raster = has_ext("GL_NV_conservative_raster"); + GLAD_GL_NV_conservative_raster_dilate = has_ext("GL_NV_conservative_raster_dilate"); + 
GLAD_GL_NV_conservative_raster_pre_snap = has_ext("GL_NV_conservative_raster_pre_snap"); + GLAD_GL_NV_conservative_raster_pre_snap_triangles = has_ext("GL_NV_conservative_raster_pre_snap_triangles"); + GLAD_GL_NV_conservative_raster_underestimation = has_ext("GL_NV_conservative_raster_underestimation"); + GLAD_GL_NV_copy_depth_to_color = has_ext("GL_NV_copy_depth_to_color"); + GLAD_GL_NV_copy_image = has_ext("GL_NV_copy_image"); + GLAD_GL_NV_deep_texture3D = has_ext("GL_NV_deep_texture3D"); + GLAD_GL_NV_depth_buffer_float = has_ext("GL_NV_depth_buffer_float"); + GLAD_GL_NV_depth_clamp = has_ext("GL_NV_depth_clamp"); + GLAD_GL_NV_draw_texture = has_ext("GL_NV_draw_texture"); + GLAD_GL_NV_draw_vulkan_image = has_ext("GL_NV_draw_vulkan_image"); + GLAD_GL_NV_evaluators = has_ext("GL_NV_evaluators"); + GLAD_GL_NV_explicit_multisample = has_ext("GL_NV_explicit_multisample"); + GLAD_GL_NV_fence = has_ext("GL_NV_fence"); + GLAD_GL_NV_fill_rectangle = has_ext("GL_NV_fill_rectangle"); + GLAD_GL_NV_float_buffer = has_ext("GL_NV_float_buffer"); + GLAD_GL_NV_fog_distance = has_ext("GL_NV_fog_distance"); + GLAD_GL_NV_fragment_coverage_to_color = has_ext("GL_NV_fragment_coverage_to_color"); + GLAD_GL_NV_fragment_program = has_ext("GL_NV_fragment_program"); + GLAD_GL_NV_fragment_program2 = has_ext("GL_NV_fragment_program2"); + GLAD_GL_NV_fragment_program4 = has_ext("GL_NV_fragment_program4"); + GLAD_GL_NV_fragment_program_option = has_ext("GL_NV_fragment_program_option"); + GLAD_GL_NV_fragment_shader_barycentric = has_ext("GL_NV_fragment_shader_barycentric"); + GLAD_GL_NV_fragment_shader_interlock = has_ext("GL_NV_fragment_shader_interlock"); + GLAD_GL_NV_framebuffer_mixed_samples = has_ext("GL_NV_framebuffer_mixed_samples"); + GLAD_GL_NV_framebuffer_multisample_coverage = has_ext("GL_NV_framebuffer_multisample_coverage"); + GLAD_GL_NV_geometry_program4 = has_ext("GL_NV_geometry_program4"); + GLAD_GL_NV_geometry_shader4 = has_ext("GL_NV_geometry_shader4"); + 
GLAD_GL_NV_geometry_shader_passthrough = has_ext("GL_NV_geometry_shader_passthrough"); + GLAD_GL_NV_gpu_multicast = has_ext("GL_NV_gpu_multicast"); + GLAD_GL_NV_gpu_program4 = has_ext("GL_NV_gpu_program4"); + GLAD_GL_NV_gpu_program5 = has_ext("GL_NV_gpu_program5"); + GLAD_GL_NV_gpu_program5_mem_extended = has_ext("GL_NV_gpu_program5_mem_extended"); + GLAD_GL_NV_gpu_shader5 = has_ext("GL_NV_gpu_shader5"); + GLAD_GL_NV_half_float = has_ext("GL_NV_half_float"); + GLAD_GL_NV_internalformat_sample_query = has_ext("GL_NV_internalformat_sample_query"); + GLAD_GL_NV_light_max_exponent = has_ext("GL_NV_light_max_exponent"); + GLAD_GL_NV_memory_attachment = has_ext("GL_NV_memory_attachment"); + GLAD_GL_NV_memory_object_sparse = has_ext("GL_NV_memory_object_sparse"); + GLAD_GL_NV_mesh_shader = has_ext("GL_NV_mesh_shader"); + GLAD_GL_NV_multisample_coverage = has_ext("GL_NV_multisample_coverage"); + GLAD_GL_NV_multisample_filter_hint = has_ext("GL_NV_multisample_filter_hint"); + GLAD_GL_NV_occlusion_query = has_ext("GL_NV_occlusion_query"); + GLAD_GL_NV_packed_depth_stencil = has_ext("GL_NV_packed_depth_stencil"); + GLAD_GL_NV_parameter_buffer_object = has_ext("GL_NV_parameter_buffer_object"); + GLAD_GL_NV_parameter_buffer_object2 = has_ext("GL_NV_parameter_buffer_object2"); + GLAD_GL_NV_path_rendering = has_ext("GL_NV_path_rendering"); + GLAD_GL_NV_path_rendering_shared_edge = has_ext("GL_NV_path_rendering_shared_edge"); + GLAD_GL_NV_pixel_data_range = has_ext("GL_NV_pixel_data_range"); + GLAD_GL_NV_point_sprite = has_ext("GL_NV_point_sprite"); + GLAD_GL_NV_present_video = has_ext("GL_NV_present_video"); + GLAD_GL_NV_primitive_restart = has_ext("GL_NV_primitive_restart"); + GLAD_GL_NV_primitive_shading_rate = has_ext("GL_NV_primitive_shading_rate"); + GLAD_GL_NV_query_resource = has_ext("GL_NV_query_resource"); + GLAD_GL_NV_query_resource_tag = has_ext("GL_NV_query_resource_tag"); + GLAD_GL_NV_register_combiners = has_ext("GL_NV_register_combiners"); + 
GLAD_GL_NV_register_combiners2 = has_ext("GL_NV_register_combiners2"); + GLAD_GL_NV_representative_fragment_test = has_ext("GL_NV_representative_fragment_test"); + GLAD_GL_NV_robustness_video_memory_purge = has_ext("GL_NV_robustness_video_memory_purge"); + GLAD_GL_NV_sample_locations = has_ext("GL_NV_sample_locations"); + GLAD_GL_NV_sample_mask_override_coverage = has_ext("GL_NV_sample_mask_override_coverage"); + GLAD_GL_NV_scissor_exclusive = has_ext("GL_NV_scissor_exclusive"); + GLAD_GL_NV_shader_atomic_counters = has_ext("GL_NV_shader_atomic_counters"); + GLAD_GL_NV_shader_atomic_float = has_ext("GL_NV_shader_atomic_float"); + GLAD_GL_NV_shader_atomic_float64 = has_ext("GL_NV_shader_atomic_float64"); + GLAD_GL_NV_shader_atomic_fp16_vector = has_ext("GL_NV_shader_atomic_fp16_vector"); + GLAD_GL_NV_shader_atomic_int64 = has_ext("GL_NV_shader_atomic_int64"); + GLAD_GL_NV_shader_buffer_load = has_ext("GL_NV_shader_buffer_load"); + GLAD_GL_NV_shader_buffer_store = has_ext("GL_NV_shader_buffer_store"); + GLAD_GL_NV_shader_storage_buffer_object = has_ext("GL_NV_shader_storage_buffer_object"); + GLAD_GL_NV_shader_subgroup_partitioned = has_ext("GL_NV_shader_subgroup_partitioned"); + GLAD_GL_NV_shader_texture_footprint = has_ext("GL_NV_shader_texture_footprint"); + GLAD_GL_NV_shader_thread_group = has_ext("GL_NV_shader_thread_group"); + GLAD_GL_NV_shader_thread_shuffle = has_ext("GL_NV_shader_thread_shuffle"); + GLAD_GL_NV_shading_rate_image = has_ext("GL_NV_shading_rate_image"); + GLAD_GL_NV_stereo_view_rendering = has_ext("GL_NV_stereo_view_rendering"); + GLAD_GL_NV_tessellation_program5 = has_ext("GL_NV_tessellation_program5"); + GLAD_GL_NV_texgen_emboss = has_ext("GL_NV_texgen_emboss"); + GLAD_GL_NV_texgen_reflection = has_ext("GL_NV_texgen_reflection"); + GLAD_GL_NV_texture_barrier = has_ext("GL_NV_texture_barrier"); + GLAD_GL_NV_texture_compression_vtc = has_ext("GL_NV_texture_compression_vtc"); + GLAD_GL_NV_texture_env_combine4 = 
has_ext("GL_NV_texture_env_combine4"); + GLAD_GL_NV_texture_expand_normal = has_ext("GL_NV_texture_expand_normal"); + GLAD_GL_NV_texture_multisample = has_ext("GL_NV_texture_multisample"); + GLAD_GL_NV_texture_rectangle = has_ext("GL_NV_texture_rectangle"); + GLAD_GL_NV_texture_rectangle_compressed = has_ext("GL_NV_texture_rectangle_compressed"); + GLAD_GL_NV_texture_shader = has_ext("GL_NV_texture_shader"); + GLAD_GL_NV_texture_shader2 = has_ext("GL_NV_texture_shader2"); + GLAD_GL_NV_texture_shader3 = has_ext("GL_NV_texture_shader3"); + GLAD_GL_NV_timeline_semaphore = has_ext("GL_NV_timeline_semaphore"); + GLAD_GL_NV_transform_feedback = has_ext("GL_NV_transform_feedback"); + GLAD_GL_NV_transform_feedback2 = has_ext("GL_NV_transform_feedback2"); + GLAD_GL_NV_uniform_buffer_std430_layout = has_ext("GL_NV_uniform_buffer_std430_layout"); + GLAD_GL_NV_uniform_buffer_unified_memory = has_ext("GL_NV_uniform_buffer_unified_memory"); + GLAD_GL_NV_vdpau_interop = has_ext("GL_NV_vdpau_interop"); + GLAD_GL_NV_vdpau_interop2 = has_ext("GL_NV_vdpau_interop2"); + GLAD_GL_NV_vertex_array_range = has_ext("GL_NV_vertex_array_range"); + GLAD_GL_NV_vertex_array_range2 = has_ext("GL_NV_vertex_array_range2"); + GLAD_GL_NV_vertex_attrib_integer_64bit = has_ext("GL_NV_vertex_attrib_integer_64bit"); + GLAD_GL_NV_vertex_buffer_unified_memory = has_ext("GL_NV_vertex_buffer_unified_memory"); + GLAD_GL_NV_vertex_program = has_ext("GL_NV_vertex_program"); + GLAD_GL_NV_vertex_program1_1 = has_ext("GL_NV_vertex_program1_1"); + GLAD_GL_NV_vertex_program2 = has_ext("GL_NV_vertex_program2"); + GLAD_GL_NV_vertex_program2_option = has_ext("GL_NV_vertex_program2_option"); + GLAD_GL_NV_vertex_program3 = has_ext("GL_NV_vertex_program3"); + GLAD_GL_NV_vertex_program4 = has_ext("GL_NV_vertex_program4"); + GLAD_GL_NV_video_capture = has_ext("GL_NV_video_capture"); + GLAD_GL_NV_viewport_array2 = has_ext("GL_NV_viewport_array2"); + GLAD_GL_NV_viewport_swizzle = has_ext("GL_NV_viewport_swizzle"); + 
GLAD_GL_OES_byte_coordinates = has_ext("GL_OES_byte_coordinates"); + GLAD_GL_OES_compressed_paletted_texture = has_ext("GL_OES_compressed_paletted_texture"); + GLAD_GL_OES_fixed_point = has_ext("GL_OES_fixed_point"); + GLAD_GL_OES_query_matrix = has_ext("GL_OES_query_matrix"); + GLAD_GL_OES_read_format = has_ext("GL_OES_read_format"); + GLAD_GL_OES_single_precision = has_ext("GL_OES_single_precision"); + GLAD_GL_OML_interlace = has_ext("GL_OML_interlace"); + GLAD_GL_OML_resample = has_ext("GL_OML_resample"); + GLAD_GL_OML_subsample = has_ext("GL_OML_subsample"); + GLAD_GL_OVR_multiview = has_ext("GL_OVR_multiview"); + GLAD_GL_OVR_multiview2 = has_ext("GL_OVR_multiview2"); + GLAD_GL_PGI_misc_hints = has_ext("GL_PGI_misc_hints"); + GLAD_GL_PGI_vertex_hints = has_ext("GL_PGI_vertex_hints"); + GLAD_GL_REND_screen_coordinates = has_ext("GL_REND_screen_coordinates"); + GLAD_GL_S3_s3tc = has_ext("GL_S3_s3tc"); + GLAD_GL_SGIS_detail_texture = has_ext("GL_SGIS_detail_texture"); + GLAD_GL_SGIS_fog_function = has_ext("GL_SGIS_fog_function"); + GLAD_GL_SGIS_generate_mipmap = has_ext("GL_SGIS_generate_mipmap"); + GLAD_GL_SGIS_multisample = has_ext("GL_SGIS_multisample"); + GLAD_GL_SGIS_pixel_texture = has_ext("GL_SGIS_pixel_texture"); + GLAD_GL_SGIS_point_line_texgen = has_ext("GL_SGIS_point_line_texgen"); + GLAD_GL_SGIS_point_parameters = has_ext("GL_SGIS_point_parameters"); + GLAD_GL_SGIS_sharpen_texture = has_ext("GL_SGIS_sharpen_texture"); + GLAD_GL_SGIS_texture4D = has_ext("GL_SGIS_texture4D"); + GLAD_GL_SGIS_texture_border_clamp = has_ext("GL_SGIS_texture_border_clamp"); + GLAD_GL_SGIS_texture_color_mask = has_ext("GL_SGIS_texture_color_mask"); + GLAD_GL_SGIS_texture_edge_clamp = has_ext("GL_SGIS_texture_edge_clamp"); + GLAD_GL_SGIS_texture_filter4 = has_ext("GL_SGIS_texture_filter4"); + GLAD_GL_SGIS_texture_lod = has_ext("GL_SGIS_texture_lod"); + GLAD_GL_SGIS_texture_select = has_ext("GL_SGIS_texture_select"); + GLAD_GL_SGIX_async = has_ext("GL_SGIX_async"); + 
GLAD_GL_SGIX_async_histogram = has_ext("GL_SGIX_async_histogram"); + GLAD_GL_SGIX_async_pixel = has_ext("GL_SGIX_async_pixel"); + GLAD_GL_SGIX_blend_alpha_minmax = has_ext("GL_SGIX_blend_alpha_minmax"); + GLAD_GL_SGIX_calligraphic_fragment = has_ext("GL_SGIX_calligraphic_fragment"); + GLAD_GL_SGIX_clipmap = has_ext("GL_SGIX_clipmap"); + GLAD_GL_SGIX_convolution_accuracy = has_ext("GL_SGIX_convolution_accuracy"); + GLAD_GL_SGIX_depth_pass_instrument = has_ext("GL_SGIX_depth_pass_instrument"); + GLAD_GL_SGIX_depth_texture = has_ext("GL_SGIX_depth_texture"); + GLAD_GL_SGIX_flush_raster = has_ext("GL_SGIX_flush_raster"); + GLAD_GL_SGIX_fog_offset = has_ext("GL_SGIX_fog_offset"); + GLAD_GL_SGIX_fragment_lighting = has_ext("GL_SGIX_fragment_lighting"); + GLAD_GL_SGIX_framezoom = has_ext("GL_SGIX_framezoom"); + GLAD_GL_SGIX_igloo_interface = has_ext("GL_SGIX_igloo_interface"); + GLAD_GL_SGIX_instruments = has_ext("GL_SGIX_instruments"); + GLAD_GL_SGIX_interlace = has_ext("GL_SGIX_interlace"); + GLAD_GL_SGIX_ir_instrument1 = has_ext("GL_SGIX_ir_instrument1"); + GLAD_GL_SGIX_list_priority = has_ext("GL_SGIX_list_priority"); + GLAD_GL_SGIX_pixel_texture = has_ext("GL_SGIX_pixel_texture"); + GLAD_GL_SGIX_pixel_tiles = has_ext("GL_SGIX_pixel_tiles"); + GLAD_GL_SGIX_polynomial_ffd = has_ext("GL_SGIX_polynomial_ffd"); + GLAD_GL_SGIX_reference_plane = has_ext("GL_SGIX_reference_plane"); + GLAD_GL_SGIX_resample = has_ext("GL_SGIX_resample"); + GLAD_GL_SGIX_scalebias_hint = has_ext("GL_SGIX_scalebias_hint"); + GLAD_GL_SGIX_shadow = has_ext("GL_SGIX_shadow"); + GLAD_GL_SGIX_shadow_ambient = has_ext("GL_SGIX_shadow_ambient"); + GLAD_GL_SGIX_sprite = has_ext("GL_SGIX_sprite"); + GLAD_GL_SGIX_subsample = has_ext("GL_SGIX_subsample"); + GLAD_GL_SGIX_tag_sample_buffer = has_ext("GL_SGIX_tag_sample_buffer"); + GLAD_GL_SGIX_texture_add_env = has_ext("GL_SGIX_texture_add_env"); + GLAD_GL_SGIX_texture_coordinate_clamp = has_ext("GL_SGIX_texture_coordinate_clamp"); + 
GLAD_GL_SGIX_texture_lod_bias = has_ext("GL_SGIX_texture_lod_bias"); + GLAD_GL_SGIX_texture_multi_buffer = has_ext("GL_SGIX_texture_multi_buffer"); + GLAD_GL_SGIX_texture_scale_bias = has_ext("GL_SGIX_texture_scale_bias"); + GLAD_GL_SGIX_vertex_preclip = has_ext("GL_SGIX_vertex_preclip"); + GLAD_GL_SGIX_ycrcb = has_ext("GL_SGIX_ycrcb"); + GLAD_GL_SGIX_ycrcb_subsample = has_ext("GL_SGIX_ycrcb_subsample"); + GLAD_GL_SGIX_ycrcba = has_ext("GL_SGIX_ycrcba"); + GLAD_GL_SGI_color_matrix = has_ext("GL_SGI_color_matrix"); + GLAD_GL_SGI_color_table = has_ext("GL_SGI_color_table"); + GLAD_GL_SGI_texture_color_table = has_ext("GL_SGI_texture_color_table"); + GLAD_GL_SUNX_constant_data = has_ext("GL_SUNX_constant_data"); + GLAD_GL_SUN_convolution_border_modes = has_ext("GL_SUN_convolution_border_modes"); + GLAD_GL_SUN_global_alpha = has_ext("GL_SUN_global_alpha"); + GLAD_GL_SUN_mesh_array = has_ext("GL_SUN_mesh_array"); + GLAD_GL_SUN_slice_accum = has_ext("GL_SUN_slice_accum"); + GLAD_GL_SUN_triangle_list = has_ext("GL_SUN_triangle_list"); + GLAD_GL_SUN_vertex = has_ext("GL_SUN_vertex"); + GLAD_GL_WIN_phong_shading = has_ext("GL_WIN_phong_shading"); + GLAD_GL_WIN_specular_fog = has_ext("GL_WIN_specular_fog"); + free_exts(); + return 1; +} + +static void find_coreGL(void) { + + /* Thank you @elmindreda + * https://github.com/elmindreda/greg/blob/master/templates/greg.c.in#L176 + * https://github.com/glfw/glfw/blob/master/src/context.c#L36 + */ + int i, major, minor; + + const char* version; + const char* prefixes[] = { + "OpenGL ES-CM ", + "OpenGL ES-CL ", + "OpenGL ES ", + NULL + }; + + version = (const char*) glGetString(GL_VERSION); + if (!version) return; + + for (i = 0; prefixes[i]; i++) { + const size_t length = strlen(prefixes[i]); + if (strncmp(version, prefixes[i], length) == 0) { + version += length; + break; + } + } + +/* PR #18 */ +#ifdef _MSC_VER + sscanf_s(version, "%d.%d", &major, &minor); +#else + sscanf(version, "%d.%d", &major, &minor); +#endif + + 
GLVersion.major = major; GLVersion.minor = minor; + max_loaded_major = major; max_loaded_minor = minor; + GLAD_GL_VERSION_1_0 = (major == 1 && minor >= 0) || major > 1; + GLAD_GL_VERSION_1_1 = (major == 1 && minor >= 1) || major > 1; + GLAD_GL_VERSION_1_2 = (major == 1 && minor >= 2) || major > 1; + GLAD_GL_VERSION_1_3 = (major == 1 && minor >= 3) || major > 1; + GLAD_GL_VERSION_1_4 = (major == 1 && minor >= 4) || major > 1; + GLAD_GL_VERSION_1_5 = (major == 1 && minor >= 5) || major > 1; + GLAD_GL_VERSION_2_0 = (major == 2 && minor >= 0) || major > 2; + GLAD_GL_VERSION_2_1 = (major == 2 && minor >= 1) || major > 2; + GLAD_GL_VERSION_3_0 = (major == 3 && minor >= 0) || major > 3; + GLAD_GL_VERSION_3_1 = (major == 3 && minor >= 1) || major > 3; + GLAD_GL_VERSION_3_2 = (major == 3 && minor >= 2) || major > 3; + GLAD_GL_VERSION_3_3 = (major == 3 && minor >= 3) || major > 3; + GLAD_GL_VERSION_4_0 = (major == 4 && minor >= 0) || major > 4; + GLAD_GL_VERSION_4_1 = (major == 4 && minor >= 1) || major > 4; + GLAD_GL_VERSION_4_2 = (major == 4 && minor >= 2) || major > 4; + GLAD_GL_VERSION_4_3 = (major == 4 && minor >= 3) || major > 4; + GLAD_GL_VERSION_4_4 = (major == 4 && minor >= 4) || major > 4; + GLAD_GL_VERSION_4_5 = (major == 4 && minor >= 5) || major > 4; + if (GLVersion.major > 4 || (GLVersion.major >= 4 && GLVersion.minor >= 5)) { + max_loaded_major = 4; + max_loaded_minor = 5; + } +} + +int gladLoadGLLoader(GLADloadproc load) { + GLVersion.major = 0; GLVersion.minor = 0; + glGetString = (PFNGLGETSTRINGPROC)load("glGetString"); + if(glGetString == NULL) return 0; + if(glGetString(GL_VERSION) == NULL) return 0; + find_coreGL(); + load_GL_VERSION_1_0(load); + load_GL_VERSION_1_1(load); + load_GL_VERSION_1_2(load); + load_GL_VERSION_1_3(load); + load_GL_VERSION_1_4(load); + load_GL_VERSION_1_5(load); + load_GL_VERSION_2_0(load); + load_GL_VERSION_2_1(load); + load_GL_VERSION_3_0(load); + load_GL_VERSION_3_1(load); + load_GL_VERSION_3_2(load); + 
load_GL_VERSION_3_3(load); + load_GL_VERSION_4_0(load); + load_GL_VERSION_4_1(load); + load_GL_VERSION_4_2(load); + load_GL_VERSION_4_3(load); + load_GL_VERSION_4_4(load); + load_GL_VERSION_4_5(load); + + if (!find_extensionsGL()) return 0; + load_GL_3DFX_tbuffer(load); + load_GL_AMD_debug_output(load); + load_GL_AMD_draw_buffers_blend(load); + load_GL_AMD_framebuffer_multisample_advanced(load); + load_GL_AMD_framebuffer_sample_positions(load); + load_GL_AMD_gpu_shader_int64(load); + load_GL_AMD_interleaved_elements(load); + load_GL_AMD_multi_draw_indirect(load); + load_GL_AMD_name_gen_delete(load); + load_GL_AMD_occlusion_query_event(load); + load_GL_AMD_performance_monitor(load); + load_GL_AMD_sample_positions(load); + load_GL_AMD_sparse_texture(load); + load_GL_AMD_stencil_operation_extended(load); + load_GL_AMD_vertex_shader_tessellator(load); + load_GL_APPLE_element_array(load); + load_GL_APPLE_fence(load); + load_GL_APPLE_flush_buffer_range(load); + load_GL_APPLE_object_purgeable(load); + load_GL_APPLE_texture_range(load); + load_GL_APPLE_vertex_array_object(load); + load_GL_APPLE_vertex_array_range(load); + load_GL_APPLE_vertex_program_evaluators(load); + load_GL_ARB_ES2_compatibility(load); + load_GL_ARB_ES3_1_compatibility(load); + load_GL_ARB_ES3_2_compatibility(load); + load_GL_ARB_base_instance(load); + load_GL_ARB_bindless_texture(load); + load_GL_ARB_blend_func_extended(load); + load_GL_ARB_buffer_storage(load); + load_GL_ARB_cl_event(load); + load_GL_ARB_clear_buffer_object(load); + load_GL_ARB_clear_texture(load); + load_GL_ARB_clip_control(load); + load_GL_ARB_color_buffer_float(load); + load_GL_ARB_compute_shader(load); + load_GL_ARB_compute_variable_group_size(load); + load_GL_ARB_copy_buffer(load); + load_GL_ARB_copy_image(load); + load_GL_ARB_debug_output(load); + load_GL_ARB_direct_state_access(load); + load_GL_ARB_draw_buffers(load); + load_GL_ARB_draw_buffers_blend(load); + load_GL_ARB_draw_elements_base_vertex(load); + 
load_GL_ARB_draw_indirect(load); + load_GL_ARB_draw_instanced(load); + load_GL_ARB_fragment_program(load); + load_GL_ARB_framebuffer_no_attachments(load); + load_GL_ARB_framebuffer_object(load); + load_GL_ARB_geometry_shader4(load); + load_GL_ARB_get_program_binary(load); + load_GL_ARB_get_texture_sub_image(load); + load_GL_ARB_gl_spirv(load); + load_GL_ARB_gpu_shader_fp64(load); + load_GL_ARB_gpu_shader_int64(load); + load_GL_ARB_imaging(load); + load_GL_ARB_indirect_parameters(load); + load_GL_ARB_instanced_arrays(load); + load_GL_ARB_internalformat_query(load); + load_GL_ARB_internalformat_query2(load); + load_GL_ARB_invalidate_subdata(load); + load_GL_ARB_map_buffer_range(load); + load_GL_ARB_matrix_palette(load); + load_GL_ARB_multi_bind(load); + load_GL_ARB_multi_draw_indirect(load); + load_GL_ARB_multisample(load); + load_GL_ARB_multitexture(load); + load_GL_ARB_occlusion_query(load); + load_GL_ARB_parallel_shader_compile(load); + load_GL_ARB_point_parameters(load); + load_GL_ARB_polygon_offset_clamp(load); + load_GL_ARB_program_interface_query(load); + load_GL_ARB_provoking_vertex(load); + load_GL_ARB_robustness(load); + load_GL_ARB_sample_locations(load); + load_GL_ARB_sample_shading(load); + load_GL_ARB_sampler_objects(load); + load_GL_ARB_separate_shader_objects(load); + load_GL_ARB_shader_atomic_counters(load); + load_GL_ARB_shader_image_load_store(load); + load_GL_ARB_shader_objects(load); + load_GL_ARB_shader_storage_buffer_object(load); + load_GL_ARB_shader_subroutine(load); + load_GL_ARB_shading_language_include(load); + load_GL_ARB_sparse_buffer(load); + load_GL_ARB_sparse_texture(load); + load_GL_ARB_sync(load); + load_GL_ARB_tessellation_shader(load); + load_GL_ARB_texture_barrier(load); + load_GL_ARB_texture_buffer_object(load); + load_GL_ARB_texture_buffer_range(load); + load_GL_ARB_texture_compression(load); + load_GL_ARB_texture_multisample(load); + load_GL_ARB_texture_storage(load); + load_GL_ARB_texture_storage_multisample(load); + 
load_GL_ARB_texture_view(load); + load_GL_ARB_timer_query(load); + load_GL_ARB_transform_feedback2(load); + load_GL_ARB_transform_feedback3(load); + load_GL_ARB_transform_feedback_instanced(load); + load_GL_ARB_transpose_matrix(load); + load_GL_ARB_uniform_buffer_object(load); + load_GL_ARB_vertex_array_object(load); + load_GL_ARB_vertex_attrib_64bit(load); + load_GL_ARB_vertex_attrib_binding(load); + load_GL_ARB_vertex_blend(load); + load_GL_ARB_vertex_buffer_object(load); + load_GL_ARB_vertex_program(load); + load_GL_ARB_vertex_shader(load); + load_GL_ARB_vertex_type_2_10_10_10_rev(load); + load_GL_ARB_viewport_array(load); + load_GL_ARB_window_pos(load); + load_GL_ATI_draw_buffers(load); + load_GL_ATI_element_array(load); + load_GL_ATI_envmap_bumpmap(load); + load_GL_ATI_fragment_shader(load); + load_GL_ATI_map_object_buffer(load); + load_GL_ATI_pn_triangles(load); + load_GL_ATI_separate_stencil(load); + load_GL_ATI_vertex_array_object(load); + load_GL_ATI_vertex_attrib_array_object(load); + load_GL_ATI_vertex_streams(load); + load_GL_EXT_EGL_image_storage(load); + load_GL_EXT_bindable_uniform(load); + load_GL_EXT_blend_color(load); + load_GL_EXT_blend_equation_separate(load); + load_GL_EXT_blend_func_separate(load); + load_GL_EXT_blend_minmax(load); + load_GL_EXT_color_subtable(load); + load_GL_EXT_compiled_vertex_array(load); + load_GL_EXT_convolution(load); + load_GL_EXT_coordinate_frame(load); + load_GL_EXT_copy_texture(load); + load_GL_EXT_cull_vertex(load); + load_GL_EXT_debug_label(load); + load_GL_EXT_debug_marker(load); + load_GL_EXT_depth_bounds_test(load); + load_GL_EXT_direct_state_access(load); + load_GL_EXT_draw_buffers2(load); + load_GL_EXT_draw_instanced(load); + load_GL_EXT_draw_range_elements(load); + load_GL_EXT_external_buffer(load); + load_GL_EXT_fog_coord(load); + load_GL_EXT_fragment_shading_rate(load); + load_GL_EXT_framebuffer_blit(load); + load_GL_EXT_framebuffer_blit_layers(load); + load_GL_EXT_framebuffer_multisample(load); + 
load_GL_EXT_framebuffer_object(load); + load_GL_EXT_geometry_shader4(load); + load_GL_EXT_gpu_program_parameters(load); + load_GL_EXT_gpu_shader4(load); + load_GL_EXT_histogram(load); + load_GL_EXT_index_func(load); + load_GL_EXT_index_material(load); + load_GL_EXT_light_texture(load); + load_GL_EXT_memory_object(load); + load_GL_EXT_memory_object_fd(load); + load_GL_EXT_memory_object_win32(load); + load_GL_EXT_mesh_shader(load); + load_GL_EXT_multi_draw_arrays(load); + load_GL_EXT_multisample(load); + load_GL_EXT_paletted_texture(load); + load_GL_EXT_pixel_transform(load); + load_GL_EXT_point_parameters(load); + load_GL_EXT_polygon_offset(load); + load_GL_EXT_polygon_offset_clamp(load); + load_GL_EXT_provoking_vertex(load); + load_GL_EXT_raster_multisample(load); + load_GL_EXT_secondary_color(load); + load_GL_EXT_semaphore(load); + load_GL_EXT_semaphore_fd(load); + load_GL_EXT_semaphore_win32(load); + load_GL_EXT_separate_shader_objects(load); + load_GL_EXT_shader_framebuffer_fetch_non_coherent(load); + load_GL_EXT_shader_image_load_store(load); + load_GL_EXT_stencil_clear_tag(load); + load_GL_EXT_stencil_two_side(load); + load_GL_EXT_subtexture(load); + load_GL_EXT_texture3D(load); + load_GL_EXT_texture_array(load); + load_GL_EXT_texture_buffer_object(load); + load_GL_EXT_texture_integer(load); + load_GL_EXT_texture_object(load); + load_GL_EXT_texture_perturb_normal(load); + load_GL_EXT_texture_storage(load); + load_GL_EXT_timer_query(load); + load_GL_EXT_transform_feedback(load); + load_GL_EXT_vertex_array(load); + load_GL_EXT_vertex_attrib_64bit(load); + load_GL_EXT_vertex_shader(load); + load_GL_EXT_vertex_weighting(load); + load_GL_EXT_win32_keyed_mutex(load); + load_GL_EXT_window_rectangles(load); + load_GL_EXT_x11_sync_object(load); + load_GL_GREMEDY_frame_terminator(load); + load_GL_GREMEDY_string_marker(load); + load_GL_HP_image_transform(load); + load_GL_IBM_multimode_draw_arrays(load); + load_GL_IBM_static_data(load); + 
load_GL_IBM_vertex_array_lists(load); + load_GL_INGR_blend_func_separate(load); + load_GL_INTEL_framebuffer_CMAA(load); + load_GL_INTEL_map_texture(load); + load_GL_INTEL_parallel_arrays(load); + load_GL_INTEL_performance_query(load); + load_GL_KHR_blend_equation_advanced(load); + load_GL_KHR_debug(load); + load_GL_KHR_parallel_shader_compile(load); + load_GL_KHR_robustness(load); + load_GL_MESA_framebuffer_flip_y(load); + load_GL_MESA_resize_buffers(load); + load_GL_MESA_window_pos(load); + load_GL_NVX_conditional_render(load); + load_GL_NVX_gpu_multicast2(load); + load_GL_NVX_linked_gpu_multicast(load); + load_GL_NVX_progress_fence(load); + load_GL_NV_alpha_to_coverage_dither_control(load); + load_GL_NV_bindless_multi_draw_indirect(load); + load_GL_NV_bindless_multi_draw_indirect_count(load); + load_GL_NV_bindless_texture(load); + load_GL_NV_blend_equation_advanced(load); + load_GL_NV_clip_space_w_scaling(load); + load_GL_NV_command_list(load); + load_GL_NV_conditional_render(load); + load_GL_NV_conservative_raster(load); + load_GL_NV_conservative_raster_dilate(load); + load_GL_NV_conservative_raster_pre_snap_triangles(load); + load_GL_NV_copy_image(load); + load_GL_NV_depth_buffer_float(load); + load_GL_NV_draw_texture(load); + load_GL_NV_draw_vulkan_image(load); + load_GL_NV_evaluators(load); + load_GL_NV_explicit_multisample(load); + load_GL_NV_fence(load); + load_GL_NV_fragment_coverage_to_color(load); + load_GL_NV_fragment_program(load); + load_GL_NV_framebuffer_mixed_samples(load); + load_GL_NV_framebuffer_multisample_coverage(load); + load_GL_NV_geometry_program4(load); + load_GL_NV_gpu_multicast(load); + load_GL_NV_gpu_program4(load); + load_GL_NV_gpu_program5(load); + load_GL_NV_gpu_shader5(load); + load_GL_NV_half_float(load); + load_GL_NV_internalformat_sample_query(load); + load_GL_NV_memory_attachment(load); + load_GL_NV_memory_object_sparse(load); + load_GL_NV_mesh_shader(load); + load_GL_NV_occlusion_query(load); + 
load_GL_NV_parameter_buffer_object(load); + load_GL_NV_path_rendering(load); + load_GL_NV_pixel_data_range(load); + load_GL_NV_point_sprite(load); + load_GL_NV_present_video(load); + load_GL_NV_primitive_restart(load); + load_GL_NV_query_resource(load); + load_GL_NV_query_resource_tag(load); + load_GL_NV_register_combiners(load); + load_GL_NV_register_combiners2(load); + load_GL_NV_sample_locations(load); + load_GL_NV_scissor_exclusive(load); + load_GL_NV_shader_buffer_load(load); + load_GL_NV_shading_rate_image(load); + load_GL_NV_texture_barrier(load); + load_GL_NV_texture_multisample(load); + load_GL_NV_timeline_semaphore(load); + load_GL_NV_transform_feedback(load); + load_GL_NV_transform_feedback2(load); + load_GL_NV_vdpau_interop(load); + load_GL_NV_vdpau_interop2(load); + load_GL_NV_vertex_array_range(load); + load_GL_NV_vertex_attrib_integer_64bit(load); + load_GL_NV_vertex_buffer_unified_memory(load); + load_GL_NV_vertex_program(load); + load_GL_NV_vertex_program4(load); + load_GL_NV_video_capture(load); + load_GL_NV_viewport_swizzle(load); + load_GL_OES_byte_coordinates(load); + load_GL_OES_fixed_point(load); + load_GL_OES_query_matrix(load); + load_GL_OES_single_precision(load); + load_GL_OVR_multiview(load); + load_GL_PGI_misc_hints(load); + load_GL_SGIS_detail_texture(load); + load_GL_SGIS_fog_function(load); + load_GL_SGIS_multisample(load); + load_GL_SGIS_pixel_texture(load); + load_GL_SGIS_point_parameters(load); + load_GL_SGIS_sharpen_texture(load); + load_GL_SGIS_texture4D(load); + load_GL_SGIS_texture_color_mask(load); + load_GL_SGIS_texture_filter4(load); + load_GL_SGIX_async(load); + load_GL_SGIX_flush_raster(load); + load_GL_SGIX_fragment_lighting(load); + load_GL_SGIX_framezoom(load); + load_GL_SGIX_igloo_interface(load); + load_GL_SGIX_instruments(load); + load_GL_SGIX_list_priority(load); + load_GL_SGIX_pixel_texture(load); + load_GL_SGIX_polynomial_ffd(load); + load_GL_SGIX_reference_plane(load); + load_GL_SGIX_sprite(load); + 
load_GL_SGIX_tag_sample_buffer(load); + load_GL_SGI_color_table(load); + load_GL_SUNX_constant_data(load); + load_GL_SUN_global_alpha(load); + load_GL_SUN_mesh_array(load); + load_GL_SUN_triangle_list(load); + load_GL_SUN_vertex(load); + return GLVersion.major != 0 || GLVersion.minor != 0; +} + +static void load_GL_VERSION_ES_CM_1_0(GLADloadproc load) { + if(!GLAD_GL_VERSION_ES_CM_1_0) return; + glad_glAlphaFunc = (PFNGLALPHAFUNCPROC)load("glAlphaFunc"); + glad_glClearColor = (PFNGLCLEARCOLORPROC)load("glClearColor"); + glad_glClearDepthf = (PFNGLCLEARDEPTHFPROC)load("glClearDepthf"); + glad_glClipPlanef = (PFNGLCLIPPLANEFPROC)load("glClipPlanef"); + glad_glColor4f = (PFNGLCOLOR4FPROC)load("glColor4f"); + glad_glDepthRangef = (PFNGLDEPTHRANGEFPROC)load("glDepthRangef"); + glad_glFogf = (PFNGLFOGFPROC)load("glFogf"); + glad_glFogfv = (PFNGLFOGFVPROC)load("glFogfv"); + glad_glFrustumf = (PFNGLFRUSTUMFPROC)load("glFrustumf"); + glad_glGetClipPlanef = (PFNGLGETCLIPPLANEFPROC)load("glGetClipPlanef"); + glad_glGetFloatv = (PFNGLGETFLOATVPROC)load("glGetFloatv"); + glad_glGetLightfv = (PFNGLGETLIGHTFVPROC)load("glGetLightfv"); + glad_glGetMaterialfv = (PFNGLGETMATERIALFVPROC)load("glGetMaterialfv"); + glad_glGetTexEnvfv = (PFNGLGETTEXENVFVPROC)load("glGetTexEnvfv"); + glad_glGetTexParameterfv = (PFNGLGETTEXPARAMETERFVPROC)load("glGetTexParameterfv"); + glad_glLightModelf = (PFNGLLIGHTMODELFPROC)load("glLightModelf"); + glad_glLightModelfv = (PFNGLLIGHTMODELFVPROC)load("glLightModelfv"); + glad_glLightf = (PFNGLLIGHTFPROC)load("glLightf"); + glad_glLightfv = (PFNGLLIGHTFVPROC)load("glLightfv"); + glad_glLineWidth = (PFNGLLINEWIDTHPROC)load("glLineWidth"); + glad_glLoadMatrixf = (PFNGLLOADMATRIXFPROC)load("glLoadMatrixf"); + glad_glMaterialf = (PFNGLMATERIALFPROC)load("glMaterialf"); + glad_glMaterialfv = (PFNGLMATERIALFVPROC)load("glMaterialfv"); + glad_glMultMatrixf = (PFNGLMULTMATRIXFPROC)load("glMultMatrixf"); + glad_glMultiTexCoord4f = 
(PFNGLMULTITEXCOORD4FPROC)load("glMultiTexCoord4f"); + glad_glNormal3f = (PFNGLNORMAL3FPROC)load("glNormal3f"); + glad_glOrthof = (PFNGLORTHOFPROC)load("glOrthof"); + glad_glPointParameterf = (PFNGLPOINTPARAMETERFPROC)load("glPointParameterf"); + glad_glPointParameterfv = (PFNGLPOINTPARAMETERFVPROC)load("glPointParameterfv"); + glad_glPointSize = (PFNGLPOINTSIZEPROC)load("glPointSize"); + glad_glPolygonOffset = (PFNGLPOLYGONOFFSETPROC)load("glPolygonOffset"); + glad_glRotatef = (PFNGLROTATEFPROC)load("glRotatef"); + glad_glScalef = (PFNGLSCALEFPROC)load("glScalef"); + glad_glTexEnvf = (PFNGLTEXENVFPROC)load("glTexEnvf"); + glad_glTexEnvfv = (PFNGLTEXENVFVPROC)load("glTexEnvfv"); + glad_glTexParameterf = (PFNGLTEXPARAMETERFPROC)load("glTexParameterf"); + glad_glTexParameterfv = (PFNGLTEXPARAMETERFVPROC)load("glTexParameterfv"); + glad_glTranslatef = (PFNGLTRANSLATEFPROC)load("glTranslatef"); + glad_glActiveTexture = (PFNGLACTIVETEXTUREPROC)load("glActiveTexture"); + glad_glAlphaFuncx = (PFNGLALPHAFUNCXPROC)load("glAlphaFuncx"); + glad_glBindBuffer = (PFNGLBINDBUFFERPROC)load("glBindBuffer"); + glad_glBindTexture = (PFNGLBINDTEXTUREPROC)load("glBindTexture"); + glad_glBlendFunc = (PFNGLBLENDFUNCPROC)load("glBlendFunc"); + glad_glBufferData = (PFNGLBUFFERDATAPROC)load("glBufferData"); + glad_glBufferSubData = (PFNGLBUFFERSUBDATAPROC)load("glBufferSubData"); + glad_glClear = (PFNGLCLEARPROC)load("glClear"); + glad_glClearColorx = (PFNGLCLEARCOLORXPROC)load("glClearColorx"); + glad_glClearDepthx = (PFNGLCLEARDEPTHXPROC)load("glClearDepthx"); + glad_glClearStencil = (PFNGLCLEARSTENCILPROC)load("glClearStencil"); + glad_glClientActiveTexture = (PFNGLCLIENTACTIVETEXTUREPROC)load("glClientActiveTexture"); + glad_glClipPlanex = (PFNGLCLIPPLANEXPROC)load("glClipPlanex"); + glad_glColor4ub = (PFNGLCOLOR4UBPROC)load("glColor4ub"); + glad_glColor4x = (PFNGLCOLOR4XPROC)load("glColor4x"); + glad_glColorMask = (PFNGLCOLORMASKPROC)load("glColorMask"); + glad_glColorPointer = 
(PFNGLCOLORPOINTERPROC)load("glColorPointer"); + glad_glCompressedTexImage2D = (PFNGLCOMPRESSEDTEXIMAGE2DPROC)load("glCompressedTexImage2D"); + glad_glCompressedTexSubImage2D = (PFNGLCOMPRESSEDTEXSUBIMAGE2DPROC)load("glCompressedTexSubImage2D"); + glad_glCopyTexImage2D = (PFNGLCOPYTEXIMAGE2DPROC)load("glCopyTexImage2D"); + glad_glCopyTexSubImage2D = (PFNGLCOPYTEXSUBIMAGE2DPROC)load("glCopyTexSubImage2D"); + glad_glCullFace = (PFNGLCULLFACEPROC)load("glCullFace"); + glad_glDeleteBuffers = (PFNGLDELETEBUFFERSPROC)load("glDeleteBuffers"); + glad_glDeleteTextures = (PFNGLDELETETEXTURESPROC)load("glDeleteTextures"); + glad_glDepthFunc = (PFNGLDEPTHFUNCPROC)load("glDepthFunc"); + glad_glDepthMask = (PFNGLDEPTHMASKPROC)load("glDepthMask"); + glad_glDepthRangex = (PFNGLDEPTHRANGEXPROC)load("glDepthRangex"); + glad_glDisable = (PFNGLDISABLEPROC)load("glDisable"); + glad_glDisableClientState = (PFNGLDISABLECLIENTSTATEPROC)load("glDisableClientState"); + glad_glDrawArrays = (PFNGLDRAWARRAYSPROC)load("glDrawArrays"); + glad_glDrawElements = (PFNGLDRAWELEMENTSPROC)load("glDrawElements"); + glad_glEnable = (PFNGLENABLEPROC)load("glEnable"); + glad_glEnableClientState = (PFNGLENABLECLIENTSTATEPROC)load("glEnableClientState"); + glad_glFinish = (PFNGLFINISHPROC)load("glFinish"); + glad_glFlush = (PFNGLFLUSHPROC)load("glFlush"); + glad_glFogx = (PFNGLFOGXPROC)load("glFogx"); + glad_glFogxv = (PFNGLFOGXVPROC)load("glFogxv"); + glad_glFrontFace = (PFNGLFRONTFACEPROC)load("glFrontFace"); + glad_glFrustumx = (PFNGLFRUSTUMXPROC)load("glFrustumx"); + glad_glGetBooleanv = (PFNGLGETBOOLEANVPROC)load("glGetBooleanv"); + glad_glGetBufferParameteriv = (PFNGLGETBUFFERPARAMETERIVPROC)load("glGetBufferParameteriv"); + glad_glGetClipPlanex = (PFNGLGETCLIPPLANEXPROC)load("glGetClipPlanex"); + glad_glGenBuffers = (PFNGLGENBUFFERSPROC)load("glGenBuffers"); + glad_glGenTextures = (PFNGLGENTEXTURESPROC)load("glGenTextures"); + glad_glGetError = (PFNGLGETERRORPROC)load("glGetError"); + glad_glGetFixedv 
= (PFNGLGETFIXEDVPROC)load("glGetFixedv"); + glad_glGetIntegerv = (PFNGLGETINTEGERVPROC)load("glGetIntegerv"); + glad_glGetLightxv = (PFNGLGETLIGHTXVPROC)load("glGetLightxv"); + glad_glGetMaterialxv = (PFNGLGETMATERIALXVPROC)load("glGetMaterialxv"); + glad_glGetPointerv = (PFNGLGETPOINTERVPROC)load("glGetPointerv"); + glad_glGetString = (PFNGLGETSTRINGPROC)load("glGetString"); + glad_glGetTexEnviv = (PFNGLGETTEXENVIVPROC)load("glGetTexEnviv"); + glad_glGetTexEnvxv = (PFNGLGETTEXENVXVPROC)load("glGetTexEnvxv"); + glad_glGetTexParameteriv = (PFNGLGETTEXPARAMETERIVPROC)load("glGetTexParameteriv"); + glad_glGetTexParameterxv = (PFNGLGETTEXPARAMETERXVPROC)load("glGetTexParameterxv"); + glad_glHint = (PFNGLHINTPROC)load("glHint"); + glad_glIsBuffer = (PFNGLISBUFFERPROC)load("glIsBuffer"); + glad_glIsEnabled = (PFNGLISENABLEDPROC)load("glIsEnabled"); + glad_glIsTexture = (PFNGLISTEXTUREPROC)load("glIsTexture"); + glad_glLightModelx = (PFNGLLIGHTMODELXPROC)load("glLightModelx"); + glad_glLightModelxv = (PFNGLLIGHTMODELXVPROC)load("glLightModelxv"); + glad_glLightx = (PFNGLLIGHTXPROC)load("glLightx"); + glad_glLightxv = (PFNGLLIGHTXVPROC)load("glLightxv"); + glad_glLineWidthx = (PFNGLLINEWIDTHXPROC)load("glLineWidthx"); + glad_glLoadIdentity = (PFNGLLOADIDENTITYPROC)load("glLoadIdentity"); + glad_glLoadMatrixx = (PFNGLLOADMATRIXXPROC)load("glLoadMatrixx"); + glad_glLogicOp = (PFNGLLOGICOPPROC)load("glLogicOp"); + glad_glMaterialx = (PFNGLMATERIALXPROC)load("glMaterialx"); + glad_glMaterialxv = (PFNGLMATERIALXVPROC)load("glMaterialxv"); + glad_glMatrixMode = (PFNGLMATRIXMODEPROC)load("glMatrixMode"); + glad_glMultMatrixx = (PFNGLMULTMATRIXXPROC)load("glMultMatrixx"); + glad_glMultiTexCoord4x = (PFNGLMULTITEXCOORD4XPROC)load("glMultiTexCoord4x"); + glad_glNormal3x = (PFNGLNORMAL3XPROC)load("glNormal3x"); + glad_glNormalPointer = (PFNGLNORMALPOINTERPROC)load("glNormalPointer"); + glad_glOrthox = (PFNGLORTHOXPROC)load("glOrthox"); + glad_glPixelStorei = 
(PFNGLPIXELSTOREIPROC)load("glPixelStorei"); + glad_glPointParameterx = (PFNGLPOINTPARAMETERXPROC)load("glPointParameterx"); + glad_glPointParameterxv = (PFNGLPOINTPARAMETERXVPROC)load("glPointParameterxv"); + glad_glPointSizex = (PFNGLPOINTSIZEXPROC)load("glPointSizex"); + glad_glPolygonOffsetx = (PFNGLPOLYGONOFFSETXPROC)load("glPolygonOffsetx"); + glad_glPopMatrix = (PFNGLPOPMATRIXPROC)load("glPopMatrix"); + glad_glPushMatrix = (PFNGLPUSHMATRIXPROC)load("glPushMatrix"); + glad_glReadPixels = (PFNGLREADPIXELSPROC)load("glReadPixels"); + glad_glRotatex = (PFNGLROTATEXPROC)load("glRotatex"); + glad_glSampleCoverage = (PFNGLSAMPLECOVERAGEPROC)load("glSampleCoverage"); + glad_glSampleCoveragex = (PFNGLSAMPLECOVERAGEXPROC)load("glSampleCoveragex"); + glad_glScalex = (PFNGLSCALEXPROC)load("glScalex"); + glad_glScissor = (PFNGLSCISSORPROC)load("glScissor"); + glad_glShadeModel = (PFNGLSHADEMODELPROC)load("glShadeModel"); + glad_glStencilFunc = (PFNGLSTENCILFUNCPROC)load("glStencilFunc"); + glad_glStencilMask = (PFNGLSTENCILMASKPROC)load("glStencilMask"); + glad_glStencilOp = (PFNGLSTENCILOPPROC)load("glStencilOp"); + glad_glTexCoordPointer = (PFNGLTEXCOORDPOINTERPROC)load("glTexCoordPointer"); + glad_glTexEnvi = (PFNGLTEXENVIPROC)load("glTexEnvi"); + glad_glTexEnvx = (PFNGLTEXENVXPROC)load("glTexEnvx"); + glad_glTexEnviv = (PFNGLTEXENVIVPROC)load("glTexEnviv"); + glad_glTexEnvxv = (PFNGLTEXENVXVPROC)load("glTexEnvxv"); + glad_glTexImage2D = (PFNGLTEXIMAGE2DPROC)load("glTexImage2D"); + glad_glTexParameteri = (PFNGLTEXPARAMETERIPROC)load("glTexParameteri"); + glad_glTexParameterx = (PFNGLTEXPARAMETERXPROC)load("glTexParameterx"); + glad_glTexParameteriv = (PFNGLTEXPARAMETERIVPROC)load("glTexParameteriv"); + glad_glTexParameterxv = (PFNGLTEXPARAMETERXVPROC)load("glTexParameterxv"); + glad_glTexSubImage2D = (PFNGLTEXSUBIMAGE2DPROC)load("glTexSubImage2D"); + glad_glTranslatex = (PFNGLTRANSLATEXPROC)load("glTranslatex"); + glad_glVertexPointer = 
(PFNGLVERTEXPOINTERPROC)load("glVertexPointer"); + glad_glViewport = (PFNGLVIEWPORTPROC)load("glViewport"); +} +static void load_GL_APPLE_copy_texture_levels(GLADloadproc load) { + if(!GLAD_GL_APPLE_copy_texture_levels) return; + glad_glCopyTextureLevelsAPPLE = (PFNGLCOPYTEXTURELEVELSAPPLEPROC)load("glCopyTextureLevelsAPPLE"); +} +static void load_GL_APPLE_framebuffer_multisample(GLADloadproc load) { + if(!GLAD_GL_APPLE_framebuffer_multisample) return; + glad_glRenderbufferStorageMultisampleAPPLE = (PFNGLRENDERBUFFERSTORAGEMULTISAMPLEAPPLEPROC)load("glRenderbufferStorageMultisampleAPPLE"); + glad_glResolveMultisampleFramebufferAPPLE = (PFNGLRESOLVEMULTISAMPLEFRAMEBUFFERAPPLEPROC)load("glResolveMultisampleFramebufferAPPLE"); +} +static void load_GL_APPLE_sync(GLADloadproc load) { + if(!GLAD_GL_APPLE_sync) return; + glad_glFenceSyncAPPLE = (PFNGLFENCESYNCAPPLEPROC)load("glFenceSyncAPPLE"); + glad_glIsSyncAPPLE = (PFNGLISSYNCAPPLEPROC)load("glIsSyncAPPLE"); + glad_glDeleteSyncAPPLE = (PFNGLDELETESYNCAPPLEPROC)load("glDeleteSyncAPPLE"); + glad_glClientWaitSyncAPPLE = (PFNGLCLIENTWAITSYNCAPPLEPROC)load("glClientWaitSyncAPPLE"); + glad_glWaitSyncAPPLE = (PFNGLWAITSYNCAPPLEPROC)load("glWaitSyncAPPLE"); + glad_glGetInteger64vAPPLE = (PFNGLGETINTEGER64VAPPLEPROC)load("glGetInteger64vAPPLE"); + glad_glGetSyncivAPPLE = (PFNGLGETSYNCIVAPPLEPROC)load("glGetSyncivAPPLE"); +} +static void load_GL_EXT_discard_framebuffer(GLADloadproc load) { + if(!GLAD_GL_EXT_discard_framebuffer) return; + glad_glDiscardFramebufferEXT = (PFNGLDISCARDFRAMEBUFFEREXTPROC)load("glDiscardFramebufferEXT"); +} +static void load_GL_EXT_map_buffer_range(GLADloadproc load) { + if(!GLAD_GL_EXT_map_buffer_range) return; + glad_glMapBufferRangeEXT = (PFNGLMAPBUFFERRANGEEXTPROC)load("glMapBufferRangeEXT"); + glad_glFlushMappedBufferRangeEXT = (PFNGLFLUSHMAPPEDBUFFERRANGEEXTPROC)load("glFlushMappedBufferRangeEXT"); +} +static void load_GL_EXT_multisampled_render_to_texture(GLADloadproc load) { + 
if(!GLAD_GL_EXT_multisampled_render_to_texture) return; + glad_glRenderbufferStorageMultisampleEXT = (PFNGLRENDERBUFFERSTORAGEMULTISAMPLEEXTPROC)load("glRenderbufferStorageMultisampleEXT"); + glad_glFramebufferTexture2DMultisampleEXT = (PFNGLFRAMEBUFFERTEXTURE2DMULTISAMPLEEXTPROC)load("glFramebufferTexture2DMultisampleEXT"); +} +static void load_GL_EXT_robustness(GLADloadproc load) { + if(!GLAD_GL_EXT_robustness) return; + glad_glGetGraphicsResetStatusEXT = (PFNGLGETGRAPHICSRESETSTATUSEXTPROC)load("glGetGraphicsResetStatusEXT"); + glad_glReadnPixelsEXT = (PFNGLREADNPIXELSEXTPROC)load("glReadnPixelsEXT"); + glad_glGetnUniformfvEXT = (PFNGLGETNUNIFORMFVEXTPROC)load("glGetnUniformfvEXT"); + glad_glGetnUniformivEXT = (PFNGLGETNUNIFORMIVEXTPROC)load("glGetnUniformivEXT"); +} +static void load_GL_IMG_multisampled_render_to_texture(GLADloadproc load) { + if(!GLAD_GL_IMG_multisampled_render_to_texture) return; + glad_glRenderbufferStorageMultisampleIMG = (PFNGLRENDERBUFFERSTORAGEMULTISAMPLEIMGPROC)load("glRenderbufferStorageMultisampleIMG"); + glad_glFramebufferTexture2DMultisampleIMG = (PFNGLFRAMEBUFFERTEXTURE2DMULTISAMPLEIMGPROC)load("glFramebufferTexture2DMultisampleIMG"); +} +static void load_GL_IMG_user_clip_plane(GLADloadproc load) { + if(!GLAD_GL_IMG_user_clip_plane) return; + glad_glClipPlanefIMG = (PFNGLCLIPPLANEFIMGPROC)load("glClipPlanefIMG"); + glad_glClipPlanexIMG = (PFNGLCLIPPLANEXIMGPROC)load("glClipPlanexIMG"); +} +static void load_GL_OES_EGL_image(GLADloadproc load) { + if(!GLAD_GL_OES_EGL_image) return; + glad_glEGLImageTargetTexture2DOES = (PFNGLEGLIMAGETARGETTEXTURE2DOESPROC)load("glEGLImageTargetTexture2DOES"); + glad_glEGLImageTargetRenderbufferStorageOES = (PFNGLEGLIMAGETARGETRENDERBUFFERSTORAGEOESPROC)load("glEGLImageTargetRenderbufferStorageOES"); +} +static void load_GL_OES_EGL_image_external(GLADloadproc load) { + if(!GLAD_GL_OES_EGL_image_external) return; + glad_glEGLImageTargetTexture2DOES = 
(PFNGLEGLIMAGETARGETTEXTURE2DOESPROC)load("glEGLImageTargetTexture2DOES"); +} +static void load_GL_OES_blend_equation_separate(GLADloadproc load) { + if(!GLAD_GL_OES_blend_equation_separate) return; + glad_glBlendEquationSeparateOES = (PFNGLBLENDEQUATIONSEPARATEOESPROC)load("glBlendEquationSeparateOES"); +} +static void load_GL_OES_blend_func_separate(GLADloadproc load) { + if(!GLAD_GL_OES_blend_func_separate) return; + glad_glBlendFuncSeparateOES = (PFNGLBLENDFUNCSEPARATEOESPROC)load("glBlendFuncSeparateOES"); +} +static void load_GL_OES_blend_subtract(GLADloadproc load) { + if(!GLAD_GL_OES_blend_subtract) return; + glad_glBlendEquationOES = (PFNGLBLENDEQUATIONOESPROC)load("glBlendEquationOES"); +} +static void load_GL_OES_draw_texture(GLADloadproc load) { + if(!GLAD_GL_OES_draw_texture) return; + glad_glDrawTexsOES = (PFNGLDRAWTEXSOESPROC)load("glDrawTexsOES"); + glad_glDrawTexiOES = (PFNGLDRAWTEXIOESPROC)load("glDrawTexiOES"); + glad_glDrawTexxOES = (PFNGLDRAWTEXXOESPROC)load("glDrawTexxOES"); + glad_glDrawTexsvOES = (PFNGLDRAWTEXSVOESPROC)load("glDrawTexsvOES"); + glad_glDrawTexivOES = (PFNGLDRAWTEXIVOESPROC)load("glDrawTexivOES"); + glad_glDrawTexxvOES = (PFNGLDRAWTEXXVOESPROC)load("glDrawTexxvOES"); + glad_glDrawTexfOES = (PFNGLDRAWTEXFOESPROC)load("glDrawTexfOES"); + glad_glDrawTexfvOES = (PFNGLDRAWTEXFVOESPROC)load("glDrawTexfvOES"); +} +static void load_GL_OES_framebuffer_object(GLADloadproc load) { + if(!GLAD_GL_OES_framebuffer_object) return; + glad_glIsRenderbufferOES = (PFNGLISRENDERBUFFEROESPROC)load("glIsRenderbufferOES"); + glad_glBindRenderbufferOES = (PFNGLBINDRENDERBUFFEROESPROC)load("glBindRenderbufferOES"); + glad_glDeleteRenderbuffersOES = (PFNGLDELETERENDERBUFFERSOESPROC)load("glDeleteRenderbuffersOES"); + glad_glGenRenderbuffersOES = (PFNGLGENRENDERBUFFERSOESPROC)load("glGenRenderbuffersOES"); + glad_glRenderbufferStorageOES = (PFNGLRENDERBUFFERSTORAGEOESPROC)load("glRenderbufferStorageOES"); + glad_glGetRenderbufferParameterivOES = 
(PFNGLGETRENDERBUFFERPARAMETERIVOESPROC)load("glGetRenderbufferParameterivOES"); + glad_glIsFramebufferOES = (PFNGLISFRAMEBUFFEROESPROC)load("glIsFramebufferOES"); + glad_glBindFramebufferOES = (PFNGLBINDFRAMEBUFFEROESPROC)load("glBindFramebufferOES"); + glad_glDeleteFramebuffersOES = (PFNGLDELETEFRAMEBUFFERSOESPROC)load("glDeleteFramebuffersOES"); + glad_glGenFramebuffersOES = (PFNGLGENFRAMEBUFFERSOESPROC)load("glGenFramebuffersOES"); + glad_glCheckFramebufferStatusOES = (PFNGLCHECKFRAMEBUFFERSTATUSOESPROC)load("glCheckFramebufferStatusOES"); + glad_glFramebufferRenderbufferOES = (PFNGLFRAMEBUFFERRENDERBUFFEROESPROC)load("glFramebufferRenderbufferOES"); + glad_glFramebufferTexture2DOES = (PFNGLFRAMEBUFFERTEXTURE2DOESPROC)load("glFramebufferTexture2DOES"); + glad_glGetFramebufferAttachmentParameterivOES = (PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVOESPROC)load("glGetFramebufferAttachmentParameterivOES"); + glad_glGenerateMipmapOES = (PFNGLGENERATEMIPMAPOESPROC)load("glGenerateMipmapOES"); +} +static void load_GL_OES_mapbuffer(GLADloadproc load) { + if(!GLAD_GL_OES_mapbuffer) return; + glad_glMapBufferOES = (PFNGLMAPBUFFEROESPROC)load("glMapBufferOES"); + glad_glUnmapBufferOES = (PFNGLUNMAPBUFFEROESPROC)load("glUnmapBufferOES"); + glad_glGetBufferPointervOES = (PFNGLGETBUFFERPOINTERVOESPROC)load("glGetBufferPointervOES"); +} +static void load_GL_OES_matrix_palette(GLADloadproc load) { + if(!GLAD_GL_OES_matrix_palette) return; + glad_glCurrentPaletteMatrixOES = (PFNGLCURRENTPALETTEMATRIXOESPROC)load("glCurrentPaletteMatrixOES"); + glad_glLoadPaletteFromModelViewMatrixOES = (PFNGLLOADPALETTEFROMMODELVIEWMATRIXOESPROC)load("glLoadPaletteFromModelViewMatrixOES"); + glad_glMatrixIndexPointerOES = (PFNGLMATRIXINDEXPOINTEROESPROC)load("glMatrixIndexPointerOES"); + glad_glWeightPointerOES = (PFNGLWEIGHTPOINTEROESPROC)load("glWeightPointerOES"); +} +static void load_GL_OES_point_size_array(GLADloadproc load) { + if(!GLAD_GL_OES_point_size_array) return; + 
glad_glPointSizePointerOES = (PFNGLPOINTSIZEPOINTEROESPROC)load("glPointSizePointerOES"); +} +static void load_GL_OES_texture_cube_map(GLADloadproc load) { + if(!GLAD_GL_OES_texture_cube_map) return; + glad_glTexGenfOES = (PFNGLTEXGENFOESPROC)load("glTexGenfOES"); + glad_glTexGenfvOES = (PFNGLTEXGENFVOESPROC)load("glTexGenfvOES"); + glad_glTexGeniOES = (PFNGLTEXGENIOESPROC)load("glTexGeniOES"); + glad_glTexGenivOES = (PFNGLTEXGENIVOESPROC)load("glTexGenivOES"); + glad_glTexGenxOES = (PFNGLTEXGENXOESPROC)load("glTexGenxOES"); + glad_glTexGenxvOES = (PFNGLTEXGENXVOESPROC)load("glTexGenxvOES"); + glad_glGetTexGenfvOES = (PFNGLGETTEXGENFVOESPROC)load("glGetTexGenfvOES"); + glad_glGetTexGenivOES = (PFNGLGETTEXGENIVOESPROC)load("glGetTexGenivOES"); + glad_glGetTexGenxvOES = (PFNGLGETTEXGENXVOESPROC)load("glGetTexGenxvOES"); +} +static void load_GL_OES_vertex_array_object(GLADloadproc load) { + if(!GLAD_GL_OES_vertex_array_object) return; + glad_glBindVertexArrayOES = (PFNGLBINDVERTEXARRAYOESPROC)load("glBindVertexArrayOES"); + glad_glDeleteVertexArraysOES = (PFNGLDELETEVERTEXARRAYSOESPROC)load("glDeleteVertexArraysOES"); + glad_glGenVertexArraysOES = (PFNGLGENVERTEXARRAYSOESPROC)load("glGenVertexArraysOES"); + glad_glIsVertexArrayOES = (PFNGLISVERTEXARRAYOESPROC)load("glIsVertexArrayOES"); +} +static void load_GL_QCOM_driver_control(GLADloadproc load) { + if(!GLAD_GL_QCOM_driver_control) return; + glad_glGetDriverControlsQCOM = (PFNGLGETDRIVERCONTROLSQCOMPROC)load("glGetDriverControlsQCOM"); + glad_glGetDriverControlStringQCOM = (PFNGLGETDRIVERCONTROLSTRINGQCOMPROC)load("glGetDriverControlStringQCOM"); + glad_glEnableDriverControlQCOM = (PFNGLENABLEDRIVERCONTROLQCOMPROC)load("glEnableDriverControlQCOM"); + glad_glDisableDriverControlQCOM = (PFNGLDISABLEDRIVERCONTROLQCOMPROC)load("glDisableDriverControlQCOM"); +} +static void load_GL_QCOM_extended_get(GLADloadproc load) { + if(!GLAD_GL_QCOM_extended_get) return; + glad_glExtGetTexturesQCOM = 
(PFNGLEXTGETTEXTURESQCOMPROC)load("glExtGetTexturesQCOM"); + glad_glExtGetBuffersQCOM = (PFNGLEXTGETBUFFERSQCOMPROC)load("glExtGetBuffersQCOM"); + glad_glExtGetRenderbuffersQCOM = (PFNGLEXTGETRENDERBUFFERSQCOMPROC)load("glExtGetRenderbuffersQCOM"); + glad_glExtGetFramebuffersQCOM = (PFNGLEXTGETFRAMEBUFFERSQCOMPROC)load("glExtGetFramebuffersQCOM"); + glad_glExtGetTexLevelParameterivQCOM = (PFNGLEXTGETTEXLEVELPARAMETERIVQCOMPROC)load("glExtGetTexLevelParameterivQCOM"); + glad_glExtTexObjectStateOverrideiQCOM = (PFNGLEXTTEXOBJECTSTATEOVERRIDEIQCOMPROC)load("glExtTexObjectStateOverrideiQCOM"); + glad_glExtGetTexSubImageQCOM = (PFNGLEXTGETTEXSUBIMAGEQCOMPROC)load("glExtGetTexSubImageQCOM"); + glad_glExtGetBufferPointervQCOM = (PFNGLEXTGETBUFFERPOINTERVQCOMPROC)load("glExtGetBufferPointervQCOM"); +} +static void load_GL_QCOM_extended_get2(GLADloadproc load) { + if(!GLAD_GL_QCOM_extended_get2) return; + glad_glExtGetShadersQCOM = (PFNGLEXTGETSHADERSQCOMPROC)load("glExtGetShadersQCOM"); + glad_glExtGetProgramsQCOM = (PFNGLEXTGETPROGRAMSQCOMPROC)load("glExtGetProgramsQCOM"); + glad_glExtIsProgramBinaryQCOM = (PFNGLEXTISPROGRAMBINARYQCOMPROC)load("glExtIsProgramBinaryQCOM"); + glad_glExtGetProgramBinarySourceQCOM = (PFNGLEXTGETPROGRAMBINARYSOURCEQCOMPROC)load("glExtGetProgramBinarySourceQCOM"); +} +static void load_GL_QCOM_tiled_rendering(GLADloadproc load) { + if(!GLAD_GL_QCOM_tiled_rendering) return; + glad_glStartTilingQCOM = (PFNGLSTARTTILINGQCOMPROC)load("glStartTilingQCOM"); + glad_glEndTilingQCOM = (PFNGLENDTILINGQCOMPROC)load("glEndTilingQCOM"); +} +static int find_extensionsGLES1(void) { + if (!get_exts()) return 0; + GLAD_GL_AMD_compressed_3DC_texture = has_ext("GL_AMD_compressed_3DC_texture"); + GLAD_GL_AMD_compressed_ATC_texture = has_ext("GL_AMD_compressed_ATC_texture"); + GLAD_GL_APPLE_copy_texture_levels = has_ext("GL_APPLE_copy_texture_levels"); + GLAD_GL_APPLE_framebuffer_multisample = has_ext("GL_APPLE_framebuffer_multisample"); + GLAD_GL_APPLE_sync = 
has_ext("GL_APPLE_sync"); + GLAD_GL_APPLE_texture_2D_limited_npot = has_ext("GL_APPLE_texture_2D_limited_npot"); + GLAD_GL_APPLE_texture_format_BGRA8888 = has_ext("GL_APPLE_texture_format_BGRA8888"); + GLAD_GL_APPLE_texture_max_level = has_ext("GL_APPLE_texture_max_level"); + GLAD_GL_ARM_rgba8 = has_ext("GL_ARM_rgba8"); + GLAD_GL_EXT_blend_minmax = has_ext("GL_EXT_blend_minmax"); + GLAD_GL_EXT_debug_marker = has_ext("GL_EXT_debug_marker"); + GLAD_GL_EXT_discard_framebuffer = has_ext("GL_EXT_discard_framebuffer"); + GLAD_GL_EXT_map_buffer_range = has_ext("GL_EXT_map_buffer_range"); + GLAD_GL_EXT_multi_draw_arrays = has_ext("GL_EXT_multi_draw_arrays"); + GLAD_GL_EXT_multisampled_render_to_texture = has_ext("GL_EXT_multisampled_render_to_texture"); + GLAD_GL_EXT_read_format_bgra = has_ext("GL_EXT_read_format_bgra"); + GLAD_GL_EXT_robustness = has_ext("GL_EXT_robustness"); + GLAD_GL_EXT_sRGB = has_ext("GL_EXT_sRGB"); + GLAD_GL_EXT_texture_compression_dxt1 = has_ext("GL_EXT_texture_compression_dxt1"); + GLAD_GL_EXT_texture_filter_anisotropic = has_ext("GL_EXT_texture_filter_anisotropic"); + GLAD_GL_EXT_texture_format_BGRA8888 = has_ext("GL_EXT_texture_format_BGRA8888"); + GLAD_GL_EXT_texture_lod_bias = has_ext("GL_EXT_texture_lod_bias"); + GLAD_GL_EXT_texture_storage = has_ext("GL_EXT_texture_storage"); + GLAD_GL_IMG_multisampled_render_to_texture = has_ext("GL_IMG_multisampled_render_to_texture"); + GLAD_GL_IMG_read_format = has_ext("GL_IMG_read_format"); + GLAD_GL_IMG_texture_compression_pvrtc = has_ext("GL_IMG_texture_compression_pvrtc"); + GLAD_GL_IMG_texture_env_enhanced_fixed_function = has_ext("GL_IMG_texture_env_enhanced_fixed_function"); + GLAD_GL_IMG_user_clip_plane = has_ext("GL_IMG_user_clip_plane"); + GLAD_GL_KHR_debug = has_ext("GL_KHR_debug"); + GLAD_GL_NV_fence = has_ext("GL_NV_fence"); + GLAD_GL_OES_EGL_image = has_ext("GL_OES_EGL_image"); + GLAD_GL_OES_EGL_image_external = has_ext("GL_OES_EGL_image_external"); + GLAD_GL_OES_blend_equation_separate = 
has_ext("GL_OES_blend_equation_separate"); + GLAD_GL_OES_blend_func_separate = has_ext("GL_OES_blend_func_separate"); + GLAD_GL_OES_blend_subtract = has_ext("GL_OES_blend_subtract"); + GLAD_GL_OES_byte_coordinates = has_ext("GL_OES_byte_coordinates"); + GLAD_GL_OES_compressed_ETC1_RGB8_sub_texture = has_ext("GL_OES_compressed_ETC1_RGB8_sub_texture"); + GLAD_GL_OES_compressed_ETC1_RGB8_texture = has_ext("GL_OES_compressed_ETC1_RGB8_texture"); + GLAD_GL_OES_compressed_paletted_texture = has_ext("GL_OES_compressed_paletted_texture"); + GLAD_GL_OES_depth24 = has_ext("GL_OES_depth24"); + GLAD_GL_OES_depth32 = has_ext("GL_OES_depth32"); + GLAD_GL_OES_draw_texture = has_ext("GL_OES_draw_texture"); + GLAD_GL_OES_element_index_uint = has_ext("GL_OES_element_index_uint"); + GLAD_GL_OES_extended_matrix_palette = has_ext("GL_OES_extended_matrix_palette"); + GLAD_GL_OES_fbo_render_mipmap = has_ext("GL_OES_fbo_render_mipmap"); + GLAD_GL_OES_fixed_point = has_ext("GL_OES_fixed_point"); + GLAD_GL_OES_framebuffer_object = has_ext("GL_OES_framebuffer_object"); + GLAD_GL_OES_mapbuffer = has_ext("GL_OES_mapbuffer"); + GLAD_GL_OES_matrix_get = has_ext("GL_OES_matrix_get"); + GLAD_GL_OES_matrix_palette = has_ext("GL_OES_matrix_palette"); + GLAD_GL_OES_packed_depth_stencil = has_ext("GL_OES_packed_depth_stencil"); + GLAD_GL_OES_point_size_array = has_ext("GL_OES_point_size_array"); + GLAD_GL_OES_point_sprite = has_ext("GL_OES_point_sprite"); + GLAD_GL_OES_query_matrix = has_ext("GL_OES_query_matrix"); + GLAD_GL_OES_read_format = has_ext("GL_OES_read_format"); + GLAD_GL_OES_required_internalformat = has_ext("GL_OES_required_internalformat"); + GLAD_GL_OES_rgb8_rgba8 = has_ext("GL_OES_rgb8_rgba8"); + GLAD_GL_OES_single_precision = has_ext("GL_OES_single_precision"); + GLAD_GL_OES_stencil1 = has_ext("GL_OES_stencil1"); + GLAD_GL_OES_stencil4 = has_ext("GL_OES_stencil4"); + GLAD_GL_OES_stencil8 = has_ext("GL_OES_stencil8"); + GLAD_GL_OES_stencil_wrap = has_ext("GL_OES_stencil_wrap"); + 
GLAD_GL_OES_surfaceless_context = has_ext("GL_OES_surfaceless_context"); + GLAD_GL_OES_texture_cube_map = has_ext("GL_OES_texture_cube_map"); + GLAD_GL_OES_texture_env_crossbar = has_ext("GL_OES_texture_env_crossbar"); + GLAD_GL_OES_texture_mirrored_repeat = has_ext("GL_OES_texture_mirrored_repeat"); + GLAD_GL_OES_texture_npot = has_ext("GL_OES_texture_npot"); + GLAD_GL_OES_vertex_array_object = has_ext("GL_OES_vertex_array_object"); + GLAD_GL_QCOM_driver_control = has_ext("GL_QCOM_driver_control"); + GLAD_GL_QCOM_extended_get = has_ext("GL_QCOM_extended_get"); + GLAD_GL_QCOM_extended_get2 = has_ext("GL_QCOM_extended_get2"); + GLAD_GL_QCOM_perfmon_global_mode = has_ext("GL_QCOM_perfmon_global_mode"); + GLAD_GL_QCOM_tiled_rendering = has_ext("GL_QCOM_tiled_rendering"); + GLAD_GL_QCOM_writeonly_rendering = has_ext("GL_QCOM_writeonly_rendering"); + free_exts(); + return 1; +} + +static void find_coreGLES1(void) { + + /* Thank you @elmindreda + * https://github.com/elmindreda/greg/blob/master/templates/greg.c.in#L176 + * https://github.com/glfw/glfw/blob/master/src/context.c#L36 + */ + int i, major, minor; + + const char* version; + const char* prefixes[] = { + "OpenGL ES-CM ", + "OpenGL ES-CL ", + "OpenGL ES ", + NULL + }; + + version = (const char*) glGetString(GL_VERSION); + if (!version) return; + + for (i = 0; prefixes[i]; i++) { + const size_t length = strlen(prefixes[i]); + if (strncmp(version, prefixes[i], length) == 0) { + version += length; + break; + } + } + +/* PR #18 */ +#ifdef _MSC_VER + sscanf_s(version, "%d.%d", &major, &minor); +#else + sscanf(version, "%d.%d", &major, &minor); +#endif + + GLVersion.major = major; GLVersion.minor = minor; + max_loaded_major = major; max_loaded_minor = minor; + GLAD_GL_VERSION_ES_CM_1_0 = (major == 1 && minor >= 0) || major > 1; + if (GLVersion.major > 1 || (GLVersion.major >= 1 && GLVersion.minor >= 0)) { + max_loaded_major = 1; + max_loaded_minor = 0; + } +} + +int gladLoadGLES1Loader(GLADloadproc load) { + 
GLVersion.major = 0; GLVersion.minor = 0; + glGetString = (PFNGLGETSTRINGPROC)load("glGetString"); + if(glGetString == NULL) return 0; + if(glGetString(GL_VERSION) == NULL) return 0; + find_coreGLES1(); + load_GL_VERSION_ES_CM_1_0(load); + + if (!find_extensionsGLES1()) return 0; + load_GL_APPLE_copy_texture_levels(load); + load_GL_APPLE_framebuffer_multisample(load); + load_GL_APPLE_sync(load); + load_GL_EXT_blend_minmax(load); + load_GL_EXT_debug_marker(load); + load_GL_EXT_discard_framebuffer(load); + load_GL_EXT_map_buffer_range(load); + load_GL_EXT_multi_draw_arrays(load); + load_GL_EXT_multisampled_render_to_texture(load); + load_GL_EXT_robustness(load); + load_GL_EXT_texture_storage(load); + load_GL_IMG_multisampled_render_to_texture(load); + load_GL_IMG_user_clip_plane(load); + load_GL_KHR_debug(load); + load_GL_NV_fence(load); + load_GL_OES_EGL_image(load); + load_GL_OES_EGL_image_external(load); + load_GL_OES_blend_equation_separate(load); + load_GL_OES_blend_func_separate(load); + load_GL_OES_blend_subtract(load); + load_GL_OES_byte_coordinates(load); + load_GL_OES_draw_texture(load); + load_GL_OES_fixed_point(load); + load_GL_OES_framebuffer_object(load); + load_GL_OES_mapbuffer(load); + load_GL_OES_matrix_palette(load); + load_GL_OES_point_size_array(load); + load_GL_OES_query_matrix(load); + load_GL_OES_single_precision(load); + load_GL_OES_texture_cube_map(load); + load_GL_OES_vertex_array_object(load); + load_GL_QCOM_driver_control(load); + load_GL_QCOM_extended_get(load); + load_GL_QCOM_extended_get2(load); + load_GL_QCOM_tiled_rendering(load); + return GLVersion.major != 0 || GLVersion.minor != 0; +} + static void load_GL_ES_VERSION_2_0(GLADloadproc load) { if(!GLAD_GL_ES_VERSION_2_0) return; glad_glActiveTexture = (PFNGLACTIVETEXTUREPROC)load("glActiveTexture"); @@ -2063,25 +11773,6 @@ static void load_GL_ES_VERSION_3_2(GLADloadproc load) { glad_glTexBufferRange = (PFNGLTEXBUFFERRANGEPROC)load("glTexBufferRange"); glad_glTexStorage3DMultisample = 
(PFNGLTEXSTORAGE3DMULTISAMPLEPROC)load("glTexStorage3DMultisample"); } -static void load_GL_AMD_framebuffer_multisample_advanced(GLADloadproc load) { - if(!GLAD_GL_AMD_framebuffer_multisample_advanced) return; - glad_glRenderbufferStorageMultisampleAdvancedAMD = (PFNGLRENDERBUFFERSTORAGEMULTISAMPLEADVANCEDAMDPROC)load("glRenderbufferStorageMultisampleAdvancedAMD"); - glad_glNamedRenderbufferStorageMultisampleAdvancedAMD = (PFNGLNAMEDRENDERBUFFERSTORAGEMULTISAMPLEADVANCEDAMDPROC)load("glNamedRenderbufferStorageMultisampleAdvancedAMD"); -} -static void load_GL_AMD_performance_monitor(GLADloadproc load) { - if(!GLAD_GL_AMD_performance_monitor) return; - glad_glGetPerfMonitorGroupsAMD = (PFNGLGETPERFMONITORGROUPSAMDPROC)load("glGetPerfMonitorGroupsAMD"); - glad_glGetPerfMonitorCountersAMD = (PFNGLGETPERFMONITORCOUNTERSAMDPROC)load("glGetPerfMonitorCountersAMD"); - glad_glGetPerfMonitorGroupStringAMD = (PFNGLGETPERFMONITORGROUPSTRINGAMDPROC)load("glGetPerfMonitorGroupStringAMD"); - glad_glGetPerfMonitorCounterStringAMD = (PFNGLGETPERFMONITORCOUNTERSTRINGAMDPROC)load("glGetPerfMonitorCounterStringAMD"); - glad_glGetPerfMonitorCounterInfoAMD = (PFNGLGETPERFMONITORCOUNTERINFOAMDPROC)load("glGetPerfMonitorCounterInfoAMD"); - glad_glGenPerfMonitorsAMD = (PFNGLGENPERFMONITORSAMDPROC)load("glGenPerfMonitorsAMD"); - glad_glDeletePerfMonitorsAMD = (PFNGLDELETEPERFMONITORSAMDPROC)load("glDeletePerfMonitorsAMD"); - glad_glSelectPerfMonitorCountersAMD = (PFNGLSELECTPERFMONITORCOUNTERSAMDPROC)load("glSelectPerfMonitorCountersAMD"); - glad_glBeginPerfMonitorAMD = (PFNGLBEGINPERFMONITORAMDPROC)load("glBeginPerfMonitorAMD"); - glad_glEndPerfMonitorAMD = (PFNGLENDPERFMONITORAMDPROC)load("glEndPerfMonitorAMD"); - glad_glGetPerfMonitorCounterDataAMD = (PFNGLGETPERFMONITORCOUNTERDATAAMDPROC)load("glGetPerfMonitorCounterDataAMD"); -} static void load_GL_ANGLE_framebuffer_blit(GLADloadproc load) { if(!GLAD_GL_ANGLE_framebuffer_blit) return; glad_glBlitFramebufferANGLE = 
(PFNGLBLITFRAMEBUFFERANGLEPROC)load("glBlitFramebufferANGLE"); @@ -2100,34 +11791,10 @@ static void load_GL_ANGLE_translated_shader_source(GLADloadproc load) { if(!GLAD_GL_ANGLE_translated_shader_source) return; glad_glGetTranslatedShaderSourceANGLE = (PFNGLGETTRANSLATEDSHADERSOURCEANGLEPROC)load("glGetTranslatedShaderSourceANGLE"); } -static void load_GL_APPLE_copy_texture_levels(GLADloadproc load) { - if(!GLAD_GL_APPLE_copy_texture_levels) return; - glad_glCopyTextureLevelsAPPLE = (PFNGLCOPYTEXTURELEVELSAPPLEPROC)load("glCopyTextureLevelsAPPLE"); -} -static void load_GL_APPLE_framebuffer_multisample(GLADloadproc load) { - if(!GLAD_GL_APPLE_framebuffer_multisample) return; - glad_glRenderbufferStorageMultisampleAPPLE = (PFNGLRENDERBUFFERSTORAGEMULTISAMPLEAPPLEPROC)load("glRenderbufferStorageMultisampleAPPLE"); - glad_glResolveMultisampleFramebufferAPPLE = (PFNGLRESOLVEMULTISAMPLEFRAMEBUFFERAPPLEPROC)load("glResolveMultisampleFramebufferAPPLE"); -} -static void load_GL_APPLE_sync(GLADloadproc load) { - if(!GLAD_GL_APPLE_sync) return; - glad_glFenceSyncAPPLE = (PFNGLFENCESYNCAPPLEPROC)load("glFenceSyncAPPLE"); - glad_glIsSyncAPPLE = (PFNGLISSYNCAPPLEPROC)load("glIsSyncAPPLE"); - glad_glDeleteSyncAPPLE = (PFNGLDELETESYNCAPPLEPROC)load("glDeleteSyncAPPLE"); - glad_glClientWaitSyncAPPLE = (PFNGLCLIENTWAITSYNCAPPLEPROC)load("glClientWaitSyncAPPLE"); - glad_glWaitSyncAPPLE = (PFNGLWAITSYNCAPPLEPROC)load("glWaitSyncAPPLE"); - glad_glGetInteger64vAPPLE = (PFNGLGETINTEGER64VAPPLEPROC)load("glGetInteger64vAPPLE"); - glad_glGetSyncivAPPLE = (PFNGLGETSYNCIVAPPLEPROC)load("glGetSyncivAPPLE"); -} static void load_GL_ARM_shader_core_properties(GLADloadproc load) { if(!GLAD_GL_ARM_shader_core_properties) return; glad_glMaxActiveShaderCoresARM = (PFNGLMAXACTIVESHADERCORESARMPROC)load("glMaxActiveShaderCoresARM"); } -static void load_GL_EXT_EGL_image_storage(GLADloadproc load) { - if(!GLAD_GL_EXT_EGL_image_storage) return; - glad_glEGLImageTargetTexStorageEXT = 
(PFNGLEGLIMAGETARGETTEXSTORAGEEXTPROC)load("glEGLImageTargetTexStorageEXT"); - glad_glEGLImageTargetTextureStorageEXT = (PFNGLEGLIMAGETARGETTEXTURESTORAGEEXTPROC)load("glEGLImageTargetTextureStorageEXT"); -} static void load_GL_EXT_base_instance(GLADloadproc load) { if(!GLAD_GL_EXT_base_instance) return; glad_glDrawArraysInstancedBaseInstanceEXT = (PFNGLDRAWARRAYSINSTANCEDBASEINSTANCEEXTPROC)load("glDrawArraysInstancedBaseInstanceEXT"); @@ -2141,10 +11808,6 @@ static void load_GL_EXT_blend_func_extended(GLADloadproc load) { glad_glGetProgramResourceLocationIndexEXT = (PFNGLGETPROGRAMRESOURCELOCATIONINDEXEXTPROC)load("glGetProgramResourceLocationIndexEXT"); glad_glGetFragDataIndexEXT = (PFNGLGETFRAGDATAINDEXEXTPROC)load("glGetFragDataIndexEXT"); } -static void load_GL_EXT_blend_minmax(GLADloadproc load) { - if(!GLAD_GL_EXT_blend_minmax) return; - glad_glBlendEquationEXT = (PFNGLBLENDEQUATIONEXTPROC)load("glBlendEquationEXT"); -} static void load_GL_EXT_buffer_storage(GLADloadproc load) { if(!GLAD_GL_EXT_buffer_storage) return; glad_glBufferStorageEXT = (PFNGLBUFFERSTORAGEEXTPROC)load("glBufferStorageEXT"); @@ -2162,21 +11825,6 @@ static void load_GL_EXT_copy_image(GLADloadproc load) { if(!GLAD_GL_EXT_copy_image) return; glad_glCopyImageSubDataEXT = (PFNGLCOPYIMAGESUBDATAEXTPROC)load("glCopyImageSubDataEXT"); } -static void load_GL_EXT_debug_label(GLADloadproc load) { - if(!GLAD_GL_EXT_debug_label) return; - glad_glLabelObjectEXT = (PFNGLLABELOBJECTEXTPROC)load("glLabelObjectEXT"); - glad_glGetObjectLabelEXT = (PFNGLGETOBJECTLABELEXTPROC)load("glGetObjectLabelEXT"); -} -static void load_GL_EXT_debug_marker(GLADloadproc load) { - if(!GLAD_GL_EXT_debug_marker) return; - glad_glInsertEventMarkerEXT = (PFNGLINSERTEVENTMARKEREXTPROC)load("glInsertEventMarkerEXT"); - glad_glPushGroupMarkerEXT = (PFNGLPUSHGROUPMARKEREXTPROC)load("glPushGroupMarkerEXT"); - glad_glPopGroupMarkerEXT = (PFNGLPOPGROUPMARKEREXTPROC)load("glPopGroupMarkerEXT"); -} -static void 
load_GL_EXT_discard_framebuffer(GLADloadproc load) { - if(!GLAD_GL_EXT_discard_framebuffer) return; - glad_glDiscardFramebufferEXT = (PFNGLDISCARDFRAMEBUFFEREXTPROC)load("glDiscardFramebufferEXT"); -} static void load_GL_EXT_disjoint_timer_query(GLADloadproc load) { if(!GLAD_GL_EXT_disjoint_timer_query) return; glad_glGenQueriesEXT = (PFNGLGENQUERIESEXTPROC)load("glGenQueriesEXT"); @@ -2214,33 +11862,11 @@ static void load_GL_EXT_draw_elements_base_vertex(GLADloadproc load) { glad_glDrawElementsInstancedBaseVertexEXT = (PFNGLDRAWELEMENTSINSTANCEDBASEVERTEXEXTPROC)load("glDrawElementsInstancedBaseVertexEXT"); glad_glMultiDrawElementsBaseVertexEXT = (PFNGLMULTIDRAWELEMENTSBASEVERTEXEXTPROC)load("glMultiDrawElementsBaseVertexEXT"); } -static void load_GL_EXT_draw_instanced(GLADloadproc load) { - if(!GLAD_GL_EXT_draw_instanced) return; - glad_glDrawArraysInstancedEXT = (PFNGLDRAWARRAYSINSTANCEDEXTPROC)load("glDrawArraysInstancedEXT"); - glad_glDrawElementsInstancedEXT = (PFNGLDRAWELEMENTSINSTANCEDEXTPROC)load("glDrawElementsInstancedEXT"); -} static void load_GL_EXT_draw_transform_feedback(GLADloadproc load) { if(!GLAD_GL_EXT_draw_transform_feedback) return; glad_glDrawTransformFeedbackEXT = (PFNGLDRAWTRANSFORMFEEDBACKEXTPROC)load("glDrawTransformFeedbackEXT"); glad_glDrawTransformFeedbackInstancedEXT = (PFNGLDRAWTRANSFORMFEEDBACKINSTANCEDEXTPROC)load("glDrawTransformFeedbackInstancedEXT"); } -static void load_GL_EXT_external_buffer(GLADloadproc load) { - if(!GLAD_GL_EXT_external_buffer) return; - glad_glBufferStorageExternalEXT = (PFNGLBUFFERSTORAGEEXTERNALEXTPROC)load("glBufferStorageExternalEXT"); - glad_glNamedBufferStorageExternalEXT = (PFNGLNAMEDBUFFERSTORAGEEXTERNALEXTPROC)load("glNamedBufferStorageExternalEXT"); -} -static void load_GL_EXT_fragment_shading_rate(GLADloadproc load) { - if(!GLAD_GL_EXT_fragment_shading_rate) return; - glad_glGetFragmentShadingRatesEXT = (PFNGLGETFRAGMENTSHADINGRATESEXTPROC)load("glGetFragmentShadingRatesEXT"); - 
glad_glShadingRateEXT = (PFNGLSHADINGRATEEXTPROC)load("glShadingRateEXT"); - glad_glShadingRateCombinerOpsEXT = (PFNGLSHADINGRATECOMBINEROPSEXTPROC)load("glShadingRateCombinerOpsEXT"); - glad_glFramebufferShadingRateEXT = (PFNGLFRAMEBUFFERSHADINGRATEEXTPROC)load("glFramebufferShadingRateEXT"); -} -static void load_GL_EXT_framebuffer_blit_layers(GLADloadproc load) { - if(!GLAD_GL_EXT_framebuffer_blit_layers) return; - glad_glBlitFramebufferLayersEXT = (PFNGLBLITFRAMEBUFFERLAYERSEXTPROC)load("glBlitFramebufferLayersEXT"); - glad_glBlitFramebufferLayerEXT = (PFNGLBLITFRAMEBUFFERLAYEREXTPROC)load("glBlitFramebufferLayerEXT"); -} static void load_GL_EXT_geometry_shader(GLADloadproc load) { if(!GLAD_GL_EXT_geometry_shader) return; glad_glFramebufferTextureEXT = (PFNGLFRAMEBUFFERTEXTUREEXTPROC)load("glFramebufferTextureEXT"); @@ -2251,64 +11877,11 @@ static void load_GL_EXT_instanced_arrays(GLADloadproc load) { glad_glDrawElementsInstancedEXT = (PFNGLDRAWELEMENTSINSTANCEDEXTPROC)load("glDrawElementsInstancedEXT"); glad_glVertexAttribDivisorEXT = (PFNGLVERTEXATTRIBDIVISOREXTPROC)load("glVertexAttribDivisorEXT"); } -static void load_GL_EXT_map_buffer_range(GLADloadproc load) { - if(!GLAD_GL_EXT_map_buffer_range) return; - glad_glMapBufferRangeEXT = (PFNGLMAPBUFFERRANGEEXTPROC)load("glMapBufferRangeEXT"); - glad_glFlushMappedBufferRangeEXT = (PFNGLFLUSHMAPPEDBUFFERRANGEEXTPROC)load("glFlushMappedBufferRangeEXT"); -} -static void load_GL_EXT_memory_object(GLADloadproc load) { - if(!GLAD_GL_EXT_memory_object) return; - glad_glGetUnsignedBytevEXT = (PFNGLGETUNSIGNEDBYTEVEXTPROC)load("glGetUnsignedBytevEXT"); - glad_glGetUnsignedBytei_vEXT = (PFNGLGETUNSIGNEDBYTEI_VEXTPROC)load("glGetUnsignedBytei_vEXT"); - glad_glDeleteMemoryObjectsEXT = (PFNGLDELETEMEMORYOBJECTSEXTPROC)load("glDeleteMemoryObjectsEXT"); - glad_glIsMemoryObjectEXT = (PFNGLISMEMORYOBJECTEXTPROC)load("glIsMemoryObjectEXT"); - glad_glCreateMemoryObjectsEXT = 
(PFNGLCREATEMEMORYOBJECTSEXTPROC)load("glCreateMemoryObjectsEXT"); - glad_glMemoryObjectParameterivEXT = (PFNGLMEMORYOBJECTPARAMETERIVEXTPROC)load("glMemoryObjectParameterivEXT"); - glad_glGetMemoryObjectParameterivEXT = (PFNGLGETMEMORYOBJECTPARAMETERIVEXTPROC)load("glGetMemoryObjectParameterivEXT"); - glad_glTexStorageMem2DEXT = (PFNGLTEXSTORAGEMEM2DEXTPROC)load("glTexStorageMem2DEXT"); - glad_glTexStorageMem2DMultisampleEXT = (PFNGLTEXSTORAGEMEM2DMULTISAMPLEEXTPROC)load("glTexStorageMem2DMultisampleEXT"); - glad_glTexStorageMem3DEXT = (PFNGLTEXSTORAGEMEM3DEXTPROC)load("glTexStorageMem3DEXT"); - glad_glTexStorageMem3DMultisampleEXT = (PFNGLTEXSTORAGEMEM3DMULTISAMPLEEXTPROC)load("glTexStorageMem3DMultisampleEXT"); - glad_glBufferStorageMemEXT = (PFNGLBUFFERSTORAGEMEMEXTPROC)load("glBufferStorageMemEXT"); - glad_glTextureStorageMem2DEXT = (PFNGLTEXTURESTORAGEMEM2DEXTPROC)load("glTextureStorageMem2DEXT"); - glad_glTextureStorageMem2DMultisampleEXT = (PFNGLTEXTURESTORAGEMEM2DMULTISAMPLEEXTPROC)load("glTextureStorageMem2DMultisampleEXT"); - glad_glTextureStorageMem3DEXT = (PFNGLTEXTURESTORAGEMEM3DEXTPROC)load("glTextureStorageMem3DEXT"); - glad_glTextureStorageMem3DMultisampleEXT = (PFNGLTEXTURESTORAGEMEM3DMULTISAMPLEEXTPROC)load("glTextureStorageMem3DMultisampleEXT"); - glad_glNamedBufferStorageMemEXT = (PFNGLNAMEDBUFFERSTORAGEMEMEXTPROC)load("glNamedBufferStorageMemEXT"); - glad_glTexStorageMem1DEXT = (PFNGLTEXSTORAGEMEM1DEXTPROC)load("glTexStorageMem1DEXT"); - glad_glTextureStorageMem1DEXT = (PFNGLTEXTURESTORAGEMEM1DEXTPROC)load("glTextureStorageMem1DEXT"); -} -static void load_GL_EXT_memory_object_fd(GLADloadproc load) { - if(!GLAD_GL_EXT_memory_object_fd) return; - glad_glImportMemoryFdEXT = (PFNGLIMPORTMEMORYFDEXTPROC)load("glImportMemoryFdEXT"); -} -static void load_GL_EXT_memory_object_win32(GLADloadproc load) { - if(!GLAD_GL_EXT_memory_object_win32) return; - glad_glImportMemoryWin32HandleEXT = 
(PFNGLIMPORTMEMORYWIN32HANDLEEXTPROC)load("glImportMemoryWin32HandleEXT"); - glad_glImportMemoryWin32NameEXT = (PFNGLIMPORTMEMORYWIN32NAMEEXTPROC)load("glImportMemoryWin32NameEXT"); -} -static void load_GL_EXT_mesh_shader(GLADloadproc load) { - if(!GLAD_GL_EXT_mesh_shader) return; - glad_glDrawMeshTasksEXT = (PFNGLDRAWMESHTASKSEXTPROC)load("glDrawMeshTasksEXT"); - glad_glDrawMeshTasksIndirectEXT = (PFNGLDRAWMESHTASKSINDIRECTEXTPROC)load("glDrawMeshTasksIndirectEXT"); - glad_glMultiDrawMeshTasksIndirectEXT = (PFNGLMULTIDRAWMESHTASKSINDIRECTEXTPROC)load("glMultiDrawMeshTasksIndirectEXT"); - glad_glMultiDrawMeshTasksIndirectCountEXT = (PFNGLMULTIDRAWMESHTASKSINDIRECTCOUNTEXTPROC)load("glMultiDrawMeshTasksIndirectCountEXT"); -} -static void load_GL_EXT_multi_draw_arrays(GLADloadproc load) { - if(!GLAD_GL_EXT_multi_draw_arrays) return; - glad_glMultiDrawArraysEXT = (PFNGLMULTIDRAWARRAYSEXTPROC)load("glMultiDrawArraysEXT"); - glad_glMultiDrawElementsEXT = (PFNGLMULTIDRAWELEMENTSEXTPROC)load("glMultiDrawElementsEXT"); -} static void load_GL_EXT_multi_draw_indirect(GLADloadproc load) { if(!GLAD_GL_EXT_multi_draw_indirect) return; glad_glMultiDrawArraysIndirectEXT = (PFNGLMULTIDRAWARRAYSINDIRECTEXTPROC)load("glMultiDrawArraysIndirectEXT"); glad_glMultiDrawElementsIndirectEXT = (PFNGLMULTIDRAWELEMENTSINDIRECTEXTPROC)load("glMultiDrawElementsIndirectEXT"); } -static void load_GL_EXT_multisampled_render_to_texture(GLADloadproc load) { - if(!GLAD_GL_EXT_multisampled_render_to_texture) return; - glad_glRenderbufferStorageMultisampleEXT = (PFNGLRENDERBUFFERSTORAGEMULTISAMPLEEXTPROC)load("glRenderbufferStorageMultisampleEXT"); - glad_glFramebufferTexture2DMultisampleEXT = (PFNGLFRAMEBUFFERTEXTURE2DMULTISAMPLEEXTPROC)load("glFramebufferTexture2DMultisampleEXT"); -} static void load_GL_EXT_multiview_draw_buffers(GLADloadproc load) { if(!GLAD_GL_EXT_multiview_draw_buffers) return; glad_glReadBufferIndexedEXT = (PFNGLREADBUFFERINDEXEDEXTPROC)load("glReadBufferIndexedEXT"); @@ 
-2325,100 +11898,10 @@ static void load_GL_EXT_occlusion_query_boolean(GLADloadproc load) { glad_glGetQueryivEXT = (PFNGLGETQUERYIVEXTPROC)load("glGetQueryivEXT"); glad_glGetQueryObjectuivEXT = (PFNGLGETQUERYOBJECTUIVEXTPROC)load("glGetQueryObjectuivEXT"); } -static void load_GL_EXT_polygon_offset_clamp(GLADloadproc load) { - if(!GLAD_GL_EXT_polygon_offset_clamp) return; - glad_glPolygonOffsetClampEXT = (PFNGLPOLYGONOFFSETCLAMPEXTPROC)load("glPolygonOffsetClampEXT"); -} static void load_GL_EXT_primitive_bounding_box(GLADloadproc load) { if(!GLAD_GL_EXT_primitive_bounding_box) return; glad_glPrimitiveBoundingBoxEXT = (PFNGLPRIMITIVEBOUNDINGBOXEXTPROC)load("glPrimitiveBoundingBoxEXT"); } -static void load_GL_EXT_raster_multisample(GLADloadproc load) { - if(!GLAD_GL_EXT_raster_multisample) return; - glad_glRasterSamplesEXT = (PFNGLRASTERSAMPLESEXTPROC)load("glRasterSamplesEXT"); -} -static void load_GL_EXT_robustness(GLADloadproc load) { - if(!GLAD_GL_EXT_robustness) return; - glad_glGetGraphicsResetStatusEXT = (PFNGLGETGRAPHICSRESETSTATUSEXTPROC)load("glGetGraphicsResetStatusEXT"); - glad_glReadnPixelsEXT = (PFNGLREADNPIXELSEXTPROC)load("glReadnPixelsEXT"); - glad_glGetnUniformfvEXT = (PFNGLGETNUNIFORMFVEXTPROC)load("glGetnUniformfvEXT"); - glad_glGetnUniformivEXT = (PFNGLGETNUNIFORMIVEXTPROC)load("glGetnUniformivEXT"); -} -static void load_GL_EXT_semaphore(GLADloadproc load) { - if(!GLAD_GL_EXT_semaphore) return; - glad_glGetUnsignedBytevEXT = (PFNGLGETUNSIGNEDBYTEVEXTPROC)load("glGetUnsignedBytevEXT"); - glad_glGetUnsignedBytei_vEXT = (PFNGLGETUNSIGNEDBYTEI_VEXTPROC)load("glGetUnsignedBytei_vEXT"); - glad_glGenSemaphoresEXT = (PFNGLGENSEMAPHORESEXTPROC)load("glGenSemaphoresEXT"); - glad_glDeleteSemaphoresEXT = (PFNGLDELETESEMAPHORESEXTPROC)load("glDeleteSemaphoresEXT"); - glad_glIsSemaphoreEXT = (PFNGLISSEMAPHOREEXTPROC)load("glIsSemaphoreEXT"); - glad_glSemaphoreParameterui64vEXT = (PFNGLSEMAPHOREPARAMETERUI64VEXTPROC)load("glSemaphoreParameterui64vEXT"); - 
glad_glGetSemaphoreParameterui64vEXT = (PFNGLGETSEMAPHOREPARAMETERUI64VEXTPROC)load("glGetSemaphoreParameterui64vEXT"); - glad_glWaitSemaphoreEXT = (PFNGLWAITSEMAPHOREEXTPROC)load("glWaitSemaphoreEXT"); - glad_glSignalSemaphoreEXT = (PFNGLSIGNALSEMAPHOREEXTPROC)load("glSignalSemaphoreEXT"); -} -static void load_GL_EXT_semaphore_fd(GLADloadproc load) { - if(!GLAD_GL_EXT_semaphore_fd) return; - glad_glImportSemaphoreFdEXT = (PFNGLIMPORTSEMAPHOREFDEXTPROC)load("glImportSemaphoreFdEXT"); -} -static void load_GL_EXT_semaphore_win32(GLADloadproc load) { - if(!GLAD_GL_EXT_semaphore_win32) return; - glad_glImportSemaphoreWin32HandleEXT = (PFNGLIMPORTSEMAPHOREWIN32HANDLEEXTPROC)load("glImportSemaphoreWin32HandleEXT"); - glad_glImportSemaphoreWin32NameEXT = (PFNGLIMPORTSEMAPHOREWIN32NAMEEXTPROC)load("glImportSemaphoreWin32NameEXT"); -} -static void load_GL_EXT_separate_shader_objects(GLADloadproc load) { - if(!GLAD_GL_EXT_separate_shader_objects) return; - glad_glUseShaderProgramEXT = (PFNGLUSESHADERPROGRAMEXTPROC)load("glUseShaderProgramEXT"); - glad_glActiveProgramEXT = (PFNGLACTIVEPROGRAMEXTPROC)load("glActiveProgramEXT"); - glad_glCreateShaderProgramEXT = (PFNGLCREATESHADERPROGRAMEXTPROC)load("glCreateShaderProgramEXT"); - glad_glActiveShaderProgramEXT = (PFNGLACTIVESHADERPROGRAMEXTPROC)load("glActiveShaderProgramEXT"); - glad_glBindProgramPipelineEXT = (PFNGLBINDPROGRAMPIPELINEEXTPROC)load("glBindProgramPipelineEXT"); - glad_glCreateShaderProgramvEXT = (PFNGLCREATESHADERPROGRAMVEXTPROC)load("glCreateShaderProgramvEXT"); - glad_glDeleteProgramPipelinesEXT = (PFNGLDELETEPROGRAMPIPELINESEXTPROC)load("glDeleteProgramPipelinesEXT"); - glad_glGenProgramPipelinesEXT = (PFNGLGENPROGRAMPIPELINESEXTPROC)load("glGenProgramPipelinesEXT"); - glad_glGetProgramPipelineInfoLogEXT = (PFNGLGETPROGRAMPIPELINEINFOLOGEXTPROC)load("glGetProgramPipelineInfoLogEXT"); - glad_glGetProgramPipelineivEXT = (PFNGLGETPROGRAMPIPELINEIVEXTPROC)load("glGetProgramPipelineivEXT"); - 
glad_glIsProgramPipelineEXT = (PFNGLISPROGRAMPIPELINEEXTPROC)load("glIsProgramPipelineEXT"); - glad_glProgramParameteriEXT = (PFNGLPROGRAMPARAMETERIEXTPROC)load("glProgramParameteriEXT"); - glad_glProgramUniform1fEXT = (PFNGLPROGRAMUNIFORM1FEXTPROC)load("glProgramUniform1fEXT"); - glad_glProgramUniform1fvEXT = (PFNGLPROGRAMUNIFORM1FVEXTPROC)load("glProgramUniform1fvEXT"); - glad_glProgramUniform1iEXT = (PFNGLPROGRAMUNIFORM1IEXTPROC)load("glProgramUniform1iEXT"); - glad_glProgramUniform1ivEXT = (PFNGLPROGRAMUNIFORM1IVEXTPROC)load("glProgramUniform1ivEXT"); - glad_glProgramUniform2fEXT = (PFNGLPROGRAMUNIFORM2FEXTPROC)load("glProgramUniform2fEXT"); - glad_glProgramUniform2fvEXT = (PFNGLPROGRAMUNIFORM2FVEXTPROC)load("glProgramUniform2fvEXT"); - glad_glProgramUniform2iEXT = (PFNGLPROGRAMUNIFORM2IEXTPROC)load("glProgramUniform2iEXT"); - glad_glProgramUniform2ivEXT = (PFNGLPROGRAMUNIFORM2IVEXTPROC)load("glProgramUniform2ivEXT"); - glad_glProgramUniform3fEXT = (PFNGLPROGRAMUNIFORM3FEXTPROC)load("glProgramUniform3fEXT"); - glad_glProgramUniform3fvEXT = (PFNGLPROGRAMUNIFORM3FVEXTPROC)load("glProgramUniform3fvEXT"); - glad_glProgramUniform3iEXT = (PFNGLPROGRAMUNIFORM3IEXTPROC)load("glProgramUniform3iEXT"); - glad_glProgramUniform3ivEXT = (PFNGLPROGRAMUNIFORM3IVEXTPROC)load("glProgramUniform3ivEXT"); - glad_glProgramUniform4fEXT = (PFNGLPROGRAMUNIFORM4FEXTPROC)load("glProgramUniform4fEXT"); - glad_glProgramUniform4fvEXT = (PFNGLPROGRAMUNIFORM4FVEXTPROC)load("glProgramUniform4fvEXT"); - glad_glProgramUniform4iEXT = (PFNGLPROGRAMUNIFORM4IEXTPROC)load("glProgramUniform4iEXT"); - glad_glProgramUniform4ivEXT = (PFNGLPROGRAMUNIFORM4IVEXTPROC)load("glProgramUniform4ivEXT"); - glad_glProgramUniformMatrix2fvEXT = (PFNGLPROGRAMUNIFORMMATRIX2FVEXTPROC)load("glProgramUniformMatrix2fvEXT"); - glad_glProgramUniformMatrix3fvEXT = (PFNGLPROGRAMUNIFORMMATRIX3FVEXTPROC)load("glProgramUniformMatrix3fvEXT"); - glad_glProgramUniformMatrix4fvEXT = 
(PFNGLPROGRAMUNIFORMMATRIX4FVEXTPROC)load("glProgramUniformMatrix4fvEXT"); - glad_glUseProgramStagesEXT = (PFNGLUSEPROGRAMSTAGESEXTPROC)load("glUseProgramStagesEXT"); - glad_glValidateProgramPipelineEXT = (PFNGLVALIDATEPROGRAMPIPELINEEXTPROC)load("glValidateProgramPipelineEXT"); - glad_glProgramUniform1uiEXT = (PFNGLPROGRAMUNIFORM1UIEXTPROC)load("glProgramUniform1uiEXT"); - glad_glProgramUniform2uiEXT = (PFNGLPROGRAMUNIFORM2UIEXTPROC)load("glProgramUniform2uiEXT"); - glad_glProgramUniform3uiEXT = (PFNGLPROGRAMUNIFORM3UIEXTPROC)load("glProgramUniform3uiEXT"); - glad_glProgramUniform4uiEXT = (PFNGLPROGRAMUNIFORM4UIEXTPROC)load("glProgramUniform4uiEXT"); - glad_glProgramUniform1uivEXT = (PFNGLPROGRAMUNIFORM1UIVEXTPROC)load("glProgramUniform1uivEXT"); - glad_glProgramUniform2uivEXT = (PFNGLPROGRAMUNIFORM2UIVEXTPROC)load("glProgramUniform2uivEXT"); - glad_glProgramUniform3uivEXT = (PFNGLPROGRAMUNIFORM3UIVEXTPROC)load("glProgramUniform3uivEXT"); - glad_glProgramUniform4uivEXT = (PFNGLPROGRAMUNIFORM4UIVEXTPROC)load("glProgramUniform4uivEXT"); - glad_glProgramUniformMatrix2x3fvEXT = (PFNGLPROGRAMUNIFORMMATRIX2X3FVEXTPROC)load("glProgramUniformMatrix2x3fvEXT"); - glad_glProgramUniformMatrix3x2fvEXT = (PFNGLPROGRAMUNIFORMMATRIX3X2FVEXTPROC)load("glProgramUniformMatrix3x2fvEXT"); - glad_glProgramUniformMatrix2x4fvEXT = (PFNGLPROGRAMUNIFORMMATRIX2X4FVEXTPROC)load("glProgramUniformMatrix2x4fvEXT"); - glad_glProgramUniformMatrix4x2fvEXT = (PFNGLPROGRAMUNIFORMMATRIX4X2FVEXTPROC)load("glProgramUniformMatrix4x2fvEXT"); - glad_glProgramUniformMatrix3x4fvEXT = (PFNGLPROGRAMUNIFORMMATRIX3X4FVEXTPROC)load("glProgramUniformMatrix3x4fvEXT"); - glad_glProgramUniformMatrix4x3fvEXT = (PFNGLPROGRAMUNIFORMMATRIX4X3FVEXTPROC)load("glProgramUniformMatrix4x3fvEXT"); -} -static void load_GL_EXT_shader_framebuffer_fetch_non_coherent(GLADloadproc load) { - if(!GLAD_GL_EXT_shader_framebuffer_fetch_non_coherent) return; - glad_glFramebufferFetchBarrierEXT = 
(PFNGLFRAMEBUFFERFETCHBARRIEREXTPROC)load("glFramebufferFetchBarrierEXT"); -} static void load_GL_EXT_shader_pixel_local_storage2(GLADloadproc load) { if(!GLAD_GL_EXT_shader_pixel_local_storage2) return; glad_glFramebufferPixelLocalStorageSizeEXT = (PFNGLFRAMEBUFFERPIXELLOCALSTORAGESIZEEXTPROC)load("glFramebufferPixelLocalStorageSizeEXT"); @@ -2449,15 +11932,6 @@ static void load_GL_EXT_texture_buffer(GLADloadproc load) { glad_glTexBufferEXT = (PFNGLTEXBUFFEREXTPROC)load("glTexBufferEXT"); glad_glTexBufferRangeEXT = (PFNGLTEXBUFFERRANGEEXTPROC)load("glTexBufferRangeEXT"); } -static void load_GL_EXT_texture_storage(GLADloadproc load) { - if(!GLAD_GL_EXT_texture_storage) return; - glad_glTexStorage1DEXT = (PFNGLTEXSTORAGE1DEXTPROC)load("glTexStorage1DEXT"); - glad_glTexStorage2DEXT = (PFNGLTEXSTORAGE2DEXTPROC)load("glTexStorage2DEXT"); - glad_glTexStorage3DEXT = (PFNGLTEXSTORAGE3DEXTPROC)load("glTexStorage3DEXT"); - glad_glTextureStorage1DEXT = (PFNGLTEXTURESTORAGE1DEXTPROC)load("glTextureStorage1DEXT"); - glad_glTextureStorage2DEXT = (PFNGLTEXTURESTORAGE2DEXTPROC)load("glTextureStorage2DEXT"); - glad_glTextureStorage3DEXT = (PFNGLTEXTURESTORAGE3DEXTPROC)load("glTextureStorage3DEXT"); -} static void load_GL_EXT_texture_storage_compression(GLADloadproc load) { if(!GLAD_GL_EXT_texture_storage_compression) return; glad_glTexStorageAttribs2DEXT = (PFNGLTEXSTORAGEATTRIBS2DEXTPROC)load("glTexStorageAttribs2DEXT"); @@ -2467,15 +11941,6 @@ static void load_GL_EXT_texture_view(GLADloadproc load) { if(!GLAD_GL_EXT_texture_view) return; glad_glTextureViewEXT = (PFNGLTEXTUREVIEWEXTPROC)load("glTextureViewEXT"); } -static void load_GL_EXT_win32_keyed_mutex(GLADloadproc load) { - if(!GLAD_GL_EXT_win32_keyed_mutex) return; - glad_glAcquireKeyedMutexWin32EXT = (PFNGLACQUIREKEYEDMUTEXWIN32EXTPROC)load("glAcquireKeyedMutexWin32EXT"); - glad_glReleaseKeyedMutexWin32EXT = (PFNGLRELEASEKEYEDMUTEXWIN32EXTPROC)load("glReleaseKeyedMutexWin32EXT"); -} -static void 
load_GL_EXT_window_rectangles(GLADloadproc load) { - if(!GLAD_GL_EXT_window_rectangles) return; - glad_glWindowRectanglesEXT = (PFNGLWINDOWRECTANGLESEXTPROC)load("glWindowRectanglesEXT"); -} static void load_GL_IMG_bindless_texture(GLADloadproc load) { if(!GLAD_GL_IMG_bindless_texture) return; glad_glGetTextureHandleIMG = (PFNGLGETTEXTUREHANDLEIMGPROC)load("glGetTextureHandleIMG"); @@ -2490,79 +11955,6 @@ static void load_GL_IMG_framebuffer_downsample(GLADloadproc load) { glad_glFramebufferTexture2DDownsampleIMG = (PFNGLFRAMEBUFFERTEXTURE2DDOWNSAMPLEIMGPROC)load("glFramebufferTexture2DDownsampleIMG"); glad_glFramebufferTextureLayerDownsampleIMG = (PFNGLFRAMEBUFFERTEXTURELAYERDOWNSAMPLEIMGPROC)load("glFramebufferTextureLayerDownsampleIMG"); } -static void load_GL_IMG_multisampled_render_to_texture(GLADloadproc load) { - if(!GLAD_GL_IMG_multisampled_render_to_texture) return; - glad_glRenderbufferStorageMultisampleIMG = (PFNGLRENDERBUFFERSTORAGEMULTISAMPLEIMGPROC)load("glRenderbufferStorageMultisampleIMG"); - glad_glFramebufferTexture2DMultisampleIMG = (PFNGLFRAMEBUFFERTEXTURE2DMULTISAMPLEIMGPROC)load("glFramebufferTexture2DMultisampleIMG"); -} -static void load_GL_INTEL_framebuffer_CMAA(GLADloadproc load) { - if(!GLAD_GL_INTEL_framebuffer_CMAA) return; - glad_glApplyFramebufferAttachmentCMAAINTEL = (PFNGLAPPLYFRAMEBUFFERATTACHMENTCMAAINTELPROC)load("glApplyFramebufferAttachmentCMAAINTEL"); -} -static void load_GL_INTEL_performance_query(GLADloadproc load) { - if(!GLAD_GL_INTEL_performance_query) return; - glad_glBeginPerfQueryINTEL = (PFNGLBEGINPERFQUERYINTELPROC)load("glBeginPerfQueryINTEL"); - glad_glCreatePerfQueryINTEL = (PFNGLCREATEPERFQUERYINTELPROC)load("glCreatePerfQueryINTEL"); - glad_glDeletePerfQueryINTEL = (PFNGLDELETEPERFQUERYINTELPROC)load("glDeletePerfQueryINTEL"); - glad_glEndPerfQueryINTEL = (PFNGLENDPERFQUERYINTELPROC)load("glEndPerfQueryINTEL"); - glad_glGetFirstPerfQueryIdINTEL = 
(PFNGLGETFIRSTPERFQUERYIDINTELPROC)load("glGetFirstPerfQueryIdINTEL"); - glad_glGetNextPerfQueryIdINTEL = (PFNGLGETNEXTPERFQUERYIDINTELPROC)load("glGetNextPerfQueryIdINTEL"); - glad_glGetPerfCounterInfoINTEL = (PFNGLGETPERFCOUNTERINFOINTELPROC)load("glGetPerfCounterInfoINTEL"); - glad_glGetPerfQueryDataINTEL = (PFNGLGETPERFQUERYDATAINTELPROC)load("glGetPerfQueryDataINTEL"); - glad_glGetPerfQueryIdByNameINTEL = (PFNGLGETPERFQUERYIDBYNAMEINTELPROC)load("glGetPerfQueryIdByNameINTEL"); - glad_glGetPerfQueryInfoINTEL = (PFNGLGETPERFQUERYINFOINTELPROC)load("glGetPerfQueryInfoINTEL"); -} -static void load_GL_KHR_blend_equation_advanced(GLADloadproc load) { - if(!GLAD_GL_KHR_blend_equation_advanced) return; - glad_glBlendBarrierKHR = (PFNGLBLENDBARRIERKHRPROC)load("glBlendBarrierKHR"); -} -static void load_GL_KHR_debug(GLADloadproc load) { - if(!GLAD_GL_KHR_debug) return; - glad_glDebugMessageControl = (PFNGLDEBUGMESSAGECONTROLPROC)load("glDebugMessageControl"); - glad_glDebugMessageInsert = (PFNGLDEBUGMESSAGEINSERTPROC)load("glDebugMessageInsert"); - glad_glDebugMessageCallback = (PFNGLDEBUGMESSAGECALLBACKPROC)load("glDebugMessageCallback"); - glad_glGetDebugMessageLog = (PFNGLGETDEBUGMESSAGELOGPROC)load("glGetDebugMessageLog"); - glad_glPushDebugGroup = (PFNGLPUSHDEBUGGROUPPROC)load("glPushDebugGroup"); - glad_glPopDebugGroup = (PFNGLPOPDEBUGGROUPPROC)load("glPopDebugGroup"); - glad_glObjectLabel = (PFNGLOBJECTLABELPROC)load("glObjectLabel"); - glad_glGetObjectLabel = (PFNGLGETOBJECTLABELPROC)load("glGetObjectLabel"); - glad_glObjectPtrLabel = (PFNGLOBJECTPTRLABELPROC)load("glObjectPtrLabel"); - glad_glGetObjectPtrLabel = (PFNGLGETOBJECTPTRLABELPROC)load("glGetObjectPtrLabel"); - glad_glGetPointerv = (PFNGLGETPOINTERVPROC)load("glGetPointerv"); - glad_glDebugMessageControlKHR = (PFNGLDEBUGMESSAGECONTROLKHRPROC)load("glDebugMessageControlKHR"); - glad_glDebugMessageInsertKHR = (PFNGLDEBUGMESSAGEINSERTKHRPROC)load("glDebugMessageInsertKHR"); - 
glad_glDebugMessageCallbackKHR = (PFNGLDEBUGMESSAGECALLBACKKHRPROC)load("glDebugMessageCallbackKHR"); - glad_glGetDebugMessageLogKHR = (PFNGLGETDEBUGMESSAGELOGKHRPROC)load("glGetDebugMessageLogKHR"); - glad_glPushDebugGroupKHR = (PFNGLPUSHDEBUGGROUPKHRPROC)load("glPushDebugGroupKHR"); - glad_glPopDebugGroupKHR = (PFNGLPOPDEBUGGROUPKHRPROC)load("glPopDebugGroupKHR"); - glad_glObjectLabelKHR = (PFNGLOBJECTLABELKHRPROC)load("glObjectLabelKHR"); - glad_glGetObjectLabelKHR = (PFNGLGETOBJECTLABELKHRPROC)load("glGetObjectLabelKHR"); - glad_glObjectPtrLabelKHR = (PFNGLOBJECTPTRLABELKHRPROC)load("glObjectPtrLabelKHR"); - glad_glGetObjectPtrLabelKHR = (PFNGLGETOBJECTPTRLABELKHRPROC)load("glGetObjectPtrLabelKHR"); - glad_glGetPointervKHR = (PFNGLGETPOINTERVKHRPROC)load("glGetPointervKHR"); -} -static void load_GL_KHR_parallel_shader_compile(GLADloadproc load) { - if(!GLAD_GL_KHR_parallel_shader_compile) return; - glad_glMaxShaderCompilerThreadsKHR = (PFNGLMAXSHADERCOMPILERTHREADSKHRPROC)load("glMaxShaderCompilerThreadsKHR"); -} -static void load_GL_KHR_robustness(GLADloadproc load) { - if(!GLAD_GL_KHR_robustness) return; - glad_glGetGraphicsResetStatus = (PFNGLGETGRAPHICSRESETSTATUSPROC)load("glGetGraphicsResetStatus"); - glad_glReadnPixels = (PFNGLREADNPIXELSPROC)load("glReadnPixels"); - glad_glGetnUniformfv = (PFNGLGETNUNIFORMFVPROC)load("glGetnUniformfv"); - glad_glGetnUniformiv = (PFNGLGETNUNIFORMIVPROC)load("glGetnUniformiv"); - glad_glGetnUniformuiv = (PFNGLGETNUNIFORMUIVPROC)load("glGetnUniformuiv"); - glad_glGetGraphicsResetStatusKHR = (PFNGLGETGRAPHICSRESETSTATUSKHRPROC)load("glGetGraphicsResetStatusKHR"); - glad_glReadnPixelsKHR = (PFNGLREADNPIXELSKHRPROC)load("glReadnPixelsKHR"); - glad_glGetnUniformfvKHR = (PFNGLGETNUNIFORMFVKHRPROC)load("glGetnUniformfvKHR"); - glad_glGetnUniformivKHR = (PFNGLGETNUNIFORMIVKHRPROC)load("glGetnUniformivKHR"); - glad_glGetnUniformuivKHR = (PFNGLGETNUNIFORMUIVKHRPROC)load("glGetnUniformuivKHR"); -} -static void 
load_GL_MESA_framebuffer_flip_y(GLADloadproc load) { - if(!GLAD_GL_MESA_framebuffer_flip_y) return; - glad_glFramebufferParameteriMESA = (PFNGLFRAMEBUFFERPARAMETERIMESAPROC)load("glFramebufferParameteriMESA"); - glad_glGetFramebufferParameterivMESA = (PFNGLGETFRAMEBUFFERPARAMETERIVMESAPROC)load("glGetFramebufferParameterivMESA"); -} static void load_GL_MESA_sampler_objects(GLADloadproc load) { if(!GLAD_GL_MESA_sampler_objects) return; glad_glGenSamplers = (PFNGLGENSAMPLERSPROC)load("glGenSamplers"); @@ -2576,44 +11968,6 @@ static void load_GL_MESA_sampler_objects(GLADloadproc load) { glad_glGetSamplerParameteriv = (PFNGLGETSAMPLERPARAMETERIVPROC)load("glGetSamplerParameteriv"); glad_glGetSamplerParameterfv = (PFNGLGETSAMPLERPARAMETERFVPROC)load("glGetSamplerParameterfv"); } -static void load_GL_NV_bindless_texture(GLADloadproc load) { - if(!GLAD_GL_NV_bindless_texture) return; - glad_glGetTextureHandleNV = (PFNGLGETTEXTUREHANDLENVPROC)load("glGetTextureHandleNV"); - glad_glGetTextureSamplerHandleNV = (PFNGLGETTEXTURESAMPLERHANDLENVPROC)load("glGetTextureSamplerHandleNV"); - glad_glMakeTextureHandleResidentNV = (PFNGLMAKETEXTUREHANDLERESIDENTNVPROC)load("glMakeTextureHandleResidentNV"); - glad_glMakeTextureHandleNonResidentNV = (PFNGLMAKETEXTUREHANDLENONRESIDENTNVPROC)load("glMakeTextureHandleNonResidentNV"); - glad_glGetImageHandleNV = (PFNGLGETIMAGEHANDLENVPROC)load("glGetImageHandleNV"); - glad_glMakeImageHandleResidentNV = (PFNGLMAKEIMAGEHANDLERESIDENTNVPROC)load("glMakeImageHandleResidentNV"); - glad_glMakeImageHandleNonResidentNV = (PFNGLMAKEIMAGEHANDLENONRESIDENTNVPROC)load("glMakeImageHandleNonResidentNV"); - glad_glUniformHandleui64NV = (PFNGLUNIFORMHANDLEUI64NVPROC)load("glUniformHandleui64NV"); - glad_glUniformHandleui64vNV = (PFNGLUNIFORMHANDLEUI64VNVPROC)load("glUniformHandleui64vNV"); - glad_glProgramUniformHandleui64NV = (PFNGLPROGRAMUNIFORMHANDLEUI64NVPROC)load("glProgramUniformHandleui64NV"); - glad_glProgramUniformHandleui64vNV = 
(PFNGLPROGRAMUNIFORMHANDLEUI64VNVPROC)load("glProgramUniformHandleui64vNV"); - glad_glIsTextureHandleResidentNV = (PFNGLISTEXTUREHANDLERESIDENTNVPROC)load("glIsTextureHandleResidentNV"); - glad_glIsImageHandleResidentNV = (PFNGLISIMAGEHANDLERESIDENTNVPROC)load("glIsImageHandleResidentNV"); -} -static void load_GL_NV_blend_equation_advanced(GLADloadproc load) { - if(!GLAD_GL_NV_blend_equation_advanced) return; - glad_glBlendParameteriNV = (PFNGLBLENDPARAMETERINVPROC)load("glBlendParameteriNV"); - glad_glBlendBarrierNV = (PFNGLBLENDBARRIERNVPROC)load("glBlendBarrierNV"); -} -static void load_GL_NV_clip_space_w_scaling(GLADloadproc load) { - if(!GLAD_GL_NV_clip_space_w_scaling) return; - glad_glViewportPositionWScaleNV = (PFNGLVIEWPORTPOSITIONWSCALENVPROC)load("glViewportPositionWScaleNV"); -} -static void load_GL_NV_conditional_render(GLADloadproc load) { - if(!GLAD_GL_NV_conditional_render) return; - glad_glBeginConditionalRenderNV = (PFNGLBEGINCONDITIONALRENDERNVPROC)load("glBeginConditionalRenderNV"); - glad_glEndConditionalRenderNV = (PFNGLENDCONDITIONALRENDERNVPROC)load("glEndConditionalRenderNV"); -} -static void load_GL_NV_conservative_raster(GLADloadproc load) { - if(!GLAD_GL_NV_conservative_raster) return; - glad_glSubpixelPrecisionBiasNV = (PFNGLSUBPIXELPRECISIONBIASNVPROC)load("glSubpixelPrecisionBiasNV"); -} -static void load_GL_NV_conservative_raster_pre_snap_triangles(GLADloadproc load) { - if(!GLAD_GL_NV_conservative_raster_pre_snap_triangles) return; - glad_glConservativeRasterParameteriNV = (PFNGLCONSERVATIVERASTERPARAMETERINVPROC)load("glConservativeRasterParameteriNV"); -} static void load_GL_NV_copy_buffer(GLADloadproc load) { if(!GLAD_GL_NV_copy_buffer) return; glad_glCopyBufferSubDataNV = (PFNGLCOPYBUFFERSUBDATANVPROC)load("glCopyBufferSubDataNV"); @@ -2632,110 +11986,18 @@ static void load_GL_NV_draw_instanced(GLADloadproc load) { glad_glDrawArraysInstancedNV = (PFNGLDRAWARRAYSINSTANCEDNVPROC)load("glDrawArraysInstancedNV"); 
glad_glDrawElementsInstancedNV = (PFNGLDRAWELEMENTSINSTANCEDNVPROC)load("glDrawElementsInstancedNV"); } -static void load_GL_NV_draw_vulkan_image(GLADloadproc load) { - if(!GLAD_GL_NV_draw_vulkan_image) return; - glad_glDrawVkImageNV = (PFNGLDRAWVKIMAGENVPROC)load("glDrawVkImageNV"); - glad_glGetVkProcAddrNV = (PFNGLGETVKPROCADDRNVPROC)load("glGetVkProcAddrNV"); - glad_glWaitVkSemaphoreNV = (PFNGLWAITVKSEMAPHORENVPROC)load("glWaitVkSemaphoreNV"); - glad_glSignalVkSemaphoreNV = (PFNGLSIGNALVKSEMAPHORENVPROC)load("glSignalVkSemaphoreNV"); - glad_glSignalVkFenceNV = (PFNGLSIGNALVKFENCENVPROC)load("glSignalVkFenceNV"); -} -static void load_GL_NV_fence(GLADloadproc load) { - if(!GLAD_GL_NV_fence) return; - glad_glDeleteFencesNV = (PFNGLDELETEFENCESNVPROC)load("glDeleteFencesNV"); - glad_glGenFencesNV = (PFNGLGENFENCESNVPROC)load("glGenFencesNV"); - glad_glIsFenceNV = (PFNGLISFENCENVPROC)load("glIsFenceNV"); - glad_glTestFenceNV = (PFNGLTESTFENCENVPROC)load("glTestFenceNV"); - glad_glGetFenceivNV = (PFNGLGETFENCEIVNVPROC)load("glGetFenceivNV"); - glad_glFinishFenceNV = (PFNGLFINISHFENCENVPROC)load("glFinishFenceNV"); - glad_glSetFenceNV = (PFNGLSETFENCENVPROC)load("glSetFenceNV"); -} -static void load_GL_NV_fragment_coverage_to_color(GLADloadproc load) { - if(!GLAD_GL_NV_fragment_coverage_to_color) return; - glad_glFragmentCoverageColorNV = (PFNGLFRAGMENTCOVERAGECOLORNVPROC)load("glFragmentCoverageColorNV"); -} static void load_GL_NV_framebuffer_blit(GLADloadproc load) { if(!GLAD_GL_NV_framebuffer_blit) return; glad_glBlitFramebufferNV = (PFNGLBLITFRAMEBUFFERNVPROC)load("glBlitFramebufferNV"); } -static void load_GL_NV_framebuffer_mixed_samples(GLADloadproc load) { - if(!GLAD_GL_NV_framebuffer_mixed_samples) return; - glad_glRasterSamplesEXT = (PFNGLRASTERSAMPLESEXTPROC)load("glRasterSamplesEXT"); - glad_glCoverageModulationTableNV = (PFNGLCOVERAGEMODULATIONTABLENVPROC)load("glCoverageModulationTableNV"); - glad_glGetCoverageModulationTableNV = 
(PFNGLGETCOVERAGEMODULATIONTABLENVPROC)load("glGetCoverageModulationTableNV"); - glad_glCoverageModulationNV = (PFNGLCOVERAGEMODULATIONNVPROC)load("glCoverageModulationNV"); -} static void load_GL_NV_framebuffer_multisample(GLADloadproc load) { if(!GLAD_GL_NV_framebuffer_multisample) return; glad_glRenderbufferStorageMultisampleNV = (PFNGLRENDERBUFFERSTORAGEMULTISAMPLENVPROC)load("glRenderbufferStorageMultisampleNV"); } -static void load_GL_NV_gpu_shader5(GLADloadproc load) { - if(!GLAD_GL_NV_gpu_shader5) return; - glad_glUniform1i64NV = (PFNGLUNIFORM1I64NVPROC)load("glUniform1i64NV"); - glad_glUniform2i64NV = (PFNGLUNIFORM2I64NVPROC)load("glUniform2i64NV"); - glad_glUniform3i64NV = (PFNGLUNIFORM3I64NVPROC)load("glUniform3i64NV"); - glad_glUniform4i64NV = (PFNGLUNIFORM4I64NVPROC)load("glUniform4i64NV"); - glad_glUniform1i64vNV = (PFNGLUNIFORM1I64VNVPROC)load("glUniform1i64vNV"); - glad_glUniform2i64vNV = (PFNGLUNIFORM2I64VNVPROC)load("glUniform2i64vNV"); - glad_glUniform3i64vNV = (PFNGLUNIFORM3I64VNVPROC)load("glUniform3i64vNV"); - glad_glUniform4i64vNV = (PFNGLUNIFORM4I64VNVPROC)load("glUniform4i64vNV"); - glad_glUniform1ui64NV = (PFNGLUNIFORM1UI64NVPROC)load("glUniform1ui64NV"); - glad_glUniform2ui64NV = (PFNGLUNIFORM2UI64NVPROC)load("glUniform2ui64NV"); - glad_glUniform3ui64NV = (PFNGLUNIFORM3UI64NVPROC)load("glUniform3ui64NV"); - glad_glUniform4ui64NV = (PFNGLUNIFORM4UI64NVPROC)load("glUniform4ui64NV"); - glad_glUniform1ui64vNV = (PFNGLUNIFORM1UI64VNVPROC)load("glUniform1ui64vNV"); - glad_glUniform2ui64vNV = (PFNGLUNIFORM2UI64VNVPROC)load("glUniform2ui64vNV"); - glad_glUniform3ui64vNV = (PFNGLUNIFORM3UI64VNVPROC)load("glUniform3ui64vNV"); - glad_glUniform4ui64vNV = (PFNGLUNIFORM4UI64VNVPROC)load("glUniform4ui64vNV"); - glad_glGetUniformi64vNV = (PFNGLGETUNIFORMI64VNVPROC)load("glGetUniformi64vNV"); - glad_glProgramUniform1i64NV = (PFNGLPROGRAMUNIFORM1I64NVPROC)load("glProgramUniform1i64NV"); - glad_glProgramUniform2i64NV = 
(PFNGLPROGRAMUNIFORM2I64NVPROC)load("glProgramUniform2i64NV"); - glad_glProgramUniform3i64NV = (PFNGLPROGRAMUNIFORM3I64NVPROC)load("glProgramUniform3i64NV"); - glad_glProgramUniform4i64NV = (PFNGLPROGRAMUNIFORM4I64NVPROC)load("glProgramUniform4i64NV"); - glad_glProgramUniform1i64vNV = (PFNGLPROGRAMUNIFORM1I64VNVPROC)load("glProgramUniform1i64vNV"); - glad_glProgramUniform2i64vNV = (PFNGLPROGRAMUNIFORM2I64VNVPROC)load("glProgramUniform2i64vNV"); - glad_glProgramUniform3i64vNV = (PFNGLPROGRAMUNIFORM3I64VNVPROC)load("glProgramUniform3i64vNV"); - glad_glProgramUniform4i64vNV = (PFNGLPROGRAMUNIFORM4I64VNVPROC)load("glProgramUniform4i64vNV"); - glad_glProgramUniform1ui64NV = (PFNGLPROGRAMUNIFORM1UI64NVPROC)load("glProgramUniform1ui64NV"); - glad_glProgramUniform2ui64NV = (PFNGLPROGRAMUNIFORM2UI64NVPROC)load("glProgramUniform2ui64NV"); - glad_glProgramUniform3ui64NV = (PFNGLPROGRAMUNIFORM3UI64NVPROC)load("glProgramUniform3ui64NV"); - glad_glProgramUniform4ui64NV = (PFNGLPROGRAMUNIFORM4UI64NVPROC)load("glProgramUniform4ui64NV"); - glad_glProgramUniform1ui64vNV = (PFNGLPROGRAMUNIFORM1UI64VNVPROC)load("glProgramUniform1ui64vNV"); - glad_glProgramUniform2ui64vNV = (PFNGLPROGRAMUNIFORM2UI64VNVPROC)load("glProgramUniform2ui64vNV"); - glad_glProgramUniform3ui64vNV = (PFNGLPROGRAMUNIFORM3UI64VNVPROC)load("glProgramUniform3ui64vNV"); - glad_glProgramUniform4ui64vNV = (PFNGLPROGRAMUNIFORM4UI64VNVPROC)load("glProgramUniform4ui64vNV"); -} static void load_GL_NV_instanced_arrays(GLADloadproc load) { if(!GLAD_GL_NV_instanced_arrays) return; glad_glVertexAttribDivisorNV = (PFNGLVERTEXATTRIBDIVISORNVPROC)load("glVertexAttribDivisorNV"); } -static void load_GL_NV_internalformat_sample_query(GLADloadproc load) { - if(!GLAD_GL_NV_internalformat_sample_query) return; - glad_glGetInternalformatSampleivNV = (PFNGLGETINTERNALFORMATSAMPLEIVNVPROC)load("glGetInternalformatSampleivNV"); -} -static void load_GL_NV_memory_attachment(GLADloadproc load) { - if(!GLAD_GL_NV_memory_attachment) return; - 
glad_glGetMemoryObjectDetachedResourcesuivNV = (PFNGLGETMEMORYOBJECTDETACHEDRESOURCESUIVNVPROC)load("glGetMemoryObjectDetachedResourcesuivNV"); - glad_glResetMemoryObjectParameterNV = (PFNGLRESETMEMORYOBJECTPARAMETERNVPROC)load("glResetMemoryObjectParameterNV"); - glad_glTexAttachMemoryNV = (PFNGLTEXATTACHMEMORYNVPROC)load("glTexAttachMemoryNV"); - glad_glBufferAttachMemoryNV = (PFNGLBUFFERATTACHMEMORYNVPROC)load("glBufferAttachMemoryNV"); - glad_glTextureAttachMemoryNV = (PFNGLTEXTUREATTACHMEMORYNVPROC)load("glTextureAttachMemoryNV"); - glad_glNamedBufferAttachMemoryNV = (PFNGLNAMEDBUFFERATTACHMEMORYNVPROC)load("glNamedBufferAttachMemoryNV"); -} -static void load_GL_NV_memory_object_sparse(GLADloadproc load) { - if(!GLAD_GL_NV_memory_object_sparse) return; - glad_glBufferPageCommitmentMemNV = (PFNGLBUFFERPAGECOMMITMENTMEMNVPROC)load("glBufferPageCommitmentMemNV"); - glad_glTexPageCommitmentMemNV = (PFNGLTEXPAGECOMMITMENTMEMNVPROC)load("glTexPageCommitmentMemNV"); - glad_glNamedBufferPageCommitmentMemNV = (PFNGLNAMEDBUFFERPAGECOMMITMENTMEMNVPROC)load("glNamedBufferPageCommitmentMemNV"); - glad_glTexturePageCommitmentMemNV = (PFNGLTEXTUREPAGECOMMITMENTMEMNVPROC)load("glTexturePageCommitmentMemNV"); -} -static void load_GL_NV_mesh_shader(GLADloadproc load) { - if(!GLAD_GL_NV_mesh_shader) return; - glad_glDrawMeshTasksNV = (PFNGLDRAWMESHTASKSNVPROC)load("glDrawMeshTasksNV"); - glad_glDrawMeshTasksIndirectNV = (PFNGLDRAWMESHTASKSINDIRECTNVPROC)load("glDrawMeshTasksIndirectNV"); - glad_glMultiDrawMeshTasksIndirectNV = (PFNGLMULTIDRAWMESHTASKSINDIRECTNVPROC)load("glMultiDrawMeshTasksIndirectNV"); - glad_glMultiDrawMeshTasksIndirectCountNV = (PFNGLMULTIDRAWMESHTASKSINDIRECTCOUNTNVPROC)load("glMultiDrawMeshTasksIndirectCountNV"); -} static void load_GL_NV_non_square_matrices(GLADloadproc load) { if(!GLAD_GL_NV_non_square_matrices) return; glad_glUniformMatrix2x3fvNV = (PFNGLUNIFORMMATRIX2X3FVNVPROC)load("glUniformMatrix2x3fvNV"); @@ -2745,92 +12007,6 @@ static void 
load_GL_NV_non_square_matrices(GLADloadproc load) { glad_glUniformMatrix3x4fvNV = (PFNGLUNIFORMMATRIX3X4FVNVPROC)load("glUniformMatrix3x4fvNV"); glad_glUniformMatrix4x3fvNV = (PFNGLUNIFORMMATRIX4X3FVNVPROC)load("glUniformMatrix4x3fvNV"); } -static void load_GL_NV_path_rendering(GLADloadproc load) { - if(!GLAD_GL_NV_path_rendering) return; - glad_glGenPathsNV = (PFNGLGENPATHSNVPROC)load("glGenPathsNV"); - glad_glDeletePathsNV = (PFNGLDELETEPATHSNVPROC)load("glDeletePathsNV"); - glad_glIsPathNV = (PFNGLISPATHNVPROC)load("glIsPathNV"); - glad_glPathCommandsNV = (PFNGLPATHCOMMANDSNVPROC)load("glPathCommandsNV"); - glad_glPathCoordsNV = (PFNGLPATHCOORDSNVPROC)load("glPathCoordsNV"); - glad_glPathSubCommandsNV = (PFNGLPATHSUBCOMMANDSNVPROC)load("glPathSubCommandsNV"); - glad_glPathSubCoordsNV = (PFNGLPATHSUBCOORDSNVPROC)load("glPathSubCoordsNV"); - glad_glPathStringNV = (PFNGLPATHSTRINGNVPROC)load("glPathStringNV"); - glad_glPathGlyphsNV = (PFNGLPATHGLYPHSNVPROC)load("glPathGlyphsNV"); - glad_glPathGlyphRangeNV = (PFNGLPATHGLYPHRANGENVPROC)load("glPathGlyphRangeNV"); - glad_glWeightPathsNV = (PFNGLWEIGHTPATHSNVPROC)load("glWeightPathsNV"); - glad_glCopyPathNV = (PFNGLCOPYPATHNVPROC)load("glCopyPathNV"); - glad_glInterpolatePathsNV = (PFNGLINTERPOLATEPATHSNVPROC)load("glInterpolatePathsNV"); - glad_glTransformPathNV = (PFNGLTRANSFORMPATHNVPROC)load("glTransformPathNV"); - glad_glPathParameterivNV = (PFNGLPATHPARAMETERIVNVPROC)load("glPathParameterivNV"); - glad_glPathParameteriNV = (PFNGLPATHPARAMETERINVPROC)load("glPathParameteriNV"); - glad_glPathParameterfvNV = (PFNGLPATHPARAMETERFVNVPROC)load("glPathParameterfvNV"); - glad_glPathParameterfNV = (PFNGLPATHPARAMETERFNVPROC)load("glPathParameterfNV"); - glad_glPathDashArrayNV = (PFNGLPATHDASHARRAYNVPROC)load("glPathDashArrayNV"); - glad_glPathStencilFuncNV = (PFNGLPATHSTENCILFUNCNVPROC)load("glPathStencilFuncNV"); - glad_glPathStencilDepthOffsetNV = (PFNGLPATHSTENCILDEPTHOFFSETNVPROC)load("glPathStencilDepthOffsetNV"); - 
glad_glStencilFillPathNV = (PFNGLSTENCILFILLPATHNVPROC)load("glStencilFillPathNV"); - glad_glStencilStrokePathNV = (PFNGLSTENCILSTROKEPATHNVPROC)load("glStencilStrokePathNV"); - glad_glStencilFillPathInstancedNV = (PFNGLSTENCILFILLPATHINSTANCEDNVPROC)load("glStencilFillPathInstancedNV"); - glad_glStencilStrokePathInstancedNV = (PFNGLSTENCILSTROKEPATHINSTANCEDNVPROC)load("glStencilStrokePathInstancedNV"); - glad_glPathCoverDepthFuncNV = (PFNGLPATHCOVERDEPTHFUNCNVPROC)load("glPathCoverDepthFuncNV"); - glad_glCoverFillPathNV = (PFNGLCOVERFILLPATHNVPROC)load("glCoverFillPathNV"); - glad_glCoverStrokePathNV = (PFNGLCOVERSTROKEPATHNVPROC)load("glCoverStrokePathNV"); - glad_glCoverFillPathInstancedNV = (PFNGLCOVERFILLPATHINSTANCEDNVPROC)load("glCoverFillPathInstancedNV"); - glad_glCoverStrokePathInstancedNV = (PFNGLCOVERSTROKEPATHINSTANCEDNVPROC)load("glCoverStrokePathInstancedNV"); - glad_glGetPathParameterivNV = (PFNGLGETPATHPARAMETERIVNVPROC)load("glGetPathParameterivNV"); - glad_glGetPathParameterfvNV = (PFNGLGETPATHPARAMETERFVNVPROC)load("glGetPathParameterfvNV"); - glad_glGetPathCommandsNV = (PFNGLGETPATHCOMMANDSNVPROC)load("glGetPathCommandsNV"); - glad_glGetPathCoordsNV = (PFNGLGETPATHCOORDSNVPROC)load("glGetPathCoordsNV"); - glad_glGetPathDashArrayNV = (PFNGLGETPATHDASHARRAYNVPROC)load("glGetPathDashArrayNV"); - glad_glGetPathMetricsNV = (PFNGLGETPATHMETRICSNVPROC)load("glGetPathMetricsNV"); - glad_glGetPathMetricRangeNV = (PFNGLGETPATHMETRICRANGENVPROC)load("glGetPathMetricRangeNV"); - glad_glGetPathSpacingNV = (PFNGLGETPATHSPACINGNVPROC)load("glGetPathSpacingNV"); - glad_glIsPointInFillPathNV = (PFNGLISPOINTINFILLPATHNVPROC)load("glIsPointInFillPathNV"); - glad_glIsPointInStrokePathNV = (PFNGLISPOINTINSTROKEPATHNVPROC)load("glIsPointInStrokePathNV"); - glad_glGetPathLengthNV = (PFNGLGETPATHLENGTHNVPROC)load("glGetPathLengthNV"); - glad_glPointAlongPathNV = (PFNGLPOINTALONGPATHNVPROC)load("glPointAlongPathNV"); - glad_glMatrixLoad3x2fNV = 
(PFNGLMATRIXLOAD3X2FNVPROC)load("glMatrixLoad3x2fNV"); - glad_glMatrixLoad3x3fNV = (PFNGLMATRIXLOAD3X3FNVPROC)load("glMatrixLoad3x3fNV"); - glad_glMatrixLoadTranspose3x3fNV = (PFNGLMATRIXLOADTRANSPOSE3X3FNVPROC)load("glMatrixLoadTranspose3x3fNV"); - glad_glMatrixMult3x2fNV = (PFNGLMATRIXMULT3X2FNVPROC)load("glMatrixMult3x2fNV"); - glad_glMatrixMult3x3fNV = (PFNGLMATRIXMULT3X3FNVPROC)load("glMatrixMult3x3fNV"); - glad_glMatrixMultTranspose3x3fNV = (PFNGLMATRIXMULTTRANSPOSE3X3FNVPROC)load("glMatrixMultTranspose3x3fNV"); - glad_glStencilThenCoverFillPathNV = (PFNGLSTENCILTHENCOVERFILLPATHNVPROC)load("glStencilThenCoverFillPathNV"); - glad_glStencilThenCoverStrokePathNV = (PFNGLSTENCILTHENCOVERSTROKEPATHNVPROC)load("glStencilThenCoverStrokePathNV"); - glad_glStencilThenCoverFillPathInstancedNV = (PFNGLSTENCILTHENCOVERFILLPATHINSTANCEDNVPROC)load("glStencilThenCoverFillPathInstancedNV"); - glad_glStencilThenCoverStrokePathInstancedNV = (PFNGLSTENCILTHENCOVERSTROKEPATHINSTANCEDNVPROC)load("glStencilThenCoverStrokePathInstancedNV"); - glad_glPathGlyphIndexRangeNV = (PFNGLPATHGLYPHINDEXRANGENVPROC)load("glPathGlyphIndexRangeNV"); - glad_glPathGlyphIndexArrayNV = (PFNGLPATHGLYPHINDEXARRAYNVPROC)load("glPathGlyphIndexArrayNV"); - glad_glPathMemoryGlyphIndexArrayNV = (PFNGLPATHMEMORYGLYPHINDEXARRAYNVPROC)load("glPathMemoryGlyphIndexArrayNV"); - glad_glProgramPathFragmentInputGenNV = (PFNGLPROGRAMPATHFRAGMENTINPUTGENNVPROC)load("glProgramPathFragmentInputGenNV"); - glad_glGetProgramResourcefvNV = (PFNGLGETPROGRAMRESOURCEFVNVPROC)load("glGetProgramResourcefvNV"); - glad_glPathColorGenNV = (PFNGLPATHCOLORGENNVPROC)load("glPathColorGenNV"); - glad_glPathTexGenNV = (PFNGLPATHTEXGENNVPROC)load("glPathTexGenNV"); - glad_glPathFogGenNV = (PFNGLPATHFOGGENNVPROC)load("glPathFogGenNV"); - glad_glGetPathColorGenivNV = (PFNGLGETPATHCOLORGENIVNVPROC)load("glGetPathColorGenivNV"); - glad_glGetPathColorGenfvNV = (PFNGLGETPATHCOLORGENFVNVPROC)load("glGetPathColorGenfvNV"); - 
glad_glGetPathTexGenivNV = (PFNGLGETPATHTEXGENIVNVPROC)load("glGetPathTexGenivNV"); - glad_glGetPathTexGenfvNV = (PFNGLGETPATHTEXGENFVNVPROC)load("glGetPathTexGenfvNV"); - glad_glMatrixFrustumEXT = (PFNGLMATRIXFRUSTUMEXTPROC)load("glMatrixFrustumEXT"); - glad_glMatrixLoadIdentityEXT = (PFNGLMATRIXLOADIDENTITYEXTPROC)load("glMatrixLoadIdentityEXT"); - glad_glMatrixLoadTransposefEXT = (PFNGLMATRIXLOADTRANSPOSEFEXTPROC)load("glMatrixLoadTransposefEXT"); - glad_glMatrixLoadTransposedEXT = (PFNGLMATRIXLOADTRANSPOSEDEXTPROC)load("glMatrixLoadTransposedEXT"); - glad_glMatrixLoadfEXT = (PFNGLMATRIXLOADFEXTPROC)load("glMatrixLoadfEXT"); - glad_glMatrixLoaddEXT = (PFNGLMATRIXLOADDEXTPROC)load("glMatrixLoaddEXT"); - glad_glMatrixMultTransposefEXT = (PFNGLMATRIXMULTTRANSPOSEFEXTPROC)load("glMatrixMultTransposefEXT"); - glad_glMatrixMultTransposedEXT = (PFNGLMATRIXMULTTRANSPOSEDEXTPROC)load("glMatrixMultTransposedEXT"); - glad_glMatrixMultfEXT = (PFNGLMATRIXMULTFEXTPROC)load("glMatrixMultfEXT"); - glad_glMatrixMultdEXT = (PFNGLMATRIXMULTDEXTPROC)load("glMatrixMultdEXT"); - glad_glMatrixOrthoEXT = (PFNGLMATRIXORTHOEXTPROC)load("glMatrixOrthoEXT"); - glad_glMatrixPopEXT = (PFNGLMATRIXPOPEXTPROC)load("glMatrixPopEXT"); - glad_glMatrixPushEXT = (PFNGLMATRIXPUSHEXTPROC)load("glMatrixPushEXT"); - glad_glMatrixRotatefEXT = (PFNGLMATRIXROTATEFEXTPROC)load("glMatrixRotatefEXT"); - glad_glMatrixRotatedEXT = (PFNGLMATRIXROTATEDEXTPROC)load("glMatrixRotatedEXT"); - glad_glMatrixScalefEXT = (PFNGLMATRIXSCALEFEXTPROC)load("glMatrixScalefEXT"); - glad_glMatrixScaledEXT = (PFNGLMATRIXSCALEDEXTPROC)load("glMatrixScaledEXT"); - glad_glMatrixTranslatefEXT = (PFNGLMATRIXTRANSLATEFEXTPROC)load("glMatrixTranslatefEXT"); - glad_glMatrixTranslatedEXT = (PFNGLMATRIXTRANSLATEDEXTPROC)load("glMatrixTranslatedEXT"); -} static void load_GL_NV_polygon_mode(GLADloadproc load) { if(!GLAD_GL_NV_polygon_mode) return; glad_glPolygonModeNV = (PFNGLPOLYGONMODENVPROC)load("glPolygonModeNV"); @@ -2839,37 +12015,6 @@ 
static void load_GL_NV_read_buffer(GLADloadproc load) { if(!GLAD_GL_NV_read_buffer) return; glad_glReadBufferNV = (PFNGLREADBUFFERNVPROC)load("glReadBufferNV"); } -static void load_GL_NV_sample_locations(GLADloadproc load) { - if(!GLAD_GL_NV_sample_locations) return; - glad_glFramebufferSampleLocationsfvNV = (PFNGLFRAMEBUFFERSAMPLELOCATIONSFVNVPROC)load("glFramebufferSampleLocationsfvNV"); - glad_glNamedFramebufferSampleLocationsfvNV = (PFNGLNAMEDFRAMEBUFFERSAMPLELOCATIONSFVNVPROC)load("glNamedFramebufferSampleLocationsfvNV"); - glad_glResolveDepthValuesNV = (PFNGLRESOLVEDEPTHVALUESNVPROC)load("glResolveDepthValuesNV"); -} -static void load_GL_NV_scissor_exclusive(GLADloadproc load) { - if(!GLAD_GL_NV_scissor_exclusive) return; - glad_glScissorExclusiveNV = (PFNGLSCISSOREXCLUSIVENVPROC)load("glScissorExclusiveNV"); - glad_glScissorExclusiveArrayvNV = (PFNGLSCISSOREXCLUSIVEARRAYVNVPROC)load("glScissorExclusiveArrayvNV"); -} -static void load_GL_NV_shading_rate_image(GLADloadproc load) { - if(!GLAD_GL_NV_shading_rate_image) return; - glad_glBindShadingRateImageNV = (PFNGLBINDSHADINGRATEIMAGENVPROC)load("glBindShadingRateImageNV"); - glad_glGetShadingRateImagePaletteNV = (PFNGLGETSHADINGRATEIMAGEPALETTENVPROC)load("glGetShadingRateImagePaletteNV"); - glad_glGetShadingRateSampleLocationivNV = (PFNGLGETSHADINGRATESAMPLELOCATIONIVNVPROC)load("glGetShadingRateSampleLocationivNV"); - glad_glShadingRateImageBarrierNV = (PFNGLSHADINGRATEIMAGEBARRIERNVPROC)load("glShadingRateImageBarrierNV"); - glad_glShadingRateImagePaletteNV = (PFNGLSHADINGRATEIMAGEPALETTENVPROC)load("glShadingRateImagePaletteNV"); - glad_glShadingRateSampleOrderNV = (PFNGLSHADINGRATESAMPLEORDERNVPROC)load("glShadingRateSampleOrderNV"); - glad_glShadingRateSampleOrderCustomNV = (PFNGLSHADINGRATESAMPLEORDERCUSTOMNVPROC)load("glShadingRateSampleOrderCustomNV"); -} -static void load_GL_NV_texture_barrier(GLADloadproc load) { - if(!GLAD_GL_NV_texture_barrier) return; - glad_glTextureBarrierNV = 
(PFNGLTEXTUREBARRIERNVPROC)load("glTextureBarrierNV"); -} -static void load_GL_NV_timeline_semaphore(GLADloadproc load) { - if(!GLAD_GL_NV_timeline_semaphore) return; - glad_glCreateSemaphoresNV = (PFNGLCREATESEMAPHORESNVPROC)load("glCreateSemaphoresNV"); - glad_glSemaphoreParameterivNV = (PFNGLSEMAPHOREPARAMETERIVNVPROC)load("glSemaphoreParameterivNV"); - glad_glGetSemaphoreParameterivNV = (PFNGLGETSEMAPHOREPARAMETERIVNVPROC)load("glGetSemaphoreParameterivNV"); -} static void load_GL_NV_viewport_array(GLADloadproc load) { if(!GLAD_GL_NV_viewport_array) return; glad_glViewportArrayvNV = (PFNGLVIEWPORTARRAYVNVPROC)load("glViewportArrayvNV"); @@ -2885,19 +12030,6 @@ static void load_GL_NV_viewport_array(GLADloadproc load) { glad_glDisableiNV = (PFNGLDISABLEINVPROC)load("glDisableiNV"); glad_glIsEnablediNV = (PFNGLISENABLEDINVPROC)load("glIsEnablediNV"); } -static void load_GL_NV_viewport_swizzle(GLADloadproc load) { - if(!GLAD_GL_NV_viewport_swizzle) return; - glad_glViewportSwizzleNV = (PFNGLVIEWPORTSWIZZLENVPROC)load("glViewportSwizzleNV"); -} -static void load_GL_OES_EGL_image(GLADloadproc load) { - if(!GLAD_GL_OES_EGL_image) return; - glad_glEGLImageTargetTexture2DOES = (PFNGLEGLIMAGETARGETTEXTURE2DOESPROC)load("glEGLImageTargetTexture2DOES"); - glad_glEGLImageTargetRenderbufferStorageOES = (PFNGLEGLIMAGETARGETRENDERBUFFERSTORAGEOESPROC)load("glEGLImageTargetRenderbufferStorageOES"); -} -static void load_GL_OES_EGL_image_external(GLADloadproc load) { - if(!GLAD_GL_OES_EGL_image_external) return; - glad_glEGLImageTargetTexture2DOES = (PFNGLEGLIMAGETARGETTEXTURE2DOESPROC)load("glEGLImageTargetTexture2DOES"); -} static void load_GL_OES_copy_image(GLADloadproc load) { if(!GLAD_GL_OES_copy_image) return; glad_glCopyImageSubDataOES = (PFNGLCOPYIMAGESUBDATAOESPROC)load("glCopyImageSubDataOES"); @@ -2929,12 +12061,6 @@ static void load_GL_OES_get_program_binary(GLADloadproc load) { glad_glGetProgramBinaryOES = (PFNGLGETPROGRAMBINARYOESPROC)load("glGetProgramBinaryOES"); 
glad_glProgramBinaryOES = (PFNGLPROGRAMBINARYOESPROC)load("glProgramBinaryOES"); } -static void load_GL_OES_mapbuffer(GLADloadproc load) { - if(!GLAD_GL_OES_mapbuffer) return; - glad_glMapBufferOES = (PFNGLMAPBUFFEROESPROC)load("glMapBufferOES"); - glad_glUnmapBufferOES = (PFNGLUNMAPBUFFEROESPROC)load("glUnmapBufferOES"); - glad_glGetBufferPointervOES = (PFNGLGETBUFFERPOINTERVOESPROC)load("glGetBufferPointervOES"); -} static void load_GL_OES_primitive_bounding_box(GLADloadproc load) { if(!GLAD_GL_OES_primitive_bounding_box) return; glad_glPrimitiveBoundingBoxOES = (PFNGLPRIMITIVEBOUNDINGBOXOESPROC)load("glPrimitiveBoundingBoxOES"); @@ -2980,13 +12106,6 @@ static void load_GL_OES_texture_view(GLADloadproc load) { if(!GLAD_GL_OES_texture_view) return; glad_glTextureViewOES = (PFNGLTEXTUREVIEWOESPROC)load("glTextureViewOES"); } -static void load_GL_OES_vertex_array_object(GLADloadproc load) { - if(!GLAD_GL_OES_vertex_array_object) return; - glad_glBindVertexArrayOES = (PFNGLBINDVERTEXARRAYOESPROC)load("glBindVertexArrayOES"); - glad_glDeleteVertexArraysOES = (PFNGLDELETEVERTEXARRAYSOESPROC)load("glDeleteVertexArraysOES"); - glad_glGenVertexArraysOES = (PFNGLGENVERTEXARRAYSOESPROC)load("glGenVertexArraysOES"); - glad_glIsVertexArrayOES = (PFNGLISVERTEXARRAYOESPROC)load("glIsVertexArrayOES"); -} static void load_GL_OES_viewport_array(GLADloadproc load) { if(!GLAD_GL_OES_viewport_array) return; glad_glViewportArrayvOES = (PFNGLVIEWPORTARRAYVOESPROC)load("glViewportArrayvOES"); @@ -3002,11 +12121,6 @@ static void load_GL_OES_viewport_array(GLADloadproc load) { glad_glDisableiOES = (PFNGLDISABLEIOESPROC)load("glDisableiOES"); glad_glIsEnablediOES = (PFNGLISENABLEDIOESPROC)load("glIsEnablediOES"); } -static void load_GL_OVR_multiview(GLADloadproc load) { - if(!GLAD_GL_OVR_multiview) return; - glad_glFramebufferTextureMultiviewOVR = (PFNGLFRAMEBUFFERTEXTUREMULTIVIEWOVRPROC)load("glFramebufferTextureMultiviewOVR"); - glad_glNamedFramebufferTextureMultiviewOVR = 
(PFNGLNAMEDFRAMEBUFFERTEXTUREMULTIVIEWOVRPROC)load("glNamedFramebufferTextureMultiviewOVR"); -} static void load_GL_OVR_multiview_multisampled_render_to_texture(GLADloadproc load) { if(!GLAD_GL_OVR_multiview_multisampled_render_to_texture) return; glad_glFramebufferTextureMultisampleMultiviewOVR = (PFNGLFRAMEBUFFERTEXTUREMULTISAMPLEMULTIVIEWOVRPROC)load("glFramebufferTextureMultisampleMultiviewOVR"); @@ -3015,31 +12129,6 @@ static void load_GL_QCOM_alpha_test(GLADloadproc load) { if(!GLAD_GL_QCOM_alpha_test) return; glad_glAlphaFuncQCOM = (PFNGLALPHAFUNCQCOMPROC)load("glAlphaFuncQCOM"); } -static void load_GL_QCOM_driver_control(GLADloadproc load) { - if(!GLAD_GL_QCOM_driver_control) return; - glad_glGetDriverControlsQCOM = (PFNGLGETDRIVERCONTROLSQCOMPROC)load("glGetDriverControlsQCOM"); - glad_glGetDriverControlStringQCOM = (PFNGLGETDRIVERCONTROLSTRINGQCOMPROC)load("glGetDriverControlStringQCOM"); - glad_glEnableDriverControlQCOM = (PFNGLENABLEDRIVERCONTROLQCOMPROC)load("glEnableDriverControlQCOM"); - glad_glDisableDriverControlQCOM = (PFNGLDISABLEDRIVERCONTROLQCOMPROC)load("glDisableDriverControlQCOM"); -} -static void load_GL_QCOM_extended_get(GLADloadproc load) { - if(!GLAD_GL_QCOM_extended_get) return; - glad_glExtGetTexturesQCOM = (PFNGLEXTGETTEXTURESQCOMPROC)load("glExtGetTexturesQCOM"); - glad_glExtGetBuffersQCOM = (PFNGLEXTGETBUFFERSQCOMPROC)load("glExtGetBuffersQCOM"); - glad_glExtGetRenderbuffersQCOM = (PFNGLEXTGETRENDERBUFFERSQCOMPROC)load("glExtGetRenderbuffersQCOM"); - glad_glExtGetFramebuffersQCOM = (PFNGLEXTGETFRAMEBUFFERSQCOMPROC)load("glExtGetFramebuffersQCOM"); - glad_glExtGetTexLevelParameterivQCOM = (PFNGLEXTGETTEXLEVELPARAMETERIVQCOMPROC)load("glExtGetTexLevelParameterivQCOM"); - glad_glExtTexObjectStateOverrideiQCOM = (PFNGLEXTTEXOBJECTSTATEOVERRIDEIQCOMPROC)load("glExtTexObjectStateOverrideiQCOM"); - glad_glExtGetTexSubImageQCOM = (PFNGLEXTGETTEXSUBIMAGEQCOMPROC)load("glExtGetTexSubImageQCOM"); - glad_glExtGetBufferPointervQCOM = 
(PFNGLEXTGETBUFFERPOINTERVQCOMPROC)load("glExtGetBufferPointervQCOM"); -} -static void load_GL_QCOM_extended_get2(GLADloadproc load) { - if(!GLAD_GL_QCOM_extended_get2) return; - glad_glExtGetShadersQCOM = (PFNGLEXTGETSHADERSQCOMPROC)load("glExtGetShadersQCOM"); - glad_glExtGetProgramsQCOM = (PFNGLEXTGETPROGRAMSQCOMPROC)load("glExtGetProgramsQCOM"); - glad_glExtIsProgramBinaryQCOM = (PFNGLEXTISPROGRAMBINARYQCOMPROC)load("glExtIsProgramBinaryQCOM"); - glad_glExtGetProgramBinarySourceQCOM = (PFNGLEXTGETPROGRAMBINARYSOURCEQCOMPROC)load("glExtGetProgramBinarySourceQCOM"); -} static void load_GL_QCOM_frame_extrapolation(GLADloadproc load) { if(!GLAD_GL_QCOM_frame_extrapolation) return; glad_glExtrapolateTex2DQCOM = (PFNGLEXTRAPOLATETEX2DQCOMPROC)load("glExtrapolateTex2DQCOM"); @@ -3066,11 +12155,6 @@ static void load_GL_QCOM_texture_foveated(GLADloadproc load) { if(!GLAD_GL_QCOM_texture_foveated) return; glad_glTextureFoveationParametersQCOM = (PFNGLTEXTUREFOVEATIONPARAMETERSQCOMPROC)load("glTextureFoveationParametersQCOM"); } -static void load_GL_QCOM_tiled_rendering(GLADloadproc load) { - if(!GLAD_GL_QCOM_tiled_rendering) return; - glad_glStartTilingQCOM = (PFNGLSTARTTILINGQCOMPROC)load("glStartTilingQCOM"); - glad_glEndTilingQCOM = (PFNGLENDTILINGQCOMPROC)load("glEndTilingQCOM"); -} static int find_extensionsGLES2(void) { if (!get_exts()) return 0; GLAD_GL_AMD_compressed_3DC_texture = has_ext("GL_AMD_compressed_3DC_texture"); @@ -3600,3 +12684,185 @@ int gladLoadGLES2Loader(GLADloadproc load) { return GLVersion.major != 0 || GLVersion.minor != 0; } +static void load_GL_SC_VERSION_2_0(GLADloadproc load) { + if(!GLAD_GL_SC_VERSION_2_0) return; + glad_glActiveTexture = (PFNGLACTIVETEXTUREPROC)load("glActiveTexture"); + glad_glBindBuffer = (PFNGLBINDBUFFERPROC)load("glBindBuffer"); + glad_glBindFramebuffer = (PFNGLBINDFRAMEBUFFERPROC)load("glBindFramebuffer"); + glad_glBindRenderbuffer = (PFNGLBINDRENDERBUFFERPROC)load("glBindRenderbuffer"); + glad_glBindTexture = 
(PFNGLBINDTEXTUREPROC)load("glBindTexture"); + glad_glBlendColor = (PFNGLBLENDCOLORPROC)load("glBlendColor"); + glad_glBlendEquation = (PFNGLBLENDEQUATIONPROC)load("glBlendEquation"); + glad_glBlendEquationSeparate = (PFNGLBLENDEQUATIONSEPARATEPROC)load("glBlendEquationSeparate"); + glad_glBlendFunc = (PFNGLBLENDFUNCPROC)load("glBlendFunc"); + glad_glBlendFuncSeparate = (PFNGLBLENDFUNCSEPARATEPROC)load("glBlendFuncSeparate"); + glad_glBufferData = (PFNGLBUFFERDATAPROC)load("glBufferData"); + glad_glBufferSubData = (PFNGLBUFFERSUBDATAPROC)load("glBufferSubData"); + glad_glCheckFramebufferStatus = (PFNGLCHECKFRAMEBUFFERSTATUSPROC)load("glCheckFramebufferStatus"); + glad_glClear = (PFNGLCLEARPROC)load("glClear"); + glad_glClearColor = (PFNGLCLEARCOLORPROC)load("glClearColor"); + glad_glClearDepthf = (PFNGLCLEARDEPTHFPROC)load("glClearDepthf"); + glad_glClearStencil = (PFNGLCLEARSTENCILPROC)load("glClearStencil"); + glad_glColorMask = (PFNGLCOLORMASKPROC)load("glColorMask"); + glad_glCompressedTexSubImage2D = (PFNGLCOMPRESSEDTEXSUBIMAGE2DPROC)load("glCompressedTexSubImage2D"); + glad_glCreateProgram = (PFNGLCREATEPROGRAMPROC)load("glCreateProgram"); + glad_glCullFace = (PFNGLCULLFACEPROC)load("glCullFace"); + glad_glDepthFunc = (PFNGLDEPTHFUNCPROC)load("glDepthFunc"); + glad_glDepthMask = (PFNGLDEPTHMASKPROC)load("glDepthMask"); + glad_glDepthRangef = (PFNGLDEPTHRANGEFPROC)load("glDepthRangef"); + glad_glDisable = (PFNGLDISABLEPROC)load("glDisable"); + glad_glDisableVertexAttribArray = (PFNGLDISABLEVERTEXATTRIBARRAYPROC)load("glDisableVertexAttribArray"); + glad_glDrawArrays = (PFNGLDRAWARRAYSPROC)load("glDrawArrays"); + glad_glDrawRangeElements = (PFNGLDRAWRANGEELEMENTSPROC)load("glDrawRangeElements"); + glad_glEnable = (PFNGLENABLEPROC)load("glEnable"); + glad_glEnableVertexAttribArray = (PFNGLENABLEVERTEXATTRIBARRAYPROC)load("glEnableVertexAttribArray"); + glad_glFinish = (PFNGLFINISHPROC)load("glFinish"); + glad_glFlush = (PFNGLFLUSHPROC)load("glFlush"); + 
glad_glFramebufferRenderbuffer = (PFNGLFRAMEBUFFERRENDERBUFFERPROC)load("glFramebufferRenderbuffer"); + glad_glFramebufferTexture2D = (PFNGLFRAMEBUFFERTEXTURE2DPROC)load("glFramebufferTexture2D"); + glad_glFrontFace = (PFNGLFRONTFACEPROC)load("glFrontFace"); + glad_glGenBuffers = (PFNGLGENBUFFERSPROC)load("glGenBuffers"); + glad_glGenerateMipmap = (PFNGLGENERATEMIPMAPPROC)load("glGenerateMipmap"); + glad_glGenFramebuffers = (PFNGLGENFRAMEBUFFERSPROC)load("glGenFramebuffers"); + glad_glGenRenderbuffers = (PFNGLGENRENDERBUFFERSPROC)load("glGenRenderbuffers"); + glad_glGenTextures = (PFNGLGENTEXTURESPROC)load("glGenTextures"); + glad_glGetAttribLocation = (PFNGLGETATTRIBLOCATIONPROC)load("glGetAttribLocation"); + glad_glGetBooleanv = (PFNGLGETBOOLEANVPROC)load("glGetBooleanv"); + glad_glGetBufferParameteriv = (PFNGLGETBUFFERPARAMETERIVPROC)load("glGetBufferParameteriv"); + glad_glGetError = (PFNGLGETERRORPROC)load("glGetError"); + glad_glGetFloatv = (PFNGLGETFLOATVPROC)load("glGetFloatv"); + glad_glGetFramebufferAttachmentParameteriv = (PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVPROC)load("glGetFramebufferAttachmentParameteriv"); + glad_glGetGraphicsResetStatus = (PFNGLGETGRAPHICSRESETSTATUSPROC)load("glGetGraphicsResetStatus"); + glad_glGetIntegerv = (PFNGLGETINTEGERVPROC)load("glGetIntegerv"); + glad_glGetProgramiv = (PFNGLGETPROGRAMIVPROC)load("glGetProgramiv"); + glad_glGetRenderbufferParameteriv = (PFNGLGETRENDERBUFFERPARAMETERIVPROC)load("glGetRenderbufferParameteriv"); + glad_glGetString = (PFNGLGETSTRINGPROC)load("glGetString"); + glad_glGetTexParameterfv = (PFNGLGETTEXPARAMETERFVPROC)load("glGetTexParameterfv"); + glad_glGetTexParameteriv = (PFNGLGETTEXPARAMETERIVPROC)load("glGetTexParameteriv"); + glad_glGetnUniformfv = (PFNGLGETNUNIFORMFVPROC)load("glGetnUniformfv"); + glad_glGetnUniformiv = (PFNGLGETNUNIFORMIVPROC)load("glGetnUniformiv"); + glad_glGetUniformLocation = (PFNGLGETUNIFORMLOCATIONPROC)load("glGetUniformLocation"); + glad_glGetVertexAttribfv = 
(PFNGLGETVERTEXATTRIBFVPROC)load("glGetVertexAttribfv"); + glad_glGetVertexAttribiv = (PFNGLGETVERTEXATTRIBIVPROC)load("glGetVertexAttribiv"); + glad_glGetVertexAttribPointerv = (PFNGLGETVERTEXATTRIBPOINTERVPROC)load("glGetVertexAttribPointerv"); + glad_glHint = (PFNGLHINTPROC)load("glHint"); + glad_glIsEnabled = (PFNGLISENABLEDPROC)load("glIsEnabled"); + glad_glLineWidth = (PFNGLLINEWIDTHPROC)load("glLineWidth"); + glad_glPixelStorei = (PFNGLPIXELSTOREIPROC)load("glPixelStorei"); + glad_glPolygonOffset = (PFNGLPOLYGONOFFSETPROC)load("glPolygonOffset"); + glad_glProgramBinary = (PFNGLPROGRAMBINARYPROC)load("glProgramBinary"); + glad_glReadnPixels = (PFNGLREADNPIXELSPROC)load("glReadnPixels"); + glad_glRenderbufferStorage = (PFNGLRENDERBUFFERSTORAGEPROC)load("glRenderbufferStorage"); + glad_glSampleCoverage = (PFNGLSAMPLECOVERAGEPROC)load("glSampleCoverage"); + glad_glScissor = (PFNGLSCISSORPROC)load("glScissor"); + glad_glStencilFunc = (PFNGLSTENCILFUNCPROC)load("glStencilFunc"); + glad_glStencilFuncSeparate = (PFNGLSTENCILFUNCSEPARATEPROC)load("glStencilFuncSeparate"); + glad_glStencilMask = (PFNGLSTENCILMASKPROC)load("glStencilMask"); + glad_glStencilMaskSeparate = (PFNGLSTENCILMASKSEPARATEPROC)load("glStencilMaskSeparate"); + glad_glStencilOp = (PFNGLSTENCILOPPROC)load("glStencilOp"); + glad_glStencilOpSeparate = (PFNGLSTENCILOPSEPARATEPROC)load("glStencilOpSeparate"); + glad_glTexStorage2D = (PFNGLTEXSTORAGE2DPROC)load("glTexStorage2D"); + glad_glTexParameterf = (PFNGLTEXPARAMETERFPROC)load("glTexParameterf"); + glad_glTexParameterfv = (PFNGLTEXPARAMETERFVPROC)load("glTexParameterfv"); + glad_glTexParameteri = (PFNGLTEXPARAMETERIPROC)load("glTexParameteri"); + glad_glTexParameteriv = (PFNGLTEXPARAMETERIVPROC)load("glTexParameteriv"); + glad_glTexSubImage2D = (PFNGLTEXSUBIMAGE2DPROC)load("glTexSubImage2D"); + glad_glUniform1f = (PFNGLUNIFORM1FPROC)load("glUniform1f"); + glad_glUniform1fv = (PFNGLUNIFORM1FVPROC)load("glUniform1fv"); + glad_glUniform1i = 
(PFNGLUNIFORM1IPROC)load("glUniform1i"); + glad_glUniform1iv = (PFNGLUNIFORM1IVPROC)load("glUniform1iv"); + glad_glUniform2f = (PFNGLUNIFORM2FPROC)load("glUniform2f"); + glad_glUniform2fv = (PFNGLUNIFORM2FVPROC)load("glUniform2fv"); + glad_glUniform2i = (PFNGLUNIFORM2IPROC)load("glUniform2i"); + glad_glUniform2iv = (PFNGLUNIFORM2IVPROC)load("glUniform2iv"); + glad_glUniform3f = (PFNGLUNIFORM3FPROC)load("glUniform3f"); + glad_glUniform3fv = (PFNGLUNIFORM3FVPROC)load("glUniform3fv"); + glad_glUniform3i = (PFNGLUNIFORM3IPROC)load("glUniform3i"); + glad_glUniform3iv = (PFNGLUNIFORM3IVPROC)load("glUniform3iv"); + glad_glUniform4f = (PFNGLUNIFORM4FPROC)load("glUniform4f"); + glad_glUniform4fv = (PFNGLUNIFORM4FVPROC)load("glUniform4fv"); + glad_glUniform4i = (PFNGLUNIFORM4IPROC)load("glUniform4i"); + glad_glUniform4iv = (PFNGLUNIFORM4IVPROC)load("glUniform4iv"); + glad_glUniformMatrix2fv = (PFNGLUNIFORMMATRIX2FVPROC)load("glUniformMatrix2fv"); + glad_glUniformMatrix3fv = (PFNGLUNIFORMMATRIX3FVPROC)load("glUniformMatrix3fv"); + glad_glUniformMatrix4fv = (PFNGLUNIFORMMATRIX4FVPROC)load("glUniformMatrix4fv"); + glad_glUseProgram = (PFNGLUSEPROGRAMPROC)load("glUseProgram"); + glad_glVertexAttrib1f = (PFNGLVERTEXATTRIB1FPROC)load("glVertexAttrib1f"); + glad_glVertexAttrib1fv = (PFNGLVERTEXATTRIB1FVPROC)load("glVertexAttrib1fv"); + glad_glVertexAttrib2f = (PFNGLVERTEXATTRIB2FPROC)load("glVertexAttrib2f"); + glad_glVertexAttrib2fv = (PFNGLVERTEXATTRIB2FVPROC)load("glVertexAttrib2fv"); + glad_glVertexAttrib3f = (PFNGLVERTEXATTRIB3FPROC)load("glVertexAttrib3f"); + glad_glVertexAttrib3fv = (PFNGLVERTEXATTRIB3FVPROC)load("glVertexAttrib3fv"); + glad_glVertexAttrib4f = (PFNGLVERTEXATTRIB4FPROC)load("glVertexAttrib4f"); + glad_glVertexAttrib4fv = (PFNGLVERTEXATTRIB4FVPROC)load("glVertexAttrib4fv"); + glad_glVertexAttribPointer = (PFNGLVERTEXATTRIBPOINTERPROC)load("glVertexAttribPointer"); + glad_glViewport = (PFNGLVIEWPORTPROC)load("glViewport"); +} +static int 
find_extensionsGLSC2(void) {
+    /* Records which optional extensions are present: each GLAD_GL_* flag
+     * receives the result of has_ext() for the extension of the same name.
+     * Returns 0 when get_exts() fails, 1 otherwise. */
+    if (!get_exts()) return 0;
+    GLAD_GL_EXT_texture_compression_s3tc = has_ext("GL_EXT_texture_compression_s3tc");
+    GLAD_GL_IMG_pvric_end_to_end_signature = has_ext("GL_IMG_pvric_end_to_end_signature");
+    GLAD_GL_IMG_tile_region_protection = has_ext("GL_IMG_tile_region_protection");
+    GLAD_GL_OES_depth24 = has_ext("GL_OES_depth24");
+    GLAD_GL_OES_depth32 = has_ext("GL_OES_depth32");
+    GLAD_GL_OES_rgb8_rgba8 = has_ext("GL_OES_rgb8_rgba8");
+    GLAD_GL_OES_standard_derivatives = has_ext("GL_OES_standard_derivatives");
+    free_exts();
+    return 1;
+}
+
+/* Parses the GL_VERSION string into GLVersion and max_loaded_major/minor and
+ * sets GLAD_GL_SC_VERSION_2_0. Returns without touching anything when
+ * glGetString(GL_VERSION) yields NULL. */
+static void find_coreGLSC2(void) {
+
+    /* Thank you @elmindreda
+     * https://github.com/elmindreda/greg/blob/master/templates/greg.c.in#L176
+     * https://github.com/glfw/glfw/blob/master/src/context.c#L36
+     */
+    int i, major, minor;
+
+    const char* version;
+    /* Known prefixes that may precede the "major.minor" digits. */
+    const char* prefixes[] = {
+        "OpenGL ES-CM ",
+        "OpenGL ES-CL ",
+        "OpenGL ES ",
+        NULL
+    };
+
+    version = (const char*) glGetString(GL_VERSION);
+    if (!version) return;
+
+    /* Skip the first matching prefix so version points at the digits. */
+    for (i = 0; prefixes[i]; i++) {
+        const size_t length = strlen(prefixes[i]);
+        if (strncmp(version, prefixes[i], length) == 0) {
+            version += length;
+            break;
+        }
+    }
+
+/* PR #18 */
+/* NOTE(review): the sscanf result is not checked; if the string does not
+ * start with "<int>.<int>", major and minor are used uninitialized below. */
+#ifdef _MSC_VER
+    sscanf_s(version, "%d.%d", &major, &minor);
+#else
+    sscanf(version, "%d.%d", &major, &minor);
+#endif
+
+    GLVersion.major = major; GLVersion.minor = minor;
+    max_loaded_major = major; max_loaded_minor = minor;
+    GLAD_GL_SC_VERSION_2_0 = (major == 2 && minor >= 0) || major > 2;
+    /* Cap the "max loaded" version at 2.0 for any reported version >= 2.0. */
+    if (GLVersion.major > 2 || (GLVersion.major >= 2 && GLVersion.minor >= 0)) {
+        max_loaded_major = 2;
+        max_loaded_minor = 0;
+    }
+}
+
+/* Entry point: resolves glGetString through `load`, detects the version,
+ * loads the 2.0 entry points and probes extensions. Returns 0 if glGetString
+ * cannot be resolved, if GL_VERSION is NULL, or if extension probing fails;
+ * otherwise non-zero when a version was detected. */
+int gladLoadGLSC2Loader(GLADloadproc load) {
+    GLVersion.major = 0; GLVersion.minor = 0;
+    glGetString = (PFNGLGETSTRINGPROC)load("glGetString");
+    if(glGetString == NULL) return 0;
+    if(glGetString(GL_VERSION) == NULL) return 0;
+    find_coreGLSC2();
+    load_GL_SC_VERSION_2_0(load);
+
+    if (!find_extensionsGLSC2()) return 0;
+    return GLVersion.major != 0 ||
GLVersion.minor != 0;
+}
+
diff --git a/Extra2D/src/graphics/opengl/gl_font_atlas.cpp b/Extra2D/src/graphics/opengl/gl_font_atlas.cpp
index 6fd4dd4..24e5d29 100644
--- a/Extra2D/src/graphics/opengl/gl_font_atlas.cpp
+++ b/Extra2D/src/graphics/opengl/gl_font_atlas.cpp
@@ -1,6 +1,6 @@
 #include
-#include
 #include
+#include
 #include
 #define STB_TRUETYPE_IMPLEMENTATION
 #include
@@ -75,7 +75,10 @@ Vec2 GLFontAtlas::measureText(const std::string &text) {
   float height = getAscent() - getDescent();
   float currentWidth = 0.0f;

-  for (char32_t codepoint : utf8ToUtf32(text)) {
+  // utf32_length_from_utf8() only sizes the buffer and does not validate.
+  // convert_utf8_to_utf32() validates while converting and returns the number
+  // of code points written (0 for invalid UTF-8), so shrink to that count;
+  // otherwise malformed text would be measured as a run of NUL code points.
+  std::u32string utf32_text;
+  utf32_text.resize(simdutf::utf32_length_from_utf8(text.data(), text.size()));
+  utf32_text.resize(simdutf::convert_utf8_to_utf32(text.data(), text.size(), utf32_text.data()));
+  for (char32_t codepoint : utf32_text) {
     if (codepoint == '\n') {
       width = std::max(width, currentWidth);
       currentWidth = 0.0f;
diff --git a/Extra2D/src/graphics/opengl/gl_renderer.cpp b/Extra2D/src/graphics/opengl/gl_renderer.cpp
index 2be034c..7ca31f3 100644
--- a/Extra2D/src/graphics/opengl/gl_renderer.cpp
+++ b/Extra2D/src/graphics/opengl/gl_renderer.cpp
@@ -2,7 +2,7 @@
 #include
 #include
 #include
-#include
+#include
 #include
 #include
 #include
@@ -448,7 +448,10 @@ void GLRenderer::drawText(const FontAtlas &font, const std::string &text,
   std::vector sprites;
   sprites.reserve(text.size()); // 预分配空间

-  for (char32_t codepoint : utf8ToUtf32(text)) {
+  // utf32_length_from_utf8() only sizes the buffer and does not validate.
+  // convert_utf8_to_utf32() validates while converting and returns the number
+  // of code points written (0 for invalid UTF-8), so shrink to that count;
+  // otherwise malformed text would be drawn as a run of NUL code points.
+  std::u32string utf32_text;
+  utf32_text.resize(simdutf::utf32_length_from_utf8(text.data(), text.size()));
+  utf32_text.resize(simdutf::convert_utf8_to_utf32(text.data(), text.size(), utf32_text.data()));
+  for (char32_t codepoint : utf32_text) {
     if (codepoint == '\n') {
       cursorX = x;
       cursorY += font.getLineHeight();
diff --git a/Extra2D/src/services/asset_service.cpp b/Extra2D/src/services/asset_service.cpp
new file mode 100644
index 0000000..f04cf60
--- /dev/null
+++ b/Extra2D/src/services/asset_service.cpp
@@ -0,0 +1,332 @@
+#include "extra2d/services/asset_service.h"
+
+#include
+
+namespace
extra2d {
+
+// ---------------------------------------------------------------------------
+// AssetService implementation
+// ---------------------------------------------------------------------------
+
+/**
+ * @brief Creates the cache object; loaders and the worker thread are set up
+ *        later by init().
+ */
+AssetService::AssetService() : cache_(ptr::makeUnique()) {}
+
+/**
+ * @brief Shuts the service down if it is still initialized.
+ */
+AssetService::~AssetService() { shutdown(); }
+
+/**
+ * @brief Registers the built-in loaders and starts the background worker.
+ * @return Always true; returns early (still true) when already initialized.
+ */
+bool AssetService::init() {
+  if (initialized()) {
+    return true;
+  }
+
+  setState(ServiceState::Initializing);
+
+  registerLoader(AssetLoaderFactory::createTextureLoader());
+  registerLoader(AssetLoaderFactory::createFontLoader());
+  registerLoader(AssetLoaderFactory::createShaderLoader());
+  registerLoader(AssetLoaderFactory::createAudioLoader());
+  registerLoader(AssetLoaderFactory::createDataLoader());
+
+  // running_ must be set before the thread starts: workerFunc() loops on it.
+  running_ = true;
+  workerThread_ = std::thread(&AssetService::workerFunc, this);
+
+  setState(ServiceState::Running);
+  return true;
+}
+
+/**
+ * @brief Stops the worker thread, then drops all assets, mounted packs and
+ *        loaders. Does nothing when the service is not initialized.
+ */
+void AssetService::shutdown() {
+  if (!initialized()) {
+    return;
+  }
+
+  setState(ServiceState::Stopping);
+
+  // Clear running_ while holding taskMutex_. workerFunc() evaluates its wait
+  // predicate under that mutex; an unlocked store could land between the
+  // predicate check and the actual block, the notify below would be missed,
+  // and join() would then wait forever.
+  {
+    std::lock_guard lock(taskMutex_);
+    running_ = false;
+  }
+  taskCv_.notify_all();
+
+  if (workerThread_.joinable()) {
+    workerThread_.join();
+  }
+
+  clear();
+  packManager_.unmountAll();
+  loaders_.clear();
+
+  setState(ServiceState::Stopped);
+}
+
+/**
+ * @brief Reports whether the asset at @p path is recorded as Loaded.
+ * @return true only when a state entry exists and equals AssetState::Loaded.
+ */
+bool AssetService::isLoaded(const std::string &path) const {
+  std::shared_lock lock(mutex_);
+  auto it = states_.find(AssetID(path));
+  return it != states_.end() && it->second == AssetState::Loaded;
+}
+
+/**
+ * @brief Reports whether the asset at @p path is recorded as Loading.
+ * @return true only when a state entry exists and equals AssetState::Loading.
+ */
+bool AssetService::isLoading(const std::string &path) const {
+  std::shared_lock lock(mutex_);
+  auto it = states_.find(AssetID(path));
+  return it != states_.end() && it->second == AssetState::Loading;
+}
+
+/**
+ * @brief Removes the asset and its state entry for @p path.
+ *
+ * cache_ is not touched here.
+ */
+void AssetService::unload(const std::string &path) {
+  std::unique_lock lock(mutex_);
+  AssetID id(path);
+  assets_.erase(id);
+  states_.erase(id);
+}
+
+/** @brief Forwards the byte limit to the cache. */
+void AssetService::setLimit(size_t maxBytes) { cache_->setLimit(maxBytes); }
+
+/** @brief Returns the size reported by the cache. */
+size_t AssetService::size() const { return cache_->size(); }
+
+/**
+ * @brief Drops every asset whose use_count() is 2 or less, then purges the
+ *        cache.
+ *
+ * NOTE(review): the threshold of 2 presumably accounts for the references held
+ * by assets_ and cache_ - confirm against the cache implementation.
+ */
+void AssetService::purge() {
+  std::unique_lock lock(mutex_);
+
+  // Collect first, erase afterwards, so assets_ is not modified while iterated.
+  std::vector toRemove;
+  for (const auto &[id, loaded] : assets_) {
+    if (loaded.asset.use_count() <= 2) {
+      toRemove.push_back(id);
+    }
+  }
+  for (const auto &id : toRemove) {
+    assets_.erase(id);
+    states_.erase(id);
+  }
+
+  cache_->purge();
+}
+
+/** @brief Removes all assets and states and clears the cache. */
+void AssetService::clear() {
+  std::unique_lock lock(mutex_);
+  assets_.clear();
+  states_.clear();
+  cache_->clear();
+}
+
+/** @brief Returns the statistics reported by the cache. */
+CacheStats AssetService::stats() const { return cache_->stats(); }
+
+/**
+ * @brief Mounts the pack at @p path via the pack manager.
+ * @return Whatever packManager_.mount() returns.
+ */
+bool AssetService::mount(const std::string &path) {
+  return packManager_.mount(path);
+}
+
+/** @brief Unmounts the pack at @p path via the pack manager. */
+void AssetService::unmount(const std::string &path) {
+  packManager_.unmount(path);
+}
+
+/** @brief Installs the pipe applied to data read from packs. */
+void AssetService::setPipe(DataPipe pipe) { pipe_ = std::move(pipe); }
+
+/** @brief Sets the directory prepended to asset paths by loadFromFile(). */
+void AssetService::setRoot(const std::string &path) { root_ = path; }
+
+/** @brief Returns the current root directory. */
+std::string AssetService::root() const { return root_; }
+
+/**
+ * @brief Runs all queued completion callbacks on the calling thread.
+ *
+ * The queue is taken over under callbackMutex_ and the callbacks are invoked
+ * after the lock has been released.
+ */
+void AssetService::process() {
+  std::queue> callbacks;
+  {
+    std::lock_guard lock(callbackMutex_);
+    callbacks = std::move(callbackQueue_);
+    callbackQueue_ = {};
+  }
+
+  while (!callbacks.empty()) {
+    callbacks.front()();
+    callbacks.pop();
+  }
+}
+
+/**
+ * @brief Synchronously loads an asset, from disk first and then from packs.
+ * @param id   Asset identifier.
+ * @param type Loader type used to pick the loader.
+ * @return Handle to the stored or newly loaded asset; a default-constructed
+ *         handle when both sources fail.
+ */
+AssetHandleBase AssetService::loadImpl(const AssetID &id,
+                                        std::type_index type) {
+  {
+    std::shared_lock lock(mutex_);
+    auto it = assets_.find(id);
+    if (it != assets_.end()) {
+      cache_->hit();
+      return AssetHandleBase(id, it->second.asset);
+    }
+  }
+
+  cache_->miss();
+
+  Ref asset = loadFromFile(id, type);
+  if (!asset) {
+    asset = loadFromPack(id, type);
+  }
+
+  if (!asset) {
+    return AssetHandleBase();
+  }
+
+  {
+    std::unique_lock lock(mutex_);
+    assets_[id] = {asset, type};
+    states_[id] = AssetState::Loaded;
+  }
+
+  cache_->add(asset);
+  return AssetHandleBase(id, asset);
+}
+
+/**
+ * @brief Requests an asynchronous load.
+ * @param id       Asset identifier.
+ * @param type     Loader type used to pick the loader.
+ * @param callback Completion callback; may be empty.
+ *
+ * When the asset is already stored the callback runs immediately on the
+ * calling thread. Otherwise the state becomes Loading and a task is queued for
+ * the worker; the callback is then delivered by process().
+ */
+void AssetService::loadAsyncImpl(
+    const AssetID &id, std::type_index type,
+    std::function callback) {
+  {
+    std::shared_lock lock(mutex_);
+    auto it = assets_.find(id);
+    if (it != assets_.end()) {
+      cache_->hit();
+      AssetHandleBase handle(id, it->second.asset);
+      // Release mutex_ before running user code: a callback that calls back
+      // into the service (unload(), load(), ...) would otherwise try to lock
+      // mutex_ again on this thread.
+      lock.unlock();
+      // An empty callback is legal on the worker path (see workerFunc()), so
+      // tolerate it here too instead of throwing std::bad_function_call.
+      if (callback) {
+        callback(std::move(handle));
+      }
+      return;
+    }
+  }
+
+  cache_->miss();
+
+  {
+    std::unique_lock lock(mutex_);
+    states_[id] = AssetState::Loading;
+  }
+
+  {
+    std::lock_guard lock(taskMutex_);
+    taskQueue_.push({id, type, std::move(callback)});
+  }
+  taskCv_.notify_one();
+}
+
+/**
+ * @brief Looks up an already stored asset without loading it.
+ * @param id   Asset identifier.
+ * @param type Unused.
+ * @return Handle to the stored asset, or a default-constructed handle.
+ */
+AssetHandleBase AssetService::getImpl(const AssetID &id, std::type_index type) {
+  (void)type;
+  std::shared_lock lock(mutex_);
+  auto it = assets_.find(id);
+  if (it != assets_.end()) {
+    cache_->hit();
+    return AssetHandleBase(id, it->second.asset);
+  }
+
+  cache_->miss();
+  return AssetHandleBase();
+}
+
+/**
+ * @brief Queues a background load without a callback unless the asset is
+ *        already stored.
+ *
+ * Unlike loadAsyncImpl() this does not mark the asset as Loading.
+ */
+void AssetService::preloadImpl(const AssetID &id, std::type_index type) {
+  {
+    std::shared_lock lock(mutex_);
+    if (assets_.find(id) != assets_.end()) {
+      return;
+    }
+  }
+
+  {
+    std::lock_guard lock(taskMutex_);
+    taskQueue_.push({id, type, nullptr});
+  }
+  taskCv_.notify_one();
+}
+
+/** @brief Stores @p loader under @p type, replacing any previous entry. */
+void AssetService::registerLoaderImpl(std::type_index type,
+                                      Unique loader) {
+  std::unique_lock lock(mutex_);
+  loaders_[type] = std::move(loader);
+}
+
+/**
+ * @brief Worker thread body: pops tasks, loads them and queues callbacks.
+ *
+ * Exits as soon as running_ is observed false; tasks still in the queue at
+ * that point are left unprocessed.
+ *
+ * NOTE(review): running_ is read in the loop condition without taskMutex_;
+ * presumably it is a std::atomic - confirm in the header.
+ */
+void AssetService::workerFunc() {
+  while (running_) {
+    LoadTask task;
+    {
+      std::unique_lock lock(taskMutex_);
+      taskCv_.wait(lock, [this] { return !taskQueue_.empty() || !running_; });
+
+      if (!running_) {
+        break;
+      }
+
+      if (taskQueue_.empty()) {
+        continue;
+      }
+
+      task = std::move(taskQueue_.front());
+      taskQueue_.pop();
+    }
+
+    // Loading happens outside taskMutex_ so producers are not blocked.
+    Ref asset = loadFromFile(task.id, task.type);
+    if (!asset) {
+      asset = loadFromPack(task.id, task.type);
+    }
+
+    if (asset) {
+      std::unique_lock lock(mutex_);
+      assets_[task.id] = {asset, task.type};
+      states_[task.id] = AssetState::Loaded;
+      cache_->add(asset);
+    } else {
+      std::unique_lock lock(mutex_);
+      states_[task.id] = AssetState::Failed;
+    }
+
+    // The callback is only queued here; process() invokes it later. On
+    // failure it still fires, with a handle built from a null asset.
+    if (task.callback) {
+      std::lock_guard lock(callbackMutex_);
+      callbackQueue_.push(
+          [task, asset]() { task.callback(AssetHandleBase(task.id, asset)); });
+    }
+  }
+}
+
+/**
+ * @brief Loads an asset from the file system.
+ * @param id   Asset identifier; id.path is resolved against root_ when set.
+ * @param type Loader type used to pick the loader.
+ * @return The loaded asset, or nullptr when no loader is registered, the file
+ *         does not exist, or its existence cannot be determined.
+ */
+Ref AssetService::loadFromFile(const AssetID &id, std::type_index type) {
+  auto *loader = getLoader(type);
+  if (!loader) {
+    return nullptr;
+  }
+
+  std::string fullPath = root_.empty() ? id.path : root_ + "/" + id.path;
+
+  // Non-throwing overload: this also runs on the worker thread, where an
+  // escaping std::filesystem::filesystem_error would terminate the process.
+  // An OS-level error is treated like a missing file.
+  std::error_code ec;
+  if (!std::filesystem::exists(fullPath, ec)) {
+    return nullptr;
+  }
+
+  return loader->loadBase(fullPath);
+}
+
+/**
+ * @brief Loads an asset from a mounted pack.
+ * @param id   Asset identifier looked up in the pack manager.
+ * @param type Loader type used to pick the loader.
+ * @return The loaded asset, or nullptr when no loader is registered, no pack
+ *         is found for the id, or the pack yields empty data.
+ */
+Ref AssetService::loadFromPack(const AssetID &id, std::type_index type) {
+  auto *loader = getLoader(type);
+  if (!loader) {
+    return nullptr;
+  }
+
+  auto *pack = packManager_.find(id);
+  if (!pack) {
+    return nullptr;
+  }
+
+  auto data = pack->read(id);
+  if (data.empty()) {
+    return nullptr;
+  }
+
+  // Run the raw bytes through the configured pipe, if there is one.
+  if (!pipe_.empty()) {
+    data = pipe_.process(data);
+  }
+
+  return loader->loadFromMemoryBase(data.data(), data.size());
+}
+
+/**
+ * @brief Finds the loader registered for @p type.
+ * @return Non-owning pointer to the loader, or nullptr when none is registered.
+ *
+ * NOTE(review): loaders_ is read here without mutex_, while
+ * registerLoaderImpl() writes it under mutex_ - confirm that no loader is
+ * registered while the worker thread is running.
+ */
+AssetLoaderBase *AssetService::getLoader(std::type_index type) {
+  auto it = loaders_.find(type);
+  return it != loaders_.end() ? it->second.get() : nullptr;
+}
+
+/**
+ * @brief Maps a path's file extension to the matching asset class.
+ * @return typeid of the asset class; DataAsset for Data and for any type not
+ *         listed in the switch.
+ */
+std::type_index AssetService::inferType(const std::string &path) {
+  std::filesystem::path p(path);
+  std::string ext = p.extension().string();
+
+  AssetType type = AssetLoaderFactory::getTypeByExtension(ext);
+  switch (type) {
+  case AssetType::Texture:
+    return typeid(TextureAsset);
+  case AssetType::Font:
+    return typeid(FontAsset);
+  case AssetType::Shader:
+    return typeid(ShaderAsset);
+  case AssetType::Audio:
+    return typeid(AudioAsset);
+  case AssetType::Data:
+    return typeid(DataAsset);
+  default:
+    return typeid(DataAsset);
+  }
+}
+
+} // namespace extra2d
diff --git a/Extra2D/src/services/event_service.cpp b/Extra2D/src/services/event_service.cpp
new file mode 100644
index 0000000..963b0d0
--- /dev/null
+++ b/Extra2D/src/services/event_service.cpp
@@ -0,0 +1,64 @@
+#include
+
+namespace extra2d {
+
+/** @brief Fills in the service descriptor (name, priority, enabled flag). */
+EventService::EventService() {
+  info_.name = "EventService";
+  info_.priority = ServicePriority::Event;
+  info_.enabled = true;
+}
+
+/** @brief Returns a copy of the service descriptor. */
+ServiceInfo EventService::info() const { return info_; }
+
+/**
+ * @brief Marks the service as Running.
+ * @return Always true.
+ */
+bool EventService::init() {
+  setState(ServiceState::Running);
+  return true;
+}
+
+/** @brief Clears the queue, removes all listeners and marks it Stopped. */
+void EventService::shutdown() {
+  queue_.clear();
+  dispatcher_.offAll();
+  setState(ServiceState::Stopped);
+}
+
+/** @brief Processes queued events while the service is Running. */
+void EventService::update(f32 dt) {
+  if
(state() == ServiceState::Running) {
+    process();
+  }
+}
+
+/** @brief Enqueues a copy of @p event. */
+void EventService::push(const Event &event) { queue_.push(event); }
+
+/** @brief Enqueues @p event by moving it into the queue. */
+void EventService::push(Event &&event) { queue_.push(std::move(event)); }
+
+/** @brief Forwards to queue_.poll() and returns its result. */
+bool EventService::poll(Event &event) { return queue_.poll(event); }
+
+/**
+ * @brief Registers @p fn as a listener for @p type.
+ * @return The listener id handed out by the dispatcher.
+ */
+ListenerID EventService::on(EventType type, EventDispatcher::EventFn fn) {
+  return dispatcher_.on(type, fn);
+}
+
+/** @brief Removes the listener with the given id. */
+void EventService::off(ListenerID id) {
+  dispatcher_.off(id);
+}
+
+/** @brief Removes every listener registered for @p type. */
+void EventService::offAll(EventType type) {
+  dispatcher_.offAll(type);
+}
+
+/** @brief Removes every listener. */
+void EventService::offAll() { dispatcher_.offAll(); }
+
+/** @brief Hands @p event straight to the dispatcher, bypassing the queue. */
+void EventService::dispatch(Event &event) { dispatcher_.dispatch(event); }
+
+/** @brief Lets the dispatcher process the event queue. */
+void EventService::process() { dispatcher_.process(queue_); }
+
+/** @brief Returns the number of listeners registered for @p type. */
+size_t EventService::listenerCount(EventType type) const {
+  return dispatcher_.listenerCount(type);
+}
+
+/** @brief Returns the total number of registered listeners. */
+size_t EventService::totalListeners() const {
+  return dispatcher_.totalListeners();
+}
+
+/** @brief Returns the number of events currently queued. */
+size_t EventService::queueSize() const { return queue_.size(); }
+
+} // namespace extra2d
diff --git a/Extra2D/src/services/logger_service.cpp b/Extra2D/src/services/logger_service.cpp
new file mode 100644
index 0000000..71cf410
--- /dev/null
+++ b/Extra2D/src/services/logger_service.cpp
@@ -0,0 +1,254 @@
+#include
+#include
+#include
+#include
+#include
+
+#ifdef _WIN32
+#include
+#endif
+
+namespace extra2d {
+
+#ifdef _WIN32
+/**
+ * @brief Enables virtual-terminal processing on stdout and switches both
+ *        console code pages to UTF-8.
+ * @return false when GetConsoleMode or SetConsoleMode fails; true otherwise,
+ *         including when the stdout handle is INVALID_HANDLE_VALUE.
+ */
+static bool enableWindowsConsoleFeatures() {
+  bool success = true;
+
+  HANDLE hOut = GetStdHandle(STD_OUTPUT_HANDLE);
+  if (hOut != INVALID_HANDLE_VALUE) {
+    DWORD dwMode = 0;
+    if (GetConsoleMode(hOut, &dwMode)) {
+      dwMode |= ENABLE_VIRTUAL_TERMINAL_PROCESSING;
+      if (!SetConsoleMode(hOut, dwMode)) {
+        success = false;
+      }
+    } else {
+      success = false;
+    }
+  }
+
+  // The code pages are set regardless of the outcome above.
+  SetConsoleOutputCP(CP_UTF8);
+  SetConsoleCP(CP_UTF8);
+
+  return success;
+}
+
+// Stays false after a failed attempt, so the next ConsoleLogger retries.
+static bool g_windowsConsoleInitialized = false;
+#endif
+
+/** @brief Private state: the mutex that serializes output(). */
+class ConsoleLogger::Impl {
+public:
+  std::mutex mutex_;
+};
+
+/**
+ * @brief Sets the defaults: threshold LogLevel::Info, colors enabled, and one
+ *        color per log level.
+ */
+ConsoleLogger::ConsoleLogger()
+    : level_(LogLevel::Info), colors_(true),
+      impl_(std::make_unique()) {
+  info_.name = "ConsoleLogger";
+  info_.priority = ServicePriority::Core;
+
+  levelColors_[static_cast(LogLevel::Trace)] = LogColor::Gray();
+  levelColors_[static_cast(LogLevel::Debug)] = LogColor::Cyan();
+  levelColors_[static_cast(LogLevel::Info)] = LogColor::SkyLight();
+  levelColors_[static_cast(LogLevel::Registry)] = LogColor::IndigoLight();
+  levelColors_[static_cast(LogLevel::Warn)] = LogColor::Yellow();
+  levelColors_[static_cast(LogLevel::Error)] = LogColor::Red();
+  levelColors_[static_cast(LogLevel::Fatal)] = LogColor::Magenta();
+
+#ifdef _WIN32
+  if (!g_windowsConsoleInitialized) {
+    g_windowsConsoleInitialized = enableWindowsConsoleFeatures();
+  }
+#endif
+}
+
+ConsoleLogger::~ConsoleLogger() = default;
+
+/**
+ * @brief Marks the service as Running.
+ * @return Always true.
+ */
+bool ConsoleLogger::init() {
+  setState(ServiceState::Running);
+  return true;
+}
+
+/** @brief Marks the service as Stopped. */
+void ConsoleLogger::shutdown() { setState(ServiceState::Stopped); }
+
+/** @brief Sets the minimum level that gets written. */
+void ConsoleLogger::level(LogLevel lvl) { level_ = lvl; }
+
+/** @brief Returns the minimum level that gets written. */
+LogLevel ConsoleLogger::level() const { return level_; }
+
+/**
+ * @brief A level is enabled when its numeric value is at least the threshold.
+ */
+bool ConsoleLogger::enabled(LogLevel lvl) const {
+  return static_cast(lvl) >= static_cast(level_);
+}
+
+/**
+ * @brief Formats a printf-style message and writes it at level @p lvl.
+ *
+ * The text is formatted into a 1024-byte stack buffer; vsnprintf bounds the
+ * write, so longer messages are cut short. The level-specific helpers below
+ * (trace ... fatal) follow the same pattern.
+ */
+void ConsoleLogger::log(LogLevel lvl, const char *fmt, ...) {
+  if (!enabled(lvl))
+    return;
+
+  char buffer[1024];
+  va_list args;
+  va_start(args, fmt);
+  vsnprintf(buffer, sizeof(buffer), fmt, args);
+  va_end(args);
+
+  output(lvl, buffer);
+}
+
+/** @brief Writes an already formatted message at level @p lvl. */
+void ConsoleLogger::log(LogLevel lvl, const std::string &msg) {
+  if (!enabled(lvl))
+    return;
+  output(lvl, msg.c_str());
+}
+
+/** @brief printf-style logging at LogLevel::Trace. */
+void ConsoleLogger::trace(const char *fmt, ...) {
+  if (!enabled(LogLevel::Trace))
+    return;
+  char buffer[1024];
+  va_list args;
+  va_start(args, fmt);
+  vsnprintf(buffer, sizeof(buffer), fmt, args);
+  va_end(args);
+  output(LogLevel::Trace, buffer);
+}
+
+/** @brief printf-style logging at LogLevel::Debug. */
+void ConsoleLogger::debug(const char *fmt, ...) {
+  if (!enabled(LogLevel::Debug))
+    return;
+  char buffer[1024];
+  va_list args;
+  va_start(args, fmt);
+  vsnprintf(buffer, sizeof(buffer), fmt, args);
+  va_end(args);
+  output(LogLevel::Debug, buffer);
+}
+
+/** @brief printf-style logging at LogLevel::Info. */
+void ConsoleLogger::info(const char *fmt, ...) {
+  if (!enabled(LogLevel::Info))
+    return;
+  char buffer[1024];
+  va_list args;
+  va_start(args, fmt);
+  vsnprintf(buffer, sizeof(buffer), fmt, args);
+  va_end(args);
+  output(LogLevel::Info, buffer);
+}
+
+/** @brief printf-style logging at LogLevel::Registry. */
+void ConsoleLogger::registry(const char *fmt, ...) {
+  if (!enabled(LogLevel::Registry))
+    return;
+  char buffer[1024];
+  va_list args;
+  va_start(args, fmt);
+  vsnprintf(buffer, sizeof(buffer), fmt, args);
+  va_end(args);
+  output(LogLevel::Registry, buffer);
+}
+
+/** @brief printf-style logging at LogLevel::Warn. */
+void ConsoleLogger::warn(const char *fmt, ...) {
+  if (!enabled(LogLevel::Warn))
+    return;
+  char buffer[1024];
+  va_list args;
+  va_start(args, fmt);
+  vsnprintf(buffer, sizeof(buffer), fmt, args);
+  va_end(args);
+  output(LogLevel::Warn, buffer);
+}
+
+/** @brief printf-style logging at LogLevel::Error. */
+void ConsoleLogger::error(const char *fmt, ...) {
+  if (!enabled(LogLevel::Error))
+    return;
+  char buffer[1024];
+  va_list args;
+  va_start(args, fmt);
+  vsnprintf(buffer, sizeof(buffer), fmt, args);
+  va_end(args);
+  output(LogLevel::Error, buffer);
+}
+
+/** @brief printf-style logging at LogLevel::Fatal. */
+void ConsoleLogger::fatal(const char *fmt, ...) {
+  if (!enabled(LogLevel::Fatal))
+    return;
+  char buffer[1024];
+  va_list args;
+  va_start(args, fmt);
+  vsnprintf(buffer, sizeof(buffer), fmt, args);
+  va_end(args);
+  output(LogLevel::Fatal, buffer);
+}
+
+/**
+ * @brief Overrides the color used for @p lvl.
+ *
+ * Levels whose numeric value falls outside [0, 7) are ignored.
+ */
+void ConsoleLogger::levelColor(LogLevel lvl, const LogColor &c) {
+  int idx = static_cast(lvl);
+  if (idx >= 0 && idx < 7) {
+    levelColors_[idx] = c;
+  }
+}
+
+/**
+ * @brief Returns the color configured for @p lvl.
+ * @return LogColor::White() when the numeric value is outside [0, 7).
+ */
+LogColor ConsoleLogger::levelColor(LogLevel lvl) const {
+  int idx = static_cast(lvl);
+  if (idx >= 0 && idx < 7) {
+    return levelColors_[idx];
+  }
+  return LogColor::White();
+}
+
+/** @brief Enables or disables colored output. */
+void ConsoleLogger::colors(bool on) { colors_ = on; }
+
+/** @brief Reports whether colored output is enabled. */
+bool ConsoleLogger::colors() const { return colors_; }
+
+/**
+ * @brief Builds the escape sequence "ESC[38;2;R;G;Bm" from the r, g and b
+ *        components of the color configured for @p lvl.
+ */
+std::string ConsoleLogger::ansiColor(LogLevel lvl) {
+  const LogColor &c = levelColor(lvl);
+  char buf[32];
+  snprintf(buf, sizeof(buf), "\033[38;2;%d;%d;%dm", c.r, c.g, c.b);
+  return std::string(buf);
+}
+
+/**
+ * @brief Writes one line to stdout: "[HH:MM:SS.mmm] [LEVEL] message".
+ *
+ * Calls are serialized with impl_->mutex_. When colors are enabled the line
+ * is wrapped in the level's color sequence and a reset sequence.
+ */
+void ConsoleLogger::output(LogLevel lvl, const char *msg) {
+  std::lock_guard lock(impl_->mutex_);
+
+  auto now = std::chrono::system_clock::now();
+  auto time = std::chrono::system_clock::to_time_t(now);
+  // Sub-second remainder of the timestamp; printed with %03d below.
+  auto ms = std::chrono::duration_cast(
+                now.time_since_epoch()) %
+            1000;
+
+  // Convert to local time through the platform's re-entrant variant.
+  std::tm tm;
+#ifdef _WIN32
+  localtime_s(&tm, &time);
+#else
+  localtime_r(&time, &tm);
+#endif
+
+  const char *levelStr = levelString(lvl);
+  const char *reset = "\033[0m";
+
+  if (colors_) {
+    std::string color = ansiColor(lvl);
+    printf("%s[%02d:%02d:%02d.%03d] [%s] %s%s\n", color.c_str(), tm.tm_hour,
+           tm.tm_min, tm.tm_sec, (int)ms.count(), levelStr, msg, reset);
+  } else {
+    printf("[%02d:%02d:%02d.%03d] [%s] %s\n", tm.tm_hour, tm.tm_min, tm.tm_sec,
+           (int)ms.count(), levelStr, msg);
+  }
+}
+
+/**
+ * @brief Returns the upper-case name printed for @p lvl.
+ * @return "UNKNOWN" for values not listed in the switch.
+ */
+const char *ConsoleLogger::levelString(LogLevel lvl) {
+  switch (lvl) {
+  case LogLevel::Trace:
+    return "TRACE";
+  case LogLevel::Debug:
+    return "DEBUG";
+  case LogLevel::Info:
+    return "INFO";
+  case LogLevel::Registry:
+    return "REGISTRY";
+  case LogLevel::Warn:
+    return "WARN";
+  case LogLevel::Error:
+    return "ERROR";
+  case LogLevel::Fatal:
+    return "FATAL";
+  default:
+    return "UNKNOWN";
+  }
+}
+
+} // namespace extra2d
diff --git a/Extra2D/src/services/timer_service.cpp b/Extra2D/src/services/timer_service.cpp
new file mode 100644
index 0000000..9253660
--- /dev/null
+++ b/Extra2D/src/services/timer_service.cpp
@@ -0,0 +1,51 @@
+#include
+
+namespace extra2d {
+
+/** @brief Fills in the service descriptor (name, priority, enabled flag). */
+TimerService::TimerService() {
+  info_.name = "TimerService";
+  info_.priority = ServicePriority::Timer;
+  info_.enabled = true;
+}
+
+/** @brief Returns a copy of the service descriptor. */
+ServiceInfo TimerService::info() const { return info_; }
+
+/**
+ * @brief Marks the service as Running.
+ * @return Always true.
+ */
+bool TimerService::init() {
+  setState(ServiceState::Running);
+  return true;
+}
+
+/** @brief Clears the timer manager and marks the service as Stopped. */
+void TimerService::shutdown() {
+  mgr_.clear();
+  setState(ServiceState::Stopped);
+}
+
+/** @brief Passes @p dt to the timer manager while the service is Running. */
+void TimerService::update(f32 dt) {
+  if (state() == ServiceState::Running) {
+    mgr_.update(dt);
+  }
+}
+
+/**
+ * @brief Forwards to mgr_.add().
+ * @return The timer id returned by the manager.
+ */
+u32 TimerService::add(f32 delay, Timer::Fn fn) {
+  return mgr_.add(delay, fn);
+}
+
+/**
+ * @brief Forwards to mgr_.addRepeat().
+ * @return The timer id returned by the manager.
+ */
+u32 TimerService::addRepeat(f32 interval, Timer::Fn fn) {
+  return mgr_.addRepeat(interval, fn);
+}
+
+/** @brief Forwards to mgr_.cancel(). */
+void TimerService::cancel(u32 timerId) {
+  mgr_.cancel(timerId);
+}
+
+/** @brief Forwards to mgr_.pause(). */
+void TimerService::pauseTimer(u32 timerId) { mgr_.pause(timerId); }
+
+/** @brief Forwards to mgr_.resume(). */
+void TimerService::resumeTimer(u32 timerId) {
+  mgr_.resume(timerId);
+}
+
+/** @brief Forwards to mgr_.clear(). */
+void TimerService::clear() { mgr_.clear(); }
+
+/** @brief Returns the count reported by the timer manager. */
+size_t TimerService::count() const { return mgr_.count(); }
+
+} // namespace extra2d
diff --git a/Extra2D/src/simdutf/simdutf.cpp b/Extra2D/src/simdutf/simdutf.cpp
new file mode 100644
index 0000000..5a834c1
--- /dev/null
+++ b/Extra2D/src/simdutf/simdutf.cpp
@@ -0,0 +1,67273 @@
+/* auto-generated on 2026-01-13 09:03:21 +0100. Do not edit!
*/
+/* begin file src/simdutf.cpp */
+#include
+
+/* begin file src/encoding_types.cpp */
+
+namespace simdutf {
+// Returns a human-readable name for the encoding; "error" for any value not
+// listed in the switch.
+std::string to_string(encoding_type bom) {
+  switch (bom) {
+  case UTF16_LE:
+    return "UTF16 little-endian";
+  case UTF16_BE:
+    return "UTF16 big-endian";
+  case UTF32_LE:
+    return "UTF32 little-endian";
+  case UTF32_BE:
+    return "UTF32 big-endian";
+  case UTF8:
+    return "UTF8";
+  case unspecified:
+    return "unknown";
+  default:
+    return "error";
+  }
+}
+
+namespace BOM {
+// Note that BOM for UTF8 is discouraged.
+// Matches the leading bytes against the known byte-order marks and returns
+// the corresponding encoding, or encoding_type::unspecified when none matches.
+encoding_type check_bom(const uint8_t *byte, size_t length) {
+  if (length >= 2 && byte[0] == 0xff and byte[1] == 0xfe) {
+    // FF FE followed by 00 00 is reported as UTF-32 LE, otherwise UTF-16 LE.
+    if (length >= 4 && byte[2] == 0x00 and byte[3] == 0x0) {
+      return encoding_type::UTF32_LE;
+    } else {
+      return encoding_type::UTF16_LE;
+    }
+  } else if (length >= 2 && byte[0] == 0xfe and byte[1] == 0xff) {
+    return encoding_type::UTF16_BE;
+  } else if (length >= 4 && byte[0] == 0x00 and byte[1] == 0x00 and
+             byte[2] == 0xfe and byte[3] == 0xff) {
+    return encoding_type::UTF32_BE;
+  } else if (length >= 3 && byte[0] == 0xef and byte[1] == 0xbb and
+             byte[2] == 0xbf) {
+    return encoding_type::UTF8;
+  }
+  return encoding_type::unspecified;
+}
+
+// Convenience overload for char buffers; forwards to the uint8_t version.
+encoding_type check_bom(const char *byte, size_t length) {
+  return check_bom(reinterpret_cast(byte), length);
+}
+
+// Returns the length in bytes of the byte-order mark for the given encoding;
+// 0 for unspecified and for any value not listed in the switch.
+size_t bom_byte_size(encoding_type bom) {
+  switch (bom) {
+  case UTF16_LE:
+    return 2;
+  case UTF16_BE:
+    return 2;
+  case UTF32_LE:
+    return 4;
+  case UTF32_BE:
+    return 4;
+  case UTF8:
+    return 3;
+  case unspecified:
+    return 0;
+  default:
+    return 0;
+  }
+}
+
+} // namespace BOM
+} // namespace simdutf
+/* end file src/encoding_types.cpp */
+/* begin file src/error.cpp */
+namespace simdutf {
+// deliberately empty
+}
+/* end file src/error.cpp */
+// The large tables should be included once and they
+// should not depend on a kernel.
+/* begin file src/tables/utf8_to_utf16_tables.h */ +#ifndef SIMDUTF_UTF8_TO_UTF16_TABLES_H +#define SIMDUTF_UTF8_TO_UTF16_TABLES_H +#include + +namespace simdutf { +namespace { +namespace tables { +namespace utf8_to_utf16 { +/** + * utf8bigindex uses about 8 kB + * shufutf8 uses about 3344 B + * + * So we use a bit over 11 kB. It would be + * easy to save about 4 kB by only + * storing the index in utf8bigindex, and + * deriving the consumed bytes otherwise. + * However, this may come at a significant (10% to 20%) + * performance penalty. + */ + +const uint8_t shufutf8[209][16] = { + {0, 255, 1, 255, 2, 255, 3, 255, 4, 255, 5, 255, 0, 0, 0, 0}, + {0, 255, 1, 255, 2, 255, 3, 255, 4, 255, 6, 5, 0, 0, 0, 0}, + {0, 255, 1, 255, 2, 255, 3, 255, 5, 4, 6, 255, 0, 0, 0, 0}, + {0, 255, 1, 255, 2, 255, 3, 255, 5, 4, 7, 6, 0, 0, 0, 0}, + {0, 255, 1, 255, 2, 255, 4, 3, 5, 255, 6, 255, 0, 0, 0, 0}, + {0, 255, 1, 255, 2, 255, 4, 3, 5, 255, 7, 6, 0, 0, 0, 0}, + {0, 255, 1, 255, 2, 255, 4, 3, 6, 5, 7, 255, 0, 0, 0, 0}, + {0, 255, 1, 255, 2, 255, 4, 3, 6, 5, 8, 7, 0, 0, 0, 0}, + {0, 255, 1, 255, 3, 2, 4, 255, 5, 255, 6, 255, 0, 0, 0, 0}, + {0, 255, 1, 255, 3, 2, 4, 255, 5, 255, 7, 6, 0, 0, 0, 0}, + {0, 255, 1, 255, 3, 2, 4, 255, 6, 5, 7, 255, 0, 0, 0, 0}, + {0, 255, 1, 255, 3, 2, 4, 255, 6, 5, 8, 7, 0, 0, 0, 0}, + {0, 255, 1, 255, 3, 2, 5, 4, 6, 255, 7, 255, 0, 0, 0, 0}, + {0, 255, 1, 255, 3, 2, 5, 4, 6, 255, 8, 7, 0, 0, 0, 0}, + {0, 255, 1, 255, 3, 2, 5, 4, 7, 6, 8, 255, 0, 0, 0, 0}, + {0, 255, 1, 255, 3, 2, 5, 4, 7, 6, 9, 8, 0, 0, 0, 0}, + {0, 255, 2, 1, 3, 255, 4, 255, 5, 255, 6, 255, 0, 0, 0, 0}, + {0, 255, 2, 1, 3, 255, 4, 255, 5, 255, 7, 6, 0, 0, 0, 0}, + {0, 255, 2, 1, 3, 255, 4, 255, 6, 5, 7, 255, 0, 0, 0, 0}, + {0, 255, 2, 1, 3, 255, 4, 255, 6, 5, 8, 7, 0, 0, 0, 0}, + {0, 255, 2, 1, 3, 255, 5, 4, 6, 255, 7, 255, 0, 0, 0, 0}, + {0, 255, 2, 1, 3, 255, 5, 4, 6, 255, 8, 7, 0, 0, 0, 0}, + {0, 255, 2, 1, 3, 255, 5, 4, 7, 6, 8, 255, 0, 0, 0, 0}, + {0, 255, 2, 1, 3, 255, 5, 4, 7, 
6, 9, 8, 0, 0, 0, 0}, + {0, 255, 2, 1, 4, 3, 5, 255, 6, 255, 7, 255, 0, 0, 0, 0}, + {0, 255, 2, 1, 4, 3, 5, 255, 6, 255, 8, 7, 0, 0, 0, 0}, + {0, 255, 2, 1, 4, 3, 5, 255, 7, 6, 8, 255, 0, 0, 0, 0}, + {0, 255, 2, 1, 4, 3, 5, 255, 7, 6, 9, 8, 0, 0, 0, 0}, + {0, 255, 2, 1, 4, 3, 6, 5, 7, 255, 8, 255, 0, 0, 0, 0}, + {0, 255, 2, 1, 4, 3, 6, 5, 7, 255, 9, 8, 0, 0, 0, 0}, + {0, 255, 2, 1, 4, 3, 6, 5, 8, 7, 9, 255, 0, 0, 0, 0}, + {0, 255, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 0, 0, 0, 0}, + {1, 0, 2, 255, 3, 255, 4, 255, 5, 255, 6, 255, 0, 0, 0, 0}, + {1, 0, 2, 255, 3, 255, 4, 255, 5, 255, 7, 6, 0, 0, 0, 0}, + {1, 0, 2, 255, 3, 255, 4, 255, 6, 5, 7, 255, 0, 0, 0, 0}, + {1, 0, 2, 255, 3, 255, 4, 255, 6, 5, 8, 7, 0, 0, 0, 0}, + {1, 0, 2, 255, 3, 255, 5, 4, 6, 255, 7, 255, 0, 0, 0, 0}, + {1, 0, 2, 255, 3, 255, 5, 4, 6, 255, 8, 7, 0, 0, 0, 0}, + {1, 0, 2, 255, 3, 255, 5, 4, 7, 6, 8, 255, 0, 0, 0, 0}, + {1, 0, 2, 255, 3, 255, 5, 4, 7, 6, 9, 8, 0, 0, 0, 0}, + {1, 0, 2, 255, 4, 3, 5, 255, 6, 255, 7, 255, 0, 0, 0, 0}, + {1, 0, 2, 255, 4, 3, 5, 255, 6, 255, 8, 7, 0, 0, 0, 0}, + {1, 0, 2, 255, 4, 3, 5, 255, 7, 6, 8, 255, 0, 0, 0, 0}, + {1, 0, 2, 255, 4, 3, 5, 255, 7, 6, 9, 8, 0, 0, 0, 0}, + {1, 0, 2, 255, 4, 3, 6, 5, 7, 255, 8, 255, 0, 0, 0, 0}, + {1, 0, 2, 255, 4, 3, 6, 5, 7, 255, 9, 8, 0, 0, 0, 0}, + {1, 0, 2, 255, 4, 3, 6, 5, 8, 7, 9, 255, 0, 0, 0, 0}, + {1, 0, 2, 255, 4, 3, 6, 5, 8, 7, 10, 9, 0, 0, 0, 0}, + {1, 0, 3, 2, 4, 255, 5, 255, 6, 255, 7, 255, 0, 0, 0, 0}, + {1, 0, 3, 2, 4, 255, 5, 255, 6, 255, 8, 7, 0, 0, 0, 0}, + {1, 0, 3, 2, 4, 255, 5, 255, 7, 6, 8, 255, 0, 0, 0, 0}, + {1, 0, 3, 2, 4, 255, 5, 255, 7, 6, 9, 8, 0, 0, 0, 0}, + {1, 0, 3, 2, 4, 255, 6, 5, 7, 255, 8, 255, 0, 0, 0, 0}, + {1, 0, 3, 2, 4, 255, 6, 5, 7, 255, 9, 8, 0, 0, 0, 0}, + {1, 0, 3, 2, 4, 255, 6, 5, 8, 7, 9, 255, 0, 0, 0, 0}, + {1, 0, 3, 2, 4, 255, 6, 5, 8, 7, 10, 9, 0, 0, 0, 0}, + {1, 0, 3, 2, 5, 4, 6, 255, 7, 255, 8, 255, 0, 0, 0, 0}, + {1, 0, 3, 2, 5, 4, 6, 255, 7, 255, 9, 8, 0, 0, 0, 0}, + {1, 0, 3, 2, 5, 4, 
6, 255, 8, 7, 9, 255, 0, 0, 0, 0}, + {1, 0, 3, 2, 5, 4, 6, 255, 8, 7, 10, 9, 0, 0, 0, 0}, + {1, 0, 3, 2, 5, 4, 7, 6, 8, 255, 9, 255, 0, 0, 0, 0}, + {1, 0, 3, 2, 5, 4, 7, 6, 8, 255, 10, 9, 0, 0, 0, 0}, + {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 10, 255, 0, 0, 0, 0}, + {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 0, 0, 0, 0}, + {0, 255, 255, 255, 1, 255, 255, 255, 2, 255, 255, 255, 3, 255, 255, 255}, + {0, 255, 255, 255, 1, 255, 255, 255, 2, 255, 255, 255, 4, 3, 255, 255}, + {0, 255, 255, 255, 1, 255, 255, 255, 2, 255, 255, 255, 5, 4, 3, 255}, + {0, 255, 255, 255, 1, 255, 255, 255, 3, 2, 255, 255, 4, 255, 255, 255}, + {0, 255, 255, 255, 1, 255, 255, 255, 3, 2, 255, 255, 5, 4, 255, 255}, + {0, 255, 255, 255, 1, 255, 255, 255, 3, 2, 255, 255, 6, 5, 4, 255}, + {0, 255, 255, 255, 1, 255, 255, 255, 4, 3, 2, 255, 5, 255, 255, 255}, + {0, 255, 255, 255, 1, 255, 255, 255, 4, 3, 2, 255, 6, 5, 255, 255}, + {0, 255, 255, 255, 1, 255, 255, 255, 4, 3, 2, 255, 7, 6, 5, 255}, + {0, 255, 255, 255, 2, 1, 255, 255, 3, 255, 255, 255, 4, 255, 255, 255}, + {0, 255, 255, 255, 2, 1, 255, 255, 3, 255, 255, 255, 5, 4, 255, 255}, + {0, 255, 255, 255, 2, 1, 255, 255, 3, 255, 255, 255, 6, 5, 4, 255}, + {0, 255, 255, 255, 2, 1, 255, 255, 4, 3, 255, 255, 5, 255, 255, 255}, + {0, 255, 255, 255, 2, 1, 255, 255, 4, 3, 255, 255, 6, 5, 255, 255}, + {0, 255, 255, 255, 2, 1, 255, 255, 4, 3, 255, 255, 7, 6, 5, 255}, + {0, 255, 255, 255, 2, 1, 255, 255, 5, 4, 3, 255, 6, 255, 255, 255}, + {0, 255, 255, 255, 2, 1, 255, 255, 5, 4, 3, 255, 7, 6, 255, 255}, + {0, 255, 255, 255, 2, 1, 255, 255, 5, 4, 3, 255, 8, 7, 6, 255}, + {0, 255, 255, 255, 3, 2, 1, 255, 4, 255, 255, 255, 5, 255, 255, 255}, + {0, 255, 255, 255, 3, 2, 1, 255, 4, 255, 255, 255, 6, 5, 255, 255}, + {0, 255, 255, 255, 3, 2, 1, 255, 4, 255, 255, 255, 7, 6, 5, 255}, + {0, 255, 255, 255, 3, 2, 1, 255, 5, 4, 255, 255, 6, 255, 255, 255}, + {0, 255, 255, 255, 3, 2, 1, 255, 5, 4, 255, 255, 7, 6, 255, 255}, + {0, 255, 255, 255, 3, 2, 1, 255, 5, 4, 255, 255, 8, 7, 6, 
255}, + {0, 255, 255, 255, 3, 2, 1, 255, 6, 5, 4, 255, 7, 255, 255, 255}, + {0, 255, 255, 255, 3, 2, 1, 255, 6, 5, 4, 255, 8, 7, 255, 255}, + {0, 255, 255, 255, 3, 2, 1, 255, 6, 5, 4, 255, 9, 8, 7, 255}, + {1, 0, 255, 255, 2, 255, 255, 255, 3, 255, 255, 255, 4, 255, 255, 255}, + {1, 0, 255, 255, 2, 255, 255, 255, 3, 255, 255, 255, 5, 4, 255, 255}, + {1, 0, 255, 255, 2, 255, 255, 255, 3, 255, 255, 255, 6, 5, 4, 255}, + {1, 0, 255, 255, 2, 255, 255, 255, 4, 3, 255, 255, 5, 255, 255, 255}, + {1, 0, 255, 255, 2, 255, 255, 255, 4, 3, 255, 255, 6, 5, 255, 255}, + {1, 0, 255, 255, 2, 255, 255, 255, 4, 3, 255, 255, 7, 6, 5, 255}, + {1, 0, 255, 255, 2, 255, 255, 255, 5, 4, 3, 255, 6, 255, 255, 255}, + {1, 0, 255, 255, 2, 255, 255, 255, 5, 4, 3, 255, 7, 6, 255, 255}, + {1, 0, 255, 255, 2, 255, 255, 255, 5, 4, 3, 255, 8, 7, 6, 255}, + {1, 0, 255, 255, 3, 2, 255, 255, 4, 255, 255, 255, 5, 255, 255, 255}, + {1, 0, 255, 255, 3, 2, 255, 255, 4, 255, 255, 255, 6, 5, 255, 255}, + {1, 0, 255, 255, 3, 2, 255, 255, 4, 255, 255, 255, 7, 6, 5, 255}, + {1, 0, 255, 255, 3, 2, 255, 255, 5, 4, 255, 255, 6, 255, 255, 255}, + {1, 0, 255, 255, 3, 2, 255, 255, 5, 4, 255, 255, 7, 6, 255, 255}, + {1, 0, 255, 255, 3, 2, 255, 255, 5, 4, 255, 255, 8, 7, 6, 255}, + {1, 0, 255, 255, 3, 2, 255, 255, 6, 5, 4, 255, 7, 255, 255, 255}, + {1, 0, 255, 255, 3, 2, 255, 255, 6, 5, 4, 255, 8, 7, 255, 255}, + {1, 0, 255, 255, 3, 2, 255, 255, 6, 5, 4, 255, 9, 8, 7, 255}, + {1, 0, 255, 255, 4, 3, 2, 255, 5, 255, 255, 255, 6, 255, 255, 255}, + {1, 0, 255, 255, 4, 3, 2, 255, 5, 255, 255, 255, 7, 6, 255, 255}, + {1, 0, 255, 255, 4, 3, 2, 255, 5, 255, 255, 255, 8, 7, 6, 255}, + {1, 0, 255, 255, 4, 3, 2, 255, 6, 5, 255, 255, 7, 255, 255, 255}, + {1, 0, 255, 255, 4, 3, 2, 255, 6, 5, 255, 255, 8, 7, 255, 255}, + {1, 0, 255, 255, 4, 3, 2, 255, 6, 5, 255, 255, 9, 8, 7, 255}, + {1, 0, 255, 255, 4, 3, 2, 255, 7, 6, 5, 255, 8, 255, 255, 255}, + {1, 0, 255, 255, 4, 3, 2, 255, 7, 6, 5, 255, 9, 8, 255, 255}, + {1, 0, 255, 255, 4, 
3, 2, 255, 7, 6, 5, 255, 10, 9, 8, 255}, + {2, 1, 0, 255, 3, 255, 255, 255, 4, 255, 255, 255, 5, 255, 255, 255}, + {2, 1, 0, 255, 3, 255, 255, 255, 4, 255, 255, 255, 6, 5, 255, 255}, + {2, 1, 0, 255, 3, 255, 255, 255, 4, 255, 255, 255, 7, 6, 5, 255}, + {2, 1, 0, 255, 3, 255, 255, 255, 5, 4, 255, 255, 6, 255, 255, 255}, + {2, 1, 0, 255, 3, 255, 255, 255, 5, 4, 255, 255, 7, 6, 255, 255}, + {2, 1, 0, 255, 3, 255, 255, 255, 5, 4, 255, 255, 8, 7, 6, 255}, + {2, 1, 0, 255, 3, 255, 255, 255, 6, 5, 4, 255, 7, 255, 255, 255}, + {2, 1, 0, 255, 3, 255, 255, 255, 6, 5, 4, 255, 8, 7, 255, 255}, + {2, 1, 0, 255, 3, 255, 255, 255, 6, 5, 4, 255, 9, 8, 7, 255}, + {2, 1, 0, 255, 4, 3, 255, 255, 5, 255, 255, 255, 6, 255, 255, 255}, + {2, 1, 0, 255, 4, 3, 255, 255, 5, 255, 255, 255, 7, 6, 255, 255}, + {2, 1, 0, 255, 4, 3, 255, 255, 5, 255, 255, 255, 8, 7, 6, 255}, + {2, 1, 0, 255, 4, 3, 255, 255, 6, 5, 255, 255, 7, 255, 255, 255}, + {2, 1, 0, 255, 4, 3, 255, 255, 6, 5, 255, 255, 8, 7, 255, 255}, + {2, 1, 0, 255, 4, 3, 255, 255, 6, 5, 255, 255, 9, 8, 7, 255}, + {2, 1, 0, 255, 4, 3, 255, 255, 7, 6, 5, 255, 8, 255, 255, 255}, + {2, 1, 0, 255, 4, 3, 255, 255, 7, 6, 5, 255, 9, 8, 255, 255}, + {2, 1, 0, 255, 4, 3, 255, 255, 7, 6, 5, 255, 10, 9, 8, 255}, + {2, 1, 0, 255, 5, 4, 3, 255, 6, 255, 255, 255, 7, 255, 255, 255}, + {2, 1, 0, 255, 5, 4, 3, 255, 6, 255, 255, 255, 8, 7, 255, 255}, + {2, 1, 0, 255, 5, 4, 3, 255, 6, 255, 255, 255, 9, 8, 7, 255}, + {2, 1, 0, 255, 5, 4, 3, 255, 7, 6, 255, 255, 8, 255, 255, 255}, + {2, 1, 0, 255, 5, 4, 3, 255, 7, 6, 255, 255, 9, 8, 255, 255}, + {2, 1, 0, 255, 5, 4, 3, 255, 7, 6, 255, 255, 10, 9, 8, 255}, + {2, 1, 0, 255, 5, 4, 3, 255, 8, 7, 6, 255, 9, 255, 255, 255}, + {2, 1, 0, 255, 5, 4, 3, 255, 8, 7, 6, 255, 10, 9, 255, 255}, + {2, 1, 0, 255, 5, 4, 3, 255, 8, 7, 6, 255, 11, 10, 9, 255}, + {0, 255, 255, 255, 1, 255, 255, 255, 2, 255, 255, 255, 0, 0, 0, 0}, + {0, 255, 255, 255, 1, 255, 255, 255, 3, 2, 255, 255, 0, 0, 0, 0}, + {0, 255, 255, 255, 1, 255, 255, 
255, 4, 3, 2, 255, 0, 0, 0, 0}, + {0, 255, 255, 255, 1, 255, 255, 255, 5, 4, 3, 2, 0, 0, 0, 0}, + {0, 255, 255, 255, 2, 1, 255, 255, 3, 255, 255, 255, 0, 0, 0, 0}, + {0, 255, 255, 255, 2, 1, 255, 255, 4, 3, 255, 255, 0, 0, 0, 0}, + {0, 255, 255, 255, 2, 1, 255, 255, 5, 4, 3, 255, 0, 0, 0, 0}, + {0, 255, 255, 255, 2, 1, 255, 255, 6, 5, 4, 3, 0, 0, 0, 0}, + {0, 255, 255, 255, 3, 2, 1, 255, 4, 255, 255, 255, 0, 0, 0, 0}, + {0, 255, 255, 255, 3, 2, 1, 255, 5, 4, 255, 255, 0, 0, 0, 0}, + {0, 255, 255, 255, 3, 2, 1, 255, 6, 5, 4, 255, 0, 0, 0, 0}, + {0, 255, 255, 255, 3, 2, 1, 255, 7, 6, 5, 4, 0, 0, 0, 0}, + {0, 255, 255, 255, 4, 3, 2, 1, 5, 255, 255, 255, 0, 0, 0, 0}, + {0, 255, 255, 255, 4, 3, 2, 1, 6, 5, 255, 255, 0, 0, 0, 0}, + {0, 255, 255, 255, 4, 3, 2, 1, 7, 6, 5, 255, 0, 0, 0, 0}, + {0, 255, 255, 255, 4, 3, 2, 1, 8, 7, 6, 5, 0, 0, 0, 0}, + {1, 0, 255, 255, 2, 255, 255, 255, 3, 255, 255, 255, 0, 0, 0, 0}, + {1, 0, 255, 255, 2, 255, 255, 255, 4, 3, 255, 255, 0, 0, 0, 0}, + {1, 0, 255, 255, 2, 255, 255, 255, 5, 4, 3, 255, 0, 0, 0, 0}, + {1, 0, 255, 255, 2, 255, 255, 255, 6, 5, 4, 3, 0, 0, 0, 0}, + {1, 0, 255, 255, 3, 2, 255, 255, 4, 255, 255, 255, 0, 0, 0, 0}, + {1, 0, 255, 255, 3, 2, 255, 255, 5, 4, 255, 255, 0, 0, 0, 0}, + {1, 0, 255, 255, 3, 2, 255, 255, 6, 5, 4, 255, 0, 0, 0, 0}, + {1, 0, 255, 255, 3, 2, 255, 255, 7, 6, 5, 4, 0, 0, 0, 0}, + {1, 0, 255, 255, 4, 3, 2, 255, 5, 255, 255, 255, 0, 0, 0, 0}, + {1, 0, 255, 255, 4, 3, 2, 255, 6, 5, 255, 255, 0, 0, 0, 0}, + {1, 0, 255, 255, 4, 3, 2, 255, 7, 6, 5, 255, 0, 0, 0, 0}, + {1, 0, 255, 255, 4, 3, 2, 255, 8, 7, 6, 5, 0, 0, 0, 0}, + {1, 0, 255, 255, 5, 4, 3, 2, 6, 255, 255, 255, 0, 0, 0, 0}, + {1, 0, 255, 255, 5, 4, 3, 2, 7, 6, 255, 255, 0, 0, 0, 0}, + {1, 0, 255, 255, 5, 4, 3, 2, 8, 7, 6, 255, 0, 0, 0, 0}, + {1, 0, 255, 255, 5, 4, 3, 2, 9, 8, 7, 6, 0, 0, 0, 0}, + {2, 1, 0, 255, 3, 255, 255, 255, 4, 255, 255, 255, 0, 0, 0, 0}, + {2, 1, 0, 255, 3, 255, 255, 255, 5, 4, 255, 255, 0, 0, 0, 0}, + {2, 1, 0, 255, 3, 255, 
255, 255, 6, 5, 4, 255, 0, 0, 0, 0}, + {2, 1, 0, 255, 3, 255, 255, 255, 7, 6, 5, 4, 0, 0, 0, 0}, + {2, 1, 0, 255, 4, 3, 255, 255, 5, 255, 255, 255, 0, 0, 0, 0}, + {2, 1, 0, 255, 4, 3, 255, 255, 6, 5, 255, 255, 0, 0, 0, 0}, + {2, 1, 0, 255, 4, 3, 255, 255, 7, 6, 5, 255, 0, 0, 0, 0}, + {2, 1, 0, 255, 4, 3, 255, 255, 8, 7, 6, 5, 0, 0, 0, 0}, + {2, 1, 0, 255, 5, 4, 3, 255, 6, 255, 255, 255, 0, 0, 0, 0}, + {2, 1, 0, 255, 5, 4, 3, 255, 7, 6, 255, 255, 0, 0, 0, 0}, + {2, 1, 0, 255, 5, 4, 3, 255, 8, 7, 6, 255, 0, 0, 0, 0}, + {2, 1, 0, 255, 5, 4, 3, 255, 9, 8, 7, 6, 0, 0, 0, 0}, + {2, 1, 0, 255, 6, 5, 4, 3, 7, 255, 255, 255, 0, 0, 0, 0}, + {2, 1, 0, 255, 6, 5, 4, 3, 8, 7, 255, 255, 0, 0, 0, 0}, + {2, 1, 0, 255, 6, 5, 4, 3, 9, 8, 7, 255, 0, 0, 0, 0}, + {2, 1, 0, 255, 6, 5, 4, 3, 10, 9, 8, 7, 0, 0, 0, 0}, + {3, 2, 1, 0, 4, 255, 255, 255, 5, 255, 255, 255, 0, 0, 0, 0}, + {3, 2, 1, 0, 4, 255, 255, 255, 6, 5, 255, 255, 0, 0, 0, 0}, + {3, 2, 1, 0, 4, 255, 255, 255, 7, 6, 5, 255, 0, 0, 0, 0}, + {3, 2, 1, 0, 4, 255, 255, 255, 8, 7, 6, 5, 0, 0, 0, 0}, + {3, 2, 1, 0, 5, 4, 255, 255, 6, 255, 255, 255, 0, 0, 0, 0}, + {3, 2, 1, 0, 5, 4, 255, 255, 7, 6, 255, 255, 0, 0, 0, 0}, + {3, 2, 1, 0, 5, 4, 255, 255, 8, 7, 6, 255, 0, 0, 0, 0}, + {3, 2, 1, 0, 5, 4, 255, 255, 9, 8, 7, 6, 0, 0, 0, 0}, + {3, 2, 1, 0, 6, 5, 4, 255, 7, 255, 255, 255, 0, 0, 0, 0}, + {3, 2, 1, 0, 6, 5, 4, 255, 8, 7, 255, 255, 0, 0, 0, 0}, + {3, 2, 1, 0, 6, 5, 4, 255, 9, 8, 7, 255, 0, 0, 0, 0}, + {3, 2, 1, 0, 6, 5, 4, 255, 10, 9, 8, 7, 0, 0, 0, 0}, + {3, 2, 1, 0, 7, 6, 5, 4, 8, 255, 255, 255, 0, 0, 0, 0}, + {3, 2, 1, 0, 7, 6, 5, 4, 9, 8, 255, 255, 0, 0, 0, 0}, + {3, 2, 1, 0, 7, 6, 5, 4, 10, 9, 8, 255, 0, 0, 0, 0}, + {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 0, 0, 0, 0}}; +/* number of two bytes : 64 */ +/* number of two + three bytes : 145 */ +/* number of two + three + four bytes : 209 */ +const uint8_t utf8bigindex[4096][2] = { + {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, + {145, 3}, {209, 
12}, {209, 12}, {209, 12}, {146, 4}, {209, 12}, {149, 4}, + {161, 4}, {64, 4}, {209, 12}, {209, 12}, {209, 12}, {147, 5}, {209, 12}, + {150, 5}, {162, 5}, {65, 5}, {209, 12}, {153, 5}, {165, 5}, {67, 5}, + {177, 5}, {73, 5}, {91, 5}, {64, 4}, {209, 12}, {209, 12}, {209, 12}, + {148, 6}, {209, 12}, {151, 6}, {163, 6}, {66, 6}, {209, 12}, {154, 6}, + {166, 6}, {68, 6}, {178, 6}, {74, 6}, {92, 6}, {64, 4}, {209, 12}, + {157, 6}, {169, 6}, {70, 6}, {181, 6}, {76, 6}, {94, 6}, {65, 5}, + {193, 6}, {82, 6}, {100, 6}, {67, 5}, {118, 6}, {73, 5}, {91, 5}, + {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {152, 7}, + {164, 7}, {145, 3}, {209, 12}, {155, 7}, {167, 7}, {69, 7}, {179, 7}, + {75, 7}, {93, 7}, {64, 4}, {209, 12}, {158, 7}, {170, 7}, {71, 7}, + {182, 7}, {77, 7}, {95, 7}, {65, 5}, {194, 7}, {83, 7}, {101, 7}, + {67, 5}, {119, 7}, {73, 5}, {91, 5}, {1, 7}, {209, 12}, {209, 12}, + {173, 7}, {148, 6}, {185, 7}, {79, 7}, {97, 7}, {66, 6}, {197, 7}, + {85, 7}, {103, 7}, {68, 6}, {121, 7}, {74, 6}, {92, 6}, {2, 7}, + {209, 12}, {157, 6}, {109, 7}, {70, 6}, {127, 7}, {76, 6}, {94, 6}, + {4, 7}, {193, 6}, {82, 6}, {100, 6}, {8, 7}, {118, 6}, {16, 7}, + {32, 7}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, + {209, 12}, {209, 12}, {145, 3}, {209, 12}, {156, 8}, {168, 8}, {146, 4}, + {180, 8}, {149, 4}, {161, 4}, {64, 4}, {209, 12}, {159, 8}, {171, 8}, + {72, 8}, {183, 8}, {78, 8}, {96, 8}, {65, 5}, {195, 8}, {84, 8}, + {102, 8}, {67, 5}, {120, 8}, {73, 5}, {91, 5}, {64, 4}, {209, 12}, + {209, 12}, {174, 8}, {148, 6}, {186, 8}, {80, 8}, {98, 8}, {66, 6}, + {198, 8}, {86, 8}, {104, 8}, {68, 6}, {122, 8}, {74, 6}, {92, 6}, + {3, 8}, {209, 12}, {157, 6}, {110, 8}, {70, 6}, {128, 8}, {76, 6}, + {94, 6}, {5, 8}, {193, 6}, {82, 6}, {100, 6}, {9, 8}, {118, 6}, + {17, 8}, {33, 8}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, + {189, 8}, {152, 7}, {164, 7}, {145, 3}, {201, 8}, {88, 8}, {106, 8}, + {69, 7}, {124, 8}, {75, 7}, {93, 7}, {64, 4}, 
{209, 12}, {158, 7}, + {112, 8}, {71, 7}, {130, 8}, {77, 7}, {95, 7}, {6, 8}, {194, 7}, + {83, 7}, {101, 7}, {10, 8}, {119, 7}, {18, 8}, {34, 8}, {1, 7}, + {209, 12}, {209, 12}, {173, 7}, {148, 6}, {136, 8}, {79, 7}, {97, 7}, + {66, 6}, {197, 7}, {85, 7}, {103, 7}, {12, 8}, {121, 7}, {20, 8}, + {36, 8}, {2, 7}, {209, 12}, {157, 6}, {109, 7}, {70, 6}, {127, 7}, + {24, 8}, {40, 8}, {4, 7}, {193, 6}, {82, 6}, {48, 8}, {8, 7}, + {118, 6}, {16, 7}, {32, 7}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, + {209, 12}, {209, 12}, {209, 12}, {209, 12}, {145, 3}, {209, 12}, {209, 12}, + {209, 12}, {146, 4}, {209, 12}, {149, 4}, {161, 4}, {64, 4}, {209, 12}, + {160, 9}, {172, 9}, {147, 5}, {184, 9}, {150, 5}, {162, 5}, {65, 5}, + {196, 9}, {153, 5}, {165, 5}, {67, 5}, {177, 5}, {73, 5}, {91, 5}, + {64, 4}, {209, 12}, {209, 12}, {175, 9}, {148, 6}, {187, 9}, {81, 9}, + {99, 9}, {66, 6}, {199, 9}, {87, 9}, {105, 9}, {68, 6}, {123, 9}, + {74, 6}, {92, 6}, {64, 4}, {209, 12}, {157, 6}, {111, 9}, {70, 6}, + {129, 9}, {76, 6}, {94, 6}, {65, 5}, {193, 6}, {82, 6}, {100, 6}, + {67, 5}, {118, 6}, {73, 5}, {91, 5}, {0, 6}, {209, 12}, {209, 12}, + {209, 12}, {209, 12}, {190, 9}, {152, 7}, {164, 7}, {145, 3}, {202, 9}, + {89, 9}, {107, 9}, {69, 7}, {125, 9}, {75, 7}, {93, 7}, {64, 4}, + {209, 12}, {158, 7}, {113, 9}, {71, 7}, {131, 9}, {77, 7}, {95, 7}, + {7, 9}, {194, 7}, {83, 7}, {101, 7}, {11, 9}, {119, 7}, {19, 9}, + {35, 9}, {1, 7}, {209, 12}, {209, 12}, {173, 7}, {148, 6}, {137, 9}, + {79, 7}, {97, 7}, {66, 6}, {197, 7}, {85, 7}, {103, 7}, {13, 9}, + {121, 7}, {21, 9}, {37, 9}, {2, 7}, {209, 12}, {157, 6}, {109, 7}, + {70, 6}, {127, 7}, {25, 9}, {41, 9}, {4, 7}, {193, 6}, {82, 6}, + {49, 9}, {8, 7}, {118, 6}, {16, 7}, {32, 7}, {0, 6}, {209, 12}, + {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {145, 3}, + {205, 9}, {156, 8}, {168, 8}, {146, 4}, {180, 8}, {149, 4}, {161, 4}, + {64, 4}, {209, 12}, {159, 8}, {115, 9}, {72, 8}, {133, 9}, {78, 8}, + {96, 8}, {65, 5}, {195, 
8}, {84, 8}, {102, 8}, {67, 5}, {120, 8}, + {73, 5}, {91, 5}, {64, 4}, {209, 12}, {209, 12}, {174, 8}, {148, 6}, + {139, 9}, {80, 8}, {98, 8}, {66, 6}, {198, 8}, {86, 8}, {104, 8}, + {14, 9}, {122, 8}, {22, 9}, {38, 9}, {3, 8}, {209, 12}, {157, 6}, + {110, 8}, {70, 6}, {128, 8}, {26, 9}, {42, 9}, {5, 8}, {193, 6}, + {82, 6}, {50, 9}, {9, 8}, {118, 6}, {17, 8}, {33, 8}, {0, 6}, + {209, 12}, {209, 12}, {209, 12}, {209, 12}, {189, 8}, {152, 7}, {164, 7}, + {145, 3}, {201, 8}, {88, 8}, {106, 8}, {69, 7}, {124, 8}, {75, 7}, + {93, 7}, {64, 4}, {209, 12}, {158, 7}, {112, 8}, {71, 7}, {130, 8}, + {28, 9}, {44, 9}, {6, 8}, {194, 7}, {83, 7}, {52, 9}, {10, 8}, + {119, 7}, {18, 8}, {34, 8}, {1, 7}, {209, 12}, {209, 12}, {173, 7}, + {148, 6}, {136, 8}, {79, 7}, {97, 7}, {66, 6}, {197, 7}, {85, 7}, + {56, 9}, {12, 8}, {121, 7}, {20, 8}, {36, 8}, {2, 7}, {209, 12}, + {157, 6}, {109, 7}, {70, 6}, {127, 7}, {24, 8}, {40, 8}, {4, 7}, + {193, 6}, {82, 6}, {48, 8}, {8, 7}, {118, 6}, {16, 7}, {32, 7}, + {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, + {209, 12}, {145, 3}, {209, 12}, {209, 12}, {209, 12}, {146, 4}, {209, 12}, + {149, 4}, {161, 4}, {64, 4}, {209, 12}, {209, 12}, {209, 12}, {147, 5}, + {209, 12}, {150, 5}, {162, 5}, {65, 5}, {209, 12}, {153, 5}, {165, 5}, + {67, 5}, {177, 5}, {73, 5}, {91, 5}, {64, 4}, {209, 12}, {209, 12}, + {176, 10}, {148, 6}, {188, 10}, {151, 6}, {163, 6}, {66, 6}, {200, 10}, + {154, 6}, {166, 6}, {68, 6}, {178, 6}, {74, 6}, {92, 6}, {64, 4}, + {209, 12}, {157, 6}, {169, 6}, {70, 6}, {181, 6}, {76, 6}, {94, 6}, + {65, 5}, {193, 6}, {82, 6}, {100, 6}, {67, 5}, {118, 6}, {73, 5}, + {91, 5}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {191, 10}, + {152, 7}, {164, 7}, {145, 3}, {203, 10}, {90, 10}, {108, 10}, {69, 7}, + {126, 10}, {75, 7}, {93, 7}, {64, 4}, {209, 12}, {158, 7}, {114, 10}, + {71, 7}, {132, 10}, {77, 7}, {95, 7}, {65, 5}, {194, 7}, {83, 7}, + {101, 7}, {67, 5}, {119, 7}, {73, 5}, {91, 5}, {1, 7}, {209, 
12}, + {209, 12}, {173, 7}, {148, 6}, {138, 10}, {79, 7}, {97, 7}, {66, 6}, + {197, 7}, {85, 7}, {103, 7}, {68, 6}, {121, 7}, {74, 6}, {92, 6}, + {2, 7}, {209, 12}, {157, 6}, {109, 7}, {70, 6}, {127, 7}, {76, 6}, + {94, 6}, {4, 7}, {193, 6}, {82, 6}, {100, 6}, {8, 7}, {118, 6}, + {16, 7}, {32, 7}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, + {209, 12}, {209, 12}, {209, 12}, {145, 3}, {206, 10}, {156, 8}, {168, 8}, + {146, 4}, {180, 8}, {149, 4}, {161, 4}, {64, 4}, {209, 12}, {159, 8}, + {116, 10}, {72, 8}, {134, 10}, {78, 8}, {96, 8}, {65, 5}, {195, 8}, + {84, 8}, {102, 8}, {67, 5}, {120, 8}, {73, 5}, {91, 5}, {64, 4}, + {209, 12}, {209, 12}, {174, 8}, {148, 6}, {140, 10}, {80, 8}, {98, 8}, + {66, 6}, {198, 8}, {86, 8}, {104, 8}, {15, 10}, {122, 8}, {23, 10}, + {39, 10}, {3, 8}, {209, 12}, {157, 6}, {110, 8}, {70, 6}, {128, 8}, + {27, 10}, {43, 10}, {5, 8}, {193, 6}, {82, 6}, {51, 10}, {9, 8}, + {118, 6}, {17, 8}, {33, 8}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, + {209, 12}, {189, 8}, {152, 7}, {164, 7}, {145, 3}, {201, 8}, {88, 8}, + {106, 8}, {69, 7}, {124, 8}, {75, 7}, {93, 7}, {64, 4}, {209, 12}, + {158, 7}, {112, 8}, {71, 7}, {130, 8}, {29, 10}, {45, 10}, {6, 8}, + {194, 7}, {83, 7}, {53, 10}, {10, 8}, {119, 7}, {18, 8}, {34, 8}, + {1, 7}, {209, 12}, {209, 12}, {173, 7}, {148, 6}, {136, 8}, {79, 7}, + {97, 7}, {66, 6}, {197, 7}, {85, 7}, {57, 10}, {12, 8}, {121, 7}, + {20, 8}, {36, 8}, {2, 7}, {209, 12}, {157, 6}, {109, 7}, {70, 6}, + {127, 7}, {24, 8}, {40, 8}, {4, 7}, {193, 6}, {82, 6}, {48, 8}, + {8, 7}, {118, 6}, {16, 7}, {32, 7}, {0, 6}, {209, 12}, {209, 12}, + {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {145, 3}, {209, 12}, + {209, 12}, {209, 12}, {146, 4}, {209, 12}, {149, 4}, {161, 4}, {64, 4}, + {209, 12}, {160, 9}, {172, 9}, {147, 5}, {184, 9}, {150, 5}, {162, 5}, + {65, 5}, {196, 9}, {153, 5}, {165, 5}, {67, 5}, {177, 5}, {73, 5}, + {91, 5}, {64, 4}, {209, 12}, {209, 12}, {175, 9}, {148, 6}, {142, 10}, + {81, 9}, {99, 9}, {66, 
6}, {199, 9}, {87, 9}, {105, 9}, {68, 6}, + {123, 9}, {74, 6}, {92, 6}, {64, 4}, {209, 12}, {157, 6}, {111, 9}, + {70, 6}, {129, 9}, {76, 6}, {94, 6}, {65, 5}, {193, 6}, {82, 6}, + {100, 6}, {67, 5}, {118, 6}, {73, 5}, {91, 5}, {0, 6}, {209, 12}, + {209, 12}, {209, 12}, {209, 12}, {190, 9}, {152, 7}, {164, 7}, {145, 3}, + {202, 9}, {89, 9}, {107, 9}, {69, 7}, {125, 9}, {75, 7}, {93, 7}, + {64, 4}, {209, 12}, {158, 7}, {113, 9}, {71, 7}, {131, 9}, {30, 10}, + {46, 10}, {7, 9}, {194, 7}, {83, 7}, {54, 10}, {11, 9}, {119, 7}, + {19, 9}, {35, 9}, {1, 7}, {209, 12}, {209, 12}, {173, 7}, {148, 6}, + {137, 9}, {79, 7}, {97, 7}, {66, 6}, {197, 7}, {85, 7}, {58, 10}, + {13, 9}, {121, 7}, {21, 9}, {37, 9}, {2, 7}, {209, 12}, {157, 6}, + {109, 7}, {70, 6}, {127, 7}, {25, 9}, {41, 9}, {4, 7}, {193, 6}, + {82, 6}, {49, 9}, {8, 7}, {118, 6}, {16, 7}, {32, 7}, {0, 6}, + {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, + {145, 3}, {205, 9}, {156, 8}, {168, 8}, {146, 4}, {180, 8}, {149, 4}, + {161, 4}, {64, 4}, {209, 12}, {159, 8}, {115, 9}, {72, 8}, {133, 9}, + {78, 8}, {96, 8}, {65, 5}, {195, 8}, {84, 8}, {102, 8}, {67, 5}, + {120, 8}, {73, 5}, {91, 5}, {64, 4}, {209, 12}, {209, 12}, {174, 8}, + {148, 6}, {139, 9}, {80, 8}, {98, 8}, {66, 6}, {198, 8}, {86, 8}, + {60, 10}, {14, 9}, {122, 8}, {22, 9}, {38, 9}, {3, 8}, {209, 12}, + {157, 6}, {110, 8}, {70, 6}, {128, 8}, {26, 9}, {42, 9}, {5, 8}, + {193, 6}, {82, 6}, {50, 9}, {9, 8}, {118, 6}, {17, 8}, {33, 8}, + {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {189, 8}, {152, 7}, + {164, 7}, {145, 3}, {201, 8}, {88, 8}, {106, 8}, {69, 7}, {124, 8}, + {75, 7}, {93, 7}, {64, 4}, {209, 12}, {158, 7}, {112, 8}, {71, 7}, + {130, 8}, {28, 9}, {44, 9}, {6, 8}, {194, 7}, {83, 7}, {52, 9}, + {10, 8}, {119, 7}, {18, 8}, {34, 8}, {1, 7}, {209, 12}, {209, 12}, + {173, 7}, {148, 6}, {136, 8}, {79, 7}, {97, 7}, {66, 6}, {197, 7}, + {85, 7}, {56, 9}, {12, 8}, {121, 7}, {20, 8}, {36, 8}, {2, 7}, + {209, 12}, {157, 6}, 
{109, 7}, {70, 6}, {127, 7}, {24, 8}, {40, 8}, + {4, 7}, {193, 6}, {82, 6}, {48, 8}, {8, 7}, {118, 6}, {16, 7}, + {32, 7}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, + {209, 12}, {209, 12}, {145, 3}, {209, 12}, {209, 12}, {209, 12}, {146, 4}, + {209, 12}, {149, 4}, {161, 4}, {64, 4}, {209, 12}, {209, 12}, {209, 12}, + {147, 5}, {209, 12}, {150, 5}, {162, 5}, {65, 5}, {209, 12}, {153, 5}, + {165, 5}, {67, 5}, {177, 5}, {73, 5}, {91, 5}, {64, 4}, {209, 12}, + {209, 12}, {209, 12}, {148, 6}, {209, 12}, {151, 6}, {163, 6}, {66, 6}, + {209, 12}, {154, 6}, {166, 6}, {68, 6}, {178, 6}, {74, 6}, {92, 6}, + {64, 4}, {209, 12}, {157, 6}, {169, 6}, {70, 6}, {181, 6}, {76, 6}, + {94, 6}, {65, 5}, {193, 6}, {82, 6}, {100, 6}, {67, 5}, {118, 6}, + {73, 5}, {91, 5}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, + {192, 11}, {152, 7}, {164, 7}, {145, 3}, {204, 11}, {155, 7}, {167, 7}, + {69, 7}, {179, 7}, {75, 7}, {93, 7}, {64, 4}, {209, 12}, {158, 7}, + {170, 7}, {71, 7}, {182, 7}, {77, 7}, {95, 7}, {65, 5}, {194, 7}, + {83, 7}, {101, 7}, {67, 5}, {119, 7}, {73, 5}, {91, 5}, {1, 7}, + {209, 12}, {209, 12}, {173, 7}, {148, 6}, {185, 7}, {79, 7}, {97, 7}, + {66, 6}, {197, 7}, {85, 7}, {103, 7}, {68, 6}, {121, 7}, {74, 6}, + {92, 6}, {2, 7}, {209, 12}, {157, 6}, {109, 7}, {70, 6}, {127, 7}, + {76, 6}, {94, 6}, {4, 7}, {193, 6}, {82, 6}, {100, 6}, {8, 7}, + {118, 6}, {16, 7}, {32, 7}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, + {209, 12}, {209, 12}, {209, 12}, {209, 12}, {145, 3}, {207, 11}, {156, 8}, + {168, 8}, {146, 4}, {180, 8}, {149, 4}, {161, 4}, {64, 4}, {209, 12}, + {159, 8}, {117, 11}, {72, 8}, {135, 11}, {78, 8}, {96, 8}, {65, 5}, + {195, 8}, {84, 8}, {102, 8}, {67, 5}, {120, 8}, {73, 5}, {91, 5}, + {64, 4}, {209, 12}, {209, 12}, {174, 8}, {148, 6}, {141, 11}, {80, 8}, + {98, 8}, {66, 6}, {198, 8}, {86, 8}, {104, 8}, {68, 6}, {122, 8}, + {74, 6}, {92, 6}, {3, 8}, {209, 12}, {157, 6}, {110, 8}, {70, 6}, + {128, 8}, {76, 6}, {94, 6}, {5, 8}, {193, 6}, 
{82, 6}, {100, 6}, + {9, 8}, {118, 6}, {17, 8}, {33, 8}, {0, 6}, {209, 12}, {209, 12}, + {209, 12}, {209, 12}, {189, 8}, {152, 7}, {164, 7}, {145, 3}, {201, 8}, + {88, 8}, {106, 8}, {69, 7}, {124, 8}, {75, 7}, {93, 7}, {64, 4}, + {209, 12}, {158, 7}, {112, 8}, {71, 7}, {130, 8}, {77, 7}, {95, 7}, + {6, 8}, {194, 7}, {83, 7}, {101, 7}, {10, 8}, {119, 7}, {18, 8}, + {34, 8}, {1, 7}, {209, 12}, {209, 12}, {173, 7}, {148, 6}, {136, 8}, + {79, 7}, {97, 7}, {66, 6}, {197, 7}, {85, 7}, {103, 7}, {12, 8}, + {121, 7}, {20, 8}, {36, 8}, {2, 7}, {209, 12}, {157, 6}, {109, 7}, + {70, 6}, {127, 7}, {24, 8}, {40, 8}, {4, 7}, {193, 6}, {82, 6}, + {48, 8}, {8, 7}, {118, 6}, {16, 7}, {32, 7}, {0, 6}, {209, 12}, + {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {145, 3}, + {209, 12}, {209, 12}, {209, 12}, {146, 4}, {209, 12}, {149, 4}, {161, 4}, + {64, 4}, {209, 12}, {160, 9}, {172, 9}, {147, 5}, {184, 9}, {150, 5}, + {162, 5}, {65, 5}, {196, 9}, {153, 5}, {165, 5}, {67, 5}, {177, 5}, + {73, 5}, {91, 5}, {64, 4}, {209, 12}, {209, 12}, {175, 9}, {148, 6}, + {143, 11}, {81, 9}, {99, 9}, {66, 6}, {199, 9}, {87, 9}, {105, 9}, + {68, 6}, {123, 9}, {74, 6}, {92, 6}, {64, 4}, {209, 12}, {157, 6}, + {111, 9}, {70, 6}, {129, 9}, {76, 6}, {94, 6}, {65, 5}, {193, 6}, + {82, 6}, {100, 6}, {67, 5}, {118, 6}, {73, 5}, {91, 5}, {0, 6}, + {209, 12}, {209, 12}, {209, 12}, {209, 12}, {190, 9}, {152, 7}, {164, 7}, + {145, 3}, {202, 9}, {89, 9}, {107, 9}, {69, 7}, {125, 9}, {75, 7}, + {93, 7}, {64, 4}, {209, 12}, {158, 7}, {113, 9}, {71, 7}, {131, 9}, + {31, 11}, {47, 11}, {7, 9}, {194, 7}, {83, 7}, {55, 11}, {11, 9}, + {119, 7}, {19, 9}, {35, 9}, {1, 7}, {209, 12}, {209, 12}, {173, 7}, + {148, 6}, {137, 9}, {79, 7}, {97, 7}, {66, 6}, {197, 7}, {85, 7}, + {59, 11}, {13, 9}, {121, 7}, {21, 9}, {37, 9}, {2, 7}, {209, 12}, + {157, 6}, {109, 7}, {70, 6}, {127, 7}, {25, 9}, {41, 9}, {4, 7}, + {193, 6}, {82, 6}, {49, 9}, {8, 7}, {118, 6}, {16, 7}, {32, 7}, + {0, 6}, {209, 12}, {209, 12}, 
{209, 12}, {209, 12}, {209, 12}, {209, 12}, + {209, 12}, {145, 3}, {205, 9}, {156, 8}, {168, 8}, {146, 4}, {180, 8}, + {149, 4}, {161, 4}, {64, 4}, {209, 12}, {159, 8}, {115, 9}, {72, 8}, + {133, 9}, {78, 8}, {96, 8}, {65, 5}, {195, 8}, {84, 8}, {102, 8}, + {67, 5}, {120, 8}, {73, 5}, {91, 5}, {64, 4}, {209, 12}, {209, 12}, + {174, 8}, {148, 6}, {139, 9}, {80, 8}, {98, 8}, {66, 6}, {198, 8}, + {86, 8}, {61, 11}, {14, 9}, {122, 8}, {22, 9}, {38, 9}, {3, 8}, + {209, 12}, {157, 6}, {110, 8}, {70, 6}, {128, 8}, {26, 9}, {42, 9}, + {5, 8}, {193, 6}, {82, 6}, {50, 9}, {9, 8}, {118, 6}, {17, 8}, + {33, 8}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {189, 8}, + {152, 7}, {164, 7}, {145, 3}, {201, 8}, {88, 8}, {106, 8}, {69, 7}, + {124, 8}, {75, 7}, {93, 7}, {64, 4}, {209, 12}, {158, 7}, {112, 8}, + {71, 7}, {130, 8}, {28, 9}, {44, 9}, {6, 8}, {194, 7}, {83, 7}, + {52, 9}, {10, 8}, {119, 7}, {18, 8}, {34, 8}, {1, 7}, {209, 12}, + {209, 12}, {173, 7}, {148, 6}, {136, 8}, {79, 7}, {97, 7}, {66, 6}, + {197, 7}, {85, 7}, {56, 9}, {12, 8}, {121, 7}, {20, 8}, {36, 8}, + {2, 7}, {209, 12}, {157, 6}, {109, 7}, {70, 6}, {127, 7}, {24, 8}, + {40, 8}, {4, 7}, {193, 6}, {82, 6}, {48, 8}, {8, 7}, {118, 6}, + {16, 7}, {32, 7}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, + {209, 12}, {209, 12}, {209, 12}, {145, 3}, {209, 12}, {209, 12}, {209, 12}, + {146, 4}, {209, 12}, {149, 4}, {161, 4}, {64, 4}, {209, 12}, {209, 12}, + {209, 12}, {147, 5}, {209, 12}, {150, 5}, {162, 5}, {65, 5}, {209, 12}, + {153, 5}, {165, 5}, {67, 5}, {177, 5}, {73, 5}, {91, 5}, {64, 4}, + {209, 12}, {209, 12}, {176, 10}, {148, 6}, {188, 10}, {151, 6}, {163, 6}, + {66, 6}, {200, 10}, {154, 6}, {166, 6}, {68, 6}, {178, 6}, {74, 6}, + {92, 6}, {64, 4}, {209, 12}, {157, 6}, {169, 6}, {70, 6}, {181, 6}, + {76, 6}, {94, 6}, {65, 5}, {193, 6}, {82, 6}, {100, 6}, {67, 5}, + {118, 6}, {73, 5}, {91, 5}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, + {209, 12}, {191, 10}, {152, 7}, {164, 7}, {145, 3}, {203, 10}, 
{90, 10}, + {108, 10}, {69, 7}, {126, 10}, {75, 7}, {93, 7}, {64, 4}, {209, 12}, + {158, 7}, {114, 10}, {71, 7}, {132, 10}, {77, 7}, {95, 7}, {65, 5}, + {194, 7}, {83, 7}, {101, 7}, {67, 5}, {119, 7}, {73, 5}, {91, 5}, + {1, 7}, {209, 12}, {209, 12}, {173, 7}, {148, 6}, {138, 10}, {79, 7}, + {97, 7}, {66, 6}, {197, 7}, {85, 7}, {103, 7}, {68, 6}, {121, 7}, + {74, 6}, {92, 6}, {2, 7}, {209, 12}, {157, 6}, {109, 7}, {70, 6}, + {127, 7}, {76, 6}, {94, 6}, {4, 7}, {193, 6}, {82, 6}, {100, 6}, + {8, 7}, {118, 6}, {16, 7}, {32, 7}, {0, 6}, {209, 12}, {209, 12}, + {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {145, 3}, {206, 10}, + {156, 8}, {168, 8}, {146, 4}, {180, 8}, {149, 4}, {161, 4}, {64, 4}, + {209, 12}, {159, 8}, {116, 10}, {72, 8}, {134, 10}, {78, 8}, {96, 8}, + {65, 5}, {195, 8}, {84, 8}, {102, 8}, {67, 5}, {120, 8}, {73, 5}, + {91, 5}, {64, 4}, {209, 12}, {209, 12}, {174, 8}, {148, 6}, {140, 10}, + {80, 8}, {98, 8}, {66, 6}, {198, 8}, {86, 8}, {62, 11}, {15, 10}, + {122, 8}, {23, 10}, {39, 10}, {3, 8}, {209, 12}, {157, 6}, {110, 8}, + {70, 6}, {128, 8}, {27, 10}, {43, 10}, {5, 8}, {193, 6}, {82, 6}, + {51, 10}, {9, 8}, {118, 6}, {17, 8}, {33, 8}, {0, 6}, {209, 12}, + {209, 12}, {209, 12}, {209, 12}, {189, 8}, {152, 7}, {164, 7}, {145, 3}, + {201, 8}, {88, 8}, {106, 8}, {69, 7}, {124, 8}, {75, 7}, {93, 7}, + {64, 4}, {209, 12}, {158, 7}, {112, 8}, {71, 7}, {130, 8}, {29, 10}, + {45, 10}, {6, 8}, {194, 7}, {83, 7}, {53, 10}, {10, 8}, {119, 7}, + {18, 8}, {34, 8}, {1, 7}, {209, 12}, {209, 12}, {173, 7}, {148, 6}, + {136, 8}, {79, 7}, {97, 7}, {66, 6}, {197, 7}, {85, 7}, {57, 10}, + {12, 8}, {121, 7}, {20, 8}, {36, 8}, {2, 7}, {209, 12}, {157, 6}, + {109, 7}, {70, 6}, {127, 7}, {24, 8}, {40, 8}, {4, 7}, {193, 6}, + {82, 6}, {48, 8}, {8, 7}, {118, 6}, {16, 7}, {32, 7}, {0, 6}, + {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, + {145, 3}, {209, 12}, {209, 12}, {209, 12}, {146, 4}, {209, 12}, {149, 4}, + {161, 4}, {64, 4}, {209, 
12}, {160, 9}, {172, 9}, {147, 5}, {184, 9}, + {150, 5}, {162, 5}, {65, 5}, {196, 9}, {153, 5}, {165, 5}, {67, 5}, + {177, 5}, {73, 5}, {91, 5}, {64, 4}, {209, 12}, {209, 12}, {175, 9}, + {148, 6}, {142, 10}, {81, 9}, {99, 9}, {66, 6}, {199, 9}, {87, 9}, + {105, 9}, {68, 6}, {123, 9}, {74, 6}, {92, 6}, {64, 4}, {209, 12}, + {157, 6}, {111, 9}, {70, 6}, {129, 9}, {76, 6}, {94, 6}, {65, 5}, + {193, 6}, {82, 6}, {100, 6}, {67, 5}, {118, 6}, {73, 5}, {91, 5}, + {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {190, 9}, {152, 7}, + {164, 7}, {145, 3}, {202, 9}, {89, 9}, {107, 9}, {69, 7}, {125, 9}, + {75, 7}, {93, 7}, {64, 4}, {209, 12}, {158, 7}, {113, 9}, {71, 7}, + {131, 9}, {30, 10}, {46, 10}, {7, 9}, {194, 7}, {83, 7}, {54, 10}, + {11, 9}, {119, 7}, {19, 9}, {35, 9}, {1, 7}, {209, 12}, {209, 12}, + {173, 7}, {148, 6}, {137, 9}, {79, 7}, {97, 7}, {66, 6}, {197, 7}, + {85, 7}, {58, 10}, {13, 9}, {121, 7}, {21, 9}, {37, 9}, {2, 7}, + {209, 12}, {157, 6}, {109, 7}, {70, 6}, {127, 7}, {25, 9}, {41, 9}, + {4, 7}, {193, 6}, {82, 6}, {49, 9}, {8, 7}, {118, 6}, {16, 7}, + {32, 7}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, + {209, 12}, {209, 12}, {145, 3}, {205, 9}, {156, 8}, {168, 8}, {146, 4}, + {180, 8}, {149, 4}, {161, 4}, {64, 4}, {209, 12}, {159, 8}, {115, 9}, + {72, 8}, {133, 9}, {78, 8}, {96, 8}, {65, 5}, {195, 8}, {84, 8}, + {102, 8}, {67, 5}, {120, 8}, {73, 5}, {91, 5}, {64, 4}, {209, 12}, + {209, 12}, {174, 8}, {148, 6}, {139, 9}, {80, 8}, {98, 8}, {66, 6}, + {198, 8}, {86, 8}, {60, 10}, {14, 9}, {122, 8}, {22, 9}, {38, 9}, + {3, 8}, {209, 12}, {157, 6}, {110, 8}, {70, 6}, {128, 8}, {26, 9}, + {42, 9}, {5, 8}, {193, 6}, {82, 6}, {50, 9}, {9, 8}, {118, 6}, + {17, 8}, {33, 8}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, + {189, 8}, {152, 7}, {164, 7}, {145, 3}, {201, 8}, {88, 8}, {106, 8}, + {69, 7}, {124, 8}, {75, 7}, {93, 7}, {64, 4}, {209, 12}, {158, 7}, + {112, 8}, {71, 7}, {130, 8}, {28, 9}, {44, 9}, {6, 8}, {194, 7}, + {83, 7}, 
{52, 9}, {10, 8}, {119, 7}, {18, 8}, {34, 8}, {1, 7}, + {209, 12}, {209, 12}, {173, 7}, {148, 6}, {136, 8}, {79, 7}, {97, 7}, + {66, 6}, {197, 7}, {85, 7}, {56, 9}, {12, 8}, {121, 7}, {20, 8}, + {36, 8}, {2, 7}, {209, 12}, {157, 6}, {109, 7}, {70, 6}, {127, 7}, + {24, 8}, {40, 8}, {4, 7}, {193, 6}, {82, 6}, {48, 8}, {8, 7}, + {118, 6}, {16, 7}, {32, 7}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, + {209, 12}, {209, 12}, {209, 12}, {209, 12}, {145, 3}, {209, 12}, {209, 12}, + {209, 12}, {146, 4}, {209, 12}, {149, 4}, {161, 4}, {64, 4}, {209, 12}, + {209, 12}, {209, 12}, {147, 5}, {209, 12}, {150, 5}, {162, 5}, {65, 5}, + {209, 12}, {153, 5}, {165, 5}, {67, 5}, {177, 5}, {73, 5}, {91, 5}, + {64, 4}, {209, 12}, {209, 12}, {209, 12}, {148, 6}, {209, 12}, {151, 6}, + {163, 6}, {66, 6}, {209, 12}, {154, 6}, {166, 6}, {68, 6}, {178, 6}, + {74, 6}, {92, 6}, {64, 4}, {209, 12}, {157, 6}, {169, 6}, {70, 6}, + {181, 6}, {76, 6}, {94, 6}, {65, 5}, {193, 6}, {82, 6}, {100, 6}, + {67, 5}, {118, 6}, {73, 5}, {91, 5}, {0, 6}, {209, 12}, {209, 12}, + {209, 12}, {209, 12}, {209, 12}, {152, 7}, {164, 7}, {145, 3}, {209, 12}, + {155, 7}, {167, 7}, {69, 7}, {179, 7}, {75, 7}, {93, 7}, {64, 4}, + {209, 12}, {158, 7}, {170, 7}, {71, 7}, {182, 7}, {77, 7}, {95, 7}, + {65, 5}, {194, 7}, {83, 7}, {101, 7}, {67, 5}, {119, 7}, {73, 5}, + {91, 5}, {1, 7}, {209, 12}, {209, 12}, {173, 7}, {148, 6}, {185, 7}, + {79, 7}, {97, 7}, {66, 6}, {197, 7}, {85, 7}, {103, 7}, {68, 6}, + {121, 7}, {74, 6}, {92, 6}, {2, 7}, {209, 12}, {157, 6}, {109, 7}, + {70, 6}, {127, 7}, {76, 6}, {94, 6}, {4, 7}, {193, 6}, {82, 6}, + {100, 6}, {8, 7}, {118, 6}, {16, 7}, {32, 7}, {0, 6}, {209, 12}, + {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {145, 3}, + {208, 12}, {156, 8}, {168, 8}, {146, 4}, {180, 8}, {149, 4}, {161, 4}, + {64, 4}, {209, 12}, {159, 8}, {171, 8}, {72, 8}, {183, 8}, {78, 8}, + {96, 8}, {65, 5}, {195, 8}, {84, 8}, {102, 8}, {67, 5}, {120, 8}, + {73, 5}, {91, 5}, {64, 4}, {209, 12}, 
{209, 12}, {174, 8}, {148, 6}, + {186, 8}, {80, 8}, {98, 8}, {66, 6}, {198, 8}, {86, 8}, {104, 8}, + {68, 6}, {122, 8}, {74, 6}, {92, 6}, {3, 8}, {209, 12}, {157, 6}, + {110, 8}, {70, 6}, {128, 8}, {76, 6}, {94, 6}, {5, 8}, {193, 6}, + {82, 6}, {100, 6}, {9, 8}, {118, 6}, {17, 8}, {33, 8}, {0, 6}, + {209, 12}, {209, 12}, {209, 12}, {209, 12}, {189, 8}, {152, 7}, {164, 7}, + {145, 3}, {201, 8}, {88, 8}, {106, 8}, {69, 7}, {124, 8}, {75, 7}, + {93, 7}, {64, 4}, {209, 12}, {158, 7}, {112, 8}, {71, 7}, {130, 8}, + {77, 7}, {95, 7}, {6, 8}, {194, 7}, {83, 7}, {101, 7}, {10, 8}, + {119, 7}, {18, 8}, {34, 8}, {1, 7}, {209, 12}, {209, 12}, {173, 7}, + {148, 6}, {136, 8}, {79, 7}, {97, 7}, {66, 6}, {197, 7}, {85, 7}, + {103, 7}, {12, 8}, {121, 7}, {20, 8}, {36, 8}, {2, 7}, {209, 12}, + {157, 6}, {109, 7}, {70, 6}, {127, 7}, {24, 8}, {40, 8}, {4, 7}, + {193, 6}, {82, 6}, {48, 8}, {8, 7}, {118, 6}, {16, 7}, {32, 7}, + {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, + {209, 12}, {145, 3}, {209, 12}, {209, 12}, {209, 12}, {146, 4}, {209, 12}, + {149, 4}, {161, 4}, {64, 4}, {209, 12}, {160, 9}, {172, 9}, {147, 5}, + {184, 9}, {150, 5}, {162, 5}, {65, 5}, {196, 9}, {153, 5}, {165, 5}, + {67, 5}, {177, 5}, {73, 5}, {91, 5}, {64, 4}, {209, 12}, {209, 12}, + {175, 9}, {148, 6}, {144, 12}, {81, 9}, {99, 9}, {66, 6}, {199, 9}, + {87, 9}, {105, 9}, {68, 6}, {123, 9}, {74, 6}, {92, 6}, {64, 4}, + {209, 12}, {157, 6}, {111, 9}, {70, 6}, {129, 9}, {76, 6}, {94, 6}, + {65, 5}, {193, 6}, {82, 6}, {100, 6}, {67, 5}, {118, 6}, {73, 5}, + {91, 5}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {190, 9}, + {152, 7}, {164, 7}, {145, 3}, {202, 9}, {89, 9}, {107, 9}, {69, 7}, + {125, 9}, {75, 7}, {93, 7}, {64, 4}, {209, 12}, {158, 7}, {113, 9}, + {71, 7}, {131, 9}, {77, 7}, {95, 7}, {7, 9}, {194, 7}, {83, 7}, + {101, 7}, {11, 9}, {119, 7}, {19, 9}, {35, 9}, {1, 7}, {209, 12}, + {209, 12}, {173, 7}, {148, 6}, {137, 9}, {79, 7}, {97, 7}, {66, 6}, + {197, 7}, {85, 7}, 
{103, 7}, {13, 9}, {121, 7}, {21, 9}, {37, 9}, + {2, 7}, {209, 12}, {157, 6}, {109, 7}, {70, 6}, {127, 7}, {25, 9}, + {41, 9}, {4, 7}, {193, 6}, {82, 6}, {49, 9}, {8, 7}, {118, 6}, + {16, 7}, {32, 7}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, + {209, 12}, {209, 12}, {209, 12}, {145, 3}, {205, 9}, {156, 8}, {168, 8}, + {146, 4}, {180, 8}, {149, 4}, {161, 4}, {64, 4}, {209, 12}, {159, 8}, + {115, 9}, {72, 8}, {133, 9}, {78, 8}, {96, 8}, {65, 5}, {195, 8}, + {84, 8}, {102, 8}, {67, 5}, {120, 8}, {73, 5}, {91, 5}, {64, 4}, + {209, 12}, {209, 12}, {174, 8}, {148, 6}, {139, 9}, {80, 8}, {98, 8}, + {66, 6}, {198, 8}, {86, 8}, {104, 8}, {14, 9}, {122, 8}, {22, 9}, + {38, 9}, {3, 8}, {209, 12}, {157, 6}, {110, 8}, {70, 6}, {128, 8}, + {26, 9}, {42, 9}, {5, 8}, {193, 6}, {82, 6}, {50, 9}, {9, 8}, + {118, 6}, {17, 8}, {33, 8}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, + {209, 12}, {189, 8}, {152, 7}, {164, 7}, {145, 3}, {201, 8}, {88, 8}, + {106, 8}, {69, 7}, {124, 8}, {75, 7}, {93, 7}, {64, 4}, {209, 12}, + {158, 7}, {112, 8}, {71, 7}, {130, 8}, {28, 9}, {44, 9}, {6, 8}, + {194, 7}, {83, 7}, {52, 9}, {10, 8}, {119, 7}, {18, 8}, {34, 8}, + {1, 7}, {209, 12}, {209, 12}, {173, 7}, {148, 6}, {136, 8}, {79, 7}, + {97, 7}, {66, 6}, {197, 7}, {85, 7}, {56, 9}, {12, 8}, {121, 7}, + {20, 8}, {36, 8}, {2, 7}, {209, 12}, {157, 6}, {109, 7}, {70, 6}, + {127, 7}, {24, 8}, {40, 8}, {4, 7}, {193, 6}, {82, 6}, {48, 8}, + {8, 7}, {118, 6}, {16, 7}, {32, 7}, {0, 6}, {209, 12}, {209, 12}, + {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {145, 3}, {209, 12}, + {209, 12}, {209, 12}, {146, 4}, {209, 12}, {149, 4}, {161, 4}, {64, 4}, + {209, 12}, {209, 12}, {209, 12}, {147, 5}, {209, 12}, {150, 5}, {162, 5}, + {65, 5}, {209, 12}, {153, 5}, {165, 5}, {67, 5}, {177, 5}, {73, 5}, + {91, 5}, {64, 4}, {209, 12}, {209, 12}, {176, 10}, {148, 6}, {188, 10}, + {151, 6}, {163, 6}, {66, 6}, {200, 10}, {154, 6}, {166, 6}, {68, 6}, + {178, 6}, {74, 6}, {92, 6}, {64, 4}, {209, 12}, {157, 6}, 
{169, 6}, + {70, 6}, {181, 6}, {76, 6}, {94, 6}, {65, 5}, {193, 6}, {82, 6}, + {100, 6}, {67, 5}, {118, 6}, {73, 5}, {91, 5}, {0, 6}, {209, 12}, + {209, 12}, {209, 12}, {209, 12}, {191, 10}, {152, 7}, {164, 7}, {145, 3}, + {203, 10}, {90, 10}, {108, 10}, {69, 7}, {126, 10}, {75, 7}, {93, 7}, + {64, 4}, {209, 12}, {158, 7}, {114, 10}, {71, 7}, {132, 10}, {77, 7}, + {95, 7}, {65, 5}, {194, 7}, {83, 7}, {101, 7}, {67, 5}, {119, 7}, + {73, 5}, {91, 5}, {1, 7}, {209, 12}, {209, 12}, {173, 7}, {148, 6}, + {138, 10}, {79, 7}, {97, 7}, {66, 6}, {197, 7}, {85, 7}, {103, 7}, + {68, 6}, {121, 7}, {74, 6}, {92, 6}, {2, 7}, {209, 12}, {157, 6}, + {109, 7}, {70, 6}, {127, 7}, {76, 6}, {94, 6}, {4, 7}, {193, 6}, + {82, 6}, {100, 6}, {8, 7}, {118, 6}, {16, 7}, {32, 7}, {0, 6}, + {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, + {145, 3}, {206, 10}, {156, 8}, {168, 8}, {146, 4}, {180, 8}, {149, 4}, + {161, 4}, {64, 4}, {209, 12}, {159, 8}, {116, 10}, {72, 8}, {134, 10}, + {78, 8}, {96, 8}, {65, 5}, {195, 8}, {84, 8}, {102, 8}, {67, 5}, + {120, 8}, {73, 5}, {91, 5}, {64, 4}, {209, 12}, {209, 12}, {174, 8}, + {148, 6}, {140, 10}, {80, 8}, {98, 8}, {66, 6}, {198, 8}, {86, 8}, + {63, 12}, {15, 10}, {122, 8}, {23, 10}, {39, 10}, {3, 8}, {209, 12}, + {157, 6}, {110, 8}, {70, 6}, {128, 8}, {27, 10}, {43, 10}, {5, 8}, + {193, 6}, {82, 6}, {51, 10}, {9, 8}, {118, 6}, {17, 8}, {33, 8}, + {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {189, 8}, {152, 7}, + {164, 7}, {145, 3}, {201, 8}, {88, 8}, {106, 8}, {69, 7}, {124, 8}, + {75, 7}, {93, 7}, {64, 4}, {209, 12}, {158, 7}, {112, 8}, {71, 7}, + {130, 8}, {29, 10}, {45, 10}, {6, 8}, {194, 7}, {83, 7}, {53, 10}, + {10, 8}, {119, 7}, {18, 8}, {34, 8}, {1, 7}, {209, 12}, {209, 12}, + {173, 7}, {148, 6}, {136, 8}, {79, 7}, {97, 7}, {66, 6}, {197, 7}, + {85, 7}, {57, 10}, {12, 8}, {121, 7}, {20, 8}, {36, 8}, {2, 7}, + {209, 12}, {157, 6}, {109, 7}, {70, 6}, {127, 7}, {24, 8}, {40, 8}, + {4, 7}, {193, 6}, {82, 6}, 
{48, 8}, {8, 7}, {118, 6}, {16, 7}, + {32, 7}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, + {209, 12}, {209, 12}, {145, 3}, {209, 12}, {209, 12}, {209, 12}, {146, 4}, + {209, 12}, {149, 4}, {161, 4}, {64, 4}, {209, 12}, {160, 9}, {172, 9}, + {147, 5}, {184, 9}, {150, 5}, {162, 5}, {65, 5}, {196, 9}, {153, 5}, + {165, 5}, {67, 5}, {177, 5}, {73, 5}, {91, 5}, {64, 4}, {209, 12}, + {209, 12}, {175, 9}, {148, 6}, {142, 10}, {81, 9}, {99, 9}, {66, 6}, + {199, 9}, {87, 9}, {105, 9}, {68, 6}, {123, 9}, {74, 6}, {92, 6}, + {64, 4}, {209, 12}, {157, 6}, {111, 9}, {70, 6}, {129, 9}, {76, 6}, + {94, 6}, {65, 5}, {193, 6}, {82, 6}, {100, 6}, {67, 5}, {118, 6}, + {73, 5}, {91, 5}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, + {190, 9}, {152, 7}, {164, 7}, {145, 3}, {202, 9}, {89, 9}, {107, 9}, + {69, 7}, {125, 9}, {75, 7}, {93, 7}, {64, 4}, {209, 12}, {158, 7}, + {113, 9}, {71, 7}, {131, 9}, {30, 10}, {46, 10}, {7, 9}, {194, 7}, + {83, 7}, {54, 10}, {11, 9}, {119, 7}, {19, 9}, {35, 9}, {1, 7}, + {209, 12}, {209, 12}, {173, 7}, {148, 6}, {137, 9}, {79, 7}, {97, 7}, + {66, 6}, {197, 7}, {85, 7}, {58, 10}, {13, 9}, {121, 7}, {21, 9}, + {37, 9}, {2, 7}, {209, 12}, {157, 6}, {109, 7}, {70, 6}, {127, 7}, + {25, 9}, {41, 9}, {4, 7}, {193, 6}, {82, 6}, {49, 9}, {8, 7}, + {118, 6}, {16, 7}, {32, 7}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, + {209, 12}, {209, 12}, {209, 12}, {209, 12}, {145, 3}, {205, 9}, {156, 8}, + {168, 8}, {146, 4}, {180, 8}, {149, 4}, {161, 4}, {64, 4}, {209, 12}, + {159, 8}, {115, 9}, {72, 8}, {133, 9}, {78, 8}, {96, 8}, {65, 5}, + {195, 8}, {84, 8}, {102, 8}, {67, 5}, {120, 8}, {73, 5}, {91, 5}, + {64, 4}, {209, 12}, {209, 12}, {174, 8}, {148, 6}, {139, 9}, {80, 8}, + {98, 8}, {66, 6}, {198, 8}, {86, 8}, {60, 10}, {14, 9}, {122, 8}, + {22, 9}, {38, 9}, {3, 8}, {209, 12}, {157, 6}, {110, 8}, {70, 6}, + {128, 8}, {26, 9}, {42, 9}, {5, 8}, {193, 6}, {82, 6}, {50, 9}, + {9, 8}, {118, 6}, {17, 8}, {33, 8}, {0, 6}, {209, 12}, {209, 12}, + 
{209, 12}, {209, 12}, {189, 8}, {152, 7}, {164, 7}, {145, 3}, {201, 8}, + {88, 8}, {106, 8}, {69, 7}, {124, 8}, {75, 7}, {93, 7}, {64, 4}, + {209, 12}, {158, 7}, {112, 8}, {71, 7}, {130, 8}, {28, 9}, {44, 9}, + {6, 8}, {194, 7}, {83, 7}, {52, 9}, {10, 8}, {119, 7}, {18, 8}, + {34, 8}, {1, 7}, {209, 12}, {209, 12}, {173, 7}, {148, 6}, {136, 8}, + {79, 7}, {97, 7}, {66, 6}, {197, 7}, {85, 7}, {56, 9}, {12, 8}, + {121, 7}, {20, 8}, {36, 8}, {2, 7}, {209, 12}, {157, 6}, {109, 7}, + {70, 6}, {127, 7}, {24, 8}, {40, 8}, {4, 7}, {193, 6}, {82, 6}, + {48, 8}, {8, 7}, {118, 6}, {16, 7}, {32, 7}, {0, 6}, {209, 12}, + {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {145, 3}, + {209, 12}, {209, 12}, {209, 12}, {146, 4}, {209, 12}, {149, 4}, {161, 4}, + {64, 4}, {209, 12}, {209, 12}, {209, 12}, {147, 5}, {209, 12}, {150, 5}, + {162, 5}, {65, 5}, {209, 12}, {153, 5}, {165, 5}, {67, 5}, {177, 5}, + {73, 5}, {91, 5}, {64, 4}, {209, 12}, {209, 12}, {209, 12}, {148, 6}, + {209, 12}, {151, 6}, {163, 6}, {66, 6}, {209, 12}, {154, 6}, {166, 6}, + {68, 6}, {178, 6}, {74, 6}, {92, 6}, {64, 4}, {209, 12}, {157, 6}, + {169, 6}, {70, 6}, {181, 6}, {76, 6}, {94, 6}, {65, 5}, {193, 6}, + {82, 6}, {100, 6}, {67, 5}, {118, 6}, {73, 5}, {91, 5}, {0, 6}, + {209, 12}, {209, 12}, {209, 12}, {209, 12}, {192, 11}, {152, 7}, {164, 7}, + {145, 3}, {204, 11}, {155, 7}, {167, 7}, {69, 7}, {179, 7}, {75, 7}, + {93, 7}, {64, 4}, {209, 12}, {158, 7}, {170, 7}, {71, 7}, {182, 7}, + {77, 7}, {95, 7}, {65, 5}, {194, 7}, {83, 7}, {101, 7}, {67, 5}, + {119, 7}, {73, 5}, {91, 5}, {1, 7}, {209, 12}, {209, 12}, {173, 7}, + {148, 6}, {185, 7}, {79, 7}, {97, 7}, {66, 6}, {197, 7}, {85, 7}, + {103, 7}, {68, 6}, {121, 7}, {74, 6}, {92, 6}, {2, 7}, {209, 12}, + {157, 6}, {109, 7}, {70, 6}, {127, 7}, {76, 6}, {94, 6}, {4, 7}, + {193, 6}, {82, 6}, {100, 6}, {8, 7}, {118, 6}, {16, 7}, {32, 7}, + {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, + {209, 12}, {145, 3}, {207, 11}, 
{156, 8}, {168, 8}, {146, 4}, {180, 8}, + {149, 4}, {161, 4}, {64, 4}, {209, 12}, {159, 8}, {117, 11}, {72, 8}, + {135, 11}, {78, 8}, {96, 8}, {65, 5}, {195, 8}, {84, 8}, {102, 8}, + {67, 5}, {120, 8}, {73, 5}, {91, 5}, {64, 4}, {209, 12}, {209, 12}, + {174, 8}, {148, 6}, {141, 11}, {80, 8}, {98, 8}, {66, 6}, {198, 8}, + {86, 8}, {104, 8}, {68, 6}, {122, 8}, {74, 6}, {92, 6}, {3, 8}, + {209, 12}, {157, 6}, {110, 8}, {70, 6}, {128, 8}, {76, 6}, {94, 6}, + {5, 8}, {193, 6}, {82, 6}, {100, 6}, {9, 8}, {118, 6}, {17, 8}, + {33, 8}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {189, 8}, + {152, 7}, {164, 7}, {145, 3}, {201, 8}, {88, 8}, {106, 8}, {69, 7}, + {124, 8}, {75, 7}, {93, 7}, {64, 4}, {209, 12}, {158, 7}, {112, 8}, + {71, 7}, {130, 8}, {77, 7}, {95, 7}, {6, 8}, {194, 7}, {83, 7}, + {101, 7}, {10, 8}, {119, 7}, {18, 8}, {34, 8}, {1, 7}, {209, 12}, + {209, 12}, {173, 7}, {148, 6}, {136, 8}, {79, 7}, {97, 7}, {66, 6}, + {197, 7}, {85, 7}, {103, 7}, {12, 8}, {121, 7}, {20, 8}, {36, 8}, + {2, 7}, {209, 12}, {157, 6}, {109, 7}, {70, 6}, {127, 7}, {24, 8}, + {40, 8}, {4, 7}, {193, 6}, {82, 6}, {48, 8}, {8, 7}, {118, 6}, + {16, 7}, {32, 7}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, + {209, 12}, {209, 12}, {209, 12}, {145, 3}, {209, 12}, {209, 12}, {209, 12}, + {146, 4}, {209, 12}, {149, 4}, {161, 4}, {64, 4}, {209, 12}, {160, 9}, + {172, 9}, {147, 5}, {184, 9}, {150, 5}, {162, 5}, {65, 5}, {196, 9}, + {153, 5}, {165, 5}, {67, 5}, {177, 5}, {73, 5}, {91, 5}, {64, 4}, + {209, 12}, {209, 12}, {175, 9}, {148, 6}, {143, 11}, {81, 9}, {99, 9}, + {66, 6}, {199, 9}, {87, 9}, {105, 9}, {68, 6}, {123, 9}, {74, 6}, + {92, 6}, {64, 4}, {209, 12}, {157, 6}, {111, 9}, {70, 6}, {129, 9}, + {76, 6}, {94, 6}, {65, 5}, {193, 6}, {82, 6}, {100, 6}, {67, 5}, + {118, 6}, {73, 5}, {91, 5}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, + {209, 12}, {190, 9}, {152, 7}, {164, 7}, {145, 3}, {202, 9}, {89, 9}, + {107, 9}, {69, 7}, {125, 9}, {75, 7}, {93, 7}, {64, 4}, {209, 12}, + 
{158, 7}, {113, 9}, {71, 7}, {131, 9}, {31, 11}, {47, 11}, {7, 9}, + {194, 7}, {83, 7}, {55, 11}, {11, 9}, {119, 7}, {19, 9}, {35, 9}, + {1, 7}, {209, 12}, {209, 12}, {173, 7}, {148, 6}, {137, 9}, {79, 7}, + {97, 7}, {66, 6}, {197, 7}, {85, 7}, {59, 11}, {13, 9}, {121, 7}, + {21, 9}, {37, 9}, {2, 7}, {209, 12}, {157, 6}, {109, 7}, {70, 6}, + {127, 7}, {25, 9}, {41, 9}, {4, 7}, {193, 6}, {82, 6}, {49, 9}, + {8, 7}, {118, 6}, {16, 7}, {32, 7}, {0, 6}, {209, 12}, {209, 12}, + {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {145, 3}, {205, 9}, + {156, 8}, {168, 8}, {146, 4}, {180, 8}, {149, 4}, {161, 4}, {64, 4}, + {209, 12}, {159, 8}, {115, 9}, {72, 8}, {133, 9}, {78, 8}, {96, 8}, + {65, 5}, {195, 8}, {84, 8}, {102, 8}, {67, 5}, {120, 8}, {73, 5}, + {91, 5}, {64, 4}, {209, 12}, {209, 12}, {174, 8}, {148, 6}, {139, 9}, + {80, 8}, {98, 8}, {66, 6}, {198, 8}, {86, 8}, {61, 11}, {14, 9}, + {122, 8}, {22, 9}, {38, 9}, {3, 8}, {209, 12}, {157, 6}, {110, 8}, + {70, 6}, {128, 8}, {26, 9}, {42, 9}, {5, 8}, {193, 6}, {82, 6}, + {50, 9}, {9, 8}, {118, 6}, {17, 8}, {33, 8}, {0, 6}, {209, 12}, + {209, 12}, {209, 12}, {209, 12}, {189, 8}, {152, 7}, {164, 7}, {145, 3}, + {201, 8}, {88, 8}, {106, 8}, {69, 7}, {124, 8}, {75, 7}, {93, 7}, + {64, 4}, {209, 12}, {158, 7}, {112, 8}, {71, 7}, {130, 8}, {28, 9}, + {44, 9}, {6, 8}, {194, 7}, {83, 7}, {52, 9}, {10, 8}, {119, 7}, + {18, 8}, {34, 8}, {1, 7}, {209, 12}, {209, 12}, {173, 7}, {148, 6}, + {136, 8}, {79, 7}, {97, 7}, {66, 6}, {197, 7}, {85, 7}, {56, 9}, + {12, 8}, {121, 7}, {20, 8}, {36, 8}, {2, 7}, {209, 12}, {157, 6}, + {109, 7}, {70, 6}, {127, 7}, {24, 8}, {40, 8}, {4, 7}, {193, 6}, + {82, 6}, {48, 8}, {8, 7}, {118, 6}, {16, 7}, {32, 7}, {0, 6}, + {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, + {145, 3}, {209, 12}, {209, 12}, {209, 12}, {146, 4}, {209, 12}, {149, 4}, + {161, 4}, {64, 4}, {209, 12}, {209, 12}, {209, 12}, {147, 5}, {209, 12}, + {150, 5}, {162, 5}, {65, 5}, {209, 12}, {153, 5}, 
{165, 5}, {67, 5}, + {177, 5}, {73, 5}, {91, 5}, {64, 4}, {209, 12}, {209, 12}, {176, 10}, + {148, 6}, {188, 10}, {151, 6}, {163, 6}, {66, 6}, {200, 10}, {154, 6}, + {166, 6}, {68, 6}, {178, 6}, {74, 6}, {92, 6}, {64, 4}, {209, 12}, + {157, 6}, {169, 6}, {70, 6}, {181, 6}, {76, 6}, {94, 6}, {65, 5}, + {193, 6}, {82, 6}, {100, 6}, {67, 5}, {118, 6}, {73, 5}, {91, 5}, + {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {191, 10}, {152, 7}, + {164, 7}, {145, 3}, {203, 10}, {90, 10}, {108, 10}, {69, 7}, {126, 10}, + {75, 7}, {93, 7}, {64, 4}, {209, 12}, {158, 7}, {114, 10}, {71, 7}, + {132, 10}, {77, 7}, {95, 7}, {65, 5}, {194, 7}, {83, 7}, {101, 7}, + {67, 5}, {119, 7}, {73, 5}, {91, 5}, {1, 7}, {209, 12}, {209, 12}, + {173, 7}, {148, 6}, {138, 10}, {79, 7}, {97, 7}, {66, 6}, {197, 7}, + {85, 7}, {103, 7}, {68, 6}, {121, 7}, {74, 6}, {92, 6}, {2, 7}, + {209, 12}, {157, 6}, {109, 7}, {70, 6}, {127, 7}, {76, 6}, {94, 6}, + {4, 7}, {193, 6}, {82, 6}, {100, 6}, {8, 7}, {118, 6}, {16, 7}, + {32, 7}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, + {209, 12}, {209, 12}, {145, 3}, {206, 10}, {156, 8}, {168, 8}, {146, 4}, + {180, 8}, {149, 4}, {161, 4}, {64, 4}, {209, 12}, {159, 8}, {116, 10}, + {72, 8}, {134, 10}, {78, 8}, {96, 8}, {65, 5}, {195, 8}, {84, 8}, + {102, 8}, {67, 5}, {120, 8}, {73, 5}, {91, 5}, {64, 4}, {209, 12}, + {209, 12}, {174, 8}, {148, 6}, {140, 10}, {80, 8}, {98, 8}, {66, 6}, + {198, 8}, {86, 8}, {62, 11}, {15, 10}, {122, 8}, {23, 10}, {39, 10}, + {3, 8}, {209, 12}, {157, 6}, {110, 8}, {70, 6}, {128, 8}, {27, 10}, + {43, 10}, {5, 8}, {193, 6}, {82, 6}, {51, 10}, {9, 8}, {118, 6}, + {17, 8}, {33, 8}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, + {189, 8}, {152, 7}, {164, 7}, {145, 3}, {201, 8}, {88, 8}, {106, 8}, + {69, 7}, {124, 8}, {75, 7}, {93, 7}, {64, 4}, {209, 12}, {158, 7}, + {112, 8}, {71, 7}, {130, 8}, {29, 10}, {45, 10}, {6, 8}, {194, 7}, + {83, 7}, {53, 10}, {10, 8}, {119, 7}, {18, 8}, {34, 8}, {1, 7}, + {209, 12}, 
{209, 12}, {173, 7}, {148, 6}, {136, 8}, {79, 7}, {97, 7}, + {66, 6}, {197, 7}, {85, 7}, {57, 10}, {12, 8}, {121, 7}, {20, 8}, + {36, 8}, {2, 7}, {209, 12}, {157, 6}, {109, 7}, {70, 6}, {127, 7}, + {24, 8}, {40, 8}, {4, 7}, {193, 6}, {82, 6}, {48, 8}, {8, 7}, + {118, 6}, {16, 7}, {32, 7}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, + {209, 12}, {209, 12}, {209, 12}, {209, 12}, {145, 3}, {209, 12}, {209, 12}, + {209, 12}, {146, 4}, {209, 12}, {149, 4}, {161, 4}, {64, 4}, {209, 12}, + {160, 9}, {172, 9}, {147, 5}, {184, 9}, {150, 5}, {162, 5}, {65, 5}, + {196, 9}, {153, 5}, {165, 5}, {67, 5}, {177, 5}, {73, 5}, {91, 5}, + {64, 4}, {209, 12}, {209, 12}, {175, 9}, {148, 6}, {142, 10}, {81, 9}, + {99, 9}, {66, 6}, {199, 9}, {87, 9}, {105, 9}, {68, 6}, {123, 9}, + {74, 6}, {92, 6}, {64, 4}, {209, 12}, {157, 6}, {111, 9}, {70, 6}, + {129, 9}, {76, 6}, {94, 6}, {65, 5}, {193, 6}, {82, 6}, {100, 6}, + {67, 5}, {118, 6}, {73, 5}, {91, 5}, {0, 6}, {209, 12}, {209, 12}, + {209, 12}, {209, 12}, {190, 9}, {152, 7}, {164, 7}, {145, 3}, {202, 9}, + {89, 9}, {107, 9}, {69, 7}, {125, 9}, {75, 7}, {93, 7}, {64, 4}, + {209, 12}, {158, 7}, {113, 9}, {71, 7}, {131, 9}, {30, 10}, {46, 10}, + {7, 9}, {194, 7}, {83, 7}, {54, 10}, {11, 9}, {119, 7}, {19, 9}, + {35, 9}, {1, 7}, {209, 12}, {209, 12}, {173, 7}, {148, 6}, {137, 9}, + {79, 7}, {97, 7}, {66, 6}, {197, 7}, {85, 7}, {58, 10}, {13, 9}, + {121, 7}, {21, 9}, {37, 9}, {2, 7}, {209, 12}, {157, 6}, {109, 7}, + {70, 6}, {127, 7}, {25, 9}, {41, 9}, {4, 7}, {193, 6}, {82, 6}, + {49, 9}, {8, 7}, {118, 6}, {16, 7}, {32, 7}, {0, 6}, {209, 12}, + {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {145, 3}, + {205, 9}, {156, 8}, {168, 8}, {146, 4}, {180, 8}, {149, 4}, {161, 4}, + {64, 4}, {209, 12}, {159, 8}, {115, 9}, {72, 8}, {133, 9}, {78, 8}, + {96, 8}, {65, 5}, {195, 8}, {84, 8}, {102, 8}, {67, 5}, {120, 8}, + {73, 5}, {91, 5}, {64, 4}, {209, 12}, {209, 12}, {174, 8}, {148, 6}, + {139, 9}, {80, 8}, {98, 8}, {66, 6}, {198, 8}, 
{86, 8}, {60, 10}, + {14, 9}, {122, 8}, {22, 9}, {38, 9}, {3, 8}, {209, 12}, {157, 6}, + {110, 8}, {70, 6}, {128, 8}, {26, 9}, {42, 9}, {5, 8}, {193, 6}, + {82, 6}, {50, 9}, {9, 8}, {118, 6}, {17, 8}, {33, 8}, {0, 6}, + {209, 12}, {209, 12}, {209, 12}, {209, 12}, {189, 8}, {152, 7}, {164, 7}, + {145, 3}, {201, 8}, {88, 8}, {106, 8}, {69, 7}, {124, 8}, {75, 7}, + {93, 7}, {64, 4}, {209, 12}, {158, 7}, {112, 8}, {71, 7}, {130, 8}, + {28, 9}, {44, 9}, {6, 8}, {194, 7}, {83, 7}, {52, 9}, {10, 8}, + {119, 7}, {18, 8}, {34, 8}, {1, 7}, {209, 12}, {209, 12}, {173, 7}, + {148, 6}, {136, 8}, {79, 7}, {97, 7}, {66, 6}, {197, 7}, {85, 7}, + {56, 9}, {12, 8}, {121, 7}, {20, 8}, {36, 8}, {2, 7}, {209, 12}, + {157, 6}, {109, 7}, {70, 6}, {127, 7}, {24, 8}, {40, 8}, {4, 7}, + {193, 6}, {82, 6}, {48, 8}, {8, 7}, {118, 6}, {16, 7}, {32, 7}, + {0, 6}}; +} // namespace utf8_to_utf16 +} // namespace tables +} // unnamed namespace +} // namespace simdutf + +#endif // SIMDUTF_UTF8_TO_UTF16_TABLES_H +/* end file src/tables/utf8_to_utf16_tables.h */ +/* begin file src/tables/utf16_to_utf8_tables.h */ +// file generated by scripts/sse_convert_utf16_to_utf8.py +#ifndef SIMDUTF_UTF16_TO_UTF8_TABLES_H +#define SIMDUTF_UTF16_TO_UTF8_TABLES_H + +namespace simdutf { +namespace { +namespace tables { +namespace utf16_to_utf8 { + +// 1 byte for length, 16 bytes for mask +const uint8_t pack_1_2_utf8_bytes[256][17] = { + {16, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}, + {15, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80}, + {15, 1, 0, 3, 2, 5, 4, 7, 6, 8, 11, 10, 13, 12, 15, 14, 0x80}, + {14, 0, 3, 2, 5, 4, 7, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, + {15, 1, 0, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80}, + {14, 0, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, + {14, 1, 0, 2, 5, 4, 7, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, + {13, 0, 2, 5, 4, 7, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, + {15, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 10, 13, 12, 15, 14, 
0x80}, + {14, 0, 3, 2, 5, 4, 7, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80}, + {14, 1, 0, 3, 2, 5, 4, 7, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80}, + {13, 0, 3, 2, 5, 4, 7, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, + {14, 1, 0, 2, 5, 4, 7, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80}, + {13, 0, 2, 5, 4, 7, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, + {13, 1, 0, 2, 5, 4, 7, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, + {12, 0, 2, 5, 4, 7, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {15, 1, 0, 3, 2, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80}, + {14, 0, 3, 2, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, + {14, 1, 0, 3, 2, 4, 7, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, + {13, 0, 3, 2, 4, 7, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, + {14, 1, 0, 2, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, + {13, 0, 2, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, + {13, 1, 0, 2, 4, 7, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, + {12, 0, 2, 4, 7, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {14, 1, 0, 3, 2, 4, 7, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80}, + {13, 0, 3, 2, 4, 7, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, + {13, 1, 0, 3, 2, 4, 7, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, + {12, 0, 3, 2, 4, 7, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {13, 1, 0, 2, 4, 7, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, + {12, 0, 2, 4, 7, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 2, 4, 7, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 2, 4, 7, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {15, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 15, 14, 0x80}, + {14, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80}, + {14, 1, 0, 3, 2, 5, 4, 7, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80}, + {13, 0, 3, 2, 5, 4, 7, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80}, + {14, 1, 0, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80}, + {13, 0, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 
15, 14, 0x80, 0x80, 0x80}, + {13, 1, 0, 2, 5, 4, 7, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80}, + {12, 0, 2, 5, 4, 7, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {14, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80}, + {13, 0, 3, 2, 5, 4, 7, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80}, + {13, 1, 0, 3, 2, 5, 4, 7, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80}, + {12, 0, 3, 2, 5, 4, 7, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {13, 1, 0, 2, 5, 4, 7, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80}, + {12, 0, 2, 5, 4, 7, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 2, 5, 4, 7, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 2, 5, 4, 7, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {14, 1, 0, 3, 2, 4, 7, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80}, + {13, 0, 3, 2, 4, 7, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80}, + {13, 1, 0, 3, 2, 4, 7, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80}, + {12, 0, 3, 2, 4, 7, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {13, 1, 0, 2, 4, 7, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80}, + {12, 0, 2, 4, 7, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 2, 4, 7, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 2, 4, 7, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {13, 1, 0, 3, 2, 4, 7, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80}, + {12, 0, 3, 2, 4, 7, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 3, 2, 4, 7, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 3, 2, 4, 7, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 2, 4, 7, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 2, 4, 7, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 0, 2, 4, 7, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 2, 4, 7, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {15, 1, 0, 3, 2, 5, 4, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80}, + {14, 0, 3, 2, 5, 4, 6, 9, 8, 11, 10, 
13, 12, 15, 14, 0x80, 0x80}, + {14, 1, 0, 3, 2, 5, 4, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, + {13, 0, 3, 2, 5, 4, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, + {14, 1, 0, 2, 5, 4, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, + {13, 0, 2, 5, 4, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, + {13, 1, 0, 2, 5, 4, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, + {12, 0, 2, 5, 4, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {14, 1, 0, 3, 2, 5, 4, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80}, + {13, 0, 3, 2, 5, 4, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, + {13, 1, 0, 3, 2, 5, 4, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, + {12, 0, 3, 2, 5, 4, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {13, 1, 0, 2, 5, 4, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, + {12, 0, 2, 5, 4, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 2, 5, 4, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 2, 5, 4, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {14, 1, 0, 3, 2, 4, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, + {13, 0, 3, 2, 4, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, + {13, 1, 0, 3, 2, 4, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, + {12, 0, 3, 2, 4, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {13, 1, 0, 2, 4, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, + {12, 0, 2, 4, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 2, 4, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 2, 4, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {13, 1, 0, 3, 2, 4, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, + {12, 0, 3, 2, 4, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 3, 2, 4, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 3, 2, 4, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 2, 4, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 2, 4, 6, 9, 8, 10, 
13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 0, 2, 4, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 2, 4, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {14, 1, 0, 3, 2, 5, 4, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80}, + {13, 0, 3, 2, 5, 4, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80}, + {13, 1, 0, 3, 2, 5, 4, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80}, + {12, 0, 3, 2, 5, 4, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {13, 1, 0, 2, 5, 4, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80}, + {12, 0, 2, 5, 4, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 2, 5, 4, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 2, 5, 4, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {13, 1, 0, 3, 2, 5, 4, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80}, + {12, 0, 3, 2, 5, 4, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 3, 2, 5, 4, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 3, 2, 5, 4, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 2, 5, 4, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 2, 5, 4, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 0, 2, 5, 4, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 2, 5, 4, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {13, 1, 0, 3, 2, 4, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80}, + {12, 0, 3, 2, 4, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 3, 2, 4, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 3, 2, 4, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 2, 4, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 2, 4, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 0, 2, 4, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 2, 4, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 3, 2, 4, 6, 9, 8, 10, 12, 
15, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 3, 2, 4, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 0, 3, 2, 4, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 3, 2, 4, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 0, 2, 4, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 2, 4, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 1, 0, 2, 4, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {9, 0, 2, 4, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {15, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14, 0x80}, + {14, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80}, + {14, 1, 0, 3, 2, 5, 4, 7, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80}, + {13, 0, 3, 2, 5, 4, 7, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80}, + {14, 1, 0, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80}, + {13, 0, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80}, + {13, 1, 0, 2, 5, 4, 7, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80}, + {12, 0, 2, 5, 4, 7, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {14, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80}, + {13, 0, 3, 2, 5, 4, 7, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80}, + {13, 1, 0, 3, 2, 5, 4, 7, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80}, + {12, 0, 3, 2, 5, 4, 7, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {13, 1, 0, 2, 5, 4, 7, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80}, + {12, 0, 2, 5, 4, 7, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 2, 5, 4, 7, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 2, 5, 4, 7, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {14, 1, 0, 3, 2, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80}, + {13, 0, 3, 2, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80}, + {13, 1, 0, 3, 2, 4, 7, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80}, + {12, 0, 3, 2, 4, 7, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {13, 1, 0, 2, 4, 7, 6, 9, 8, 
11, 10, 13, 12, 14, 0x80, 0x80, 0x80}, + {12, 0, 2, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 2, 4, 7, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 2, 4, 7, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {13, 1, 0, 3, 2, 4, 7, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80}, + {12, 0, 3, 2, 4, 7, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 3, 2, 4, 7, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 3, 2, 4, 7, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 2, 4, 7, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 2, 4, 7, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 0, 2, 4, 7, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 2, 4, 7, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {14, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80}, + {13, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80}, + {13, 1, 0, 3, 2, 5, 4, 7, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80}, + {12, 0, 3, 2, 5, 4, 7, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {13, 1, 0, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80}, + {12, 0, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 2, 5, 4, 7, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 2, 5, 4, 7, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {13, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80}, + {12, 0, 3, 2, 5, 4, 7, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 3, 2, 5, 4, 7, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 3, 2, 5, 4, 7, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 2, 5, 4, 7, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 2, 5, 4, 7, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 0, 2, 5, 4, 7, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 2, 5, 4, 7, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {13, 
1, 0, 3, 2, 4, 7, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80}, + {12, 0, 3, 2, 4, 7, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 3, 2, 4, 7, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 3, 2, 4, 7, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 2, 4, 7, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 2, 4, 7, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 0, 2, 4, 7, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 2, 4, 7, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 3, 2, 4, 7, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 3, 2, 4, 7, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 0, 3, 2, 4, 7, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 3, 2, 4, 7, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 0, 2, 4, 7, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 2, 4, 7, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 1, 0, 2, 4, 7, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {9, 0, 2, 4, 7, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {14, 1, 0, 3, 2, 5, 4, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80}, + {13, 0, 3, 2, 5, 4, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80}, + {13, 1, 0, 3, 2, 5, 4, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80}, + {12, 0, 3, 2, 5, 4, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {13, 1, 0, 2, 5, 4, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80}, + {12, 0, 2, 5, 4, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 2, 5, 4, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 2, 5, 4, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {13, 1, 0, 3, 2, 5, 4, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80}, + {12, 0, 3, 2, 5, 4, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 3, 2, 5, 4, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 3, 2, 5, 4, 6, 8, 
10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 2, 5, 4, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 2, 5, 4, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 0, 2, 5, 4, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 2, 5, 4, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {13, 1, 0, 3, 2, 4, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80}, + {12, 0, 3, 2, 4, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 3, 2, 4, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 3, 2, 4, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 2, 4, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 2, 4, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 0, 2, 4, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 2, 4, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 3, 2, 4, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 3, 2, 4, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 0, 3, 2, 4, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 3, 2, 4, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 0, 2, 4, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 2, 4, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 1, 0, 2, 4, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {9, 0, 2, 4, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {13, 1, 0, 3, 2, 5, 4, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80}, + {12, 0, 3, 2, 5, 4, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 3, 2, 5, 4, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 3, 2, 5, 4, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 2, 5, 4, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 2, 5, 4, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 
1, 0, 2, 5, 4, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 2, 5, 4, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 3, 2, 5, 4, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 3, 2, 5, 4, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 0, 3, 2, 5, 4, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 3, 2, 5, 4, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 0, 2, 5, 4, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 2, 5, 4, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 1, 0, 2, 5, 4, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {9, 0, 2, 5, 4, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 3, 2, 4, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 3, 2, 4, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 0, 3, 2, 4, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 3, 2, 4, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 0, 2, 4, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 2, 4, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 1, 0, 2, 4, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {9, 0, 2, 4, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {11, 1, 0, 3, 2, 4, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 3, 2, 4, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 1, 0, 3, 2, 4, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {9, 0, 3, 2, 4, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 1, 0, 2, 4, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {9, 0, 2, 4, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {9, 1, 0, 2, 4, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {8, 0, 2, 4, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}}; + +// 1 
byte for length, 16 bytes for mask +const uint8_t pack_1_2_3_utf8_bytes[256][17] = { + {12, 2, 3, 1, 6, 7, 5, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80}, + {9, 6, 7, 5, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {11, 3, 1, 6, 7, 5, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 6, 7, 5, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {9, 2, 3, 1, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 3, 1, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 0, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {11, 2, 3, 1, 7, 5, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80}, + {8, 7, 5, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {10, 3, 1, 7, 5, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {9, 0, 7, 5, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {10, 2, 3, 1, 4, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {7, 4, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {9, 3, 1, 4, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 0, 4, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 2, 3, 1, 6, 7, 5, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {6, 6, 7, 5, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 3, 1, 6, 7, 5, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 0, 6, 7, 5, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 2, 3, 1, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 3, 1, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 
0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 0, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {8, 2, 3, 1, 7, 5, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 7, 5, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 3, 1, 7, 5, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 0, 7, 5, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 2, 3, 1, 4, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 4, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 3, 1, 4, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 0, 4, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {11, 2, 3, 1, 6, 7, 5, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80}, + {8, 6, 7, 5, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {10, 3, 1, 6, 7, 5, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {9, 0, 6, 7, 5, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 2, 3, 1, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 3, 1, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 0, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {10, 2, 3, 1, 7, 5, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {7, 7, 5, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 3, 1, 7, 5, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 0, 7, 5, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 2, 3, 1, 4, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 4, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 
0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 3, 1, 4, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 0, 4, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {10, 2, 3, 1, 6, 7, 5, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {7, 6, 7, 5, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 3, 1, 6, 7, 5, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {8, 0, 6, 7, 5, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 2, 3, 1, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 3, 1, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 0, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {9, 2, 3, 1, 7, 5, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {6, 7, 5, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 3, 1, 7, 5, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 0, 7, 5, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 2, 3, 1, 4, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 4, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 3, 1, 4, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 0, 4, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {9, 2, 3, 1, 6, 7, 5, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {6, 6, 7, 5, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 3, 1, 6, 7, 5, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 0, 6, 7, 5, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 2, 3, 1, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 
0x80, + 0x80, 0x80}, + {3, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 3, 1, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 0, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {8, 2, 3, 1, 7, 5, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 7, 5, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 3, 1, 7, 5, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 0, 7, 5, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 2, 3, 1, 4, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 4, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 3, 1, 4, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 0, 4, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {6, 2, 3, 1, 6, 7, 5, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {3, 6, 7, 5, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 3, 1, 6, 7, 5, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 0, 6, 7, 5, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 2, 3, 1, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {2, 3, 1, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {1, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {5, 2, 3, 1, 7, 5, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {2, 7, 5, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {4, 3, 1, 7, 5, 0x80, 
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 0, 7, 5, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {4, 2, 3, 1, 4, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {1, 4, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {3, 3, 1, 4, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {2, 0, 4, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {8, 2, 3, 1, 6, 7, 5, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 6, 7, 5, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 3, 1, 6, 7, 5, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 0, 6, 7, 5, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 2, 3, 1, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {2, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {4, 3, 1, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 0, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {7, 2, 3, 1, 7, 5, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 7, 5, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {6, 3, 1, 7, 5, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 0, 7, 5, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {6, 2, 3, 1, 4, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {3, 4, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 3, 1, 4, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 0, 4, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 
0x80, 0x80}, + {7, 2, 3, 1, 6, 7, 5, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 6, 7, 5, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {6, 3, 1, 6, 7, 5, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 0, 6, 7, 5, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 2, 3, 1, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {1, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {3, 3, 1, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {2, 0, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 2, 3, 1, 7, 5, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {3, 7, 5, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 3, 1, 7, 5, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 0, 7, 5, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 2, 3, 1, 4, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {2, 4, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {4, 3, 1, 4, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 0, 4, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {11, 2, 3, 1, 6, 7, 5, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80}, + {8, 6, 7, 5, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {10, 3, 1, 6, 7, 5, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {9, 0, 6, 7, 5, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 2, 3, 1, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, 
+ {7, 3, 1, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 0, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {10, 2, 3, 1, 7, 5, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {7, 7, 5, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 3, 1, 7, 5, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 0, 7, 5, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 2, 3, 1, 4, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 4, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 3, 1, 4, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 0, 4, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 2, 3, 1, 6, 7, 5, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 6, 7, 5, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 3, 1, 6, 7, 5, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 0, 6, 7, 5, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 2, 3, 1, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {2, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {4, 3, 1, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {3, 0, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {7, 2, 3, 1, 7, 5, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 7, 5, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 3, 1, 7, 5, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 0, 7, 5, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {6, 2, 3, 1, 4, 15, 13, 0x80, 0x80, 0x80, 
0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 4, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 3, 1, 4, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 0, 4, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {10, 2, 3, 1, 6, 7, 5, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {7, 6, 7, 5, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 3, 1, 6, 7, 5, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {8, 0, 6, 7, 5, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 2, 3, 1, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 3, 1, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 0, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {9, 2, 3, 1, 7, 5, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {6, 7, 5, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 3, 1, 7, 5, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 0, 7, 5, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 2, 3, 1, 4, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 4, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 3, 1, 4, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 0, 4, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {9, 2, 3, 1, 6, 7, 5, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {6, 6, 7, 5, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 3, 1, 6, 7, 5, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 0, 6, 7, 5, 8, 15, 13, 
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 2, 3, 1, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 3, 1, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 0, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {8, 2, 3, 1, 7, 5, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 7, 5, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 3, 1, 7, 5, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 0, 7, 5, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 2, 3, 1, 4, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 4, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 3, 1, 4, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 0, 4, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {10, 2, 3, 1, 6, 7, 5, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {7, 6, 7, 5, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 3, 1, 6, 7, 5, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {8, 0, 6, 7, 5, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 2, 3, 1, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 3, 1, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 0, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {9, 2, 3, 1, 7, 5, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {6, 7, 5, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 
0x80}, + {8, 3, 1, 7, 5, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 0, 7, 5, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 2, 3, 1, 4, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 4, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 3, 1, 4, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 0, 4, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 2, 3, 1, 6, 7, 5, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 6, 7, 5, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {6, 3, 1, 6, 7, 5, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 0, 6, 7, 5, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 2, 3, 1, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {1, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {3, 3, 1, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {2, 0, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 2, 3, 1, 7, 5, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {3, 7, 5, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 3, 1, 7, 5, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 0, 7, 5, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 2, 3, 1, 4, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {2, 4, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {4, 3, 1, 4, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 0, 4, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 
0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {9, 2, 3, 1, 6, 7, 5, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {6, 6, 7, 5, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 3, 1, 6, 7, 5, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 0, 6, 7, 5, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 2, 3, 1, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 3, 1, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 0, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {8, 2, 3, 1, 7, 5, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 7, 5, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 3, 1, 7, 5, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 0, 7, 5, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 2, 3, 1, 4, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 4, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 3, 1, 4, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 0, 4, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 2, 3, 1, 6, 7, 5, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 6, 7, 5, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 3, 1, 6, 7, 5, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 0, 6, 7, 5, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 2, 3, 1, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {2, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + 
{4, 3, 1, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 0, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {7, 2, 3, 1, 7, 5, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 7, 5, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {6, 3, 1, 7, 5, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 0, 7, 5, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {6, 2, 3, 1, 4, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {3, 4, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 3, 1, 4, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 0, 4, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}}; + +} // namespace utf16_to_utf8 +} // namespace tables +} // unnamed namespace +} // namespace simdutf + +#endif // SIMDUTF_UTF16_TO_UTF8_TABLES_H +/* end file src/tables/utf16_to_utf8_tables.h */ +/* begin file src/tables/utf32_to_utf16_tables.h */ +// file generated by scripts/sse_convert_utf32_to_utf16.py +#ifndef SIMDUTF_UTF32_TO_UTF16_TABLES_H +#define SIMDUTF_UTF32_TO_UTF16_TABLES_H + +namespace simdutf { +namespace { +namespace tables { +namespace utf32_to_utf16 { + +const uint8_t pack_utf32_to_utf16le[16][16] = { + {0, 1, 4, 5, 8, 9, 12, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {0, 1, 2, 3, 4, 5, 8, 9, 12, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {0, 1, 4, 5, 6, 7, 8, 9, 12, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 13, 0x80, 0x80, 0x80, 0x80}, + {0, 1, 4, 5, 8, 9, 10, 11, 12, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {0, 1, 2, 3, 4, 5, 8, 9, 10, 11, 12, 13, 0x80, 0x80, 0x80, 0x80}, + {0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 0x80, 0x80, 0x80, 0x80}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 0x80, 
0x80}, + {0, 1, 4, 5, 8, 9, 12, 13, 14, 15, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {0, 1, 2, 3, 4, 5, 8, 9, 12, 13, 14, 15, 0x80, 0x80, 0x80, 0x80}, + {0, 1, 4, 5, 6, 7, 8, 9, 12, 13, 14, 15, 0x80, 0x80, 0x80, 0x80}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 13, 14, 15, 0x80, 0x80}, + {0, 1, 4, 5, 8, 9, 10, 11, 12, 13, 14, 15, 0x80, 0x80, 0x80, 0x80}, + {0, 1, 2, 3, 4, 5, 8, 9, 10, 11, 12, 13, 14, 15, 0x80, 0x80}, + {0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0x80, 0x80}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, +}; + +const uint8_t pack_utf32_to_utf16be[16][16] = { + {1, 0, 5, 4, 9, 8, 13, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {1, 0, 3, 2, 5, 4, 9, 8, 13, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {1, 0, 5, 4, 7, 6, 9, 8, 13, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 13, 12, 0x80, 0x80, 0x80, 0x80}, + {1, 0, 5, 4, 9, 8, 11, 10, 13, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {1, 0, 3, 2, 5, 4, 9, 8, 11, 10, 13, 12, 0x80, 0x80, 0x80, 0x80}, + {1, 0, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 0x80, 0x80, 0x80, 0x80}, + {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 0x80, 0x80}, + {1, 0, 5, 4, 9, 8, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {1, 0, 3, 2, 5, 4, 9, 8, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {1, 0, 5, 4, 7, 6, 9, 8, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 13, 12, 15, 14, 0x80, 0x80}, + {1, 0, 5, 4, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {1, 0, 3, 2, 5, 4, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, + {1, 0, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, + {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}, +}; + +} // namespace utf32_to_utf16 +} // namespace tables +} // unnamed namespace +} // namespace simdutf + +#endif // SIMDUTF_UTF32_TO_UTF16_TABLES_H +/* end file src/tables/utf32_to_utf16_tables.h */ +// End of tables. 
+ +// Implementations: they need to be setup before including +// scalar/* code, as the scalar code is sometimes enabled +// only for peculiar build targets. + +// The best choice should always come first! +#ifndef SIMDUTF_REGULAR_VISUAL_STUDIO +SIMDUTF_DISABLE_UNUSED_WARNING +#endif +/* begin file src/simdutf/arm64.h */ +#ifndef SIMDUTF_ARM64_H +#define SIMDUTF_ARM64_H + +#ifdef SIMDUTF_FALLBACK_H + #error "arm64.h must be included before fallback.h" +#endif + + +#ifndef SIMDUTF_IMPLEMENTATION_ARM64 + #define SIMDUTF_IMPLEMENTATION_ARM64 (SIMDUTF_IS_ARM64) +#endif +#if SIMDUTF_IMPLEMENTATION_ARM64 && SIMDUTF_IS_ARM64 + #define SIMDUTF_CAN_ALWAYS_RUN_ARM64 1 +#else + #define SIMDUTF_CAN_ALWAYS_RUN_ARM64 0 +#endif + + +#if SIMDUTF_IMPLEMENTATION_ARM64 + +namespace simdutf { +/** + * Implementation for NEON (ARMv8). + */ +namespace arm64 {} // namespace arm64 +} // namespace simdutf + +/* begin file src/simdutf/arm64/implementation.h */ +#ifndef SIMDUTF_ARM64_IMPLEMENTATION_H +#define SIMDUTF_ARM64_IMPLEMENTATION_H + + +namespace simdutf { +namespace arm64 { + +namespace { +using namespace simdutf; +} + +class implementation final : public simdutf::implementation { +public: + simdutf_really_inline implementation() + : simdutf::implementation("arm64", "ARM NEON", + internal::instruction_set::NEON) {} +#if SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused int detect_encodings(const char *input, + size_t length) const noexcept final; +#endif // SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf8(const char *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 + simdutf_warn_unused result + validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_ASCII + simdutf_warn_unused bool validate_ascii(const char *buf, + size_t len) const 
noexcept final; + simdutf_warn_unused result + validate_ascii_with_errors(const char *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_ASCII +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII + simdutf_warn_unused bool + validate_utf16le_as_ascii(const char16_t *buf, + size_t len) const noexcept final; + + simdutf_warn_unused bool + validate_utf16be_as_ascii(const char16_t *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf16le(const char16_t *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused bool validate_utf16be(const char16_t *buf, + size_t len) const noexcept final; + simdutf_warn_unused result validate_utf16le_with_errors( + const char16_t *buf, size_t len) const noexcept final; + simdutf_warn_unused result validate_utf16be_with_errors( + const char16_t *buf, size_t len) const noexcept final; + void to_well_formed_utf16be(const char16_t *input, size_t len, + char16_t *output) const noexcept final; + void to_well_formed_utf16le(const char16_t *input, size_t len, + char16_t *output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf32(const char32_t *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused result validate_utf32_with_errors( + const char32_t *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf8( + const char *buf, size_t len, char *utf8_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && 
SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf16le( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_latin1_to_utf16be( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_latin1_with_errors( + const char *buf, size_t len, char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t convert_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused size_t convert_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; +#endif // 
SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t convert_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf32_with_errors( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + convert_utf16le_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16be_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( + const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( + const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16le_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16be_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t convert_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused 
result convert_utf16be_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + convert_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; + simdutf_warn_unused result + convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t convert_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf8_with_errors( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + convert_utf32_to_utf16le(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf32_to_utf16be(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused result 
convert_utf32_to_utf16be_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_utf16le(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_utf16be(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16le_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16be_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16le_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16be_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 + void change_endianness_utf16(const char16_t *buf, size_t length, + char16_t *output) const noexcept final; + simdutf_warn_unused size_t + count_utf16le(const char16_t *buf, size_t length) const noexcept override; + simdutf_warn_unused size_t + count_utf16be(const char16_t *buf, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 + simdutf_warn_unused size_t count_utf8(const char *buf, + size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t utf8_length_from_utf16le( + const char16_t *input, 
size_t length) const noexcept override; + simdutf_warn_unused size_t utf8_length_from_utf16be( + const char16_t *input, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t utf32_length_from_utf16le( + const char16_t *input, size_t length) const noexcept override; + simdutf_warn_unused size_t utf32_length_from_utf16be( + const char16_t *input, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t utf16_length_from_utf8( + const char *input, size_t length) const noexcept override; + simdutf_warn_unused result utf8_length_from_utf16le_with_replacement( + const char16_t *input, size_t length) const noexcept override; + ; + simdutf_warn_unused result utf8_length_from_utf16be_with_replacement( + const char16_t *input, size_t length) const noexcept override; + ; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t utf8_length_from_utf32( + const char32_t *input, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t utf16_length_from_utf32( + const char32_t *input, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t utf32_length_from_utf8( + const char *input, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t latin1_length_from_utf8( + const char *input, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if 
SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t utf8_length_from_latin1( + const char *input, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_BASE64 + simdutf_warn_unused result base64_to_binary( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept override; + simdutf_warn_unused full_result base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept override; + simdutf_warn_unused result base64_to_binary( + const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept override; + simdutf_warn_unused full_result base64_to_binary_details( + const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept override; + size_t binary_to_base64(const char *input, size_t length, char *output, + base64_options options) const noexcept override; + size_t + binary_to_base64_with_lines(const char *input, size_t length, char *output, + size_t line_length, + base64_options options) const noexcept override; + const char *find(const char *start, const char *end, + char character) const noexcept override; + const char16_t *find(const char16_t *start, const char16_t *end, + char16_t character) const noexcept override; +#endif // SIMDUTF_FEATURE_BASE64 +}; + +} // namespace arm64 +} // namespace simdutf + +#endif // SIMDUTF_ARM64_IMPLEMENTATION_H +/* end file src/simdutf/arm64/implementation.h */ + +/* begin file src/simdutf/arm64/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "arm64" 
+// #define SIMDUTF_IMPLEMENTATION arm64 +#define SIMDUTF_SIMD_HAS_BYTEMASK 1 +/* end file src/simdutf/arm64/begin.h */ + + // Declarations +/* begin file src/simdutf/arm64/intrinsics.h */ +#ifndef SIMDUTF_ARM64_INTRINSICS_H +#define SIMDUTF_ARM64_INTRINSICS_H + + +// This should be the correct header whether +// you use visual studio or other compilers. +#include + +#endif // SIMDUTF_ARM64_INTRINSICS_H +/* end file src/simdutf/arm64/intrinsics.h */ +/* begin file src/simdutf/arm64/bitmanipulation.h */ +#ifndef SIMDUTF_ARM64_BITMANIPULATION_H +#define SIMDUTF_ARM64_BITMANIPULATION_H + +namespace simdutf { +namespace arm64 { +namespace { + +/* result might be undefined when input_num is zero */ +simdutf_really_inline int count_ones(uint64_t input_num) { + return vaddv_u8(vcnt_u8(vcreate_u8(input_num))); +} + +#if SIMDUTF_NEED_TRAILING_ZEROES +simdutf_really_inline int trailing_zeroes(uint64_t input_num) { + #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). 
+ _BitScanForward64(&ret, input_num); + return (int)ret; + #else // SIMDUTF_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); + #endif // SIMDUTF_REGULAR_VISUAL_STUDIO +} +#endif +template T clear_least_significant_bit(T x) { + return (x & (x - 1)); +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdutf + +#endif // SIMDUTF_ARM64_BITMANIPULATION_H +/* end file src/simdutf/arm64/bitmanipulation.h */ +/* begin file src/simdutf/arm64/simd.h */ +#ifndef SIMDUTF_ARM64_SIMD_H +#define SIMDUTF_ARM64_SIMD_H + +#include + +namespace simdutf { +namespace arm64 { +namespace { +namespace simd { + +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO +namespace { + // Start of private section with Visual Studio workaround + + #ifndef simdutf_make_uint8x16_t + #define simdutf_make_uint8x16_t(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, \ + x11, x12, x13, x14, x15, x16) \ + ([=]() { \ + uint8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, \ + x9, x10, x11, x12, x13, x14, x15, x16}; \ + return vld1q_u8(array); \ + }()) + #endif + #ifndef simdutf_make_int8x16_t + #define simdutf_make_int8x16_t(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, \ + x11, x12, x13, x14, x15, x16) \ + ([=]() { \ + int8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, \ + x9, x10, x11, x12, x13, x14, x15, x16}; \ + return vld1q_s8(array); \ + }()) + #endif + + #ifndef simdutf_make_uint8x8_t + #define simdutf_make_uint8x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ + ([=]() { \ + uint8_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ + return vld1_u8(array); \ + }()) + #endif + #ifndef simdutf_make_int8x8_t + #define simdutf_make_int8x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ + ([=]() { \ + int8_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ + return vld1_s8(array); \ + }()) + #endif + #ifndef simdutf_make_uint16x8_t + #define simdutf_make_uint16x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ + ([=]() { \ + uint16_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ + return vld1q_u16(array); \ + }()) + #endif + #ifndef 
simdutf_make_int16x8_t + #define simdutf_make_int16x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ + ([=]() { \ + int16_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ + return vld1q_s16(array); \ + }()) + #endif + +// End of private section with Visual Studio workaround +} // namespace +#endif // SIMDUTF_REGULAR_VISUAL_STUDIO + +template struct simd8; + +// +// Base class of simd8 and simd8, both of which use uint8x16_t +// internally. +// +template > struct base_u8 { + uint8x16_t value; + static const int SIZE = sizeof(value); + void dump() const { +#ifdef SIMDUTF_LOGGING + uint8_t temp[16]; + vst1q_u8(temp, *this); + printf("[%04x, %04x, %04x, %04x, %04x, %04x, %04x, %04x,%04x, %04x, %04x, " + "%04x, %04x, %04x, %04x, %04x]\n", + temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], + temp[7], temp[8], temp[9], temp[10], temp[11], temp[12], temp[13], + temp[14], temp[15]); +#endif // SIMDUTF_LOGGING + } + // Conversion from/to SIMD register + simdutf_really_inline base_u8(const uint8x16_t _value) : value(_value) {} + simdutf_really_inline operator const uint8x16_t &() const { + return this->value; + } + + // Bit operations + simdutf_really_inline simd8 operator|(const simd8 other) const { + return vorrq_u8(*this, other); + } + simdutf_really_inline simd8 operator&(const simd8 other) const { + return vandq_u8(*this, other); + } + simdutf_really_inline simd8 operator^(const simd8 other) const { + return veorq_u8(*this, other); + } + simdutf_really_inline simd8 &operator|=(const simd8 other) { + auto this_cast = static_cast *>(this); + *this_cast = *this_cast | other; + return *this_cast; + } + + friend simdutf_really_inline Mask operator==(const simd8 lhs, + const simd8 rhs) { + return vceqq_u8(lhs, rhs); + } + + template + simdutf_really_inline simd8 prev(const simd8 prev_chunk) const { + return vextq_u8(prev_chunk, *this, 16 - N); + } +}; + +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd8 : base_u8 { + static 
simdutf_really_inline simd8 splat(bool _value) { + return vmovq_n_u8(uint8_t(-(!!_value))); + } + + simdutf_really_inline simd8(const uint8x16_t _value) + : base_u8(_value) {} + // False constructor + simdutf_really_inline simd8() : simd8(vdupq_n_u8(0)) {} + // Splat constructor + simdutf_really_inline simd8(bool _value) : simd8(splat(_value)) {} + simdutf_really_inline void store(uint8_t dst[16]) const { + return vst1q_u8(dst, *this); + } + + // We return uint32_t instead of uint16_t because that seems to be more + // efficient for most purposes (cutting it down to uint16_t costs performance + // in some compilers). + simdutf_really_inline uint32_t to_bitmask() const { +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint8x16_t bit_mask = + simdutf_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); +#else + const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; +#endif + auto minput = *this & bit_mask; + uint8x16_t tmp = vpaddq_u8(minput, minput); + tmp = vpaddq_u8(tmp, tmp); + tmp = vpaddq_u8(tmp, tmp); + return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); + } + + // Returns 4-bit out of each byte, alternating between the high 4 bits and low + // bits result it is 64 bit. This method is expected to be faster than none() + // and is equivalent when the vector register is the result of a comparison, + // with byte values 0xff and 0x00. 
+ simdutf_really_inline uint64_t to_bitmask64() const { + return vget_lane_u64( + vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(*this), 4)), 0); + } +}; + +// Unsigned bytes +template <> struct simd8 : base_u8 { + static simdutf_really_inline simd8 splat(uint8_t _value) { + return vmovq_n_u8(_value); + } + static simdutf_really_inline simd8 zero() { return vdupq_n_u8(0); } + static simdutf_really_inline simd8 load(const uint8_t *values) { + return vld1q_u8(values); + } + simdutf_really_inline simd8(const uint8x16_t _value) + : base_u8(_value) {} + // Zero constructor + simdutf_really_inline simd8() : simd8(zero()) {} + // Array constructor + simdutf_really_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} + // Splat constructor + simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Member-by-member initialization +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + simdutf_really_inline + simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, + uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, + uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) + : simd8(simdutf_make_uint8x16_t(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, + v10, v11, v12, v13, v14, v15)) {} +#else + simdutf_really_inline + simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, + uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, + uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) + : simd8(uint8x16_t{v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15}) {} +#endif + + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdutf_really_inline static simd8 + repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, + uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, + uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, + uint8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, 
+ v13, v14, v15); + } + + // Store to array + simdutf_really_inline void store(uint8_t dst[16]) const { + return vst1q_u8(dst, *this); + } + + // Addition/subtraction are the same for signed and unsigned + simdutf_really_inline simd8 + operator-(const simd8 other) const { + return vsubq_u8(*this, other); + } + simdutf_really_inline simd8 &operator-=(const simd8 other) { + *this = *this - other; + return *this; + } + + // Order-specific operations + simdutf_really_inline uint8_t max_val() const { return vmaxvq_u8(*this); } + simdutf_really_inline simd8 + operator>=(const simd8 other) const { + return vcgeq_u8(*this, other); + } + simdutf_really_inline simd8 + operator>(const simd8 other) const { + return vcgtq_u8(*this, other); + } + // Same as >, but instead of guaranteeing all 1's == true, false = 0 and true + // = nonzero. For ARM, returns all 1's. + simdutf_really_inline simd8 + gt_bits(const simd8 other) const { + return simd8(*this > other); + } + + // Bit-specific operations + simdutf_really_inline simd8 any_bits_set(simd8 bits) const { + return vtstq_u8(*this, bits); + } + + simdutf_really_inline bool is_ascii() const { + return this->max_val() < 0b10000000u; + } + + simdutf_really_inline bool any_bits_set_anywhere() const { + return this->max_val() != 0; + } + template simdutf_really_inline simd8 shr() const { + return vshrq_n_u8(*this, N); + } + simdutf_really_inline uint16_t sum_bytes() const { return vaddvq_u8(*this); } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior + // for out of range values) + template + simdutf_really_inline simd8 lookup_16(simd8 lookup_table) const { + return lookup_table.apply_lookup_16_to(*this); + } + + template + simdutf_really_inline simd8 + lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, + L replace5, L replace6, L replace7, L replace8, L replace9, + L replace10, L replace11, L replace12, L replace13, L replace14, + L replace15) const { + return 
lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, replace4, replace5, replace6, + replace7, replace8, replace9, replace10, replace11, replace12, + replace13, replace14, replace15)); + } + + template + simdutf_really_inline simd8 + apply_lookup_16_to(const simd8 original) const { + return vqtbl1q_u8(*this, simd8(original)); + } +}; + +// Signed bytes +template <> struct simd8 { + int8x16_t value; + static const int SIZE = sizeof(value); + + static simdutf_really_inline simd8 splat(int8_t _value) { + return vmovq_n_s8(_value); + } + static simdutf_really_inline simd8 zero() { return vdupq_n_s8(0); } + static simdutf_really_inline simd8 load(const int8_t values[16]) { + return vld1q_s8(values); + } + + // Use ST2 instead of UXTL+UXTL2 to interleave zeroes. UXTL is actually a + // USHLL #0, and shifting in NEON is actually quite slow. + // + // While this needs the registers to be in a specific order, bigger cores can + // interleave these with no overhead, and it still performs decently on little + // cores. + // movi v1.3d, #0 + // mov v0.16b, value[0] + // st2 {v0.16b, v1.16b}, [ptr], #32 + // mov v0.16b, value[1] + // st2 {v0.16b, v1.16b}, [ptr], #32 + // ... + template + simdutf_really_inline void store_ascii_as_utf16(char16_t *p) const { + simdutf_constexpr auto matches = match_system(big_endian); + const int8x16x2_t pair = matches + ? int8x16x2_t{{this->value, vmovq_n_s8(0)}} + : int8x16x2_t{{vmovq_n_s8(0), this->value}}; + vst2q_s8(reinterpret_cast(p), pair); + } + + // In places where the table can be reused, which is most uses in simdutf, it + // is worth it to do 4 table lookups, as there is no direct zero extension + // from u8 to u32. 
+ simdutf_really_inline void store_ascii_as_utf32_tbl(char32_t *p) const { + const simd8 tb1{0, 255, 255, 255, 1, 255, 255, 255, + 2, 255, 255, 255, 3, 255, 255, 255}; + const simd8 tb2{4, 255, 255, 255, 5, 255, 255, 255, + 6, 255, 255, 255, 7, 255, 255, 255}; + const simd8 tb3{8, 255, 255, 255, 9, 255, 255, 255, + 10, 255, 255, 255, 11, 255, 255, 255}; + const simd8 tb4{12, 255, 255, 255, 13, 255, 255, 255, + 14, 255, 255, 255, 15, 255, 255, 255}; + + // encourage store pairing and interleaving + const auto shuf1 = this->apply_lookup_16_to(tb1); + const auto shuf2 = this->apply_lookup_16_to(tb2); + shuf1.store(reinterpret_cast(p)); + shuf2.store(reinterpret_cast(p + 4)); + + const auto shuf3 = this->apply_lookup_16_to(tb3); + const auto shuf4 = this->apply_lookup_16_to(tb4); + shuf3.store(reinterpret_cast(p + 8)); + shuf4.store(reinterpret_cast(p + 12)); + } + // Conversion from/to SIMD register + simdutf_really_inline simd8(const int8x16_t _value) : value{_value} {} + simdutf_really_inline operator const int8x16_t &() const { + return this->value; + } +#ifndef SIMDUTF_REGULAR_VISUAL_STUDIO + simdutf_really_inline operator const uint8x16_t() const { + return vreinterpretq_u8_s8(this->value); + } +#endif + simdutf_really_inline operator int8x16_t &() { return this->value; } + + // Zero constructor + simdutf_really_inline simd8() : simd8(zero()) {} + // Splat constructor + simdutf_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdutf_really_inline simd8(const int8_t *values) : simd8(load(values)) {} + // Member-by-member initialization +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + simdutf_really_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, + int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15) + : simd8(simdutf_make_int8x16_t(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, + v10, v11, v12, v13, v14, v15)) {} +#else + simdutf_really_inline 
simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, + int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15) + : simd8(int8x16_t{v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15}) {} +#endif + + // Store to array + simdutf_really_inline void store(int8_t dst[16]) const { + return vst1q_s8(dst, value); + } + // Explicit conversion to/from unsigned + // + // Under Visual Studio/ARM64 uint8x16_t and int8x16_t are apparently the same + // type. In theory, we could check this occurrence with std::same_as and + // std::enabled_if but it is C++14 and relatively ugly and hard to read. +#ifndef SIMDUTF_REGULAR_VISUAL_STUDIO + simdutf_really_inline explicit simd8(const uint8x16_t other) + : simd8(vreinterpretq_s8_u8(other)) {} +#endif + simdutf_really_inline operator simd8() const { + return vreinterpretq_u8_s8(this->value); + } + + simdutf_really_inline simd8 + operator|(const simd8 other) const { + return vorrq_s8(value, other.value); + } + + simdutf_really_inline int8_t max_val() const { return vmaxvq_s8(value); } + simdutf_really_inline int8_t min_val() const { return vminvq_s8(value); } + simdutf_really_inline bool is_ascii() const { return this->min_val() >= 0; } + + // Order-sensitive comparisons + simdutf_really_inline simd8 operator>(const simd8 other) const { + return vcgtq_s8(value, other.value); + } + simdutf_really_inline simd8 operator<(const simd8 other) const { + return vcltq_s8(value, other.value); + } + + template + simdutf_really_inline simd8 + apply_lookup_16_to(const simd8 original) const { + return vqtbl1q_s8(*this, simd8(original)); + } +}; + +template struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, + "ARM kernel should use four registers per 64-byte block."); + simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64 &o) = delete; // no copy allowed + simd8x64 & + operator=(const simd8 
other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdutf_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1, + const simd8 chunk2, const simd8 chunk3) + : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdutf_really_inline simd8x64(const T *ptr) + : chunks{simd8::load(ptr), + simd8::load(ptr + sizeof(simd8) / sizeof(T)), + simd8::load(ptr + 2 * sizeof(simd8) / sizeof(T)), + simd8::load(ptr + 3 * sizeof(simd8) / sizeof(T))} {} + + simdutf_really_inline void store(T *ptr) const { + this->chunks[0].store(ptr + sizeof(simd8) * 0 / sizeof(T)); + this->chunks[1].store(ptr + sizeof(simd8) * 1 / sizeof(T)); + this->chunks[2].store(ptr + sizeof(simd8) * 2 / sizeof(T)); + this->chunks[3].store(ptr + sizeof(simd8) * 3 / sizeof(T)); + } + + simdutf_really_inline simd8x64 &operator|=(const simd8x64 &other) { + this->chunks[0] |= other.chunks[0]; + this->chunks[1] |= other.chunks[1]; + this->chunks[2] |= other.chunks[2]; + this->chunks[3] |= other.chunks[3]; + return *this; + } + + simdutf_really_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | + (this->chunks[2] | this->chunks[3]); + } + + simdutf_really_inline bool is_ascii() const { return reduce_or().is_ascii(); } + + template + simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { + this->chunks[0].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 0); + this->chunks[1].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 1); + this->chunks[2].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 2); + this->chunks[3].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 3); + } + + simdutf_really_inline void store_ascii_as_utf32(char32_t *ptr) const { + this->chunks[0].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 0); + this->chunks[1].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 1); + this->chunks[2].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 2); + this->chunks[3].store_ascii_as_utf32_tbl(ptr + 
sizeof(simd8) * 3); + } + + simdutf_really_inline uint64_t to_bitmask() const { +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint8x16_t bit_mask = + simdutf_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); +#else + const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; +#endif + // Add each of the elements next to each other, successively, to stuff each + // 8 byte mask into one. + uint8x16_t sum0 = + vpaddq_u8(vandq_u8(uint8x16_t(this->chunks[0]), bit_mask), + vandq_u8(uint8x16_t(this->chunks[1]), bit_mask)); + uint8x16_t sum1 = + vpaddq_u8(vandq_u8(uint8x16_t(this->chunks[2]), bit_mask), + vandq_u8(uint8x16_t(this->chunks[3]), bit_mask)); + sum0 = vpaddq_u8(sum0, sum1); + sum0 = vpaddq_u8(sum0, sum0); + return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); + } + + simdutf_really_inline uint64_t lt(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] < mask, this->chunks[1] < mask, + this->chunks[2] < mask, this->chunks[3] < mask) + .to_bitmask(); + } + simdutf_really_inline uint64_t gt(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] > mask, this->chunks[1] > mask, + this->chunks[2] > mask, this->chunks[3] > mask) + .to_bitmask(); + } + simdutf_really_inline uint64_t gteq_unsigned(const uint8_t m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(simd8(uint8x16_t(this->chunks[0])) >= mask, + simd8(uint8x16_t(this->chunks[1])) >= mask, + simd8(uint8x16_t(this->chunks[2])) >= mask, + simd8(uint8x16_t(this->chunks[3])) >= mask) + .to_bitmask(); + } +}; // struct simd8x64 +/* begin file src/simdutf/arm64/simd16-inl.h */ +template struct simd16; + +template > struct base_u16 { + uint16x8_t value; + /// the size of vector in bytes + static const int SIZE = sizeof(value); + /// the number of elements of type T a vector can hold + static const int 
ELEMENTS = SIZE / sizeof(T); + // Conversion from/to SIMD register + simdutf_really_inline base_u16() = default; + simdutf_really_inline base_u16(const uint16x8_t _value) : value(_value) {} + simdutf_really_inline operator const uint16x8_t &() const { + return this->value; + } + simdutf_really_inline operator uint16x8_t &() { return this->value; } + // Bit operations + simdutf_really_inline simd16 operator|(const simd16 other) const { + return vorrq_u16(*this, other); + } + simdutf_really_inline simd16 operator&(const simd16 other) const { + return vandq_u16(*this, other); + } + simdutf_really_inline simd16 operator^(const simd16 other) const { + return veorq_u16(*this, other); + } + simdutf_really_inline simd16 bit_andnot(const simd16 other) const { + return vbicq_u16(*this, other); + } + simdutf_really_inline simd16 operator~() const { return *this ^ 0xFFu; } + simdutf_really_inline simd16 &operator|=(const simd16 other) { + auto this_cast = static_cast *>(this); + *this_cast = *this_cast | other; + return *this_cast; + } + simdutf_really_inline simd16 &operator&=(const simd16 other) { + auto this_cast = static_cast *>(this); + *this_cast = *this_cast & other; + return *this_cast; + } + simdutf_really_inline simd16 &operator^=(const simd16 other) { + auto this_cast = static_cast *>(this); + *this_cast = *this_cast ^ other; + return *this_cast; + } + + friend simdutf_really_inline Mask operator==(const simd16 lhs, + const simd16 rhs) { + return vceqq_u16(lhs, rhs); + } + + template + simdutf_really_inline simd16 prev(const simd16 prev_chunk) const { + return vextq_u18(prev_chunk, *this, 8 - N); + } +}; + +template > +struct base16 : base_u16 { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdutf_really_inline base16() : base_u16() {} + simdutf_really_inline base16(const uint16x8_t _value) : base_u16(_value) {} + template + simdutf_really_inline base16(const Pointer *ptr) : base16(vld1q_u16(ptr)) {} + + static const int SIZE = 
sizeof(base_u16::value); + void dump() const { +#ifdef SIMDUTF_LOGGING + uint16_t temp[8]; + vst1q_u16(temp, *this); + printf("[%04x, %04x, %04x, %04x, %04x, %04x, %04x, %04x]\n", temp[0], + temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], temp[7]); +#endif // SIMDUTF_LOGGING + } + template + simdutf_really_inline simd16 prev(const simd16 prev_chunk) const { + return vextq_u18(prev_chunk, *this, 8 - N); + } +}; + +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd16 : base16 { + static simdutf_really_inline simd16 splat(bool _value) { + return vmovq_n_u16(uint16_t(-(!!_value))); + } + + simdutf_really_inline simd16() : base16() {} + simdutf_really_inline simd16(const uint16x8_t _value) + : base16(_value) {} + // Splat constructor + simdutf_really_inline simd16(bool _value) : base16(splat(_value)) {} +}; + +template struct base16_numeric : base16 { + static simdutf_really_inline simd16 splat(T _value) { + return vmovq_n_u16(_value); + } + static simdutf_really_inline simd16 zero() { return vdupq_n_u16(0); } + static simdutf_really_inline simd16 load(const T values[8]) { + return vld1q_u16(reinterpret_cast(values)); + } + + simdutf_really_inline base16_numeric() : base16() {} + simdutf_really_inline base16_numeric(const uint16x8_t _value) + : base16(_value) {} + + // Store to array + simdutf_really_inline void store(T dst[8]) const { + return vst1q_u16(dst, *this); + } + + // Override to distinguish from bool version + simdutf_really_inline simd16 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdutf_really_inline simd16 operator+(const simd16 other) const { + return vaddq_u16(*this, other); + } + simdutf_really_inline simd16 operator-(const simd16 other) const { + return vsubq_u16(*this, other); + } + simdutf_really_inline simd16 &operator+=(const simd16 other) { + *this = *this + other; + return *static_cast *>(this); + } + simdutf_really_inline simd16 
&operator-=(const simd16 other) { + *this = *this - other; + return *static_cast *>(this); + } +}; + +// Signed code units +template <> struct simd16 : base16_numeric { + simdutf_really_inline simd16() : base16_numeric() {} +#ifndef SIMDUTF_REGULAR_VISUAL_STUDIO + simdutf_really_inline simd16(const uint16x8_t _value) + : base16_numeric(_value) {} +#endif + simdutf_really_inline simd16(const int16x8_t _value) + : base16_numeric(vreinterpretq_u16_s16(_value)) {} + + // Splat constructor + simdutf_really_inline simd16(int16_t _value) : simd16(splat(_value)) {} + // Array constructor + simdutf_really_inline simd16(const int16_t *values) : simd16(load(values)) {} + simdutf_really_inline simd16(const char16_t *values) + : simd16(load(reinterpret_cast(values))) {} + simdutf_really_inline operator simd16() const; + simdutf_really_inline operator const uint16x8_t &() const { + return this->value; + } + simdutf_really_inline operator const int16x8_t() const { + return vreinterpretq_s16_u16(this->value); + } + + // Horizontal reductions: largest / smallest signed lane of this vector + simdutf_really_inline int16_t max_val() const { + return vmaxvq_s16(vreinterpretq_s16_u16(this->value)); + } + simdutf_really_inline int16_t min_val() const { + return vminvq_s16(vreinterpretq_s16_u16(this->value)); + } + // Order-sensitive comparisons + // Lane-wise signed maximum of this vector and other + simdutf_really_inline simd16 + max_val(const simd16 other) const { + return vmaxq_s16(vreinterpretq_s16_u16(this->value), + vreinterpretq_s16_u16(other.value)); + } + // Lane-wise signed minimum of this vector and other (vminq_s16; using + // vmaxq_s16 here would make min_val identical to max_val) + simdutf_really_inline simd16 + min_val(const simd16 other) const { + return vminq_s16(vreinterpretq_s16_u16(this->value), + vreinterpretq_s16_u16(other.value)); + } + simdutf_really_inline simd16 + operator>(const simd16 other) const { + return vcgtq_s16(vreinterpretq_s16_u16(this->value), + vreinterpretq_s16_u16(other.value)); + } + simdutf_really_inline simd16 + operator<(const simd16 other) const { + return vcltq_s16(vreinterpretq_s16_u16(this->value), + vreinterpretq_s16_u16(other.value)); + } +}; + +// Unsigned code units +template <> struct simd16 
: base16_numeric { + simdutf_really_inline simd16() : base16_numeric() {} + simdutf_really_inline simd16(const uint16x8_t _value) + : base16_numeric(_value) {} + + // Splat constructor + simdutf_really_inline simd16(uint16_t _value) : simd16(splat(_value)) {} + // Array constructor + simdutf_really_inline simd16(const uint16_t *values) : simd16(load(values)) {} + simdutf_really_inline simd16(const char16_t *values) + : simd16(load(reinterpret_cast(values))) {} + + // Horizontal reductions: largest / smallest unsigned lane. The result is + // returned as uint16_t so that lanes >= 0x8000 are not reported as + // negative numbers. + simdutf_really_inline uint16_t max_val() const { return vmaxvq_u16(*this); } + simdutf_really_inline uint16_t min_val() const { return vminvq_u16(*this); } + // Saturated math + simdutf_really_inline simd16 + saturating_add(const simd16 other) const { + return vqaddq_u16(*this, other); + } + simdutf_really_inline simd16 + saturating_sub(const simd16 other) const { + return vqsubq_u16(*this, other); + } + + // Order-specific operations + simdutf_really_inline simd16 + max_val(const simd16 other) const { + return vmaxq_u16(*this, other); + } + simdutf_really_inline simd16 + min_val(const simd16 other) const { + return vminq_u16(*this, other); + } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdutf_really_inline simd16 + gt_bits(const simd16 other) const { + return this->saturating_sub(other); + } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdutf_really_inline simd16 + lt_bits(const simd16 other) const { + return other.saturating_sub(*this); + } + simdutf_really_inline simd16 + operator<=(const simd16 other) const { + return vcleq_u16(*this, other); + } + simdutf_really_inline simd16 + operator>=(const simd16 other) const { + return vcgeq_u16(*this, other); + } + simdutf_really_inline simd16 + operator>(const simd16 other) const { + return vcgtq_u16(*this, other); + } + simdutf_really_inline simd16 + operator<(const simd16 other) const { + return vcltq_u16(*this, other); + } + + // Bit-specific operations + simdutf_really_inline simd16 
bits_not_set() const { + return *this == uint16_t(0); + } + template simdutf_really_inline simd16 shr() const { + return simd16(vshrq_n_u16(*this, N)); + } + template simdutf_really_inline simd16 shl() const { + return simd16(vshlq_n_u16(*this, N)); + } + + // Pack with the unsigned saturation of two uint16_t code units into single + // uint8_t vector + static simdutf_really_inline simd8 pack(const simd16 &v0, + const simd16 &v1) { + return vqmovn_high_u16(vqmovn_u16(v0), v1); + } + + // Change the endianness + simdutf_really_inline simd16 swap_bytes() const { + return vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(*this))); + } + + void dump() const { + uint16_t temp[8]; + vst1q_u16(temp, *this); + printf("[%04x, %04x, %04x, %04x, %04x, %04x, %04x, %04x]\n", temp[0], + temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], temp[7]); + } + + simdutf_really_inline uint32_t sum() const { return vaddlvq_u16(value); } +}; + +simdutf_really_inline simd16::operator simd16() const { + return this->value; +} + +template struct simd16x32 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd16); + static_assert(NUM_CHUNKS == 4, + "ARM kernel should use four registers per 64-byte block."); + simd16 chunks[NUM_CHUNKS]; + + simd16x32(const simd16x32 &o) = delete; // no copy allowed + simd16x32 & + operator=(const simd16 other) = delete; // no assignment allowed + simd16x32() = delete; // no default constructor allowed + + simdutf_really_inline + simd16x32(const simd16 chunk0, const simd16 chunk1, + const simd16 chunk2, const simd16 chunk3) + : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdutf_really_inline simd16x32(const T *ptr) + : chunks{simd16::load(ptr), + simd16::load(ptr + sizeof(simd16) / sizeof(T)), + simd16::load(ptr + 2 * sizeof(simd16) / sizeof(T)), + simd16::load(ptr + 3 * sizeof(simd16) / sizeof(T))} {} + + simdutf_really_inline void store(T *ptr) const { + this->chunks[0].store(ptr + sizeof(simd16) * 0 / sizeof(T)); + this->chunks[1].store(ptr + 
sizeof(simd16) * 1 / sizeof(T)); + this->chunks[2].store(ptr + sizeof(simd16) * 2 / sizeof(T)); + this->chunks[3].store(ptr + sizeof(simd16) * 3 / sizeof(T)); + } + + simdutf_really_inline simd16 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | + (this->chunks[2] | this->chunks[3]); + } + + simdutf_really_inline bool is_ascii() const { return reduce_or().is_ascii(); } + + simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { + this->chunks[0].store_ascii_as_utf16(ptr + sizeof(simd16) * 0); + this->chunks[1].store_ascii_as_utf16(ptr + sizeof(simd16) * 1); + this->chunks[2].store_ascii_as_utf16(ptr + sizeof(simd16) * 2); + this->chunks[3].store_ascii_as_utf16(ptr + sizeof(simd16) * 3); + } + + simdutf_really_inline uint64_t to_bitmask() const { +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint8x16_t bit_mask = + simdutf_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); +#else + const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; +#endif + // Add each of the elements next to each other, successively, to stuff each + // 8 byte mask into one. 
+ uint8x16_t sum0 = vpaddq_u8( + vreinterpretq_u8_u16(this->chunks[0] & vreinterpretq_u16_u8(bit_mask)), + vreinterpretq_u8_u16(this->chunks[1] & vreinterpretq_u16_u8(bit_mask))); + uint8x16_t sum1 = vpaddq_u8( + vreinterpretq_u8_u16(this->chunks[2] & vreinterpretq_u16_u8(bit_mask)), + vreinterpretq_u8_u16(this->chunks[3] & vreinterpretq_u16_u8(bit_mask))); + sum0 = vpaddq_u8(sum0, sum1); + sum0 = vpaddq_u8(sum0, sum0); + return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); + } + + simdutf_really_inline void swap_bytes() { + this->chunks[0] = this->chunks[0].swap_bytes(); + this->chunks[1] = this->chunks[1].swap_bytes(); + this->chunks[2] = this->chunks[2].swap_bytes(); + this->chunks[3] = this->chunks[3].swap_bytes(); + } + simdutf_really_inline uint64_t gt(const T m) const { + const simd16 mask = simd16::splat(m); + return simd16x32(this->chunks[0] > mask, this->chunks[1] > mask, + this->chunks[2] > mask, this->chunks[3] > mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t lteq(const T m) const { + const simd16 mask = simd16::splat(m); + return simd16x32(this->chunks[0] <= mask, this->chunks[1] <= mask, + this->chunks[2] <= mask, this->chunks[3] <= mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t not_in_range(const T low, const T high) const { + const simd16 mask_low = simd16::splat(low); + const simd16 mask_high = simd16::splat(high); + return simd16x32( + (this->chunks[0] > mask_high) | (this->chunks[0] < mask_low), + (this->chunks[1] > mask_high) | (this->chunks[1] < mask_low), + (this->chunks[2] > mask_high) | (this->chunks[2] < mask_low), + (this->chunks[3] > mask_high) | (this->chunks[3] < mask_low)) + .to_bitmask(); + } +}; // struct simd16x32 +template <> +simdutf_really_inline uint64_t simd16x32::not_in_range( + const uint16_t low, const uint16_t high) const { + const simd16 mask_low = simd16::splat(low); + const simd16 mask_high = simd16::splat(high); + simd16x32 x(simd16((this->chunks[0] > mask_high) | + (this->chunks[0] < 
mask_low)), + simd16((this->chunks[1] > mask_high) | + (this->chunks[1] < mask_low)), + simd16((this->chunks[2] > mask_high) | + (this->chunks[2] < mask_low)), + simd16((this->chunks[3] > mask_high) | + (this->chunks[3] < mask_low))); + return x.to_bitmask(); +} + +simdutf_really_inline simd16 min(const simd16 a, + simd16 b) { + return vminq_u16(a.value, b.value); +} +/* end file src/simdutf/arm64/simd16-inl.h */ +/* begin file src/simdutf/arm64/simd32-inl.h */ +template struct simd32; + +template <> struct simd32 { + static const size_t SIZE = sizeof(uint32x4_t); + static const size_t ELEMENTS = SIZE / sizeof(uint32_t); + + uint32x4_t value; + + simdutf_really_inline simd32(const uint32x4_t v) : value(v) {} + + template + simdutf_really_inline simd32(const Pointer *ptr) + : value(vld1q_u32(reinterpret_cast(ptr))) {} + + simdutf_really_inline uint64_t sum() const { return vaddvq_u32(value); } + + simdutf_really_inline simd32 swap_bytes() const { + return vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(value))); + } + + template simdutf_really_inline simd32 shr() const { + return vshrq_n_u32(value, N); + } + + template simdutf_really_inline simd32 shl() const { + return vshlq_n_u32(value, N); + } + + void dump() const { +#ifdef SIMDUTF_LOGGING + uint32_t temp[4]; + vst1q_u32(temp, value); + printf("[%08x, %08x, %08x, %08x]\n", temp[0], temp[1], temp[2], temp[3]); +#endif // SIMDUTF_LOGGING + } + + // operators + simdutf_really_inline simd32 &operator+=(const simd32 other) { + value = vaddq_u32(value, other.value); + return *this; + } + + // static members + simdutf_really_inline static simd32 zero() { + return vdupq_n_u32(0); + } + + simdutf_really_inline static simd32 splat(uint32_t v) { + return vdupq_n_u32(v); + } +}; + +//---------------------------------------------------------------------- + +template <> struct simd32 { + uint32x4_t value; + + simdutf_really_inline simd32(const uint32x4_t v) : value(v) {} + + simdutf_really_inline bool any() const { 
return vmaxvq_u32(value) != 0; } +}; + +//---------------------------------------------------------------------- + +template +simdutf_really_inline simd32 operator|(const simd32 a, + const simd32 b) { + return vorrq_u32(a.value, b.value); +} + +simdutf_really_inline simd32 min(const simd32 a, + const simd32 b) { + return vminq_u32(a.value, b.value); +} + +simdutf_really_inline simd32 max(const simd32 a, + const simd32 b) { + return vmaxq_u32(a.value, b.value); +} + +simdutf_really_inline simd32 operator==(const simd32 a, + uint32_t b) { + return vceqq_u32(a.value, vdupq_n_u32(b)); +} + +simdutf_really_inline simd32 operator&(const simd32 a, + const simd32 b) { + return vandq_u32(a.value, b.value); +} + +simdutf_really_inline simd32 operator&(const simd32 a, + uint32_t b) { + return vandq_u32(a.value, vdupq_n_u32(b)); +} + +simdutf_really_inline simd32 operator|(const simd32 a, + uint32_t b) { + return vorrq_u32(a.value, vdupq_n_u32(b)); +} + +simdutf_really_inline simd32 operator+(const simd32 a, + const simd32 b) { + return vaddq_u32(a.value, b.value); +} + +simdutf_really_inline simd32 operator-(const simd32 a, + uint32_t b) { + return vsubq_u32(a.value, vdupq_n_u32(b)); +} + +simdutf_really_inline simd32 operator>=(const simd32 a, + const simd32 b) { + return vcgeq_u32(a.value, b.value); +} + +simdutf_really_inline simd32 operator!(const simd32 v) { + return vmvnq_u32(v.value); +} + +simdutf_really_inline simd32 operator>(const simd32 a, + const simd32 b) { + return vcgtq_u32(a.value, b.value); +} + +simdutf_really_inline simd32 select(const simd32 cond, + const simd32 v_true, + const simd32 v_false) { + return vbslq_u32(cond.value, v_true.value, v_false.value); +} +/* end file src/simdutf/arm64/simd32-inl.h */ +/* begin file src/simdutf/arm64/simd64-inl.h */ +template struct simd64; + +template <> struct simd64 { + uint64x2_t value; + + simdutf_really_inline simd64(const uint64x2_t v) : value(v) {} + + template + simdutf_really_inline simd64(const Pointer *ptr) 
+ : value(vld1q_u64(reinterpret_cast(ptr))) {} + + simdutf_really_inline uint64_t sum() const { return vaddvq_u64(value); } + + // operators + simdutf_really_inline simd64 &operator+=(const simd64 other) { + value = vaddq_u64(value, other.value); + return *this; + } + + // static members + simdutf_really_inline static simd64 zero() { + return vdupq_n_u64(0); + } + + simdutf_really_inline static simd64 splat(uint64_t v) { + return vdupq_n_u64(v); + } +}; +/* end file src/simdutf/arm64/simd64-inl.h */ + +simdutf_really_inline simd64 sum_8bytes(const simd8 v) { + // We do it as 3 instructions. There might be a faster way. + // We hope that these 3 instructions are cheap. + uint16x8_t first_sum = vpaddlq_u8(v); + uint32x4_t second_sum = vpaddlq_u16(first_sum); + return vpaddlq_u32(second_sum); +} + +} // namespace simd +} // unnamed namespace +} // namespace arm64 +} // namespace simdutf + +#endif // SIMDUTF_ARM64_SIMD_H +/* end file src/simdutf/arm64/simd.h */ + +/* begin file src/simdutf/arm64/end.h */ +#undef SIMDUTF_SIMD_HAS_BYTEMASK +/* end file src/simdutf/arm64/end.h */ + +#endif // SIMDUTF_IMPLEMENTATION_ARM64 + +#endif // SIMDUTF_ARM64_H +/* end file src/simdutf/arm64.h */ +/* begin file src/simdutf/icelake.h */ +#ifndef SIMDUTF_ICELAKE_H +#define SIMDUTF_ICELAKE_H + + +#ifdef __has_include + // How do we detect that a compiler supports vbmi2? + // For sure if the following header is found, we are ok? + #if __has_include() + #define SIMDUTF_COMPILER_SUPPORTS_VBMI2 1 + #endif +#endif + +#ifdef _MSC_VER + #if _MSC_VER >= 1930 + // Visual Studio 2022 and up support VBMI2 under x64 even if the header + // avx512vbmi2intrin.h is not found. + // Visual Studio 2019 technically supports VBMI2, but the implementation + // might be unreliable. Search for visualstudio2019icelakeissue in our + // tests. 
+ #ifndef SIMDUTF_COMPILER_SUPPORTS_VBMI2 + #define SIMDUTF_COMPILER_SUPPORTS_VBMI2 1 + #endif + #endif +#endif + +#if SIMDUTF_GCC9OROLDER && SIMDUTF_IS_X86_64 + #define SIMDUTF_IMPLEMENTATION_ICELAKE 0 + #warning \ + "You are using a legacy GCC compiler, we are disabling AVX-512 support" +#endif + +// We allow icelake on x64 as long as the compiler is known to support VBMI2. +#ifndef SIMDUTF_IMPLEMENTATION_ICELAKE + #define SIMDUTF_IMPLEMENTATION_ICELAKE \ + ((SIMDUTF_IS_X86_64) && (SIMDUTF_COMPILER_SUPPORTS_VBMI2)) +#endif + +// To see why (__BMI__) && (__LZCNT__) are not part of this next line, see +// https://github.com/simdutf/simdutf/issues/1247 +#if ((SIMDUTF_IMPLEMENTATION_ICELAKE) && (SIMDUTF_IS_X86_64) && (__AVX2__) && \ + (SIMDUTF_HAS_AVX512F && SIMDUTF_HAS_AVX512DQ && SIMDUTF_HAS_AVX512VL && \ + SIMDUTF_HAS_AVX512VBMI2) && \ + (!SIMDUTF_IS_32BITS)) + #define SIMDUTF_CAN_ALWAYS_RUN_ICELAKE 1 +#else + #define SIMDUTF_CAN_ALWAYS_RUN_ICELAKE 0 +#endif + +#if SIMDUTF_IMPLEMENTATION_ICELAKE + #if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE + #define SIMDUTF_TARGET_ICELAKE + #else + #define SIMDUTF_TARGET_ICELAKE \ + SIMDUTF_TARGET_REGION( \ + "avx512f,avx512dq,avx512cd,avx512bw,avx512vbmi,avx512vbmi2," \ + "avx512vl,avx2,bmi,bmi2,pclmul,lzcnt,popcnt,avx512vpopcntdq") + #endif + +namespace simdutf { +namespace icelake {} // namespace icelake +} // namespace simdutf + + // + // These two need to be included outside SIMDUTF_TARGET_REGION + // +/* begin file src/simdutf/icelake/intrinsics.h */ +#ifndef SIMDUTF_ICELAKE_INTRINSICS_H +#define SIMDUTF_ICELAKE_INTRINSICS_H + + +#ifdef SIMDUTF_VISUAL_STUDIO + // under clang within visual studio, this will include + #include // visual studio or clang + #include +#else + + #if SIMDUTF_GCC11ORMORE +// We should not get warnings while including yet we do +// under some versions of GCC. +// If the x86intrin.h header has uninitialized values that are problematic, +// it is a GCC issue, we want to ignore these warnings. 
+SIMDUTF_DISABLE_GCC_WARNING(-Wuninitialized) + #endif + + #include // elsewhere + + #if SIMDUTF_GCC11ORMORE +// cancels the suppression of the -Wuninitialized +SIMDUTF_POP_DISABLE_WARNINGS + #endif + + #ifndef _tzcnt_u64 + #define _tzcnt_u64(x) __tzcnt_u64(x) + #endif // _tzcnt_u64 +#endif // SIMDUTF_VISUAL_STUDIO + +#ifdef SIMDUTF_CLANG_VISUAL_STUDIO + /** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + * e.g., if __AVX2__ is set... in turn, we normally set these + * macros by compiling against the corresponding architecture + * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole + * software with these advanced instructions. In simdutf, we + * want to compile the whole program for a generic target, + * and only target our specific kernels. As a workaround, + * we directly include the needed headers. These headers would + * normally guard against such usage, but we carefully included + * (or ) before, so the headers + * are fooled. + */ + #include // for _blsr_u64 + #include // for _pext_u64, _pdep_u64 + #include // for __lzcnt64 + #include // for most things (AVX2, AVX512, _popcnt64) + #include + #include + #include + #include + // Important: we need the AVX-512 headers: + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + // unfortunately, we may not get _blsr_u64, but, thankfully, clang + // has it as a macro. 
+ #ifndef _blsr_u64 + // we roll our own: clears the lowest set bit of n. The argument is + // parenthesized so that expression arguments (e.g. a | b) expand with the + // intended precedence. + #define _blsr_u64(n) (((n) - 1) & (n)) + #endif // _blsr_u64 +#endif // SIMDUTF_CLANG_VISUAL_STUDIO + +#if defined(__GNUC__) && !defined(__clang__) + + #if __GNUC__ == 8 + #define SIMDUTF_GCC8 1 + #elif __GNUC__ == 9 + #define SIMDUTF_GCC9 1 + #endif // __GNUC__ == 8 || __GNUC__ == 9 + +#endif // defined(__GNUC__) && !defined(__clang__) + +#if SIMDUTF_GCC8 + #pragma GCC push_options + #pragma GCC target("avx512f") +/** + * GCC 8 fails to provide _mm512_set_epi8. We roll our own. + */ +inline __m512i +_mm512_set_epi8(uint8_t a0, uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4, + uint8_t a5, uint8_t a6, uint8_t a7, uint8_t a8, uint8_t a9, + uint8_t a10, uint8_t a11, uint8_t a12, uint8_t a13, uint8_t a14, + uint8_t a15, uint8_t a16, uint8_t a17, uint8_t a18, uint8_t a19, + uint8_t a20, uint8_t a21, uint8_t a22, uint8_t a23, uint8_t a24, + uint8_t a25, uint8_t a26, uint8_t a27, uint8_t a28, uint8_t a29, + uint8_t a30, uint8_t a31, uint8_t a32, uint8_t a33, uint8_t a34, + uint8_t a35, uint8_t a36, uint8_t a37, uint8_t a38, uint8_t a39, + uint8_t a40, uint8_t a41, uint8_t a42, uint8_t a43, uint8_t a44, + uint8_t a45, uint8_t a46, uint8_t a47, uint8_t a48, uint8_t a49, + uint8_t a50, uint8_t a51, uint8_t a52, uint8_t a53, uint8_t a54, + uint8_t a55, uint8_t a56, uint8_t a57, uint8_t a58, uint8_t a59, + uint8_t a60, uint8_t a61, uint8_t a62, uint8_t a63) { + return _mm512_set_epi64( + uint64_t(a7) + (uint64_t(a6) << 8) + (uint64_t(a5) << 16) + + (uint64_t(a4) << 24) + (uint64_t(a3) << 32) + (uint64_t(a2) << 40) + + (uint64_t(a1) << 48) + (uint64_t(a0) << 56), + uint64_t(a15) + (uint64_t(a14) << 8) + (uint64_t(a13) << 16) + + (uint64_t(a12) << 24) + (uint64_t(a11) << 32) + + (uint64_t(a10) << 40) + (uint64_t(a9) << 48) + (uint64_t(a8) << 56), + uint64_t(a23) + (uint64_t(a22) << 8) + (uint64_t(a21) << 16) + + (uint64_t(a20) << 24) + (uint64_t(a19) << 32) + + (uint64_t(a18) << 40) + (uint64_t(a17) << 48) + (uint64_t(a16) << 56), + 
uint64_t(a31) + (uint64_t(a30) << 8) + (uint64_t(a29) << 16) + + (uint64_t(a28) << 24) + (uint64_t(a27) << 32) + + (uint64_t(a26) << 40) + (uint64_t(a25) << 48) + (uint64_t(a24) << 56), + uint64_t(a39) + (uint64_t(a38) << 8) + (uint64_t(a37) << 16) + + (uint64_t(a36) << 24) + (uint64_t(a35) << 32) + + (uint64_t(a34) << 40) + (uint64_t(a33) << 48) + (uint64_t(a32) << 56), + uint64_t(a47) + (uint64_t(a46) << 8) + (uint64_t(a45) << 16) + + (uint64_t(a44) << 24) + (uint64_t(a43) << 32) + + (uint64_t(a42) << 40) + (uint64_t(a41) << 48) + (uint64_t(a40) << 56), + uint64_t(a55) + (uint64_t(a54) << 8) + (uint64_t(a53) << 16) + + (uint64_t(a52) << 24) + (uint64_t(a51) << 32) + + (uint64_t(a50) << 40) + (uint64_t(a49) << 48) + (uint64_t(a48) << 56), + uint64_t(a63) + (uint64_t(a62) << 8) + (uint64_t(a61) << 16) + + (uint64_t(a60) << 24) + (uint64_t(a59) << 32) + + (uint64_t(a58) << 40) + (uint64_t(a57) << 48) + + (uint64_t(a56) << 56)); +} + #pragma GCC pop_options +#endif // SIMDUTF_GCC8 + +#endif // SIMDUTF_HASWELL_INTRINSICS_H +/* end file src/simdutf/icelake/intrinsics.h */ +/* begin file src/simdutf/icelake/implementation.h */ +#ifndef SIMDUTF_ICELAKE_IMPLEMENTATION_H +#define SIMDUTF_ICELAKE_IMPLEMENTATION_H + + +namespace simdutf { +namespace icelake { + +namespace { +using namespace simdutf; +} + +class implementation final : public simdutf::implementation { +public: + simdutf_really_inline implementation() + : simdutf::implementation( + "icelake", + "Intel AVX512 (AVX-512BW, AVX-512CD, AVX-512VL, AVX-512VBMI2 " + "extensions)", + internal::instruction_set::AVX2 | internal::instruction_set::BMI1 | + internal::instruction_set::BMI2 | + internal::instruction_set::AVX512BW | + internal::instruction_set::AVX512CD | + internal::instruction_set::AVX512VL | + internal::instruction_set::AVX512VBMI2 | + internal::instruction_set::AVX512VPOPCNTDQ) {} + +#if SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused int detect_encodings(const char *input, + size_t length) const 
noexcept final; +#endif // SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf8(const char *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 + simdutf_warn_unused result + validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_ASCII + simdutf_warn_unused bool validate_ascii(const char *buf, + size_t len) const noexcept final; + simdutf_warn_unused result + validate_ascii_with_errors(const char *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII + simdutf_warn_unused bool + validate_utf16le_as_ascii(const char16_t *buf, + size_t len) const noexcept final; + + simdutf_warn_unused bool + validate_utf16be_as_ascii(const char16_t *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf16le(const char16_t *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused bool validate_utf16be(const char16_t *buf, + size_t len) const noexcept final; + simdutf_warn_unused result validate_utf16le_with_errors( + const char16_t *buf, size_t len) const noexcept final; + simdutf_warn_unused result validate_utf16be_with_errors( + const char16_t *buf, size_t len) const noexcept final; + void to_well_formed_utf16be(const char16_t *input, size_t len, + char16_t *output) const noexcept final; + void to_well_formed_utf16le(const char16_t *input, size_t len, + char16_t *output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool 
validate_utf32(const char32_t *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused result validate_utf32_with_errors( + const char32_t *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf8( + const char *buf, size_t len, char *utf8_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf16le( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_latin1_to_utf16be( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_latin1_with_errors( + const char *buf, size_t len, char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t convert_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused size_t convert_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept 
final; + simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t convert_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf32_with_errors( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + convert_utf16le_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16be_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( + const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( + const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16le_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16be_to_latin1(const char16_t *buf, size_t len, + 
char *latin1_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t convert_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t convert_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf8_with_errors( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + convert_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; + simdutf_warn_unused result + convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) const 
noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + convert_utf32_to_utf16le(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf32_to_utf16be(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_utf16le(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_utf16be(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16le_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16be_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16le_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16be_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 + void change_endianness_utf16(const char16_t *buf, size_t 
length, + char16_t *output) const noexcept final; + simdutf_warn_unused size_t + count_utf16le(const char16_t *buf, size_t length) const noexcept override; + simdutf_warn_unused size_t + count_utf16be(const char16_t *buf, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 + simdutf_warn_unused size_t count_utf8(const char *buf, + size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t utf8_length_from_utf16le( + const char16_t *input, size_t length) const noexcept override; + simdutf_warn_unused size_t utf8_length_from_utf16be( + const char16_t *input, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t utf32_length_from_utf16le( + const char16_t *input, size_t length) const noexcept override; + simdutf_warn_unused size_t utf32_length_from_utf16be( + const char16_t *input, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t utf16_length_from_utf8( + const char *input, size_t length) const noexcept override; + simdutf_warn_unused result utf8_length_from_utf16le_with_replacement( + const char16_t *input, size_t length) const noexcept override; + ; + simdutf_warn_unused result utf8_length_from_utf16be_with_replacement( + const char16_t *input, size_t length) const noexcept override; + ; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t utf8_length_from_utf32( + const char32_t *input, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t utf16_length_from_utf32( 
+ const char32_t *input, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t utf32_length_from_utf8( + const char *input, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t latin1_length_from_utf8( + const char *input, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t utf8_length_from_latin1( + const char *input, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_BASE64 + simdutf_warn_unused result base64_to_binary( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept override; + simdutf_warn_unused full_result base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept override; + simdutf_warn_unused result base64_to_binary( + const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept override; + simdutf_warn_unused full_result base64_to_binary_details( + const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept override; + size_t binary_to_base64(const char *input, size_t length, char *output, + base64_options options) const noexcept override; + size_t + binary_to_base64_with_lines(const char *input, size_t 
length, char *output, + size_t line_length, + base64_options options) const noexcept override; + const char *find(const char *start, const char *end, + char character) const noexcept override; + const char16_t *find(const char16_t *start, const char16_t *end, + char16_t character) const noexcept override; +#endif // SIMDUTF_FEATURE_BASE64 +}; + +} // namespace icelake +} // namespace simdutf + +#endif // SIMDUTF_ICELAKE_IMPLEMENTATION_H +/* end file src/simdutf/icelake/implementation.h */ + + // + // The rest need to be inside the region + // +/* begin file src/simdutf/icelake/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "icelake" +// #define SIMDUTF_IMPLEMENTATION icelake + +#if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE +// nothing needed. +#else +SIMDUTF_TARGET_ICELAKE +#endif + +#if SIMDUTF_GCC11ORMORE // workaround for + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 +// clang-format off +SIMDUTF_DISABLE_GCC_WARNING(-Wmaybe-uninitialized) +// clang-format on +#endif // end of workaround +/* end file src/simdutf/icelake/begin.h */ + // Declarations +/* begin file src/simdutf/icelake/bitmanipulation.h */ +#ifndef SIMDUTF_ICELAKE_BITMANIPULATION_H +#define SIMDUTF_ICELAKE_BITMANIPULATION_H + +namespace simdutf { +namespace icelake { +namespace { + +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO +simdutf_really_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows + return __popcnt64(input_num); // Visual Studio wants two underscores +} +#else +simdutf_really_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO +simdutf_really_inline unsigned __int64 count_ones32(uint32_t input_num) { + // note: we do not support legacy 32-bit Windows + return __popcnt(input_num); // Visual Studio wants two underscores +} +#else +simdutf_really_inline long long int count_ones32(uint32_t input_num) { + return _popcnt32(input_num); +} +#endif + +#if 
SIMDUTF_NEED_TRAILING_ZEROES +// simdutf_really_inline int trailing_zeroes(uint64_t input_num) { +// #if SIMDUTF_REGULAR_VISUAL_STUDIO +// return (int)_tzcnt_u64(input_num); +// #else // SIMDUTF_REGULAR_VISUAL_STUDIO +// return __builtin_ctzll(input_num); +// #endif // SIMDUTF_REGULAR_VISUAL_STUDIO +// } +#endif + +} // unnamed namespace +} // namespace icelake +} // namespace simdutf + +#endif // SIMDUTF_ICELAKE_BITMANIPULATION_H +/* end file src/simdutf/icelake/bitmanipulation.h */ +/* begin file src/simdutf/icelake/simd.h */ +#ifndef SIMDUTF_ICELAKE_SIMD_H +#define SIMDUTF_ICELAKE_SIMD_H + +namespace simdutf { +namespace icelake { +namespace { +namespace simd { + +/* begin file src/simdutf/icelake/simd16-inl.h */ +template struct simd16; + +template <> struct simd16 { + static const size_t SIZE = sizeof(__m512i); + static const size_t ELEMENTS = SIZE / sizeof(uint16_t); + + template + static simdutf_really_inline simd16 load(const Pointer *ptr) { + return simd16(ptr); + } + + __m512i value; + + simdutf_really_inline simd16(const __m512i v) : value(v) {} + + template + simdutf_really_inline simd16(const Pointer *ptr) + : value(_mm512_loadu_si512(reinterpret_cast(ptr))) {} + + // operators + simdutf_really_inline simd16 &operator+=(const simd16 other) { + value = _mm512_add_epi32(value, other.value); + return *this; + } + + simdutf_really_inline simd16 &operator-=(const simd16 other) { + value = _mm512_sub_epi32(value, other.value); + return *this; + } + + // methods + simdutf_really_inline simd16 swap_bytes() const { + const __m512i byteflip = _mm512_setr_epi64( + 0x0607040502030001, 0x0e0f0c0d0a0b0809, 0x0607040502030001, + 0x0e0f0c0d0a0b0809, 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809); + + return _mm512_shuffle_epi8(value, byteflip); + } + + simdutf_really_inline uint64_t sum() const { + const auto lo = _mm512_and_si512(value, _mm512_set1_epi32(0xffff)); + const auto hi = _mm512_srli_epi32(value, 16); + const auto sum32 = 
_mm512_add_epi32(lo, hi); + + return _mm512_reduce_add_epi32(sum32); + } + + // static members + simdutf_really_inline static simd16 zero() { + return _mm512_setzero_si512(); + } + + simdutf_really_inline static simd16 splat(uint16_t v) { + return _mm512_set1_epi16(v); + } +}; + +template <> struct simd16 { + __mmask32 value; + + simdutf_really_inline simd16(const __mmask32 v) : value(v) {} +}; + +// ------------------------------------------------------------ + +simdutf_really_inline simd16 min(const simd16 b, + const simd16 a) { + return _mm512_min_epu16(a.value, b.value); +} + +simdutf_really_inline simd16 operator&(const simd16 a, + uint16_t b) { + return _mm512_and_si512(a.value, _mm512_set1_epi16(b)); +} + +simdutf_really_inline simd16 operator^(const simd16 a, + uint16_t b) { + return _mm512_xor_si512(a.value, _mm512_set1_epi16(b)); +} + +simdutf_really_inline simd16 operator^(const simd16 a, + const simd16 b) { + return _mm512_xor_si512(a.value, b.value); +} + +simdutf_really_inline simd16 operator==(const simd16 a, + uint16_t b) { + return _mm512_cmpeq_epi16_mask(a.value, _mm512_set1_epi16(b)); +} +/* end file src/simdutf/icelake/simd16-inl.h */ +/* begin file src/simdutf/icelake/simd32-inl.h */ +template struct simd32; + +template <> struct simd32 { + static const size_t SIZE = sizeof(__m512i); + static const size_t ELEMENTS = SIZE / sizeof(uint32_t); + + __m512i value; + + simdutf_really_inline simd32(const __m512i v) : value(v) {} + + template + simdutf_really_inline simd32(const Pointer *ptr) + : value(_mm512_loadu_si512(reinterpret_cast(ptr))) {} + + uint64_t sum() const { + const __m512i mask = _mm512_set1_epi64(0xffffffff); + const __m512i t0 = _mm512_and_si512(value, mask); + const __m512i t1 = _mm512_srli_epi64(value, 32); + const __m512i t2 = _mm512_add_epi64(t0, t1); + return _mm512_reduce_add_epi64(t2); + } + + // operators + simdutf_really_inline simd32 &operator+=(const simd32 other) { + value = _mm512_add_epi32(value, other.value); + return 
*this; + } + + // static members + simdutf_really_inline static simd32 zero() { + return _mm512_setzero_si512(); + } + + simdutf_really_inline static simd32 splat(uint32_t v) { + return _mm512_set1_epi32(v); + } +}; + +simdutf_really_inline simd32 min(const simd32 b, + const simd32 a) { + return _mm512_min_epu32(a.value, b.value); +} + +simdutf_really_inline simd32 operator&(const simd32 b, + const simd32 a) { + return _mm512_and_si512(a.value, b.value); +} +/* end file src/simdutf/icelake/simd32-inl.h */ + +} // namespace simd +} // unnamed namespace +} // namespace icelake +} // namespace simdutf + +#endif // SIMDUTF_ICELAKE_SIMD_H +/* end file src/simdutf/icelake/simd.h */ + +/* begin file src/simdutf/icelake/end.h */ +#if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE +// nothing needed. +#else +SIMDUTF_UNTARGET_REGION +#endif + + +#if SIMDUTF_GCC11ORMORE // workaround for + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 +SIMDUTF_POP_DISABLE_WARNINGS +#endif // end of workaround +/* end file src/simdutf/icelake/end.h */ + +#endif // SIMDUTF_IMPLEMENTATION_ICELAKE +#endif // SIMDUTF_ICELAKE_H +/* end file src/simdutf/icelake.h */ +/* begin file src/simdutf/haswell.h */ +#ifndef SIMDUTF_HASWELL_H +#define SIMDUTF_HASWELL_H + +#ifdef SIMDUTF_WESTMERE_H + #error "haswell.h must be included before westmere.h" +#endif +#ifdef SIMDUTF_FALLBACK_H + #error "haswell.h must be included before fallback.h" +#endif + + +// Default Haswell to on if this is x86-64. Even if we are not compiled for it, +// it could be selected at runtime. +#ifndef SIMDUTF_IMPLEMENTATION_HASWELL + // + // You do not want to restrict it like so: SIMDUTF_IS_X86_64 && __AVX2__ + // because we want to rely on *runtime dispatch*. 
/**
 * Haswell flavour of simdutf::implementation.
 *
 * The class only declares overrides; the definitions are not part of this
 * header section. Each group of declarations is compiled only when the
 * SIMDUTF_FEATURE_* macros guarding it are enabled. Validation and conversion
 * entry points are declared `final`; the counting, length, base64 and find
 * entry points are declared `override`.
 */
class implementation final : public simdutf::implementation {
public:
  // Passes the literals "haswell" and "Intel/AMD AVX2" plus the
  // AVX2 | BMI1 | BMI2 instruction-set mask to the base class.
  // NOTE(review): presumably these are the implementation name, its
  // description and the instruction sets required at runtime; confirm against
  // the simdutf::implementation constructor.
  simdutf_really_inline implementation()
      : simdutf::implementation("haswell", "Intel/AMD AVX2",
                                internal::instruction_set::AVX2 |
                                    internal::instruction_set::BMI1 |
                                    internal::instruction_set::BMI2) {}

  // --- Encoding detection and validation ---------------------------------

#if SIMDUTF_FEATURE_DETECT_ENCODING
  simdutf_warn_unused int detect_encodings(const char *input,
                                           size_t length) const noexcept final;
#endif // SIMDUTF_FEATURE_DETECT_ENCODING

#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
  simdutf_warn_unused bool validate_utf8(const char *buf,
                                         size_t len) const noexcept final;
#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING

#if SIMDUTF_FEATURE_UTF8
  simdutf_warn_unused result
  validate_utf8_with_errors(const char *buf, size_t len) const noexcept final;
#endif // SIMDUTF_FEATURE_UTF8

#if SIMDUTF_FEATURE_ASCII
  simdutf_warn_unused bool validate_ascii(const char *buf,
                                          size_t len) const noexcept final;
  simdutf_warn_unused result
  validate_ascii_with_errors(const char *buf, size_t len) const noexcept final;
#endif // SIMDUTF_FEATURE_ASCII
#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII
  simdutf_warn_unused bool
  validate_utf16le_as_ascii(const char16_t *buf,
                            size_t len) const noexcept final;

  simdutf_warn_unused bool
  validate_utf16be_as_ascii(const char16_t *buf,
                            size_t len) const noexcept final;
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII

#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
  simdutf_warn_unused bool validate_utf16le(const char16_t *buf,
                                            size_t len) const noexcept final;
#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING

#if SIMDUTF_FEATURE_UTF16
  simdutf_warn_unused bool validate_utf16be(const char16_t *buf,
                                            size_t len) const noexcept final;
  simdutf_warn_unused result validate_utf16le_with_errors(
      const char16_t *buf, size_t len) const noexcept final;
  simdutf_warn_unused result validate_utf16be_with_errors(
      const char16_t *buf, size_t len) const noexcept final;
#endif // SIMDUTF_FEATURE_UTF16

#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
  simdutf_warn_unused bool validate_utf32(const char32_t *buf,
                                          size_t len) const noexcept final;
#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING

#if SIMDUTF_FEATURE_UTF32
  simdutf_warn_unused result validate_utf32_with_errors(
      const char32_t *buf, size_t len) const noexcept final;
#endif // SIMDUTF_FEATURE_UTF32

  // --- Conversions from Latin-1 -------------------------------------------

#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
  simdutf_warn_unused size_t convert_latin1_to_utf8(
      const char *buf, size_t len, char *utf8_output) const noexcept final;
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1

#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
  simdutf_warn_unused size_t convert_latin1_to_utf16le(
      const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final;
  simdutf_warn_unused size_t convert_latin1_to_utf16be(
      const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final;
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1

#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
  simdutf_warn_unused size_t convert_latin1_to_utf32(
      const char *buf, size_t len, char32_t *utf32_output) const noexcept final;
#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1

  // --- Conversions from UTF-8 ---------------------------------------------
  // Each target comes in three variants: plain (size_t result),
  // `_with_errors` (result) and `convert_valid_` (size_t result).

#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
  simdutf_warn_unused size_t convert_utf8_to_latin1(
      const char *buf, size_t len, char *latin1_output) const noexcept final;
  simdutf_warn_unused result convert_utf8_to_latin1_with_errors(
      const char *buf, size_t len, char *latin1_buffer) const noexcept final;
  simdutf_warn_unused size_t convert_valid_utf8_to_latin1(
      const char *buf, size_t len, char *latin1_output) const noexcept final;
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1

#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
  simdutf_warn_unused size_t convert_utf8_to_utf16le(
      const char *buf, size_t len, char16_t *utf16_output) const noexcept final;
  simdutf_warn_unused size_t convert_utf8_to_utf16be(
      const char *buf, size_t len, char16_t *utf16_output) const noexcept final;
  simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(
      const char *buf, size_t len, char16_t *utf16_output) const noexcept final;
  simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(
      const char *buf, size_t len, char16_t *utf16_output) const noexcept final;
  simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(
      const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final;
  simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(
      const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final;
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16

#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
  simdutf_warn_unused size_t convert_utf8_to_utf32(
      const char *buf, size_t len, char32_t *utf32_output) const noexcept final;
  simdutf_warn_unused result convert_utf8_to_utf32_with_errors(
      const char *buf, size_t len, char32_t *utf32_output) const noexcept final;
  simdutf_warn_unused size_t convert_valid_utf8_to_utf32(
      const char *buf, size_t len, char32_t *utf32_buffer) const noexcept final;
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32

  // --- Conversions from UTF-16 (LE/BE) to Latin-1 and UTF-8 ---------------

#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
  simdutf_warn_unused size_t
  convert_utf16le_to_latin1(const char16_t *buf, size_t len,
                            char *latin1_buffer) const noexcept final;
  simdutf_warn_unused size_t
  convert_utf16be_to_latin1(const char16_t *buf, size_t len,
                            char *latin1_buffer) const noexcept final;
  simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(
      const char16_t *buf, size_t len,
      char *latin1_buffer) const noexcept final;
  simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(
      const char16_t *buf, size_t len,
      char *latin1_buffer) const noexcept final;
  simdutf_warn_unused size_t
  convert_valid_utf16le_to_latin1(const char16_t *buf, size_t len,
                                  char *latin1_buffer) const noexcept final;
  simdutf_warn_unused size_t
  convert_valid_utf16be_to_latin1(const char16_t *buf, size_t len,
                                  char *latin1_buffer) const noexcept final;
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1

#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
  simdutf_warn_unused size_t convert_utf16le_to_utf8(
      const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final;
  simdutf_warn_unused size_t convert_utf16be_to_utf8(
      const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final;
  simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(
      const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final;
  simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(
      const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final;
  simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(
      const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final;
  simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(
      const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final;
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16

  // --- Conversions from UTF-32 to UTF-8 and Latin-1 -----------------------

#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
  simdutf_warn_unused size_t convert_utf32_to_utf8(
      const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final;
  simdutf_warn_unused result convert_utf32_to_utf8_with_errors(
      const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final;
  simdutf_warn_unused size_t convert_valid_utf32_to_utf8(
      const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final;
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32

#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
  simdutf_warn_unused size_t
  convert_utf32_to_latin1(const char32_t *buf, size_t len,
                          char *latin1_output) const noexcept final;
  simdutf_warn_unused result
  convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len,
                                      char *latin1_output) const noexcept final;
  simdutf_warn_unused size_t
  convert_valid_utf32_to_latin1(const char32_t *buf, size_t len,
                                char *latin1_output) const noexcept final;
#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1

  // --- Conversions between UTF-16 (LE/BE) and UTF-32 ----------------------

#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
  simdutf_warn_unused size_t
  convert_utf32_to_utf16le(const char32_t *buf, size_t len,
                           char16_t *utf16_buffer) const noexcept final;
  simdutf_warn_unused size_t
  convert_utf32_to_utf16be(const char32_t *buf, size_t len,
                           char16_t *utf16_buffer) const noexcept final;
  simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(
      const char32_t *buf, size_t len,
      char16_t *utf16_buffer) const noexcept final;
  simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(
      const char32_t *buf, size_t len,
      char16_t *utf16_buffer) const noexcept final;
  simdutf_warn_unused size_t
  convert_valid_utf32_to_utf16le(const char32_t *buf, size_t len,
                                 char16_t *utf16_buffer) const noexcept final;
  simdutf_warn_unused size_t
  convert_valid_utf32_to_utf16be(const char32_t *buf, size_t len,
                                 char16_t *utf16_buffer) const noexcept final;
  simdutf_warn_unused size_t
  convert_utf16le_to_utf32(const char16_t *buf, size_t len,
                           char32_t *utf32_buffer) const noexcept final;
  simdutf_warn_unused size_t
  convert_utf16be_to_utf32(const char16_t *buf, size_t len,
                           char32_t *utf32_buffer) const noexcept final;
  simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(
      const char16_t *buf, size_t len,
      char32_t *utf32_buffer) const noexcept final;
  simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(
      const char16_t *buf, size_t len,
      char32_t *utf32_buffer) const noexcept final;
  simdutf_warn_unused size_t
  convert_valid_utf16le_to_utf32(const char16_t *buf, size_t len,
                                 char32_t *utf32_buffer) const noexcept final;
  simdutf_warn_unused size_t
  convert_valid_utf16be_to_utf32(const char16_t *buf, size_t len,
                                 char32_t *utf32_buffer) const noexcept final;
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32

  // --- UTF-16 utilities and counting --------------------------------------

#if SIMDUTF_FEATURE_UTF16
  void change_endianness_utf16(const char16_t *buf, size_t length,
                               char16_t *output) const noexcept final;
  simdutf_warn_unused size_t
  count_utf16le(const char16_t *buf, size_t length) const noexcept override;
  simdutf_warn_unused size_t
  count_utf16be(const char16_t *buf, size_t length) const noexcept override;
  void to_well_formed_utf16be(const char16_t *input, size_t len,
                              char16_t *output) const noexcept final;
  void to_well_formed_utf16le(const char16_t *input, size_t len,
                              char16_t *output) const noexcept final;
#endif // SIMDUTF_FEATURE_UTF16

#if SIMDUTF_FEATURE_UTF8
  simdutf_warn_unused size_t count_utf8(const char *buf,
                                        size_t length) const noexcept override;
#endif // SIMDUTF_FEATURE_UTF8

  // --- Output-length queries ----------------------------------------------

#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
  simdutf_warn_unused size_t utf8_length_from_utf16le(
      const char16_t *input, size_t length) const noexcept override;
  simdutf_warn_unused size_t utf8_length_from_utf16be(
      const char16_t *input, size_t length) const noexcept override;
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16

#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
  simdutf_warn_unused size_t utf32_length_from_utf16le(
      const char16_t *input, size_t length) const noexcept override;
  simdutf_warn_unused size_t utf32_length_from_utf16be(
      const char16_t *input, size_t length) const noexcept override;
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32

#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
  simdutf_warn_unused size_t utf16_length_from_utf8(
      const char *input, size_t length) const noexcept override;
  simdutf_warn_unused result utf8_length_from_utf16le_with_replacement(
      const char16_t *input, size_t length) const noexcept override;
  // NOTE(review): the lone ';' below is an empty member declaration. It looks
  // unintentional and may trigger extra-semicolon warnings.
  ;
  simdutf_warn_unused result utf8_length_from_utf16be_with_replacement(
      const char16_t *input, size_t length) const noexcept override;
  // NOTE(review): second stray empty declaration, same as above.
  ;
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16

#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
  simdutf_warn_unused size_t utf8_length_from_utf32(
      const char32_t *input, size_t length) const noexcept override;
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32

#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
  simdutf_warn_unused size_t utf16_length_from_utf32(
      const char32_t *input, size_t length) const noexcept override;
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32

#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
  simdutf_warn_unused size_t utf32_length_from_utf8(
      const char *input, size_t length) const noexcept override;
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32

#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
  simdutf_warn_unused size_t latin1_length_from_utf8(
      const char *input, size_t length) const noexcept override;
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1

#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
  simdutf_warn_unused size_t utf8_length_from_latin1(
      const char *input, size_t length) const noexcept override;
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1

  // --- Base64 and character search ----------------------------------------
  // The decoders are overloaded on the input character type (char and
  // char16_t); last_chunk_options defaults to
  // last_chunk_handling_options::loose in every overload.

#if SIMDUTF_FEATURE_BASE64
  simdutf_warn_unused result base64_to_binary(
      const char *input, size_t length, char *output, base64_options options,
      last_chunk_handling_options last_chunk_options =
          last_chunk_handling_options::loose) const noexcept override;
  simdutf_warn_unused full_result base64_to_binary_details(
      const char *input, size_t length, char *output, base64_options options,
      last_chunk_handling_options last_chunk_options =
          last_chunk_handling_options::loose) const noexcept override;
  simdutf_warn_unused result base64_to_binary(
      const char16_t *input, size_t length, char *output,
      base64_options options,
      last_chunk_handling_options last_chunk_options =
          last_chunk_handling_options::loose) const noexcept override;
  simdutf_warn_unused full_result base64_to_binary_details(
      const char16_t *input, size_t length, char *output,
      base64_options options,
      last_chunk_handling_options last_chunk_options =
          last_chunk_handling_options::loose) const noexcept override;
  size_t binary_to_base64(const char *input, size_t length, char *output,
                          base64_options options) const noexcept override;
  size_t
  binary_to_base64_with_lines(const char *input, size_t length, char *output,
                              size_t line_length,
                              base64_options options) const noexcept override;
  const char *find(const char *start, const char *end,
                   char character) const noexcept override;
  const char16_t *find(const char16_t *start, const char16_t *end,
                       char16_t character) const noexcept override;
#endif // SIMDUTF_FEATURE_BASE64
};
src/simdutf/haswell/implementation.h */ +/* begin file src/simdutf/haswell/intrinsics.h */ +#ifndef SIMDUTF_HASWELL_INTRINSICS_H +#define SIMDUTF_HASWELL_INTRINSICS_H + + +#ifdef SIMDUTF_VISUAL_STUDIO + // under clang within visual studio, this will include + #include // visual studio or clang +#else + + #if SIMDUTF_GCC11ORMORE +// We should not get warnings while including yet we do +// under some versions of GCC. +// If the x86intrin.h header has uninitialized values that are problematic, +// it is a GCC issue, we want to ignore these warnings. +SIMDUTF_DISABLE_GCC_WARNING(-Wuninitialized) + #endif + + #include // elsewhere + + #if SIMDUTF_GCC11ORMORE +// cancels the suppression of the -Wuninitialized +SIMDUTF_POP_DISABLE_WARNINGS + #endif + +#endif // SIMDUTF_VISUAL_STUDIO + +#ifdef SIMDUTF_CLANG_VISUAL_STUDIO + /** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + * e.g., if __AVX2__ is set... in turn, we normally set these + * macros by compiling against the corresponding architecture + * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole + * software with these advanced instructions. In simdutf, we + * want to compile the whole program for a generic target, + * and only target our specific kernels. As a workaround, + * we directly include the needed headers. These headers would + * normally guard against such usage, but we carefully included + * (or ) before, so the headers + * are fooled. + */ + #include // for _blsr_u64 + #include // for __lzcnt64 + #include // for most things (AVX2, AVX512, _popcnt64) + #include + #include + #include + #include + // unfortunately, we may not get _blsr_u64, but, thankfully, clang + // has it as a macro. 
+ #ifndef _blsr_u64 + // we roll our own + #define _blsr_u64(n) (((n) - 1) & (n)) + #endif // _blsr_u64 + // Same issue with _blsmsk_u32: + #ifndef _blsmsk_u32 + // we roll our own + #define _blsmsk_u32(n) (((n) - 1) ^ (n)) + #endif // _blsmsk_u32 +#endif // SIMDUTF_CLANG_VISUAL_STUDIO + +#endif // SIMDUTF_HASWELL_INTRINSICS_H +/* end file src/simdutf/haswell/intrinsics.h */ + + // + // The rest need to be inside the region + // +/* begin file src/simdutf/haswell/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "haswell" +// #define SIMDUTF_IMPLEMENTATION haswell +#define SIMDUTF_SIMD_HAS_BYTEMASK 1 + +#if SIMDUTF_CAN_ALWAYS_RUN_HASWELL +// nothing needed. +#else +SIMDUTF_TARGET_HASWELL +#endif + +#if SIMDUTF_GCC11ORMORE // workaround for + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 +// clang-format off +SIMDUTF_DISABLE_GCC_WARNING(-Wmaybe-uninitialized) +// clang-format on +#endif // end of workaround +/* end file src/simdutf/haswell/begin.h */ + // Declarations +/* begin file src/simdutf/haswell/bitmanipulation.h */ +#ifndef SIMDUTF_HASWELL_BITMANIPULATION_H +#define SIMDUTF_HASWELL_BITMANIPULATION_H + +namespace simdutf { +namespace haswell { +namespace { + +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO +simdutf_really_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows + return __popcnt64(input_num); // Visual Studio wants two underscores +} +#else +simdutf_really_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +#if SIMDUTF_NEED_TRAILING_ZEROES +simdutf_really_inline int trailing_zeroes(uint64_t input_num) { + #if SIMDUTF_REGULAR_VISUAL_STUDIO + return (int)_tzcnt_u64(input_num); + #else // SIMDUTF_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); + #endif // SIMDUTF_REGULAR_VISUAL_STUDIO +} +#endif + +template bool is_power_of_two(T x) { return (x & (x - 1)) == 0; } + +} // unnamed namespace +} // namespace haswell +} // namespace simdutf + 
+#endif // SIMDUTF_HASWELL_BITMANIPULATION_H +/* end file src/simdutf/haswell/bitmanipulation.h */ +/* begin file src/simdutf/haswell/simd.h */ +#ifndef SIMDUTF_HASWELL_SIMD_H +#define SIMDUTF_HASWELL_SIMD_H + +namespace simdutf { +namespace haswell { +namespace { +namespace simd { + +// Forward-declared so they can be used by splat and friends. +template struct base { + __m256i value; + + // Zero constructor + simdutf_really_inline base() : value{__m256i()} {} + + // Conversion from SIMD register + simdutf_really_inline base(const __m256i _value) : value(_value) {} + + simdutf_really_inline operator const __m256i &() const { return this->value; } + + template + simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { + __m256i first = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(*this)); + __m256i second = _mm256_cvtepu8_epi16(_mm256_extractf128_si256(*this, 1)); + if (big_endian) { + const __m256i swap = _mm256_setr_epi8( + 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18, + 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30); + first = _mm256_shuffle_epi8(first, swap); + second = _mm256_shuffle_epi8(second, swap); + } + _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr), first); + _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr + 16), second); + } + + simdutf_really_inline void store_ascii_as_utf32(char32_t *ptr) const { + _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr), + _mm256_cvtepu8_epi32(_mm256_castsi256_si128(*this))); + _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr + 8), + _mm256_cvtepu8_epi32(_mm256_castsi256_si128( + _mm256_srli_si256(*this, 8)))); + _mm256_storeu_si256( + reinterpret_cast<__m256i *>(ptr + 16), + _mm256_cvtepu8_epi32(_mm256_extractf128_si256(*this, 1))); + _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr + 24), + _mm256_cvtepu8_epi32(_mm_srli_si128( + _mm256_extractf128_si256(*this, 1), 8))); + } + // Bit operations + simdutf_really_inline Child operator|(const Child other) const { + 
return _mm256_or_si256(*this, other); + } + simdutf_really_inline Child operator&(const Child other) const { + return _mm256_and_si256(*this, other); + } + simdutf_really_inline Child operator^(const Child other) const { + return _mm256_xor_si256(*this, other); + } + simdutf_really_inline Child &operator|=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast | other; + return *this_cast; + } +}; + +// Forward-declared so they can be used by splat and friends. +template struct simd8; + +template > +struct base8 : base> { + simdutf_really_inline base8() : base>() {} + + simdutf_really_inline base8(const __m256i _value) : base>(_value) {} + + friend simdutf_always_inline Mask operator==(const simd8 lhs, + const simd8 rhs) { + return _mm256_cmpeq_epi8(lhs, rhs); + } + + static const int SIZE = sizeof(base::value); + + template + simdutf_really_inline simd8 prev(const simd8 prev_chunk) const { + return _mm256_alignr_epi8( + *this, _mm256_permute2x128_si256(prev_chunk, *this, 0x21), 16 - N); + } +}; + +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd8 : base8 { + static simdutf_really_inline simd8 splat(bool _value) { + return _mm256_set1_epi8(uint8_t(-(!!_value))); + } + + simdutf_really_inline simd8(const __m256i _value) : base8(_value) {} + + simdutf_really_inline simd8(bool _value) : base8(splat(_value)) {} + + simdutf_really_inline uint32_t to_bitmask() const { + return uint32_t(_mm256_movemask_epi8(value)); + } +}; + +template struct base8_numeric : base8 { + static simdutf_really_inline simd8 splat(T _value) { + return _mm256_set1_epi8(_value); + } + static simdutf_really_inline simd8 zero() { + return _mm256_setzero_si256(); + } + static simdutf_really_inline simd8 load(const T values[32]) { + return _mm256_loadu_si256(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdutf_really_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T 
v4, + T v5, T v6, T v7, T v8, T v9, + T v10, T v11, T v12, T v13, + T v14, T v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, + v14, v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, + v12, v13, v14, v15); + } + + simdutf_really_inline base8_numeric() : base8() {} + simdutf_really_inline base8_numeric(const __m256i _value) + : base8(_value) {} + + // Store to array + simdutf_really_inline void store(T dst[32]) const { + return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this); + } + + // Addition/subtraction are the same for signed and unsigned + simdutf_really_inline simd8 operator-(const simd8 other) const { + return _mm256_sub_epi8(*this, other); + } + simdutf_really_inline simd8 &operator-=(const simd8 other) { + *this = *this - other; + return *static_cast *>(this); + } + + // Override to distinguish from bool version + simdutf_really_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior + // for out of range values) + template + simdutf_really_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm256_shuffle_epi8(lookup_table, *this); + } + + template + simdutf_really_inline simd8 + lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, + L replace5, L replace6, L replace7, L replace8, L replace9, + L replace10, L replace11, L replace12, L replace13, L replace14, + L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, replace4, replace5, replace6, + replace7, replace8, replace9, replace10, replace11, replace12, + replace13, replace14, replace15)); + } +}; + +// Signed bytes +template <> struct simd8 : base8_numeric { + simdutf_really_inline simd8() : base8_numeric() {} + simdutf_really_inline simd8(const __m256i _value) + : base8_numeric(_value) {} + + // Splat constructor + simdutf_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array 
constructor + simdutf_really_inline simd8(const int8_t values[32]) : simd8(load(values)) {} + simdutf_really_inline operator simd8() const; + + simdutf_really_inline bool is_ascii() const { + return _mm256_movemask_epi8(*this) == 0; + } + // Order-sensitive comparisons + simdutf_really_inline simd8 operator>(const simd8 other) const { + return _mm256_cmpgt_epi8(*this, other); + } + simdutf_really_inline simd8 operator<(const simd8 other) const { + return _mm256_cmpgt_epi8(other, *this); + } +}; + +// Unsigned bytes +template <> struct simd8 : base8_numeric { + simdutf_really_inline simd8() : base8_numeric() {} + simdutf_really_inline simd8(const __m256i _value) + : base8_numeric(_value) {} + // Splat constructor + simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdutf_really_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdutf_really_inline + simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, + uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, + uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, + uint8_t v21, uint8_t v22, uint8_t v23, uint8_t v24, uint8_t v25, + uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, + uint8_t v31) + : simd8(_mm256_setr_epi8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, + v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, + v22, v23, v24, v25, v26, v27, v28, v29, v30, + v31)) {} + + // Saturated math + simdutf_really_inline simd8 + saturating_sub(const simd8 other) const { + return _mm256_subs_epu8(*this, other); + } + + // Order-specific operations + simdutf_really_inline simd8 + min_val(const simd8 other) const { + return _mm256_min_epu8(other, *this); + } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdutf_really_inline simd8 + gt_bits(const simd8 other) const 
{ + return this->saturating_sub(other); + } + simdutf_really_inline simd8 + operator>=(const simd8 other) const { + return other.min_val(*this) == other; + } + + // Bit-specific operations + simdutf_really_inline bool is_ascii() const { + return _mm256_movemask_epi8(*this) == 0; + } + simdutf_really_inline bool bits_not_set_anywhere() const { + return _mm256_testz_si256(*this, *this); + } + + simdutf_really_inline bool any_bits_set_anywhere() const { + return !bits_not_set_anywhere(); + } + + template simdutf_really_inline simd8 shr() const { + return simd8(_mm256_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); + } + + simdutf_really_inline uint64_t sum_bytes() const { + const auto tmp = _mm256_sad_epu8(value, _mm256_setzero_si256()); + + return _mm256_extract_epi64(tmp, 0) + _mm256_extract_epi64(tmp, 1) + + _mm256_extract_epi64(tmp, 2) + _mm256_extract_epi64(tmp, 3); + } +}; +simdutf_really_inline simd8::operator simd8() const { + return this->value; +} + +template struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 2, + "Haswell kernel should use two registers per 64-byte block."); + simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64 &o) = delete; // no copy allowed + simd8x64 & + operator=(const simd8 other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdutf_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1) + : chunks{chunk0, chunk1} {} + simdutf_really_inline simd8x64(const T *ptr) + : chunks{simd8::load(ptr), + simd8::load(ptr + sizeof(simd8) / sizeof(T))} {} + + simdutf_really_inline void store(T *ptr) const { + this->chunks[0].store(ptr + sizeof(simd8) * 0 / sizeof(T)); + this->chunks[1].store(ptr + sizeof(simd8) * 1 / sizeof(T)); + } + + simdutf_really_inline uint64_t to_bitmask() const { + uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r_hi = this->chunks[1].to_bitmask(); + return r_lo | (r_hi << 32); + } + + 
simdutf_really_inline simd8x64 &operator|=(const simd8x64 &other) { + this->chunks[0] |= other.chunks[0]; + this->chunks[1] |= other.chunks[1]; + return *this; + } + + simdutf_really_inline simd8 reduce_or() const { + return this->chunks[0] | this->chunks[1]; + } + + simdutf_really_inline bool is_ascii() const { + return this->reduce_or().is_ascii(); + } + + template + simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { + this->chunks[0].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 0); + this->chunks[1].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 1); + } + + simdutf_really_inline void store_ascii_as_utf32(char32_t *ptr) const { + this->chunks[0].store_ascii_as_utf32(ptr + sizeof(simd8) * 0); + this->chunks[1].store_ascii_as_utf32(ptr + sizeof(simd8) * 1); + } + + simdutf_really_inline uint64_t in_range(const T low, const T high) const { + const simd8 mask_low = simd8::splat(low); + const simd8 mask_high = simd8::splat(high); + + return simd8x64( + (this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low), + (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low)) + .to_bitmask(); + } + + simdutf_really_inline uint64_t lt(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] < mask, this->chunks[1] < mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t gt(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] > mask, this->chunks[1] > mask) + .to_bitmask(); + } + simdutf_really_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] == mask, this->chunks[1] == mask) + .to_bitmask(); + } + simdutf_really_inline uint64_t gteq_unsigned(const uint8_t m) const { + const simd8 mask = simd8::splat(m); + return simd8x64((simd8(__m256i(this->chunks[0])) >= mask), + (simd8(__m256i(this->chunks[1])) >= mask)) + .to_bitmask(); + } +}; // struct simd8x64 + +/* begin file 
src/simdutf/haswell/simd16-inl.h */ +#ifdef __GNUC__ + #if __GNUC__ < 8 + #define _mm256_set_m128i(xmm1, xmm2) \ + _mm256_permute2f128_si256(_mm256_castsi128_si256(xmm1), \ + _mm256_castsi128_si256(xmm2), 2) + #define _mm256_setr_m128i(xmm2, xmm1) \ + _mm256_permute2f128_si256(_mm256_castsi128_si256(xmm1), \ + _mm256_castsi128_si256(xmm2), 2) + #endif +#endif + +template struct simd16; + +template > +struct base16 : base> { + using bitmask_type = uint32_t; + + simdutf_really_inline base16() : base>() {} + simdutf_really_inline base16(const __m256i _value) + : base>(_value) {} + template + simdutf_really_inline base16(const Pointer *ptr) + : base16(_mm256_loadu_si256(reinterpret_cast(ptr))) {} + + friend simdutf_always_inline Mask operator==(const simd16 lhs, + const simd16 rhs) { + return _mm256_cmpeq_epi16(lhs, rhs); + } + + /// the size of vector in bytes + static const int SIZE = sizeof(base>::value); + + /// the number of elements of type T a vector can hold + static const int ELEMENTS = SIZE / sizeof(T); +}; + +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd16 : base16 { + static simdutf_really_inline simd16 splat(bool _value) { + return _mm256_set1_epi16(uint16_t(-(!!_value))); + } + + simdutf_really_inline simd16() : base16() {} + + simdutf_really_inline simd16(const __m256i _value) : base16(_value) {} + + // Splat constructor + simdutf_really_inline simd16(bool _value) : base16(splat(_value)) {} + + simdutf_really_inline bitmask_type to_bitmask() const { + return _mm256_movemask_epi8(*this); + } + + simdutf_really_inline simd16 operator~() const { return *this ^ true; } +}; + +template struct base16_numeric : base16 { + static simdutf_really_inline simd16 splat(T _value) { + return _mm256_set1_epi16(_value); + } + + static simdutf_really_inline simd16 zero() { + return _mm256_setzero_si256(); + } + + static simdutf_really_inline simd16 load(const T values[8]) { + return _mm256_loadu_si256(reinterpret_cast(values)); + } + 
+ simdutf_really_inline base16_numeric() : base16() {} + + simdutf_really_inline base16_numeric(const __m256i _value) + : base16(_value) {} + + // Store to array + simdutf_really_inline void store(T dst[8]) const { + return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this); + } + + // Override to distinguish from bool version + simdutf_really_inline simd16 operator~() const { return *this ^ 0xFFFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdutf_really_inline simd16 operator+(const simd16 other) const { + return _mm256_add_epi16(*this, other); + } + simdutf_really_inline simd16 &operator+=(const simd16 other) { + *this = *this + other; + return *static_cast *>(this); + } +}; + +// Unsigned code units +template <> struct simd16 : base16_numeric { + simdutf_really_inline simd16() : base16_numeric() {} + simdutf_really_inline simd16(const __m256i _value) + : base16_numeric(_value) {} + + // Splat constructor + simdutf_really_inline simd16(uint16_t _value) : simd16(splat(_value)) {} + // Array constructor + simdutf_really_inline simd16(const uint16_t *values) : simd16(load(values)) {} + simdutf_really_inline simd16(const char16_t *values) + : simd16(load(reinterpret_cast(values))) {} + + // Order-specific operations + simdutf_really_inline simd16 + max_val(const simd16 other) const { + return _mm256_max_epu16(*this, other); + } + simdutf_really_inline simd16 + min_val(const simd16 other) const { + return _mm256_min_epu16(*this, other); + } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdutf_really_inline simd16 + operator<=(const simd16 other) const { + return other.max_val(*this) == other; + } + simdutf_really_inline simd16 + operator>=(const simd16 other) const { + return other.min_val(*this) == other; + } + + // Bit-specific operations + simdutf_really_inline simd16 bits_not_set() const { + return *this == uint16_t(0); + } + + simdutf_really_inline simd16 any_bits_set() const { + return 
~this->bits_not_set(); + } + + template simdutf_really_inline simd16 shr() const { + return simd16(_mm256_srli_epi16(*this, N)); + } + + // Change the endianness + simdutf_really_inline simd16 swap_bytes() const { + const __m256i swap = _mm256_setr_epi8( + 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18, + 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30); + return _mm256_shuffle_epi8(*this, swap); + } + + // Pack with the unsigned saturation of two uint16_t code units into single + // uint8_t vector + static simdutf_really_inline simd8 pack(const simd16 &v0, + const simd16 &v1) { + // Note: the AVX2 variant of pack operates on 128-bit lanes, thus + // we have to shuffle lanes in order to produce bytes in the + // correct order. + + // get the 0th lanes + const __m128i lo_0 = _mm256_extracti128_si256(v0, 0); + const __m128i lo_1 = _mm256_extracti128_si256(v1, 0); + + // get the 1st lanes + const __m128i hi_0 = _mm256_extracti128_si256(v0, 1); + const __m128i hi_1 = _mm256_extracti128_si256(v1, 1); + + // build new vectors (shuffle lanes) + const __m256i t0 = _mm256_set_m128i(lo_1, lo_0); + const __m256i t1 = _mm256_set_m128i(hi_1, hi_0); + + // pack code units in linear order from v0 and v1 + return _mm256_packus_epi16(t0, t1); + } + + simdutf_really_inline uint64_t sum() const { + const auto lo_u16 = _mm256_and_si256(value, _mm256_set1_epi32(0x0000ffff)); + const auto hi_u16 = _mm256_srli_epi32(value, 16); + const auto sum_u32 = _mm256_add_epi32(lo_u16, hi_u16); + + const auto lo_u32 = + _mm256_and_si256(sum_u32, _mm256_set1_epi64x(0xffffffff)); + const auto hi_u32 = _mm256_srli_epi64(sum_u32, 32); + const auto sum_u64 = _mm256_add_epi64(lo_u32, hi_u32); + + return uint64_t(_mm256_extract_epi64(sum_u64, 0)) + + uint64_t(_mm256_extract_epi64(sum_u64, 1)) + + uint64_t(_mm256_extract_epi64(sum_u64, 2)) + + uint64_t(_mm256_extract_epi64(sum_u64, 3)); + } +}; + +template struct simd16x32 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd16); + 
static_assert(NUM_CHUNKS == 2, + "Haswell kernel should use two registers per 64-byte block."); + simd16 chunks[NUM_CHUNKS]; + + simd16x32(const simd16x32 &o) = delete; // no copy allowed + simd16x32 & + operator=(const simd16 other) = delete; // no assignment allowed + simd16x32() = delete; // no default constructor allowed + + simdutf_really_inline simd16x32(const simd16 chunk0, + const simd16 chunk1) + : chunks{chunk0, chunk1} {} + simdutf_really_inline simd16x32(const T *ptr) + : chunks{simd16::load(ptr), + simd16::load(ptr + sizeof(simd16) / sizeof(T))} {} + + simdutf_really_inline void store(T *ptr) const { + this->chunks[0].store(ptr + sizeof(simd16) * 0 / sizeof(T)); + this->chunks[1].store(ptr + sizeof(simd16) * 1 / sizeof(T)); + } + + simdutf_really_inline uint64_t to_bitmask() const { + uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r_hi = this->chunks[1].to_bitmask(); + return r_lo | (r_hi << 32); + } + + simdutf_really_inline simd16 reduce_or() const { + return this->chunks[0] | this->chunks[1]; + } + + simdutf_really_inline bool is_ascii() const { + return this->reduce_or().is_ascii(); + } + + simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { + this->chunks[0].store_ascii_as_utf16(ptr + sizeof(simd16) * 0); + this->chunks[1].store_ascii_as_utf16(ptr + sizeof(simd16)); + } + + simdutf_really_inline void swap_bytes() { + this->chunks[0] = this->chunks[0].swap_bytes(); + this->chunks[1] = this->chunks[1].swap_bytes(); + } + simdutf_really_inline uint64_t gt(const T m) const { + const simd16 mask = simd16::splat(m); + return simd16x32(this->chunks[0] > mask, this->chunks[1] > mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t lteq(const T m) const { + const simd16 mask = simd16::splat(m); + return simd16x32(this->chunks[0] <= mask, this->chunks[1] <= mask) + .to_bitmask(); + } + simdutf_really_inline uint64_t eq(const T m) const { + const simd16 mask = simd16::splat(m); + return simd16x32(this->chunks[0] 
== mask, this->chunks[1] == mask) + .to_bitmask(); + } + simdutf_really_inline uint64_t not_in_range(const T low, const T high) const { + const simd16 mask_low = simd16::splat(static_cast(low - 1)); + const simd16 mask_high = simd16::splat(static_cast(high + 1)); + return simd16x32( + (this->chunks[0] >= mask_high) | (this->chunks[0] <= mask_low), + (this->chunks[1] >= mask_high) | (this->chunks[1] <= mask_low)) + .to_bitmask(); + } +}; // struct simd16x32 + +simd16 min(const simd16 a, simd16 b) { + return _mm256_min_epu16(a.value, b.value); +} +/* end file src/simdutf/haswell/simd16-inl.h */ +/* begin file src/simdutf/haswell/simd32-inl.h */ +template struct simd32; + +template <> struct simd32 { + static const size_t SIZE = sizeof(__m256i); + static const size_t ELEMENTS = SIZE / sizeof(uint32_t); + + __m256i value; + + simdutf_really_inline simd32(const __m256i v) : value(v) {} + + template + simdutf_really_inline simd32(const Pointer *ptr) + : value(_mm256_loadu_si256(reinterpret_cast(ptr))) {} + + simdutf_really_inline uint64_t sum() const { + const __m256i mask = _mm256_set1_epi64x(0xffffffff); + const __m256i t0 = _mm256_and_si256(value, mask); + const __m256i t1 = _mm256_srli_epi64(value, 32); + const __m256i t2 = _mm256_add_epi64(t0, t1); + + return uint64_t(_mm256_extract_epi64(t2, 0)) + + uint64_t(_mm256_extract_epi64(t2, 1)) + + uint64_t(_mm256_extract_epi64(t2, 2)) + + uint64_t(_mm256_extract_epi64(t2, 3)); + } + + simdutf_really_inline simd32 swap_bytes() const { + const __m256i shuffle = + _mm256_setr_epi8(3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 15, 14, 13, 12, + 3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 15, 14, 13, 12); + + return _mm256_shuffle_epi8(value, shuffle); + } + + // operators + simdutf_really_inline simd32 &operator+=(const simd32 other) { + value = _mm256_add_epi32(value, other.value); + return *this; + } + + // static members + simdutf_really_inline static simd32 zero() { + return _mm256_setzero_si256(); + } + + simdutf_really_inline static 
simd32 splat(uint32_t v) { + return _mm256_set1_epi32(v); + } +}; + +//---------------------------------------------------------------------- + +template <> struct simd32 { + // static const size_t SIZE = sizeof(__m128i); + // static const size_t ELEMENTS = SIZE / sizeof(uint32_t); + + __m256i value; + + simdutf_really_inline simd32(const __m256i v) : value(v) {} + + simdutf_really_inline bool any() const { + return _mm256_movemask_epi8(value) != 0; + } +}; + +//---------------------------------------------------------------------- + +template +simdutf_really_inline simd32 operator|(const simd32 a, + const simd32 b) { + return _mm256_or_si256(a.value, b.value); +} + +simdutf_really_inline simd32 min(const simd32 b, + const simd32 a) { + return _mm256_min_epu32(a.value, b.value); +} + +simdutf_really_inline simd32 max(const simd32 a, + const simd32 b) { + return _mm256_max_epu32(a.value, b.value); +} + +simdutf_really_inline simd32 operator&(const simd32 b, + const simd32 a) { + return _mm256_and_si256(a.value, b.value); +} + +simdutf_really_inline simd32 operator+(const simd32 a, + const simd32 b) { + return _mm256_add_epi32(a.value, b.value); +} + +simdutf_really_inline simd32 operator==(const simd32 a, + const simd32 b) { + return _mm256_cmpeq_epi32(a.value, b.value); +} + +simdutf_really_inline simd32 operator>=(const simd32 a, + const simd32 b) { + return _mm256_cmpeq_epi32(_mm256_max_epu32(a.value, b.value), a.value); +} + +simdutf_really_inline simd32 operator!(const simd32 v) { + return _mm256_xor_si256(v.value, _mm256_set1_epi8(-1)); +} + +simdutf_really_inline simd32 operator>(const simd32 a, + const simd32 b) { + return !(b >= a); +} +/* end file src/simdutf/haswell/simd32-inl.h */ +/* begin file src/simdutf/haswell/simd64-inl.h */ +template struct simd64; + +template <> struct simd64 { + // static const size_t SIZE = sizeof(__m256i); + // static const size_t ELEMENTS = SIZE / sizeof(uint64_t); + + __m256i value; + + simdutf_really_inline simd64(const 
__m256i v) : value(v) {} + + template + simdutf_really_inline simd64(const Pointer *ptr) + : value(_mm256_loadu_si256(reinterpret_cast(ptr))) {} + + simdutf_really_inline uint64_t sum() const { + return _mm256_extract_epi64(value, 0) + _mm256_extract_epi64(value, 1) + + _mm256_extract_epi64(value, 2) + _mm256_extract_epi64(value, 3); + } + + // operators + simdutf_really_inline simd64 &operator+=(const simd64 other) { + value = _mm256_add_epi64(value, other.value); + return *this; + } + + // static members + simdutf_really_inline static simd64 zero() { + return _mm256_setzero_si256(); + } + + simdutf_really_inline static simd64 splat(uint64_t v) { + return _mm256_set1_epi64x(v); + } +}; +/* end file src/simdutf/haswell/simd64-inl.h */ + +simdutf_really_inline simd64 sum_8bytes(const simd8 v) { + return _mm256_sad_epu8(v.value, simd8::zero()); +} + +} // namespace simd + +} // unnamed namespace +} // namespace haswell +} // namespace simdutf + +#endif // SIMDUTF_HASWELL_SIMD_H +/* end file src/simdutf/haswell/simd.h */ + +/* begin file src/simdutf/haswell/end.h */ +#if SIMDUTF_CAN_ALWAYS_RUN_HASWELL +// nothing needed. +#else +SIMDUTF_UNTARGET_REGION +#endif + +#undef SIMDUTF_SIMD_HAS_BYTEMASK + +#if SIMDUTF_GCC11ORMORE // workaround for + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 +SIMDUTF_POP_DISABLE_WARNINGS +#endif // end of workaround +/* end file src/simdutf/haswell/end.h */ + +#endif // SIMDUTF_IMPLEMENTATION_HASWELL +#endif // SIMDUTF_HASWELL_COMMON_H +/* end file src/simdutf/haswell.h */ +/* begin file src/simdutf/westmere.h */ +#ifndef SIMDUTF_WESTMERE_H +#define SIMDUTF_WESTMERE_H + +#ifdef SIMDUTF_FALLBACK_H + #error "westmere.h must be included before fallback.h" +#endif + + +// Default Westmere to on if this is x86-64, unless we'll always select Haswell. +#ifndef SIMDUTF_IMPLEMENTATION_WESTMERE + // + // You do not want to set it to (SIMDUTF_IS_X86_64 && + // !SIMDUTF_REQUIRES_HASWELL) because you want to rely on runtime dispatch! 
+ // + #if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE || SIMDUTF_CAN_ALWAYS_RUN_HASWELL + #define SIMDUTF_IMPLEMENTATION_WESTMERE 0 + #else + #define SIMDUTF_IMPLEMENTATION_WESTMERE (SIMDUTF_IS_X86_64) + #endif + +#endif + +#if (SIMDUTF_IMPLEMENTATION_WESTMERE && SIMDUTF_IS_X86_64 && __SSE4_2__) + #define SIMDUTF_CAN_ALWAYS_RUN_WESTMERE 1 +#else + #define SIMDUTF_CAN_ALWAYS_RUN_WESTMERE 0 +#endif + +#if SIMDUTF_IMPLEMENTATION_WESTMERE + + #define SIMDUTF_TARGET_WESTMERE SIMDUTF_TARGET_REGION("sse4.2,popcnt") + +namespace simdutf { +/** + * Implementation for Westmere (Intel SSE4.2). + */ +namespace westmere {} // namespace westmere +} // namespace simdutf + + // + // These two need to be included outside SIMDUTF_TARGET_REGION + // +/* begin file src/simdutf/westmere/implementation.h */ +#ifndef SIMDUTF_WESTMERE_IMPLEMENTATION_H +#define SIMDUTF_WESTMERE_IMPLEMENTATION_H + + +// The constructor may be executed on any host, so we take care not to use +// SIMDUTF_TARGET_REGION +namespace simdutf { +namespace westmere { + +namespace { +using namespace simdutf; +} + +class implementation final : public simdutf::implementation { +public: + simdutf_really_inline implementation() + : simdutf::implementation("westmere", "Intel/AMD SSE4.2", + internal::instruction_set::SSE42) {} + +#if SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused int detect_encodings(const char *input, + size_t length) const noexcept final; +#endif // SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf8(const char *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 + simdutf_warn_unused result + validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_ASCII + simdutf_warn_unused bool validate_ascii(const char *buf, + size_t len) const noexcept final; + simdutf_warn_unused 
result + validate_ascii_with_errors(const char *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_ASCII +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII + simdutf_warn_unused bool + validate_utf16le_as_ascii(const char16_t *buf, + size_t len) const noexcept final; + + simdutf_warn_unused bool + validate_utf16be_as_ascii(const char16_t *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf16le(const char16_t *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused bool validate_utf16be(const char16_t *buf, + size_t len) const noexcept final; + simdutf_warn_unused result validate_utf16le_with_errors( + const char16_t *buf, size_t len) const noexcept final; + simdutf_warn_unused result validate_utf16be_with_errors( + const char16_t *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf32(const char32_t *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused result validate_utf32_with_errors( + const char32_t *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf8( + const char *buf, size_t len, char *utf8_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf16le( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_latin1_to_utf16be( + const char *buf, size_t 
len, char16_t *utf16_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_latin1_with_errors( + const char *buf, size_t len, char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t convert_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused size_t convert_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t convert_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; + 
simdutf_warn_unused result convert_utf8_to_utf32_with_errors( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + convert_utf16le_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16be_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( + const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( + const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16le_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16be_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t convert_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( + const char16_t *buf, size_t len, char 
*utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t convert_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf8_with_errors( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + convert_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; + simdutf_warn_unused result + convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + convert_utf32_to_utf16le(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf32_to_utf16be(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_utf16le(const char32_t *buf, size_t len, + 
char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_utf16be(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16le_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16be_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16le_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16be_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 + void change_endianness_utf16(const char16_t *buf, size_t length, + char16_t *output) const noexcept final; + simdutf_warn_unused size_t + count_utf16le(const char16_t *buf, size_t length) const noexcept override; + simdutf_warn_unused size_t + count_utf16be(const char16_t *buf, size_t length) const noexcept override; + void to_well_formed_utf16be(const char16_t *input, size_t len, + char16_t *output) const noexcept final; + void to_well_formed_utf16le(const char16_t *input, size_t len, + char16_t *output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 + simdutf_warn_unused size_t count_utf8(const char *buf, + size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t utf8_length_from_utf16le( + const char16_t *input, 
size_t length) const noexcept override; + simdutf_warn_unused size_t utf8_length_from_utf16be( + const char16_t *input, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t utf32_length_from_utf16le( + const char16_t *input, size_t length) const noexcept override; + simdutf_warn_unused size_t utf32_length_from_utf16be( + const char16_t *input, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t utf16_length_from_utf8( + const char *input, size_t length) const noexcept override; + simdutf_warn_unused result utf8_length_from_utf16le_with_replacement( + const char16_t *input, size_t length) const noexcept override; + ; + simdutf_warn_unused result utf8_length_from_utf16be_with_replacement( + const char16_t *input, size_t length) const noexcept override; + ; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t utf8_length_from_utf32( + const char32_t *input, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t utf16_length_from_utf32( + const char32_t *input, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t utf32_length_from_utf8( + const char *input, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t latin1_length_from_utf8( + const char *input, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if 
SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t utf8_length_from_latin1( + const char *input, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_BASE64 + simdutf_warn_unused result base64_to_binary( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept override; + simdutf_warn_unused full_result base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept override; + simdutf_warn_unused result base64_to_binary( + const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept override; + simdutf_warn_unused full_result base64_to_binary_details( + const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept override; + size_t binary_to_base64(const char *input, size_t length, char *output, + base64_options options) const noexcept override; + size_t + binary_to_base64_with_lines(const char *input, size_t length, char *output, + size_t line_length, + base64_options options) const noexcept override; + const char *find(const char *start, const char *end, + char character) const noexcept override; + const char16_t *find(const char16_t *start, const char16_t *end, + char16_t character) const noexcept override; +#endif // SIMDUTF_FEATURE_BASE64 +}; + +} // namespace westmere +} // namespace simdutf + +#endif // SIMDUTF_WESTMERE_IMPLEMENTATION_H +/* end file src/simdutf/westmere/implementation.h */ +/* begin file src/simdutf/westmere/intrinsics.h */ +#ifndef 
SIMDUTF_WESTMERE_INTRINSICS_H +#define SIMDUTF_WESTMERE_INTRINSICS_H + +#ifdef SIMDUTF_VISUAL_STUDIO + // under clang within visual studio, this will include + #include // visual studio or clang +#else + + #if SIMDUTF_GCC11ORMORE +// We should not get warnings while including yet we do +// under some versions of GCC. +// If the x86intrin.h header has uninitialized values that are problematic, +// it is a GCC issue, we want to ignore these warnings. +SIMDUTF_DISABLE_GCC_WARNING(-Wuninitialized) + #endif + + #include // elsewhere + + #if SIMDUTF_GCC11ORMORE +// cancels the suppression of the -Wuninitialized +SIMDUTF_POP_DISABLE_WARNINGS + #endif + +#endif // SIMDUTF_VISUAL_STUDIO + +#ifdef SIMDUTF_CLANG_VISUAL_STUDIO + /** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + */ + #include // for _mm_alignr_epi8 +#endif + +#endif // SIMDUTF_WESTMERE_INTRINSICS_H +/* end file src/simdutf/westmere/intrinsics.h */ + + // + // The rest need to be inside the region + // +/* begin file src/simdutf/westmere/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "westmere" +// #define SIMDUTF_IMPLEMENTATION westmere +#define SIMDUTF_SIMD_HAS_BYTEMASK 1 + +#if SIMDUTF_CAN_ALWAYS_RUN_WESTMERE +// nothing needed. 
+#else +SIMDUTF_TARGET_WESTMERE +#endif +/* end file src/simdutf/westmere/begin.h */ + + // Declarations +/* begin file src/simdutf/westmere/bitmanipulation.h */ +#ifndef SIMDUTF_WESTMERE_BITMANIPULATION_H +#define SIMDUTF_WESTMERE_BITMANIPULATION_H + +namespace simdutf { +namespace westmere { +namespace { + +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO +simdutf_really_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows + return __popcnt64(input_num); // Visual Studio wants two underscores +} +#else +simdutf_really_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +#if SIMDUTF_NEED_TRAILING_ZEROES +simdutf_really_inline int trailing_zeroes(uint64_t input_num) { + #if SIMDUTF_REGULAR_VISUAL_STUDIO + unsigned long ret; + _BitScanForward64(&ret, input_num); + return (int)ret; + #else // SIMDUTF_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); + #endif // SIMDUTF_REGULAR_VISUAL_STUDIO +} +#endif + +template bool is_power_of_two(T x) { return (x & (x - 1)) == 0; } + +} // unnamed namespace +} // namespace westmere +} // namespace simdutf + +#endif // SIMDUTF_WESTMERE_BITMANIPULATION_H +/* end file src/simdutf/westmere/bitmanipulation.h */ +/* begin file src/simdutf/westmere/simd.h */ +#ifndef SIMDUTF_WESTMERE_SIMD_H +#define SIMDUTF_WESTMERE_SIMD_H + +namespace simdutf { +namespace westmere { +namespace { +namespace simd { + +template struct base { + __m128i value; + + // Zero constructor + simdutf_really_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdutf_really_inline base(const __m128i _value) : value(_value) {} + // Conversion to SIMD register + simdutf_really_inline operator const __m128i &() const { return this->value; } + template + simdutf_really_inline void store_ascii_as_utf16(char16_t *p) const { + __m128i first = _mm_cvtepu8_epi16(*this); + __m128i second = _mm_cvtepu8_epi16(_mm_srli_si128(*this, 8)); + if (big_endian) { + 
const __m128i swap = + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + first = _mm_shuffle_epi8(first, swap); + second = _mm_shuffle_epi8(second, swap); + } + _mm_storeu_si128(reinterpret_cast<__m128i *>(p), first); + _mm_storeu_si128(reinterpret_cast<__m128i *>(p + 8), second); + } + simdutf_really_inline void store_ascii_as_utf32(char32_t *p) const { + _mm_storeu_si128(reinterpret_cast<__m128i *>(p), _mm_cvtepu8_epi32(*this)); + _mm_storeu_si128(reinterpret_cast<__m128i *>(p + 4), + _mm_cvtepu8_epi32(_mm_srli_si128(*this, 4))); + _mm_storeu_si128(reinterpret_cast<__m128i *>(p + 8), + _mm_cvtepu8_epi32(_mm_srli_si128(*this, 8))); + _mm_storeu_si128(reinterpret_cast<__m128i *>(p + 12), + _mm_cvtepu8_epi32(_mm_srli_si128(*this, 12))); + } + // Bit operations + simdutf_really_inline Child operator|(const Child other) const { + return _mm_or_si128(*this, other); + } + simdutf_really_inline Child operator&(const Child other) const { + return _mm_and_si128(*this, other); + } + simdutf_really_inline Child operator^(const Child other) const { + return _mm_xor_si128(*this, other); + } + simdutf_really_inline Child &operator|=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast | other; + return *this_cast; + } +}; + +// Forward-declared so they can be used by splat and friends. 
+template struct simd8; + +template > +struct base8 : base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdutf_really_inline T first() const { return _mm_extract_epi8(*this, 0); } + simdutf_really_inline T last() const { return _mm_extract_epi8(*this, 15); } + simdutf_really_inline base8() : base>() {} + simdutf_really_inline base8(const __m128i _value) : base>(_value) {} + + friend simdutf_really_inline Mask operator==(const simd8 lhs, + const simd8 rhs) { + return _mm_cmpeq_epi8(lhs, rhs); + } + + static const int SIZE = sizeof(base>::value); + + template + simdutf_really_inline simd8 prev(const simd8 prev_chunk) const { + return _mm_alignr_epi8(*this, prev_chunk, 16 - N); + } +}; + +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd8 : base8 { + static simdutf_really_inline simd8 splat(bool _value) { + return _mm_set1_epi8(uint8_t(-(!!_value))); + } + + simdutf_really_inline simd8() : base8() {} + simdutf_really_inline simd8(const __m128i _value) : base8(_value) {} + // Splat constructor + simdutf_really_inline simd8(bool _value) : base8(splat(_value)) {} + + simdutf_really_inline int to_bitmask() const { + return _mm_movemask_epi8(*this); + } + simdutf_really_inline simd8 operator~() const { return *this ^ true; } +}; + +template struct base8_numeric : base8 { + static simdutf_really_inline simd8 splat(T _value) { + return _mm_set1_epi8(_value); + } + static simdutf_really_inline simd8 zero() { return _mm_setzero_si128(); } + static simdutf_really_inline simd8 load(const T values[16]) { + return _mm_loadu_si128(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdutf_really_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, + T v5, T v6, T v7, T v8, T v9, + T v10, T v11, T v12, T v13, + T v14, T v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, + v14, v15); + } + + simdutf_really_inline base8_numeric() : 
base8() {} + simdutf_really_inline base8_numeric(const __m128i _value) + : base8(_value) {} + + // Store to array + simdutf_really_inline void store(T dst[16]) const { + return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); + } + + // Override to distinguish from bool version + simdutf_really_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdutf_really_inline simd8 operator-(const simd8 other) const { + return _mm_sub_epi8(*this, other); + } + simdutf_really_inline simd8 &operator-=(const simd8 other) { + *this = *this - other; + return *static_cast *>(this); + } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior + // for out of range values) + template + simdutf_really_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm_shuffle_epi8(lookup_table, *this); + } + + template + simdutf_really_inline simd8 + lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, + L replace5, L replace6, L replace7, L replace8, L replace9, + L replace10, L replace11, L replace12, L replace13, L replace14, + L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, replace4, replace5, replace6, + replace7, replace8, replace9, replace10, replace11, replace12, + replace13, replace14, replace15)); + } +}; + +// Signed bytes +template <> struct simd8 : base8_numeric { + simdutf_really_inline simd8() : base8_numeric() {} + simdutf_really_inline simd8(const __m128i _value) + : base8_numeric(_value) {} + // Splat constructor + simdutf_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Member-by-member initialization + simdutf_really_inline operator simd8() const; + simdutf_really_inline bool is_ascii() const { + return _mm_movemask_epi8(*this) == 0; + } + + // Order-sensitive comparisons + simdutf_really_inline simd8 operator>(const simd8 other) const { + return _mm_cmpgt_epi8(*this, other); 
+ } + simdutf_really_inline simd8 operator<(const simd8 other) const { + return _mm_cmpgt_epi8(other, *this); + } +}; + +// Unsigned bytes +template <> struct simd8 : base8_numeric { + simdutf_really_inline simd8() : base8_numeric() {} + simdutf_really_inline simd8(const __m128i _value) + : base8_numeric(_value) {} + + // Splat constructor + simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdutf_really_inline simd8(const uint8_t *values) : simd8(load(values)) {} + // Member-by-member initialization + simdutf_really_inline + simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, + uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, + uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) + : simd8(_mm_setr_epi8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, + v12, v13, v14, v15)) {} + + // Saturated math + simdutf_really_inline simd8 + saturating_sub(const simd8 other) const { + return _mm_subs_epu8(*this, other); + } + + // Order-specific operations + simdutf_really_inline simd8 + min_val(const simd8 other) const { + return _mm_min_epu8(*this, other); + } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdutf_really_inline simd8 + gt_bits(const simd8 other) const { + return this->saturating_sub(other); + } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdutf_really_inline simd8 + operator>=(const simd8 other) const { + return other.min_val(*this) == other; + } + + // Bit-specific operations + simdutf_really_inline simd8 bits_not_set() const { + return *this == uint8_t(0); + } + simdutf_really_inline simd8 any_bits_set() const { + return ~this->bits_not_set(); + } + simdutf_really_inline bool is_ascii() const { + return _mm_movemask_epi8(*this) == 0; + } + + simdutf_really_inline bool bits_not_set_anywhere() const { + return _mm_testz_si128(*this, *this); + } + simdutf_really_inline bool 
any_bits_set_anywhere() const { + return !bits_not_set_anywhere(); + } + template simdutf_really_inline simd8 shr() const { + return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); + } + template simdutf_really_inline simd8 shl() const { + return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); + } + + simdutf_really_inline uint64_t sum_bytes() const { + const auto tmp = _mm_sad_epu8(value, _mm_setzero_si128()); + return _mm_extract_epi64(tmp, 0) + _mm_extract_epi64(tmp, 1); + } +}; + +simdutf_really_inline simd8::operator simd8() const { + return this->value; +} + +template struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, + "Westmere kernel should use four registers per 64-byte block."); + simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64 &o) = delete; // no copy allowed + simd8x64 & + operator=(const simd8 other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdutf_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1, + const simd8 chunk2, const simd8 chunk3) + : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdutf_really_inline simd8x64(const T *ptr) + : chunks{simd8::load(ptr), + simd8::load(ptr + sizeof(simd8) / sizeof(T)), + simd8::load(ptr + 2 * sizeof(simd8) / sizeof(T)), + simd8::load(ptr + 3 * sizeof(simd8) / sizeof(T))} {} + + simdutf_really_inline void store(T *ptr) const { + this->chunks[0].store(ptr + sizeof(simd8) * 0 / sizeof(T)); + this->chunks[1].store(ptr + sizeof(simd8) * 1 / sizeof(T)); + this->chunks[2].store(ptr + sizeof(simd8) * 2 / sizeof(T)); + this->chunks[3].store(ptr + sizeof(simd8) * 3 / sizeof(T)); + } + + simdutf_really_inline simd8x64 &operator|=(const simd8x64 &other) { + this->chunks[0] |= other.chunks[0]; + this->chunks[1] |= other.chunks[1]; + this->chunks[2] |= other.chunks[2]; + this->chunks[3] |= other.chunks[3]; + return *this; + } + + simdutf_really_inline simd8 reduce_or() const { + return 
(this->chunks[0] | this->chunks[1]) | + (this->chunks[2] | this->chunks[3]); + } + + simdutf_really_inline bool is_ascii() const { + return this->reduce_or().is_ascii(); + } + + template + simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { + this->chunks[0].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 0); + this->chunks[1].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 1); + this->chunks[2].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 2); + this->chunks[3].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 3); + } + + simdutf_really_inline void store_ascii_as_utf32(char32_t *ptr) const { + this->chunks[0].store_ascii_as_utf32(ptr + sizeof(simd8) * 0); + this->chunks[1].store_ascii_as_utf32(ptr + sizeof(simd8) * 1); + this->chunks[2].store_ascii_as_utf32(ptr + sizeof(simd8) * 2); + this->chunks[3].store_ascii_as_utf32(ptr + sizeof(simd8) * 3); + } + + simdutf_really_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r1 = this->chunks[1].to_bitmask(); + uint64_t r2 = this->chunks[2].to_bitmask(); + uint64_t r3 = this->chunks[3].to_bitmask(); + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdutf_really_inline uint64_t lt(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] < mask, this->chunks[1] < mask, + this->chunks[2] < mask, this->chunks[3] < mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t gt(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] > mask, this->chunks[1] > mask, + this->chunks[2] > mask, this->chunks[3] > mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] == mask, this->chunks[1] == mask, + this->chunks[2] == mask, this->chunks[3] == mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t gteq_unsigned(const uint8_t m) const { + const 
simd8 mask = simd8::splat(m); + return simd8x64(simd8(__m128i(this->chunks[0])) >= mask, + simd8(__m128i(this->chunks[1])) >= mask, + simd8(__m128i(this->chunks[2])) >= mask, + simd8(__m128i(this->chunks[3])) >= mask) + .to_bitmask(); + } +}; // struct simd8x64 + +/* begin file src/simdutf/westmere/simd16-inl.h */ +template struct simd16; + +template > +struct base16 : base> { + simdutf_really_inline base16() : base>() {} + + simdutf_really_inline base16(const __m128i _value) + : base>(_value) {} + + friend simdutf_really_inline Mask operator==(const simd16 lhs, + const simd16 rhs) { + return _mm_cmpeq_epi16(lhs, rhs); + } + + /// the size of vector in bytes + static const int SIZE = sizeof(base>::value); + + /// the number of elements of type T a vector can hold + static const int ELEMENTS = SIZE / sizeof(T); +}; + +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd16 : base16 { + static simdutf_really_inline simd16 splat(bool _value) { + return _mm_set1_epi16(uint16_t(-(!!_value))); + } + + simdutf_really_inline simd16(const __m128i _value) : base16(_value) {} + + // Splat constructor + simdutf_really_inline simd16(bool _value) : base16(splat(_value)) {} + + simdutf_really_inline int to_bitmask() const { + return _mm_movemask_epi8(*this); + } + + simdutf_really_inline simd16 operator~() const { return *this ^ true; } +}; + +template struct base16_numeric : base16 { + static simdutf_really_inline simd16 splat(T _value) { + return _mm_set1_epi16(_value); + } + + static simdutf_really_inline simd16 zero() { return _mm_setzero_si128(); } + + static simdutf_really_inline simd16 load(const T values[8]) { + return _mm_loadu_si128(reinterpret_cast(values)); + } + + simdutf_really_inline base16_numeric() : base16() {} + + simdutf_really_inline base16_numeric(const __m128i _value) + : base16(_value) {} + + // Store to array + simdutf_really_inline void store(T dst[8]) const { + return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), 
*this); + } + + // Override to distinguish from bool version + simdutf_really_inline simd16 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdutf_really_inline simd16 operator+(const simd16 other) const { + return _mm_add_epi16(*this, other); + } + simdutf_really_inline simd16 &operator+=(const simd16 other) { + *this = *this + other; + return *static_cast *>(this); + } +}; + +// Unsigned code units +template <> struct simd16 : base16_numeric { + simdutf_really_inline simd16() : base16_numeric() {} + + simdutf_really_inline simd16(const __m128i _value) + : base16_numeric(_value) {} + + // Splat constructor + simdutf_really_inline simd16(uint16_t _value) : simd16(splat(_value)) {} + + // Array constructor + simdutf_really_inline simd16(const char16_t *values) + : simd16(load(reinterpret_cast(values))) {} + + // Order-specific operations + simdutf_really_inline simd16 + max_val(const simd16 other) const { + return _mm_max_epu16(*this, other); + } + + simdutf_really_inline simd16 + min_val(const simd16 other) const { + return _mm_min_epu16(*this, other); + } + + simdutf_really_inline simd16 + operator<=(const simd16 other) const { + return other.max_val(*this) == other; + } + simdutf_really_inline simd16 + operator>=(const simd16 other) const { + return other.min_val(*this) == other; + } + // Bit-specific operations + simdutf_really_inline simd16 bits_not_set() const { + return *this == uint16_t(0); + } + + simdutf_really_inline simd16 any_bits_set() const { + return ~this->bits_not_set(); + } + + template simdutf_really_inline simd16 shr() const { + return simd16(_mm_srli_epi16(*this, N)); + } + + // Change the endianness + simdutf_really_inline simd16 swap_bytes() const { + const __m128i swap = + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + return _mm_shuffle_epi8(*this, swap); + } + + // Pack with the unsigned saturation of two uint16_t code units into single + // uint8_t 
vector + static simdutf_really_inline simd8 pack(const simd16 &v0, + const simd16 &v1) { + return _mm_packus_epi16(v0, v1); + } + + simdutf_really_inline uint64_t sum() const { + const auto lo_u16 = _mm_and_si128(value, _mm_set1_epi32(0x0000ffff)); + const auto hi_u16 = _mm_srli_epi32(value, 16); + const auto sum_u32 = _mm_add_epi32(lo_u16, hi_u16); + + const auto lo_u32 = _mm_and_si128(sum_u32, _mm_set1_epi64x(0xffffffff)); + const auto hi_u32 = _mm_srli_epi64(sum_u32, 32); + const auto sum_u64 = _mm_add_epi64(lo_u32, hi_u32); + + return uint64_t(_mm_extract_epi64(sum_u64, 0)) + + uint64_t(_mm_extract_epi64(sum_u64, 1)); + } +}; + +template struct simd16x32 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd16); + static_assert(NUM_CHUNKS == 4, + "Westmere kernel should use four registers per 64-byte block."); + simd16 chunks[NUM_CHUNKS]; + + simd16x32(const simd16x32 &o) = delete; // no copy allowed + simd16x32 & + operator=(const simd16 other) = delete; // no assignment allowed + simd16x32() = delete; // no default constructor allowed + + simdutf_really_inline + simd16x32(const simd16 chunk0, const simd16 chunk1, + const simd16 chunk2, const simd16 chunk3) + : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdutf_really_inline simd16x32(const T *ptr) + : chunks{simd16::load(ptr), + simd16::load(ptr + sizeof(simd16) / sizeof(T)), + simd16::load(ptr + 2 * sizeof(simd16) / sizeof(T)), + simd16::load(ptr + 3 * sizeof(simd16) / sizeof(T))} {} + + simdutf_really_inline void store(T *ptr) const { + this->chunks[0].store(ptr + sizeof(simd16) * 0 / sizeof(T)); + this->chunks[1].store(ptr + sizeof(simd16) * 1 / sizeof(T)); + this->chunks[2].store(ptr + sizeof(simd16) * 2 / sizeof(T)); + this->chunks[3].store(ptr + sizeof(simd16) * 3 / sizeof(T)); + } + + simdutf_really_inline simd16 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | + (this->chunks[2] | this->chunks[3]); + } + + simdutf_really_inline bool is_ascii() const { + return 
this->reduce_or().is_ascii(); + } + + simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { + this->chunks[0].store_ascii_as_utf16(ptr + sizeof(simd16) * 0); + this->chunks[1].store_ascii_as_utf16(ptr + sizeof(simd16) * 1); + this->chunks[2].store_ascii_as_utf16(ptr + sizeof(simd16) * 2); + this->chunks[3].store_ascii_as_utf16(ptr + sizeof(simd16) * 3); + } + + simdutf_really_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r1 = this->chunks[1].to_bitmask(); + uint64_t r2 = this->chunks[2].to_bitmask(); + uint64_t r3 = this->chunks[3].to_bitmask(); + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdutf_really_inline void swap_bytes() { + this->chunks[0] = this->chunks[0].swap_bytes(); + this->chunks[1] = this->chunks[1].swap_bytes(); + this->chunks[2] = this->chunks[2].swap_bytes(); + this->chunks[3] = this->chunks[3].swap_bytes(); + } + + simdutf_really_inline uint64_t lteq(const T m) const { + const simd16 mask = simd16::splat(m); + return simd16x32(this->chunks[0] <= mask, this->chunks[1] <= mask, + this->chunks[2] <= mask, this->chunks[3] <= mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t eq(const T m) const { + const simd16 mask = simd16::splat(m); + return simd16x32(this->chunks[0] == mask, this->chunks[1] == mask, + this->chunks[2] == mask, this->chunks[3] == mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t not_in_range(const T low, const T high) const { + const simd16 mask_low = simd16::splat(static_cast(low - 1)); + const simd16 mask_high = simd16::splat(static_cast(high + 1)); + return simd16x32( + (this->chunks[0] >= mask_high) | (this->chunks[0] <= mask_low), + (this->chunks[1] >= mask_high) | (this->chunks[1] <= mask_low), + (this->chunks[2] >= mask_high) | (this->chunks[2] <= mask_low), + (this->chunks[3] >= mask_high) | (this->chunks[3] <= mask_low)) + .to_bitmask(); + } +}; // struct simd16x32 + +simd16 min(const simd16 a, simd16 b) { + 
return _mm_min_epu16(a.value, b.value); +} +/* end file src/simdutf/westmere/simd16-inl.h */ +/* begin file src/simdutf/westmere/simd32-inl.h */ +template struct simd32; + +template <> struct simd32 { + static const size_t SIZE = sizeof(__m128i); + static const size_t ELEMENTS = SIZE / sizeof(uint32_t); + + __m128i value; + + simdutf_really_inline simd32(const __m128i v) : value(v) {} + + template + simdutf_really_inline simd32(const Pointer *ptr) + : value(_mm_loadu_si128(reinterpret_cast(ptr))) {} + + simdutf_really_inline uint64_t sum() const { + return uint64_t(_mm_extract_epi32(value, 0)) + + uint64_t(_mm_extract_epi32(value, 1)) + + uint64_t(_mm_extract_epi32(value, 2)) + + uint64_t(_mm_extract_epi32(value, 3)); + } + + simdutf_really_inline simd32 swap_bytes() const { + const __m128i shuffle = + _mm_setr_epi8(3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 15, 14, 13, 12); + + return _mm_shuffle_epi8(value, shuffle); + } + + template simdutf_really_inline simd32 shr() const { + return _mm_srli_epi32(value, N); + } + + template simdutf_really_inline simd32 shl() const { + return _mm_slli_epi32(value, N); + } + void dump() const { +#ifdef SIMDUTF_LOGGING + printf("[%08x, %08x, %08x, %08x]\n", uint32_t(_mm_extract_epi32(value, 0)), + uint32_t(_mm_extract_epi32(value, 1)), + uint32_t(_mm_extract_epi32(value, 2)), + uint32_t(_mm_extract_epi32(value, 3))); +#endif // SIMDUTF_LOGGING + } + + // operators + simdutf_really_inline simd32 &operator+=(const simd32 other) { + value = _mm_add_epi32(value, other.value); + return *this; + } + + // static members + simdutf_really_inline static simd32 zero() { + return _mm_setzero_si128(); + } + + simdutf_really_inline static simd32 splat(uint32_t v) { + return _mm_set1_epi32(v); + } +}; + +//---------------------------------------------------------------------- + +template <> struct simd32 { + // static const size_t SIZE = sizeof(__m128i); + // static const size_t ELEMENTS = SIZE / sizeof(uint32_t); + + __m128i value; + + 
simdutf_really_inline simd32(const __m128i v) : value(v) {} + + simdutf_really_inline bool any() const { + return _mm_movemask_epi8(value) != 0; + } + + simdutf_really_inline uint8_t to_4bit_bitmask() const { + return uint8_t(_mm_movemask_ps(_mm_castsi128_ps(value))); + } +}; + +//---------------------------------------------------------------------- + +template +simdutf_really_inline simd32 operator|(const simd32 a, + const simd32 b) { + return _mm_or_si128(a.value, b.value); +} + +simdutf_really_inline simd32 min(const simd32 a, + const simd32 b) { + return _mm_min_epu32(a.value, b.value); +} + +simdutf_really_inline simd32 max(const simd32 a, + const simd32 b) { + return _mm_max_epu32(a.value, b.value); +} + +simdutf_really_inline simd32 operator==(const simd32 a, + uint32_t b) { + return _mm_cmpeq_epi32(a.value, _mm_set1_epi32(b)); +} + +simdutf_really_inline simd32 operator&(const simd32 a, + const simd32 b) { + return _mm_and_si128(a.value, b.value); +} + +simdutf_really_inline simd32 operator&(const simd32 a, + uint32_t b) { + return _mm_and_si128(a.value, _mm_set1_epi32(b)); +} + +simdutf_really_inline simd32 operator|(const simd32 a, + uint32_t b) { + return _mm_or_si128(a.value, _mm_set1_epi32(b)); +} + +simdutf_really_inline simd32 operator+(const simd32 a, + const simd32 b) { + return _mm_add_epi32(a.value, b.value); +} + +simdutf_really_inline simd32 operator-(const simd32 a, + uint32_t b) { + return _mm_sub_epi32(a.value, _mm_set1_epi32(b)); +} + +simdutf_really_inline simd32 operator==(const simd32 a, + const simd32 b) { + return _mm_cmpeq_epi32(a.value, b.value); +} + +simdutf_really_inline simd32 operator>=(const simd32 a, + const simd32 b) { + return _mm_cmpeq_epi32(_mm_max_epu32(a.value, b.value), a.value); +} + +simdutf_really_inline simd32 operator!(const simd32 v) { + return _mm_xor_si128(v.value, _mm_set1_epi8(-1)); +} + +simdutf_really_inline simd32 operator>(const simd32 a, + const simd32 b) { + return !(b >= a); +} + +simdutf_really_inline 
simd32 select(const simd32 cond, + const simd32 v_true, + const simd32 v_false) { + return _mm_blendv_epi8(v_false.value, v_true.value, cond.value); +} +/* end file src/simdutf/westmere/simd32-inl.h */ +/* begin file src/simdutf/westmere/simd64-inl.h */ +template struct simd64; + +template <> struct simd64 { + // static const size_t SIZE = sizeof(__m128i); + // static const size_t ELEMENTS = SIZE / sizeof(uint64_t); + + __m128i value; + + simdutf_really_inline simd64(const __m128i v) : value(v) {} + + template + simdutf_really_inline simd64(const Pointer *ptr) + : value(_mm_loadu_si128(reinterpret_cast(ptr))) {} + + simdutf_really_inline uint64_t sum() const { + return _mm_extract_epi64(value, 0) + _mm_extract_epi64(value, 1); + } + + // operators + simdutf_really_inline simd64 &operator+=(const simd64 other) { + value = _mm_add_epi64(value, other.value); + return *this; + } + + // static members + simdutf_really_inline static simd64 zero() { + return _mm_setzero_si128(); + } + + simdutf_really_inline static simd64 splat(uint64_t v) { + return _mm_set1_epi64x(v); + } +}; +/* end file src/simdutf/westmere/simd64-inl.h */ + +simdutf_really_inline simd64 sum_8bytes(const simd8 v) { + return _mm_sad_epu8(v.value, simd8::zero()); +} + +simdutf_really_inline simd8 as_vector_u8(const simd32 v) { + return simd8(v.value); +} + +} // namespace simd +} // unnamed namespace +} // namespace westmere +} // namespace simdutf + +#endif // SIMDUTF_WESTMERE_SIMD_INPUT_H +/* end file src/simdutf/westmere/simd.h */ + +/* begin file src/simdutf/westmere/end.h */ +#if SIMDUTF_CAN_ALWAYS_RUN_WESTMERE +// nothing needed. 
+#else +SIMDUTF_UNTARGET_REGION +#endif + +#undef SIMDUTF_SIMD_HAS_BYTEMASK +/* end file src/simdutf/westmere/end.h */ + +#endif // SIMDUTF_IMPLEMENTATION_WESTMERE +#endif // SIMDUTF_WESTMERE_COMMON_H +/* end file src/simdutf/westmere.h */ +/* begin file src/simdutf/ppc64.h */ +#ifndef SIMDUTF_PPC64_H +#define SIMDUTF_PPC64_H + +#ifdef SIMDUTF_FALLBACK_H + #error "ppc64.h must be included before fallback.h" +#endif + + +#ifndef SIMDUTF_IMPLEMENTATION_PPC64 + #define SIMDUTF_IMPLEMENTATION_PPC64 (SIMDUTF_IS_PPC64) +#endif +#define SIMDUTF_CAN_ALWAYS_RUN_PPC64 \ + SIMDUTF_IMPLEMENTATION_PPC64 &&SIMDUTF_IS_PPC64 + + +#if SIMDUTF_IMPLEMENTATION_PPC64 + +namespace simdutf { +/** + * Implementation for ALTIVEC (PPC64). + */ +namespace ppc64 {} // namespace ppc64 +} // namespace simdutf + +/* begin file src/simdutf/ppc64/implementation.h */ +#ifndef SIMDUTF_PPC64_IMPLEMENTATION_H +#define SIMDUTF_PPC64_IMPLEMENTATION_H + + +namespace simdutf { +namespace ppc64 { + +namespace { +using namespace simdutf; + +template simdutf_really_inline size_t align_down(size_t size) { + return N * (size / N); +} +} // namespace + +class implementation final : public simdutf::implementation { +public: + simdutf_really_inline implementation() + : simdutf::implementation("ppc64", "PPC64 ALTIVEC", + internal::instruction_set::ALTIVEC) {} + +#if SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused int detect_encodings(const char *input, + size_t length) const noexcept final; +#endif // SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf8(const char *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 + simdutf_warn_unused result + validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_ASCII + simdutf_warn_unused bool validate_ascii(const char *buf, + 
size_t len) const noexcept final; + simdutf_warn_unused result + validate_ascii_with_errors(const char *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII + simdutf_warn_unused bool + validate_utf16le_as_ascii(const char16_t *buf, + size_t len) const noexcept final; + + simdutf_warn_unused bool + validate_utf16be_as_ascii(const char16_t *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf16le(const char16_t *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused bool validate_utf16be(const char16_t *buf, + size_t len) const noexcept final; + simdutf_warn_unused result validate_utf16le_with_errors( + const char16_t *buf, size_t len) const noexcept final; + simdutf_warn_unused result validate_utf16be_with_errors( + const char16_t *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf32(const char32_t *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused result validate_utf32_with_errors( + const char32_t *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf8( + const char *buf, size_t len, char *utf8_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf16le( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused 
size_t convert_latin1_to_utf16be( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_latin1_with_errors( + const char *buf, size_t len, char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t convert_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused size_t convert_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t convert_utf8_to_utf32( + const char *buf, size_t len, 
char32_t *utf32_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf32_with_errors( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + convert_utf16le_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16be_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( + const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( + const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16le_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16be_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t convert_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( + 
const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t convert_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf8_with_errors( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + convert_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; + simdutf_warn_unused result + convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + convert_utf32_to_utf16le(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf32_to_utf16be(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + 
convert_valid_utf32_to_utf16le(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_utf16be(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16le_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16be_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16le_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16be_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 + void change_endianness_utf16(const char16_t *buf, size_t length, + char16_t *output) const noexcept final; + simdutf_warn_unused size_t count_utf16le(const char16_t *buf, + size_t length) const noexcept; + simdutf_warn_unused size_t count_utf16be(const char16_t *buf, + size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 + simdutf_warn_unused size_t count_utf8(const char *buf, + size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t + utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t + utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 
&& SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t utf32_length_from_utf16le( + const char16_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t utf32_length_from_utf16be( + const char16_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t + utf16_length_from_utf8(const char *input, size_t length) const noexcept; + simdutf_warn_unused result utf8_length_from_utf16le_with_replacement( + const char16_t *input, size_t length) const noexcept; + ; + simdutf_warn_unused result utf8_length_from_utf16be_with_replacement( + const char16_t *input, size_t length) const noexcept; + ; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + utf32_length_from_utf8(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + latin1_length_from_utf8(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + utf8_length_from_latin1(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_BASE64 + simdutf_warn_unused size_t maximal_binary_length_from_base64( + const char *input, size_t 
length) const noexcept; + simdutf_warn_unused result base64_to_binary( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused full_result base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused result + base64_to_binary(const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused full_result base64_to_binary_details( + const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + size_t binary_to_base64(const char *input, size_t length, char *output, + base64_options options) const noexcept; + + size_t binary_to_base64_with_lines(const char *input, size_t length, + char *output, size_t line_length, + base64_options options) const noexcept; + const char *find(const char *start, const char *end, + char character) const noexcept; + + const char16_t *find(const char16_t *start, const char16_t *end, + char16_t character) const noexcept; +#endif // SIMDUTF_FEATURE_BASE64 + +#ifdef SIMDUTF_INTERNAL_TESTS + virtual std::vector internal_tests() const override; +#endif +#if SIMDUTF_FEATURE_UTF16 + + void to_well_formed_utf16be(const char16_t *input, size_t len, + char16_t *output) const noexcept final; + void to_well_formed_utf16le(const char16_t *input, size_t len, + char16_t *output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 +}; + +} // namespace ppc64 +} // namespace simdutf + +#endif // SIMDUTF_PPC64_IMPLEMENTATION_H +/* end file src/simdutf/ppc64/implementation.h */ + +/* begin file 
src/simdutf/ppc64/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "ppc64" +// #define SIMDUTF_IMPLEMENTATION ppc64 +/* end file src/simdutf/ppc64/begin.h */ + + // Declarations +/* begin file src/simdutf/ppc64/intrinsics.h */ +#ifndef SIMDUTF_PPC64_INTRINSICS_H +#define SIMDUTF_PPC64_INTRINSICS_H + + +// This should be the correct header whether +// you use visual studio or other compilers. +#include + +// These are defined by altivec.h in GCC toolchain, it is safe to undef them. +#ifdef bool + #undef bool +#endif + +#ifdef vector + #undef vector +#endif + +#endif // SIMDUTF_PPC64_INTRINSICS_H +/* end file src/simdutf/ppc64/intrinsics.h */ +/* begin file src/simdutf/ppc64/bitmanipulation.h */ +#ifndef SIMDUTF_PPC64_BITMANIPULATION_H +#define SIMDUTF_PPC64_BITMANIPULATION_H + +namespace simdutf { +namespace ppc64 { +namespace { + +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO +simdutf_really_inline int count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows + return __popcnt64(input_num); // Visual Studio wants two underscores +} +#else +simdutf_really_inline int count_ones(uint64_t input_num) { + return __builtin_popcountll(input_num); +} +#endif + +#if SIMDUTF_NEED_TRAILING_ZEROES +simdutf_really_inline int trailing_zeroes(uint64_t input_num) { + return __builtin_ctzll(input_num); +} +#endif + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdutf + +#endif // SIMDUTF_PPC64_BITMANIPULATION_H +/* end file src/simdutf/ppc64/bitmanipulation.h */ +/* begin file src/simdutf/ppc64/simd.h */ +#ifndef SIMDUTF_PPC64_SIMD_H +#define SIMDUTF_PPC64_SIMD_H + +#include + +namespace simdutf { +namespace ppc64 { +namespace { +namespace simd { + +using vec_bool_t = __vector __bool char; +using vec_bool16_t = __vector __bool short; +using vec_bool32_t = __vector __bool int; +using vec_u8_t = __vector unsigned char; +using vec_i8_t = __vector signed char; +using vec_u16_t = __vector unsigned short; +using vec_i16_t = __vector signed short; 
+using vec_u32_t = __vector unsigned int; +using vec_i32_t = __vector signed int; +using vec_u64_t = __vector unsigned long long; +using vec_i64_t = __vector signed long long; + +// clang-format off +template struct vector_u8_type_for_element_aux { + using type = typename std::conditional::value, vec_bool_t, + typename std::conditional::value, vec_u8_t, + typename std::conditional::value, vec_i8_t, void>::type>::type>::type; + + static_assert(not std::is_same::value, + "accepted element types are 8 bit integers or bool"); +}; + +template struct vector_u16_type_for_element_aux { + using type = typename std::conditional::value, vec_bool16_t, + typename std::conditional::value, vec_u16_t, + typename std::conditional::value, vec_i16_t, void>::type>::type>::type; + + static_assert(not std::is_same::value, + "accepted element types are 16 bit integers or bool"); +}; + +template struct vector_u32_type_for_element_aux { + using type = typename std::conditional::value, vec_bool32_t, + typename std::conditional::value, vec_u32_t, + typename std::conditional::value, vec_i32_t, void>::type>::type>::type; + + static_assert(not std::is_same::value, + "accepted element types are 32 bit integers or bool"); +}; +// clang-format on + +template +using vector_u8_type_for_element = + typename vector_u8_type_for_element_aux::type; + +template +using vector_u16_type_for_element = + typename vector_u16_type_for_element_aux::type; + +template +using vector_u32_type_for_element = + typename vector_u32_type_for_element_aux::type; + +template uint16_t move_mask_u8(T vec) { + const vec_u8_t perm_mask = {15 * 8, 14 * 8, 13 * 8, 12 * 8, 11 * 8, 10 * 8, + 9 * 8, 8 * 8, 7 * 8, 6 * 8, 5 * 8, 4 * 8, + 3 * 8, 2 * 8, 1 * 8, 0 * 8}; + + const auto result = (vec_u64_t)vec_vbpermq((vec_u8_t)vec, perm_mask); +#if SIMDUTF_IS_BIG_ENDIAN + return static_cast(result[0]); +#else + return static_cast(result[1]); +#endif +} + +/* begin file src/simdutf/ppc64/simd8-inl.h */ +// file included directly + +template 
struct base8 { + using vector_type = vector_u8_type_for_element; + vector_type value; + static const int SIZE = sizeof(vector_type); + static const int ELEMENTS = sizeof(vector_type) / sizeof(T); + + // Zero constructor + simdutf_really_inline base8() : value{vec_splats(T(0))} {} + + // Conversion from SIMD register + simdutf_really_inline base8(const vector_type _value) : value{_value} {} + + // Splat scalar + simdutf_really_inline base8(T v) : value{vec_splats(v)} {} + + // Conversion to SIMD register + simdutf_really_inline operator const vector_type &() const { + return this->value; + } + + template simdutf_really_inline void store(U *ptr) const { + vec_xst(value, 0, reinterpret_cast(ptr)); + } + + template void operator|=(const SIMD8 other) { + this->value = vec_or(this->value, other.value); + } + + template vector_type prev_aux(vector_type prev_chunk) const { + vector_type chunk = this->value; +#if !SIMDUTF_IS_BIG_ENDIAN + chunk = (vector_type)vec_reve(this->value); + prev_chunk = (vector_type)vec_reve((vector_type)prev_chunk); +#endif + chunk = (vector_type)vec_sld((vector_type)prev_chunk, (vector_type)chunk, + 16 - N); +#if !SIMDUTF_IS_BIG_ENDIAN + chunk = (vector_type)vec_reve((vector_type)chunk); +#endif + return chunk; + } + + simdutf_really_inline bool is_ascii() const { + return move_mask_u8(this->value) == 0; + } + + simdutf_really_inline uint16_t to_bitmask() const { + return move_mask_u8(value); + } + + template + simdutf_really_inline void store_bytes_as_utf16(char16_t *p) const { + const vector_type zero = vec_splats(T(0)); + + if (big_endian) { + const vec_u8_t perm_lo = {16, 0, 16, 1, 16, 2, 16, 3, + 16, 4, 16, 5, 16, 6, 16, 7}; + const vec_u8_t perm_hi = {16, 8, 16, 9, 16, 10, 16, 11, + 16, 12, 16, 13, 16, 14, 16, 15}; + + const vector_type v0 = vec_perm(value, zero, perm_lo); + const vector_type v1 = vec_perm(value, zero, perm_hi); + +#if defined(__clang__) + vec_xst(v0, 0, reinterpret_cast(p)); + vec_xst(v1, 16, reinterpret_cast(p)); +#else + 
vec_xst(v0, 0, reinterpret_cast(p)); + vec_xst(v1, 16, reinterpret_cast(p)); +#endif // defined(__clang__) + } else { + const vec_u8_t perm_lo = {0, 16, 1, 16, 2, 16, 3, 16, + 4, 16, 5, 16, 6, 16, 7, 16}; + const vec_u8_t perm_hi = {8, 16, 9, 16, 10, 16, 11, 16, + 12, 16, 13, 16, 14, 16, 15, 16}; + + const vector_type v0 = vec_perm(value, zero, perm_lo); + const vector_type v1 = vec_perm(value, zero, perm_hi); + +#if defined(__clang__) + vec_xst(v0, 0, reinterpret_cast(p)); + vec_xst(v1, 16, reinterpret_cast(p)); +#else + vec_xst(v0, 0, reinterpret_cast(p)); + vec_xst(v1, 16, reinterpret_cast(p)); +#endif // defined(__clang__) + } + } + + template + simdutf_really_inline void store_ascii_as_utf16(char16_t *p) const { + store_bytes_as_utf16(p); + } + + simdutf_really_inline void store_bytes_as_utf32(char32_t *p) const { + const vector_type zero = vec_splats(T(0)); + +#if SIMDUTF_IS_BIG_ENDIAN + const vec_u8_t perm0 = {16, 16, 16, 0, 16, 16, 16, 1, + 16, 16, 16, 2, 16, 16, 16, 3}; + + const vec_u8_t perm1 = {16, 16, 16, 4, 16, 16, 16, 5, + 16, 16, 16, 6, 16, 16, 16, 7}; + + const vec_u8_t perm2 = {16, 16, 16, 8, 16, 16, 16, 9, + 16, 16, 16, 10, 16, 16, 16, 11}; + + const vec_u8_t perm3 = {16, 16, 16, 12, 16, 16, 16, 13, + 16, 16, 16, 14, 16, 16, 16, 15}; +#else + const vec_u8_t perm0 = {0, 16, 16, 16, 1, 16, 16, 16, + 2, 16, 16, 16, 3, 16, 16, 16}; + + const vec_u8_t perm1 = {4, 16, 16, 16, 5, 16, 16, 16, + 6, 16, 16, 16, 7, 16, 16, 16}; + + const vec_u8_t perm2 = {8, 16, 16, 16, 9, 16, 16, 16, + 10, 16, 16, 16, 11, 16, 16, 16}; + + const vec_u8_t perm3 = {12, 16, 16, 16, 13, 16, 16, 16, + 14, 16, 16, 16, 15, 16, 16, 16}; +#endif // SIMDUTF_IS_BIG_ENDIAN + + const vector_type v0 = vec_perm(value, zero, perm0); + const vector_type v1 = vec_perm(value, zero, perm1); + const vector_type v2 = vec_perm(value, zero, perm2); + const vector_type v3 = vec_perm(value, zero, perm3); + + constexpr size_t n = base8::SIZE; + +#if defined(__clang__) + vec_xst(v0, 0 * n, 
reinterpret_cast(p)); + vec_xst(v1, 1 * n, reinterpret_cast(p)); + vec_xst(v2, 2 * n, reinterpret_cast(p)); + vec_xst(v3, 3 * n, reinterpret_cast(p)); +#else + vec_xst(v0, 0 * n, reinterpret_cast(p)); + vec_xst(v1, 1 * n, reinterpret_cast(p)); + vec_xst(v2, 2 * n, reinterpret_cast(p)); + vec_xst(v3, 3 * n, reinterpret_cast(p)); +#endif // defined(__clang__) + } + + simdutf_really_inline void store_words_as_utf32(char32_t *p) const { + const vector_type zero = vec_splats(T(0)); + +#if SIMDUTF_IS_BIG_ENDIAN + const vec_u8_t perm0 = {16, 16, 0, 1, 16, 16, 2, 3, + 16, 16, 4, 5, 16, 16, 6, 7}; + const vec_u8_t perm1 = {16, 16, 8, 9, 16, 16, 10, 11, + 16, 16, 12, 13, 16, 16, 14, 15}; +#else + const vec_u8_t perm0 = {0, 1, 16, 16, 2, 3, 16, 16, + 4, 5, 16, 16, 6, 7, 16, 16}; + const vec_u8_t perm1 = {8, 9, 16, 16, 10, 11, 16, 16, + 12, 13, 16, 16, 14, 15, 16, 16}; +#endif // SIMDUTF_IS_BIG_ENDIAN + + const vector_type v0 = vec_perm(value, zero, perm0); + const vector_type v1 = vec_perm(value, zero, perm1); + + constexpr size_t n = base8::SIZE; + +#if defined(__clang__) + vec_xst(v0, 0 * n, reinterpret_cast(p)); + vec_xst(v1, 1 * n, reinterpret_cast(p)); +#else + vec_xst(v0, 0 * n, reinterpret_cast(p)); + vec_xst(v1, 1 * n, reinterpret_cast(p)); +#endif // defined(__clang__) + } + + simdutf_really_inline void store_ascii_as_utf32(char32_t *p) const { + store_bytes_as_utf32(p); + } +}; + +// Forward declaration +template struct simd8; + +template +simd8 operator==(const simd8 a, const simd8 b); + +template +simd8 operator!=(const simd8 a, const simd8 b); + +template simd8 operator&(const simd8 a, const simd8 b); + +template simd8 operator|(const simd8 a, const simd8 b); + +template simd8 operator^(const simd8 a, const simd8 b); + +template simd8 operator+(const simd8 a, const simd8 b); + +template simd8 operator<(const simd8 a, const simd8 b); + +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd8 : base8 { + using super = base8; + + static 
simdutf_really_inline simd8 splat(bool _value) { + return (vector_type)vec_splats((unsigned char)(-(!!_value))); + } + + simdutf_really_inline simd8() : super(vector_type()) {} + simdutf_really_inline simd8(const vector_type _value) : super(_value) {} + // Splat constructor + simdutf_really_inline simd8(bool _value) : base8(splat(_value)) {} + + template + simdutf_really_inline simd8(simd8 other) + : simd8(vector_type(other.value)) {} + + simdutf_really_inline uint16_t to_bitmask() const { + return move_mask_u8(value); + } + + simdutf_really_inline bool any() const { + return !vec_all_eq(this->value, (vector_type)vec_splats(0)); + } + + simdutf_really_inline bool all() const { return to_bitmask() == 0xffff; } + + simdutf_really_inline simd8 operator~() const { + return this->value ^ (vector_type)splat(true); + } +}; + +template struct base8_numeric : base8 { + using super = base8; + using vector_type = typename super::vector_type; + + static simdutf_really_inline simd8 splat(T value) { + return (vector_type)vec_splats(value); + } + + static simdutf_really_inline simd8 zero() { return splat(0); } + + template + static simdutf_really_inline simd8 load(const U *values) { + return vec_xl(0, reinterpret_cast(values)); + } + + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdutf_really_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, + T v5, T v6, T v7, T v8, T v9, + T v10, T v11, T v12, T v13, + T v14, T v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, + v14, v15); + } + + simdutf_really_inline base8_numeric() : base8() {} + simdutf_really_inline base8_numeric(const vector_type _value) + : base8(_value) {} + + // Override to distinguish from bool version + simdutf_really_inline simd8 operator~() const { return *this ^ 0xFFu; } + + simdutf_really_inline simd8 &operator-=(const simd8 other) { + this->value = vec_sub(this->value, other.value); + return *static_cast *>(this); + } + + // Perform a 
lookup assuming the value is between 0 and 16 (undefined behavior + // for out of range values) + template + simdutf_really_inline simd8 lookup_16(simd8 lookup_table) const { + return (vector_type)vec_perm((vector_type)lookup_table, + (vector_type)lookup_table, this->value); + } + + template + simdutf_really_inline simd8 + lookup_32(const simd8 lookup_table_lo, + const simd8 lookup_table_hi) const { + return (vector_type)vec_perm(lookup_table_lo.value, lookup_table_hi.value, + this->value); + } + + template + simdutf_really_inline simd8 + lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, + L replace5, L replace6, L replace7, L replace8, L replace9, + L replace10, L replace11, L replace12, L replace13, L replace14, + L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, replace4, replace5, replace6, + replace7, replace8, replace9, replace10, replace11, replace12, + replace13, replace14, replace15)); + } +}; + +// Unsigned bytes +template <> struct simd8 : base8_numeric { + using Self = simd8; + + simdutf_really_inline simd8() : base8_numeric() {} + simdutf_really_inline simd8(const vector_type _value) + : base8_numeric(_value) {} + // Splat constructor + simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdutf_really_inline simd8(const uint8_t *values) : simd8(load(values)) {} + // Member-by-member initialization + simdutf_really_inline + simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, + uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, + uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) + : simd8((vector_type){v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, + v12, v13, v14, v15}) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdutf_really_inline static simd8 + repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, + uint8_t v5, uint8_t v6, uint8_t 
v7, uint8_t v8, uint8_t v9, + uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, + uint8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); + } + + simdutf_really_inline bool is_ascii() const { + return move_mask_u8(this->value) == 0; + } + + template + simdutf_really_inline simd8(simd8 other) + : simd8(vector_type(other.value)) {} + + template + simdutf_really_inline Self prev(const Self prev_chunk) const { + return prev_aux(prev_chunk.value); + } + + // Saturated math + simdutf_really_inline simd8 + saturating_sub(const simd8 other) const { + return (vector_type)vec_subs(this->value, (vector_type)other); + } + + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdutf_really_inline simd8 + gt_bits(const simd8 other) const { + return this->saturating_sub(other); + } + + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdutf_really_inline simd8 + lt_bits(const simd8 other) const { + return other.saturating_sub(*this); + } + + // Bit-specific operations + simdutf_really_inline bool bits_not_set_anywhere() const { + return vec_all_eq(this->value, (vector_type)vec_splats(0)); + } + + simdutf_really_inline bool any_bits_set_anywhere() const { + return !bits_not_set_anywhere(); + } + + template simdutf_really_inline simd8 shr() const { + return simd8( + (vector_type)vec_sr(this->value, (vector_type)vec_splat_u8(N))); + } + + template simdutf_really_inline simd8 shl() const { + return simd8( + (vector_type)vec_sl(this->value, (vector_type)vec_splat_u8(N))); + } + void dump() const { +#ifdef SIMDUTF_LOGGING + uint8_t tmp[16]; + store(tmp); + for (int i = 0; i < 16; i++) { + if (i == 0) { + printf("[%02x", tmp[i]); + } else if (i == 15) { + printf(" %02x]", tmp[i]); + } else { + printf(" %02x", tmp[i]); + } + } + putchar('\n'); +#endif // SIMDUTF_LOGGING + } + + void dump_ascii() const { +#ifdef SIMDUTF_LOGGING + uint8_t tmp[16]; + store(tmp); + for (int i 
= 0; i < 16; i++) { + if (i == 0) { + printf("[%c", tmp[i]); + } else if (i == 15) { + printf("%c]", tmp[i]); + } else { + printf("%c", tmp[i]); + } + } + putchar('\n'); +#endif // SIMDUTF_LOGGING + } +}; + +// Signed bytes +template <> struct simd8 : base8_numeric { + simdutf_really_inline simd8() : base8_numeric() {} + simdutf_really_inline simd8(const vector_type _value) + : base8_numeric(_value) {} + + template + simdutf_really_inline simd8(simd8 other) + : simd8(vector_type(other.value)) {} + + // Splat constructor + simdutf_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdutf_really_inline simd8(const int8_t *values) : simd8(load(values)) {} + + simdutf_really_inline operator simd8() const; + + // Saturated math + simdutf_really_inline simd8 + saturating_add(const simd8 other) const { + return (vector_type)vec_adds(this->value, other.value); + } + + void dump() const { + int8_t tmp[16]; + store(tmp); + for (int i = 0; i < 16; i++) { + if (i == 0) { + printf("[%02x", tmp[i]); + } else if (i == 15) { + printf("%02x]", tmp[i]); + } else { + printf("%02x", tmp[i]); + } + } + putchar('\n'); + } +}; + +template +simd8 operator==(const simd8 a, const simd8 b) { + return vec_cmpeq(a.value, b.value); +} + +template +simd8 operator!=(const simd8 a, const simd8 b) { + return vec_cmpne(a.value, b.value); +} + +template simd8 operator&(const simd8 a, const simd8 b) { + return vec_and(a.value, b.value); +} + +template simd8 operator&(const simd8 a, U b) { + return vec_and(a.value, vec_splats(T(b))); +} + +template simd8 operator|(const simd8 a, const simd8 b) { + return vec_or(a.value, b.value); +} + +template simd8 operator^(const simd8 a, const simd8 b) { + return vec_xor(a.value, b.value); +} + +template simd8 operator^(const simd8 a, U b) { + return vec_xor(a.value, vec_splats(T(b))); +} + +template simd8 operator+(const simd8 a, const simd8 b) { + return vec_add(a.value, b.value); +} + +template simd8 operator+(const simd8 a, U 
b) { + return vec_add(a.value, vec_splats(T(b))); +} + +simdutf_really_inline simd8::operator simd8() const { + return (simd8::vector_type)value; +} + +template +simd8 operator<(const simd8 a, const simd8 b) { + return vec_cmplt(a.value, b.value); +} + +template +simd8 operator>(const simd8 a, const simd8 b) { + return vec_cmpgt(a.value, b.value); +} + +template +simd8 operator>=(const simd8 a, const simd8 b) { + return vec_cmpge(a.value, b.value); +} + +template struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static constexpr size_t ELEMENTS = simd8::ELEMENTS; + + static_assert(NUM_CHUNKS == 4, + "PPC64 kernel should use four registers per 64-byte block."); + simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64 &o) = delete; // no copy allowed + simd8x64 & + operator=(const simd8 other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + simd8x64(simd8x64 &&) = default; + + simdutf_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1, + const simd8 chunk2, const simd8 chunk3) + : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdutf_really_inline simd8x64(const T *ptr) + : chunks{simd8::load(ptr), + simd8::load(ptr + sizeof(simd8) / sizeof(T)), + simd8::load(ptr + 2 * sizeof(simd8) / sizeof(T)), + simd8::load(ptr + 3 * sizeof(simd8) / sizeof(T))} {} + + simdutf_really_inline void store(T *ptr) const { + this->chunks[0].store(ptr + ELEMENTS * 0); + this->chunks[1].store(ptr + ELEMENTS * 1); + this->chunks[2].store(ptr + ELEMENTS * 2); + this->chunks[3].store(ptr + ELEMENTS * 3); + } + + simdutf_really_inline simd8x64 &operator|=(const simd8x64 &other) { + this->chunks[0] |= other.chunks[0]; + this->chunks[1] |= other.chunks[1]; + this->chunks[2] |= other.chunks[2]; + this->chunks[3] |= other.chunks[3]; + return *this; + } + + simdutf_really_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | + (this->chunks[2] | this->chunks[3]); + } + + simdutf_really_inline 
bool is_ascii() const { + return this->reduce_or().is_ascii(); + } + + template + simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { + this->chunks[0].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 0); + this->chunks[1].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 1); + this->chunks[2].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 2); + this->chunks[3].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 3); + } + + simdutf_really_inline void store_ascii_as_utf32(char32_t *ptr) const { + this->chunks[0].store_ascii_as_utf32(ptr + sizeof(simd8) * 0); + this->chunks[1].store_ascii_as_utf32(ptr + sizeof(simd8) * 1); + this->chunks[2].store_ascii_as_utf32(ptr + sizeof(simd8) * 2); + this->chunks[3].store_ascii_as_utf32(ptr + sizeof(simd8) * 3); + } + + simdutf_really_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r1 = this->chunks[1].to_bitmask(); + uint64_t r2 = this->chunks[2].to_bitmask(); + uint64_t r3 = this->chunks[3].to_bitmask(); + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdutf_really_inline uint64_t lt(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] < mask, this->chunks[1] < mask, + this->chunks[2] < mask, this->chunks[3] < mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t gt(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] > mask, this->chunks[1] > mask, + this->chunks[2] > mask, this->chunks[3] > mask) + .to_bitmask(); + } + simdutf_really_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] == mask, this->chunks[1] == mask, + this->chunks[2] == mask, this->chunks[3] == mask) + .to_bitmask(); + } + simdutf_really_inline uint64_t gteq_unsigned(const uint8_t m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(simd8(this->chunks[0]) >= mask, + simd8(this->chunks[1]) >= mask, + 
simd8(this->chunks[2]) >= mask, + simd8(this->chunks[3]) >= mask) + .to_bitmask(); + } + + void dump() const { + puts(""); + for (int i = 0; i < 4; i++) { + printf("chunk[%d] = ", i); + this->chunks[i].dump(); + } + } +}; // struct simd8x64 + +simdutf_really_inline simd8 avg(const simd8 a, + const simd8 b) { + return vec_avg(a.value, b.value); +} +/* end file src/simdutf/ppc64/simd8-inl.h */ +/* begin file src/simdutf/ppc64/simd16-inl.h */ +// file included directly + +template struct simd16; + +template struct base16 { + using vector_type = vector_u16_type_for_element; + static const int SIZE = sizeof(vector_type); + static const int ELEMENTS = sizeof(vector_type) / sizeof(T); + + vector_type value; + + // Zero constructor + simdutf_really_inline base16() : value{vector_type()} {} + + // Conversion from SIMD register + simdutf_really_inline base16(const vector_type _value) : value{_value} {} + void dump() const { +#ifdef SIMDUTF_LOGGING + uint16_t tmp[8]; + vec_xst(value, 0, reinterpret_cast(tmp)); + for (int i = 0; i < 8; i++) { + if (i == 0) { + printf("[%04x", tmp[i]); + } else if (i == 8 - 1) { + printf(" %04x]", tmp[i]); + } else { + printf(" %04x", tmp[i]); + } + } + putchar('\n'); +#endif // SIMDUTF_LOGGING + } +}; + +// Forward declaration +template struct simd16; + +template +simd16 operator==(const simd16 a, const simd16 b); + +template +simd16 operator==(const simd16 a, U b); + +template simd16 operator&(const simd16 a, const simd16 b); + +template simd16 operator|(const simd16 a, const simd16 b); + +template simd16 operator|(const simd16 a, U b); + +template simd16 operator^(const simd16 a, U b); + +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd16 : base16 { + static simdutf_really_inline simd16 splat(bool _value) { + return (vector_type)vec_splats(uint16_t(-(!!_value))); + } + + simdutf_really_inline simd16() : base16() {} + + simdutf_really_inline simd16(const vector_type _value) + : base16(_value) {} + + // Splat 
constructor + simdutf_really_inline simd16(bool _value) : base16(splat(_value)) {} + + simdutf_really_inline uint16_t to_bitmask() const { + return move_mask_u8(value); + } + + simdutf_really_inline bool any() const { + const auto tmp = vec_u64_t(value); + + return tmp[0] || tmp[1]; // Note: logical or, not binary one + } + + simdutf_really_inline bool is_zero() const { + const auto tmp = vec_u64_t(value); + + return (tmp[0] | tmp[1]) == 0; + } + + simdutf_really_inline simd16 &operator|=(const simd16 rhs) { + value = vec_or(this->value, rhs.value); + return *this; + } +}; + +template struct base16_numeric : base16 { + using vector_type = typename base16::vector_type; + + static simdutf_really_inline simd16 splat(T _value) { + return vec_splats(_value); + } + + static simdutf_really_inline simd16 zero() { return splat(0); } + + template + static simdutf_really_inline simd16 load(const U *ptr) { + return vec_xl(0, reinterpret_cast(ptr)); + } + + simdutf_really_inline base16_numeric() : base16() {} + simdutf_really_inline base16_numeric(const vector_type _value) + : base16(_value) {} + + // Store to array + template simdutf_really_inline void store(U *dst) const { +#if defined(__clang__) + return vec_xst(this->value, 0, reinterpret_cast(dst)); +#else + return vec_xst(this->value, 0, reinterpret_cast(dst)); +#endif // defined(__clang__) + } + + // Override to distinguish from bool version + simdutf_really_inline simd16 operator~() const { + return vec_xor(this->value, vec_splats(T(0xffff))); + } +}; + +// Signed code units +template <> struct simd16 : base16_numeric { + simdutf_really_inline simd16() : base16_numeric() {} + simdutf_really_inline simd16(const vector_type _value) + : base16_numeric(_value) {} + // Splat constructor + simdutf_really_inline simd16(int16_t _value) : simd16(splat(_value)) {} + // Array constructor + simdutf_really_inline operator simd16() const; +}; + +// Unsigned code units +template <> struct simd16 : base16_numeric { + 
simdutf_really_inline simd16() : base16_numeric() {} + simdutf_really_inline simd16(const vector_type _value) + : base16_numeric(_value) {} + + // Splat constructor + simdutf_really_inline simd16(uint16_t _value) : simd16(splat(_value)) {} + + // Array constructor + simdutf_really_inline simd16(const char16_t *values) + : simd16(load(reinterpret_cast(values))) {} + + simdutf_really_inline bool is_ascii() const { + return vec_all_lt(value, vec_splats(uint16_t(128))); + } + + // Order-specific operations + simdutf_really_inline simd16 + max_val(const simd16 other) const { + return vec_max(this->value, other.value); + } + simdutf_really_inline simd16 + min_val(const simd16 other) const { + return vec_min(this->value, other.value); + } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdutf_really_inline simd16 + operator<=(const simd16 other) const { + return other.max_val(*this) == other; + } + + simdutf_really_inline simd16 + operator>=(const simd16 other) const { + return other.min_val(*this) == other; + } + + simdutf_really_inline simd16 + operator<(const simd16 other) const { + return vec_cmplt(value, other.value); + } + + // Bit-specific operations + template simdutf_really_inline simd16 shr() const { + return vec_sr(value, vec_splats(uint16_t(N))); + } + + template simdutf_really_inline simd16 shl() const { + return vec_sl(value, vec_splats(uint16_t(N))); + } + + // Change the endianness + simdutf_really_inline simd16 swap_bytes() const { + return vec_revb(value); + } + + // Pack with the unsigned saturation of two uint16_t code units into single + // uint8_t vector + static simdutf_really_inline simd8 pack(const simd16 &v0, + const simd16 &v1) { + return vec_packs(v0.value, v1.value); + } +}; + +template +simd16 operator==(const simd16 a, const simd16 b) { + return vec_cmpeq(a.value, b.value); +} + +template +simd16 operator==(const simd16 a, U b) { + return vec_cmpeq(a.value, vec_splats(T(b))); +} + +template +simd16 
operator&(const simd16 a, const simd16 b) { + return vec_and(a.value, b.value); +} + +template simd16 operator&(const simd16 a, U b) { + return vec_and(a.value, vec_splats(T(b))); +} + +template +simd16 operator|(const simd16 a, const simd16 b) { + return vec_or(a.value, b.value); +} + +template simd16 operator|(const simd16 a, U b) { + return vec_or(a.value, vec_splats(T(b))); +} + +template +simd16 operator^(const simd16 a, const simd16 b) { + return vec_xor(a.value, b.value); +} + +template simd16 operator^(const simd16 a, U b) { + return vec_xor(a.value, vec_splats(T(b))); +} + +simdutf_really_inline simd16::operator simd16() const { + return (vec_u16_t)(value); +} + +template struct simd16x32 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd16); + static_assert(NUM_CHUNKS == 4, + "AltiVec kernel should use four registers per 64-byte block."); + simd16 chunks[NUM_CHUNKS]; + + simd16x32(const simd16x32 &o) = delete; // no copy allowed + simd16x32 & + operator=(const simd16 other) = delete; // no assignment allowed + simd16x32() = delete; // no default constructor allowed + + simdutf_really_inline + simd16x32(const simd16 chunk0, const simd16 chunk1, + const simd16 chunk2, const simd16 chunk3) + : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdutf_really_inline simd16x32(const T *ptr) + : chunks{simd16::load(ptr), + simd16::load(ptr + sizeof(simd16) / sizeof(T)), + simd16::load(ptr + 2 * sizeof(simd16) / sizeof(T)), + simd16::load(ptr + 3 * sizeof(simd16) / sizeof(T))} {} + + simdutf_really_inline void store(T *ptr) const { + this->chunks[0].store(ptr + sizeof(simd16) * 0 / sizeof(T)); + this->chunks[1].store(ptr + sizeof(simd16) * 1 / sizeof(T)); + this->chunks[2].store(ptr + sizeof(simd16) * 2 / sizeof(T)); + this->chunks[3].store(ptr + sizeof(simd16) * 3 / sizeof(T)); + } + + simdutf_really_inline simd16 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | + (this->chunks[2] | this->chunks[3]); + } + + simdutf_really_inline bool is_ascii() 
const { + return this->reduce_or().is_ascii(); + } + + simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { + this->chunks[0].store_ascii_as_utf16(ptr + sizeof(simd16) * 0); + this->chunks[1].store_ascii_as_utf16(ptr + sizeof(simd16) * 1); + this->chunks[2].store_ascii_as_utf16(ptr + sizeof(simd16) * 2); + this->chunks[3].store_ascii_as_utf16(ptr + sizeof(simd16) * 3); + } + + simdutf_really_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r1 = this->chunks[1].to_bitmask(); + uint64_t r2 = this->chunks[2].to_bitmask(); + uint64_t r3 = this->chunks[3].to_bitmask(); + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdutf_really_inline void swap_bytes() { + this->chunks[0] = this->chunks[0].swap_bytes(); + this->chunks[1] = this->chunks[1].swap_bytes(); + this->chunks[2] = this->chunks[2].swap_bytes(); + this->chunks[3] = this->chunks[3].swap_bytes(); + } + + simdutf_really_inline uint64_t gt(const T m) const { + const simd16 mask = simd16::splat(m); + return simd16x32(this->chunks[0] > mask, this->chunks[1] > mask, + this->chunks[2] > mask, this->chunks[3] > mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t lteq(const T m) const { + const simd16 mask = simd16::splat(m); + return simd16x32(this->chunks[0] <= mask, this->chunks[1] <= mask, + this->chunks[2] <= mask, this->chunks[3] <= mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t eq(const T m) const { + const simd16 mask = simd16::splat(m); + return simd16x32(this->chunks[0] == mask, this->chunks[1] == mask, + this->chunks[2] == mask, this->chunks[3] == mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t not_in_range(const T low, const T high) const { + const simd16 mask_low = simd16::splat(static_cast(low - 1)); + const simd16 mask_high = simd16::splat(static_cast(high + 1)); + return simd16x32( + (this->chunks[0] >= mask_high) | (this->chunks[0] <= mask_low), + (this->chunks[1] >= mask_high) | 
(this->chunks[1] <= mask_low), + (this->chunks[2] >= mask_high) | (this->chunks[2] <= mask_low), + (this->chunks[3] >= mask_high) | (this->chunks[3] <= mask_low)) + .to_bitmask(); + } +}; // struct simd16x32 +/* end file src/simdutf/ppc64/simd16-inl.h */ +/* begin file src/simdutf/ppc64/simd32-inl.h */ +// file included directly + +template struct simd32; + +template struct base32 { + using vector_type = vector_u32_type_for_element; + static const int SIZE = sizeof(vector_type); + static const int ELEMENTS = sizeof(vector_type) / sizeof(T); + + vector_type value; + + // Zero constructor + simdutf_really_inline base32() : value{vector_type()} {} + + // Conversion from SIMD register + simdutf_really_inline base32(const vector_type _value) : value{_value} {} + + // Splat for scalar + simdutf_really_inline base32(T scalar) : value{vec_splats(scalar)} {} + + template + simdutf_really_inline base32(const Pointer *ptr) + : base32(vec_xl(0, reinterpret_cast(ptr))) {} + + // Store to array + template simdutf_really_inline void store(U *dst) const { +#if defined(__clang__) + return vec_xst(this->value, 0, reinterpret_cast(dst)); +#else + return vec_xst(this->value, 0, reinterpret_cast(dst)); +#endif // defined(__clang__) + } + void dump(const char *name = nullptr) const { +#ifdef SIMDUTF_LOGGING + if (name != nullptr) { + printf("%-10s = ", name); + } + + uint32_t tmp[4]; + vec_xst(value, 0, reinterpret_cast(tmp)); + for (int i = 0; i < 4; i++) { + if (i == 0) { + printf("[%08x", tmp[i]); + } else if (i == 4 - 1) { + printf(" %08x]", tmp[i]); + } else { + printf(" %08x", tmp[i]); + } + } + putchar('\n'); +#endif // SIMDUTF_LOGGING + } +}; + +template struct base32_numeric : base32 { + using super = base32; + using vector_type = typename super::vector_type; + + static simdutf_really_inline simd32 splat(T _value) { + return vec_splats(_value); + } + + static simdutf_really_inline simd32 zero() { return splat(0); } + + template + static simdutf_really_inline simd32 load(const U 
*values) {
+    return vec_xl(0, reinterpret_cast(values));
+  }
+
+  simdutf_really_inline base32_numeric() : base32() {}
+
+  simdutf_really_inline base32_numeric(const vector_type _value)
+      : base32(_value) {}
+
+  // Addition/subtraction are the same for signed and unsigned
+  simdutf_really_inline simd32 operator+(const simd32 other) const {
+    return vec_add(this->value, other.value);
+  }
+
+  simdutf_really_inline simd32 operator-(const simd32 other) const {
+    return vec_sub(this->value, other.value);
+  }
+
+  // The compound forms are built on the binary operators above and return
+  // *this downcast to the derived simd32 type.
+  simdutf_really_inline simd32 &operator+=(const simd32 other) {
+    *this = *this + other;
+    return *static_cast *>(this);
+  }
+
+  simdutf_really_inline simd32 &operator-=(const simd32 other) {
+    *this = *this - other;
+    return *static_cast *>(this);
+  }
+};
+
+// Forward declaration
+template struct simd32;
+
+// Lane-wise operators on 32-bit vectors are declared here and defined after
+// the simd32 specialisations below.
+template
+simd32 operator==(const simd32 a, const simd32 b);
+
+template
+simd32 operator!=(const simd32 a, const simd32 b);
+
+template
+simd32 operator>(const simd32 a, const simd32 b);
+
+template simd32 operator==(const simd32 a, T b);
+
+template simd32 operator!=(const simd32 a, T b);
+
+template simd32 operator&(const simd32 a, const simd32 b);
+
+template simd32 operator|(const simd32 a, const simd32 b);
+
+template simd32 operator^(const simd32 a, const simd32 b);
+
+// SIMD byte mask type (returned by things like eq and gt)
+template <> struct simd32 : base32 {
+  // Broadcasts a bool to every lane: uint32_t(-(!!_value)) is 0xffffffff for
+  // true and 0 for false.
+  static simdutf_really_inline simd32 splat(bool _value) {
+    return (vector_type)vec_splats(uint32_t(-(!!_value)));
+  }
+
+  simdutf_really_inline simd32(const vector_type _value)
+      : base32(_value) {}
+
+  // Splat constructor
+  simdutf_really_inline simd32(bool _value) : base32(splat(_value)) {}
+
+  // Delegates to move_mask_u8, which is defined outside this excerpt.
+  // NOTE(review): presumably yields one bit per byte (four bits per 32-bit
+  // lane) -- confirm against move_mask_u8.
+  simdutf_really_inline uint16_t to_bitmask() const {
+    return move_mask_u8(value);
+  }
+
+  // True when any bit of the mask is set; the register is inspected as two
+  // 64-bit halves.
+  simdutf_really_inline bool any() const {
+    const vec_u64_t tmp = (vec_u64_t)value;
+
+    return tmp[0] || tmp[1]; // Note: logical or, not binary one
+  }
+
+  // True when every bit of the mask is clear (the complement of any()).
+  simdutf_really_inline bool is_zero() const {
+    const vec_u64_t tmp = (vec_u64_t)value;
+
+    return (tmp[0] | tmp[1]) == 0;
+  }
+
+  // Bitwise complement, computed as xor with an all-ones vector.
+  simdutf_really_inline simd32 operator~() const {
+    return (vec_bool32_t)vec_xor(this->value, vec_splats(uint32_t(0xffffffff)));
+  }
+};
+
+// Unsigned code units
+template <> struct simd32 : base32_numeric {
+  simdutf_really_inline simd32() : base32_numeric() {}
+
+  simdutf_really_inline simd32(const vector_type _value)
+      : base32_numeric(_value) {}
+
+  // Splat constructor
+  simdutf_really_inline simd32(uint32_t _value) : simd32(splat(_value)) {}
+
+  // Array constructor
+  simdutf_really_inline simd32(const char32_t *values)
+      : simd32(load(reinterpret_cast(values))) {}
+
+  // Bit-specific operations
+  // shr/shl shift every lane by the compile-time constant N.
+  template simdutf_really_inline simd32 shr() const {
+    return vec_sr(value, vec_splats(uint32_t(N)));
+  }
+
+  template simdutf_really_inline simd32 shl() const {
+    return vec_sl(value, vec_splats(uint32_t(N)));
+  }
+
+  // Change the endianness
+  simdutf_really_inline simd32 swap_bytes() const {
+    return vec_revb(value);
+  }
+
+  // Horizontal sum of the four lanes; each lane is widened to 64 bits before
+  // adding, so the total cannot wrap.
+  simdutf_really_inline uint64_t sum() const {
+    return uint64_t(value[0]) + uint64_t(value[1]) + uint64_t(value[2]) +
+           uint64_t(value[3]);
+  }
+
+  // Narrows two 32-bit vectors into one 16-bit vector with vec_packs (the
+  // saturating pack intrinsic).
+  static simdutf_really_inline simd16
+  pack(const simd32 &v0, const simd32 &v1) {
+    return vec_packs(v0.value, v1.value);
+  }
+};
+
+// Lane-wise comparisons; each returns a mask vector.
+template
+simd32 operator==(const simd32 a, const simd32 b) {
+  return vec_cmpeq(a.value, b.value);
+}
+
+template
+simd32 operator!=(const simd32 a, const simd32 b) {
+  return vec_cmpne(a.value, b.value);
+}
+
+// Scalar right-hand side: b is broadcast to every lane before comparing.
+template simd32 operator==(const simd32 a, T b) {
+  return vec_cmpeq(a.value, vec_splats(b));
+}
+
+template simd32 operator!=(const simd32 a, T b) {
+  return vec_cmpne(a.value, vec_splats(b));
+}
+
+template
+simd32 operator>(const simd32 a, const simd32 b) {
+  return vec_cmpgt(a.value, b.value);
+}
+
+template
+simd32 operator>=(const simd32 a, const simd32 b) {
+  return vec_cmpge(a.value, b.value);
+}
+
+// Bitwise operators; the overloads taking a scalar U convert it to T and
+// broadcast it first.
+template
+simd32 operator&(const simd32 a, const simd32 b) {
+  return vec_and(a.value, b.value);
+}
+
+template simd32 operator&(const simd32 a, U b) {
+  return vec_and(a.value, vec_splats(T(b)));
+}
+
+template
+simd32 operator|(const simd32 a, const simd32 b) {
+  return vec_or(a.value, b.value);
+}
+
+template
+simd32 operator^(const simd32 a, const simd32 b) {
+  return vec_xor(a.value, b.value);
+}
+
+template simd32 operator^(const simd32 a, U b) {
+  return vec_xor(a.value, vec_splats(T(b)));
+}
+
+// Lane-wise maximum.
+template simd32 max_val(const simd32 a, const simd32 b) {
+  return vec_max(a.value, b.value);
+}
+
+// Lane-wise minimum. The parameters are named (b, a) in the signature; the
+// result does not depend on the order.
+template
+simdutf_really_inline simd32 min(const simd32 b, const simd32 a) {
+  return vec_min(a.value, b.value);
+}
+/* end file src/simdutf/ppc64/simd32-inl.h */
+
+// select(cond, val_true, val_false): bitwise blend through vec_sel, taking
+// val_true where the corresponding cond bit is set and val_false elsewhere.
+// The overloads taking a scalar cond broadcast it with vec_splats first.
+template
+simd8 select(const simd8 cond, const simd8 val_true,
+             const simd8 val_false) {
+  return vec_sel(val_false.value, val_true.value, cond.value);
+}
+
+template
+simd8 select(const T cond, const simd8 val_true,
+             const simd8 val_false) {
+  return vec_sel(val_false.value, val_true.value, vec_splats(cond));
+}
+
+template
+simd16 select(const simd16 cond, const simd16 val_true,
+              const simd16 val_false) {
+  return vec_sel(val_false.value, val_true.value, cond.value);
+}
+
+template
+simd16 select(const T cond, const simd16 val_true,
+              const simd16 val_false) {
+  return vec_sel(val_false.value, val_true.value, vec_splats(cond));
+}
+
+template
+simd32 select(const simd32 cond, const simd32 val_true,
+              const simd32 val_false) {
+  return vec_sel(val_false.value, val_true.value, cond.value);
+}
+
+template
+simd32 select(const T cond, const simd32 val_true,
+              const simd32 val_false) {
+  return vec_sel(val_false.value, val_true.value, vec_splats(cond));
+}
+
+using vector_u8 = simd8;
+using vector_u16 = simd16;
+using vector_u32 = simd32;
+using vector_i8 = simd8;
+
+// as_vector_*: re-type a vector by casting its underlying register to the
+// destination wrapper's vector_type.
+simdutf_really_inline vector_u8 as_vector_u8(const vector_u16 v) {
+  return vector_u8::vector_type(v.value);
+}
+
+simdutf_really_inline vector_u8 as_vector_u8(const vector_u32 v) {
+  return vector_u8::vector_type(v.value);
+}
+
+simdutf_really_inline vector_u8 as_vector_u8(const vector_i8 v) {
+  return vector_u8::vector_type(v.value);
+}
+
+simdutf_really_inline vector_u8 as_vector_u8(const simd16 v) {
+  return vector_u8::vector_type(v.value);
+}
+
+simdutf_really_inline vector_i8 as_vector_i8(const vector_u8 v) {
+  return vector_i8::vector_type(v.value);
+}
+
+simdutf_really_inline vector_u16 as_vector_u16(const vector_u8 v) {
+  return vector_u16::vector_type(v.value);
+}
+
+simdutf_really_inline vector_u16 as_vector_u16(const simd16 v) {
+  return vector_u16::vector_type(v.value);
+}
+
+simdutf_really_inline vector_u32 as_vector_u32(const vector_u8 v) {
+  return vector_u32::vector_type(v.value);
+}
+
+simdutf_really_inline vector_u32 as_vector_u32(const vector_u16 v) {
+  return vector_u32::vector_type(v.value);
+}
+
+// Lane-wise maximum of two, respectively three, 32-bit vectors.
+simdutf_really_inline vector_u32 max(vector_u32 a, vector_u32 b) {
+  return vec_max(a.value, b.value);
+}
+
+simdutf_really_inline vector_u32 max(vector_u32 a, vector_u32 b, vector_u32 c) {
+  return max(max(a, b), c);
+}
+
+// Thin wrapper over vec_sum4s: folds each group of four bytes of `bytes`
+// into the matching 32-bit lane of `acc`.
+simdutf_really_inline vector_u32 sum4bytes(vector_u8 bytes, vector_u32 acc) {
+  return vec_sum4s(bytes.value, acc.value);
+}
+
+} // namespace simd
+} // unnamed namespace
+} // namespace ppc64
+} // namespace simdutf
+
+#endif // SIMDUTF_PPC64_SIMD_INPUT_H
+/* end file src/simdutf/ppc64/simd.h */
+
+/* begin file src/simdutf/ppc64/end.h */
+/* end file src/simdutf/ppc64/end.h */
+
+#endif // SIMDUTF_IMPLEMENTATION_PPC64
+
+#endif // SIMDUTF_PPC64_H
+/* end file src/simdutf/ppc64.h */
+/* begin file src/simdutf/rvv.h */
+#ifndef SIMDUTF_RVV_H
+#define SIMDUTF_RVV_H
+
+#ifdef SIMDUTF_FALLBACK_H
+  #error "rvv.h must be included before fallback.h"
+#endif
+
+
+#define SIMDUTF_CAN_ALWAYS_RUN_RVV SIMDUTF_IS_RVV
+
+// Unless the user predefines it, the RVV implementation is enabled either
+// when RVV is part of the compile target, or when building for RISC-V 64 with
+// both RVV intrinsics and target-region support available.
+#ifndef SIMDUTF_IMPLEMENTATION_RVV
+  #define SIMDUTF_IMPLEMENTATION_RVV \
+    (SIMDUTF_CAN_ALWAYS_RUN_RVV || \
+     (SIMDUTF_IS_RISCV64 && SIMDUTF_HAS_RVV_INTRINSICS && \
+      SIMDUTF_HAS_RVV_TARGET_REGION))
+#endif
+
+#if SIMDUTF_IMPLEMENTATION_RVV
+
+  // SIMDUTF_TARGET_RVV expands to nothing when RVV can always run; otherwise
+  // it opens a target region enabling the V extension.
+  #if SIMDUTF_CAN_ALWAYS_RUN_RVV
+    #define SIMDUTF_TARGET_RVV
+  #else
+    #define SIMDUTF_TARGET_RVV SIMDUTF_TARGET_REGION("arch=+v")
+  #endif
+  // SIMDUTF_TARGET_ZVBB is only defined when Zvbb is not already part of the
+  // compile target but its intrinsics are available.
+  #if !SIMDUTF_IS_ZVBB && SIMDUTF_HAS_ZVBB_INTRINSICS
+    #define SIMDUTF_TARGET_ZVBB SIMDUTF_TARGET_REGION("arch=+v,+zvbb")
+  #endif
+
+namespace simdutf {
+namespace rvv {} // namespace rvv
+} // namespace simdutf
+
+/* begin file src/simdutf/rvv/implementation.h */
+#ifndef SIMDUTF_RVV_IMPLEMENTATION_H
+#define SIMDUTF_RVV_IMPLEMENTATION_H
+
+
+namespace simdutf {
+namespace rvv {
+
+namespace {
+using namespace simdutf;
+} // namespace
+
+/**
+ * RISC-V Vector ("rvv") implementation of the simdutf::implementation
+ * interface.
+ *
+ * The class only declares the kernels; each group of declarations is compiled
+ * in when the SIMDUTF_FEATURE_* macros named on its #if line are set. The
+ * definitions live outside this excerpt.
+ */
+class implementation final : public simdutf::implementation {
+public:
+  // Registers the implementation as "rvv", requiring instruction_set::RVV, and
+  // records whether detect_supported_architectures() reports the ZVBB bit.
+  simdutf_really_inline implementation()
+      : simdutf::implementation("rvv", "RISC-V Vector Extension",
+                                internal::instruction_set::RVV),
+        _supports_zvbb(internal::detect_supported_architectures() &
+                       internal::instruction_set::ZVBB) {}
+#if SIMDUTF_FEATURE_DETECT_ENCODING
+  simdutf_warn_unused int detect_encodings(const char *input,
+                                           size_t length) const noexcept final;
+#endif // SIMDUTF_FEATURE_DETECT_ENCODING
+#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
+  simdutf_warn_unused bool validate_utf8(const char *buf,
+                                         size_t len) const noexcept final;
+#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
+#if SIMDUTF_FEATURE_UTF8
+  simdutf_warn_unused result
+  validate_utf8_with_errors(const char *buf, size_t len) const noexcept final;
+#endif // SIMDUTF_FEATURE_UTF8
+#if SIMDUTF_FEATURE_ASCII
+  simdutf_warn_unused bool validate_ascii(const char *buf,
+                                          size_t len) const noexcept final;
+  simdutf_warn_unused result
+  validate_ascii_with_errors(const char *buf, size_t len) const noexcept final;
+#endif // SIMDUTF_FEATURE_ASCII
+#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII
+  simdutf_warn_unused bool
+  validate_utf16le_as_ascii(const char16_t *buf,
+                            size_t len) const noexcept final;
+
+  simdutf_warn_unused bool
+  validate_utf16be_as_ascii(const char16_t *buf,
+                            size_t len) const noexcept final;
+#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII
+
+#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
+  simdutf_warn_unused bool validate_utf16le(const char16_t *buf,
+                                            size_t len) const noexcept final;
+#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
+#if SIMDUTF_FEATURE_UTF16
+  simdutf_warn_unused bool validate_utf16be(const char16_t *buf,
+                                            size_t len) const noexcept final;
+  void to_well_formed_utf16be(const char16_t *input, size_t len,
+                              char16_t *output) const noexcept final;
+  void to_well_formed_utf16le(const char16_t *input, size_t len,
+                              char16_t *output) const noexcept final;
+#endif // SIMDUTF_FEATURE_UTF16
+#if SIMDUTF_FEATURE_UTF16
+  simdutf_warn_unused result validate_utf16le_with_errors(
+      const char16_t *buf, size_t len) const noexcept final;
+#endif // SIMDUTF_FEATURE_UTF16
+#if SIMDUTF_FEATURE_UTF16
+  simdutf_warn_unused result validate_utf16be_with_errors(
+      const char16_t *buf, size_t len) const noexcept final;
+#endif // SIMDUTF_FEATURE_UTF16
+#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
+  simdutf_warn_unused bool validate_utf32(const char32_t *buf,
+                                          size_t len) const noexcept final;
+#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
+#if SIMDUTF_FEATURE_UTF32
+  simdutf_warn_unused result validate_utf32_with_errors(
+      const char32_t *buf, size_t len) const noexcept final;
+#endif // SIMDUTF_FEATURE_UTF32
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
+  simdutf_warn_unused size_t convert_latin1_to_utf8(
+      const char *buf, size_t len, char *utf8_output) const noexcept final;
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
+#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
+  simdutf_warn_unused size_t convert_latin1_to_utf16le(
+      const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_latin1_to_utf16be(
+      const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final;
+#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
+#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
+  simdutf_warn_unused size_t convert_latin1_to_utf32(
+      const char *buf, size_t len, char32_t *utf32_output) const noexcept final;
+#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
+  simdutf_warn_unused size_t convert_utf8_to_latin1(
+      const char *buf, size_t len, char *latin1_output) const noexcept final;
+  simdutf_warn_unused result convert_utf8_to_latin1_with_errors(
+      const char *buf, size_t len, char *latin1_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_valid_utf8_to_latin1(
+      const char *buf, size_t len, char *latin1_output) const noexcept final;
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
+  simdutf_warn_unused size_t convert_utf8_to_utf16le(
+      const char *buf, size_t len, char16_t *utf16_output) const noexcept final;
+  simdutf_warn_unused size_t convert_utf8_to_utf16be(
+      const char *buf, size_t len, char16_t *utf16_output) const noexcept final;
+  simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(
+      const char *buf, size_t len, char16_t *utf16_output) const noexcept final;
+  simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(
+      const char *buf, size_t len, char16_t *utf16_output) const noexcept final;
+  simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(
+      const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(
+      const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final;
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
+  simdutf_warn_unused size_t convert_utf8_to_utf32(
+      const char *buf, size_t len, char32_t *utf32_output) const noexcept final;
+  simdutf_warn_unused result convert_utf8_to_utf32_with_errors(
+      const char *buf, size_t len, char32_t *utf32_output) const noexcept final;
+  simdutf_warn_unused size_t convert_valid_utf8_to_utf32(
+      const char *buf, size_t len, char32_t *utf32_buffer) const noexcept final;
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
+#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
+  simdutf_warn_unused size_t
+  convert_utf16le_to_latin1(const char16_t *buf, size_t len,
+                            char *latin1_buffer) const noexcept final;
+  simdutf_warn_unused size_t
+  convert_utf16be_to_latin1(const char16_t *buf, size_t len,
+                            char *latin1_buffer) const noexcept final;
+  simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(
+      const char16_t *buf, size_t len,
+      char *latin1_buffer) const noexcept final;
+  simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(
+      const char16_t *buf, size_t len,
+      char *latin1_buffer) const noexcept final;
+  simdutf_warn_unused size_t
+  convert_valid_utf16le_to_latin1(const char16_t *buf, size_t len,
+                                  char *latin1_buffer) const noexcept final;
+  simdutf_warn_unused size_t
+  convert_valid_utf16be_to_latin1(const char16_t *buf, size_t len,
+                                  char *latin1_buffer) const noexcept final;
+#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
+  simdutf_warn_unused size_t convert_utf16le_to_utf8(
+      const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_utf16be_to_utf8(
+      const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final;
+  simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(
+      const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final;
+  simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(
+      const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(
+      const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(
+      const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final;
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
+#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
+  simdutf_warn_unused size_t
+  convert_utf32_to_latin1(const char32_t *buf, size_t len,
+                          char *latin1_output) const noexcept final;
+  simdutf_warn_unused result
+  convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len,
+                                      char *latin1_output) const noexcept final;
+  simdutf_warn_unused size_t
+  convert_valid_utf32_to_latin1(const char32_t *buf, size_t len,
+                                char *latin1_output) const noexcept final;
+#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
+  simdutf_warn_unused size_t convert_utf32_to_utf8(
+      const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final;
+  simdutf_warn_unused result convert_utf32_to_utf8_with_errors(
+      const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_valid_utf32_to_utf8(
+      const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final;
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
+#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
+  simdutf_warn_unused size_t
+  convert_utf32_to_utf16le(const char32_t *buf, size_t len,
+                           char16_t *utf16_buffer) const noexcept final;
+  simdutf_warn_unused size_t
+  convert_utf32_to_utf16be(const char32_t *buf, size_t len,
+                           char16_t *utf16_buffer) const noexcept final;
+  simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(
+      const char32_t *buf, size_t len,
+      char16_t *utf16_buffer) const noexcept final;
+  simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(
+      const char32_t *buf, size_t len,
+      char16_t *utf16_buffer) const noexcept final;
+  simdutf_warn_unused size_t
+  convert_valid_utf32_to_utf16le(const char32_t *buf, size_t len,
+                                 char16_t *utf16_buffer) const noexcept final;
+  simdutf_warn_unused size_t
+  convert_valid_utf32_to_utf16be(const char32_t *buf, size_t len,
+                                 char16_t *utf16_buffer) const noexcept final;
+  simdutf_warn_unused size_t
+  convert_utf16le_to_utf32(const char16_t *buf, size_t len,
+                           char32_t *utf32_buffer) const noexcept final;
+  simdutf_warn_unused size_t
+  convert_utf16be_to_utf32(const char16_t *buf, size_t len,
+                           char32_t *utf32_buffer) const noexcept final;
+  simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(
+      const char16_t *buf, size_t len,
+      char32_t *utf32_buffer) const noexcept final;
+  simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(
+      const char16_t *buf, size_t len,
+      char32_t *utf32_buffer) const noexcept final;
+  simdutf_warn_unused size_t
+  convert_valid_utf16le_to_utf32(const char16_t *buf, size_t len,
+                                 char32_t *utf32_buffer) const noexcept final;
+  simdutf_warn_unused size_t
+  convert_valid_utf16be_to_utf32(const char16_t *buf, size_t len,
+                                 char32_t *utf32_buffer) const noexcept final;
+#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
+  // From here on most declarations omit `final`; the class itself is final.
+#if SIMDUTF_FEATURE_UTF16
+  void change_endianness_utf16(const char16_t *buf, size_t length,
+                               char16_t *output) const noexcept final;
+  simdutf_warn_unused size_t count_utf16le(const char16_t *buf,
+                                           size_t length) const noexcept;
+  simdutf_warn_unused size_t count_utf16be(const char16_t *buf,
+                                           size_t length) const noexcept;
+#endif // SIMDUTF_FEATURE_UTF16
+#if SIMDUTF_FEATURE_UTF8
+  simdutf_warn_unused size_t count_utf8(const char *buf,
+                                        size_t length) const noexcept;
+#endif // SIMDUTF_FEATURE_UTF8
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
+  simdutf_warn_unused size_t
+  utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept;
+  simdutf_warn_unused size_t
+  utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept;
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
+  // NOTE(review): this guard uses `||`, whereas the LASX class later in the
+  // file guards the same two declarations with `&&` -- confirm which is meant.
+#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32
+  simdutf_warn_unused size_t utf32_length_from_utf16le(
+      const char16_t *input, size_t length) const noexcept;
+  simdutf_warn_unused size_t utf32_length_from_utf16be(
+      const char16_t *input, size_t length) const noexcept;
+#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
+  simdutf_warn_unused size_t
+  utf16_length_from_utf8(const char *input, size_t length) const noexcept;
+  // The lone ';' after each of the next two declarations is an empty
+  // declaration and has no effect.
+  simdutf_warn_unused result utf8_length_from_utf16le_with_replacement(
+      const char16_t *input, size_t length) const noexcept;
+  ;
+  simdutf_warn_unused result utf8_length_from_utf16be_with_replacement(
+      const char16_t *input, size_t length) const noexcept;
+  ;
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
+  simdutf_warn_unused size_t
+  utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept;
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
+#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
+  simdutf_warn_unused size_t
+  utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept;
+#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
+#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_UTF32
+  simdutf_warn_unused size_t
+  utf32_length_from_utf8(const char *input, size_t length) const noexcept;
+#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_UTF32
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
+  simdutf_warn_unused size_t
+  latin1_length_from_utf8(const char *input, size_t length) const noexcept;
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
+#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
+  simdutf_warn_unused size_t
+  utf8_length_from_latin1(const char *input, size_t length) const noexcept;
+#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
+  // Base64 codecs; both find() overloads are also declared under this guard.
+  // last_chunk_options defaults to last_chunk_handling_options::loose.
+#if SIMDUTF_FEATURE_BASE64
+  simdutf_warn_unused result base64_to_binary(
+      const char *input, size_t length, char *output, base64_options options,
+      last_chunk_handling_options last_chunk_options =
+          last_chunk_handling_options::loose) const noexcept;
+  simdutf_warn_unused full_result base64_to_binary_details(
+      const char *input, size_t length, char *output, base64_options options,
+      last_chunk_handling_options last_chunk_options =
+          last_chunk_handling_options::loose) const noexcept;
+  simdutf_warn_unused result
+  base64_to_binary(const char16_t *input, size_t length, char *output,
+                   base64_options options,
+                   last_chunk_handling_options last_chunk_options =
+                       last_chunk_handling_options::loose) const noexcept;
+  simdutf_warn_unused full_result base64_to_binary_details(
+      const char16_t *input, size_t length, char *output,
+      base64_options options,
+      last_chunk_handling_options last_chunk_options =
+          last_chunk_handling_options::loose) const noexcept;
+  size_t binary_to_base64(const char *input, size_t length, char *output,
+                          base64_options options) const noexcept;
+
+  size_t binary_to_base64_with_lines(const char *input, size_t length,
+                                     char *output, size_t line_length,
+                                     base64_options options) const noexcept;
+  const char *find(const char *start, const char *end,
+                   char character) const noexcept;
+  const char16_t *find(const char16_t *start, const char16_t *end,
+                       char16_t character) const noexcept;
+#endif // SIMDUTF_FEATURE_BASE64
+private:
+  // ZVBB bit as reported by detect_supported_architectures() at construction.
+  const bool _supports_zvbb;
+
+  // Whether the Zvbb code paths may be used: always when Zvbb is part of the
+  // compile target, the detected flag when only the intrinsics are available,
+  // and never otherwise.
+#if SIMDUTF_IS_ZVBB
+  bool supports_zvbb() const { return true; }
+#elif SIMDUTF_HAS_ZVBB_INTRINSICS
+  bool supports_zvbb() const { return _supports_zvbb; }
+#else
+  bool supports_zvbb() const { return false; }
+#endif
+};
+
+} // namespace rvv
+} // namespace simdutf
+
+#endif // SIMDUTF_RVV_IMPLEMENTATION_H
+/* end file src/simdutf/rvv/implementation.h */
+/* begin file src/simdutf/rvv/begin.h */
+// redefining SIMDUTF_IMPLEMENTATION to "rvv"
+// #define SIMDUTF_IMPLEMENTATION rvv
+
+#if SIMDUTF_CAN_ALWAYS_RUN_RVV
+// nothing needed.
+#else +SIMDUTF_TARGET_RVV +#endif +/* end file src/simdutf/rvv/begin.h */ +/* begin file src/simdutf/rvv/intrinsics.h */ +#ifndef SIMDUTF_RVV_INTRINSICS_H +#define SIMDUTF_RVV_INTRINSICS_H + + +#include + +#if __riscv_v_intrinsic >= 1000000 || __GCC__ >= 14 + #define simdutf_vrgather_u8m1x2(tbl, idx) \ + __riscv_vcreate_v_u8m1_u8m2( \ + __riscv_vrgather_vv_u8m1(tbl, __riscv_vget_v_u8m2_u8m1(idx, 0), \ + __riscv_vsetvlmax_e8m1()), \ + __riscv_vrgather_vv_u8m1(tbl, __riscv_vget_v_u8m2_u8m1(idx, 1), \ + __riscv_vsetvlmax_e8m1())); + + #define simdutf_vrgather_u8m1x4(tbl, idx) \ + __riscv_vcreate_v_u8m1_u8m4( \ + __riscv_vrgather_vv_u8m1(tbl, __riscv_vget_v_u8m4_u8m1(idx, 0), \ + __riscv_vsetvlmax_e8m1()), \ + __riscv_vrgather_vv_u8m1(tbl, __riscv_vget_v_u8m4_u8m1(idx, 1), \ + __riscv_vsetvlmax_e8m1()), \ + __riscv_vrgather_vv_u8m1(tbl, __riscv_vget_v_u8m4_u8m1(idx, 2), \ + __riscv_vsetvlmax_e8m1()), \ + __riscv_vrgather_vv_u8m1(tbl, __riscv_vget_v_u8m4_u8m1(idx, 3), \ + __riscv_vsetvlmax_e8m1())); +#else + // This has worse codegen on gcc + #define simdutf_vrgather_u8m1x2(tbl, idx) \ + __riscv_vset_v_u8m1_u8m2( \ + __riscv_vlmul_ext_v_u8m1_u8m2(__riscv_vrgather_vv_u8m1( \ + tbl, __riscv_vget_v_u8m2_u8m1(idx, 0), __riscv_vsetvlmax_e8m1())), \ + 1, \ + __riscv_vrgather_vv_u8m1(tbl, __riscv_vget_v_u8m2_u8m1(idx, 1), \ + __riscv_vsetvlmax_e8m1())) + + #define simdutf_vrgather_u8m1x4(tbl, idx) \ + __riscv_vset_v_u8m1_u8m4( \ + __riscv_vset_v_u8m1_u8m4( \ + __riscv_vset_v_u8m1_u8m4( \ + __riscv_vlmul_ext_v_u8m1_u8m4(__riscv_vrgather_vv_u8m1( \ + tbl, __riscv_vget_v_u8m4_u8m1(idx, 0), \ + __riscv_vsetvlmax_e8m1())), \ + 1, \ + __riscv_vrgather_vv_u8m1(tbl, \ + __riscv_vget_v_u8m4_u8m1(idx, 1), \ + __riscv_vsetvlmax_e8m1())), \ + 2, \ + __riscv_vrgather_vv_u8m1(tbl, __riscv_vget_v_u8m4_u8m1(idx, 2), \ + __riscv_vsetvlmax_e8m1())), \ + 3, \ + __riscv_vrgather_vv_u8m1(tbl, __riscv_vget_v_u8m4_u8m1(idx, 3), \ + __riscv_vsetvlmax_e8m1())) +#endif + +/* Zvbb adds dedicated 
support for endianness swaps with vrev8, but if we can't
+ * use that, we have to emulate it with the standard V extension.
+ * Using LMUL=1 vrgathers could be faster than the srl+macc variant, but that
+ * would increase register pressure, and vrgather implementations performance
+ * varies a lot. */
+enum class simdutf_ByteFlip { NONE, V, ZVBB };
+
+// Scalar variant: swaps the two bytes of one 16-bit value for every method
+// except NONE, which returns v unchanged. Multiplying by 1u makes the shifts
+// operate on unsigned int before the result is narrowed back to uint16_t.
+template
+simdutf_really_inline static uint16_t simdutf_byteflip(uint16_t v) {
+  if (method != simdutf_ByteFlip::NONE)
+    return (uint16_t)((v * 1u) << 8 | (v * 1u) >> 8);
+  return v;
+}
+
+// When a separate Zvbb target region exists, the vector overloads below are
+// compiled inside it rather than inside the plain RVV region.
+#ifdef SIMDUTF_TARGET_ZVBB
+SIMDUTF_UNTARGET_REGION
+SIMDUTF_TARGET_ZVBB
+#endif
+
+// Vector variants, one per LMUL (m1, m2, m4, m8). Each swaps the bytes of the
+// first vl 16-bit elements of v:
+//  - ZVBB: a single vrev8, compiled only when SIMDUTF_HAS_ZVBB_INTRINSICS is
+//    set;
+//  - V: vmacc computes (v >> 8) + 0x100 * v, which in 16 bits is the swap;
+//  - otherwise (NONE, or ZVBB without the intrinsics) v is returned unchanged.
+template
+simdutf_really_inline static vuint16m1_t simdutf_byteflip(vuint16m1_t v,
+                                                          size_t vl) {
+#if SIMDUTF_HAS_ZVBB_INTRINSICS
+  if (method == simdutf_ByteFlip::ZVBB)
+    return __riscv_vrev8_v_u16m1(v, vl);
+#endif
+  if (method == simdutf_ByteFlip::V)
+    return __riscv_vmacc_vx_u16m1(__riscv_vsrl_vx_u16m1(v, 8, vl), 0x100, v,
+                                  vl);
+  return v;
+}
+
+template
+simdutf_really_inline static vuint16m2_t simdutf_byteflip(vuint16m2_t v,
+                                                          size_t vl) {
+#if SIMDUTF_HAS_ZVBB_INTRINSICS
+  if (method == simdutf_ByteFlip::ZVBB)
+    return __riscv_vrev8_v_u16m2(v, vl);
+#endif
+  if (method == simdutf_ByteFlip::V)
+    return __riscv_vmacc_vx_u16m2(__riscv_vsrl_vx_u16m2(v, 8, vl), 0x100, v,
+                                  vl);
+  return v;
+}
+
+template
+simdutf_really_inline static vuint16m4_t simdutf_byteflip(vuint16m4_t v,
+                                                          size_t vl) {
+#if SIMDUTF_HAS_ZVBB_INTRINSICS
+  if (method == simdutf_ByteFlip::ZVBB)
+    return __riscv_vrev8_v_u16m4(v, vl);
+#endif
+  if (method == simdutf_ByteFlip::V)
+    return __riscv_vmacc_vx_u16m4(__riscv_vsrl_vx_u16m4(v, 8, vl), 0x100, v,
+                                  vl);
+  return v;
+}
+
+template
+simdutf_really_inline static vuint16m8_t simdutf_byteflip(vuint16m8_t v,
+                                                          size_t vl) {
+#if SIMDUTF_HAS_ZVBB_INTRINSICS
+  if (method == simdutf_ByteFlip::ZVBB)
+    return __riscv_vrev8_v_u16m8(v, vl);
+#endif
+  if (method == simdutf_ByteFlip::V)
+    return __riscv_vmacc_vx_u16m8(__riscv_vsrl_vx_u16m8(v, 8, vl), 0x100, v,
+                                  vl);
+  return v;
+}
+
+// Leave the Zvbb region and re-enter the plain RVV one for the code that
+// follows.
+#ifdef SIMDUTF_TARGET_ZVBB
+SIMDUTF_UNTARGET_REGION
+SIMDUTF_TARGET_RVV
+#endif
+
+#endif // SIMDUTF_RVV_INTRINSICS_H
+/* end file src/simdutf/rvv/intrinsics.h */
+/* begin file src/simdutf/rvv/end.h */
+#if SIMDUTF_CAN_ALWAYS_RUN_RVV
+// nothing needed.
+#else
+SIMDUTF_UNTARGET_REGION
+#endif
+
+/* end file src/simdutf/rvv/end.h */
+
+#endif // SIMDUTF_IMPLEMENTATION_RVV
+
+#endif // SIMDUTF_RVV_H
+/* end file src/simdutf/rvv.h */
+/* begin file src/simdutf/lasx.h */
+#ifndef SIMDUTF_LASX_H
+#define SIMDUTF_LASX_H
+
+#ifdef SIMDUTF_FALLBACK_H
+  #error "lasx.h must be included before fallback.h"
+#endif
+
+
+// Unless predefined by the user, the LASX implementation is enabled whenever
+// SIMDUTF_IS_LSX holds; it only counts as always-runnable when SIMDUTF_IS_LASX
+// holds as well.
+#ifndef SIMDUTF_IMPLEMENTATION_LASX
+  #define SIMDUTF_IMPLEMENTATION_LASX (SIMDUTF_IS_LSX)
+#endif
+#if SIMDUTF_IMPLEMENTATION_LASX && SIMDUTF_IS_LASX
+  #define SIMDUTF_CAN_ALWAYS_RUN_LASX 1
+#else
+  #define SIMDUTF_CAN_ALWAYS_RUN_LASX 0
+#endif
+
+#define SIMDUTF_CAN_ALWAYS_RUN_FALLBACK (SIMDUTF_IMPLEMENTATION_FALLBACK)
+
+#if SIMDUTF_IMPLEMENTATION_LASX
+  #define SIMDUTF_TARGET_LASX SIMDUTF_TARGET_REGION("lasx,lsx")
+
+  // For runtime dispatching to work, we need the lsxintrin to appear
+  // before we call SIMDUTF_TARGET_LASX. It is unclear why.
+  #include
+
+namespace simdutf {
+/**
+ * Implementation for LoongArch ASX.
+ */ +namespace lasx {} // namespace lasx +} // namespace simdutf + +/* begin file src/simdutf/lasx/implementation.h */ +#ifndef SIMDUTF_LASX_IMPLEMENTATION_H +#define SIMDUTF_LASX_IMPLEMENTATION_H + + +namespace simdutf { +namespace lasx { + +namespace { +using namespace simdutf; +} + +class implementation final : public simdutf::implementation { +public: + simdutf_really_inline implementation() + : simdutf::implementation("lasx", "LOONGARCH ASX", + internal::instruction_set::LSX | + internal::instruction_set::LASX) {} +#if SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused int detect_encodings(const char *input, + size_t length) const noexcept final; +#endif // SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf8(const char *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 + simdutf_warn_unused result + validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_ASCII + simdutf_warn_unused bool validate_ascii(const char *buf, + size_t len) const noexcept final; + simdutf_warn_unused result + validate_ascii_with_errors(const char *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII + simdutf_warn_unused bool + validate_utf16le_as_ascii(const char16_t *buf, + size_t len) const noexcept final; + + simdutf_warn_unused bool + validate_utf16be_as_ascii(const char16_t *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf16le(const char16_t *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused bool 
validate_utf16be(const char16_t *buf, + size_t len) const noexcept final; + simdutf_warn_unused result validate_utf16le_with_errors( + const char16_t *buf, size_t len) const noexcept final; + simdutf_warn_unused result validate_utf16be_with_errors( + const char16_t *buf, size_t len) const noexcept final; + void to_well_formed_utf16be(const char16_t *input, size_t len, + char16_t *output) const noexcept final; + void to_well_formed_utf16le(const char16_t *input, size_t len, + char16_t *output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf32(const char32_t *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused result validate_utf32_with_errors( + const char32_t *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf8( + const char *buf, size_t len, char *utf8_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf16le( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_latin1_to_utf16be( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept final; + 
simdutf_warn_unused result convert_utf8_to_latin1_with_errors( + const char *buf, size_t len, char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t convert_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused size_t convert_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t convert_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf32_with_errors( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + convert_utf16le_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + 
convert_utf16be_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( + const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( + const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16le_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16be_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t convert_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + convert_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; + simdutf_warn_unused result + convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; + 
simdutf_warn_unused size_t + convert_valid_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t convert_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf8_with_errors( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + convert_utf32_to_utf16le(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf32_to_utf16be(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_utf16le(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_utf16be(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16le_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16be_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( + const char16_t *buf, size_t len, + 
char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16le_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16be_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 + void change_endianness_utf16(const char16_t *buf, size_t length, + char16_t *output) const noexcept final; + simdutf_warn_unused size_t count_utf16le(const char16_t *buf, + size_t length) const noexcept; + simdutf_warn_unused size_t count_utf16be(const char16_t *buf, + size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 + simdutf_warn_unused size_t count_utf8(const char *buf, + size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t + utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t + utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t utf32_length_from_utf16le( + const char16_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t utf32_length_from_utf16be( + const char16_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t + utf16_length_from_utf8(const char *input, size_t length) const noexcept; + simdutf_warn_unused result utf8_length_from_utf16le_with_replacement( + const char16_t *input, size_t length) const noexcept; + ; 
+ simdutf_warn_unused result utf8_length_from_utf16be_with_replacement( + const char16_t *input, size_t length) const noexcept; + ; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + utf32_length_from_utf8(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + latin1_length_from_utf8(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + utf8_length_from_latin1(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_BASE64 + simdutf_warn_unused result base64_to_binary( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused full_result base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused result + base64_to_binary(const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + 
simdutf_warn_unused full_result base64_to_binary_details( + const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + size_t binary_to_base64(const char *input, size_t length, char *output, + base64_options options) const noexcept; + size_t binary_to_base64_with_lines(const char *input, size_t length, + char *output, size_t line_length, + base64_options options) const noexcept; + const char *find(const char *start, const char *end, + char character) const noexcept; + const char16_t *find(const char16_t *start, const char16_t *end, + char16_t character) const noexcept; +#endif // SIMDUTF_FEATURE_BASE64 +}; + +} // namespace lasx +} // namespace simdutf + +#endif // SIMDUTF_LASX_IMPLEMENTATION_H +/* end file src/simdutf/lasx/implementation.h */ + +/* begin file src/simdutf/lasx/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "lasx" +// #define SIMDUTF_IMPLEMENTATION lasx +#define SIMDUTF_SIMD_HAS_UNSIGNED_CMP 1 + +#if SIMDUTF_CAN_ALWAYS_RUN_LASX +// nothing needed. +#else +SIMDUTF_TARGET_LASX +#endif +/* end file src/simdutf/lasx/begin.h */ + + // Declarations +/* begin file src/simdutf/lasx/intrinsics.h */ +#ifndef SIMDUTF_LASX_INTRINSICS_H +#define SIMDUTF_LASX_INTRINSICS_H + + +// This should be the correct header whether +// you use visual studio or other compilers. 
+#include +#include + +#if defined(__loongarch_asx) + #ifdef __clang__ + #define VREGS_PREFIX "$vr" + #define XREGS_PREFIX "$xr" + #else // GCC + #define VREGS_PREFIX "$f" + #define XREGS_PREFIX "$f" + #endif + #define __ALL_REGS \ + "0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26," \ + "27,28,29,30,31" +// Convert __m128i to __m256i +static inline __m256i ____m256i(__m128i in) { + __m256i out = __lasx_xvldi(0); + __asm__ volatile(".irp i," __ALL_REGS "\n\t" + " .ifc %[out], " XREGS_PREFIX "\\i \n\t" + " .irp j," __ALL_REGS "\n\t" + " .ifc %[in], " VREGS_PREFIX "\\j \n\t" + " xvpermi.q $xr\\i, $xr\\j, 0x0 \n\t" + " .endif \n\t" + " .endr \n\t" + " .endif \n\t" + ".endr \n\t" + : [out] "+f"(out) + : [in] "f"(in)); + return out; +} +// Convert two __m128i to __m256i +static inline __m256i lasx_set_q(__m128i inhi, __m128i inlo) { + __m256i out; + __asm__ volatile(".irp i," __ALL_REGS "\n\t" + " .ifc %[hi], " VREGS_PREFIX "\\i \n\t" + " .irp j," __ALL_REGS "\n\t" + " .ifc %[lo], " VREGS_PREFIX "\\j \n\t" + " xvpermi.q $xr\\i, $xr\\j, 0x20 \n\t" + " .endif \n\t" + " .endr \n\t" + " .endif \n\t" + ".endr \n\t" + ".ifnc %[out], %[hi] \n\t" + ".irp i," __ALL_REGS "\n\t" + " .ifc %[out], " XREGS_PREFIX "\\i \n\t" + " .irp j," __ALL_REGS "\n\t" + " .ifc %[hi], " VREGS_PREFIX "\\j \n\t" + " xvori.b $xr\\i, $xr\\j, 0 \n\t" + " .endif \n\t" + " .endr \n\t" + " .endif \n\t" + ".endr \n\t" + ".endif \n\t" + : [out] "=f"(out), [hi] "+f"(inhi) + : [lo] "f"(inlo)); + return out; +} +// Convert __m256i low part to __m128i +static inline __m128i lasx_extracti128_lo(__m256i in) { + __m128i out; + __asm__ volatile(".ifnc %[out], %[in] \n\t" + ".irp i," __ALL_REGS "\n\t" + " .ifc %[out], " VREGS_PREFIX "\\i \n\t" + " .irp j," __ALL_REGS "\n\t" + " .ifc %[in], " XREGS_PREFIX "\\j \n\t" + " vori.b $vr\\i, $vr\\j, 0 \n\t" + " .endif \n\t" + " .endr \n\t" + " .endif \n\t" + ".endr \n\t" + ".endif \n\t" + : [out] "=f"(out) + : [in] "f"(in)); + return out; +} +// 
Convert __m256i high part to __m128i +static inline __m128i lasx_extracti128_hi(__m256i in) { + __m128i out; + __asm__ volatile(".irp i," __ALL_REGS "\n\t" + " .ifc %[out], " VREGS_PREFIX "\\i \n\t" + " .irp j," __ALL_REGS "\n\t" + " .ifc %[in], " XREGS_PREFIX "\\j \n\t" + " xvpermi.q $xr\\i, $xr\\j, 0x11 \n\t" + " .endif \n\t" + " .endr \n\t" + " .endif \n\t" + ".endr \n\t" + : [out] "=f"(out) + : [in] "f"(in)); + return out; +} +#endif + +/* +Encoding of argument for LoongArch64 xvldi instruction. See: +https://jia.je/unofficial-loongarch-intrinsics-guide/lasx/misc/#__m256i-__lasx_xvldi-imm_n1024_1023-imm + +1: imm[12:8]=0b10000: broadcast imm[7:0] as 32-bit elements to all lanes + +2: imm[12:8]=0b10001: broadcast imm[7:0] << 8 as 32-bit elements to all lanes + +3: imm[12:8]=0b10010: broadcast imm[7:0] << 16 as 32-bit elements to all lanes + +4: imm[12:8]=0b10011: broadcast imm[7:0] << 24 as 32-bit elements to all lanes + +5: imm[12:8]=0b10100: broadcast imm[7:0] as 16-bit elements to all lanes + +6: imm[12:8]=0b10101: broadcast imm[7:0] << 8 as 16-bit elements to all lanes + +7: imm[12:8]=0b10110: broadcast (imm[7:0] << 8) | 0xFF as 32-bit elements to all +lanes + +8: imm[12:8]=0b10111: broadcast (imm[7:0] << 16) | 0xFFFF as 32-bit elements to +all lanes + +9: imm[12:8]=0b11000: broadcast imm[7:0] as 8-bit elements to all lanes + +10: imm[12:8]=0b11001: repeat each bit of imm[7:0] eight times, and broadcast +the result as 64-bit elements to all lanes +*/ + +namespace lasx_vldi { + +template class const_u16 { + constexpr static const uint8_t b0 = ((v >> 0 * 8) & 0xff); + constexpr static const uint8_t b1 = ((v >> 1 * 8) & 0xff); + + constexpr static bool is_case5 = uint16_t(b0) == v; + constexpr static bool is_case6 = (uint16_t(b1) << 8) == v; + constexpr static bool is_case9 = (b0 == b1); + constexpr static bool is_case10 = + ((b0 == 0xff) || (b0 == 0x00)) && ((b1 == 0xff) || (b1 == 0x00)); + +public: + constexpr static uint16_t operation = is_case5 ? 
0b10100 + : is_case6 ? 0b10101 + : is_case9 ? 0b11000 + : is_case10 ? 0x11001 + : 0xffff; + + constexpr static uint16_t byte = + is_case5 ? b0 + : is_case6 ? b1 + : is_case9 ? b0 + : is_case10 ? ((b0 ? 0x55 : 0x00) | (b1 ? 0xaa : 0x00)) + : 0xffff; + + constexpr static int value = int((operation << 8) | byte) - 8192; + constexpr static bool valid = operation != 0xffff; +}; + +template class const_u32 { + constexpr static const uint8_t b0 = (v & 0xff); + constexpr static const uint8_t b1 = ((v >> 8) & 0xff); + constexpr static const uint8_t b2 = ((v >> 16) & 0xff); + constexpr static const uint8_t b3 = ((v >> 24) & 0xff); + + constexpr static bool is_case1 = (uint32_t(b0) == v); + constexpr static bool is_case2 = ((uint32_t(b1) << 8) == v); + constexpr static bool is_case3 = ((uint32_t(b2) << 16) == v); + constexpr static bool is_case4 = ((uint32_t(b3) << 24) == v); + constexpr static bool is_case5 = (b0 == b2) && (b1 == 0) && (b3 == 0); + constexpr static bool is_case6 = (b1 == b3) && (b0 == 0) && (b2 == 0); + constexpr static bool is_case7 = (b3 == 0) && (b2 == 0) && (b0 == 0xff); + constexpr static bool is_case8 = (b3 == 0) && (b1 == 0xff) && (b0 == 0xff); + constexpr static bool is_case9 = (b0 == b1) && (b0 == b2) && (b0 == b3); + constexpr static bool is_case10 = + ((b0 == 0xff) || (b0 == 0x00)) && ((b1 == 0xff) || (b1 == 0x00)) && + ((b2 == 0xff) || (b2 == 0x00)) && ((b3 == 0xff) || (b3 == 0x00)); + +public: + constexpr static uint16_t operation = is_case1 ? 0b10000 + : is_case2 ? 0b10001 + : is_case3 ? 0b10010 + : is_case4 ? 0b10011 + : is_case5 ? 0b10100 + : is_case6 ? 0b10101 + : is_case7 ? 0b10110 + : is_case8 ? 0b10111 + : is_case9 ? 0b11000 + : is_case10 ? 0b11001 + : 0xffff; + + constexpr static uint16_t byte = + is_case1 ? b0 + : is_case2 ? b1 + : is_case3 ? b2 + : is_case4 ? b3 + : is_case5 ? b0 + : is_case6 ? b1 + : is_case7 ? b1 + : is_case8 ? b2 + : is_case9 ? b0 + : is_case10 ? ((b0 ? 0x11 : 0x00) | (b1 ? 0x22 : 0x00) | + (b2 ? 
0x44 : 0x00) | (b3 ? 0x88 : 0x00)) + : 0xffff; + + constexpr static int value = int((operation << 8) | byte) - 8192; + constexpr static bool valid = operation != 0xffff; +}; + +template class const_u64 { + constexpr static const uint8_t b0 = ((v >> 0 * 8) & 0xff); + constexpr static const uint8_t b1 = ((v >> 1 * 8) & 0xff); + constexpr static const uint8_t b2 = ((v >> 2 * 8) & 0xff); + constexpr static const uint8_t b3 = ((v >> 3 * 8) & 0xff); + constexpr static const uint8_t b4 = ((v >> 4 * 8) & 0xff); + constexpr static const uint8_t b5 = ((v >> 5 * 8) & 0xff); + constexpr static const uint8_t b6 = ((v >> 6 * 8) & 0xff); + constexpr static const uint8_t b7 = ((v >> 7 * 8) & 0xff); + + constexpr static bool is_case10 = + ((b0 == 0xff) || (b0 == 0x00)) && ((b1 == 0xff) || (b1 == 0x00)) && + ((b2 == 0xff) || (b2 == 0x00)) && ((b3 == 0xff) || (b3 == 0x00)) && + ((b4 == 0xff) || (b4 == 0x00)) && ((b5 == 0xff) || (b5 == 0x00)) && + ((b6 == 0xff) || (b6 == 0x00)) && ((b7 == 0xff) || (b7 == 0x00)); + +public: + constexpr static bool is_32bit = + ((v & 0xffffffff) == (v >> 32)) && const_u32<(v >> 32)>::value; + constexpr static uint8_t op_32bit = const_u32<(v >> 32)>::operation; + constexpr static uint8_t byte_32bit = const_u32<(v >> 32)>::byte; + + constexpr static uint16_t operation = is_32bit ? op_32bit + : is_case10 ? 0x11001 + : 0xffff; + + constexpr static uint16_t byte = + is_32bit ? byte_32bit + : is_case10 + ? ((b0 ? 0x01 : 0x00) | (b1 ? 0x02 : 0x00) | (b2 ? 0x04 : 0x00) | + (b3 ? 0x08 : 0x00) | (b4 ? 0x10 : 0x00) | (b5 ? 0x20 : 0x00) | + (b6 ? 0x40 : 0x00) | (b7 ? 0x80 : 0x00)) + : 0xffff; + + constexpr static int value = int((operation << 8) | byte) - 8192; + constexpr static bool valid = operation != 0xffff; +}; + +} // namespace lasx_vldi + +// Uncomment when running under QEMU affected +// by bug https://gitlab.com/qemu-project/qemu/-/issues/2865 +// Versions <= 9.2.2 are affected, likely anything newer is correct. 
+#ifndef QEMU_VLDI_BUG +// #define QEMU_VLDI_BUG 1 +#endif + +#ifdef QEMU_VLDI_BUG + #define lasx_splat_u16(v) __lasx_xvreplgr2vr_h(v) + #define lasx_splat_u32(v) __lasx_xvreplgr2vr_w(v) +#else +template constexpr __m256i lasx_splat_u16_aux() { + constexpr bool is_imm10 = (int16_t(x) < 512) && (int16_t(x) > -512); + constexpr uint16_t imm10 = is_imm10 ? x : 0; + constexpr bool is_vldi = lasx_vldi::const_u16::valid; + constexpr int vldi_imm = is_vldi ? lasx_vldi::const_u16::value : 0; + + return is_imm10 ? __lasx_xvrepli_h(int16_t(imm10)) + : is_vldi ? __lasx_xvldi(vldi_imm) + : __lasx_xvreplgr2vr_h(x); +} + +template constexpr __m256i lasx_splat_u32_aux() { + constexpr bool is_imm10 = (int32_t(x) < 512) && (int32_t(x) > -512); + constexpr uint32_t imm10 = is_imm10 ? x : 0; + constexpr bool is_vldi = lasx_vldi::const_u32::valid; + constexpr int vldi_imm = is_vldi ? lasx_vldi::const_u32::value : 0; + + return is_imm10 ? __lasx_xvrepli_w(int32_t(imm10)) + : is_vldi ? __lasx_xvldi(vldi_imm) + : __lasx_xvreplgr2vr_w(x); +} + + #define lasx_splat_u16(v) lasx_splat_u16_aux<(v)>() + #define lasx_splat_u32(v) lasx_splat_u32_aux<(v)>() +#endif // QEMU_VLDI_BUG + +#ifndef lsx_splat_u16 + #ifdef QEMU_VLDI_BUG + #define lsx_splat_u16(v) __lsx_vreplgr2vr_h(v) + #define lsx_splat_u32(v) __lsx_vreplgr2vr_w(v) + #else +namespace { +template constexpr __m128i lsx_splat_u16_aux() { + return ((int16_t(x) < 512) && (int16_t(x) > -512)) + ? __lsx_vrepli_h( + ((int16_t(x) < 512) && (int16_t(x) > -512)) ? int16_t(x) : 0) + : (lasx_vldi::const_u16::valid + ? __lsx_vldi(lasx_vldi::const_u16::valid + ? lasx_vldi::const_u16::value + : 0) + : __lsx_vreplgr2vr_h(x)); +} + +template constexpr __m128i lsx_splat_u32_aux() { + return ((int32_t(x) < 512) && (int32_t(x) > -512)) + ? __lsx_vrepli_w( + ((int32_t(x) < 512) && (int32_t(x) > -512)) ? int32_t(x) : 0) + : (lasx_vldi::const_u32::valid + ? __lsx_vldi(lasx_vldi::const_u32::valid + ? 
lasx_vldi::const_u32::value + : 0) + : __lsx_vreplgr2vr_w(x)); +} +} // namespace + #define lsx_splat_u16(v) lsx_splat_u16_aux<(v)>() + #define lsx_splat_u32(v) lsx_splat_u32_aux<(v)>() + #endif // QEMU_VLDI_BUG +#endif // lsx_splat_u16 + +#endif // SIMDUTF_LASX_INTRINSICS_H +/* end file src/simdutf/lasx/intrinsics.h */ +/* begin file src/simdutf/lasx/bitmanipulation.h */ +#ifndef SIMDUTF_LASX_BITMANIPULATION_H +#define SIMDUTF_LASX_BITMANIPULATION_H + +#include + +namespace simdutf { +namespace lasx { +namespace { + +simdutf_really_inline int count_ones(uint64_t input_num) { + return __lsx_vpickve2gr_w(__lsx_vpcnt_d(__lsx_vreplgr2vr_d(input_num)), 0); +} + +#if SIMDUTF_NEED_TRAILING_ZEROES +simdutf_really_inline int trailing_zeroes(uint64_t input_num) { + return __builtin_ctzll(input_num); +} +#endif + +} // unnamed namespace +} // namespace lasx +} // namespace simdutf + +#endif // SIMDUTF_LASX_BITMANIPULATION_H +/* end file src/simdutf/lasx/bitmanipulation.h */ +/* begin file src/simdutf/lasx/simd.h */ +#ifndef SIMDUTF_LASX_SIMD_H +#define SIMDUTF_LASX_SIMD_H + + +namespace simdutf { +namespace lasx { +namespace { +namespace simd { + +__attribute__((aligned(32))) static const uint8_t prev_shuf_table[32][32] = { + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}, + {0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13}, + {0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, + {0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + {0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 
6, 7, 8, 9}, + {0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, + 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, + 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, + 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, + 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0}, + {15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, + 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, + 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, + 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, + 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 
0}, + {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, + 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0}, + {6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, + 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0}, + {5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0}, + {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0}, + {3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0}, + {2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, +}; + +__attribute__((aligned(32))) static const uint8_t bitsel_mask_table[32][32] = { + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 
0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0}}; + +// Forward-declared so they can be used by splat and friends. +template struct base { + __m256i value; + + // Zero constructor + simdutf_really_inline base() : value{__m256i()} {} + + // Conversion from SIMD register + simdutf_really_inline base(const __m256i _value) : value(_value) {} + // Conversion to SIMD register + simdutf_really_inline operator const __m256i &() const { return this->value; } + simdutf_really_inline operator __m256i &() { return this->value; } + template + simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { + if (big_endian) { + __m256i zero = __lasx_xvldi(0); + __m256i in8 = __lasx_xvpermi_d(this->value, 0b11011000); + __m256i inlow = __lasx_xvilvl_b(in8, zero); + __m256i inhigh = __lasx_xvilvh_b(in8, zero); + __lasx_xvst(inlow, reinterpret_cast(ptr), 0); + __lasx_xvst(inhigh, reinterpret_cast(ptr), 32); + } else { + __m256i inlow = __lasx_vext2xv_hu_bu(this->value); + __m256i inhigh = __lasx_vext2xv_hu_bu( + __lasx_xvpermi_q(this->value, this->value, 0b00000001)); + __lasx_xvst(inlow, reinterpret_cast<__m256i *>(ptr), 0); 
+ __lasx_xvst(inhigh, reinterpret_cast<__m256i *>(ptr), 32); + } + } + simdutf_really_inline void store_ascii_as_utf32(char32_t *ptr) const { + __m256i in32_0 = __lasx_vext2xv_wu_bu(this->value); + __lasx_xvst(in32_0, reinterpret_cast(ptr), 0); + + __m256i in8_1 = __lasx_xvpermi_d(this->value, 0b00000001); + __m256i in32_1 = __lasx_vext2xv_wu_bu(in8_1); + __lasx_xvst(in32_1, reinterpret_cast(ptr), 32); + + __m256i in8_2 = __lasx_xvpermi_d(this->value, 0b00000010); + __m256i in32_2 = __lasx_vext2xv_wu_bu(in8_2); + __lasx_xvst(in32_2, reinterpret_cast(ptr), 64); + + __m256i in8_3 = __lasx_xvpermi_d(this->value, 0b00000011); + __m256i in32_3 = __lasx_vext2xv_wu_bu(in8_3); + __lasx_xvst(in32_3, reinterpret_cast(ptr), 96); + } + // Bit operations + simdutf_really_inline Child operator|(const Child other) const { + return __lasx_xvor_v(this->value, other); + } + simdutf_really_inline Child operator&(const Child other) const { + return __lasx_xvand_v(this->value, other); + } + simdutf_really_inline Child operator^(const Child other) const { + return __lasx_xvxor_v(this->value, other); + } + simdutf_really_inline Child &operator|=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast | other; + return *this_cast; + } +}; + +template struct simd8; + +template > +struct base8 : base> { + simdutf_really_inline base8() : base>() {} + simdutf_really_inline base8(const __m256i _value) : base>(_value) {} + friend simdutf_really_inline Mask operator==(const simd8 lhs, + const simd8 rhs) { + return __lasx_xvseq_b(lhs, rhs); + } + + static const int SIZE = sizeof(base::value); + + template + simdutf_really_inline simd8 prev(const simd8 prev_chunk) const { + static_assert(N <= 16, "unsupported shift value"); + + if (!N) + return this->value; + + __m256i zero = __lasx_xvldi(0); + __m256i result, shuf; + if (N < 16) { + shuf = __lasx_xvld(prev_shuf_table[N], 0); + + result = __lasx_xvshuf_b( + __lasx_xvpermi_q(this->value, this->value, 0b00000001), 
this->value, + shuf); + __m256i srl_prev = __lasx_xvbsrl_v( + __lasx_xvpermi_q(zero, prev_chunk.value, 0b00110001), (16 - N)); + __m256i mask = __lasx_xvld(bitsel_mask_table[N], 0); + result = __lasx_xvbitsel_v(result, srl_prev, mask); + + return result; + } else if (N == 16) { + return __lasx_xvpermi_q(this->value, prev_chunk.value, 0b00100001); + } + } +}; + +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd8 : base8 { + static simdutf_really_inline simd8 splat(bool _value) { + return __lasx_xvreplgr2vr_b(uint8_t(-(!!_value))); + } + + simdutf_really_inline simd8() : base8() {} + simdutf_really_inline simd8(const __m256i _value) : base8(_value) {} + // Splat constructor + simdutf_really_inline simd8(bool _value) : base8(splat(_value)) {} + + simdutf_really_inline uint32_t to_bitmask() const { + __m256i mask = __lasx_xvmsknz_b(this->value); + uint32_t mask0 = __lasx_xvpickve2gr_wu(mask, 0); + uint32_t mask1 = __lasx_xvpickve2gr_wu(mask, 4); + return (mask0 | (mask1 << 16)); + } + simdutf_really_inline bool any() const { + if (__lasx_xbz_b(this->value)) + return false; + return true; + } + simdutf_really_inline simd8 operator~() const { return *this ^ true; } +}; + +template struct base8_numeric : base8 { + static simdutf_really_inline simd8 splat(T _value) { + return __lasx_xvreplgr2vr_b(_value); + } + static simdutf_really_inline simd8 zero() { return __lasx_xvldi(0); } + static simdutf_really_inline simd8 load(const T values[32]) { + return __lasx_xvld(reinterpret_cast(values), 0); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdutf_really_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, + T v5, T v6, T v7, T v8, T v9, + T v10, T v11, T v12, T v13, + T v14, T v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, + v14, v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, + v12, v13, v14, v15); + } + + simdutf_really_inline base8_numeric() : base8() 
{} + simdutf_really_inline base8_numeric(const __m256i _value) + : base8(_value) {} + + // Store to array + simdutf_really_inline void store(T dst[32]) const { + return __lasx_xvst(this->value, reinterpret_cast<__m256i *>(dst), 0); + } + + // Override to distinguish from bool version + simdutf_really_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior + // for out of range values) + template + simdutf_really_inline simd8 lookup_16(simd8 lookup_table) const { + __m256i origin = __lasx_xvand_v(this->value, __lasx_xvldi(0x1f)); + return __lasx_xvshuf_b(__lasx_xvldi(0), lookup_table, origin); + } + + template + simdutf_really_inline simd8 + lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, + L replace5, L replace6, L replace7, L replace8, L replace9, + L replace10, L replace11, L replace12, L replace13, L replace14, + L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, replace4, replace5, replace6, + replace7, replace8, replace9, replace10, replace11, replace12, + replace13, replace14, replace15)); + } +}; + +// Signed bytes +template <> struct simd8 : base8_numeric { + simdutf_really_inline simd8() : base8_numeric() {} + simdutf_really_inline simd8(const __m256i _value) + : base8_numeric(_value) {} + + // Splat constructor + simdutf_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdutf_really_inline simd8(const int8_t values[32]) : simd8(load(values)) {} + simdutf_really_inline operator simd8() const; + simdutf_really_inline bool is_ascii() const { + __m256i ascii_mask = __lasx_xvslti_b(this->value, 0); + if (__lasx_xbnz_v(ascii_mask)) + return false; + return true; + } + // Order-sensitive comparisons + simdutf_really_inline simd8 operator>(const simd8 other) const { + return __lasx_xvslt_b(other, this->value); + } + simdutf_really_inline simd8 operator<(const simd8 other) 
const { + return __lasx_xvslt_b(this->value, other); + } +}; + +// Unsigned bytes +template <> struct simd8 : base8_numeric { + simdutf_really_inline simd8() : base8_numeric() {} + simdutf_really_inline simd8(const __m256i _value) + : base8_numeric(_value) {} + // Splat constructor + simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdutf_really_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdutf_really_inline + simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, + uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, + uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, + uint8_t v21, uint8_t v22, uint8_t v23, uint8_t v24, uint8_t v25, + uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, + uint8_t v31) + : simd8((__m256i)v32u8{v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, v15, + v16, v17, v18, v19, v20, v21, v22, v23, + v24, v25, v26, v27, v28, v29, v30, v31}) {} + + // Saturated math + simdutf_really_inline simd8 + saturating_sub(const simd8 other) const { + return __lasx_xvssub_bu(this->value, other); + } + + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdutf_really_inline simd8 + gt_bits(const simd8 other) const { + return this->saturating_sub(other); + } + simdutf_really_inline simd8 + operator>=(const simd8 other) const { + return __lasx_xvsle_bu(other, *this); + } + simdutf_really_inline simd8 &operator-=(const simd8 other) { + value = __lasx_xvsub_b(value, other.value); + return *this; + } + + // Bit-specific operations + simdutf_really_inline bool is_ascii() const { + __m256i ascii_mask = __lasx_xvslti_b(this->value, 0); + if (__lasx_xbnz_v(ascii_mask)) + return false; + return true; + } + simdutf_really_inline bool any_bits_set_anywhere() const { + if 
(__lasx_xbnz_v(this->value)) + return true; + return false; + } + template simdutf_really_inline simd8 shr() const { + return __lasx_xvsrli_b(this->value, N); + } + template simdutf_really_inline simd8 shl() const { + return __lasx_xvslli_b(this->value, N); + } + + simdutf_really_inline uint64_t sum_bytes() const { + const auto sum_u16 = __lasx_xvhaddw_hu_bu(value, value); + const auto sum_u32 = __lasx_xvhaddw_wu_hu(sum_u16, sum_u16); + const auto sum_u64 = __lasx_xvhaddw_du_wu(sum_u32, sum_u32); + + return uint64_t(__lasx_xvpickve2gr_du(sum_u64, 0)) + + uint64_t(__lasx_xvpickve2gr_du(sum_u64, 1)) + + uint64_t(__lasx_xvpickve2gr_du(sum_u64, 2)) + + uint64_t(__lasx_xvpickve2gr_du(sum_u64, 3)); + } +}; +simdutf_really_inline simd8::operator simd8() const { + return this->value; +} + +template struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 2, + "LASX kernel should use two registers per 64-byte block."); + simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64 &o) = delete; // no copy allowed + simd8x64 & + operator=(const simd8 other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdutf_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1) + : chunks{chunk0, chunk1} {} + simdutf_really_inline simd8x64(const T *ptr) + : chunks{simd8::load(ptr), + simd8::load(ptr + sizeof(simd8) / sizeof(T))} {} + + simdutf_really_inline void store(T *ptr) const { + this->chunks[0].store(ptr + sizeof(simd8) * 0 / sizeof(T)); + this->chunks[1].store(ptr + sizeof(simd8) * 1 / sizeof(T)); + } + + simdutf_really_inline uint64_t to_bitmask() const { + uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r_hi = this->chunks[1].to_bitmask(); + return r_lo | (r_hi << 32); + } + + simdutf_really_inline simd8x64 &operator|=(const simd8x64 &other) { + this->chunks[0] |= other.chunks[0]; + this->chunks[1] |= other.chunks[1]; + return *this; + } + + simdutf_really_inline 
simd8 reduce_or() const { + return this->chunks[0] | this->chunks[1]; + } + + simdutf_really_inline bool is_ascii() const { + return this->reduce_or().is_ascii(); + } + + template + simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { + this->chunks[0].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 0); + this->chunks[1].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 1); + } + + simdutf_really_inline void store_ascii_as_utf32(char32_t *ptr) const { + this->chunks[0].store_ascii_as_utf32(ptr + sizeof(simd8) * 0); + this->chunks[1].store_ascii_as_utf32(ptr + sizeof(simd8) * 1); + } + + simdutf_really_inline uint64_t lt(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] < mask, this->chunks[1] < mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t gt(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] > mask, this->chunks[1] > mask) + .to_bitmask(); + } + simdutf_really_inline uint64_t gteq_unsigned(const uint8_t m) const { + const simd8 mask = simd8::splat(m); + return simd8x64((simd8(__m256i(this->chunks[0])) >= mask), + (simd8(__m256i(this->chunks[1])) >= mask)) + .to_bitmask(); + } +}; // struct simd8x64 + +/* begin file src/simdutf/lasx/simd16-inl.h */ +template struct simd16; + +template > +struct base16 : base> { + using bitmask_type = uint32_t; + + simdutf_really_inline base16() : base>() {} + simdutf_really_inline base16(const __m256i _value) + : base>(_value) {} + template + simdutf_really_inline base16(const Pointer *ptr) + : base16(__lasx_xvld(reinterpret_cast(ptr), 0)) {} + + /// the size of vector in bytes + static const int SIZE = sizeof(base>::value); + + /// the number of elements of type T a vector can hold + static const int ELEMENTS = SIZE / sizeof(T); +}; + +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd16 : base16 { + static simdutf_really_inline simd16 splat(bool _value) { + return 
__lasx_xvreplgr2vr_h(uint16_t(-(!!_value))); + } + + simdutf_really_inline simd16() : base16() {} + simdutf_really_inline simd16(const __m256i _value) : base16(_value) {} + // Splat constructor + simdutf_really_inline simd16(bool _value) : base16(splat(_value)) {} + + simdutf_really_inline bitmask_type to_bitmask() const { + __m256i mask = __lasx_xvmsknz_b(this->value); + bitmask_type mask0 = __lasx_xvpickve2gr_wu(mask, 0); + bitmask_type mask1 = __lasx_xvpickve2gr_wu(mask, 4); + return (mask0 | (mask1 << 16)); + } + simdutf_really_inline simd16 operator~() const { return *this ^ true; } + + simdutf_really_inline bool is_zero() const { + return __lasx_xbz_v(this->value); + } + + template simdutf_really_inline simd16 byte_right_shift() const { + const auto t0 = __lasx_xvbsrl_v(this->value, N); + const auto t1 = __lasx_xvpermi_q(this->value, __lasx_xvldi(0), 0b00000011); + const auto t2 = __lasx_xvbsll_v(t1, 16 - N); + const auto t3 = __lasx_xvor_v(t0, t2); + return t3; + } + + simdutf_really_inline uint16_t first() const { + return uint16_t(__lasx_xvpickve2gr_w(value, 0)); + } +}; + +template struct base16_numeric : base16 { + static simdutf_really_inline simd16 splat(T _value) { + return __lasx_xvreplgr2vr_h((uint16_t)_value); + } + static simdutf_really_inline simd16 zero() { return __lasx_xvldi(0); } + template + static simdutf_really_inline simd16 load(const Pointer values) { + return __lasx_xvld(values, 0); + } + + simdutf_really_inline base16_numeric() : base16() {} + simdutf_really_inline base16_numeric(const __m256i _value) + : base16(_value) {} + + // Store to array + simdutf_really_inline void store(T dst[8]) const { + return __lasx_xvst(this->value, reinterpret_cast<__m256i *>(dst), 0); + } + + // Override to distinguish from bool version + simdutf_really_inline simd16 operator~() const { return *this ^ 0xFFFFu; } +}; + +// Unsigned code units +template <> struct simd16 : base16_numeric { + simdutf_really_inline simd16() : base16_numeric() {} + 
simdutf_really_inline simd16(const __m256i _value) + : base16_numeric(_value) {} + + // Splat constructor + simdutf_really_inline simd16(uint16_t _value) : simd16(splat(_value)) {} + + // Array constructor + simdutf_really_inline simd16(const uint16_t *values) : simd16(load(values)) {} + simdutf_really_inline simd16(const char16_t *values) + : simd16(load(reinterpret_cast(values))) {} + + // Order-specific operations + simdutf_really_inline simd16 &operator+=(const simd16 other) { + value = __lasx_xvadd_h(value, other.value); + return *this; + } + + // Change the endianness + simdutf_really_inline simd16 swap_bytes() const { + return __lasx_xvshuf4i_b(this->value, 0b10110001); + } + + template + static simdutf_really_inline simd8 + pack_shifted_right(const simd16 &v0, const simd16 &v1) { + return __lasx_xvpermi_d(__lasx_xvssrlni_bu_h(v1.value, v0.value, N), + 0b11011000); + } + + // Pack with the unsigned saturation of two uint16_t code units into single + // uint8_t vector + static simdutf_really_inline simd8 pack(const simd16 &v0, + const simd16 &v1) { + + return pack_shifted_right<0>(v0, v1); + } + + simdutf_really_inline uint64_t sum() const { + const auto sum_u32 = __lasx_xvhaddw_wu_hu(value, value); + const auto sum_u64 = __lasx_xvhaddw_du_wu(sum_u32, sum_u32); + + return uint64_t(__lasx_xvpickve2gr_du(sum_u64, 0)) + + uint64_t(__lasx_xvpickve2gr_du(sum_u64, 1)) + + uint64_t(__lasx_xvpickve2gr_du(sum_u64, 2)) + + uint64_t(__lasx_xvpickve2gr_du(sum_u64, 3)); + } + + template simdutf_really_inline simd16 byte_right_shift() const { + return __lasx_xvbsrl_v(this->value, N); + } +}; + +simdutf_really_inline simd16 operator<(const simd16 a, + const simd16 b) { + return __lasx_xvslt_hu(a.value, b.value); +} + +simdutf_really_inline simd16 operator>(const simd16 a, + const simd16 b) { + return __lasx_xvslt_hu(b.value, a.value); +} + +simdutf_really_inline simd16 operator<=(const simd16 a, + const simd16 b) { + return __lasx_xvsle_hu(a.value, b.value); +} + 
+simdutf_really_inline simd16 operator>=(const simd16 a, + const simd16 b) { + return __lasx_xvsle_hu(b.value, a.value); +} + +template struct simd16x32 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd16); + static_assert(NUM_CHUNKS == 2, + "LASX kernel should use two registers per 64-byte block."); + simd16 chunks[NUM_CHUNKS]; + + simd16x32(const simd16x32 &o) = delete; // no copy allowed + simd16x32 & + operator=(const simd16 other) = delete; // no assignment allowed + simd16x32() = delete; // no default constructor allowed + + simdutf_really_inline simd16x32(const simd16 chunk0, + const simd16 chunk1) + : chunks{chunk0, chunk1} {} + simdutf_really_inline simd16x32(const T *ptr) + : chunks{simd16::load(ptr), + simd16::load(ptr + sizeof(simd16) / sizeof(T))} {} + + simdutf_really_inline void store(T *ptr) const { + this->chunks[0].store(ptr + sizeof(simd16) * 0 / sizeof(T)); + this->chunks[1].store(ptr + sizeof(simd16) * 1 / sizeof(T)); + } + + simdutf_really_inline void swap_bytes() { + this->chunks[0] = this->chunks[0].swap_bytes(); + this->chunks[1] = this->chunks[1].swap_bytes(); + } + simdutf_really_inline uint64_t to_bitmask() const { + uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r_hi = this->chunks[1].to_bitmask(); + return r_lo | (r_hi << 32); + } + simdutf_really_inline uint64_t lteq(const T m) const { + const simd16 mask = simd16::splat(m); + return simd16x32(this->chunks[0] <= mask, this->chunks[1] <= mask) + .to_bitmask(); + } +}; // struct simd16x32 + +simdutf_really_inline simd16 min(const simd16 a, + const simd16 b) { + return __lasx_xvmin_hu(a.value, b.value); +} + +simdutf_really_inline simd16 operator==(const simd16 a, + uint16_t b) { + const auto bv = __lasx_xvreplgr2vr_h(b); + return __lasx_xvseq_h(a.value, bv); +} + +simdutf_really_inline simd16 as_vector_u16(const simd16 x) { + return x.value; +} + +simdutf_really_inline simd16 operator&(const simd16 a, + uint16_t b) { + const auto bv = __lasx_xvreplgr2vr_h(b); + 
return __lasx_xvand_v(a.value, bv); +} + +simdutf_really_inline simd16 operator&(const simd16 a, + const simd16 b) { + return __lasx_xvand_v(a.value, b.value); +} + +simdutf_really_inline simd16 operator^(const simd16 a, + uint16_t b) { + const auto bv = __lasx_xvreplgr2vr_h(b); + return __lasx_xvxor_v(a.value, bv); +} + +simdutf_really_inline simd16 operator^(const simd16 a, + const simd16 b) { + return __lasx_xvxor_v(a.value, b.value); +} +/* end file src/simdutf/lasx/simd16-inl.h */ +/* begin file src/simdutf/lasx/simd32-inl.h */ +template struct simd32; + +template <> struct simd32 { + __m256i value; + static const int SIZE = sizeof(value); + static const int ELEMENTS = SIZE / sizeof(uint32_t); + + // constructors + simdutf_really_inline simd32(__m256i v) : value(v) {} + + template + simdutf_really_inline simd32(Ptr *ptr) : value(__lasx_xvld(ptr, 0)) {} + + // in-place operators + simdutf_really_inline simd32 &operator-=(const simd32 other) { + value = __lasx_xvsub_w(value, other.value); + return *this; + } + + // members + simdutf_really_inline uint64_t sum() const { + const auto odd = __lasx_xvsrli_d(value, 32); + const auto even = __lasx_xvand_v(value, __lasx_xvreplgr2vr_d(0xffffffff)); + + const auto sum64 = __lasx_xvadd_d(odd, even); + + return uint64_t(__lasx_xvpickve2gr_du(sum64, 0)) + + uint64_t(__lasx_xvpickve2gr_du(sum64, 1)) + + uint64_t(__lasx_xvpickve2gr_du(sum64, 2)) + + uint64_t(__lasx_xvpickve2gr_du(sum64, 3)); + } + + // static members + static simdutf_really_inline simd32 splat(uint32_t x) { + return __lasx_xvreplgr2vr_w(x); + } + + static simdutf_really_inline simd32 zero() { + return __lasx_xvrepli_w(0); + } +}; + +// ------------------------------------------------------------ + +template <> struct simd32 { + __m256i value; + static const int SIZE = sizeof(value); + + // constructors + simdutf_really_inline simd32(__m256i v) : value(v) {} +}; + +// ------------------------------------------------------------ + +simdutf_really_inline simd32 
operator&(const simd32 a, + const simd32 b) { + // Lane-wise AND of two 32-bit masks. The previous body called + // __lasx_xvor_v, so `a & b` silently computed OR. + return __lasx_xvand_v(a.value, b.value); +} + +simdutf_really_inline simd32 operator<(const simd32 a, + const simd32 b) { + return __lasx_xvslt_wu(a.value, b.value); +} + +simdutf_really_inline simd32 operator>(const simd32 a, + const simd32 b) { + return __lasx_xvslt_wu(b.value, a.value); +} + +// ------------------------------------------------------------ + +simdutf_really_inline simd32 as_vector_u32(const simd32 v) { + return v.value; +} +/* end file src/simdutf/lasx/simd32-inl.h */ +/* begin file src/simdutf/lasx/simd64-inl.h */ +template struct simd64; + +template <> struct simd64 { + __m256i value; + static const int SIZE = sizeof(value); + static const int ELEMENTS = SIZE / sizeof(uint64_t); + + // constructors + simdutf_really_inline simd64(__m256i v) : value(v) {} + + template + simdutf_really_inline simd64(Ptr *ptr) : value(__lasx_xvld(ptr, 0)) {} + + // in-place operators + simdutf_really_inline simd64 &operator+=(const simd64 other) { + value = __lasx_xvadd_d(value, other.value); + return *this; + } + + // members + simdutf_really_inline uint64_t sum() const { + return uint64_t(__lasx_xvpickve2gr_du(value, 0)) + + uint64_t(__lasx_xvpickve2gr_du(value, 1)) + + uint64_t(__lasx_xvpickve2gr_du(value, 2)) + + uint64_t(__lasx_xvpickve2gr_du(value, 3)); + } + + // static members + static simdutf_really_inline simd64 zero() { + return __lasx_xvrepli_d(0); + } +}; + +// ------------------------------------------------------------ + +template <> struct simd64 { + __m256i value; + static const int SIZE = sizeof(value); + + // constructors + simdutf_really_inline simd64(__m256i v) : value(v) {} +}; + +// ------------------------------------------------------------ + +simd64 sum_8bytes(const simd8 v) { + const auto sum_u16 = __lasx_xvhaddw_hu_bu(v, v); + const auto sum_u32 = __lasx_xvhaddw_wu_hu(sum_u16, sum_u16); + const auto sum_u64 = __lasx_xvhaddw_du_wu(sum_u32, sum_u32); + + return simd64(sum_u64); +} +/* end file 
src/simdutf/lasx/simd64-inl.h */ + +} // namespace simd +} // unnamed namespace +} // namespace lasx +} // namespace simdutf + +#endif // SIMDUTF_LASX_SIMD_H +/* end file src/simdutf/lasx/simd.h */ + +/* begin file src/simdutf/lasx/end.h */ +#undef SIMDUTF_SIMD_HAS_UNSIGNED_CMP + +#if SIMDUTF_CAN_ALWAYS_RUN_LASX +// nothing needed. +#else +SIMDUTF_UNTARGET_REGION +#endif +/* end file src/simdutf/lasx/end.h */ + +#endif // SIMDUTF_IMPLEMENTATION_LASX + +#endif // SIMDUTF_LASX_H +/* end file src/simdutf/lasx.h */ +/* begin file src/simdutf/lsx.h */ +#ifndef SIMDUTF_LSX_H +#define SIMDUTF_LSX_H + +#ifdef SIMDUTF_FALLBACK_H + #error "lsx.h must be included before fallback.h" +#endif + +#ifndef SIMDUTF_CAN_ALWAYS_RUN_LASX + #error "lsx.h must be included after lasx.h" +#endif + + +#ifndef SIMDUTF_IMPLEMENTATION_LSX + #if SIMDUTF_CAN_ALWAYS_RUN_LASX + #define SIMDUTF_IMPLEMENTATION_LSX 0 + #else + #define SIMDUTF_IMPLEMENTATION_LSX (SIMDUTF_IS_LSX) + #endif +#endif +#if SIMDUTF_IMPLEMENTATION_LSX && SIMDUTF_IS_LSX + #define SIMDUTF_CAN_ALWAYS_RUN_LSX 1 +#else + #define SIMDUTF_CAN_ALWAYS_RUN_LSX 0 +#endif + +#define SIMDUTF_CAN_ALWAYS_RUN_FALLBACK (SIMDUTF_IMPLEMENTATION_FALLBACK) + +#if SIMDUTF_IMPLEMENTATION_LSX + +namespace simdutf { +/** + * Implementation for LoongArch SX. 
+ */ +namespace lsx {} // namespace lsx +} // namespace simdutf + +/* begin file src/simdutf/lsx/implementation.h */ +#ifndef SIMDUTF_LSX_IMPLEMENTATION_H +#define SIMDUTF_LSX_IMPLEMENTATION_H + + +namespace simdutf { +namespace lsx { + +namespace { +using namespace simdutf; +} + +class implementation final : public simdutf::implementation { +public: + simdutf_really_inline implementation() + : simdutf::implementation("lsx", "LOONGARCH SX", + internal::instruction_set::LSX) {} +#if SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused int detect_encodings(const char *input, + size_t length) const noexcept final; +#endif // SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf8(const char *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 + simdutf_warn_unused result + validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_ASCII + simdutf_warn_unused bool validate_ascii(const char *buf, + size_t len) const noexcept final; + simdutf_warn_unused result + validate_ascii_with_errors(const char *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII + simdutf_warn_unused bool + validate_utf16le_as_ascii(const char16_t *buf, + size_t len) const noexcept final; + + simdutf_warn_unused bool + validate_utf16be_as_ascii(const char16_t *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf16le(const char16_t *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused bool validate_utf16be(const char16_t *buf, + size_t len) const 
noexcept final; + simdutf_warn_unused result validate_utf16le_with_errors( + const char16_t *buf, size_t len) const noexcept final; + simdutf_warn_unused result validate_utf16be_with_errors( + const char16_t *buf, size_t len) const noexcept final; + void to_well_formed_utf16be(const char16_t *input, size_t len, + char16_t *output) const noexcept final; + void to_well_formed_utf16le(const char16_t *input, size_t len, + char16_t *output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf32(const char32_t *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused result validate_utf32_with_errors( + const char32_t *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf8( + const char *buf, size_t len, char *utf8_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf16le( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_latin1_to_utf16be( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_latin1_with_errors( + 
const char *buf, size_t len, char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t convert_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused size_t convert_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t convert_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf32_with_errors( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + convert_utf16le_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16be_to_latin1(const char16_t *buf, size_t len, + char 
*latin1_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( + const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( + const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16le_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16be_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t convert_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + convert_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; + simdutf_warn_unused result + convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_latin1(const 
char32_t *buf, size_t len, + char *latin1_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t convert_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf8_with_errors( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + convert_utf32_to_utf16le(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf32_to_utf16be(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_utf16le(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_utf16be(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16le_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16be_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused 
result convert_utf16be_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16le_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16be_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 + void change_endianness_utf16(const char16_t *buf, size_t length, + char16_t *output) const noexcept final; + simdutf_warn_unused size_t count_utf16le(const char16_t *buf, + size_t length) const noexcept; + simdutf_warn_unused size_t count_utf16be(const char16_t *buf, + size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 + simdutf_warn_unused size_t count_utf8(const char *buf, + size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t + utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t + utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t utf32_length_from_utf16le( + const char16_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t utf32_length_from_utf16be( + const char16_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t + utf16_length_from_utf8(const char *input, size_t length) const noexcept; + simdutf_warn_unused result utf8_length_from_utf16le_with_replacement( + const char16_t *input, size_t length) const noexcept; + ; + simdutf_warn_unused result 
utf8_length_from_utf16be_with_replacement( + const char16_t *input, size_t length) const noexcept; + ; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + utf32_length_from_utf8(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + latin1_length_from_utf8(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + utf8_length_from_latin1(const char *input, size_t length) const noexcept; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_BASE64 + simdutf_warn_unused result base64_to_binary( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused full_result base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused result + base64_to_binary(const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused full_result 
base64_to_binary_details( + const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + size_t binary_to_base64(const char *input, size_t length, char *output, + base64_options options) const noexcept; + size_t binary_to_base64_with_lines(const char *input, size_t length, + char *output, size_t line_length, + base64_options options) const noexcept; + const char *find(const char *start, const char *end, + char character) const noexcept; + const char16_t *find(const char16_t *start, const char16_t *end, + char16_t character) const noexcept; +#endif // SIMDUTF_FEATURE_BASE64 +}; + +} // namespace lsx +} // namespace simdutf + +#endif // SIMDUTF_LSX_IMPLEMENTATION_H +/* end file src/simdutf/lsx/implementation.h */ + +/* begin file src/simdutf/lsx/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "lsx" +// #define SIMDUTF_IMPLEMENTATION lsx +#define SIMDUTF_SIMD_HAS_UNSIGNED_CMP 1 +/* end file src/simdutf/lsx/begin.h */ + + // Declarations +/* begin file src/simdutf/lsx/intrinsics.h */ +#ifndef SIMDUTF_LSX_INTRINSICS_H +#define SIMDUTF_LSX_INTRINSICS_H + + +// This should be the correct header whether +// you use visual studio or other compilers. +#include + +/* +Encoding of argument for LoongArch64 xvldi instruction. 
//*/ // (terminates the block comment that is opened just above this span)
/*
Immediate modes of the LoongArch64 vldi/xvldi instruction. See:
https://jia.je/unofficial-loongarch-intrinsics-guide/lasx/misc/#__m256i-__lasx_xvldi-imm_n1024_1023-imm

1: imm[12:8]=0b10000: broadcast imm[7:0] as 32-bit elements to all lanes

2: imm[12:8]=0b10001: broadcast imm[7:0] << 8 as 32-bit elements to all lanes

3: imm[12:8]=0b10010: broadcast imm[7:0] << 16 as 32-bit elements to all lanes

4: imm[12:8]=0b10011: broadcast imm[7:0] << 24 as 32-bit elements to all lanes

5: imm[12:8]=0b10100: broadcast imm[7:0] as 16-bit elements to all lanes

6: imm[12:8]=0b10101: broadcast imm[7:0] << 8 as 16-bit elements to all lanes

7: imm[12:8]=0b10110: broadcast (imm[7:0] << 8) | 0xFF as 32-bit elements to all
lanes

8: imm[12:8]=0b10111: broadcast (imm[7:0] << 16) | 0xFFFF as 32-bit elements to
all lanes

9: imm[12:8]=0b11000: broadcast imm[7:0] as 8-bit elements to all lanes

10: imm[12:8]=0b11001: repeat each bit of imm[7:0] eight times, and broadcast
the result as 64-bit elements to all lanes
*/

namespace vldi {

/**
 * @brief Compile-time search for a vldi immediate that splats the 16-bit
 *        constant @p v.
 *
 * Cases 5, 6, 9 and 10 of the table above are tried in that order.
 *
 * Public members:
 *  - operation: mode bits (imm[12:8]), or 0xffff when no case matches;
 *  - byte:      payload (imm[7:0]), or 0xffff when no case matches;
 *  - value:     (operation << 8 | byte) - 8192, i.e. the 13-bit pattern (whose
 *               top bit is set for every mode listed) re-expressed as a
 *               negative integer; only meaningful when `valid` is true;
 *  - valid:     true when some case matched.
 */
template <uint16_t v> class const_u16 {
  constexpr static const uint8_t b0 = ((v >> 0 * 8) & 0xff);
  constexpr static const uint8_t b1 = ((v >> 1 * 8) & 0xff);

  constexpr static bool is_case5 = uint16_t(b0) == v;
  constexpr static bool is_case6 = (uint16_t(b1) << 8) == v;
  constexpr static bool is_case9 = (b0 == b1);
  constexpr static bool is_case10 =
      ((b0 == 0xff) || (b0 == 0x00)) && ((b1 == 0xff) || (b1 == 0x00));

public:
  // Case 10 is mode 0b11001 (binary). The previous literal 0x11001 (hex) did
  // not even fit in uint16_t.
  constexpr static uint16_t operation = is_case5    ? 0b10100
                                        : is_case6  ? 0b10101
                                        : is_case9  ? 0b11000
                                        : is_case10 ? 0b11001
                                                    : 0xffff;

  // For case 10 each payload bit controls one byte of a 64-bit element; a
  // 16-bit pattern repeats every two bytes, hence the 0x55 / 0xaa masks.
  constexpr static uint16_t byte =
      is_case5    ? b0
      : is_case6  ? b1
      : is_case9  ? b0
      : is_case10 ? ((b0 ? 0x55 : 0x00) | (b1 ? 0xaa : 0x00))
                  : 0xffff;

  constexpr static int value = int((operation << 8) | byte) - 8192;
  constexpr static bool valid = operation != 0xffff;
};

/**
 * @brief Compile-time search for a vldi immediate that splats the 32-bit
 *        constant @p v.
 *
 * Cases 1 through 10 of the table above are tried in order. The public
 * members have the same meaning as in const_u16.
 */
template <uint32_t v> class const_u32 {
  constexpr static const uint8_t b0 = (v & 0xff);
  constexpr static const uint8_t b1 = ((v >> 8) & 0xff);
  constexpr static const uint8_t b2 = ((v >> 16) & 0xff);
  constexpr static const uint8_t b3 = ((v >> 24) & 0xff);

  constexpr static bool is_case1 = (uint32_t(b0) == v);
  constexpr static bool is_case2 = ((uint32_t(b1) << 8) == v);
  constexpr static bool is_case3 = ((uint32_t(b2) << 16) == v);
  constexpr static bool is_case4 = ((uint32_t(b3) << 24) == v);
  constexpr static bool is_case5 = (b0 == b2) && (b1 == 0) && (b3 == 0);
  constexpr static bool is_case6 = (b1 == b3) && (b0 == 0) && (b2 == 0);
  constexpr static bool is_case7 = (b3 == 0) && (b2 == 0) && (b0 == 0xff);
  constexpr static bool is_case8 = (b3 == 0) && (b1 == 0xff) && (b0 == 0xff);
  constexpr static bool is_case9 = (b0 == b1) && (b0 == b2) && (b0 == b3);
  constexpr static bool is_case10 =
      ((b0 == 0xff) || (b0 == 0x00)) && ((b1 == 0xff) || (b1 == 0x00)) &&
      ((b2 == 0xff) || (b2 == 0x00)) && ((b3 == 0xff) || (b3 == 0x00));

public:
  constexpr static uint16_t operation = is_case1    ? 0b10000
                                        : is_case2  ? 0b10001
                                        : is_case3  ? 0b10010
                                        : is_case4  ? 0b10011
                                        : is_case5  ? 0b10100
                                        : is_case6  ? 0b10101
                                        : is_case7  ? 0b10110
                                        : is_case8  ? 0b10111
                                        : is_case9  ? 0b11000
                                        : is_case10 ? 0b11001
                                                    : 0xffff;

  // For case 10 a 32-bit pattern repeats every four bytes of the 64-bit
  // element, hence one payload bit in each nibble per source byte.
  constexpr static uint16_t byte =
      is_case1    ? b0
      : is_case2  ? b1
      : is_case3  ? b2
      : is_case4  ? b3
      : is_case5  ? b0
      : is_case6  ? b1
      : is_case7  ? b1
      : is_case8  ? b2
      : is_case9  ? b0
      : is_case10 ? ((b0 ? 0x11 : 0x00) | (b1 ? 0x22 : 0x00) |
                     (b2 ? 0x44 : 0x00) | (b3 ? 0x88 : 0x00))
                  : 0xffff;

  constexpr static int value = int((operation << 8) | byte) - 8192;
  constexpr static bool valid = operation != 0xffff;
};

/**
 * @brief Compile-time search for a vldi immediate that splats the 64-bit
 *        constant @p v.
 *
 * Two strategies are tried: if both 32-bit halves are equal and that half is
 * encodable per const_u32, its encoding is reused; otherwise case 10 is used
 * when every byte of @p v is 0x00 or 0xff. The public members `operation`,
 * `byte`, `value` and `valid` have the same meaning as in const_u16.
 */
template <uint64_t v> class const_u64 {
  constexpr static const uint8_t b0 = ((v >> 0 * 8) & 0xff);
  constexpr static const uint8_t b1 = ((v >> 1 * 8) & 0xff);
  constexpr static const uint8_t b2 = ((v >> 2 * 8) & 0xff);
  constexpr static const uint8_t b3 = ((v >> 3 * 8) & 0xff);
  constexpr static const uint8_t b4 = ((v >> 4 * 8) & 0xff);
  constexpr static const uint8_t b5 = ((v >> 5 * 8) & 0xff);
  constexpr static const uint8_t b6 = ((v >> 6 * 8) & 0xff);
  constexpr static const uint8_t b7 = ((v >> 7 * 8) & 0xff);

  constexpr static bool is_case10 =
      ((b0 == 0xff) || (b0 == 0x00)) && ((b1 == 0xff) || (b1 == 0x00)) &&
      ((b2 == 0xff) || (b2 == 0x00)) && ((b3 == 0xff) || (b3 == 0x00)) &&
      ((b4 == 0xff) || (b4 == 0x00)) && ((b5 == 0xff) || (b5 == 0x00)) &&
      ((b6 == 0xff) || (b6 == 0x00)) && ((b7 == 0xff) || (b7 == 0x00));

public:
  // The half must be *encodable* (`valid`). Testing `value` here was a bug:
  // it is never zero, so any constant with two equal halves was accepted.
  constexpr static bool is_32bit =
      ((v & 0xffffffff) == (v >> 32)) && const_u32<(v >> 32)>::valid;
  // Only meaningful when is_32bit is true; the casts make the truncation of
  // the 0xffff "no match" sentinel explicit.
  constexpr static uint8_t op_32bit = uint8_t(const_u32<(v >> 32)>::operation);
  constexpr static uint8_t byte_32bit = uint8_t(const_u32<(v >> 32)>::byte);

  // Case 10 is mode 0b11001 (binary); the previous literal was hex 0x11001.
  constexpr static uint16_t operation = is_32bit    ? op_32bit
                                        : is_case10 ? 0b11001
                                                    : 0xffff;

  // For case 10, payload bit i is set when byte i of v is 0xff.
  constexpr static uint16_t byte =
      is_32bit ? byte_32bit
      : is_case10
          ? ((b0 ? 0x01 : 0x00) | (b1 ? 0x02 : 0x00) | (b2 ? 0x04 : 0x00) |
             (b3 ? 0x08 : 0x00) | (b4 ? 0x10 : 0x00) | (b5 ? 0x20 : 0x00) |
             (b6 ? 0x40 : 0x00) | (b7 ? 0x80 : 0x00))
          : 0xffff;

  constexpr static int value = int((operation << 8) | byte) - 8192;
  constexpr static bool valid = operation != 0xffff;
};
} // namespace vldi

// Uncomment when running under QEMU affected
// by bug https://gitlab.com/qemu-project/qemu/-/issues/2865
// Versions <= 9.2.2 are affected, likely anything newer is correct.
+#ifndef QEMU_VLDI_BUG +// #define QEMU_VLDI_BUG 1 +#endif + +#ifndef lsx_splat_u16 + #ifdef QEMU_VLDI_BUG + #define lsx_splat_u16(v) __lsx_vreplgr2vr_h(v) + #define lsx_splat_u32(v) __lsx_vreplgr2vr_w(v) + #else +namespace { +template constexpr __m128i lsx_splat_u16_aux() { + return ((int16_t(x) < 512) && (int16_t(x) > -512)) + ? __lsx_vrepli_h( + ((int16_t(x) < 512) && (int16_t(x) > -512)) ? int16_t(x) : 0) + : (vldi::const_u16::valid + ? __lsx_vldi(vldi::const_u16::valid + ? vldi::const_u16::value + : 0) + : __lsx_vreplgr2vr_h(x)); +} + +template constexpr __m128i lsx_splat_u32_aux() { + return ((int32_t(x) < 512) && (int32_t(x) > -512)) + ? __lsx_vrepli_w( + ((int32_t(x) < 512) && (int32_t(x) > -512)) ? int32_t(x) : 0) + : (vldi::const_u32::valid + ? __lsx_vldi(vldi::const_u32::valid + ? vldi::const_u32::value + : 0) + : __lsx_vreplgr2vr_w(x)); +} +} // namespace + #define lsx_splat_u16(v) lsx_splat_u16_aux<(v)>() + #define lsx_splat_u32(v) lsx_splat_u32_aux<(v)>() + #endif // QEMU_VLDI_BUG +#endif // lsx_splat_u16 +#endif // SIMDUTF_LSX_INTRINSICS_H +/* end file src/simdutf/lsx/intrinsics.h */ +/* begin file src/simdutf/lsx/bitmanipulation.h */ +#ifndef SIMDUTF_LSX_BITMANIPULATION_H +#define SIMDUTF_LSX_BITMANIPULATION_H + +#include + +namespace simdutf { +namespace lsx { +namespace { + +simdutf_really_inline int count_ones(uint64_t input_num) { + return __lsx_vpickve2gr_w(__lsx_vpcnt_d(__lsx_vreplgr2vr_d(input_num)), 0); +} + +#if SIMDUTF_NEED_TRAILING_ZEROES +simdutf_really_inline int trailing_zeroes(uint64_t input_num) { + return __builtin_ctzll(input_num); +} +#endif + +} // unnamed namespace +} // namespace lsx +} // namespace simdutf + +#endif // SIMDUTF_LSX_BITMANIPULATION_H +/* end file src/simdutf/lsx/bitmanipulation.h */ +/* begin file src/simdutf/lsx/simd.h */ +#ifndef SIMDUTF_LSX_SIMD_H +#define SIMDUTF_LSX_SIMD_H + + +namespace simdutf { +namespace lsx { +namespace { +namespace simd { + +template struct simd8; + +// +// Base class of simd8 and 
simd8, both of which use __m128i +// internally. +// +template > struct base_u8 { + __m128i value; + static const int SIZE = sizeof(value); + + // Conversion from/to SIMD register + simdutf_really_inline base_u8(const __m128i _value) : value(_value) {} + simdutf_really_inline operator const __m128i &() const { return this->value; } + simdutf_really_inline operator __m128i &() { return this->value; } + + // Bit operations + simdutf_really_inline simd8 operator|(const simd8 other) const { + return __lsx_vor_v(this->value, other); + } + simdutf_really_inline simd8 operator&(const simd8 other) const { + return __lsx_vand_v(this->value, other); + } + simdutf_really_inline simd8 operator^(const simd8 other) const { + return __lsx_vxor_v(this->value, other); + } + simdutf_really_inline simd8 operator~() const { return *this ^ 0xFFu; } + simdutf_really_inline simd8 &operator|=(const simd8 other) { + auto this_cast = static_cast *>(this); + *this_cast = *this_cast | other; + return *this_cast; + } + + friend simdutf_really_inline Mask operator==(const simd8 lhs, + const simd8 rhs) { + return __lsx_vseq_b(lhs, rhs); + } + + template + simdutf_really_inline simd8 prev(const simd8 prev_chunk) const { + return __lsx_vor_v(__lsx_vbsll_v(this->value, N), + __lsx_vbsrl_v(prev_chunk.value, 16 - N)); + } +}; + +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd8 : base_u8 { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + static simdutf_really_inline simd8 splat(bool _value) { + return __lsx_vreplgr2vr_b(uint8_t(-(!!_value))); + } + + simdutf_really_inline simd8(const __m128i _value) : base_u8(_value) {} + // False constructor + simdutf_really_inline simd8() : simd8(__lsx_vldi(0)) {} + // Splat constructor + simdutf_really_inline simd8(bool _value) : simd8(splat(_value)) {} + simdutf_really_inline void store(uint8_t dst[16]) const { + return __lsx_vst(this->value, dst, 0); + } + + simdutf_really_inline uint32_t to_bitmask() const { + 
return __lsx_vpickve2gr_wu(__lsx_vmsknz_b(*this), 0); + } +}; + +// Unsigned bytes +template <> struct simd8 : base_u8 { + static simdutf_really_inline simd8 splat(uint8_t _value) { + return __lsx_vreplgr2vr_b(_value); + } + static simdutf_really_inline simd8 zero() { return __lsx_vldi(0); } + static simdutf_really_inline simd8 load(const uint8_t *values) { + return __lsx_vld(values, 0); + } + simdutf_really_inline simd8(const __m128i _value) + : base_u8(_value) {} + // Zero constructor + simdutf_really_inline simd8() : simd8(zero()) {} + // Array constructor + simdutf_really_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} + // Splat constructor + simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Member-by-member initialization + simdutf_really_inline + simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, + uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, + uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) + : simd8((__m128i)v16u8{v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, + v12, v13, v14, v15}) {} + + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdutf_really_inline static simd8 + repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, + uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, + uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, + uint8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); + } + + // Store to array + simdutf_really_inline void store(uint8_t dst[16]) const { + return __lsx_vst(this->value, dst, 0); + } + + // Order-specific operations + simdutf_really_inline simd8 + operator>=(const simd8 other) const { + return __lsx_vsle_bu(other, *this); + } + simdutf_really_inline simd8 + operator>(const simd8 other) const { + return __lsx_vslt_bu(other, *this); + } + simdutf_really_inline simd8 &operator-=(const simd8 other) { + value = 
__lsx_vsub_b(value, other.value); + return *this; + } + // Same as >, but instead of guaranteeing all 1's == true, false = 0 and true + // = nonzero. For ARM, returns all 1's. + simdutf_really_inline simd8 + gt_bits(const simd8 other) const { + return simd8(*this > other); + } + + // Bit-specific operations + simdutf_really_inline simd8 any_bits_set(simd8 bits) const { + return __lsx_vslt_bu(__lsx_vldi(0), __lsx_vand_v(this->value, bits)); + } + simdutf_really_inline bool is_ascii() const { + return __lsx_vpickve2gr_hu(__lsx_vmskgez_b(this->value), 0) == 0xFFFF; + } + + simdutf_really_inline bool any_bits_set_anywhere() const { + return __lsx_vpickve2gr_hu(__lsx_vmsknz_b(this->value), 0) > 0; + } + template simdutf_really_inline simd8 shr() const { + return __lsx_vsrli_b(this->value, N); + } + template simdutf_really_inline simd8 shl() const { + return __lsx_vslli_b(this->value, N); + } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior + // for out of range values) + template + simdutf_really_inline simd8 lookup_16(simd8 lookup_table) const { + return lookup_table.apply_lookup_16_to(*this); + } + + template + simdutf_really_inline simd8 + lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, + L replace5, L replace6, L replace7, L replace8, L replace9, + L replace10, L replace11, L replace12, L replace13, L replace14, + L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, replace4, replace5, replace6, + replace7, replace8, replace9, replace10, replace11, replace12, + replace13, replace14, replace15)); + } + + template + simdutf_really_inline simd8 + apply_lookup_16_to(const simd8 original) const { + __m128i original_tmp = __lsx_vand_v(original, __lsx_vldi(0x1f)); + return __lsx_vshuf_b(__lsx_vldi(0), *this, simd8(original_tmp)); + } + + simdutf_really_inline uint64_t sum_bytes() const { + const auto sum_u16 = __lsx_vhaddw_hu_bu(value, value); + const auto sum_u32 = 
__lsx_vhaddw_wu_hu(sum_u16, sum_u16); + const auto sum_u64 = __lsx_vhaddw_du_wu(sum_u32, sum_u32); + + return uint64_t(__lsx_vpickve2gr_du(sum_u64, 0)) + + uint64_t(__lsx_vpickve2gr_du(sum_u64, 1)); + } +}; + +// Signed bytes +template <> struct simd8 { + __m128i value; + + static const int SIZE = sizeof(value); + + static simdutf_really_inline simd8 splat(int8_t _value) { + return __lsx_vreplgr2vr_b(_value); + } + static simdutf_really_inline simd8 zero() { return __lsx_vldi(0); } + static simdutf_really_inline simd8 load(const int8_t values[16]) { + return __lsx_vld(values, 0); + } + + template + simdutf_really_inline void store_ascii_as_utf16(char16_t *p) const { + __m128i zero = __lsx_vldi(0); + if simdutf_constexpr (match_system(big_endian)) { + __lsx_vst(__lsx_vilvl_b(zero, (__m128i)this->value), + reinterpret_cast(p), 0); + __lsx_vst(__lsx_vilvh_b(zero, (__m128i)this->value), + reinterpret_cast(p + 8), 0); + } else { + __lsx_vst(__lsx_vilvl_b((__m128i)this->value, zero), + reinterpret_cast(p), 0); + __lsx_vst(__lsx_vilvh_b((__m128i)this->value, zero), + reinterpret_cast(p + 8), 0); + } + } + + simdutf_really_inline void store_ascii_as_utf32(char32_t *p) const { + __m128i zero = __lsx_vldi(0); + __m128i in16low = __lsx_vilvl_b(zero, (__m128i)this->value); + __m128i in16high = __lsx_vilvh_b(zero, (__m128i)this->value); + __m128i in32_0 = __lsx_vilvl_h(zero, in16low); + __m128i in32_1 = __lsx_vilvh_h(zero, in16low); + __m128i in32_2 = __lsx_vilvl_h(zero, in16high); + __m128i in32_3 = __lsx_vilvh_h(zero, in16high); + __lsx_vst(in32_0, reinterpret_cast(p), 0); + __lsx_vst(in32_1, reinterpret_cast(p + 4), 0); + __lsx_vst(in32_2, reinterpret_cast(p + 8), 0); + __lsx_vst(in32_3, reinterpret_cast(p + 12), 0); + } + + // In places where the table can be reused, which is most uses in simdutf, it + // is worth it to do 4 table lookups, as there is no direct zero extension + // from u8 to u32. 
+ simdutf_really_inline void store_ascii_as_utf32_tbl(char32_t *p) const { + const simd8 tb1{0, 255, 255, 255, 1, 255, 255, 255, + 2, 255, 255, 255, 3, 255, 255, 255}; + const simd8 tb2{4, 255, 255, 255, 5, 255, 255, 255, + 6, 255, 255, 255, 7, 255, 255, 255}; + const simd8 tb3{8, 255, 255, 255, 9, 255, 255, 255, + 10, 255, 255, 255, 11, 255, 255, 255}; + const simd8 tb4{12, 255, 255, 255, 13, 255, 255, 255, + 14, 255, 255, 255, 15, 255, 255, 255}; + + // encourage store pairing and interleaving + const auto shuf1 = this->apply_lookup_16_to(tb1); + const auto shuf2 = this->apply_lookup_16_to(tb2); + shuf1.store(reinterpret_cast(p)); + shuf2.store(reinterpret_cast(p + 4)); + + const auto shuf3 = this->apply_lookup_16_to(tb3); + const auto shuf4 = this->apply_lookup_16_to(tb4); + shuf3.store(reinterpret_cast(p + 8)); + shuf4.store(reinterpret_cast(p + 12)); + } + // Conversion from/to SIMD register + simdutf_really_inline simd8(const __m128i _value) : value(_value) {} + + // Zero constructor + simdutf_really_inline simd8() : simd8(zero()) {} + // Splat constructor + simdutf_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdutf_really_inline simd8(const int8_t *values) : simd8(load(values)) {} + + // Store to array + simdutf_really_inline void store(int8_t dst[16]) const { + return __lsx_vst(value, dst, 0); + } + + simdutf_really_inline operator simd8() const { + return ((__m128i)this->value); + } + + simdutf_really_inline simd8 + operator|(const simd8 other) const { + return __lsx_vor_v((__m128i)value, (__m128i)other.value); + } + + simdutf_really_inline bool is_ascii() const { + return (__lsx_vpickve2gr_hu(__lsx_vmskgez_b((__m128i)this->value), 0) == + 0xffff); + } + + // Order-sensitive comparisons + simdutf_really_inline simd8 operator>(const simd8 other) const { + return __lsx_vslt_b((__m128i)other.value, (__m128i)value); + } + simdutf_really_inline simd8 operator<(const simd8 other) const { + return 
__lsx_vslt_b((__m128i)value, (__m128i)other.value); + } + + template + simdutf_really_inline simd8 + prev(const simd8 prev_chunk) const { + return __lsx_vor_v(__lsx_vbsll_v(this->value, N), + __lsx_vbsrl_v(prev_chunk.value, 16 - N)); + } + + template + simdutf_really_inline simd8 + apply_lookup_16_to(const simd8 original) const { + __m128i original_tmp = __lsx_vand_v(original, __lsx_vldi(0x1f)); + return __lsx_vshuf_b(__lsx_vldi(0), (__m128i)this->value, + simd8(original_tmp)); + } +}; + +template struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert( + NUM_CHUNKS == 4, + "LoongArch kernel should use four registers per 64-byte block."); + simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64 &o) = delete; // no copy allowed + simd8x64 & + operator=(const simd8 other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdutf_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1, + const simd8 chunk2, const simd8 chunk3) + : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdutf_really_inline simd8x64(const T *ptr) + : chunks{simd8::load(ptr), + simd8::load(ptr + sizeof(simd8) / sizeof(T)), + simd8::load(ptr + 2 * sizeof(simd8) / sizeof(T)), + simd8::load(ptr + 3 * sizeof(simd8) / sizeof(T))} {} + + simdutf_really_inline void store(T *ptr) const { + this->chunks[0].store(ptr + sizeof(simd8) * 0 / sizeof(T)); + this->chunks[1].store(ptr + sizeof(simd8) * 1 / sizeof(T)); + this->chunks[2].store(ptr + sizeof(simd8) * 2 / sizeof(T)); + this->chunks[3].store(ptr + sizeof(simd8) * 3 / sizeof(T)); + } + + simdutf_really_inline simd8x64 &operator|=(const simd8x64 &other) { + this->chunks[0] |= other.chunks[0]; + this->chunks[1] |= other.chunks[1]; + this->chunks[2] |= other.chunks[2]; + this->chunks[3] |= other.chunks[3]; + return *this; + } + + simdutf_really_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | + (this->chunks[2] | this->chunks[3]); + } + + 
simdutf_really_inline bool is_ascii() const { return reduce_or().is_ascii(); } + + template + simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { + this->chunks[0].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 0); + this->chunks[1].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 1); + this->chunks[2].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 2); + this->chunks[3].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 3); + } + + simdutf_really_inline void store_ascii_as_utf32(char32_t *ptr) const { + this->chunks[0].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 0); + this->chunks[1].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 1); + this->chunks[2].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 2); + this->chunks[3].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 3); + } + + simdutf_really_inline uint64_t to_bitmask() const { + __m128i mask = __lsx_vbsll_v(__lsx_vmsknz_b(this->chunks[3]), 6); + mask = __lsx_vor_v(mask, __lsx_vbsll_v(__lsx_vmsknz_b(this->chunks[2]), 4)); + mask = __lsx_vor_v(mask, __lsx_vbsll_v(__lsx_vmsknz_b(this->chunks[1]), 2)); + mask = __lsx_vor_v(mask, __lsx_vmsknz_b(this->chunks[0])); + return __lsx_vpickve2gr_du(mask, 0); + } + + simdutf_really_inline uint64_t lt(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] < mask, this->chunks[1] < mask, + this->chunks[2] < mask, this->chunks[3] < mask) + .to_bitmask(); + } + simdutf_really_inline uint64_t gt(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] > mask, this->chunks[1] > mask, + this->chunks[2] > mask, this->chunks[3] > mask) + .to_bitmask(); + } + simdutf_really_inline uint64_t gteq_unsigned(const uint8_t m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(simd8(this->chunks[0].value) >= mask, + simd8(this->chunks[1].value) >= mask, + simd8(this->chunks[2].value) >= mask, + simd8(this->chunks[3].value) >= mask) + .to_bitmask(); + } +}; // struct 
simd8x64 + +/* begin file src/simdutf/lsx/simd16-inl.h */ +template struct simd16; + +template > struct base_u16 { + __m128i value; + static const size_t SIZE = sizeof(value); + static const size_t ELEMENTS = sizeof(value) / sizeof(T); + + // Conversion from/to SIMD register + simdutf_really_inline base_u16() = default; + simdutf_really_inline base_u16(const __m128i _value) : value(_value) {} + // Bit operations + simdutf_really_inline simd16 operator|(const simd16 other) const { + return __lsx_vor_v(this->value, other.value); + } + simdutf_really_inline simd16 operator&(const simd16 other) const { + return __lsx_vand_v(this->value, other.value); + } + simdutf_really_inline simd16 operator~() const { + return __lsx_vxori_b(this->value, 0xFF); + } + + friend simdutf_really_inline Mask operator==(const simd16 lhs, + const simd16 rhs) { + return __lsx_vseq_h(lhs.value, rhs.value); + } + + template + simdutf_really_inline simd16 byte_right_shift() const { + return __lsx_vbsrl_v(this->value, N); + } + + simdutf_really_inline uint16_t first() const { + return uint16_t(__lsx_vpickve2gr_w(value, 0)); + } +}; + +template > +struct base16 : base_u16 { + using bitmask_type = uint16_t; + + simdutf_really_inline base16() : base_u16() {} + simdutf_really_inline base16(const __m128i _value) : base_u16(_value) {} + template + simdutf_really_inline base16(const Pointer *ptr) + : base16(__lsx_vld(ptr, 0)) {} + + static const int SIZE = sizeof(base_u16::value); + + template + simdutf_really_inline simd16 prev(const simd16 prev_chunk) const { + return __lsx_vor_v(__lsx_vbsll_v(*this, N * 2), + __lsx_vbsrl_v(prev_chunk, 16 - N * 2)); + } +}; + +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd16 : base16 { + static simdutf_really_inline simd16 splat(bool _value) { + return __lsx_vreplgr2vr_h(uint16_t(-(!!_value))); + } + + simdutf_really_inline simd16() : base16() {} + simdutf_really_inline simd16(const __m128i _value) : base16(_value) {} + + 
simdutf_really_inline bitmask_type to_bitmask() const { + __m128i mask = __lsx_vmsknz_b(this->value); + bitmask_type mask0 = bitmask_type(__lsx_vpickve2gr_wu(mask, 0)); + return mask0; + } + + simdutf_really_inline bool is_zero() const { return __lsx_bz_v(this->value); } +}; + +template struct base16_numeric : base16 { + static simdutf_really_inline simd16 splat(T _value) { + return __lsx_vreplgr2vr_h(_value); + } + static simdutf_really_inline simd16 zero() { return __lsx_vldi(0); } + + template + static simdutf_really_inline simd16 load(const Pointer values) { + return __lsx_vld(values, 0); + } + + simdutf_really_inline base16_numeric(const __m128i _value) + : base16(_value) {} + + // Store to array + simdutf_really_inline void store(T dst[8]) const { + return __lsx_vst(this->value, dst, 0); + } + + // Override to distinguish from bool version + simdutf_really_inline simd16 operator~() const { + return __lsx_vxori_b(this->value, 0xFF); + } +}; + +// Unsigned code unitstemplate<> +template <> struct simd16 : base16_numeric { + simdutf_really_inline simd16(const __m128i _value) + : base16_numeric((__m128i)_value) {} + + // Splat constructor + simdutf_really_inline simd16(uint16_t _value) : simd16(splat(_value)) {} + + // Array constructor + simdutf_really_inline simd16(const uint16_t *values) : simd16(load(values)) {} + simdutf_really_inline simd16(const char16_t *values) + : simd16(load(reinterpret_cast(values))) {} + + // Copy constructor + simdutf_really_inline simd16(const simd16 mask) : simd16(mask.value) {} + + // Order-specific operations + simdutf_really_inline simd16 &operator+=(const simd16 other) { + value = __lsx_vadd_h(value, other.value); + return *this; + } + + template + static simdutf_really_inline simd8 + pack_shifted_right(const simd16 &v0, const simd16 &v1) { + return __lsx_vssrlni_bu_h(v1.value, v0.value, N); + } + + // Pack with the unsigned saturation of two uint16_t code units into single + // uint8_t vector + static simdutf_really_inline 
simd8 pack(const simd16 &v0, + const simd16 &v1) { + return pack_shifted_right<0>(v0, v1); + } + + // Change the endianness + simdutf_really_inline simd16 swap_bytes() const { + return __lsx_vshuf4i_b(this->value, 0b10110001); + } + + simdutf_really_inline uint64_t sum() const { + const auto sum_u32 = __lsx_vhaddw_wu_hu(value, value); + const auto sum_u64 = __lsx_vhaddw_du_wu(sum_u32, sum_u32); + + return uint64_t(__lsx_vpickve2gr_du(sum_u64, 0)) + + uint64_t(__lsx_vpickve2gr_du(sum_u64, 1)); + } +}; + +simdutf_really_inline simd16 operator<(const simd16 a, + const simd16 b) { + return __lsx_vslt_hu(a.value, b.value); +} + +simdutf_really_inline simd16 operator>(const simd16 a, + const simd16 b) { + return __lsx_vslt_hu(b.value, a.value); +} + +simdutf_really_inline simd16 operator<=(const simd16 a, + const simd16 b) { + return __lsx_vsle_hu(a.value, b.value); +} + +simdutf_really_inline simd16 operator>=(const simd16 a, + const simd16 b) { + return __lsx_vsle_hu(b.value, a.value); +} + +template struct simd16x32 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd16); + static_assert( + NUM_CHUNKS == 4, + "LOONGARCH kernel should use four registers per 64-byte block."); + simd16 chunks[NUM_CHUNKS]; + + simd16x32(const simd16x32 &o) = delete; // no copy allowed + simd16x32 & + operator=(const simd16 other) = delete; // no assignment allowed + simd16x32() = delete; // no default constructor allowed + + simdutf_really_inline + simd16x32(const simd16 chunk0, const simd16 chunk1, + const simd16 chunk2, const simd16 chunk3) + : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdutf_really_inline simd16x32(const T *ptr) + : chunks{simd16::load(ptr), + simd16::load(ptr + sizeof(simd16) / sizeof(T)), + simd16::load(ptr + 2 * sizeof(simd16) / sizeof(T)), + simd16::load(ptr + 3 * sizeof(simd16) / sizeof(T))} {} + + simdutf_really_inline void store(T *ptr) const { + this->chunks[0].store(ptr + sizeof(simd16) * 0 / sizeof(T)); + this->chunks[1].store(ptr + sizeof(simd16) * 1 / 
sizeof(T)); + this->chunks[2].store(ptr + sizeof(simd16) * 2 / sizeof(T)); + this->chunks[3].store(ptr + sizeof(simd16) * 3 / sizeof(T)); + } + + simdutf_really_inline void swap_bytes() { + this->chunks[0] = this->chunks[0].swap_bytes(); + this->chunks[1] = this->chunks[1].swap_bytes(); + this->chunks[2] = this->chunks[2].swap_bytes(); + this->chunks[3] = this->chunks[3].swap_bytes(); + } + simdutf_really_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r1 = this->chunks[1].to_bitmask(); + uint64_t r2 = this->chunks[2].to_bitmask(); + uint64_t r3 = this->chunks[3].to_bitmask(); + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + simdutf_really_inline uint64_t lteq(const T m) const { + const simd16 mask = simd16::splat(m); + return simd16x32(this->chunks[0] <= mask, this->chunks[1] <= mask, + this->chunks[2] <= mask, this->chunks[3] <= mask) + .to_bitmask(); + } +}; // struct simd16x32 + +simdutf_really_inline simd16 operator^(const simd16 a, + uint16_t b) { + const auto bv = __lsx_vreplgr2vr_h(b); + return __lsx_vxor_v(a.value, bv); +} + +simdutf_really_inline simd16 operator^(const simd16 a, + const simd16 b) { + return __lsx_vxor_v(a.value, b.value); +} + +simdutf_really_inline simd16 min(const simd16 a, + const simd16 b) { + return __lsx_vmin_hu(a.value, b.value); +} + +simdutf_really_inline simd16 as_vector_u16(const simd16 x) { + return x.value; +} +/* end file src/simdutf/lsx/simd16-inl.h */ +/* begin file src/simdutf/lsx/simd32-inl.h */ +template struct simd32; + +template <> struct simd32 { + __m128i value; + static const int SIZE = sizeof(value); + static const int ELEMENTS = SIZE / sizeof(uint32_t); + + // constructors + simdutf_really_inline simd32(__m128i v) : value(v) {} + + template + simdutf_really_inline simd32(Ptr *ptr) : value(__lsx_vld(ptr, 0)) {} + + // in-place operators + simdutf_really_inline simd32 &operator-=(const simd32 other) { + value = __lsx_vsub_w(value, other.value); + 
return *this; + } + + // members + simdutf_really_inline uint64_t sum() const { + return uint64_t(__lsx_vpickve2gr_wu(value, 0)) + + uint64_t(__lsx_vpickve2gr_wu(value, 1)) + + uint64_t(__lsx_vpickve2gr_wu(value, 2)) + + uint64_t(__lsx_vpickve2gr_wu(value, 3)); + } + + // static members + static simdutf_really_inline simd32 splat(uint32_t x) { + return __lsx_vreplgr2vr_w(x); + } + + static simdutf_really_inline simd32 zero() { + return __lsx_vrepli_w(0); + } +}; + +// ------------------------------------------------------------ + +template <> struct simd32 { + __m128i value; + static const int SIZE = sizeof(value); + + // constructors + simdutf_really_inline simd32(__m128i v) : value(v) {} +}; + +// ------------------------------------------------------------ + +simdutf_really_inline simd32 operator&(const simd32 a, + const simd32 b) { + return __lsx_vor_v(a.value, b.value); +} + +simdutf_really_inline simd32 operator<(const simd32 a, + const simd32 b) { + return __lsx_vslt_wu(a.value, b.value); +} + +simdutf_really_inline simd32 operator>(const simd32 a, + const simd32 b) { + return __lsx_vslt_wu(b.value, a.value); +} + +// ------------------------------------------------------------ + +simdutf_really_inline simd32 as_vector_u32(const simd32 v) { + return v.value; +} +/* end file src/simdutf/lsx/simd32-inl.h */ +/* begin file src/simdutf/lsx/simd64-inl.h */ +template struct simd64; + +template <> struct simd64 { + __m128i value; + static const int SIZE = sizeof(value); + static const int ELEMENTS = SIZE / sizeof(uint64_t); + + // constructors + simdutf_really_inline simd64(__m128i v) : value(v) {} + + template + simdutf_really_inline simd64(Ptr *ptr) : value(__lsx_vld(ptr, 0)) {} + + // in-place operators + simdutf_really_inline simd64 &operator+=(const simd64 other) { + value = __lsx_vadd_d(value, other.value); + return *this; + } + + // members + simdutf_really_inline uint64_t sum() const { + return uint64_t(__lsx_vpickve2gr_du(value, 0)) + + 
uint64_t(__lsx_vpickve2gr_du(value, 1)); + } + + // static members + static simdutf_really_inline simd64 zero() { + return __lsx_vrepli_d(0); + } +}; + +// ------------------------------------------------------------ + +template <> struct simd64 { + __m128i value; + static const int SIZE = sizeof(value); + + // constructors + simdutf_really_inline simd64(__m128i v) : value(v) {} +}; + +// ------------------------------------------------------------ + +simd64 sum_8bytes(const simd8 v) { + const auto sum_u16 = __lsx_vhaddw_hu_bu(v, v); + const auto sum_u32 = __lsx_vhaddw_wu_hu(sum_u16, sum_u16); + const auto sum_u64 = __lsx_vhaddw_du_wu(sum_u32, sum_u32); + + return simd64(sum_u64); +} +/* end file src/simdutf/lsx/simd64-inl.h */ + +} // namespace simd +} // unnamed namespace +} // namespace lsx +} // namespace simdutf + +#endif // SIMDUTF_LSX_SIMD_H +/* end file src/simdutf/lsx/simd.h */ + +/* begin file src/simdutf/lsx/end.h */ +#undef SIMDUTF_SIMD_HAS_UNSIGNED_CMP +/* end file src/simdutf/lsx/end.h */ + +#endif // SIMDUTF_IMPLEMENTATION_LSX + +#endif // SIMDUTF_LSX_H +/* end file src/simdutf/lsx.h */ +/* begin file src/simdutf/fallback.h */ +#ifndef SIMDUTF_FALLBACK_H +#define SIMDUTF_FALLBACK_H + + +// Note that fallback.h is always imported last. + +// Default Fallback to on unless a builtin implementation has already been +// selected. +#ifndef SIMDUTF_IMPLEMENTATION_FALLBACK + #if SIMDUTF_CAN_ALWAYS_RUN_ARM64 || SIMDUTF_CAN_ALWAYS_RUN_ICELAKE || \ + SIMDUTF_CAN_ALWAYS_RUN_HASWELL || SIMDUTF_CAN_ALWAYS_RUN_WESTMERE || \ + SIMDUTF_CAN_ALWAYS_RUN_PPC64 || SIMDUTF_CAN_ALWAYS_RUN_RVV || \ + SIMDUTF_CAN_ALWAYS_RUN_LSX || SIMDUTF_CAN_ALWAYS_RUN_LASX + #define SIMDUTF_IMPLEMENTATION_FALLBACK 0 + #else + #define SIMDUTF_IMPLEMENTATION_FALLBACK 1 + #endif +#endif + +#define SIMDUTF_CAN_ALWAYS_RUN_FALLBACK (SIMDUTF_IMPLEMENTATION_FALLBACK) + +#if SIMDUTF_IMPLEMENTATION_FALLBACK + +namespace simdutf { +/** + * Fallback implementation (runs on any machine). 
+ */ +namespace fallback {} // namespace fallback +} // namespace simdutf + +/* begin file src/simdutf/fallback/implementation.h */ +#ifndef SIMDUTF_FALLBACK_IMPLEMENTATION_H +#define SIMDUTF_FALLBACK_IMPLEMENTATION_H + + +namespace simdutf { +namespace fallback { + +namespace { +using namespace simdutf; +} + +class implementation final : public simdutf::implementation { +public: + simdutf_really_inline implementation() + : simdutf::implementation("fallback", "Generic fallback implementation", + 0) {} + +#if SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused int detect_encodings(const char *input, + size_t length) const noexcept final; +#endif // SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf8(const char *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 + simdutf_warn_unused result + validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_ASCII + simdutf_warn_unused bool validate_ascii(const char *buf, + size_t len) const noexcept final; + simdutf_warn_unused result + validate_ascii_with_errors(const char *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_ASCII +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII + simdutf_warn_unused bool + validate_utf16le_as_ascii(const char16_t *buf, + size_t len) const noexcept final; + + simdutf_warn_unused bool + validate_utf16be_as_ascii(const char16_t *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf16le(const char16_t *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused bool validate_utf16be(const 
char16_t *buf, + size_t len) const noexcept final; + simdutf_warn_unused result validate_utf16le_with_errors( + const char16_t *buf, size_t len) const noexcept final; + simdutf_warn_unused result validate_utf16be_with_errors( + const char16_t *buf, size_t len) const noexcept final; + void to_well_formed_utf16be(const char16_t *input, size_t len, + char16_t *output) const noexcept final; + void to_well_formed_utf16le(const char16_t *input, size_t len, + char16_t *output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf32(const char32_t *buf, + size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused result validate_utf32_with_errors( + const char32_t *buf, size_t len) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf8( + const char *buf, size_t len, char *utf8_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf16le( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_latin1_to_utf16be( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept final; + simdutf_warn_unused 
result convert_utf8_to_latin1_with_errors( + const char *buf, size_t len, char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t convert_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused size_t convert_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t convert_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf32_with_errors( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + convert_utf16le_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + 
convert_utf16be_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( + const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( + const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16le_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16be_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t convert_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t convert_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf8_with_errors( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; + 
simdutf_warn_unused size_t convert_valid_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + convert_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; + simdutf_warn_unused result + convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + convert_utf32_to_utf16le(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf32_to_utf16be(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_utf16le(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_utf16be(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16le_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16be_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( + const char16_t *buf, 
size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16le_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16be_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 + void change_endianness_utf16(const char16_t *buf, size_t length, + char16_t *output) const noexcept final; + simdutf_warn_unused size_t + count_utf16le(const char16_t *buf, size_t length) const noexcept override; + simdutf_warn_unused size_t + count_utf16be(const char16_t *buf, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 + simdutf_warn_unused size_t count_utf8(const char *buf, + size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t utf8_length_from_utf16le( + const char16_t *input, size_t length) const noexcept override; + simdutf_warn_unused size_t utf8_length_from_utf16be( + const char16_t *input, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t utf32_length_from_utf16le( + const char16_t *input, size_t length) const noexcept override; + simdutf_warn_unused size_t utf32_length_from_utf16be( + const char16_t *input, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t utf16_length_from_utf8( + const char *input, size_t length) const noexcept override; + simdutf_warn_unused result 
utf8_length_from_utf16le_with_replacement( + const char16_t *input, size_t length) const noexcept override; + ; + simdutf_warn_unused result utf8_length_from_utf16be_with_replacement( + const char16_t *input, size_t length) const noexcept override; + ; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t utf8_length_from_utf32( + const char32_t *input, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t utf16_length_from_utf32( + const char32_t *input, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t utf32_length_from_utf8( + const char *input, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t latin1_length_from_utf8( + const char *input, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t utf8_length_from_latin1( + const char *input, size_t length) const noexcept override; +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_BASE64 + simdutf_warn_unused result base64_to_binary( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept override; + simdutf_warn_unused full_result base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept override; + simdutf_warn_unused result 
base64_to_binary( + const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept override; + simdutf_warn_unused full_result base64_to_binary_details( + const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept override; + size_t binary_to_base64(const char *input, size_t length, char *output, + base64_options options) const noexcept override; + size_t + binary_to_base64_with_lines(const char *input, size_t length, char *output, + size_t line_length, + base64_options options) const noexcept override; + const char *find(const char *start, const char *end, + char character) const noexcept override; + const char16_t *find(const char16_t *start, const char16_t *end, + char16_t character) const noexcept override; + +#endif // SIMDUTF_FEATURE_BASE64 +}; +} // namespace fallback +} // namespace simdutf + +#endif // SIMDUTF_FALLBACK_IMPLEMENTATION_H +/* end file src/simdutf/fallback/implementation.h */ + +/* begin file src/simdutf/fallback/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "fallback" +// #define SIMDUTF_IMPLEMENTATION fallback +/* end file src/simdutf/fallback/begin.h */ + + // Declarations +/* begin file src/simdutf/fallback/bitmanipulation.h */ +#ifndef SIMDUTF_FALLBACK_BITMANIPULATION_H +#define SIMDUTF_FALLBACK_BITMANIPULATION_H + +#include + +namespace simdutf { +namespace fallback { +namespace {} // unnamed namespace +} // namespace fallback +} // namespace simdutf + +#endif // SIMDUTF_FALLBACK_BITMANIPULATION_H +/* end file src/simdutf/fallback/bitmanipulation.h */ + +/* begin file src/simdutf/fallback/end.h */ +/* end file src/simdutf/fallback/end.h */ + +#endif // SIMDUTF_IMPLEMENTATION_FALLBACK +#endif // SIMDUTF_FALLBACK_H +/* end file src/simdutf/fallback.h */ +#ifndef SIMDUTF_REGULAR_VISUAL_STUDIO 
+SIMDUTF_POP_DISABLE_WARNINGS +#endif + +// The scalar routines should be included once. +#if SIMDUTF_FEATURE_ASCII +#endif // SIMDUTF_FEATURE_ASCII +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING || \ + (SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1) +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING || + // (SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1) +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_LATIN1 +#endif // SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_BASE64 +#endif // SIMDUTF_FEATURE_BASE64 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && \ + (SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_LATIN1) +#endif // SIMDUTF_FEATURE_UTF8 && (SIMDUTF_FEATURE_UTF16 || + // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_LATIN1) + +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_UTF32 +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#endif // SIMDUTF_FEATURE_UTF8 && 
SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +/* begin file src/implementation.cpp */ +#include +#include +#include +#if SIMDUTF_ATOMIC_REF + #include +#endif + +static_assert(sizeof(uint8_t) == sizeof(char), + "simdutf requires that uint8_t be a char"); +static_assert(sizeof(uint16_t) == sizeof(char16_t), + "simdutf requires that char16_t be 16 bits"); +static_assert(sizeof(uint32_t) == sizeof(char32_t), + "simdutf requires that char32_t be 32 bits"); +// next line is redundant, but it is kept to catch defective systems. +static_assert(CHAR_BIT == 8, "simdutf requires 8-bit bytes"); + +// Useful for debugging purposes +namespace simdutf { +namespace { + +// Renders the bits of `b` as '0'/'1' characters, most significant bit first. +// Each bit is read by shifting `b` itself instead of walking a mask of type T: +// for signed T the top-bit mask is negative, so a `mask > 0` loop never runs +// and the result would be an empty string. +template std::string toBinaryString(T b) { + std::string binary = ""; + for (size_t bit = sizeof(T) * CHAR_BIT; bit-- > 0;) { + binary += (((b >> bit) & 1) == 0) ? '0' : '1'; + } + return binary; +} 
+} // namespace +} // namespace simdutf + +namespace simdutf { +// True when every instruction-set bit this implementation requires is also +// present in the mask returned by internal::detect_supported_architectures(). +bool implementation::supported_by_runtime_system() const { + uint32_t required_instruction_sets = this->required_instruction_sets(); + uint32_t supported_instruction_sets = + internal::detect_supported_architectures(); + return ((supported_instruction_sets & required_instruction_sets) == + required_instruction_sets); +} + +#if SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused encoding_type implementation::autodetect_encoding( + const char *input, size_t length) const noexcept { + // If there is a BOM, then we trust it. + auto bom_encoding = simdutf::BOM::check_bom(input, length); + if (bom_encoding != encoding_type::unspecified) { + return bom_encoding; + } + // UTF8 is common, it includes ASCII, and is commonly represented + // without a BOM, so if it fits, go with that. Note that it is still + // possible to get it wrong, we are only 'guessing'. If some has UTF-16 + // data without a BOM, it could pass as UTF-8. + // + // An interesting twist might be to check for UTF-16 ASCII first (every + // other byte is zero). + if (validate_utf8(input, length)) { + return encoding_type::UTF8; + } + // The next most common encoding that might appear without BOM is probably + // UTF-16LE, so try that next. 
+ if ((length % 2) == 0) { + // important: we need to divide by two + if (validate_utf16le(reinterpret_cast(input), + length / 2)) { + return encoding_type::UTF16_LE; + } + } + if ((length % 4) == 0) { + if (validate_utf32(reinterpret_cast(input), length / 4)) { + return encoding_type::UTF32_LE; + } + } + return encoding_type::unspecified; +} + + #ifdef SIMDUTF_INTERNAL_TESTS +std::vector +implementation::internal_tests() const { + return {}; +} + #endif +#endif // SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_BASE64 +simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( + const char *input, size_t length) const noexcept { + return scalar::base64::maximal_binary_length_from_base64(input, length); +} + +simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( + const char16_t *input, size_t length) const noexcept { + return scalar::base64::maximal_binary_length_from_base64(input, length); +} +simdutf_warn_unused size_t implementation::base64_length_from_binary( + size_t length, base64_options options) const noexcept { + return scalar::base64::base64_length_from_binary(length, options); +} +#endif // SIMDUTF_FEATURE_BASE64 + +namespace internal { +// When there is a single implementation, we should not pay a price +// for dispatching to the best implementation. We should just use the +// one we have. This is a compile-time check. +#define SIMDUTF_SINGLE_IMPLEMENTATION \ + (SIMDUTF_IMPLEMENTATION_ICELAKE + SIMDUTF_IMPLEMENTATION_HASWELL + \ + SIMDUTF_IMPLEMENTATION_WESTMERE + SIMDUTF_IMPLEMENTATION_ARM64 + \ + SIMDUTF_IMPLEMENTATION_PPC64 + SIMDUTF_IMPLEMENTATION_LSX + \ + SIMDUTF_IMPLEMENTATION_LASX + SIMDUTF_IMPLEMENTATION_FALLBACK == \ + 1) + +// Static array of known implementations. We are hoping these get baked into the +// executable without requiring a static initializer. 
+ +// One accessor per compiled-in kernel. Each returns the address of a +// function-local static instance of that kernel's implementation class. +#if SIMDUTF_IMPLEMENTATION_ICELAKE +static const icelake::implementation *get_icelake_singleton() { + static const icelake::implementation icelake_singleton{}; + return &icelake_singleton; +} +#endif +#if SIMDUTF_IMPLEMENTATION_HASWELL +static const haswell::implementation *get_haswell_singleton() { + static const haswell::implementation haswell_singleton{}; + return &haswell_singleton; +} +#endif +#if SIMDUTF_IMPLEMENTATION_WESTMERE +static const westmere::implementation *get_westmere_singleton() { + static const westmere::implementation westmere_singleton{}; + return &westmere_singleton; +} +#endif +#if SIMDUTF_IMPLEMENTATION_ARM64 +static const arm64::implementation *get_arm64_singleton() { + static const arm64::implementation arm64_singleton{}; + return &arm64_singleton; +} +#endif +#if SIMDUTF_IMPLEMENTATION_PPC64 +static const ppc64::implementation *get_ppc64_singleton() { + static const ppc64::implementation ppc64_singleton{}; + return &ppc64_singleton; +} +#endif +#if SIMDUTF_IMPLEMENTATION_RVV +static const rvv::implementation *get_rvv_singleton() { + static const rvv::implementation rvv_singleton{}; + return &rvv_singleton; +} +#endif +#if SIMDUTF_IMPLEMENTATION_LASX +static const lasx::implementation *get_lasx_singleton() { + static const lasx::implementation lasx_singleton{}; + return &lasx_singleton; +} +#endif +#if SIMDUTF_IMPLEMENTATION_LSX +static const lsx::implementation *get_lsx_singleton() { + static const lsx::implementation lsx_singleton{}; + return &lsx_singleton; +} +#endif +#if SIMDUTF_IMPLEMENTATION_FALLBACK +static const fallback::implementation *get_fallback_singleton() { + static const fallback::implementation fallback_singleton{}; + return &fallback_singleton; +} +#endif + +#if SIMDUTF_SINGLE_IMPLEMENTATION +// Returns the single compiled-in implementation: the one getter whose +// SIMDUTF_IMPLEMENTATION_* guard is enabled supplies the return expression. +// NOTE(review): neither this chain nor the SIMDUTF_SINGLE_IMPLEMENTATION sum +// has an RVV entry even though get_rvv_singleton() is defined above -- +// confirm whether that omission is intentional. +static const implementation *get_single_implementation() { + return + #if SIMDUTF_IMPLEMENTATION_ICELAKE + get_icelake_singleton(); + #endif + #if SIMDUTF_IMPLEMENTATION_HASWELL + get_haswell_singleton(); + #endif + #if 
SIMDUTF_IMPLEMENTATION_WESTMERE + get_westmere_singleton(); + #endif + #if SIMDUTF_IMPLEMENTATION_ARM64 + get_arm64_singleton(); + #endif + #if SIMDUTF_IMPLEMENTATION_PPC64 + get_ppc64_singleton(); + #endif + #if SIMDUTF_IMPLEMENTATION_LASX + get_lasx_singleton(); + #endif + #if SIMDUTF_IMPLEMENTATION_LSX + get_lsx_singleton(); + #endif + #if SIMDUTF_IMPLEMENTATION_FALLBACK + get_fallback_singleton(); + #endif +} +#endif + +/** + * @private Detects best supported implementation on first use, and sets it + */ +class detect_best_supported_implementation_on_first_use final + : public implementation { +public: + std::string name() const noexcept final { return set_best()->name(); } + std::string description() const noexcept final { + return set_best()->description(); + } + uint32_t required_instruction_sets() const noexcept final { + return set_best()->required_instruction_sets(); + } + +#if SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused int + detect_encodings(const char *input, size_t length) const noexcept override { + return set_best()->detect_encodings(input, length); + } +#endif // SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool + validate_utf8(const char *buf, size_t len) const noexcept final override { + return set_best()->validate_utf8(buf, len); + } +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 + simdutf_warn_unused result validate_utf8_with_errors( + const char *buf, size_t len) const noexcept final override { + return set_best()->validate_utf8_with_errors(buf, len); + } +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_ASCII + simdutf_warn_unused bool + validate_ascii(const char *buf, size_t len) const noexcept final override { + return set_best()->validate_ascii(buf, len); + } + simdutf_warn_unused result validate_ascii_with_errors( + const char *buf, size_t len) const noexcept final override { + return 
set_best()->validate_ascii_with_errors(buf, len); + } +#endif // SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII + simdutf_warn_unused bool + validate_utf16le_as_ascii(const char16_t *buf, + size_t len) const noexcept final override { + return set_best()->validate_utf16le_as_ascii(buf, len); + } + simdutf_warn_unused bool + validate_utf16be_as_ascii(const char16_t *buf, + size_t len) const noexcept final override { + return set_best()->validate_utf16be_as_ascii(buf, len); + } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool + validate_utf16le(const char16_t *buf, + size_t len) const noexcept final override { + return set_best()->validate_utf16le(buf, len); + } +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused bool + validate_utf16be(const char16_t *buf, + size_t len) const noexcept final override { + return set_best()->validate_utf16be(buf, len); + } + + simdutf_warn_unused result validate_utf16le_with_errors( + const char16_t *buf, size_t len) const noexcept final override { + return set_best()->validate_utf16le_with_errors(buf, len); + } + + simdutf_warn_unused result validate_utf16be_with_errors( + const char16_t *buf, size_t len) const noexcept final override { + return set_best()->validate_utf16be_with_errors(buf, len); + } + void to_well_formed_utf16be(const char16_t *input, size_t len, + char16_t *output) const noexcept final override { + return set_best()->to_well_formed_utf16be(input, len, output); + } + void to_well_formed_utf16le(const char16_t *input, size_t len, + char16_t *output) const noexcept final override { + return set_best()->to_well_formed_utf16le(input, len, output); + } +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool + validate_utf32(const char32_t *buf, + size_t len) const 
noexcept final override { + return set_best()->validate_utf32(buf, len); + } +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused result validate_utf32_with_errors( + const char32_t *buf, size_t len) const noexcept final override { + return set_best()->validate_utf32_with_errors(buf, len); + } +#endif // SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + convert_latin1_to_utf8(const char *buf, size_t len, + char *utf8_output) const noexcept final override { + return set_best()->convert_latin1_to_utf8(buf, len, utf8_output); + } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf16le( + const char *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return set_best()->convert_latin1_to_utf16le(buf, len, utf16_output); + } + + simdutf_warn_unused size_t convert_latin1_to_utf16be( + const char *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return set_best()->convert_latin1_to_utf16be(buf, len, utf16_output); + } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf32( + const char *buf, size_t len, + char32_t *latin1_output) const noexcept final override { + return set_best()->convert_latin1_to_utf32(buf, len, latin1_output); + } +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + convert_utf8_to_latin1(const char *buf, size_t len, + char *latin1_output) const noexcept final override { + return set_best()->convert_utf8_to_latin1(buf, len, latin1_output); + } + + simdutf_warn_unused result convert_utf8_to_latin1_with_errors( + const char *buf, size_t len, + char *latin1_output) const 
noexcept final override { + return set_best()->convert_utf8_to_latin1_with_errors(buf, len, + latin1_output); + } + + simdutf_warn_unused size_t convert_valid_utf8_to_latin1( + const char *buf, size_t len, + char *latin1_output) const noexcept final override { + return set_best()->convert_valid_utf8_to_latin1(buf, len, latin1_output); + } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t convert_utf8_to_utf16le( + const char *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return set_best()->convert_utf8_to_utf16le(buf, len, utf16_output); + } + + simdutf_warn_unused size_t convert_utf8_to_utf16be( + const char *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return set_best()->convert_utf8_to_utf16be(buf, len, utf16_output); + } + + simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( + const char *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return set_best()->convert_utf8_to_utf16le_with_errors(buf, len, + utf16_output); + } + + simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( + const char *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return set_best()->convert_utf8_to_utf16be_with_errors(buf, len, + utf16_output); + } + + simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( + const char *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return set_best()->convert_valid_utf8_to_utf16le(buf, len, utf16_output); + } + + simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( + const char *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return set_best()->convert_valid_utf8_to_utf16be(buf, len, utf16_output); + } + simdutf_warn_unused result utf8_length_from_utf16le_with_replacement( + const char16_t *input, size_t length) const noexcept final override { + return 
set_best()->utf8_length_from_utf16le_with_replacement(input, length); + } + + simdutf_warn_unused result utf8_length_from_utf16be_with_replacement( + const char16_t *input, size_t length) const noexcept final override { + return set_best()->utf8_length_from_utf16be_with_replacement(input, length); + } + +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + convert_utf8_to_utf32(const char *buf, size_t len, + char32_t *utf32_output) const noexcept final override { + return set_best()->convert_utf8_to_utf32(buf, len, utf32_output); + } + + simdutf_warn_unused result convert_utf8_to_utf32_with_errors( + const char *buf, size_t len, + char32_t *utf32_output) const noexcept final override { + return set_best()->convert_utf8_to_utf32_with_errors(buf, len, + utf32_output); + } + + simdutf_warn_unused size_t convert_valid_utf8_to_utf32( + const char *buf, size_t len, + char32_t *utf32_output) const noexcept final override { + return set_best()->convert_valid_utf8_to_utf32(buf, len, utf32_output); + } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + convert_utf16le_to_latin1(const char16_t *buf, size_t len, + char *latin1_output) const noexcept final override { + return set_best()->convert_utf16le_to_latin1(buf, len, latin1_output); + } + + simdutf_warn_unused size_t + convert_utf16be_to_latin1(const char16_t *buf, size_t len, + char *latin1_output) const noexcept final override { + return set_best()->convert_utf16be_to_latin1(buf, len, latin1_output); + } + + simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( + const char16_t *buf, size_t len, + char *latin1_output) const noexcept final override { + return set_best()->convert_utf16le_to_latin1_with_errors(buf, len, + latin1_output); + } + + simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( + const char16_t *buf, 
size_t len, + char *latin1_output) const noexcept final override { + return set_best()->convert_utf16be_to_latin1_with_errors(buf, len, + latin1_output); + } + + simdutf_warn_unused size_t convert_valid_utf16le_to_latin1( + const char16_t *buf, size_t len, + char *latin1_output) const noexcept final override { + return set_best()->convert_valid_utf16le_to_latin1(buf, len, latin1_output); + } + + simdutf_warn_unused size_t convert_valid_utf16be_to_latin1( + const char16_t *buf, size_t len, + char *latin1_output) const noexcept final override { + return set_best()->convert_valid_utf16be_to_latin1(buf, len, latin1_output); + } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t + convert_utf16le_to_utf8(const char16_t *buf, size_t len, + char *utf8_output) const noexcept final override { + return set_best()->convert_utf16le_to_utf8(buf, len, utf8_output); + } + + simdutf_warn_unused size_t + convert_utf16be_to_utf8(const char16_t *buf, size_t len, + char *utf8_output) const noexcept final override { + return set_best()->convert_utf16be_to_utf8(buf, len, utf8_output); + } + + simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( + const char16_t *buf, size_t len, + char *utf8_output) const noexcept final override { + return set_best()->convert_utf16le_to_utf8_with_errors(buf, len, + utf8_output); + } + + simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( + const char16_t *buf, size_t len, + char *utf8_output) const noexcept final override { + return set_best()->convert_utf16be_to_utf8_with_errors(buf, len, + utf8_output); + } + + simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( + const char16_t *buf, size_t len, + char *utf8_output) const noexcept final override { + return set_best()->convert_valid_utf16le_to_utf8(buf, len, utf8_output); + } + + simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( + const char16_t *buf, size_t len, + char 
*utf8_output) const noexcept final override { + return set_best()->convert_valid_utf16be_to_utf8(buf, len, utf8_output); + } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + convert_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final override { + return set_best()->convert_utf32_to_latin1(buf, len, latin1_output); + } + + simdutf_warn_unused result convert_utf32_to_latin1_with_errors( + const char32_t *buf, size_t len, + char *latin1_output) const noexcept final override { + return set_best()->convert_utf32_to_latin1_with_errors(buf, len, + latin1_output); + } + + // Forward to the matching convert_valid_* entry point of the selected + // implementation, as every other convert_valid_* forwarder here does. + simdutf_warn_unused size_t convert_valid_utf32_to_latin1( + const char32_t *buf, size_t len, + char *latin1_output) const noexcept final override { + return set_best()->convert_valid_utf32_to_latin1(buf, len, latin1_output); + } +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + convert_utf32_to_utf8(const char32_t *buf, size_t len, + char *utf8_output) const noexcept final override { + return set_best()->convert_utf32_to_utf8(buf, len, utf8_output); + } + + simdutf_warn_unused result convert_utf32_to_utf8_with_errors( + const char32_t *buf, size_t len, + char *utf8_output) const noexcept final override { + return set_best()->convert_utf32_to_utf8_with_errors(buf, len, utf8_output); + } + + simdutf_warn_unused size_t + convert_valid_utf32_to_utf8(const char32_t *buf, size_t len, + char *utf8_output) const noexcept final override { + return set_best()->convert_valid_utf32_to_utf8(buf, len, utf8_output); + } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t convert_utf32_to_utf16le( + const char32_t *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return
set_best()->convert_utf32_to_utf16le(buf, len, utf16_output); + } + + simdutf_warn_unused size_t convert_utf32_to_utf16be( + const char32_t *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return set_best()->convert_utf32_to_utf16be(buf, len, utf16_output); + } + + simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return set_best()->convert_utf32_to_utf16le_with_errors(buf, len, + utf16_output); + } + + simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return set_best()->convert_utf32_to_utf16be_with_errors(buf, len, + utf16_output); + } + + simdutf_warn_unused size_t convert_valid_utf32_to_utf16le( + const char32_t *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return set_best()->convert_valid_utf32_to_utf16le(buf, len, utf16_output); + } + + simdutf_warn_unused size_t convert_valid_utf32_to_utf16be( + const char32_t *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return set_best()->convert_valid_utf32_to_utf16be(buf, len, utf16_output); + } + + simdutf_warn_unused size_t convert_utf16le_to_utf32( + const char16_t *buf, size_t len, + char32_t *utf32_output) const noexcept final override { + return set_best()->convert_utf16le_to_utf32(buf, len, utf32_output); + } + + simdutf_warn_unused size_t convert_utf16be_to_utf32( + const char16_t *buf, size_t len, + char32_t *utf32_output) const noexcept final override { + return set_best()->convert_utf16be_to_utf32(buf, len, utf32_output); + } + + simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_output) const noexcept final override { + return set_best()->convert_utf16le_to_utf32_with_errors(buf, len, + utf32_output); + } + + simdutf_warn_unused result 
convert_utf16be_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_output) const noexcept final override { + return set_best()->convert_utf16be_to_utf32_with_errors(buf, len, + utf32_output); + } + + simdutf_warn_unused size_t convert_valid_utf16le_to_utf32( + const char16_t *buf, size_t len, + char32_t *utf32_output) const noexcept final override { + return set_best()->convert_valid_utf16le_to_utf32(buf, len, utf32_output); + } + + simdutf_warn_unused size_t convert_valid_utf16be_to_utf32( + const char16_t *buf, size_t len, + char32_t *utf32_output) const noexcept final override { + return set_best()->convert_valid_utf16be_to_utf32(buf, len, utf32_output); + } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 + void change_endianness_utf16(const char16_t *buf, size_t len, + char16_t *output) const noexcept final override { + set_best()->change_endianness_utf16(buf, len, output); + } + + simdutf_warn_unused size_t + count_utf16le(const char16_t *buf, size_t len) const noexcept final override { + return set_best()->count_utf16le(buf, len); + } + + simdutf_warn_unused size_t + count_utf16be(const char16_t *buf, size_t len) const noexcept final override { + return set_best()->count_utf16be(buf, len); + } +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 + simdutf_warn_unused size_t + count_utf8(const char *buf, size_t len) const noexcept final override { + return set_best()->count_utf8(buf, len); + } +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + latin1_length_from_utf8(const char *buf, size_t len) const noexcept override { + return set_best()->latin1_length_from_utf8(buf, len); + } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + utf8_length_from_latin1(const char *buf, size_t len) const noexcept override { + return 
set_best()->utf8_length_from_latin1(buf, len); + } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t utf8_length_from_utf16le( + const char16_t *buf, size_t len) const noexcept override { + return set_best()->utf8_length_from_utf16le(buf, len); + } + + simdutf_warn_unused size_t utf8_length_from_utf16be( + const char16_t *buf, size_t len) const noexcept override { + return set_best()->utf8_length_from_utf16be(buf, len); + } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t utf32_length_from_utf16le( + const char16_t *buf, size_t len) const noexcept override { + return set_best()->utf32_length_from_utf16le(buf, len); + } + + simdutf_warn_unused size_t utf32_length_from_utf16be( + const char16_t *buf, size_t len) const noexcept override { + return set_best()->utf32_length_from_utf16be(buf, len); + } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t + utf16_length_from_utf8(const char *buf, size_t len) const noexcept override { + return set_best()->utf16_length_from_utf8(buf, len); + } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t utf8_length_from_utf32( + const char32_t *buf, size_t len) const noexcept override { + return set_best()->utf8_length_from_utf32(buf, len); + } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t utf16_length_from_utf32( + const char32_t *buf, size_t len) const noexcept override { + return set_best()->utf16_length_from_utf32(buf, len); + } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + 
utf32_length_from_utf8(const char *buf, size_t len) const noexcept override { + return set_best()->utf32_length_from_utf8(buf, len); + } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_BASE64 + simdutf_warn_unused result base64_to_binary( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_handling_options = + last_chunk_handling_options::loose) const noexcept override { + return set_best()->base64_to_binary(input, length, output, options, + last_chunk_handling_options); + } + + simdutf_warn_unused full_result base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_handling_options = + last_chunk_handling_options::loose) const noexcept override { + return set_best()->base64_to_binary_details(input, length, output, options, + last_chunk_handling_options); + } + + simdutf_warn_unused result base64_to_binary( + const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_handling_options = + last_chunk_handling_options::loose) const noexcept override { + return set_best()->base64_to_binary(input, length, output, options, + last_chunk_handling_options); + } + + simdutf_warn_unused full_result base64_to_binary_details( + const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_handling_options = + last_chunk_handling_options::loose) const noexcept override { + return set_best()->base64_to_binary_details(input, length, output, options, + last_chunk_handling_options); + } + + size_t binary_to_base64(const char *input, size_t length, char *output, + base64_options options) const noexcept override { + return set_best()->binary_to_base64(input, length, output, options); + } + + size_t + binary_to_base64_with_lines(const char *input, size_t length, char *output, + size_t 
line_length, + base64_options options) const noexcept override { + return set_best()->binary_to_base64_with_lines(input, length, output, + line_length, options); + } + + const char *find(const char *start, const char *end, + char character) const noexcept override { + return set_best()->find(start, end, character); + } + + const char16_t *find(const char16_t *start, const char16_t *end, + char16_t character) const noexcept override { + return set_best()->find(start, end, character); + } +#endif // SIMDUTF_FEATURE_BASE64 + + simdutf_really_inline + detect_best_supported_implementation_on_first_use() noexcept + : implementation("best_supported_detector", + "Detects the best supported implementation and sets it", + 0) {} + +private: + const implementation *set_best() const noexcept; +}; + +static_assert(std::is_trivially_destructible< + detect_best_supported_implementation_on_first_use>::value, + "detect_best_supported_implementation_on_first_use should be " + "trivially destructible"); + +// Returns the list of implementations compiled into this build; each entry is +// included only when its SIMDUTF_IMPLEMENTATION_* macro is enabled. +static const std::initializer_list<const implementation *> & +get_available_implementation_pointers() { + static const std::initializer_list<const implementation *> + available_implementation_pointers{ +#if SIMDUTF_IMPLEMENTATION_ICELAKE + get_icelake_singleton(), +#endif +#if SIMDUTF_IMPLEMENTATION_HASWELL + get_haswell_singleton(), +#endif +#if SIMDUTF_IMPLEMENTATION_WESTMERE + get_westmere_singleton(), +#endif +#if SIMDUTF_IMPLEMENTATION_ARM64 + get_arm64_singleton(), +#endif +#if SIMDUTF_IMPLEMENTATION_PPC64 + get_ppc64_singleton(), +#endif +#if SIMDUTF_IMPLEMENTATION_RVV + get_rvv_singleton(), +#endif +#if SIMDUTF_IMPLEMENTATION_LASX + get_lasx_singleton(), +#endif +#if SIMDUTF_IMPLEMENTATION_LSX + get_lsx_singleton(), +#endif +#if SIMDUTF_IMPLEMENTATION_FALLBACK + get_fallback_singleton(), +#endif + }; // available_implementation_pointers + return available_implementation_pointers; +} + +// So we can return UNSUPPORTED_ARCHITECTURE from the parser when there is no +// support +class unsupported_implementation final : public
implementation { +public: +#if SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused int detect_encodings(const char *, + size_t) const noexcept override { + return encoding_type::unspecified; + } +#endif // SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool validate_utf8(const char *, + size_t) const noexcept final override { + return false; // Just refuse to validate. Given that we have a fallback + // implementation + // it seems unlikely that unsupported_implementation will ever be used. If + // it is used, then it will flag all strings as invalid. The alternative is + // to return an error_code from which the user has to figure out whether the + // string is valid UTF-8... which seems like a lot of work just to handle + // the very unlikely case that we have an unsupported implementation. And, + // when it does happen (that we have an unsupported implementation), what + // are the chances that the programmer has a fallback? Given that *we* + // provide the fallback, it implies that the programmer would need a + // fallback for our fallback. 
+ } +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 + simdutf_warn_unused result validate_utf8_with_errors( + const char *, size_t) const noexcept final override { + return result(error_code::OTHER, 0); + } +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_ASCII + simdutf_warn_unused bool + validate_ascii(const char *, size_t) const noexcept final override { + return false; + } + + simdutf_warn_unused result validate_ascii_with_errors( + const char *, size_t) const noexcept final override { + return result(error_code::OTHER, 0); + } +#endif // SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII + simdutf_warn_unused bool + validate_utf16le_as_ascii(const char16_t *, + size_t) const noexcept final override { + return false; + } + + simdutf_warn_unused bool + validate_utf16be_as_ascii(const char16_t *, + size_t) const noexcept final override { + return false; + } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool + validate_utf16le(const char16_t *, size_t) const noexcept final override { + return false; + } +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused bool + validate_utf16be(const char16_t *, size_t) const noexcept final override { + return false; + } + + simdutf_warn_unused result validate_utf16le_with_errors( + const char16_t *, size_t) const noexcept final override { + return result(error_code::OTHER, 0); + } + + simdutf_warn_unused result validate_utf16be_with_errors( + const char16_t *, size_t) const noexcept final override { + return result(error_code::OTHER, 0); + } + void to_well_formed_utf16be(const char16_t *, size_t, + char16_t *) const noexcept final override {} + void to_well_formed_utf16le(const char16_t *, size_t, + char16_t *) const noexcept final override {} +#endif // SIMDUTF_FEATURE_UTF16 + +#if 
SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + simdutf_warn_unused bool + validate_utf32(const char32_t *, size_t) const noexcept final override { + return false; + } +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused result validate_utf32_with_errors( + const char32_t *, size_t) const noexcept final override { + return result(error_code::OTHER, 0); + } +#endif // SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf8( + const char *, size_t, char *) const noexcept final override { + return 0; + } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf16le( + const char *, size_t, char16_t *) const noexcept final override { + return 0; + } + + simdutf_warn_unused size_t convert_latin1_to_utf16be( + const char *, size_t, char16_t *) const noexcept final override { + return 0; + } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_latin1_to_utf32( + const char *, size_t, char32_t *) const noexcept final override { + return 0; + } +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_utf8_to_latin1( + const char *, size_t, char *) const noexcept final override { + return 0; + } + + simdutf_warn_unused result convert_utf8_to_latin1_with_errors( + const char *, size_t, char *) const noexcept final override { + return result(error_code::OTHER, 0); + } + + simdutf_warn_unused size_t convert_valid_utf8_to_latin1( + const char *, size_t, char *) const noexcept final override { + return 0; + } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused 
size_t convert_utf8_to_utf16le( + const char *, size_t, char16_t *) const noexcept final override { + return 0; + } + + simdutf_warn_unused size_t convert_utf8_to_utf16be( + const char *, size_t, char16_t *) const noexcept final override { + return 0; + } + + simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( + const char *, size_t, char16_t *) const noexcept final override { + return result(error_code::OTHER, 0); + } + + simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( + const char *, size_t, char16_t *) const noexcept final override { + return result(error_code::OTHER, 0); + } + + simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( + const char *, size_t, char16_t *) const noexcept final override { + return 0; + } + + simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( + const char *, size_t, char16_t *) const noexcept final override { + return 0; + } + simdutf_warn_unused result utf8_length_from_utf16le_with_replacement( + const char16_t *, size_t) const noexcept final override { + return result(error_code::OTHER, 0); // Not supported + } + + simdutf_warn_unused result utf8_length_from_utf16be_with_replacement( + const char16_t *, size_t) const noexcept final override { + return result(error_code::OTHER, 0); // Not supported + } + +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t convert_utf8_to_utf32( + const char *, size_t, char32_t *) const noexcept final override { + return 0; + } + + simdutf_warn_unused result convert_utf8_to_utf32_with_errors( + const char *, size_t, char32_t *) const noexcept final override { + return result(error_code::OTHER, 0); + } + + simdutf_warn_unused size_t convert_valid_utf8_to_utf32( + const char *, size_t, char32_t *) const noexcept final override { + return 0; + } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_utf16le_to_latin1( +
const char16_t *, size_t, char *) const noexcept final override { + return 0; + } + + simdutf_warn_unused size_t convert_utf16be_to_latin1( + const char16_t *, size_t, char *) const noexcept final override { + return 0; + } + + simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( + const char16_t *, size_t, char *) const noexcept final override { + return result(error_code::OTHER, 0); + } + + simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( + const char16_t *, size_t, char *) const noexcept final override { + return result(error_code::OTHER, 0); + } + + simdutf_warn_unused size_t convert_valid_utf16le_to_latin1( + const char16_t *, size_t, char *) const noexcept final override { + return 0; + } + + simdutf_warn_unused size_t convert_valid_utf16be_to_latin1( + const char16_t *, size_t, char *) const noexcept final override { + return 0; + } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t convert_utf16le_to_utf8( + const char16_t *, size_t, char *) const noexcept final override { + return 0; + } + + simdutf_warn_unused size_t convert_utf16be_to_utf8( + const char16_t *, size_t, char *) const noexcept final override { + return 0; + } + + simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( + const char16_t *, size_t, char *) const noexcept final override { + return result(error_code::OTHER, 0); + } + + simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( + const char16_t *, size_t, char *) const noexcept final override { + return result(error_code::OTHER, 0); + } + + simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( + const char16_t *, size_t, char *) const noexcept final override { + return 0; + } + + simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( + const char16_t *, size_t, char *) const noexcept final override { + return 0; + } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if 
SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t convert_utf32_to_latin1( + const char32_t *, size_t, char *) const noexcept final override { + return 0; + } + + simdutf_warn_unused result convert_utf32_to_latin1_with_errors( + const char32_t *, size_t, char *) const noexcept final override { + return result(error_code::OTHER, 0); + } + + simdutf_warn_unused size_t convert_valid_utf32_to_latin1( + const char32_t *, size_t, char *) const noexcept final override { + return 0; + } +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t convert_utf32_to_utf8( + const char32_t *, size_t, char *) const noexcept final override { + return 0; + } + + simdutf_warn_unused result convert_utf32_to_utf8_with_errors( + const char32_t *, size_t, char *) const noexcept final override { + return result(error_code::OTHER, 0); + } + + simdutf_warn_unused size_t convert_valid_utf32_to_utf8( + const char32_t *, size_t, char *) const noexcept final override { + return 0; + } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t convert_utf32_to_utf16le( + const char32_t *, size_t, char16_t *) const noexcept final override { + return 0; + } + + simdutf_warn_unused size_t convert_utf32_to_utf16be( + const char32_t *, size_t, char16_t *) const noexcept final override { + return 0; + } + + simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( + const char32_t *, size_t, char16_t *) const noexcept final override { + return result(error_code::OTHER, 0); + } + + simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( + const char32_t *, size_t, char16_t *) const noexcept final override { + return result(error_code::OTHER, 0); + } + + simdutf_warn_unused size_t convert_valid_utf32_to_utf16le( + const char32_t *, size_t, char16_t *) const noexcept final override { + return 0; + } + 
+ simdutf_warn_unused size_t convert_valid_utf32_to_utf16be( + const char32_t *, size_t, char16_t *) const noexcept final override { + return 0; + } + + simdutf_warn_unused size_t convert_utf16le_to_utf32( + const char16_t *, size_t, char32_t *) const noexcept final override { + return 0; + } + + simdutf_warn_unused size_t convert_utf16be_to_utf32( + const char16_t *, size_t, char32_t *) const noexcept final override { + return 0; + } + + simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( + const char16_t *, size_t, char32_t *) const noexcept final override { + return result(error_code::OTHER, 0); + } + + simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( + const char16_t *, size_t, char32_t *) const noexcept final override { + return result(error_code::OTHER, 0); + } + + simdutf_warn_unused size_t convert_valid_utf16le_to_utf32( + const char16_t *, size_t, char32_t *) const noexcept final override { + return 0; + } + + simdutf_warn_unused size_t convert_valid_utf16be_to_utf32( + const char16_t *, size_t, char32_t *) const noexcept final override { + return 0; + } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 + void change_endianness_utf16(const char16_t *, size_t, + char16_t *) const noexcept final override {} + + simdutf_warn_unused size_t + count_utf16le(const char16_t *, size_t) const noexcept final override { + return 0; + } + + simdutf_warn_unused size_t + count_utf16be(const char16_t *, size_t) const noexcept final override { + return 0; + } +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 + simdutf_warn_unused size_t count_utf8(const char *, + size_t) const noexcept final override { + return 0; + } +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + latin1_length_from_utf8(const char *, size_t) const noexcept override { + return 0; + } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if 
SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + simdutf_warn_unused size_t + utf8_length_from_latin1(const char *, size_t) const noexcept override { + return 0; + } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t + utf8_length_from_utf16le(const char16_t *, size_t) const noexcept override { + return 0; + } + + simdutf_warn_unused size_t + utf8_length_from_utf16be(const char16_t *, size_t) const noexcept override { + return 0; + } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + utf32_length_from_utf16le(const char16_t *, size_t) const noexcept override { + return 0; + } + + simdutf_warn_unused size_t + utf32_length_from_utf16be(const char16_t *, size_t) const noexcept override { + return 0; + } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + simdutf_warn_unused size_t + utf16_length_from_utf8(const char *, size_t) const noexcept override { + return 0; + } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + utf8_length_from_utf32(const char32_t *, size_t) const noexcept override { + return 0; + } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + utf16_length_from_utf32(const char32_t *, size_t) const noexcept override { + return 0; + } +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + simdutf_warn_unused size_t + utf32_length_from_utf8(const char *, size_t) const noexcept override { + return 0; + } +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_BASE64 + simdutf_warn_unused result + base64_to_binary(const char *, size_t, char *, base64_options, + 
last_chunk_handling_options) const noexcept override { + return result(error_code::OTHER, 0); + } + + simdutf_warn_unused full_result base64_to_binary_details( + const char *, size_t, char *, base64_options, + last_chunk_handling_options) const noexcept override { + return full_result(error_code::OTHER, 0, 0); + } + + simdutf_warn_unused result + base64_to_binary(const char16_t *, size_t, char *, base64_options, + last_chunk_handling_options) const noexcept override { + return result(error_code::OTHER, 0); + } + + simdutf_warn_unused full_result base64_to_binary_details( + const char16_t *, size_t, char *, base64_options, + last_chunk_handling_options) const noexcept override { + return full_result(error_code::OTHER, 0, 0); + } + + size_t binary_to_base64(const char *, size_t, char *, + base64_options) const noexcept override { + return 0; + } + size_t binary_to_base64_with_lines(const char *, size_t, char *, size_t, + base64_options) const noexcept override { + return 0; + } + const char *find(const char *, const char *, char) const noexcept override { + return nullptr; + } + const char16_t *find(const char16_t *, const char16_t *, + char16_t) const noexcept override { + return nullptr; + } +#endif // SIMDUTF_FEATURE_BASE64 + + unsupported_implementation() + : implementation("unsupported", + "Unsupported CPU (no detected SIMD instructions)", 0) {} +}; + +// Returns a pointer to the function-local static unsupported_implementation. +const unsupported_implementation *get_unsupported_singleton() { + static const unsupported_implementation unsupported_singleton{}; + return &unsupported_singleton; +} +static_assert(std::is_trivially_destructible<unsupported_implementation>::value, + "unsupported_singleton should be trivially destructible"); + +size_t available_implementation_list::size() const noexcept { + return internal::get_available_implementation_pointers().size(); +} +const implementation *const * +available_implementation_list::begin() const noexcept { + return internal::get_available_implementation_pointers().begin(); +} +const implementation *const *
+available_implementation_list::end() const noexcept { + return internal::get_available_implementation_pointers().end(); +} +const implementation * +available_implementation_list::detect_best_supported() const noexcept { + // They are prelisted in priority order, so we just go down the list + uint32_t supported_instruction_sets = + internal::detect_supported_architectures(); + for (const implementation *impl : + internal::get_available_implementation_pointers()) { + uint32_t required_instruction_sets = impl->required_instruction_sets(); + if ((supported_instruction_sets & required_instruction_sets) == + required_instruction_sets) { + return impl; + } + } + return get_unsupported_singleton(); // this should never happen? +} + +const implementation * +detect_best_supported_implementation_on_first_use::set_best() const noexcept { + SIMDUTF_PUSH_DISABLE_WARNINGS + SIMDUTF_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: + // manually verified this is safe + char *force_implementation_name = getenv("SIMDUTF_FORCE_IMPLEMENTATION"); + SIMDUTF_POP_DISABLE_WARNINGS + + if (force_implementation_name) { + auto force_implementation = + get_available_implementations()[force_implementation_name]; + if (force_implementation) { + return get_active_implementation() = force_implementation; + } else { + // Note: abort() and stderr usage within the library is forbidden. + return get_active_implementation() = get_unsupported_singleton(); + } + } + return get_active_implementation() = + get_available_implementations().detect_best_supported(); +} + +} // namespace internal + +/** + * The list of available implementations compiled into simdutf. + */ +SIMDUTF_DLLIMPORTEXPORT const internal::available_implementation_list & +get_available_implementations() { + static const internal::available_implementation_list + available_implementations{}; + return available_implementations; +} + +/** + * The active implementation. 
+ */ +SIMDUTF_DLLIMPORTEXPORT internal::atomic_ptr & +get_active_implementation() { +#if SIMDUTF_SINGLE_IMPLEMENTATION + // skip runtime detection + static internal::atomic_ptr active_implementation{ + internal::get_single_implementation()}; + return active_implementation; +#else + static const internal::detect_best_supported_implementation_on_first_use + detect_best_supported_implementation_on_first_use_singleton; + static internal::atomic_ptr active_implementation{ + &detect_best_supported_implementation_on_first_use_singleton}; + return active_implementation; +#endif +} + +#if SIMDUTF_SINGLE_IMPLEMENTATION +const implementation *get_default_implementation() { + return internal::get_single_implementation(); +} +#else +internal::atomic_ptr &get_default_implementation() { + return get_active_implementation(); +} +#endif +#define SIMDUTF_GET_CURRENT_IMPLEMENTATION + +#if SIMDUTF_FEATURE_UTF8 +simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept { + return get_default_implementation()->validate_utf8(buf, len); +} +simdutf_warn_unused result validate_utf8_with_errors(const char *buf, + size_t len) noexcept { + return get_default_implementation()->validate_utf8_with_errors(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_ASCII +simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) noexcept { + return get_default_implementation()->validate_ascii(buf, len); +} +simdutf_warn_unused result validate_ascii_with_errors(const char *buf, + size_t len) noexcept { + return get_default_implementation()->validate_ascii_with_errors(buf, len); +} +#endif // SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII +simdutf_warn_unused bool validate_utf16le_as_ascii(const char16_t *buf, + size_t len) noexcept { + return get_default_implementation()->validate_utf16le_as_ascii(buf, len); +} +simdutf_warn_unused bool validate_utf16be_as_ascii(const char16_t *buf, + size_t len) noexcept { + return 
get_default_implementation()->validate_utf16be_as_ascii(buf, len); +} +simdutf_warn_unused bool validate_utf16_as_ascii(const char16_t *input, + size_t length) noexcept { + #if SIMDUTF_IS_BIG_ENDIAN + return validate_utf16be_as_ascii(input, length); + #else + return validate_utf16le_as_ascii(input, length); + #endif +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t convert_utf8_to_utf16( + const char *input, size_t length, char16_t *utf16_output) noexcept { + #if SIMDUTF_IS_BIG_ENDIAN + return convert_utf8_to_utf16be(input, length, utf16_output); + #else + return convert_utf8_to_utf16le(input, length, utf16_output); + #endif +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t convert_latin1_to_utf8(const char *buf, size_t len, + char *utf8_output) noexcept { + return get_default_implementation()->convert_latin1_to_utf8(buf, len, + utf8_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t convert_latin1_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) noexcept { + return get_default_implementation()->convert_latin1_to_utf16le(buf, len, + utf16_output); +} +simdutf_warn_unused size_t convert_latin1_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) noexcept { + return get_default_implementation()->convert_latin1_to_utf16be(buf, len, + utf16_output); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t convert_latin1_to_utf32( + const char *buf, size_t len, char32_t *latin1_output) noexcept { + return get_default_implementation()->convert_latin1_to_utf32(buf, len, + latin1_output); +} +// moved to the header file +// simdutf_warn_unused size_t 
latin1_length_from_utf32(size_t length) noexcept +// simdutf_warn_unused size_t utf32_length_from_latin1(size_t length) noexcept +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t convert_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) noexcept { + return get_default_implementation()->convert_utf8_to_latin1(buf, len, + latin1_output); +} +simdutf_warn_unused result convert_utf8_to_latin1_with_errors( + const char *buf, size_t len, char *latin1_output) noexcept { + return get_default_implementation()->convert_utf8_to_latin1_with_errors( + buf, len, latin1_output); +} +simdutf_warn_unused size_t convert_valid_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) noexcept { + return get_default_implementation()->convert_valid_utf8_to_latin1( + buf, len, latin1_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t convert_utf8_to_utf16le( + const char *input, size_t length, char16_t *utf16_output) noexcept { + return get_default_implementation()->convert_utf8_to_utf16le(input, length, + utf16_output); +} +simdutf_warn_unused size_t convert_utf8_to_utf16be( + const char *input, size_t length, char16_t *utf16_output) noexcept { + return get_default_implementation()->convert_utf8_to_utf16be(input, length, + utf16_output); +} +simdutf_warn_unused result convert_utf8_to_utf16_with_errors( + const char *input, size_t length, char16_t *utf16_output) noexcept { + #if SIMDUTF_IS_BIG_ENDIAN + return convert_utf8_to_utf16be_with_errors(input, length, utf16_output); + #else + return convert_utf8_to_utf16le_with_errors(input, length, utf16_output); + #endif +} +simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( + const char *input, size_t length, char16_t *utf16_output) noexcept { + return 
get_default_implementation()->convert_utf8_to_utf16le_with_errors( + input, length, utf16_output); +} +simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( + const char *input, size_t length, char16_t *utf16_output) noexcept { + return get_default_implementation()->convert_utf8_to_utf16be_with_errors( + input, length, utf16_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t convert_utf8_to_utf32( + const char *input, size_t length, char32_t *utf32_output) noexcept { + return get_default_implementation()->convert_utf8_to_utf32(input, length, + utf32_output); +} +simdutf_warn_unused result convert_utf8_to_utf32_with_errors( + const char *input, size_t length, char32_t *utf32_output) noexcept { + return get_default_implementation()->convert_utf8_to_utf32_with_errors( + input, length, utf32_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused bool validate_utf16(const char16_t *buf, + size_t len) noexcept { + #if SIMDUTF_IS_BIG_ENDIAN + return validate_utf16be(buf, len); + #else + return validate_utf16le(buf, len); + #endif +} +void to_well_formed_utf16be(const char16_t *input, size_t len, + char16_t *output) noexcept { + return get_default_implementation()->to_well_formed_utf16be(input, len, + output); +} +void to_well_formed_utf16le(const char16_t *input, size_t len, + char16_t *output) noexcept { + return get_default_implementation()->to_well_formed_utf16le(input, len, + output); +} +void to_well_formed_utf16(const char16_t *input, size_t len, + char16_t *output) noexcept { + #if SIMDUTF_IS_BIG_ENDIAN + to_well_formed_utf16be(input, len, output); + #else + to_well_formed_utf16le(input, len, output); + #endif +} +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused bool validate_utf16le(const char16_t *buf, + size_t len) noexcept { 
+ return get_default_implementation()->validate_utf16le(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_BASE64 + #if SIMDUTF_ATOMIC_REF +template +simdutf_warn_unused result atomic_base64_to_binary_safe_impl( + const char_type *input, size_t length, char *output, size_t &outlen, + base64_options options, + last_chunk_handling_options last_chunk_handling_options, + bool decode_up_to_bad_char) noexcept { + #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) + // We use a smaller buffer during fuzzing to more easily detect bugs. + constexpr size_t buffer_size = 128; + #else + // Arbitrary block sizes: 4KB for input. + constexpr size_t buffer_size = 4096; + #endif + std::array temp_buffer; + const char_type *const input_init = input; + size_t actual_out = 0; + bool last_chunk = false; + const size_t length_init = length; + result r; + while (!last_chunk) { + last_chunk |= (temp_buffer.size() >= outlen - actual_out); + size_t temp_outlen = (std::min)(temp_buffer.size(), outlen - actual_out); + r = base64_to_binary_safe(input, length, temp_buffer.data(), temp_outlen, + options, last_chunk_handling_options, + decode_up_to_bad_char); + // We processed r.count characters of input. + // We wrote temp_outlen bytes to temp_buffer. + // If there is no ignorable characters, + // we should expect that values/4.0*3 == temp_outlen, + // except maybe at the tail end of the string. + + // + // We are assuming that when r.error == error_code::OUTPUT_BUFFER_TOO_SMALL, + // we truncate the results so that a number of base64 characters divisible + // by four is processed. + // + + // + // We wrote temp_outlen bytes to temp_buffer. + // We need to copy them to output. 
+ // Copy with relaxed atomic operations to the output + simdutf_log_assert(temp_outlen <= outlen - actual_out, + "Output buffer is too small"); + simdutf_log_assert(temp_outlen <= temp_buffer.size(), + "Output buffer is too small"); + + simdutf::scalar::memcpy_atomic_write(output + actual_out, + temp_buffer.data(), temp_outlen); + actual_out += temp_outlen; + length -= r.count; + input += r.count; + + if (r.error != error_code::OUTPUT_BUFFER_TOO_SMALL) { + break; + } + } + if (size_t(input - input_init) != length_init) { + // We did not process all input characters. In such case, we + // should not end with an ignorable character. See + // https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64 + while (input > input_init && base64_ignorable(*(input - 1), options)) { + --input; + } + } + outlen = actual_out; + return {r.error, size_t(input - input_init)}; +} + +simdutf_warn_unused result atomic_base64_to_binary_safe( + const char *input, size_t length, char *output, size_t &outlen, + base64_options options, + last_chunk_handling_options last_chunk_handling_options, + bool decode_up_to_bad_char) noexcept { + return atomic_base64_to_binary_safe_impl( + input, length, output, outlen, options, last_chunk_handling_options, + decode_up_to_bad_char); +} +simdutf_warn_unused result atomic_base64_to_binary_safe( + const char16_t *input, size_t length, char *output, size_t &outlen, + base64_options options, + last_chunk_handling_options last_chunk_handling_options, + bool decode_up_to_bad_char) noexcept { + return atomic_base64_to_binary_safe_impl( + input, length, output, outlen, options, last_chunk_handling_options, + decode_up_to_bad_char); +} + #endif // SIMDUTF_ATOMIC_REF + +#endif // SIMDUTF_FEATURE_BASE64 + +#if SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused bool validate_utf16be(const char16_t *buf, + size_t len) noexcept { + return get_default_implementation()->validate_utf16be(buf, len); +} +simdutf_warn_unused result validate_utf16_with_errors(const char16_t 
*buf, + size_t len) noexcept { + #if SIMDUTF_IS_BIG_ENDIAN + return validate_utf16be_with_errors(buf, len); + #else + return validate_utf16le_with_errors(buf, len); + #endif +} +simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, + size_t len) noexcept { + return get_default_implementation()->validate_utf16le_with_errors(buf, len); +} +simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, + size_t len) noexcept { + return get_default_implementation()->validate_utf16be_with_errors(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused bool validate_utf32(const char32_t *buf, + size_t len) noexcept { + return get_default_implementation()->validate_utf32(buf, len); +} +simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, + size_t len) noexcept { + return get_default_implementation()->validate_utf32_with_errors(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t convert_valid_utf8_to_utf16( + const char *input, size_t length, char16_t *utf16_buffer) noexcept { + #if SIMDUTF_IS_BIG_ENDIAN + return convert_valid_utf8_to_utf16be(input, length, utf16_buffer); + #else + return convert_valid_utf8_to_utf16le(input, length, utf16_buffer); + #endif +} +simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( + const char *input, size_t length, char16_t *utf16_buffer) noexcept { + return get_default_implementation()->convert_valid_utf8_to_utf16le( + input, length, utf16_buffer); +} +simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( + const char *input, size_t length, char16_t *utf16_buffer) noexcept { + return get_default_implementation()->convert_valid_utf8_to_utf16be( + input, length, utf16_buffer); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t convert_valid_utf8_to_utf32( + const char 
*input, size_t length, char32_t *utf32_buffer) noexcept { + return get_default_implementation()->convert_valid_utf8_to_utf32( + input, length, utf32_buffer); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t convert_utf16_to_utf8(const char16_t *buf, + size_t len, + char *utf8_buffer) noexcept { + #if SIMDUTF_IS_BIG_ENDIAN + return convert_utf16be_to_utf8(buf, len, utf8_buffer); + #else + return convert_utf16le_to_utf8(buf, len, utf8_buffer); + #endif +} + +simdutf_warn_unused size_t +convert_utf16_to_utf8_safe(const char16_t *buf, size_t len, char *utf8_output, + size_t utf8_len) noexcept { + const auto start{utf8_output}; + // We might be able to go faster by first scanning the input buffer to + // determine how many char16_t characters we can read without exceeding the + // utf8_len. This is a one-pass algorithm that has the benefit of not + // requiring a first pass to determine the length. + while (true) { + // The worst case for convert_utf16_to_utf8 is when you go from 1 char16_t + // to 3 characters of UTF-8. So we can read at most utf8_len / 3 char16_t + // characters. + auto read_len = std::min(len, utf8_len / 3); + if (read_len <= 16) { + break; + } + if (read_len < len) { + // If we have a high surrogate at the end of the buffer, we need to + // either read one more char16_t or backtrack. + if (scalar::utf16::high_surrogate(buf[read_len - 1])) { + read_len--; + } + } + if (read_len == 0) { + // If we cannot read anything, we are done. + break; + } + const auto write_len = + simdutf::convert_utf16_to_utf8(buf, read_len, utf8_output); + if (write_len == 0) { + // There was an error in the conversion, we cannot continue. 
+ return 0; // indicating failure + } + + utf8_output += write_len; + utf8_len -= write_len; + buf += read_len; + len -= read_len; + } + #if SIMDUTF_IS_BIG_ENDIAN + full_result r = + scalar::utf16_to_utf8::convert_with_errors( + buf, len, utf8_output, utf8_len); + #else + full_result r = + scalar::utf16_to_utf8::convert_with_errors( + buf, len, utf8_output, utf8_len); + #endif + if (r.error != error_code::SUCCESS && + r.error != error_code::OUTPUT_BUFFER_TOO_SMALL) { + // If there was an error, we return 0 to indicate failure. + return 0; // indicating failure + } + return r.output_count + (utf8_output - start); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t convert_utf16_to_latin1( + const char16_t *buf, size_t len, char *latin1_buffer) noexcept { + #if SIMDUTF_IS_BIG_ENDIAN + return convert_utf16be_to_latin1(buf, len, latin1_buffer); + #else + return convert_utf16le_to_latin1(buf, len, latin1_buffer); + #endif +} +simdutf_warn_unused size_t convert_latin1_to_utf16( + const char *buf, size_t len, char16_t *utf16_output) noexcept { + #if SIMDUTF_IS_BIG_ENDIAN + return convert_latin1_to_utf16be(buf, len, utf16_output); + #else + return convert_latin1_to_utf16le(buf, len, utf16_output); + #endif +} +simdutf_warn_unused size_t convert_utf16be_to_latin1( + const char16_t *buf, size_t len, char *latin1_buffer) noexcept { + return get_default_implementation()->convert_utf16be_to_latin1(buf, len, + latin1_buffer); +} +simdutf_warn_unused size_t convert_utf16le_to_latin1( + const char16_t *buf, size_t len, char *latin1_buffer) noexcept { + return get_default_implementation()->convert_utf16le_to_latin1(buf, len, + latin1_buffer); +} +simdutf_warn_unused size_t convert_valid_utf16be_to_latin1( + const char16_t *buf, size_t len, char *latin1_buffer) noexcept { + return get_default_implementation()->convert_valid_utf16be_to_latin1( + buf, len, latin1_buffer); +} +simdutf_warn_unused 
size_t convert_valid_utf16le_to_latin1( + const char16_t *buf, size_t len, char *latin1_buffer) noexcept { + return get_default_implementation()->convert_valid_utf16le_to_latin1( + buf, len, latin1_buffer); +} +simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( + const char16_t *buf, size_t len, char *latin1_buffer) noexcept { + return get_default_implementation()->convert_utf16le_to_latin1_with_errors( + buf, len, latin1_buffer); +} +simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( + const char16_t *buf, size_t len, char *latin1_buffer) noexcept { + return get_default_implementation()->convert_utf16be_to_latin1_with_errors( + buf, len, latin1_buffer); +} +// moved to header file +// simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) noexcept +// simdutf_warn_unused size_t utf16_length_from_latin1(size_t length) noexcept +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t *buf, + size_t len, + char *utf8_buffer) noexcept { + return get_default_implementation()->convert_utf16le_to_utf8(buf, len, + utf8_buffer); +} +simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t *buf, + size_t len, + char *utf8_buffer) noexcept { + return get_default_implementation()->convert_utf16be_to_utf8(buf, len, + utf8_buffer); +} +simdutf_warn_unused result convert_utf16_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_buffer) noexcept { + #if SIMDUTF_IS_BIG_ENDIAN + return convert_utf16be_to_utf8_with_errors(buf, len, utf8_buffer); + #else + return convert_utf16le_to_utf8_with_errors(buf, len, utf8_buffer); + #endif +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused result convert_utf16_to_latin1_with_errors( + const char16_t *buf, size_t len, char *latin1_buffer) noexcept { + #if 
SIMDUTF_IS_BIG_ENDIAN + return convert_utf16be_to_latin1_with_errors(buf, len, latin1_buffer); + #else + return convert_utf16le_to_latin1_with_errors(buf, len, latin1_buffer); + #endif +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_buffer) noexcept { + return get_default_implementation()->convert_utf16le_to_utf8_with_errors( + buf, len, utf8_buffer); +} +simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_buffer) noexcept { + return get_default_implementation()->convert_utf16be_to_utf8_with_errors( + buf, len, utf8_buffer); +} +simdutf_warn_unused size_t convert_valid_utf16_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) noexcept { + #if SIMDUTF_IS_BIG_ENDIAN + return convert_valid_utf16be_to_utf8(buf, len, utf8_buffer); + #else + return convert_valid_utf16le_to_utf8(buf, len, utf8_buffer); + #endif +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t convert_valid_utf16_to_latin1( + const char16_t *buf, size_t len, char *latin1_buffer) noexcept { + #if SIMDUTF_IS_BIG_ENDIAN + return convert_valid_utf16be_to_latin1(buf, len, latin1_buffer); + #else + return convert_valid_utf16le_to_latin1(buf, len, latin1_buffer); + #endif +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) noexcept { + return get_default_implementation()->convert_valid_utf16le_to_utf8( + buf, len, utf8_buffer); +} +simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) noexcept { + return 
get_default_implementation()->convert_valid_utf16be_to_utf8( + buf, len, utf8_buffer); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t *buf, + size_t len, + char *utf8_buffer) noexcept { + return get_default_implementation()->convert_utf32_to_utf8(buf, len, + utf8_buffer); +} +simdutf_warn_unused result convert_utf32_to_utf8_with_errors( + const char32_t *buf, size_t len, char *utf8_buffer) noexcept { + return get_default_implementation()->convert_utf32_to_utf8_with_errors( + buf, len, utf8_buffer); +} +simdutf_warn_unused size_t convert_valid_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_buffer) noexcept { + return get_default_implementation()->convert_valid_utf32_to_utf8(buf, len, + utf8_buffer); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t convert_utf32_to_utf16( + const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { + #if SIMDUTF_IS_BIG_ENDIAN + return convert_utf32_to_utf16be(buf, len, utf16_buffer); + #else + return convert_utf32_to_utf16le(buf, len, utf16_buffer); + #endif +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t convert_utf32_to_latin1( + const char32_t *input, size_t length, char *latin1_output) noexcept { + return get_default_implementation()->convert_utf32_to_latin1(input, length, + latin1_output); +} +simdutf_warn_unused result convert_utf32_to_latin1_with_errors( + const char32_t *input, size_t length, char *latin1_buffer) noexcept { + return get_default_implementation()->convert_utf32_to_latin1_with_errors( + input, length, latin1_buffer); +} +simdutf_warn_unused size_t convert_valid_utf32_to_latin1( + const char32_t *input, size_t length, char *latin1_buffer) noexcept { + return 
get_default_implementation()->convert_valid_utf32_to_latin1( + input, length, latin1_buffer); +} +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t convert_utf32_to_utf16le( + const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { + return get_default_implementation()->convert_utf32_to_utf16le(buf, len, + utf16_buffer); +} +simdutf_warn_unused size_t convert_utf32_to_utf16be( + const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { + return get_default_implementation()->convert_utf32_to_utf16be(buf, len, + utf16_buffer); +} +simdutf_warn_unused result convert_utf32_to_utf16_with_errors( + const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { + #if SIMDUTF_IS_BIG_ENDIAN + return convert_utf32_to_utf16be_with_errors(buf, len, utf16_buffer); + #else + return convert_utf32_to_utf16le_with_errors(buf, len, utf16_buffer); + #endif +} +simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( + const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { + return get_default_implementation()->convert_utf32_to_utf16le_with_errors( + buf, len, utf16_buffer); +} +simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( + const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { + return get_default_implementation()->convert_utf32_to_utf16be_with_errors( + buf, len, utf16_buffer); +} +simdutf_warn_unused size_t convert_valid_utf32_to_utf16( + const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { + #if SIMDUTF_IS_BIG_ENDIAN + return convert_valid_utf32_to_utf16be(buf, len, utf16_buffer); + #else + return convert_valid_utf32_to_utf16le(buf, len, utf16_buffer); + #endif +} +simdutf_warn_unused size_t convert_valid_utf32_to_utf16le( + const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { + return get_default_implementation()->convert_valid_utf32_to_utf16le( + buf, len, utf16_buffer); +} 
+simdutf_warn_unused size_t convert_valid_utf32_to_utf16be( + const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { + return get_default_implementation()->convert_valid_utf32_to_utf16be( + buf, len, utf16_buffer); +} +simdutf_warn_unused size_t convert_utf16_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { + #if SIMDUTF_IS_BIG_ENDIAN + return convert_utf16be_to_utf32(buf, len, utf32_buffer); + #else + return convert_utf16le_to_utf32(buf, len, utf32_buffer); + #endif +} +simdutf_warn_unused size_t convert_utf16le_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { + return get_default_implementation()->convert_utf16le_to_utf32(buf, len, + utf32_buffer); +} +simdutf_warn_unused size_t convert_utf16be_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { + return get_default_implementation()->convert_utf16be_to_utf32(buf, len, + utf32_buffer); +} +simdutf_warn_unused result convert_utf16_to_utf32_with_errors( + const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { + #if SIMDUTF_IS_BIG_ENDIAN + return convert_utf16be_to_utf32_with_errors(buf, len, utf32_buffer); + #else + return convert_utf16le_to_utf32_with_errors(buf, len, utf32_buffer); + #endif +} +simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( + const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { + return get_default_implementation()->convert_utf16le_to_utf32_with_errors( + buf, len, utf32_buffer); +} +simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( + const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { + return get_default_implementation()->convert_utf16be_to_utf32_with_errors( + buf, len, utf32_buffer); +} +simdutf_warn_unused size_t convert_valid_utf16_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { + #if SIMDUTF_IS_BIG_ENDIAN + return convert_valid_utf16be_to_utf32(buf, len, utf32_buffer); + #else + 
return convert_valid_utf16le_to_utf32(buf, len, utf32_buffer); + #endif +} +simdutf_warn_unused size_t convert_valid_utf16le_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { + return get_default_implementation()->convert_valid_utf16le_to_utf32( + buf, len, utf32_buffer); +} +simdutf_warn_unused size_t convert_valid_utf16be_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { + return get_default_implementation()->convert_valid_utf16be_to_utf32( + buf, len, utf32_buffer); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 +void change_endianness_utf16(const char16_t *input, size_t length, + char16_t *output) noexcept { + get_default_implementation()->change_endianness_utf16(input, length, output); +} +simdutf_warn_unused size_t count_utf16(const char16_t *input, + size_t length) noexcept { + #if SIMDUTF_IS_BIG_ENDIAN + return count_utf16be(input, length); + #else + return count_utf16le(input, length); + #endif +} +simdutf_warn_unused size_t count_utf16le(const char16_t *input, + size_t length) noexcept { + return get_default_implementation()->count_utf16le(input, length); +} +simdutf_warn_unused size_t count_utf16be(const char16_t *input, + size_t length) noexcept { + return get_default_implementation()->count_utf16be(input, length); +} +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t count_utf8(const char *input, + size_t length) noexcept { + return get_default_implementation()->count_utf8(input, length); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t latin1_length_from_utf8(const char *buf, + size_t len) noexcept { + return get_default_implementation()->latin1_length_from_utf8(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 
+simdutf_warn_unused size_t utf8_length_from_latin1(const char *buf, + size_t len) noexcept { + return get_default_implementation()->utf8_length_from_latin1(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t utf8_length_from_utf16(const char16_t *input, + size_t length) noexcept { + #if SIMDUTF_IS_BIG_ENDIAN + return utf8_length_from_utf16be(input, length); + #else + return utf8_length_from_utf16le(input, length); + #endif +} +simdutf_warn_unused result utf8_length_from_utf16_with_replacement( + const char16_t *input, size_t length) noexcept { + #if SIMDUTF_IS_BIG_ENDIAN + return utf8_length_from_utf16be_with_replacement(input, length); + #else + return utf8_length_from_utf16le_with_replacement(input, length); + #endif +} +simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t *input, + size_t length) noexcept { + return get_default_implementation()->utf8_length_from_utf16le(input, length); +} +simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t *input, + size_t length) noexcept { + return get_default_implementation()->utf8_length_from_utf16be(input, length); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t utf32_length_from_utf16(const char16_t *input, + size_t length) noexcept { + #if SIMDUTF_IS_BIG_ENDIAN + return utf32_length_from_utf16be(input, length); + #else + return utf32_length_from_utf16le(input, length); + #endif +} +simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t *input, + size_t length) noexcept { + return get_default_implementation()->utf32_length_from_utf16le(input, length); +} +simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t *input, + size_t length) noexcept { + return get_default_implementation()->utf32_length_from_utf16be(input, length); +} +#endif // SIMDUTF_FEATURE_UTF16 && 
SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t utf16_length_from_utf8(const char *input, + size_t length) noexcept { + return get_default_implementation()->utf16_length_from_utf8(input, length); +} +simdutf_warn_unused result utf8_length_from_utf16le_with_replacement( + const char16_t *input, size_t length) noexcept { + return get_default_implementation() + ->utf8_length_from_utf16le_with_replacement(input, length); +} + +simdutf_warn_unused result utf8_length_from_utf16be_with_replacement( + const char16_t *input, size_t length) noexcept { + return get_default_implementation() + ->utf8_length_from_utf16be_with_replacement(input, length); +} + +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t *input, + size_t length) noexcept { + return get_default_implementation()->utf8_length_from_utf32(input, length); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t *input, + size_t length) noexcept { + return get_default_implementation()->utf16_length_from_utf32(input, length); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t utf32_length_from_utf8(const char *input, + size_t length) noexcept { + return get_default_implementation()->utf32_length_from_utf8(input, length); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_BASE64 + +// this has been moved to implementation.h +// simdutf_warn_unused size_t +// base64_length_from_binary(size_t length, base64_options option) noexcept; + +// this has been moved to implementation.h +// simdutf_warn_unused size_t base64_length_from_binary_with_lines( +// size_t length, base64_options options, size_t 
line_length) noexcept; +// } + +simdutf_warn_unused const char *detail::find(const char *start, const char *end, + char character) noexcept { + return get_default_implementation()->find(start, end, character); +} +simdutf_warn_unused const char16_t *detail::find(const char16_t *start, + const char16_t *end, + char16_t character) noexcept { + return get_default_implementation()->find(start, end, character); +} + +simdutf_warn_unused size_t +maximal_binary_length_from_base64(const char *input, size_t length) noexcept { + return get_default_implementation()->maximal_binary_length_from_base64( + input, length); +} + +simdutf_warn_unused result base64_to_binary( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_handling_options) noexcept { + return get_default_implementation()->base64_to_binary( + input, length, output, options, last_chunk_handling_options); +} + +simdutf_warn_unused size_t maximal_binary_length_from_base64( + const char16_t *input, size_t length) noexcept { + return get_default_implementation()->maximal_binary_length_from_base64( + input, length); +} + +simdutf_warn_unused result base64_to_binary( + const char16_t *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_handling_options) noexcept { + return get_default_implementation()->base64_to_binary( + input, length, output, options, last_chunk_handling_options); +} + +// moved to implementation.h +// simdutf_warn_unused bool base64_ignorable(char input, +// base64_options options) noexcept +// simdutf_warn_unused bool base64_ignorable(char16_t input, +// base64_options options) noexcept +// simdutf_warn_unused bool base64_valid(char input, +// base64_options options) noexcept +// simdutf_warn_unused bool base64_valid(char16_t input, +// base64_options options) noexcept +// simdutf_warn_unused bool +// base64_valid_or_padding(char input, base64_options options) noexcept +// 
simdutf_warn_unused bool +// base64_valid_or_padding(char16_t input, base64_options options) noexcept + +// base64_to_binary_safe_impl is moved to +// include/simdutf/base64_implementation.h + + #if SIMDUTF_ATOMIC_REF +size_t atomic_binary_to_base64(const char *input, size_t length, char *output, + base64_options options) noexcept { + size_t retval = 0; + #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) + // We use a smaller buffer during fuzzing to more easily detect bugs. + constexpr size_t input_block_size = 128 * 3; + #else + // Arbitrary block sizes: 3KB for input which produces 4KB in output. + constexpr size_t input_block_size = 1024 * 3; + #endif + std::array inbuf; + for (size_t i = 0; i < length; i += input_block_size) { + const size_t current_block_size = std::min(input_block_size, length - i); + simdutf::scalar::memcpy_atomic_read(inbuf.data(), input + i, + current_block_size); + const size_t written = binary_to_base64(inbuf.data(), current_block_size, + output + retval, options); + retval += written; + } + return retval; +} + #endif // SIMDUTF_ATOMIC_REF + +#endif // SIMDUTF_FEATURE_BASE64 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t convert_latin1_to_utf8_safe( + const char *buf, size_t len, char *utf8_output, size_t utf8_len) noexcept { + const auto start{utf8_output}; + + while (true) { + // convert_latin1_to_utf8 will never write more than input length * 2 + auto read_len = std::min(len, utf8_len >> 1); + if (read_len <= 16) { + break; + } + + const auto write_len = + simdutf::convert_latin1_to_utf8(buf, read_len, utf8_output); + + utf8_output += write_len; + utf8_len -= write_len; + buf += read_len; + len -= read_len; + } + + utf8_output += + scalar::latin1_to_utf8::convert_safe(buf, len, utf8_output, utf8_len); + + return utf8_output - start; +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_BASE64 +simdutf_warn_unused result +base64_to_binary_safe(const char *input, 
size_t length, char *output, + size_t &outlen, base64_options options, + last_chunk_handling_options last_chunk_handling_options, + bool decode_up_to_bad_char) noexcept { + return base64_to_binary_safe_impl(input, length, output, outlen, + options, last_chunk_handling_options, + decode_up_to_bad_char); +} +simdutf_warn_unused result +base64_to_binary_safe(const char16_t *input, size_t length, char *output, + size_t &outlen, base64_options options, + last_chunk_handling_options last_chunk_handling_options, + bool decode_up_to_bad_char) noexcept { + return base64_to_binary_safe_impl( + input, length, output, outlen, options, last_chunk_handling_options, + decode_up_to_bad_char); +} + +size_t binary_to_base64(const char *input, size_t length, char *output, + base64_options options) noexcept { + return get_default_implementation()->binary_to_base64(input, length, output, + options); +} + +size_t binary_to_base64_with_lines(const char *input, size_t length, + char *output, size_t line_length, + base64_options options) noexcept { + return get_default_implementation()->binary_to_base64_with_lines( + input, length, output, line_length, options); +} +#endif // SIMDUTF_FEATURE_BASE64 + +#if SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused simdutf::encoding_type +autodetect_encoding(const char *buf, size_t length) noexcept { + return get_default_implementation()->autodetect_encoding(buf, length); +} + +simdutf_warn_unused int detect_encodings(const char *buf, + size_t length) noexcept { + return get_default_implementation()->detect_encodings(buf, length); +} +#endif // SIMDUTF_FEATURE_DETECT_ENCODING + +const implementation *builtin_implementation() { + static const implementation *builtin_impl = + get_available_implementations()[SIMDUTF_STRINGIFY( + SIMDUTF_BUILTIN_IMPLEMENTATION)]; + return builtin_impl; +} + +#if SIMDUTF_FEATURE_UTF8 +simdutf_warn_unused size_t trim_partial_utf8(const char *input, size_t length) { + return scalar::utf8::trim_partial_utf8(input, 
length); +} +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t trim_partial_utf16be(const char16_t *input, + size_t length) { + return scalar::utf16::trim_partial_utf16(input, length); +} + +simdutf_warn_unused size_t trim_partial_utf16le(const char16_t *input, + size_t length) { + return scalar::utf16::trim_partial_utf16(input, length); +} + +simdutf_warn_unused size_t trim_partial_utf16(const char16_t *input, + size_t length) { + #if SIMDUTF_IS_BIG_ENDIAN + return trim_partial_utf16be(input, length); + #else + return trim_partial_utf16le(input, length); + #endif +} +#endif // SIMDUTF_FEATURE_UTF16 + +} // namespace simdutf +/* end file src/implementation.cpp */ + +SIMDUTF_PUSH_DISABLE_WARNINGS +SIMDUTF_DISABLE_UNDESIRED_WARNINGS + +#if SIMDUTF_IMPLEMENTATION_ARM64 +/* begin file src/arm64/implementation.cpp */ +/* begin file src/simdutf/arm64/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "arm64" +// #define SIMDUTF_IMPLEMENTATION arm64 +#define SIMDUTF_SIMD_HAS_BYTEMASK 1 +/* end file src/simdutf/arm64/begin.h */ +namespace simdutf { +namespace arm64 { +namespace { +#ifndef SIMDUTF_ARM64_H + #error "arm64.h must be included" +#endif +using namespace simd; + +#if SIMDUTF_FEATURE_ASCII || SIMDUTF_FEATURE_DETECT_ENCODING || \ + SIMDUTF_FEATURE_UTF8 +simdutf_really_inline bool is_ascii(const simd8x64 &input) { + simd8 bits = input.reduce_or(); + return bits.max_val() < 0b10000000u; +} +#endif // SIMDUTF_FEATURE_ASCII || SIMDUTF_FEATURE_DETECT_ENCODING || + // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_really_inline simd8 +must_be_2_3_continuation(const simd8 prev2, + const simd8 prev3) { + simd8 is_third_byte = prev2 >= uint8_t(0b11100000u); + simd8 is_fourth_byte = prev3 >= uint8_t(0b11110000u); + return is_third_byte ^ is_fourth_byte; +} +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 && (SIMDUTF_FEATURE_UTF16 || 
SIMDUTF_FEATURE_UTF32) +// common functions for utf8 conversions +simdutf_really_inline uint16x4_t convert_utf8_3_byte_to_utf16(uint8x16_t in) { + // Low half contains 10cccccc|1110aaaa + // High half contains 10bbbbbb|10bbbbbb + #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint8x16_t sh = simdutf_make_uint8x16_t(0, 2, 3, 5, 6, 8, 9, 11, 1, 1, + 4, 4, 7, 7, 10, 10); + #else + const uint8x16_t sh = {0, 2, 3, 5, 6, 8, 9, 11, 1, 1, 4, 4, 7, 7, 10, 10}; + #endif + uint8x16_t perm = vqtbl1q_u8(in, sh); + // Split into half vectors. + // 10cccccc|1110aaaa + uint8x8_t perm_low = vget_low_u8(perm); // no-op + // 10bbbbbb|10bbbbbb + uint8x8_t perm_high = vget_high_u8(perm); + // xxxxxxxx 10bbbbbb + uint16x4_t mid = vreinterpret_u16_u8(perm_high); // no-op + // xxxxxxxx 1110aaaa + uint16x4_t high = vreinterpret_u16_u8(perm_low); // no-op + // Assemble with shift left insert. + // xxxxxxaa aabbbbbb + uint16x4_t mid_high = vsli_n_u16(mid, high, 6); + // (perm_low << 8) | (perm_low >> 8) + // xxxxxxxx 10cccccc + uint16x4_t low = vreinterpret_u16_u8(vrev16_u8(perm_low)); + // Shift left insert into the low bits + // aaaabbbb bbcccccc + uint16x4_t composed = vsli_n_u16(low, mid_high, 6); + return composed; +} + +simdutf_really_inline uint16x8_t convert_utf8_2_byte_to_utf16(uint8x16_t in) { + // Converts 6 2 byte UTF-8 characters to 6 UTF-16 characters. + // Technically this calculates 8, but 6 does better and happens more often + // (The languages which use these codepoints use ASCII spaces so 8 would need + // to be in the middle of a very long word). + + // 10bbbbbb 110aaaaa + uint16x8_t upper = vreinterpretq_u16_u8(in); + // (in << 8) | (in >> 8) + // 110aaaaa 10bbbbbb + uint16x8_t lower = vreinterpretq_u16_u8(vrev16q_u8(in)); + // 00000000 000aaaaa + uint16x8_t upper_masked = vandq_u16(upper, vmovq_n_u16(0x1F)); + // Assemble with shift left insert. 
+ // 00000aaa aabbbbbb + uint16x8_t composed = vsliq_n_u16(lower, upper_masked, 6); + return composed; +} + +simdutf_really_inline uint16x8_t +convert_utf8_1_to_2_byte_to_utf16(uint8x16_t in, size_t shufutf8_idx) { + // Converts 6 1-2 byte UTF-8 characters to 6 UTF-16 characters. + // This is a relatively easy scenario + // we process SIX (6) input code-code units. The max length in bytes of six + // code code units spanning between 1 and 2 bytes each is 12 bytes. + uint8x16_t sh = vld1q_u8(reinterpret_cast( + simdutf::tables::utf8_to_utf16::shufutf8[shufutf8_idx])); + // Shuffle + // 1 byte: 00000000 0bbbbbbb + // 2 byte: 110aaaaa 10bbbbbb + uint16x8_t perm = vreinterpretq_u16_u8(vqtbl1q_u8(in, sh)); + // Mask + // 1 byte: 00000000 0bbbbbbb + // 2 byte: 00000000 00bbbbbb + uint16x8_t ascii = vandq_u16(perm, vmovq_n_u16(0x7f)); // 6 or 7 bits + // 1 byte: 00000000 00000000 + // 2 byte: 000aaaaa 00000000 + uint16x8_t highbyte = vandq_u16(perm, vmovq_n_u16(0x1f00)); // 5 bits + // Combine with a shift right accumulate + // 1 byte: 00000000 0bbbbbbb + // 2 byte: 00000aaa aabbbbbb + uint16x8_t composed = vsraq_n_u16(ascii, highbyte, 2); + return composed; +} +#endif // SIMDUTF_FEATURE_UTF8 && (SIMDUTF_FEATURE_UTF16 || + // SIMDUTF_FEATURE_UTF32) + +#if SIMDUTF_FEATURE_UTF16 +/* begin file src/arm64/arm_utf16fix.cpp */ + +/* + * Returns if a vector of type uint8x16_t is all zero. + */ +simdutf_really_inline int veq_non_zero(uint8x16_t v) { + // might compile to two instructions: + // umaxv s0, v0.4s + // fmov w0, s0 + // On Apple hardware, they both have a latency of 3 cycles, with a throughput + // of four instructions per cycle. So that's 6 cycles of latency (!!!) for the + // two instructions. A narrowing shift has the same latency and throughput. + return vmaxvq_u32(vreinterpretq_u32_u8(v)); +} + +/* + * Process one block of 16 characters. If in_place is false, + * copy the block from in to out. 
If there is a sequencing + * error in the block, overwrite the illsequenced characters + * with the replacement character. This function reads one + * character before the beginning of the buffer as a lookback. + * If that character is illsequenced, it too is overwritten. + */ +template +void utf16fix_block(char16_t *out, const char16_t *in) { + const char16_t replacement = scalar::utf16::replacement(); + uint8x16x2_t lb, block; + uint8x16_t lb_masked, block_masked, lb_is_high, block_is_low; + uint8x16_t illseq; + + constexpr int idx = !match_system(big_endian) ? 0 : 1; + + /* TODO: compute lookback using shifts */ + lb = vld2q_u8((const uint8_t *)(in - 1)); + block = vld2q_u8((const uint8_t *)in); + lb_masked = vandq_u8(lb.val[idx], vdupq_n_u8(0xfc)); + block_masked = vandq_u8(block.val[idx], vdupq_n_u8(0xfc)); + lb_is_high = vceqq_u8(lb_masked, vdupq_n_u8(0xd8)); + block_is_low = vceqq_u8(block_masked, vdupq_n_u8(0xdc)); + + illseq = veorq_u8(lb_is_high, block_is_low); + if (veq_non_zero(illseq)) { + uint8x16_t lb_illseq, block_illseq; + char16_t lbc; + int ill; + + /* compute the cause of the illegal sequencing */ + lb_illseq = vbicq_u8(lb_is_high, block_is_low); + block_illseq = vorrq_u8(vbicq_u8(block_is_low, lb_is_high), + vextq_u8(lb_illseq, vdupq_n_u8(0), 1)); + + /* fix illegal sequencing in the lookback */ + ill = vgetq_lane_u8(lb_illseq, 0); + lbc = out[-1]; + out[-1] = ill ? 
replacement : lbc; + + /* fix illegal sequencing in the main block */ + if simdutf_constexpr (!match_system(big_endian)) { + block.val[1] = vbslq_u8(block_illseq, vdupq_n_u8(0xfd), block.val[1]); + block.val[0] = vorrq_u8(block_illseq, block.val[0]); + } else { + block.val[0] = vbslq_u8(block_illseq, vdupq_n_u8(0xfd), block.val[0]); + block.val[1] = vorrq_u8(block_illseq, block.val[1]); + } + + vst2q_u8((uint8_t *)out, block); + } else if (!inplace) { + vst2q_u8((uint8_t *)out, block); + } +} + +template +uint8x16_t get_mismatch_copy(const char16_t *in, char16_t *out) { + constexpr int idx = !match_system(big_endian) ? 0 : 1; + uint8x16x2_t lb = vld2q_u8((const uint8_t *)(in - 1)); + uint8x16x2_t block = vld2q_u8((const uint8_t *)in); + uint8x16_t lb_masked = vandq_u8(lb.val[idx], vdupq_n_u8(0xfc)); + uint8x16_t block_masked = vandq_u8(block.val[idx], vdupq_n_u8(0xfc)); + uint8x16_t lb_is_high = vceqq_u8(lb_masked, vdupq_n_u8(0xd8)); + uint8x16_t block_is_low = vceqq_u8(block_masked, vdupq_n_u8(0xdc)); + uint8x16_t illseq = veorq_u8(lb_is_high, block_is_low); + if (!inplace) { + vst2q_u8((uint8_t *)out, block); + } + return illseq; +} + +simdutf_really_inline uint64_t get_mask(uint8x16_t illse0, uint8x16_t illse1, + uint8x16_t illse2, uint8x16_t illse3) { +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + uint8x16_t bit_mask = + simdutf_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); +#else + uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; +#endif + uint8x16_t sum0 = + vpaddq_u8(vandq_u8(illse0, bit_mask), vandq_u8(illse1, bit_mask)); + uint8x16_t sum1 = + vpaddq_u8(vandq_u8(illse2, bit_mask), vandq_u8(illse3, bit_mask)); + sum0 = vpaddq_u8(sum0, sum1); + sum0 = vpaddq_u8(sum0, sum0); + return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); +} + +// The idea is to process 64 characters at a time, and if there is a mismatch +// we can fix it with 
a bit of scalar code. When the input is correct, this +// function might be faster than alternative implementations working on small +// blocks of input. +template +bool utf16fix_block64(char16_t *out, const char16_t *in) { + const char16_t replacement = scalar::utf16::replacement(); + + uint8x16_t illse0 = inplace ? get_mismatch_copy(in, out) + : get_mismatch_copy(in, out); + uint8x16_t illse1 = + inplace ? get_mismatch_copy(in + 16, out + 16) + : get_mismatch_copy(in + 16, out + 16); + uint8x16_t illse2 = + inplace ? get_mismatch_copy(in + 32, out + 32) + : get_mismatch_copy(in + 32, out + 32); + uint8x16_t illse3 = + inplace ? get_mismatch_copy(in + 48, out + 48) + : get_mismatch_copy(in + 48, out + 48); + // this branch could be marked as unlikely: + if (veq_non_zero( + vorrq_u8(vorrq_u8(illse0, illse1), vorrq_u8(illse2, illse3)))) { + uint64_t matches = get_mask(illse0, illse1, illse2, illse3); + // Given that ARM has a fast bitreverse instruction, we can + // reverse once and then use clz to find the first bit set. + // It is how it is done in simdjson and *might* be beneficial. + // + // We might also proceed in reverse to reduce the RAW hazard, + // but it might require more instructions. + + while (matches != 0) { + int r = trailing_zeroes(matches); // generates rbit + clz + // Either we have a high surrogate followed by a non-low surrogate + // or we have a low surrogate not preceded by a high surrogate. + bool is_high = scalar::utf16::is_high_surrogate(in[r - 1]); + out[r - is_high] = replacement; + matches = clear_least_significant_bit(matches); + } + return false; + } + return true; +} + +template +void utf16fix_neon_64bits(const char16_t *in, size_t n, char16_t *out) { + size_t i; + const char16_t replacement = scalar::utf16::replacement(); + if (n < 17) { + return scalar::utf16::to_well_formed_utf16(in, n, out); + } + out[0] = + scalar::utf16::is_low_surrogate(in[0]) ? 
replacement : in[0]; + i = 1; + + /* duplicate code to have the compiler specialise utf16fix_block() */ + if (in == out) { + for (i = 1; i + 64 < n; i += 64) { + utf16fix_block64(out + i, in + i); + } + + for (; i + 16 < n; i += 16) { + utf16fix_block(out + i, in + i); + } + + /* tbd: find carry */ + utf16fix_block(out + n - 16, in + n - 16); + } else { + for (i = 1; i + 64 < n; i += 64) { + utf16fix_block64(out + i, in + i); + } + for (; i + 16 < n; i += 16) { + utf16fix_block(out + i, in + i); + } + + utf16fix_block(out + n - 16, in + n - 16); + } + out[n - 1] = scalar::utf16::is_high_surrogate(out[n - 1]) + ? replacement + : out[n - 1]; +} +/* end file src/arm64/arm_utf16fix.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +/* begin file src/arm64/arm_validate_utf16.cpp */ +template +const char16_t *arm_validate_utf16(const char16_t *input, size_t size) { + const char16_t *end = input + size; + const auto v_d8 = simd8::splat(0xd8); + const auto v_f8 = simd8::splat(0xf8); + const auto v_fc = simd8::splat(0xfc); + const auto v_dc = simd8::splat(0xdc); + while (end - input >= 16) { + // 0. Load data: since the validation takes into account only higher + // byte of each word, we compress the two vectors into one which + // consists only the higher bytes. + auto in0 = simd16(input); + auto in1 = + simd16(input + simd16::SIZE / sizeof(char16_t)); + if simdutf_constexpr (!match_system(big_endian)) { + in0 = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in0))); + in1 = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in1))); + } + const auto t0 = in0.shr<8>(); + const auto t1 = in1.shr<8>(); + const simd8 in = simd16::pack(t0, t1); + // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy). + const uint64_t surrogates_wordmask = ((in & v_f8) == v_d8).to_bitmask64(); + if (surrogates_wordmask == 0) { + input += 16; + } else { + // 2. 
We have some surrogates that have to be distinguished: + // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF) + // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF) + // + // Fact: high surrogate has 11th bit set (3rd bit in the higher word) + + // V - non-surrogate code units + // V = not surrogates_wordmask + const uint64_t V = ~surrogates_wordmask; + + // H - word-mask for high surrogates: the six highest bits are 0b1101'11 + const auto vH = ((in & v_fc) == v_dc); + const uint64_t H = vH.to_bitmask64(); + + // L - word mask for low surrogates + // L = not H and surrogates_wordmask + const uint64_t L = ~H & surrogates_wordmask; + + const uint64_t a = + L & (H >> 4); // A low surrogate must be followed by high one. + // (A low surrogate placed in the 7th register's word + // is an exception we handle.) + const uint64_t b = + a << 4; // Just mark that the opposite fact is hold, + // thanks to that we have only two masks for valid case. + const uint64_t c = V | a | b; // Combine all the masks into the final one. + if (c == ~0ull) { + // The whole input register contains valid UTF-16, i.e., + // either single code units or proper surrogate pairs. + input += 16; + } else if (c == 0xfffffffffffffffull) { + // The 15 lower code units of the input register contains valid UTF-16. + // The 15th word may be either a low or high surrogate. It the next + // iteration we 1) check if the low surrogate is followed by a high + // one, 2) reject sole high surrogate. 
+ input += 15; + } else { + return nullptr; + } + } + } + return input; +} + +template +const char16_t *arm_validate_utf16_as_ascii(const char16_t *input, + size_t size) { + const char16_t *end = input + size; + while (end - input >= 16) { + uint16x8_t in1 = vld1q_u16(reinterpret_cast(input)); + uint16x8_t in2 = vld1q_u16(reinterpret_cast(input + 8)); + uint16x8_t inor = vorrq_u16(in1, in2); + if simdutf_constexpr (!match_system(big_endian)) { + inor = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(inor))); + } + // next we compute inor > 0x7f + uint16x8_t cmp = vcgtq_u16(inor, vdupq_n_u16(0x7f)); + uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(cmp, 4)), 0); + if (mask) { + return nullptr; + } + input += 16; + } + return input; +} + +template +const result arm_validate_utf16_with_errors(const char16_t *input, + size_t size) { + const char16_t *start = input; + const char16_t *end = input + size; + + const auto v_d8 = simd8::splat(0xd8); + const auto v_f8 = simd8::splat(0xf8); + const auto v_fc = simd8::splat(0xfc); + const auto v_dc = simd8::splat(0xdc); + while (input + 16 < end) { + // 0. Load data: since the validation takes into account only higher + // byte of each word, we compress the two vectors into one which + // consists only the higher bytes. + auto in0 = simd16(input); + auto in1 = + simd16(input + simd16::SIZE / sizeof(char16_t)); + + if simdutf_constexpr (!match_system(big_endian)) { + in0 = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in0))); + in1 = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in1))); + } + const auto t0 = in0.shr<8>(); + const auto t1 = in1.shr<8>(); + const simd8 in = simd16::pack(t0, t1); + // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy). + const uint64_t surrogates_wordmask = ((in & v_f8) == v_d8).to_bitmask64(); + if (surrogates_wordmask == 0) { + input += 16; + } else { + // 2. 
We have some surrogates that have to be distinguished: + // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF) + // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF) + // + // Fact: high surrogate has 11th bit set (3rd bit in the higher word) + + // V - non-surrogate code units + // V = not surrogates_wordmask + const uint64_t V = ~surrogates_wordmask; + + // H - word-mask for high surrogates: the six highest bits are 0b1101'11 + const auto vH = ((in & v_fc) == v_dc); + const uint64_t H = vH.to_bitmask64(); + + // L - word mask for low surrogates + // L = not H and surrogates_wordmask + const uint64_t L = ~H & surrogates_wordmask; + + const uint64_t a = + L & (H >> 4); // A low surrogate must be followed by high one. + // (A low surrogate placed in the 7th register's word + // is an exception we handle.) + const uint64_t b = + a << 4; // Just mark that the opposite fact is hold, + // thanks to that we have only two masks for valid case. + const uint64_t c = V | a | b; // Combine all the masks into the final one. + if (c == ~0ull) { + // The whole input register contains valid UTF-16, i.e., + // either single code units or proper surrogate pairs. + input += 16; + } else if (c == 0xfffffffffffffffull) { + // The 15 lower code units of the input register contains valid UTF-16. + // The 15th word may be either a low or high surrogate. It the next + // iteration we 1) check if the low surrogate is followed by a high + // one, 2) reject sole high surrogate. 
+ input += 15; + } else { + return result(error_code::SURROGATE, input - start); + } + } + } + return result(error_code::SUCCESS, input - start); +} +/* end file src/arm64/arm_validate_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +/* begin file src/arm64/arm_validate_utf32le.cpp */ + +const char32_t *arm_validate_utf32le(const char32_t *input, size_t size) { + const char32_t *end = input + size; + + const uint32x4_t standardmax = vmovq_n_u32(0x10ffff); + const uint32x4_t offset = vmovq_n_u32(0xffff2000); + const uint32x4_t standardoffsetmax = vmovq_n_u32(0xfffff7ff); + uint32x4_t currentmax = vmovq_n_u32(0x0); + uint32x4_t currentoffsetmax = vmovq_n_u32(0x0); + + while (end - input >= 4) { + const uint32x4_t in = vld1q_u32(reinterpret_cast(input)); + currentmax = vmaxq_u32(in, currentmax); + currentoffsetmax = vmaxq_u32(vaddq_u32(in, offset), currentoffsetmax); + input += 4; + } + + uint32x4_t is_zero = + veorq_u32(vmaxq_u32(currentmax, standardmax), standardmax); + if (vmaxvq_u32(is_zero) != 0) { + return nullptr; + } + + is_zero = veorq_u32(vmaxq_u32(currentoffsetmax, standardoffsetmax), + standardoffsetmax); + if (vmaxvq_u32(is_zero) != 0) { + return nullptr; + } + + return input; +} + +const result arm_validate_utf32le_with_errors(const char32_t *input, + size_t size) { + const char32_t *start = input; + const char32_t *end = input + size; + + const uint32x4_t standardmax = vmovq_n_u32(0x10ffff); + const uint32x4_t offset = vmovq_n_u32(0xffff2000); + const uint32x4_t standardoffsetmax = vmovq_n_u32(0xfffff7ff); + uint32x4_t currentmax = vmovq_n_u32(0x0); + uint32x4_t currentoffsetmax = vmovq_n_u32(0x0); + + while (end - input >= 4) { + const uint32x4_t in = vld1q_u32(reinterpret_cast(input)); + currentmax = vmaxq_u32(in, currentmax); + currentoffsetmax = vmaxq_u32(vaddq_u32(in, offset), currentoffsetmax); + + uint32x4_t is_zero = + veorq_u32(vmaxq_u32(currentmax, 
standardmax), standardmax); + if (vmaxvq_u32(is_zero) != 0) { + return result(error_code::TOO_LARGE, input - start); + } + + is_zero = veorq_u32(vmaxq_u32(currentoffsetmax, standardoffsetmax), + standardoffsetmax); + if (vmaxvq_u32(is_zero) != 0) { + return result(error_code::SURROGATE, input - start); + } + + input += 4; + } + + return result(error_code::SUCCESS, input - start); +} +/* end file src/arm64/arm_validate_utf32le.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/arm64/arm_convert_latin1_to_utf16.cpp */ +template +std::pair +arm_convert_latin1_to_utf16(const char *buf, size_t len, + char16_t *utf16_output) { + const char *end = buf + len; + + while (end - buf >= 16) { + uint8x16_t in8 = vld1q_u8(reinterpret_cast(buf)); + uint16x8_t inlow = vmovl_u8(vget_low_u8(in8)); + if simdutf_constexpr (!match_system(big_endian)) { + inlow = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(inlow))); + } + vst1q_u16(reinterpret_cast(utf16_output), inlow); + uint16x8_t inhigh = vmovl_u8(vget_high_u8(in8)); + if simdutf_constexpr (!match_system(big_endian)) { + inhigh = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(inhigh))); + } + vst1q_u16(reinterpret_cast(utf16_output + 8), inhigh); + utf16_output += 16; + buf += 16; + } + + return std::make_pair(buf, utf16_output); +} +/* end file src/arm64/arm_convert_latin1_to_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/arm64/arm_convert_latin1_to_utf32.cpp */ +std::pair +arm_convert_latin1_to_utf32(const char *buf, size_t len, + char32_t *utf32_output) { + const char *end = buf + len; + + while (end - buf >= 16) { + uint8x16_t in8 = vld1q_u8(reinterpret_cast(buf)); + uint16x8_t in8low = vmovl_u8(vget_low_u8(in8)); + uint32x4_t in16lowlow = vmovl_u16(vget_low_u16(in8low)); + uint32x4_t in16lowhigh = 
vmovl_u16(vget_high_u16(in8low)); + uint16x8_t in8high = vmovl_u8(vget_high_u8(in8)); + uint32x4_t in8highlow = vmovl_u16(vget_low_u16(in8high)); + uint32x4_t in8highhigh = vmovl_u16(vget_high_u16(in8high)); + vst1q_u32(reinterpret_cast(utf32_output), in16lowlow); + vst1q_u32(reinterpret_cast(utf32_output + 4), in16lowhigh); + vst1q_u32(reinterpret_cast(utf32_output + 8), in8highlow); + vst1q_u32(reinterpret_cast(utf32_output + 12), in8highhigh); + + utf32_output += 16; + buf += 16; + } + + return std::make_pair(buf, utf32_output); +} +/* end file src/arm64/arm_convert_latin1_to_utf32.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/arm64/arm_convert_latin1_to_utf8.cpp */ +/* + Returns a pair: the first unprocessed byte from buf and utf8_output + A scalar routing should carry on the conversion of the tail. +*/ +std::pair +arm_convert_latin1_to_utf8(const char *latin1_input, size_t len, + char *utf8_out) { + uint8_t *utf8_output = reinterpret_cast(utf8_out); + const char *end = latin1_input + len; + const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080); + // We always write 16 bytes, of which more than the first 8 bytes + // are valid. A safety margin of 8 is more than sufficient. + while (end - latin1_input >= 16 + 8) { + uint8x16_t in8 = vld1q_u8(reinterpret_cast(latin1_input)); + if (vmaxvq_u8(in8) <= 0x7F) { // ASCII fast path!!!! + vst1q_u8(utf8_output, in8); + utf8_output += 16; + latin1_input += 16; + continue; + } + + // We just fallback on UTF-16 code. This could be optimized/simplified + // further. + uint16x8_t in16 = vmovl_u8(vget_low_u8(in8)); + // 1. 
prepare 2-byte values + // input 8-bit word : [aabb|bbbb] x 8 + // expected output : [1100|00aa|10bb|bbbb] x 8 + const uint16x8_t v_1f00 = vmovq_n_u16((int16_t)0x1f00); + const uint16x8_t v_003f = vmovq_n_u16((int16_t)0x003f); + + // t0 = [0000|00aa|bbbb|bb00] + const uint16x8_t t0 = vshlq_n_u16(in16, 2); + // t1 = [0000|00aa|0000|0000] + const uint16x8_t t1 = vandq_u16(t0, v_1f00); + // t2 = [0000|0000|00bb|bbbb] + const uint16x8_t t2 = vandq_u16(in16, v_003f); + // t3 = [0000|00aa|00bb|bbbb] + const uint16x8_t t3 = vorrq_u16(t1, t2); + // t4 = [1100|00aa|10bb|bbbb] + const uint16x8_t t4 = vorrq_u16(t3, v_c080); + // 2. merge ASCII and 2-byte codewords + const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F); + const uint16x8_t one_byte_bytemask = vcleq_u16(in16, v_007f); + const uint8x16_t utf8_unpacked = + vreinterpretq_u8_u16(vbslq_u16(one_byte_bytemask, in16, t4)); + // 3. prepare bitmask for 8-bit lookup +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint16x8_t mask = simdutf_make_uint16x8_t( + 0x0001, 0x0004, 0x0010, 0x0040, 0x0002, 0x0008, 0x0020, 0x0080); +#else + const uint16x8_t mask = {0x0001, 0x0004, 0x0010, 0x0040, + 0x0002, 0x0008, 0x0020, 0x0080}; +#endif + uint16_t m2 = vaddvq_u16(vandq_u16(one_byte_bytemask, mask)); + // 4. pack the bytes + const uint8_t *row = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0]; + const uint8x16_t shuffle = vld1q_u8(row + 1); + const uint8x16_t utf8_packed = vqtbl1q_u8(utf8_unpacked, shuffle); + + // 5. store bytes + vst1q_u8(utf8_output, utf8_packed); + // 6. 
adjust pointers + latin1_input += 8; + utf8_output += row[0]; + + } // while + + return std::make_pair(latin1_input, reinterpret_cast(utf8_output)); +} +/* end file src/arm64/arm_convert_latin1_to_utf8.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/arm64/arm_convert_utf8_to_latin1.cpp */ +// Convert up to 16 bytes from utf8 to utf16 using a mask indicating the +// end of the code points. Only the least significant 12 bits of the mask +// are accessed. +// It returns how many bytes were consumed (up to 16, usually 12). +size_t convert_masked_utf8_to_latin1(const char *input, + uint64_t utf8_end_of_code_point_mask, + char *&latin1_output) { + // we use an approach where we try to process up to 12 input bytes. + // Why 12 input bytes and not 16? Because we are concerned with the size of + // the lookup tables. Also 12 is nicely divisible by two and three. + // + uint8x16_t in = vld1q_u8(reinterpret_cast(input)); + const uint16_t input_utf8_end_of_code_point_mask = + utf8_end_of_code_point_mask & 0xfff; + // + // Optimization note: our main path below is load-latency dependent. Thus it + // is maybe beneficial to have fast paths that depend on branch prediction but + // have less latency. This results in more instructions but, potentially, also + // higher speeds. + + // We first try a few fast paths. + // The obvious first test is ASCII, which actually consumes the full 16. + if (utf8_end_of_code_point_mask == 0xfff) { + // We process in chunks of 12 bytes + vst1q_u8(reinterpret_cast(latin1_output), in); + latin1_output += 12; // We wrote 12 18-bit characters. + return 12; // We consumed 12 bytes. + } + /// We do not have a fast path available, or the fast path is unimportant, so + /// we fallback. 
+ const uint8_t idx = simdutf::tables::utf8_to_utf16::utf8bigindex + [input_utf8_end_of_code_point_mask][0]; + + const uint8_t consumed = simdutf::tables::utf8_to_utf16::utf8bigindex + [input_utf8_end_of_code_point_mask][1]; + // this indicates an invalid input: + if (idx >= 64) { + return consumed; + } + // Here we should have (idx < 64), if not, there is a bug in the validation or + // elsewhere. SIX (6) input code-code units this is a relatively easy scenario + // we process SIX (6) input code-code units. The max length in bytes of six + // code code units spanning between 1 and 2 bytes each is 12 bytes. Converts 6 + // 1-2 byte UTF-8 characters to 6 UTF-16 characters. This is a relatively easy + // scenario we process SIX (6) input code-code units. The max length in bytes + // of six code code units spanning between 1 and 2 bytes each is 12 bytes. + uint8x16_t sh = vld1q_u8(reinterpret_cast( + simdutf::tables::utf8_to_utf16::shufutf8[idx])); + // Shuffle + // 1 byte: 00000000 0bbbbbbb + // 2 byte: 110aaaaa 10bbbbbb + uint16x8_t perm = vreinterpretq_u16_u8(vqtbl1q_u8(in, sh)); + // Mask + // 1 byte: 00000000 0bbbbbbb + // 2 byte: 00000000 00bbbbbb + uint16x8_t ascii = vandq_u16(perm, vmovq_n_u16(0x7f)); // 6 or 7 bits + // 1 byte: 00000000 00000000 + // 2 byte: 000aaaaa 00000000 + uint16x8_t highbyte = vandq_u16(perm, vmovq_n_u16(0x1f00)); // 5 bits + // Combine with a shift right accumulate + // 1 byte: 00000000 0bbbbbbb + // 2 byte: 00000aaa aabbbbbb + uint16x8_t composed = vsraq_n_u16(ascii, highbyte, 2); + // writing 8 bytes even though we only care about the first 6 bytes. + uint8x8_t latin1_packed = vmovn_u16(composed); + vst1_u8(reinterpret_cast(latin1_output), latin1_packed); + latin1_output += 6; // We wrote 6 bytes. 
+ return consumed; +} +/* end file src/arm64/arm_convert_utf8_to_latin1.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +/* begin file src/arm64/arm_convert_utf8_to_utf16.cpp */ +// Convert up to 16 bytes from utf8 to utf16 using a mask indicating the +// end of the code points. Only the least significant 12 bits of the mask +// are accessed. +// It returns how many bytes were consumed (up to 16, usually 12). +template +size_t convert_masked_utf8_to_utf16(const char *input, + uint64_t utf8_end_of_code_point_mask, + char16_t *&utf16_output) { + // we use an approach where we try to process up to 12 input bytes. + // Why 12 input bytes and not 16? Because we are concerned with the size of + // the lookup tables. Also 12 is nicely divisible by two and three. + // + uint8x16_t in = vld1q_u8(reinterpret_cast(input)); + const uint16_t input_utf8_end_of_code_point_mask = + utf8_end_of_code_point_mask & 0xfff; + // + // Optimization note: our main path below is load-latency dependent. Thus it + // is maybe beneficial to have fast paths that depend on branch prediction but + // have less latency. This results in more instructions but, potentially, also + // higher speeds. + + // We first try a few fast paths. + // The obvious first test is ASCII, which actually consumes the full 16. + if ((utf8_end_of_code_point_mask & 0xFFFF) == 0xffff) { + // We process in chunks of 16 bytes + // The routine in simd.h is reused. + simd8 temp{vreinterpretq_s8_u8(in)}; + temp.store_ascii_as_utf16(utf16_output); + utf16_output += 16; // We wrote 16 16-bit characters. + return 16; // We consumed 16 bytes. + } + + // 3 byte sequences are the next most common, as seen in CJK, which has long + // sequences of these. + if (input_utf8_end_of_code_point_mask == 0x924) { + // We want to take 4 3-byte UTF-8 code units and turn them into 4 2-byte + // UTF-16 code units. 
+ uint16x4_t composed = convert_utf8_3_byte_to_utf16(in); + // Byte swap if necessary + if simdutf_constexpr (!match_system(big_endian)) { + composed = vreinterpret_u16_u8(vrev16_u8(vreinterpret_u8_u16(composed))); + } + vst1_u16(reinterpret_cast(utf16_output), composed); + utf16_output += 4; // We wrote 4 16-bit characters. + return 12; // We consumed 12 bytes. + } + + // 2 byte sequences occur in short bursts in languages like Greek and Russian. + if ((utf8_end_of_code_point_mask & 0xFFF) == 0xaaa) { + // We want to take 6 2-byte UTF-8 code units and turn them into 6 2-byte + // UTF-16 code units. + uint16x8_t composed = convert_utf8_2_byte_to_utf16(in); + // Byte swap if necessary + if simdutf_constexpr (!match_system(big_endian)) { + composed = + vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(composed))); + } + vst1q_u16(reinterpret_cast(utf16_output), composed); + + utf16_output += 6; // We wrote 6 16-bit characters. + return 12; // We consumed 12 bytes. + } + + /// We do not have a fast path available, or the fast path is unimportant, so + /// we fallback. + const uint8_t idx = simdutf::tables::utf8_to_utf16::utf8bigindex + [input_utf8_end_of_code_point_mask][0]; + + const uint8_t consumed = simdutf::tables::utf8_to_utf16::utf8bigindex + [input_utf8_end_of_code_point_mask][1]; + + if (idx < 64) { + // SIX (6) input code-code units + // Convert to UTF-16 + uint16x8_t composed = convert_utf8_1_to_2_byte_to_utf16(in, idx); + // Byte swap if necessary + if simdutf_constexpr (!match_system(big_endian)) { + composed = + vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(composed))); + } + // Store + vst1q_u16(reinterpret_cast(utf16_output), composed); + utf16_output += 6; // We wrote 6 16-bit characters. + return consumed; + } else if (idx < 145) { + // FOUR (4) input code-code units + // UTF-16 and UTF-32 use similar algorithms, but UTF-32 skips the narrowing. 
+ uint8x16_t sh = vld1q_u8(reinterpret_cast( + simdutf::tables::utf8_to_utf16::shufutf8[idx])); + // XXX: depending on the system scalar instructions might be faster. + // 1 byte: 00000000 00000000 0ccccccc + // 2 byte: 00000000 110bbbbb 10cccccc + // 3 byte: 1110aaaa 10bbbbbb 10cccccc + uint32x4_t perm = vreinterpretq_u32_u8(vqtbl1q_u8(in, sh)); + // 1 byte: 00000000 0ccccccc + // 2 byte: xx0bbbbb x0cccccc + // 3 byte: xxbbbbbb x0cccccc + uint16x4_t lowperm = vmovn_u32(perm); + // Partially mask with bic (doesn't require a temporary register unlike and) + // The shift left insert below will clear the top bits. + // 1 byte: 00000000 00000000 + // 2 byte: xx0bbbbb 00000000 + // 3 byte: xxbbbbbb 00000000 + uint16x4_t middlebyte = vbic_u16(lowperm, vmov_n_u16(uint16_t(~0xFF00))); + // ASCII + // 1 byte: 00000000 0ccccccc + // 2+byte: 00000000 00cccccc + uint16x4_t ascii = vand_u16(lowperm, vmov_n_u16(0x7F)); + // Split into narrow vectors. + // 2 byte: 00000000 00000000 + // 3 byte: 00000000 xxxxaaaa + uint16x4_t highperm = vshrn_n_u32(perm, 16); + // Shift right accumulate the middle byte + // 1 byte: 00000000 0ccccccc + // 2 byte: 00xx0bbb bbcccccc + // 3 byte: 00xxbbbb bbcccccc + uint16x4_t middlelow = vsra_n_u16(ascii, middlebyte, 2); + // Shift left and insert the top 4 bits, overwriting the garbage + // 1 byte: 00000000 0ccccccc + // 2 byte: 00000bbb bbcccccc + // 3 byte: aaaabbbb bbcccccc + uint16x4_t composed = vsli_n_u16(middlelow, highperm, 12); + // Byte swap if necessary + if simdutf_constexpr (!match_system(big_endian)) { + composed = vreinterpret_u16_u8(vrev16_u8(vreinterpret_u8_u16(composed))); + } + vst1_u16(reinterpret_cast(utf16_output), composed); + + utf16_output += 4; // We wrote 4 16-bit codepoints + return consumed; + } else if (idx < 209) { + // THREE (3) input code-code units + if (input_utf8_end_of_code_point_mask == 0x888) { + // We want to take 3 4-byte UTF-8 code units and turn them into 3 4-byte + // UTF-16 pairs. 
Generating surrogate pairs is a little tricky though, but + // it is easier when we can assume they are all pairs. This version does + // not use the LUT, but 4 byte sequences are less common and the overhead + // of the extra memory access is less important than the early branch + // overhead in shorter sequences. + + // Swap byte pairs + // 10dddddd 10cccccc|10bbbbbb 11110aaa + // 10cccccc 10dddddd|11110aaa 10bbbbbb + uint8x16_t swap = vrev16q_u8(in); + // Shift left 2 bits + // cccccc00 dddddd00 xxxxxxxx bbbbbb00 + uint32x4_t shift = vreinterpretq_u32_u8(vshlq_n_u8(swap, 2)); + // Create a magic number containing the low 2 bits of the trail surrogate + // and all the corrections needed to create the pair. UTF-8 4b prefix = + // -0x0000|0xF000 surrogate offset = -0x0000|0x0040 (0x10000 << 6) + // surrogate high = +0x0000|0xD800 + // surrogate low = +0xDC00|0x0000 + // ------------------------------- + // = +0xDC00|0xE7C0 + uint32x4_t magic = vmovq_n_u32(0xDC00E7C0); + // Generate unadjusted trail surrogate minus lowest 2 bits + // xxxxxxxx xxxxxxxx|11110aaa bbbbbb00 + uint32x4_t trail = + vbslq_u32(vmovq_n_u32(0x0000FF00), vreinterpretq_u32_u8(swap), shift); + // Insert low 2 bits of trail surrogate to magic number for later + // 11011100 00000000 11100111 110000cc + uint16x8_t magic_with_low_2 = + vreinterpretq_u16_u32(vsraq_n_u32(magic, shift, 30)); + // Generate lead surrogate + // xxxxcccc ccdddddd|xxxxxxxx xxxxxxxx + uint32x4_t lead = vreinterpretq_u32_u16( + vsliq_n_u16(vreinterpretq_u16_u8(swap), vreinterpretq_u16_u8(in), 6)); + // Mask out lead + // 000000cc ccdddddd|xxxxxxxx xxxxxxxx + lead = vbicq_u32(lead, vmovq_n_u32(uint32_t(~0x03FFFFFF))); + // Blend pairs + // 000000cc ccdddddd|11110aaa bbbbbb00 + uint16x8_t blend = vreinterpretq_u16_u32( + vbslq_u32(vmovq_n_u32(0x0000FFFF), trail, lead)); + // Add magic number to finish the result + // 110111CC CCDDDDDD|110110AA BBBBBBCC + uint16x8_t composed = vaddq_u16(blend, magic_with_low_2); + // Byte swap if 
necessary + if simdutf_constexpr (!match_system(big_endian)) { + composed = + vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(composed))); + } + uint16_t buffer[8]; + vst1q_u16(reinterpret_cast(buffer), composed); + for (int k = 0; k < 6; k++) { + utf16_output[k] = buffer[k]; + } // the loop might compiler to a couple of instructions. + // We need some validation. See + // https://github.com/simdutf/simdutf/pull/631 +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + uint8x16_t expected_mask = simdutf_make_uint8x16_t( + 0xf8, 0xc0, 0xc0, 0xc0, 0xf8, 0xc0, 0xc0, 0xc0, 0xf8, 0xc0, 0xc0, + 0xc0, 0x0, 0x0, 0x0, 0x0); +#else + uint8x16_t expected_mask = {0xf8, 0xc0, 0xc0, 0xc0, 0xf8, 0xc0, + 0xc0, 0xc0, 0xf8, 0xc0, 0xc0, 0xc0, + 0x0, 0x0, 0x0, 0x0}; +#endif +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + uint8x16_t expected = simdutf_make_uint8x16_t( + 0xf0, 0x80, 0x80, 0x80, 0xf0, 0x80, 0x80, 0x80, 0xf0, 0x80, 0x80, + 0x80, 0x0, 0x0, 0x0, 0x0); +#else + uint8x16_t expected = {0xf0, 0x80, 0x80, 0x80, 0xf0, 0x80, 0x80, 0x80, + 0xf0, 0x80, 0x80, 0x80, 0x0, 0x0, 0x0, 0x0}; +#endif + uint8x16_t check = vceqq_u8(vandq_u8(in, expected_mask), expected); + bool correct = (vminvq_u32(vreinterpretq_u32_u8(check)) == 0xFFFFFFFF); + // The validation is just three instructions and it is not on a critical + // path. + if (correct) { + utf16_output += 6; // We wrote 3 32-bit surrogate pairs. + } + return 12; // We consumed 12 bytes. + } + // 3 1-4 byte sequences + uint8x16_t sh = vld1q_u8(reinterpret_cast( + simdutf::tables::utf8_to_utf16::shufutf8[idx])); + + // 1 byte: 00000000 00000000 00000000 0ddddddd + // 3 byte: 00000000 00000000 110ccccc 10dddddd + // 3 byte: 00000000 1110bbbb 10cccccc 10dddddd + // 4 byte: 11110aaa 10bbbbbb 10cccccc 10dddddd + uint32x4_t perm = vreinterpretq_u32_u8(vqtbl1q_u8(in, sh)); + // added to fix issue https://github.com/simdutf/simdutf/issues/514 + // We only want to write 2 * 16-bit code units when that is actually what we + // have. 
Unfortunately, we cannot trust the input. So it is possible to get + // 0xff as an input byte and it should not result in a surrogate pair. We + // need to check for that. + uint32_t permbuffer[4]; + vst1q_u32(permbuffer, perm); + // Mask the low and middle bytes + // 00000000 00000000 00000000 0ddddddd + uint32x4_t ascii = vandq_u32(perm, vmovq_n_u32(0x7f)); + // Because the surrogates need more work, the high surrogate is computed + // first. + uint32x4_t middlehigh = vshlq_n_u32(perm, 2); + // 00000000 00000000 00cccccc 00000000 + uint32x4_t middlebyte = vandq_u32(perm, vmovq_n_u32(0x3F00)); + // Start assembling the sequence. Since the 4th byte is in the same position + // as it would be in a surrogate and there is no dependency, shift left + // instead of right. 3 byte: 00000000 10bbbbxx xxxxxxxx xxxxxxxx 4 byte: + // 11110aaa bbbbbbxx xxxxxxxx xxxxxxxx + uint32x4_t ab = vbslq_u32(vmovq_n_u32(0xFF000000), perm, middlehigh); + // Top 16 bits contains the high ten bits of the surrogate pair before + // correction 3 byte: 00000000 10bbbbcc|cccc0000 00000000 4 byte: 11110aaa + // bbbbbbcc|cccc0000 00000000 - high 10 bits correct w/o correction + uint32x4_t abc = + vbslq_u32(vmovq_n_u32(0xFFFC0000), ab, vshlq_n_u32(middlebyte, 4)); + // Combine the low 6 or 7 bits by a shift right accumulate + // 3 byte: 00000000 00000010|bbbbcccc ccdddddd - low 16 bits correct + // 4 byte: 00000011 110aaabb|bbbbcccc ccdddddd - low 10 bits correct w/o + // correction + uint32x4_t composed = vsraq_n_u32(ascii, abc, 6); + // After this is for surrogates + // Blend the low and high surrogates + // 4 byte: 11110aaa bbbbbbcc|bbbbcccc ccdddddd + uint32x4_t mixed = vbslq_u32(vmovq_n_u32(0xFFFF0000), abc, composed); + // Clear the upper 6 bits of the low surrogate. Don't clear the upper bits + // yet as 0x10000 was not subtracted from the codepoint yet. 
4 byte: + // 11110aaa bbbbbbcc|000000cc ccdddddd + uint16x8_t masked_pair = vreinterpretq_u16_u32( + vbicq_u32(mixed, vmovq_n_u32(uint32_t(~0xFFFF03FF)))); + // Correct the remaining UTF-8 prefix, surrogate offset, and add the + // surrogate prefixes in one magic 16-bit addition. similar magic number but + // without the continue byte adjust and halfword swapped UTF-8 4b prefix = + // -0xF000|0x0000 surrogate offset = -0x0040|0x0000 (0x10000 << 6) + // surrogate high = +0xD800|0x0000 + // surrogate low = +0x0000|0xDC00 + // ----------------------------------- + // = +0xE7C0|0xDC00 + uint16x8_t magic = vreinterpretq_u16_u32(vmovq_n_u32(0xE7C0DC00)); + // 4 byte: 110110AA BBBBBBCC|110111CC CCDDDDDD - surrogate pair complete + uint32x4_t surrogates = + vreinterpretq_u32_u16(vaddq_u16(masked_pair, magic)); + // If the high bit is 1 (s32 less than zero), this needs a surrogate pair + uint32x4_t is_pair = vcltzq_s32(vreinterpretq_s32_u32(perm)); + + // Select either the 4 byte surrogate pair or the 2 byte solo codepoint + // 3 byte: 0xxxxxxx xxxxxxxx|bbbbcccc ccdddddd + // 4 byte: 110110AA BBBBBBCC|110111CC CCDDDDDD + uint32x4_t selected = vbslq_u32(is_pair, surrogates, composed); + // Byte swap if necessary + if simdutf_constexpr (!match_system(big_endian)) { + selected = + vreinterpretq_u32_u8(vrev16q_u8(vreinterpretq_u8_u32(selected))); + } + // Attempting to shuffle and store would be complex, just scalarize. + uint32_t buffer[4]; + vst1q_u32(buffer, selected); + // Test for the top bit of the surrogate mask. Remove due to issue 514 + // const uint32_t SURROGATE_MASK = match_system(big_endian) ? 0x80000000 : + // 0x00800000; + for (size_t i = 0; i < 3; i++) { + // Surrogate + // Used to be if (buffer[i] & SURROGATE_MASK) { + // See discussion above. 
+ // patch for issue https://github.com/simdutf/simdutf/issues/514 + if ((permbuffer[i] & 0xf8000000) == 0xf0000000) { + utf16_output[0] = uint16_t(buffer[i] >> 16); + utf16_output[1] = uint16_t(buffer[i] & 0xFFFF); + utf16_output += 2; + } else { + utf16_output[0] = uint16_t(buffer[i] & 0xFFFF); + utf16_output++; + } + } + return consumed; + } else { + // here we know that there is an error but we do not handle errors + return 12; + } +} +/* end file src/arm64/arm_convert_utf8_to_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +/* begin file src/arm64/arm_convert_utf8_to_utf32.cpp */ +// Convert up to 12 bytes from utf8 to utf32 using a mask indicating the +// end of the code points. Only the least significant 12 bits of the mask +// are accessed. +// It returns how many bytes were consumed (up to 12). +size_t convert_masked_utf8_to_utf32(const char *input, + uint64_t utf8_end_of_code_point_mask, + char32_t *&utf32_out) { + // we use an approach where we try to process up to 12 input bytes. + // Why 12 input bytes and not 16? Because we are concerned with the size of + // the lookup tables. Also 12 is nicely divisible by two and three. + // + uint32_t *&utf32_output = reinterpret_cast(utf32_out); + uint8x16_t in = vld1q_u8(reinterpret_cast(input)); + const uint16_t input_utf8_end_of_code_point_mask = + utf8_end_of_code_point_mask & 0xFFF; + // + // Optimization note: our main path below is load-latency dependent. Thus it + // is maybe beneficial to have fast paths that depend on branch prediction but + // have less latency. This results in more instructions but, potentially, also + // higher speeds. + // + // We first try a few fast paths. + if (utf8_end_of_code_point_mask == 0xfff) { + // We process in chunks of 12 bytes. + // use fast implementation in src/simdutf/arm64/simd.h + // Ideally the compiler can keep the tables in registers. 
+ simd8 temp{vreinterpretq_s8_u8(in)}; + temp.store_ascii_as_utf32_tbl(utf32_out); + utf32_output += 12; // We wrote 12 32-bit characters. + return 12; // We consumed 12 bytes. + } + if (input_utf8_end_of_code_point_mask == 0x924) { + // We want to take 4 3-byte UTF-8 code units and turn them into 4 4-byte + // UTF-32 code units. Convert to UTF-16 + uint16x4_t composed_utf16 = convert_utf8_3_byte_to_utf16(in); + // Zero extend and store via ST2 with a zero. + uint16x4x2_t interleaver = {{composed_utf16, vmov_n_u16(0)}}; + vst2_u16(reinterpret_cast(utf32_output), interleaver); + utf32_output += 4; // We wrote 4 32-bit characters. + return 12; // We consumed 12 bytes. + } + + // 2 byte sequences occur in short bursts in languages like Greek and Russian. + if (input_utf8_end_of_code_point_mask == 0xaaa) { + // We want to take 6 2-byte UTF-8 code units and turn them into 6 4-byte + // UTF-32 code units. Convert to UTF-16 + uint16x8_t composed_utf16 = convert_utf8_2_byte_to_utf16(in); + // Zero extend and store via ST2 with a zero. + uint16x8x2_t interleaver = {{composed_utf16, vmovq_n_u16(0)}}; + vst2q_u16(reinterpret_cast(utf32_output), interleaver); + utf32_output += 6; // We wrote 6 32-bit characters. + return 12; // We consumed 12 bytes. + } + /// Either no fast path or an unimportant fast path. + + const uint8_t idx = simdutf::tables::utf8_to_utf16::utf8bigindex + [input_utf8_end_of_code_point_mask][0]; + const uint8_t consumed = simdutf::tables::utf8_to_utf16::utf8bigindex + [input_utf8_end_of_code_point_mask][1]; + + if (idx < 64) { + // SIX (6) input code-code units + // Convert to UTF-16 + uint16x8_t composed_utf16 = convert_utf8_1_to_2_byte_to_utf16(in, idx); + // Zero extend and store with ST2 and zero + uint16x8x2_t interleaver = {{composed_utf16, vmovq_n_u16(0)}}; + vst2q_u16(reinterpret_cast(utf32_output), interleaver); + utf32_output += 6; // We wrote 6 32-bit characters. 
+ return consumed; + } else if (idx < 145) { + // FOUR (4) input code-code units + // UTF-16 and UTF-32 use similar algorithms, but UTF-32 skips the narrowing. + uint8x16_t sh = vld1q_u8(reinterpret_cast( + simdutf::tables::utf8_to_utf16::shufutf8[idx])); + // Shuffle + // 1 byte: 00000000 00000000 0ccccccc + // 2 byte: 00000000 110bbbbb 10cccccc + // 3 byte: 1110aaaa 10bbbbbb 10cccccc + uint32x4_t perm = vreinterpretq_u32_u8(vqtbl1q_u8(in, sh)); + // Split + // 00000000 00000000 0ccccccc + uint32x4_t ascii = vandq_u32(perm, vmovq_n_u32(0x7F)); // 6 or 7 bits + // Note: unmasked + // xxxxxxxx aaaaxxxx xxxxxxxx + uint32x4_t high = vshrq_n_u32(perm, 4); // 4 bits + // Use 16 bit bic instead of and. + // The top bits will be corrected later in the bsl + // 00000000 10bbbbbb 00000000 + uint32x4_t middle = vreinterpretq_u32_u16( + vbicq_u16(vreinterpretq_u16_u32(perm), + vmovq_n_u16(uint16_t(~0xff00)))); // 5 or 6 bits + // Combine low and middle with shift right accumulate + // 00000000 00xxbbbb bbcccccc + uint32x4_t lowmid = vsraq_n_u32(ascii, middle, 2); + // Insert top 4 bits from high byte with bitwise select + // 00000000 aaaabbbb bbcccccc + uint32x4_t composed = vbslq_u32(vmovq_n_u32(0x0000F000), high, lowmid); + vst1q_u32(utf32_output, composed); + utf32_output += 4; // We wrote 4 32-bit characters. + return consumed; + } else if (idx < 209) { + // THREE (3) input code-code units + if (input_utf8_end_of_code_point_mask == 0x888) { + // We want to take 3 4-byte UTF-8 code units and turn them into 3 4-byte + // UTF-32 code units. This uses the same method as the fixed 3 byte + // version, reversing and shift left insert. However, there is no need for + // a shuffle mask now, just rev16 and rev32. + // + // This version does not use the LUT, but 4 byte sequences are less common + // and the overhead of the extra memory access is less important than the + // early branch overhead in shorter sequences, so it comes last. 
+ + // Swap pairs of bytes + // 10dddddd|10cccccc|10bbbbbb|11110aaa + // 10cccccc 10dddddd|11110aaa 10bbbbbb + uint16x8_t swap1 = vreinterpretq_u16_u8(vrev16q_u8(in)); + // Shift left and insert + // xxxxcccc ccdddddd|xxxxxxxa aabbbbbb + uint16x8_t merge1 = vsliq_n_u16(swap1, vreinterpretq_u16_u8(in), 6); + // Swap 16-bit lanes + // xxxxcccc ccdddddd xxxxxxxa aabbbbbb + // xxxxxxxa aabbbbbb xxxxcccc ccdddddd + uint32x4_t swap2 = vreinterpretq_u32_u16(vrev32q_u16(merge1)); + // Shift insert again + // xxxxxxxx xxxaaabb bbbbcccc ccdddddd + uint32x4_t merge2 = vsliq_n_u32(swap2, vreinterpretq_u32_u16(merge1), 12); + // Clear the garbage + // 00000000 000aaabb bbbbcccc ccdddddd + uint32x4_t composed = vandq_u32(merge2, vmovq_n_u32(0x1FFFFF)); + // Store + vst1q_u32(utf32_output, composed); + + utf32_output += 3; // We wrote 3 32-bit characters. + return 12; // We consumed 12 bytes. + } + // Unlike UTF-16, doing a fast codepath doesn't have nearly as much benefit + // due to surrogates no longer being involved. + uint8x16_t sh = vld1q_u8(reinterpret_cast( + simdutf::tables::utf8_to_utf16::shufutf8[idx])); + // 1 byte: 00000000 00000000 00000000 0ddddddd + // 2 byte: 00000000 00000000 110ccccc 10dddddd + // 3 byte: 00000000 1110bbbb 10cccccc 10dddddd + // 4 byte: 11110aaa 10bbbbbb 10cccccc 10dddddd + uint32x4_t perm = vreinterpretq_u32_u8(vqtbl1q_u8(in, sh)); + // Ascii + uint32x4_t ascii = vandq_u32(perm, vmovq_n_u32(0x7F)); + uint32x4_t middle = vandq_u32(perm, vmovq_n_u32(0x3f00)); + // When converting the way we do, the 3 byte prefix will be interpreted as + // the 18th bit being set, since the code would interpret the lead byte + // (0b1110bbbb) as a continuation byte (0b10bbbbbb). To fix this, we can + // either xor or do an 8 bit add of the 6th bit shifted right by 1. Since + // NEON has shift right accumulate, we use that. 
+ // 4 byte 3 byte + // 10bbbbbb 1110bbbb + // 00000000 01000000 6th bit + // 00000000 00100000 shift right + // 10bbbbbb 0000bbbb add + // 00bbbbbb 0000bbbb mask + uint8x16_t correction = + vreinterpretq_u8_u32(vandq_u32(perm, vmovq_n_u32(0x00400000))); + uint32x4_t corrected = vreinterpretq_u32_u8( + vsraq_n_u8(vreinterpretq_u8_u32(perm), correction, 1)); + // 00000000 00000000 0000cccc ccdddddd + uint32x4_t cd = vsraq_n_u32(ascii, middle, 2); + // Insert twice + // xxxxxxxx xxxaaabb bbbbxxxx xxxxxxxx + uint32x4_t ab = vbslq_u32(vmovq_n_u32(0x01C0000), vshrq_n_u32(corrected, 6), + vshrq_n_u32(corrected, 4)); + // 00000000 000aaabb bbbbcccc ccdddddd + uint32x4_t composed = vbslq_u32(vmovq_n_u32(0xFFE00FFF), cd, ab); + // Store + vst1q_u32(utf32_output, composed); + utf32_output += 3; // We wrote 3 32-bit characters. + return consumed; + } else { + // here we know that there is an error but we do not handle errors + return 12; + } +} +/* end file src/arm64/arm_convert_utf8_to_utf32.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/arm64/arm_convert_utf16_to_latin1.cpp */ + +template +std::pair +arm_convert_utf16_to_latin1(const char16_t *buf, size_t len, + char *latin1_output) { + const char16_t *end = buf + len; + while (end - buf >= 8) { + uint16x8_t in = vld1q_u16(reinterpret_cast(buf)); + if simdutf_constexpr (!match_system(big_endian)) { + in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); + } + if (vmaxvq_u16(in) <= 0xff) { + // 1. pack the bytes + uint8x8_t latin1_packed = vmovn_u16(in); + // 2. store (8 bytes) + vst1_u8(reinterpret_cast(latin1_output), latin1_packed); + // 3. 
adjust pointers + buf += 8; + latin1_output += 8; + } else { + return std::make_pair(nullptr, reinterpret_cast(latin1_output)); + } + } // while + return std::make_pair(buf, latin1_output); +} + +template +std::pair +arm_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, + char *latin1_output) { + const char16_t *start = buf; + const char16_t *end = buf + len; + while (end - buf >= 8) { + uint16x8_t in = vld1q_u16(reinterpret_cast(buf)); + if simdutf_constexpr (!match_system(big_endian)) { + in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); + } + if (vmaxvq_u16(in) <= 0xff) { + // 1. pack the bytes + uint8x8_t latin1_packed = vmovn_u16(in); + // 2. store (8 bytes) + vst1_u8(reinterpret_cast(latin1_output), latin1_packed); + // 3. adjust pointers + buf += 8; + latin1_output += 8; + } else { + // Let us do a scalar fallback. + for (int k = 0; k < 8; k++) { + uint16_t word = scalar::utf16::swap_if_needed(buf[k]); + if (word <= 0xff) { + *latin1_output++ = char(word); + } else { + return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), + latin1_output); + } + } + } + } // while + return std::make_pair(result(error_code::SUCCESS, buf - start), + latin1_output); +} +/* end file src/arm64/arm_convert_utf16_to_latin1.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +/* begin file src/arm64/arm_convert_utf16_to_utf32.cpp */ +/* + The vectorized algorithm works on single SSE register i.e., it + loads eight 16-bit code units. + + We consider three cases: + 1. an input register contains no surrogates and each value + is in range 0x0000 .. 0x07ff. + 2. an input register contains no surrogates and values are + is in range 0x0000 .. 0xffff. + 3. an input register contains surrogates --- i.e. codepoints + can have 16 or 32 bits. + + Ad 1. 
+ + When values are less than 0x0800, it means that a 16-bit code unit + can be converted into: 1) a single UTF8 byte (when it is an ASCII + char) or 2) two UTF8 bytes. + + For this case we only do some shuffling to obtain these 2-byte + codes and finally compress the whole SSE register with a single + shuffle. + + We need a 256-entry lookup table to get a compression pattern + and the number of output bytes in the compressed vector register. + Each entry occupies 17 bytes. + + Ad 2. + + When values fit in 16-bit code units, but are above 0x07ff, then + a single word may produce one, two or three UTF8 bytes. + + We prepare data for all three of these cases in two registers. + The first register contains the lower two UTF8 bytes (used in all + cases), while the second one contains just the third byte for + the three-UTF8-bytes case. + + Finally these two registers are interleaved, forming an eight-element + array of 32-bit values. The array spans two SSE registers. + The bytes from the registers are compressed using two shuffles. + + We need a 256-entry lookup table to get a compression pattern + and the number of output bytes in the compressed vector register. + Each entry occupies 17 bytes. + + + To summarize: + - We need two 256-entry tables that have 8704 bytes in total. +*/ +/* + Returns a pair: the first unprocessed code unit from buf and utf32_output. + A scalar routine should carry on the conversion of the tail. 
+*/ +template +std::pair +arm_convert_utf16_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_out) { + uint32_t *utf32_output = reinterpret_cast(utf32_out); + const char16_t *end = buf + len; + + const uint16x8_t v_f800 = vmovq_n_u16((uint16_t)0xf800); + const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800); + + while (end - buf >= 8) { + uint16x8_t in = vld1q_u16(reinterpret_cast(buf)); + if simdutf_constexpr (!match_system(big_endian)) { + in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); + } + + const uint16x8_t surrogates_bytemask = + vceqq_u16(vandq_u16(in, v_f800), v_d800); + // It might seem like checking for surrogates_bitmask == 0xc000 could help. + // However, it is likely an uncommon occurrence. + if (vmaxvq_u16(surrogates_bytemask) == 0) { + // case: no surrogate pairs, extend all 16-bit code units to 32-bit code + // units + vst1q_u32(utf32_output, vmovl_u16(vget_low_u16(in))); + vst1q_u32(utf32_output + 4, vmovl_high_u16(in)); + utf32_output += 8; + buf += 8; + // surrogate pair(s) in a register + } else { + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. 
+ size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint16_t word = scalar::utf16::swap_if_needed(buf[k]); + if ((word & 0xF800) != 0xD800) { + *utf32_output++ = char32_t(word); + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = + scalar::utf16::swap_if_needed(buf[k + 1]); + k++; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return std::make_pair(nullptr, + reinterpret_cast(utf32_output)); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf32_output++ = char32_t(value); + } + } + buf += k; + } + } // while + return std::make_pair(buf, reinterpret_cast(utf32_output)); +} + +/* + Returns a pair: a result struct and utf8_output. + If there is an error, the count field of the result is the position of the + error. Otherwise, it is the position of the first unprocessed byte in buf + (even if finished). A scalar routing should carry on the conversion of the + tail if needed. +*/ +template +std::pair +arm_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, + char32_t *utf32_out) { + uint32_t *utf32_output = reinterpret_cast(utf32_out); + const char16_t *start = buf; + const char16_t *end = buf + len; + + const uint16x8_t v_f800 = vmovq_n_u16((uint16_t)0xf800); + const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800); + + while ((end - buf) >= 8) { + uint16x8_t in = vld1q_u16(reinterpret_cast(buf)); + if simdutf_constexpr (!match_system(big_endian)) { + in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); + } + + const uint16x8_t surrogates_bytemask = + vceqq_u16(vandq_u16(in, v_f800), v_d800); + // It might seem like checking for surrogates_bitmask == 0xc000 could help. + // However, it is likely an uncommon occurrence. 
+ if (vmaxvq_u16(surrogates_bytemask) == 0) { + // case: no surrogate pairs, extend all 16-bit code units to 32-bit code + // units + vst1q_u32(utf32_output, vmovl_u16(vget_low_u16(in))); + vst1q_u32(utf32_output + 4, vmovl_high_u16(in)); + utf32_output += 8; + buf += 8; + // surrogate pair(s) in a register + } else { + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint16_t word = scalar::utf16::swap_if_needed(buf[k]); + if ((word & 0xF800) != 0xD800) { + *utf32_output++ = char32_t(word); + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = + scalar::utf16::swap_if_needed(buf[k + 1]); + k++; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return std::make_pair( + result(error_code::SURROGATE, buf - start + k - 1), + reinterpret_cast(utf32_output)); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf32_output++ = char32_t(value); + } + } + buf += k; + } + } // while + return std::make_pair(result(error_code::SUCCESS, buf - start), + reinterpret_cast(utf32_output)); +} +/* end file src/arm64/arm_convert_utf16_to_utf32.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF8 +/* begin file src/arm64/arm_convert_utf16_to_utf8.cpp */ +/* + The vectorized algorithm works on single SSE register i.e., it + loads eight 16-bit code units. + + We consider three cases: + 1. an input register contains no surrogates and each value + is in range 0x0000 .. 0x07ff. + 2. an input register contains no surrogates and values are + is in range 0x0000 .. 0xffff. + 3. an input register contains surrogates --- i.e. codepoints + can have 16 or 32 bits. 
+ + Ad 1. + + When values are less than 0x0800, it means that a 16-bit code unit + can be converted into: 1) single UTF8 byte (when it is an ASCII + char) or 2) two UTF8 bytes. + + For this case we do only some shuffle to obtain these 2-byte + codes and finally compress the whole SSE register with a single + shuffle. + + We need 256-entry lookup table to get a compression pattern + and the number of output bytes in the compressed vector register. + Each entry occupies 17 bytes. + + Ad 2. + + When values fit in 16-bit code units, but are above 0x07ff, then + a single word may produce one, two or three UTF8 bytes. + + We prepare data for all these three cases in two registers. + The first register contains lower two UTF8 bytes (used in all + cases), while the second one contains just the third byte for + the three-UTF8-bytes case. + + Finally these two registers are interleaved forming eight-element + array of 32-bit values. The array spans two SSE registers. + The bytes from the registers are compressed using two shuffles. + + We need 256-entry lookup table to get a compression pattern + and the number of output bytes in the compressed vector register. + Each entry occupies 17 bytes. + + + To summarize: + - We need two 256-entry tables that have 8704 bytes in total. +*/ +/* + Returns a pair: the first unprocessed byte from buf and utf8_output + A scalar routing should carry on the conversion of the tail. 
+*/ +template +std::pair +arm_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_out) { + uint8_t *utf8_output = reinterpret_cast(utf8_out); + const char16_t *end = buf + len; + + const uint16x8_t v_f800 = vmovq_n_u16((uint16_t)0xf800); + const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800); + const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080); + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 + while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { + uint16x8_t in = vld1q_u16(reinterpret_cast(buf)); + if simdutf_constexpr (!match_system(big_endian)) { + in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); + } + if (vmaxvq_u16(in) <= 0x7F) { // ASCII fast path!!!! + // It is common enough that we have sequences of 16 consecutive ASCII + // characters. + uint16x8_t nextin = + vld1q_u16(reinterpret_cast(buf) + 8); + if simdutf_constexpr (!match_system(big_endian)) { + nextin = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(nextin))); + } + if (vmaxvq_u16(nextin) > 0x7F) { + // 1. pack the bytes + // obviously suboptimal. + uint8x8_t utf8_packed = vmovn_u16(in); + // 2. store (8 bytes) + vst1_u8(utf8_output, utf8_packed); + // 3. adjust pointers + buf += 8; + utf8_output += 8; + in = nextin; + } else { + // 1. pack the bytes + // obviously suboptimal. + uint8x16_t utf8_packed = vmovn_high_u16(vmovn_u16(in), nextin); + // 2. store (16 bytes) + vst1q_u8(utf8_output, utf8_packed); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! + } + } + + if (vmaxvq_u16(in) <= 0x7FF) { + // 1. 
prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 + // expected output : [110a|aaaa|10bb|bbbb] x 8 + const uint16x8_t v_1f00 = vmovq_n_u16((int16_t)0x1f00); + const uint16x8_t v_003f = vmovq_n_u16((int16_t)0x003f); + + // t0 = [000a|aaaa|bbbb|bb00] + const uint16x8_t t0 = vshlq_n_u16(in, 2); + // t1 = [000a|aaaa|0000|0000] + const uint16x8_t t1 = vandq_u16(t0, v_1f00); + // t2 = [0000|0000|00bb|bbbb] + const uint16x8_t t2 = vandq_u16(in, v_003f); + // t3 = [000a|aaaa|00bb|bbbb] + const uint16x8_t t3 = vorrq_u16(t1, t2); + // t4 = [110a|aaaa|10bb|bbbb] + const uint16x8_t t4 = vorrq_u16(t3, v_c080); + // 2. merge ASCII and 2-byte codewords + const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F); + const uint16x8_t one_byte_bytemask = vcleq_u16(in, v_007f); + const uint8x16_t utf8_unpacked = + vreinterpretq_u8_u16(vbslq_u16(one_byte_bytemask, in, t4)); + // 3. prepare bitmask for 8-bit lookup +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint16x8_t mask = simdutf_make_uint16x8_t( + 0x0001, 0x0004, 0x0010, 0x0040, 0x0002, 0x0008, 0x0020, 0x0080); +#else + const uint16x8_t mask = {0x0001, 0x0004, 0x0010, 0x0040, + 0x0002, 0x0008, 0x0020, 0x0080}; +#endif + uint16_t m2 = vaddvq_u16(vandq_u16(one_byte_bytemask, mask)); + // 4. pack the bytes + const uint8_t *row = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0]; + const uint8x16_t shuffle = vld1q_u8(row + 1); + const uint8x16_t utf8_packed = vqtbl1q_u8(utf8_unpacked, shuffle); + + // 5. store bytes + vst1q_u8(utf8_output, utf8_packed); + + // 6. adjust pointers + buf += 8; + utf8_output += row[0]; + continue; + } + const uint16x8_t surrogates_bytemask = + vceqq_u16(vandq_u16(in, v_f800), v_d800); + // It might seem like checking for surrogates_bitmask == 0xc000 could help. + // However, it is likely an uncommon occurrence. 
+ if (vmaxvq_u16(surrogates_bytemask) == 0) { + // case: code units from register produce either 1, 2 or 3 UTF-8 bytes +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint16x8_t dup_even = simdutf_make_uint16x8_t( + 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); +#else + const uint16x8_t dup_even = {0x0000, 0x0202, 0x0404, 0x0606, + 0x0808, 0x0a0a, 0x0c0c, 0x0e0e}; +#endif + /* In this branch we handle three cases: + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + single UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two + UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + three UTF-8 bytes + + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. + + We precompute byte 1 for case #1 and the common byte for cases #2 & #3 + in register t2. + + We precompute byte 1 for case #3 and -- **conditionally** -- precompute + either byte 1 for case #2 or byte 2 for case #3. Note that they + differ by exactly one bit. + + Finally from these two code units we build proper UTF-8 sequence, taking + into account the case (i.e, the number of bytes to write). 
+ */ + /** + * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: + * t2 => [0ccc|cccc] [10cc|cccc] + * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) + */ +#define simdutf_vec(x) vmovq_n_u16(static_cast(x)) + // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] + const uint16x8_t t0 = vreinterpretq_u16_u8( + vqtbl1q_u8(vreinterpretq_u8_u16(in), vreinterpretq_u8_u16(dup_even))); + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] + const uint16x8_t t1 = vandq_u16(t0, simdutf_vec(0b0011111101111111)); + // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] + const uint16x8_t t2 = vorrq_u16(t1, simdutf_vec(0b1000000000000000)); + + // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] + const uint16x8_t s0 = vshrq_n_u16(in, 12); + // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] + const uint16x8_t s1 = vandq_u16(in, simdutf_vec(0b0000111111000000)); + // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000] + const uint16x8_t s1s = vshlq_n_u16(s1, 2); + // [00bb|bbbb|0000|aaaa] + const uint16x8_t s2 = vorrq_u16(s0, s1s); + // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] + const uint16x8_t s3 = vorrq_u16(s2, simdutf_vec(0b1100000011100000)); + const uint16x8_t v_07ff = vmovq_n_u16((uint16_t)0x07FF); + const uint16x8_t one_or_two_bytes_bytemask = vcleq_u16(in, v_07ff); + const uint16x8_t m0 = + vbicq_u16(simdutf_vec(0b0100000000000000), one_or_two_bytes_bytemask); + const uint16x8_t s4 = veorq_u16(s3, m0); +#undef simdutf_vec + + // 4. expand code units 16-bit => 32-bit + const uint8x16_t out0 = vreinterpretq_u8_u16(vzip1q_u16(t2, s4)); + const uint8x16_t out1 = vreinterpretq_u8_u16(vzip2q_u16(t2, s4)); + + // 5. 
compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle + const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F); + const uint16x8_t one_byte_bytemask = vcleq_u16(in, v_007f); +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint16x8_t onemask = simdutf_make_uint16x8_t( + 0x0001, 0x0004, 0x0010, 0x0040, 0x0100, 0x0400, 0x1000, 0x4000); + const uint16x8_t twomask = simdutf_make_uint16x8_t( + 0x0002, 0x0008, 0x0020, 0x0080, 0x0200, 0x0800, 0x2000, 0x8000); +#else + const uint16x8_t onemask = {0x0001, 0x0004, 0x0010, 0x0040, + 0x0100, 0x0400, 0x1000, 0x4000}; + const uint16x8_t twomask = {0x0002, 0x0008, 0x0020, 0x0080, + 0x0200, 0x0800, 0x2000, 0x8000}; +#endif + const uint16x8_t combined = + vorrq_u16(vandq_u16(one_byte_bytemask, onemask), + vandq_u16(one_or_two_bytes_bytemask, twomask)); + const uint16_t mask = vaddvq_u16(combined); + // The following fast path may or may not be beneficial. + /*if(mask == 0) { + // We only have three-byte code units. Use fast path. + const uint8x16_t shuffle = {2,3,1,6,7,5,10,11,9,14,15,13,0,0,0,0}; + const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle); + const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle); + vst1q_u8(utf8_output, utf8_0); + utf8_output += 12; + vst1q_u8(utf8_output, utf8_1); + utf8_output += 12; + buf += 8; + continue; + }*/ + const uint8_t mask0 = uint8_t(mask); + + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; + const uint8x16_t shuffle0 = vld1q_u8(row0 + 1); + const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle0); + + const uint8_t mask1 = static_cast(mask >> 8); + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; + const uint8x16_t shuffle1 = vld1q_u8(row1 + 1); + const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle1); + + vst1q_u8(utf8_output, utf8_0); + utf8_output += row0[0]; + vst1q_u8(utf8_output, utf8_1); + utf8_output += row1[0]; + + buf += 8; + // surrogate pair(s) in a register + } else { + // Let us do a scalar 
fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint16_t word = scalar::utf16::swap_if_needed(buf[k]); + if ((word & 0xFF80) == 0) { + *utf8_output++ = char(word); + } else if ((word & 0xF800) == 0) { + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xF800) != 0xD800) { + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = + scalar::utf16::swap_if_needed(buf[k + 1]); + k++; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return std::make_pair(nullptr, + reinterpret_cast(utf8_output)); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf8_output++ = char((value >> 18) | 0b11110000); + *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((value & 0b111111) | 0b10000000); + } + } + buf += k; + } + } // while + + return std::make_pair(buf, reinterpret_cast(utf8_output)); +} + +/* + Returns a pair: a result struct and utf8_output. + If there is an error, the count field of the result is the position of the + error. Otherwise, it is the position of the first unprocessed byte in buf + (even if finished). A scalar routing should carry on the conversion of the + tail if needed. 
+*/ +template +std::pair +arm_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, + char *utf8_out) { + uint8_t *utf8_output = reinterpret_cast(utf8_out); + const char16_t *start = buf; + const char16_t *end = buf + len; + + const uint16x8_t v_f800 = vmovq_n_u16((uint16_t)0xf800); + const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800); + const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080); + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 + + while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { + uint16x8_t in = vld1q_u16(reinterpret_cast(buf)); + if simdutf_constexpr (!match_system(big_endian)) { + in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); + } + if (vmaxvq_u16(in) <= 0x7F) { // ASCII fast path!!!! + // It is common enough that we have sequences of 16 consecutive ASCII + // characters. + uint16x8_t nextin = + vld1q_u16(reinterpret_cast(buf) + 8); + if simdutf_constexpr (!match_system(big_endian)) { + nextin = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(nextin))); + } + if (vmaxvq_u16(nextin) > 0x7F) { + // 1. pack the bytes + // obviously suboptimal. + uint8x8_t utf8_packed = vmovn_u16(in); + // 2. store (8 bytes) + vst1_u8(utf8_output, utf8_packed); + // 3. adjust pointers + buf += 8; + utf8_output += 8; + in = nextin; + } else { + // 1. pack the bytes + // obviously suboptimal. + uint8x16_t utf8_packed = vmovn_high_u16(vmovn_u16(in), nextin); + // 2. store (16 bytes) + vst1q_u8(utf8_output, utf8_packed); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! + } + } + + if (vmaxvq_u16(in) <= 0x7FF) { + // 1. 
prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 + // expected output : [110a|aaaa|10bb|bbbb] x 8 + const uint16x8_t v_1f00 = vmovq_n_u16((int16_t)0x1f00); + const uint16x8_t v_003f = vmovq_n_u16((int16_t)0x003f); + + // t0 = [000a|aaaa|bbbb|bb00] + const uint16x8_t t0 = vshlq_n_u16(in, 2); + // t1 = [000a|aaaa|0000|0000] + const uint16x8_t t1 = vandq_u16(t0, v_1f00); + // t2 = [0000|0000|00bb|bbbb] + const uint16x8_t t2 = vandq_u16(in, v_003f); + // t3 = [000a|aaaa|00bb|bbbb] + const uint16x8_t t3 = vorrq_u16(t1, t2); + // t4 = [110a|aaaa|10bb|bbbb] + const uint16x8_t t4 = vorrq_u16(t3, v_c080); + // 2. merge ASCII and 2-byte codewords + const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F); + const uint16x8_t one_byte_bytemask = vcleq_u16(in, v_007f); + const uint8x16_t utf8_unpacked = + vreinterpretq_u8_u16(vbslq_u16(one_byte_bytemask, in, t4)); + // 3. prepare bitmask for 8-bit lookup +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint16x8_t mask = simdutf_make_uint16x8_t( + 0x0001, 0x0004, 0x0010, 0x0040, 0x0002, 0x0008, 0x0020, 0x0080); +#else + const uint16x8_t mask = {0x0001, 0x0004, 0x0010, 0x0040, + 0x0002, 0x0008, 0x0020, 0x0080}; +#endif + uint16_t m2 = vaddvq_u16(vandq_u16(one_byte_bytemask, mask)); + // 4. pack the bytes + const uint8_t *row = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0]; + const uint8x16_t shuffle = vld1q_u8(row + 1); + const uint8x16_t utf8_packed = vqtbl1q_u8(utf8_unpacked, shuffle); + + // 5. store bytes + vst1q_u8(utf8_output, utf8_packed); + + // 6. adjust pointers + buf += 8; + utf8_output += row[0]; + continue; + } + const uint16x8_t surrogates_bytemask = + vceqq_u16(vandq_u16(in, v_f800), v_d800); + // It might seem like checking for surrogates_bitmask == 0xc000 could help. + // However, it is likely an uncommon occurrence. 
+ if (vmaxvq_u16(surrogates_bytemask) == 0) { + // case: code units from register produce either 1, 2 or 3 UTF-8 bytes +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint16x8_t dup_even = simdutf_make_uint16x8_t( + 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); +#else + const uint16x8_t dup_even = {0x0000, 0x0202, 0x0404, 0x0606, + 0x0808, 0x0a0a, 0x0c0c, 0x0e0e}; +#endif + /* In this branch we handle three cases: + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + single UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two + UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + three UTF-8 bytes + + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. + + We precompute byte 1 for case #1 and the common byte for cases #2 & #3 + in register t2. + + We precompute byte 1 for case #3 and -- **conditionally** -- precompute + either byte 1 for case #2 or byte 2 for case #3. Note that they + differ by exactly one bit. + + Finally from these two code units we build proper UTF-8 sequence, taking + into account the case (i.e, the number of bytes to write). 
+ */ + /** + * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: + * t2 => [0ccc|cccc] [10cc|cccc] + * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) + */ +#define simdutf_vec(x) vmovq_n_u16(static_cast(x)) + // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] + const uint16x8_t t0 = vreinterpretq_u16_u8( + vqtbl1q_u8(vreinterpretq_u8_u16(in), vreinterpretq_u8_u16(dup_even))); + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] + const uint16x8_t t1 = vandq_u16(t0, simdutf_vec(0b0011111101111111)); + // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] + const uint16x8_t t2 = vorrq_u16(t1, simdutf_vec(0b1000000000000000)); + + // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] + const uint16x8_t s0 = vshrq_n_u16(in, 12); + // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] + const uint16x8_t s1 = vandq_u16(in, simdutf_vec(0b0000111111000000)); + // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000] + const uint16x8_t s1s = vshlq_n_u16(s1, 2); + // [00bb|bbbb|0000|aaaa] + const uint16x8_t s2 = vorrq_u16(s0, s1s); + // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] + const uint16x8_t s3 = vorrq_u16(s2, simdutf_vec(0b1100000011100000)); + const uint16x8_t v_07ff = vmovq_n_u16((uint16_t)0x07FF); + const uint16x8_t one_or_two_bytes_bytemask = vcleq_u16(in, v_07ff); + const uint16x8_t m0 = + vbicq_u16(simdutf_vec(0b0100000000000000), one_or_two_bytes_bytemask); + const uint16x8_t s4 = veorq_u16(s3, m0); +#undef simdutf_vec + + // 4. expand code units 16-bit => 32-bit + const uint8x16_t out0 = vreinterpretq_u8_u16(vzip1q_u16(t2, s4)); + const uint8x16_t out1 = vreinterpretq_u8_u16(vzip2q_u16(t2, s4)); + + // 5. 
compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle + const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F); + const uint16x8_t one_byte_bytemask = vcleq_u16(in, v_007f); +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint16x8_t onemask = simdutf_make_uint16x8_t( + 0x0001, 0x0004, 0x0010, 0x0040, 0x0100, 0x0400, 0x1000, 0x4000); + const uint16x8_t twomask = simdutf_make_uint16x8_t( + 0x0002, 0x0008, 0x0020, 0x0080, 0x0200, 0x0800, 0x2000, 0x8000); +#else + const uint16x8_t onemask = {0x0001, 0x0004, 0x0010, 0x0040, + 0x0100, 0x0400, 0x1000, 0x4000}; + const uint16x8_t twomask = {0x0002, 0x0008, 0x0020, 0x0080, + 0x0200, 0x0800, 0x2000, 0x8000}; +#endif + const uint16x8_t combined = + vorrq_u16(vandq_u16(one_byte_bytemask, onemask), + vandq_u16(one_or_two_bytes_bytemask, twomask)); + const uint16_t mask = vaddvq_u16(combined); + // The following fast path may or may not be beneficial. + /*if(mask == 0) { + // We only have three-byte code units. Use fast path. + const uint8x16_t shuffle = {2,3,1,6,7,5,10,11,9,14,15,13,0,0,0,0}; + const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle); + const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle); + vst1q_u8(utf8_output, utf8_0); + utf8_output += 12; + vst1q_u8(utf8_output, utf8_1); + utf8_output += 12; + buf += 8; + continue; + }*/ + const uint8_t mask0 = uint8_t(mask); + + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; + const uint8x16_t shuffle0 = vld1q_u8(row0 + 1); + const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle0); + + const uint8_t mask1 = static_cast(mask >> 8); + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; + const uint8x16_t shuffle1 = vld1q_u8(row1 + 1); + const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle1); + + vst1q_u8(utf8_output, utf8_0); + utf8_output += row0[0]; + vst1q_u8(utf8_output, utf8_1); + utf8_output += row1[0]; + + buf += 8; + // surrogate pair(s) in a register + } else { + // Let us do a scalar 
fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint16_t word = scalar::utf16::swap_if_needed(buf[k]); + if ((word & 0xFF80) == 0) { + *utf8_output++ = char(word); + } else if ((word & 0xF800) == 0) { + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xF800) != 0xD800) { + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = + scalar::utf16::swap_if_needed(buf[k + 1]); + k++; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return std::make_pair( + result(error_code::SURROGATE, buf - start + k - 1), + reinterpret_cast(utf8_output)); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf8_output++ = char((value >> 18) | 0b11110000); + *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((value & 0b111111) | 0b10000000); + } + } + buf += k; + } + } // while + + return std::make_pair(result(error_code::SUCCESS, buf - start), + reinterpret_cast(utf8_output)); +} + +template +simdutf_really_inline size_t +arm64_utf8_length_from_utf16_bytemask(const char16_t *in, size_t size) { + constexpr size_t N = + 16; // we process 16 char16_t at a time, this is NEON specific + + if (N + 1 > size) { + return scalar::utf16::utf8_length_from_utf16(in, size); + } // special case for short inputs + size_t count = 0; + const auto one = vmovq_n_u8(1); + + // The general strategy is as follows: + 
// 1. each code unit yields at least one byte, we can account for that by + // adding the size of the input to the count. + // 2. ASCII bytes then count for zero. + // 3. Values that yield 2 or 3 bytes in UTF-8 add 1 or 2 to the count. + // 4. Surrogate pairs are handled by adding 1 for each surrogate code unit + // for a total of 4 bytes for the pair. + size_t pos = 0; + // We will go through the input at least once. + for (; size - pos >= N; pos += N) { + auto base_input = vld2q_u8(reinterpret_cast(in + pos)); + // + size_t idx = 1; // we use the second lane of the deinterleaved load + if simdutf_constexpr (!match_system(big_endian)) { + idx = 0; + } + size_t idx_lsb = idx ^ 1; + auto c0 = + vminq_u8(vorrq_u8(vandq_u8(base_input.val[idx_lsb], vdupq_n_u8(0x80)), + base_input.val[idx]), + one); + auto c1 = vminq_u8(vandq_u8(base_input.val[idx], vdupq_n_u8(0xf8)), one); + auto is_surrogate = vcleq_u8( + vsubq_u8(base_input.val[idx], vdupq_n_u8(0xd8)), vdupq_n_u8(7)); + + auto v_count = vaddq_u8(c1, c0); + v_count = vaddq_u8(v_count, is_surrogate); + count += vaddlvq_u8(v_count); // sum the counts in the vector + ///////// + // The vaddlvq_u8 instruction could be slow on some hardware. We could + // consider various alternatives if that is an issue such as accumulating + // into a vector of uint16_t or uint8_t and summing only at the end or + // periodically. However, on fast chipsets, like Apple Silicon, it is + // likely fast enough, or even faster than alternatives. + ///////// + } + count += pos; + // If we end with a high surrogate, it might be unpaired or not, we + // don't know. It counts as a pair suggarate for now. 
+ + if (scalar::utf16::is_high_surrogate(in[pos - 1])) { + if (pos == size) { + count += 2; + } else if (scalar::utf16::is_low_surrogate(in[pos])) { + pos += 1; + count += 2; + } + } + return count + scalar::utf16::utf8_length_from_utf16(in + pos, + size - pos); +} + +template +simdutf_really_inline result +arm64_utf8_length_from_utf16_with_replacement(const char16_t *in, size_t size) { + constexpr size_t N = + 16; // we process 16 char16_t at a time, this is NEON specific + + if (N + 1 > size) { + return scalar::utf16::utf8_length_from_utf16_with_replacement( + in, size); + } // special case for short input + size_t count = 0; + bool any_surrogates = false; + const auto one = vmovq_n_u8(1); + + // The general strategy is as follows: + // 1. each code unit yields at least one byte, we can account for that by + // adding the size of the input to the count. + // 2. ASCII bytes then count for zero. + // 3. Values that yield 2 or 3 bytes in UTF-8 add 1 or 2 to the count. + // 4. Surrogate pairs are handled by adding 1 for each surrogate code unit + // for a total of 4 bytes for the pair. + // 5. Unpaired surrogate elements have value 0xfffd in UTF-8, which is 3 + // bytes, + // so we need to add 2 more bytes for each unpaired surrogate. In effect, + // an unpaired surrogate should count for 1 (+1 for the ) + // + // Our strategy is to proceed like the arm64_utf8_length_from_utf16_bytemask + // function, but, at the same time, to record the number of unpaired + // surrogates. and then adjust the count accordingly. + + // If we start with a low surrogate, it is unpaired and the SIMD code won't + // detect it, so we handle that here. + size_t number_of_unpaired_surrogates = 0; + if (scalar::utf16::is_low_surrogate(in[0])) { + number_of_unpaired_surrogates += 1; + any_surrogates = true; + } + size_t pos = 0; + // We will go through the input at least once. 
+ for (; size - pos >= N + 1; pos += N) { + auto base_input = vld2q_u8(reinterpret_cast(in + pos)); + size_t idx = 1; // we use the second lane of the deinterleaved load + if simdutf_constexpr (!match_system(big_endian)) { + idx = 0; + } + size_t idx_lsb = idx ^ 1; + auto is_surrogate = vcleq_u8( + vsubq_u8(base_input.val[idx], vdupq_n_u8(0xd8)), vdupq_n_u8(7)); + // We count on the fact that most inputs do not have surrogates. + if (vmaxvq_u32(vreinterpretq_u32_u8(is_surrogate)) || + scalar::utf16::is_low_surrogate(in[pos + N])) { + any_surrogates = true; + // there is at least one surrogate in the block + // We use this to check that surrogates are paired correctly. + // It is the input shifted by one code unit (two bytes). + // We use it to detect *low* surrogates. + auto one_unit_offset_input = + vld2q_u8(reinterpret_cast(in + pos + 1)); + // + + auto lb_masked = vandq_u8(base_input.val[idx], vdupq_n_u8(0xfc)); + auto block_masked = + vandq_u8(one_unit_offset_input.val[idx], vdupq_n_u8(0xfc)); + auto lb_is_high = vceqq_u8(lb_masked, vdupq_n_u8(0xd8)); + auto block_is_low = vceqq_u8(block_masked, vdupq_n_u8(0xdc)); + + // illseq will mark every low surrogate in the offset block. + // that is not preceded by a high surrogate + // + // It will also mark every high surrogate in the main block + // that is not followed by a low surrogate + // + // This means that it will miss undetectable errors, like a high surrogate + // at the last index of the main block. And similarly a low surrogate + // at the index prior to the main block that was not preceded by a high + // surrogate. + // + // The interpretation of the values is that they start with the end value + // of the prior block, and end just before the end of the main block + // (minus one). 
+ auto illseq = veorq_u8(lb_is_high, block_is_low); + number_of_unpaired_surrogates += vaddlvq_u8(vandq_u8(illseq, one)); + } + auto c0 = + vminq_u8(vorrq_u8(vandq_u8(base_input.val[idx_lsb], vdupq_n_u8(0x80)), + base_input.val[idx]), + one); + auto c1 = vminq_u8(vandq_u8(base_input.val[idx], vdupq_n_u8(0xf8)), one); + + auto v_count = vaddq_u8(c1, c0); + v_count = vaddq_u8(v_count, is_surrogate); + count += vaddlvq_u8(v_count); // sum the counts in the vector + ///////// + // The vaddlvq_u8 instruction could be slow on some hardware. We could + // consider various alternatives if that is an issue such as accumulating + // into a vector of uint16_t or uint8_t and summing only at the end or + // periodically. However, on fast chipsets, like Apple Silicon, it is + // likely fast enough, or even faster than alternatives. + ///////// + } + + //!!!!!!!!!!!!!!! + // Here, we have processed up to pos - 1 (inclusive) code units. Except for + // the case where the value at pos is a low surrogate not preceded by a high + // surrogate. In this special case, we have already added one to the count for + // the unpaired low surrogate. + //!!!!!!!!!!!!!!! + if (scalar::utf16::is_low_surrogate(in[pos])) { + any_surrogates = true; + if (!scalar::utf16::is_high_surrogate(in[pos - 1])) { + number_of_unpaired_surrogates -= 1; + count += 2; + pos += 1; + } + } + count += pos; + count += number_of_unpaired_surrogates; + // If we end with a high surrogate, it might be unpaired or not, we + // don't know. It counts as a pair suggarate for now. + if (scalar::utf16::is_high_surrogate(in[pos - 1])) { + any_surrogates = true; + if (pos == size) { + count += 2; + } else if (scalar::utf16::is_low_surrogate(in[pos])) { + pos += 1; + count += 2; + } + } + result scalar_result = + scalar::utf16::utf8_length_from_utf16_with_replacement( + in + pos, size - pos); + return {any_surrogates ? 
SURROGATE : scalar_result.error, + count + scalar_result.count}; +} +/* end file src/arm64/arm_convert_utf16_to_utf8.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_BASE64 +/* begin file src/arm64/arm_base64.cpp */ +/** + * References and further reading: + * + * Wojciech Muła, Daniel Lemire, Base64 encoding and decoding at almost the + * speed of a memory copy, Software: Practice and Experience 50 (2), 2020. + * https://arxiv.org/abs/1910.05109 + * + * Wojciech Muła, Daniel Lemire, Faster Base64 Encoding and Decoding using AVX2 + * Instructions, ACM Transactions on the Web 12 (3), 2018. + * https://arxiv.org/abs/1704.00605 + * + * Simon Josefsson. 2006. The Base16, Base32, and Base64 Data Encodings. + * https://tools.ietf.org/html/rfc4648. (2006). Internet Engineering Task Force, + * Request for Comments: 4648. + * + * Alfred Klomp. 2014a. Fast Base64 encoding/decoding with SSE vectorization. + * http://www.alfredklomp.com/programming/sse-base64/. (2014). + * + * Alfred Klomp. 2014b. Fast Base64 stream encoder/decoder in C99, with SIMD + * acceleration. https://github.com/aklomp/base64. (2014). + * + * Hanson Char. 2014. A Fast and Correct Base 64 Codec. (2014). + * https://aws.amazon.com/blogs/developer/a-fast-and-correct-base-64-codec/ + * + * Nick Kopp. 2013. Base64 Encoding on a GPU. + * https://www.codeproject.com/Articles/276993/Base-Encoding-on-a-GPU. (2013). + */ + +/** + * Insert a line feed character in the 16-byte input at index K in [0,16). 
+ */ +inline uint8x16_t insert_line_feed16(uint8x16_t input, size_t K) { + static const uint8_t shuffle_masks[16][16] = { + {0x80, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}, + {0, 0x80, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}, + {0, 1, 0x80, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}, + {0, 1, 2, 0x80, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}, + {0, 1, 2, 3, 0x80, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}, + {0, 1, 2, 3, 4, 0x80, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}, + {0, 1, 2, 3, 4, 5, 0x80, 6, 7, 8, 9, 10, 11, 12, 13, 14}, + {0, 1, 2, 3, 4, 5, 6, 0x80, 7, 8, 9, 10, 11, 12, 13, 14}, + {0, 1, 2, 3, 4, 5, 6, 7, 0x80, 8, 9, 10, 11, 12, 13, 14}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 0x80, 9, 10, 11, 12, 13, 14}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0x80, 10, 11, 12, 13, 14}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0x80, 11, 12, 13, 14}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0x80, 12, 13, 14}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 0x80, 13, 14}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 0x80, 14}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0x80}}; + // Prepare a vector with '\n' (0x0A) + uint8x16_t line_feed_vector = vdupq_n_u8('\n'); + + // Load the precomputed shuffle mask for K + uint8x16_t mask = vld1q_u8(shuffle_masks[K]); + + // Create a mask where 0x80 indicates the line feed position + uint8x16_t lf_pos = vceqq_u8(mask, vdupq_n_u8(0x80)); + + uint8x16_t result = vqtbl1q_u8(input, mask); + + // Use vbsl to select '\n' where lf_pos is true, else keep input bytes + return vbslq_u8(lf_pos, line_feed_vector, result); +} + +// offset is the number of characters in the current line. +// It can range from 0 to line_length (inclusive). +// If offset == line_length, we need to insert a line feed before writing +// anything. +size_t write_output_with_line_feeds(uint8_t *dst, uint8x16_t src, + size_t line_length, size_t &offset) { + // Fast path: no need to insert line feeds + // If we are at offset, we would write from [offset, offset + 16). 
+ // We need that line_length >= offset + 16. + if (offset + 16 <= line_length) { + // No need to insert line feeds + vst1q_u8(dst, src); + offset += 16; // offset could be line_length here. + return 16; + } + + // We have that offset + 16 >= line_length + // the common case is that line_length is greater than 16 + if (simdutf_likely(line_length >= 16)) { + // offset <= line_length. + // offset + 16 > line_length + // So line_length - offset < 16 + // and line_length - offset >= 0 + uint8x16_t chunk = insert_line_feed16(src, line_length - offset); + vst1q_u8(dst, chunk); + // Not ideal to pull the last element and write it separately but + // it simplifies the code. + *(dst + 16) = vgetq_lane_u8(src, 15); + offset += 16 - line_length; + return 16 + 1; // we wrote 16 bytes plus one line feed + } + // Uncommon case where line_length < 16 + // This is going to be SLOW. + else { + uint8_t buffer[16]; + vst1q_u8(buffer, src); + size_t out_pos = 0; + size_t local_offset = offset; + for (size_t i = 0; i < 16;) { + if (local_offset == line_length) { + dst[out_pos++] = '\n'; + local_offset = 0; + } + dst[out_pos++] = buffer[i++]; + local_offset++; + } + offset = local_offset; + return out_pos; + } +} + +template +size_t encode_base64_impl(char *dst, const char *src, size_t srclen, + base64_options options, + size_t line_length = simdutf::default_line_length) { + size_t offset = 0; + if (line_length < 4) { + line_length = 4; // We do not support line_length less than 4 + } + // credit: Wojciech Muła + uint8_t *out = (uint8_t *)dst; + constexpr static uint8_t source_table[64] = { + 'A', 'Q', 'g', 'w', 'B', 'R', 'h', 'x', 'C', 'S', 'i', 'y', 'D', + 'T', 'j', 'z', 'E', 'U', 'k', '0', 'F', 'V', 'l', '1', 'G', 'W', + 'm', '2', 'H', 'X', 'n', '3', 'I', 'Y', 'o', '4', 'J', 'Z', 'p', + '5', 'K', 'a', 'q', '6', 'L', 'b', 'r', '7', 'M', 'c', 's', '8', + 'N', 'd', 't', '9', 'O', 'e', 'u', '+', 'P', 'f', 'v', '/', + }; + constexpr static uint8_t source_table_url[64] = { + 'A', 'Q', 'g', 
'w', 'B', 'R', 'h', 'x', 'C', 'S', 'i', 'y', 'D', + 'T', 'j', 'z', 'E', 'U', 'k', '0', 'F', 'V', 'l', '1', 'G', 'W', + 'm', '2', 'H', 'X', 'n', '3', 'I', 'Y', 'o', '4', 'J', 'Z', 'p', + '5', 'K', 'a', 'q', '6', 'L', 'b', 'r', '7', 'M', 'c', 's', '8', + 'N', 'd', 't', '9', 'O', 'e', 'u', '-', 'P', 'f', 'v', '_', + }; + const uint8x16_t v3f = vdupq_n_u8(0x3f); +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + // When trying to load a uint8_t array, Visual Studio might + // error with: error C2664: '__n128x4 neon_ld4m_q8(const char *)': + // cannot convert argument 1 from 'const uint8_t [64]' to 'const char * + const uint8x16x4_t table = vld4q_u8( + (reinterpret_cast(options & base64_url) ? source_table_url + : source_table)); +#else + const uint8x16x4_t table = + vld4q_u8((options & base64_url) ? source_table_url : source_table); +#endif + size_t i = 0; + for (; i + 16 * 3 <= srclen; i += 16 * 3) { + const uint8x16x3_t in = vld3q_u8((const uint8_t *)src + i); + uint8x16x4_t result; + result.val[0] = vshrq_n_u8(in.val[0], 2); + result.val[1] = + vandq_u8(vsliq_n_u8(vshrq_n_u8(in.val[1], 4), in.val[0], 4), v3f); + result.val[2] = + vandq_u8(vsliq_n_u8(vshrq_n_u8(in.val[2], 6), in.val[1], 2), v3f); + result.val[3] = vandq_u8(in.val[2], v3f); + result.val[0] = vqtbl4q_u8(table, result.val[0]); + result.val[1] = vqtbl4q_u8(table, result.val[1]); + result.val[2] = vqtbl4q_u8(table, result.val[2]); + result.val[3] = vqtbl4q_u8(table, result.val[3]); + if (insert_line_feeds) { + if (line_length >= 64) { // fast path + vst4q_u8(out, result); + if (offset + 64 > line_length) { + size_t location_end = line_length - offset; + size_t to_move = 64 - location_end; + std::memmove(out + location_end + 1, out + location_end, to_move); + out[location_end] = '\n'; + offset = to_move; + out += 64 + 1; + } else { + offset += 64; + out += 64; + } + } else { // slow path + uint8x16x2_t Z0 = vzipq_u8(result.val[0], result.val[1]); + uint8x16x2_t Z1 = vzipq_u8(result.val[2], result.val[3]); + 
uint16x8x2_t Z2 = vzipq_u16(vreinterpretq_u16_u8(Z0.val[0]), + vreinterpretq_u16_u8(Z1.val[0])); + uint16x8x2_t Z3 = vzipq_u16(vreinterpretq_u16_u8(Z0.val[1]), + vreinterpretq_u16_u8(Z1.val[1])); + uint8x16_t T0 = vreinterpretq_u8_u16(Z2.val[0]); + uint8x16_t T1 = vreinterpretq_u8_u16(Z2.val[1]); + uint8x16_t T2 = vreinterpretq_u8_u16(Z3.val[0]); + uint8x16_t T3 = vreinterpretq_u8_u16(Z3.val[1]); + out += write_output_with_line_feeds(out, T0, line_length, offset); + out += write_output_with_line_feeds(out, T1, line_length, offset); + out += write_output_with_line_feeds(out, T2, line_length, offset); + out += write_output_with_line_feeds(out, T3, line_length, offset); + } + } else { + vst4q_u8(out, result); + out += 64; + } + } + + if (i + 24 <= srclen) { + const uint8x8_t v3f_d = vdup_n_u8(0x3f); + const uint8x8x3_t in = vld3_u8((const uint8_t *)src + i); + uint8x8x4_t result; + result.val[0] = vshr_n_u8(in.val[0], 2); + result.val[1] = + vand_u8(vsli_n_u8(vshr_n_u8(in.val[1], 4), in.val[0], 4), v3f_d); + result.val[2] = + vand_u8(vsli_n_u8(vshr_n_u8(in.val[2], 6), in.val[1], 2), v3f_d); + result.val[3] = vand_u8(in.val[2], v3f_d); + result.val[0] = vqtbl4_u8(table, result.val[0]); + result.val[1] = vqtbl4_u8(table, result.val[1]); + result.val[2] = vqtbl4_u8(table, result.val[2]); + result.val[3] = vqtbl4_u8(table, result.val[3]); + if (insert_line_feeds) { + if (line_length >= 32) { // fast path + vst4_u8(out, result); + if (offset + 32 > line_length) { + size_t location_end = line_length - offset; + size_t to_move = 32 - location_end; + std::memmove(out + location_end + 1, out + location_end, to_move); + out[location_end] = '\n'; + offset = to_move; + out += 32 + 1; + } else { + offset += 32; + out += 32; + } + } else { // slow path + uint8x8x2_t Z0 = vzip_u8(result.val[0], result.val[1]); + uint8x8x2_t Z1 = vzip_u8(result.val[2], result.val[3]); + uint16x4x2_t Z2 = vzip_u16(vreinterpret_u16_u8(Z0.val[0]), + vreinterpret_u16_u8(Z1.val[0])); + uint16x4x2_t Z3 = 
vzip_u16(vreinterpret_u16_u8(Z0.val[1]), + vreinterpret_u16_u8(Z1.val[1])); + uint8x8_t T0 = vreinterpret_u8_u16(Z2.val[0]); + uint8x8_t T1 = vreinterpret_u8_u16(Z2.val[1]); + uint8x8_t T2 = vreinterpret_u8_u16(Z3.val[0]); + uint8x8_t T3 = vreinterpret_u8_u16(Z3.val[1]); + uint8x16_t TT0 = vcombine_u8(T0, T1); + uint8x16_t TT1 = vcombine_u8(T2, T3); + out += write_output_with_line_feeds(out, TT0, line_length, offset); + out += write_output_with_line_feeds(out, TT1, line_length, offset); + } + } else { + vst4_u8(out, result); + out += 32; + } + i += 24; + } + out += scalar::base64::tail_encode_base64_impl( + (char *)out, src + i, srclen - i, options, line_length, offset); + return size_t((char *)out - dst); +} + +size_t encode_base64(char *dst, const char *src, size_t srclen, + base64_options options) { + return encode_base64_impl(dst, src, srclen, options); +} + +static inline void compress(uint8x16_t data, uint16_t mask, char *output) { + if (mask == 0) { + vst1q_u8((uint8_t *)output, data); + return; + } + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + uint64x2_t compactmasku64 = {tables::base64::thintable_epi8[mask1], + tables::base64::thintable_epi8[mask2]}; + uint8x16_t compactmask = vreinterpretq_u8_u64(compactmasku64); +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint8x16_t off = + simdutf_make_uint8x16_t(0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8); +#else + const uint8x16_t off = {0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8}; +#endif + + compactmask = vaddq_u8(compactmask, off); + uint8x16_t pruned = vqtbl1q_u8(data, compactmask); + + int pop1 = tables::base64::BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. 
+ compactmask = vld1q_u8(tables::base64::pshufb_combine_table + pop1 * 8); + uint8x16_t answer = vqtbl1q_u8(pruned, compactmask); + vst1q_u8((uint8_t *)output, answer); +} + +struct block64 { + uint8x16_t chunks[4]; +}; + +static_assert(sizeof(block64) == 64, "block64 is not 64 bytes"); +template +uint64_t to_base64_mask(block64 *b, bool *error) { + uint8x16_t v0f = vdupq_n_u8(0xf); + uint8x16_t v01 = vdupq_n_u8(0x1); + + uint8x16_t lo_nibbles0 = vandq_u8(b->chunks[0], v0f); + uint8x16_t lo_nibbles1 = vandq_u8(b->chunks[1], v0f); + uint8x16_t lo_nibbles2 = vandq_u8(b->chunks[2], v0f); + uint8x16_t lo_nibbles3 = vandq_u8(b->chunks[3], v0f); + + // Needed by the decoding step. + uint8x16_t hi_bits0 = vshrq_n_u8(b->chunks[0], 3); + uint8x16_t hi_bits1 = vshrq_n_u8(b->chunks[1], 3); + uint8x16_t hi_bits2 = vshrq_n_u8(b->chunks[2], 3); + uint8x16_t hi_bits3 = vshrq_n_u8(b->chunks[3], 3); + uint8x16_t lut_lo; +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + if (default_or_url) { + lut_lo = + simdutf_make_uint8x16_t(0xa9, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, + 0xf8, 0xf9, 0xf1, 0xa2, 0xa1, 0xa5, 0xa0, 0xa6); + } else if (base64_url) { + lut_lo = + simdutf_make_uint8x16_t(0xa9, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, + 0xf8, 0xf9, 0xf1, 0xa0, 0xa1, 0xa5, 0xa0, 0xa2); + } else { + lut_lo = + simdutf_make_uint8x16_t(0xa9, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, + 0xf8, 0xf9, 0xf1, 0xa2, 0xa1, 0xa1, 0xa0, 0xa4); + } +#else + if (default_or_url) { + lut_lo = uint8x16_t{0xa9, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, + 0xf8, 0xf9, 0xf1, 0xa2, 0xa1, 0xa5, 0xa0, 0xa6}; + } else if (base64_url) { + lut_lo = uint8x16_t{0xa9, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, + 0xf8, 0xf9, 0xf1, 0xa0, 0xa1, 0xa5, 0xa0, 0xa2}; + } else { + lut_lo = uint8x16_t{0xa9, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, + 0xf8, 0xf9, 0xf1, 0xa2, 0xa1, 0xa1, 0xa0, 0xa4}; + } +#endif + uint8x16_t lo0 = vqtbl1q_u8(lut_lo, lo_nibbles0); + uint8x16_t lo1 = vqtbl1q_u8(lut_lo, lo_nibbles1); + uint8x16_t lo2 = 
vqtbl1q_u8(lut_lo, lo_nibbles2); + uint8x16_t lo3 = vqtbl1q_u8(lut_lo, lo_nibbles3); + uint8x16_t lut_hi; +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + if (default_or_url) { + lut_hi = + simdutf_make_uint8x16_t(0x0, 0x1, 0x0, 0x0, 0x1, 0x6, 0x8, 0x8, 0x10, + 0x20, 0x20, 0x12, 0x40, 0x80, 0x80, 0x40); + } else if (base64_url) { + lut_hi = + simdutf_make_uint8x16_t(0x0, 0x1, 0x0, 0x0, 0x1, 0x6, 0x8, 0x8, 0x10, + 0x20, 0x20, 0x12, 0x40, 0x80, 0x80, 0x40); + } else { + lut_hi = + simdutf_make_uint8x16_t(0x0, 0x1, 0x0, 0x0, 0x1, 0x6, 0x8, 0x8, 0x10, + 0x20, 0x20, 0x10, 0x40, 0x80, 0x80, 0x40); + } +#else + if (default_or_url) { + lut_hi = uint8x16_t{0x0, 0x1, 0x0, 0x0, 0x1, 0x6, 0x8, 0x8, + 0x10, 0x20, 0x20, 0x12, 0x40, 0x80, 0x80, 0x40}; + } else if (base64_url) { + lut_hi = uint8x16_t{0x0, 0x1, 0x0, 0x0, 0x1, 0x4, 0x8, 0x8, + 0x10, 0x20, 0x20, 0x12, 0x40, 0x80, 0x80, 0x40}; + } else { + lut_hi = uint8x16_t{0x0, 0x1, 0x0, 0x0, 0x1, 0x6, 0x8, 0x8, + 0x10, 0x20, 0x20, 0x10, 0x40, 0x80, 0x80, 0x40}; + } +#endif + uint8x16_t hi0 = vqtbl1q_u8(lut_hi, hi_bits0); + uint8x16_t hi1 = vqtbl1q_u8(lut_hi, hi_bits1); + uint8x16_t hi2 = vqtbl1q_u8(lut_hi, hi_bits2); + uint8x16_t hi3 = vqtbl1q_u8(lut_hi, hi_bits3); + + // maps error byte to 0 and space byte to 1, valid bytes are >1 + uint8x16_t res0 = vandq_u8(lo0, hi0); + uint8x16_t res1 = vandq_u8(lo1, hi1); + uint8x16_t res2 = vandq_u8(lo2, hi2); + uint8x16_t res3 = vandq_u8(lo3, hi3); + + uint8_t checks = + vminvq_u8(vminq_u8(vminq_u8(res0, res1), vminq_u8(res2, res3))); +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint8x16_t bit_mask = + simdutf_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); +#else + const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; +#endif + uint64_t badcharmask = 0; + *error = checks == 0; + if (checks <= 1) { + // Add each of the elements next to each other, successively, to 
stuff each + // 8 byte mask into one. + uint8x16_t test0 = vcleq_u8(res0, v01); + uint8x16_t test1 = vcleq_u8(res1, v01); + uint8x16_t test2 = vcleq_u8(res2, v01); + uint8x16_t test3 = vcleq_u8(res3, v01); + uint8x16_t sum0 = + vpaddq_u8(vandq_u8(test0, bit_mask), vandq_u8(test1, bit_mask)); + uint8x16_t sum1 = + vpaddq_u8(vandq_u8(test2, bit_mask), vandq_u8(test3, bit_mask)); + sum0 = vpaddq_u8(sum0, sum1); + sum0 = vpaddq_u8(sum0, sum0); + badcharmask = vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); + } + // This is the transformation step that can be done while we are waiting for + // sum0 + uint8x16_t roll_lut; +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + if (default_or_url) { + roll_lut = + simdutf_make_uint8x16_t(0xBF, 0xE0, 0xB9, 0x13, 0x04, 0xBF, 0xBF, 0xB9, + 0xB9, 0x00, 0xFF, 0x11, 0xFF, 0xBF, 0x10, 0xB9); + } else if (base64_url) { + roll_lut = + simdutf_make_uint8x16_t(0xB9, 0xB9, 0xBF, 0xBF, 0x04, 0x11, 0xE0, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + } else { + roll_lut = + simdutf_make_uint8x16_t(0xB9, 0xB9, 0xBF, 0xBF, 0x04, 0x10, 0x13, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + } +#else + if (default_or_url) { + roll_lut = uint8x16_t{0xBF, 0xE0, 0xB9, 0x13, 0x04, 0xBF, 0xBF, 0xB9, + 0xB9, 0x00, 0xFF, 0x11, 0xFF, 0xBF, 0x10, 0xB9}; + } else if (base64_url) { + roll_lut = uint8x16_t{0xB9, 0xB9, 0xBF, 0xBF, 0x04, 0x11, 0xE0, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; + } else { + roll_lut = uint8x16_t{0xB9, 0xB9, 0xBF, 0xBF, 0x04, 0x10, 0x13, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; + } +#endif + uint8x16_t roll0, roll1, roll2, roll3; + if (default_or_url) { +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint8x16_t delta_asso = + simdutf_make_uint8x16_t(0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x16); +#else + const uint8x16_t delta_asso = + uint8x16_t{0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 
0x16}; +#endif + // the logic of translating is based on westmere + uint8x16_t delta_hash0 = + vrhaddq_u8(vqtbl1q_u8(delta_asso, lo_nibbles0), hi_bits0); + uint8x16_t delta_hash1 = + vrhaddq_u8(vqtbl1q_u8(delta_asso, lo_nibbles1), hi_bits1); + uint8x16_t delta_hash2 = + vrhaddq_u8(vqtbl1q_u8(delta_asso, lo_nibbles2), hi_bits2); + uint8x16_t delta_hash3 = + vrhaddq_u8(vqtbl1q_u8(delta_asso, lo_nibbles3), hi_bits3); + const uint8x16x2_t roll_lut_2 = {roll_lut, roll_lut}; + roll0 = vqtbl2q_u8(roll_lut_2, delta_hash0); + roll1 = vqtbl2q_u8(roll_lut_2, delta_hash1); + roll2 = vqtbl2q_u8(roll_lut_2, delta_hash2); + roll3 = vqtbl2q_u8(roll_lut_2, delta_hash3); + } else { + uint8x16_t delta_hash0 = vclzq_u8(res0); + uint8x16_t delta_hash1 = vclzq_u8(res1); + uint8x16_t delta_hash2 = vclzq_u8(res2); + uint8x16_t delta_hash3 = vclzq_u8(res3); + roll0 = vqtbl1q_u8(roll_lut, delta_hash0); + roll1 = vqtbl1q_u8(roll_lut, delta_hash1); + roll2 = vqtbl1q_u8(roll_lut, delta_hash2); + roll3 = vqtbl1q_u8(roll_lut, delta_hash3); + } + + b->chunks[0] = vaddq_u8(b->chunks[0], roll0); + b->chunks[1] = vaddq_u8(b->chunks[1], roll1); + b->chunks[2] = vaddq_u8(b->chunks[2], roll2); + b->chunks[3] = vaddq_u8(b->chunks[3], roll3); + return badcharmask; +} + +void copy_block(block64 *b, char *output) { + vst1q_u8((uint8_t *)output, b->chunks[0]); + vst1q_u8((uint8_t *)output + 16, b->chunks[1]); + vst1q_u8((uint8_t *)output + 32, b->chunks[2]); + vst1q_u8((uint8_t *)output + 48, b->chunks[3]); +} + +uint64_t compress_block(block64 *b, uint64_t mask, char *output) { + uint64_t popcounts = + vget_lane_u64(vreinterpret_u64_u8(vcnt_u8(vcreate_u8(~mask))), 0); + uint64_t offsets = popcounts * 0x0101010101010101; + compress(b->chunks[0], uint16_t(mask), output); + compress(b->chunks[1], uint16_t(mask >> 16), &output[(offsets >> 8) & 0xFF]); + compress(b->chunks[2], uint16_t(mask >> 32), &output[(offsets >> 24) & 0xFF]); + compress(b->chunks[3], uint16_t(mask >> 48), &output[(offsets >> 40) & 0xFF]); 
+ return offsets >> 56; +} + +// The caller of this function is responsible to ensure that there are 64 bytes +// available from reading at src. The data is read into a block64 structure. +void load_block(block64 *b, const char *src) { + b->chunks[0] = vld1q_u8(reinterpret_cast(src)); + b->chunks[1] = vld1q_u8(reinterpret_cast(src) + 16); + b->chunks[2] = vld1q_u8(reinterpret_cast(src) + 32); + b->chunks[3] = vld1q_u8(reinterpret_cast(src) + 48); +} + +// The caller of this function is responsible to ensure that there are 32 bytes +// available from reading at data. It returns a 16-byte value, narrowing with +// saturation the 16-bit words. +inline uint8x16_t load_satured(const uint16_t *data) { + uint16x8_t in1 = vld1q_u16(data); + uint16x8_t in2 = vld1q_u16(data + 8); + return vqmovn_high_u16(vqmovn_u16(in1), in2); +} + +// The caller of this function is responsible to ensure that there are 128 bytes +// available from reading at src. The data is read into a block64 structure. +void load_block(block64 *b, const char16_t *src) { + b->chunks[0] = load_satured(reinterpret_cast(src)); + b->chunks[1] = load_satured(reinterpret_cast(src) + 16); + b->chunks[2] = load_satured(reinterpret_cast(src) + 32); + b->chunks[3] = load_satured(reinterpret_cast(src) + 48); +} + +// decode 64 bytes and output 48 bytes +void base64_decode_block(char *out, const char *src) { + uint8x16x4_t str = vld4q_u8((uint8_t *)src); + uint8x16x3_t outvec; + outvec.val[0] = vsliq_n_u8(vshrq_n_u8(str.val[1], 4), str.val[0], 2); + outvec.val[1] = vsliq_n_u8(vshrq_n_u8(str.val[2], 2), str.val[1], 4); + outvec.val[2] = vsliq_n_u8(str.val[3], str.val[2], 6); + vst3q_u8((uint8_t *)out, outvec); +} + +static size_t compress_block_single(block64 *b, uint64_t mask, char *output) { + const size_t pos64 = trailing_zeroes(mask); + const int8_t pos = pos64 & 0xf; + + // Predefine the index vector +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint8x16_t v1 = simdutf_make_uint8x16_t(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
+ 10, 11, 12, 13, 14, 15); +#else // SIMDUTF_REGULAR_VISUAL_STUDIO + const uint8x16_t v1 = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; +#endif // SIMDUTF_REGULAR_VISUAL_STUDIO + + switch (pos64 >> 4) { + case 0b00: { + const uint8x16_t v0 = vmovq_n_u8((uint8_t)(pos - 1)); + const uint8x16_t v2 = + vcgtq_s8(vreinterpretq_s8_u8(v1), + vreinterpretq_s8_u8(v0)); // Compare greater than + const uint8x16_t sh = vsubq_u8(v1, v2); // Subtract + const uint8x16_t compressed = + vqtbl1q_u8(b->chunks[0], sh); // Table lookup (shuffle) + + vst1q_u8((uint8_t *)(output + 0 * 16), compressed); + vst1q_u8((uint8_t *)(output + 1 * 16 - 1), b->chunks[1]); + vst1q_u8((uint8_t *)(output + 2 * 16 - 1), b->chunks[2]); + vst1q_u8((uint8_t *)(output + 3 * 16 - 1), b->chunks[3]); + } break; + + case 0b01: { + vst1q_u8((uint8_t *)(output + 0 * 16), b->chunks[0]); + + const uint8x16_t v0 = vmovq_n_u8((uint8_t)(pos - 1)); + const uint8x16_t v2 = + vcgtq_s8(vreinterpretq_s8_u8(v1), vreinterpretq_s8_u8(v0)); + const uint8x16_t sh = vsubq_u8(v1, v2); + const uint8x16_t compressed = vqtbl1q_u8(b->chunks[1], sh); + + vst1q_u8((uint8_t *)(output + 1 * 16), compressed); + vst1q_u8((uint8_t *)(output + 2 * 16 - 1), b->chunks[2]); + vst1q_u8((uint8_t *)(output + 3 * 16 - 1), b->chunks[3]); + } break; + + case 0b10: { + vst1q_u8((uint8_t *)(output + 0 * 16), b->chunks[0]); + vst1q_u8((uint8_t *)(output + 1 * 16), b->chunks[1]); + + const uint8x16_t v0 = vmovq_n_u8((uint8_t)(pos - 1)); + const uint8x16_t v2 = + vcgtq_s8(vreinterpretq_s8_u8(v1), vreinterpretq_s8_u8(v0)); + const uint8x16_t sh = vsubq_u8(v1, v2); + const uint8x16_t compressed = vqtbl1q_u8(b->chunks[2], sh); + + vst1q_u8((uint8_t *)(output + 2 * 16), compressed); + vst1q_u8((uint8_t *)(output + 3 * 16 - 1), b->chunks[3]); + } break; + + case 0b11: { + vst1q_u8((uint8_t *)(output + 0 * 16), b->chunks[0]); + vst1q_u8((uint8_t *)(output + 1 * 16), b->chunks[1]); + vst1q_u8((uint8_t *)(output + 2 * 16), b->chunks[2]); + + const 
uint8x16_t v0 = vmovq_n_u8((uint8_t)(pos - 1)); + const uint8x16_t v2 = + vcgtq_s8(vreinterpretq_s8_u8(v1), vreinterpretq_s8_u8(v0)); + const uint8x16_t sh = vsubq_u8(v1, v2); + const uint8x16_t compressed = vqtbl1q_u8(b->chunks[3], sh); + + vst1q_u8((uint8_t *)(output + 3 * 16), compressed); + } break; + } + return 63; +} + +template bool is_power_of_two(T x) { return (x & (x - 1)) == 0; } + +template +full_result +compress_decode_base64(char *dst, const char_type *src, size_t srclen, + base64_options options, + last_chunk_handling_options last_chunk_options) { + const uint8_t *to_base64 = + default_or_url ? tables::base64::to_base64_default_or_url_value + : (base64_url ? tables::base64::to_base64_url_value + : tables::base64::to_base64_value); + auto ri = simdutf::scalar::base64::find_end(src, srclen, options); + size_t equallocation = ri.equallocation; + size_t equalsigns = ri.equalsigns; + srclen = ri.srclen; + size_t full_input_length = ri.full_input_length; + if (srclen == 0) { + if (!ignore_garbage && equalsigns > 0) { + return {INVALID_BASE64_CHARACTER, equallocation, 0}; + } + return {SUCCESS, full_input_length, 0}; + } + const char_type *const srcinit = src; + const char *const dstinit = dst; + const char_type *const srcend = src + srclen; + + constexpr size_t block_size = 10; + char buffer[block_size * 64]; + char *bufferptr = buffer; + if (srclen >= 64) { + const char_type *const srcend64 = src + srclen - 64; + while (src <= srcend64) { + block64 b; + load_block(&b, src); + src += 64; + bool error = false; + uint64_t badcharmask = + to_base64_mask(&b, &error); + if (badcharmask) { + if (error && !ignore_garbage) { + src -= 64; + while (src < srcend && scalar::base64::is_eight_byte(*src) && + to_base64[uint8_t(*src)] <= 64) { + src++; + } + if (src < srcend) { + // should never happen + } + return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), + size_t(dst - dstinit)}; + } + } + + if (badcharmask != 0) { + // optimization opportunity: 
check for simple masks like those made of + // continuous 1s followed by continuous 0s. And masks containing a + // single bad character. + if (is_power_of_two(badcharmask)) { + bufferptr += compress_block_single(&b, badcharmask, bufferptr); + } else { + bufferptr += compress_block(&b, badcharmask, bufferptr); + } + } else { + // optimization opportunity: if bufferptr == buffer and mask == 0, we + // can avoid the call to compress_block and decode directly. + copy_block(&b, bufferptr); + bufferptr += 64; + } + if (bufferptr >= (block_size - 1) * 64 + buffer) { + for (size_t i = 0; i < (block_size - 1); i++) { + base64_decode_block(dst, buffer + i * 64); + dst += 48; + } + std::memcpy(buffer, buffer + (block_size - 1) * 64, + 64); // 64 might be too much + bufferptr -= (block_size - 1) * 64; + } + } + } + char *buffer_start = buffer; + // Optimization note: if this is almost full, then it is worth our + // time, otherwise, we should just decode directly. + int last_block = (int)((bufferptr - buffer_start) % 64); + if (last_block != 0 && srcend - src + last_block >= 64) { + while ((bufferptr - buffer_start) % 64 != 0 && src < srcend) { + uint8_t val = to_base64[uint8_t(*src)]; + *bufferptr = char(val); + if ((!scalar::base64::is_eight_byte(*src) || val > 64) && + !ignore_garbage) { + return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), + size_t(dst - dstinit)}; + } + bufferptr += (val <= 63); + src++; + } + } + + for (; buffer_start + 64 <= bufferptr; buffer_start += 64) { + base64_decode_block(dst, buffer_start); + dst += 48; + } + if ((bufferptr - buffer_start) % 64 != 0) { + while (buffer_start + 4 < bufferptr) { + uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + + (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + + (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + + (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) + << 8; +#if !SIMDUTF_IS_BIG_ENDIAN + triple = scalar::u32_swap_bytes(triple); +#endif + std::memcpy(dst, &triple, 4); + + 
dst += 3; + buffer_start += 4; + } + if (buffer_start + 4 <= bufferptr) { + uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + + (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + + (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + + (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) + << 8; +#if !SIMDUTF_IS_BIG_ENDIAN + triple = scalar::u32_swap_bytes(triple); +#endif + std::memcpy(dst, &triple, 3); + + dst += 3; + buffer_start += 4; + } + // we may have 1, 2 or 3 bytes left and we need to decode them so let us + // backtrack + int leftover = int(bufferptr - buffer_start); + while (leftover > 0) { + if (!ignore_garbage) { + while (to_base64[uint8_t(*(src - 1))] == 64) { + src--; + } + } else { + while (to_base64[uint8_t(*(src - 1))] >= 64) { + src--; + } + } + src--; + leftover--; + } + } + if (src < srcend + equalsigns) { + full_result r = scalar::base64::base64_tail_decode( + dst, src, srcend - src, equalsigns, options, last_chunk_options); + r = scalar::base64::patch_tail_result( + r, size_t(src - srcinit), size_t(dst - dstinit), equallocation, + full_input_length, last_chunk_options); + // When is_partial(last_chunk_options) is true, we must either end with + // the end of the stream (beyond whitespace) or right after a non-ignorable + // character or at the very beginning of the stream. + // See https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64 + if (is_partial(last_chunk_options) && r.error == error_code::SUCCESS && + r.input_count < full_input_length) { + // First check if we can extend the input to the end of the stream + while (r.input_count < full_input_length && + base64_ignorable(*(srcinit + r.input_count), options)) { + r.input_count++; + } + // If we are still not at the end of the stream, then we must backtrack + // to the last non-ignorable character. 
+ if (r.input_count < full_input_length) { + while (r.input_count > 0 && + base64_ignorable(*(srcinit + r.input_count - 1), options)) { + r.input_count--; + } + } + } + return r; + } + if (equalsigns > 0 && !ignore_garbage) { + if ((size_t(dst - dstinit) % 3 == 0) || + ((size_t(dst - dstinit) % 3) + 1 + equalsigns != 4)) { + return {INVALID_BASE64_CHARACTER, equallocation, size_t(dst - dstinit)}; + } + } + return {SUCCESS, srclen, size_t(dst - dstinit)}; +} +/* end file src/arm64/arm_base64.cpp */ +/* begin file src/arm64/arm_find.cpp */ +simdutf_really_inline const char *util_find(const char *start, const char *end, + char character) noexcept { + // Handle empty or invalid range + if (start >= end) + return end; + + const size_t widestep = 64; + const size_t step = 16; + uint8x16_t char_vec = vdupq_n_u8(static_cast(character)); + + // Handle unaligned beginning + uintptr_t misalignment = reinterpret_cast(start) % step; + if (misalignment != 0) { + size_t adjustment = step - misalignment; + if (size_t(end - start) < adjustment) { + adjustment = end - start; + } + for (size_t i = 0; i < adjustment; ++i) { + if (start[i] == character) { + return start + i; + } + } + start += adjustment; + } + + // Main loop for full 64-byte chunks + while (size_t(end - start) >= widestep) { + uint8x16_t data1 = vld1q_u8(reinterpret_cast(start)); + uint8x16_t data2 = vld1q_u8(reinterpret_cast(start) + 16); + uint8x16_t data3 = vld1q_u8(reinterpret_cast(start) + 32); + uint8x16_t data4 = vld1q_u8(reinterpret_cast(start) + 48); + + uint8x16_t cmp1 = vceqq_u8(data1, char_vec); + uint8x16_t cmp2 = vceqq_u8(data2, char_vec); + uint8x16_t cmp3 = vceqq_u8(data3, char_vec); + uint8x16_t cmp4 = vceqq_u8(data4, char_vec); + uint8x16_t cmpall = vorrq_u8(vorrq_u8(cmp1, cmp2), vorrq_u8(cmp3, cmp4)); + + uint64_t mask = vget_lane_u64( + vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(cmpall), 4)), 0); + + if (mask != 0) { + // Found a match, return the first one + uint64_t mask1 = 
vget_lane_u64( + vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(cmp1), 4)), 0); + if (mask1 != 0) { + // Found a match in the first chunk + int index = trailing_zeroes(mask1) / 4; // Each character maps to 4 bits + return start + index; + } + uint64_t mask2 = vget_lane_u64( + vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(cmp2), 4)), 0); + if (mask2 != 0) { + // Found a match in the second chunk + int index = trailing_zeroes(mask2) / 4; // Each character maps to 4 bits + return start + index + 16; + } + uint64_t mask3 = vget_lane_u64( + vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(cmp3), 4)), 0); + if (mask3 != 0) { + // Found a match in the third chunk + int index = trailing_zeroes(mask3) / 4; // Each character maps to 4 bits + return start + index + 32; + } + uint64_t mask4 = vget_lane_u64( + vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(cmp4), 4)), 0); + if (mask4 != 0) { + // Found a match in the fourth chunk + int index = trailing_zeroes(mask4) / 4; // Each character maps to 4 bits + return start + index + 48; + } + } + + start += widestep; + } + + // Main loop for full 16-byte chunks + while (size_t(end - start) >= step) { + uint8x16_t data = vld1q_u8(reinterpret_cast(start)); + uint8x16_t cmp = vceqq_u8(data, char_vec); + uint64_t mask = vget_lane_u64( + vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(cmp), 4)), 0); + + if (mask != 0) { + // Found a match, return the first one + int index = trailing_zeroes(mask) / 4; // Each character maps to 4 bits + return start + index; + } + + start += step; + } + + // Handle remaining bytes with scalar loop + for (; start < end; ++start) { + if (*start == character) { + return start; + } + } + + return end; +} + +simdutf_really_inline const char16_t *util_find(const char16_t *start, + const char16_t *end, + char16_t character) noexcept { + // Handle empty or invalid range + if (start >= end) + return end; + + const size_t step = 8; + uint16x8_t char_vec = vdupq_n_u16(character); + + // 
Handle unaligned beginning + uintptr_t misalignment = + reinterpret_cast(start) % (step * sizeof(char16_t)); + if (misalignment != 0 && misalignment % 2 == 0) { + size_t adjustment = + (step * sizeof(char16_t) - misalignment) / sizeof(char16_t); + if (size_t(end - start) < adjustment) { + adjustment = end - start; + } + for (size_t i = 0; i < adjustment; ++i) { + if (start[i] == character) { + return start + i; + } + } + start += adjustment; + } + + // Main loop for full 8-element chunks with unrolling + while (size_t(end - start) >= 4 * step) { + uint16x8_t data1 = vld1q_u16(reinterpret_cast(start)); + uint16x8_t data2 = + vld1q_u16(reinterpret_cast(start) + step); + uint16x8_t data3 = + vld1q_u16(reinterpret_cast(start) + 2 * step); + uint16x8_t data4 = + vld1q_u16(reinterpret_cast(start) + 3 * step); + + uint16x8_t cmp1 = vceqq_u16(data1, char_vec); + uint16x8_t cmp2 = vceqq_u16(data2, char_vec); + uint16x8_t cmp3 = vceqq_u16(data3, char_vec); + uint16x8_t cmp4 = vceqq_u16(data4, char_vec); + + uint64_t mask1 = vget_lane_u64( + vreinterpret_u64_u16(vshrn_n_u32(vreinterpretq_u32_u16(cmp1), 4)), 0); + if (mask1 != 0) { + int index = trailing_zeroes(mask1) / 8; + return start + index; + } + + uint64_t mask2 = vget_lane_u64( + vreinterpret_u64_u16(vshrn_n_u32(vreinterpretq_u32_u16(cmp2), 4)), 0); + if (mask2 != 0) { + int index = trailing_zeroes(mask2) / 8; + return start + index + step; + } + + uint64_t mask3 = vget_lane_u64( + vreinterpret_u64_u16(vshrn_n_u32(vreinterpretq_u32_u16(cmp3), 4)), 0); + if (mask3 != 0) { + int index = trailing_zeroes(mask3) / 8; + return start + index + 2 * step; + } + + uint64_t mask4 = vget_lane_u64( + vreinterpret_u64_u16(vshrn_n_u32(vreinterpretq_u32_u16(cmp4), 4)), 0); + if (mask4 != 0) { + int index = trailing_zeroes(mask4) / 8; + return start + index + 3 * step; + } + + start += 4 * step; + } + + // Main loop for full 8-element chunks + while (size_t(end - start) >= step) { + uint16x8_t data = vld1q_u16(reinterpret_cast(start)); 
+ uint16x8_t cmp = vceqq_u16(data, char_vec); + uint64_t mask = vget_lane_u64( + vreinterpret_u64_u16(vshrn_n_u32(vreinterpretq_u32_u16(cmp), 4)), 0); + + if (mask != 0) { + int index = trailing_zeroes(mask) / 8; + return start + index; + } + + start += step; + } + + // Handle remaining elements with scalar loop + for (; start < end; ++start) { + if (*start == character) { + return start; + } + } + + return end; +} +/* end file src/arm64/arm_find.cpp */ +#endif // SIMDUTF_FEATURE_BASE64 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/arm64/arm_convert_utf32_to_latin1.cpp */ +std::pair +arm_convert_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) { + const char32_t *end = buf + len; + while (end - buf >= 8) { + uint32x4_t in1 = vld1q_u32(reinterpret_cast(buf)); + uint32x4_t in2 = vld1q_u32(reinterpret_cast(buf + 4)); + + uint16x8_t utf16_packed = vcombine_u16(vqmovn_u32(in1), vqmovn_u32(in2)); + if (vmaxvq_u16(utf16_packed) <= 0xff) { + // 1. pack the bytes + uint8x8_t latin1_packed = vmovn_u16(utf16_packed); + // 2. store (8 bytes) + vst1_u8(reinterpret_cast(latin1_output), latin1_packed); + // 3. adjust pointers + buf += 8; + latin1_output += 8; + } else { + return std::make_pair(nullptr, reinterpret_cast(latin1_output)); + } + } // while + return std::make_pair(buf, latin1_output); +} + +std::pair +arm_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, + char *latin1_output) { + const char32_t *start = buf; + const char32_t *end = buf + len; + + while (end - buf >= 8) { + uint32x4_t in1 = vld1q_u32(reinterpret_cast(buf)); + uint32x4_t in2 = vld1q_u32(reinterpret_cast(buf + 4)); + + uint16x8_t utf16_packed = vcombine_u16(vqmovn_u32(in1), vqmovn_u32(in2)); + + if (vmaxvq_u16(utf16_packed) <= 0xff) { + // 1. pack the bytes + uint8x8_t latin1_packed = vmovn_u16(utf16_packed); + // 2. store (8 bytes) + vst1_u8(reinterpret_cast(latin1_output), latin1_packed); + // 3. 
adjust pointers + buf += 8; + latin1_output += 8; + } else { + // Let us do a scalar fallback. + for (int k = 0; k < 8; k++) { + uint32_t word = buf[k]; + if (word <= 0xff) { + *latin1_output++ = char(word); + } else { + return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), + latin1_output); + } + } + } + } // while + return std::make_pair(result(error_code::SUCCESS, buf - start), + latin1_output); +} +/* end file src/arm64/arm_convert_utf32_to_latin1.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_UTF16 +/* begin file src/arm64/arm_convert_utf32_to_utf16.cpp */ +struct expansion_result_t { + size_t u16count; + uint8x16_t compressed_v; +}; + +// This function is used to check for invalid UTF-32 characters +// and surrogate pairs in the input +simdutf_really_inline uint64_t invalid_utf32(const uint32x4x2_t in) { + const auto standardmax = vdupq_n_u32(0x10ffff); + const auto v_d800 = vdupq_n_u32(0xd800); + const auto v_fffff800 = vdupq_n_u32(0xfffff800); + const auto too_large1 = vcgtq_u32(in.val[0], standardmax); + const auto too_large2 = vcgtq_u32(in.val[1], standardmax); + const auto surrogate1 = vceqq_u32(vandq_u32(in.val[0], v_fffff800), v_d800); + const auto surrogate2 = vceqq_u32(vandq_u32(in.val[1], v_fffff800), v_d800); + const auto err1 = vorrq_u32(too_large1, surrogate1); + const auto err2 = vorrq_u32(too_large2, surrogate2); + const auto err = + vuzp2q_u16(vreinterpretq_u16_u32(err1), vreinterpretq_u16_u32(err2)); + + return vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(err, 8)), 0); +} + +// This function is used to check for surrogate pairs in the input +simdutf_really_inline uint64_t fast_invalid_utf32(const uint32x4x2_t in) { + const auto v_d800 = vdupq_n_u32(0xd800); + const auto v_fffff800 = vdupq_n_u32(0xfffff800); + const auto surrogate1 = vceqq_u32(vandq_u32(in.val[0], v_fffff800), v_d800); + const auto surrogate2 = vceqq_u32(vandq_u32(in.val[1], v_fffff800), v_d800); 
+ const auto err = vuzp2q_u16(vreinterpretq_u16_u32(surrogate1), + vreinterpretq_u16_u32(surrogate2)); + return vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(err, 8)), 0); +} + +template +simdutf_really_inline expansion_result_t +neon_expand_surrogate(const uint32x4_t in) { + const uint32x4_t v_ffff0000 = vdupq_n_u32(0xffff0000); + const uint32x4_t non_surrogate_mask = vceqzq_u32(vandq_u32(in, v_ffff0000)); + const uint64_t cmp_bits = + vget_lane_u64(vreinterpret_u64_u32(vshrn_n_u64( + vreinterpretq_u64_u32(non_surrogate_mask), 31)), + 0); + const uint8_t mask = + uint8_t(~((cmp_bits & 0x3) | ((cmp_bits >> 30) & 0xc)) & 0xf); + const uint32x4_t v_10000 = vdupq_n_u32(0x00010000); + const uint32x4_t t0 = vsubq_u32(in, v_10000); + const uint32x4_t t1 = vandq_u32(t0, vdupq_n_u32(0xfffff)); + const uint32x4_t t2 = vshrq_n_u32(t1, 10); + const uint32x4_t t3 = vsliq_n_u32(t2, t1, 16); + const uint32x4_t surrogates = vorrq_u32( + vandq_u32(t3, vdupq_n_u32(0x03ff03ff)), vdupq_n_u32(0xdc00d800)); + const uint8x16_t merged = + vreinterpretq_u8_u32(vbslq_u32(non_surrogate_mask, in, surrogates)); + + const uint8x16_t shuffle_v = vld1q_u8(reinterpret_cast( + (byte_order == endianness::LITTLE) + ? 
tables::utf32_to_utf16::pack_utf32_to_utf16le[mask] + : tables::utf32_to_utf16::pack_utf32_to_utf16be[mask])); + + const size_t u16count = 4 + vget_lane_u8(vcnt_u8(vcreate_u8(mask)), 0); + const uint8x16_t compressed_v = vqtbl1q_u8(merged, shuffle_v); + + return {u16count, compressed_v}; +} + +template +std::pair +arm_convert_utf32_to_utf16(const char32_t *buf, size_t len, + char16_t *utf16_out) { + uint16_t *utf16_output = reinterpret_cast(utf16_out); + const char32_t *end = buf + len; + + uint16x8_t forbidden_bytemask = vmovq_n_u16(0x0); + // To avoid buffer overflow while writing compressed_v + const size_t safety_margin = 4; + while (end - buf >= std::ptrdiff_t(8 + safety_margin)) { + uint32x4x2_t in = vld1q_u32_x2(reinterpret_cast(buf)); + + // Check if no bits set above 16th + uint32_t max_val = vmaxvq_u32(vmaxq_u32(in.val[0], in.val[1])); + if (simdutf_likely(max_val <= 0xFFFF)) { + uint16x8_t utf16_packed = vuzp1q_u16(vreinterpretq_u16_u32(in.val[0]), + vreinterpretq_u16_u32(in.val[1])); + + const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800); + const uint16x8_t v_f800 = vmovq_n_u16((uint16_t)0xf800); + forbidden_bytemask = + vorrq_u16(vceqq_u16(vandq_u16(utf16_packed, v_f800), v_d800), + forbidden_bytemask); + + if simdutf_constexpr (!match_system(big_endian)) { + utf16_packed = vreinterpretq_u16_u8( + vrev16q_u8(vreinterpretq_u8_u16(utf16_packed))); + } + vst1q_u16(utf16_output, utf16_packed); + utf16_output += 8; + buf += 8; + } else { + if (simdutf_unlikely(fast_invalid_utf32(in) || max_val > 0x10ffff)) { + return std::make_pair(nullptr, + reinterpret_cast(utf16_output)); + } + expansion_result_t res = neon_expand_surrogate(in.val[0]); + vst1q_u8(reinterpret_cast(utf16_output), res.compressed_v); + utf16_output += res.u16count; + res = neon_expand_surrogate(in.val[1]); + vst1q_u8(reinterpret_cast(utf16_output), res.compressed_v); + utf16_output += res.u16count; + buf += 8; + } + } + + // check for invalid input + if 
(vmaxvq_u32(vreinterpretq_u32_u16(forbidden_bytemask)) != 0) { + return std::make_pair(nullptr, reinterpret_cast(utf16_output)); + } + + return std::make_pair(buf, reinterpret_cast(utf16_output)); +} + +template +std::pair +arm_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, + char16_t *utf16_out) { + uint16_t *utf16_output = reinterpret_cast(utf16_out); + const char32_t *start = buf; + const char32_t *end = buf + len; + + // To avoid buffer overflow while writing compressed_v + const size_t safety_margin = 4; + while (end - buf >= std::ptrdiff_t(8 + safety_margin)) { + uint32x4x2_t in = vld1q_u32_x2(reinterpret_cast(buf)); + + // Check if no bits set above 16th + uint32_t max_val = vmaxvq_u32(vmaxq_u32(in.val[0], in.val[1])); + if (simdutf_likely(max_val <= 0xFFFF)) { + uint16x8_t utf16_packed = vuzp1q_u16(vreinterpretq_u16_u32(in.val[0]), + vreinterpretq_u16_u32(in.val[1])); + + const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800); + const uint16x8_t v_f800 = vmovq_n_u16((uint16_t)0xf800); + const uint16x8_t forbidden_bytemask = + vceqq_u16(vandq_u16(utf16_packed, v_f800), v_d800); + if (vmaxvq_u16(forbidden_bytemask) != 0) { + return std::make_pair(result(error_code::SURROGATE, buf - start), + reinterpret_cast(utf16_output)); + } + + if simdutf_constexpr (!match_system(big_endian)) { + utf16_packed = vreinterpretq_u16_u8( + vrev16q_u8(vreinterpretq_u8_u16(utf16_packed))); + } + vst1q_u16(utf16_output, utf16_packed); + utf16_output += 8; + buf += 8; + } else { + const uint64_t err = + max_val <= 0x10ffff ? fast_invalid_utf32(in) : invalid_utf32(in); + if (simdutf_unlikely(err)) { + const size_t pos = trailing_zeroes(err) / 8; + for (size_t k = 0; k < pos; k++) { + uint32_t word = buf[k]; + if ((word & 0xFFFF0000) == 0) { + // will not generate a surrogate pair + *utf16_output++ = !match_system(big_endian) + ? 
char16_t(word >> 8 | word << 8) + : char16_t(word); + } else { + // will generate a surrogate pair + word -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); + if simdutf_constexpr (!match_system(big_endian)) { + high_surrogate = + uint16_t(high_surrogate >> 8 | high_surrogate << 8); + low_surrogate = uint16_t(low_surrogate << 8 | low_surrogate >> 8); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + } + } + const uint32_t word = buf[pos]; + const size_t error_pos = buf - start + pos; + if (word > 0x10FFFF) { + return {result(error_code::TOO_LARGE, error_pos), + reinterpret_cast(utf16_output)}; + } + if (word >= 0xD800 && word <= 0xDFFF) { + return {result(error_code::SURROGATE, error_pos), + reinterpret_cast(utf16_output)}; + } + return {result(error_code::OTHER, error_pos), + reinterpret_cast(utf16_output)}; + } + expansion_result_t res = neon_expand_surrogate(in.val[0]); + vst1q_u8(reinterpret_cast(utf16_output), res.compressed_v); + utf16_output += res.u16count; + res = neon_expand_surrogate(in.val[1]); + vst1q_u8(reinterpret_cast(utf16_output), res.compressed_v); + utf16_output += res.u16count; + buf += 8; + } + } + + return std::make_pair(result(error_code::SUCCESS, buf - start), + reinterpret_cast(utf16_output)); +} +/* end file src/arm64/arm_convert_utf32_to_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_UTF8 +/* begin file src/arm64/arm_convert_utf32_to_utf8.cpp */ +std::pair +arm_convert_utf32_to_utf8(const char32_t *buf, size_t len, char *utf8_out) { + uint8_t *utf8_output = reinterpret_cast(utf8_out); + const char32_t *end = buf + len; + + const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080); + + uint16x8_t forbidden_bytemask = vmovq_n_u16(0x0); + const size_t safety_margin = + 12; // to avoid overruns, see issue + // 
https://github.com/simdutf/simdutf/issues/92 + + while (buf + 16 + safety_margin < end) { + uint32x4_t in = vld1q_u32(reinterpret_cast(buf)); + uint32x4_t nextin = vld1q_u32(reinterpret_cast(buf + 4)); + + // Check if no bits set above 16th + if (vmaxvq_u32(vorrq_u32(in, nextin)) <= 0xFFFF) { + // Pack UTF-32 to UTF-16 safely (without surrogate pairs) + // Apply UTF-16 => UTF-8 routine (arm_convert_utf16_to_utf8.cpp) + uint16x8_t utf16_packed = vcombine_u16(vmovn_u32(in), vmovn_u32(nextin)); + if (vmaxvq_u16(utf16_packed) <= 0x7F) { // ASCII fast path!!!! + // 1. pack the bytes + // obviously suboptimal. + uint8x8_t utf8_packed = vmovn_u16(utf16_packed); + // 2. store (8 bytes) + vst1_u8(utf8_output, utf8_packed); + // 3. adjust pointers + buf += 8; + utf8_output += 8; + continue; // we are done for this round! + } + + if (vmaxvq_u16(utf16_packed) <= 0x7FF) { + // 1. prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 + // expected output : [110a|aaaa|10bb|bbbb] x 8 + const uint16x8_t v_1f00 = vmovq_n_u16((int16_t)0x1f00); + const uint16x8_t v_003f = vmovq_n_u16((int16_t)0x003f); + + // t0 = [000a|aaaa|bbbb|bb00] + const uint16x8_t t0 = vshlq_n_u16(utf16_packed, 2); + // t1 = [000a|aaaa|0000|0000] + const uint16x8_t t1 = vandq_u16(t0, v_1f00); + // t2 = [0000|0000|00bb|bbbb] + const uint16x8_t t2 = vandq_u16(utf16_packed, v_003f); + // t3 = [000a|aaaa|00bb|bbbb] + const uint16x8_t t3 = vorrq_u16(t1, t2); + // t4 = [110a|aaaa|10bb|bbbb] + const uint16x8_t t4 = vorrq_u16(t3, v_c080); + // 2. merge ASCII and 2-byte codewords + const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F); + const uint16x8_t one_byte_bytemask = vcleq_u16(utf16_packed, v_007f); + const uint8x16_t utf8_unpacked = vreinterpretq_u8_u16( + vbslq_u16(one_byte_bytemask, utf16_packed, t4)); + // 3. 
prepare bitmask for 8-bit lookup +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint16x8_t mask = simdutf_make_uint16x8_t( + 0x0001, 0x0004, 0x0010, 0x0040, 0x0002, 0x0008, 0x0020, 0x0080); +#else + const uint16x8_t mask = {0x0001, 0x0004, 0x0010, 0x0040, + 0x0002, 0x0008, 0x0020, 0x0080}; +#endif + uint16_t m2 = vaddvq_u16(vandq_u16(one_byte_bytemask, mask)); + // 4. pack the bytes + const uint8_t *row = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0]; + const uint8x16_t shuffle = vld1q_u8(row + 1); + const uint8x16_t utf8_packed = vqtbl1q_u8(utf8_unpacked, shuffle); + + // 5. store bytes + vst1q_u8(utf8_output, utf8_packed); + + // 6. adjust pointers + buf += 8; + utf8_output += row[0]; + continue; + } else { + // case: code units from register produce either 1, 2 or 3 UTF-8 bytes + const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800); + const uint16x8_t v_dfff = vmovq_n_u16((uint16_t)0xdfff); + forbidden_bytemask = + vorrq_u16(vandq_u16(vcleq_u16(utf16_packed, v_dfff), + vcgeq_u16(utf16_packed, v_d800)), + forbidden_bytemask); + +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint16x8_t dup_even = simdutf_make_uint16x8_t( + 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); +#else + const uint16x8_t dup_even = {0x0000, 0x0202, 0x0404, 0x0606, + 0x0808, 0x0a0a, 0x0c0c, 0x0e0e}; +#endif + /* In this branch we handle three cases: + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + single UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - + two UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + three UTF-8 bytes + + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. + + We precompute byte 1 for case #1 and the common byte for cases #2 & #3 + in register t2. 
+ + We precompute byte 1 for case #3 and -- **conditionally** -- + precompute either byte 1 for case #2 or byte 2 for case #3. Note that + they differ by exactly one bit. + + Finally from these two code units we build proper UTF-8 sequence, + taking into account the case (i.e, the number of bytes to write). + */ + /** + * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: + * t2 => [0ccc|cccc] [10cc|cccc] + * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) + */ +#define simdutf_vec(x) vmovq_n_u16(static_cast(x)) + // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] + const uint16x8_t t0 = + vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(utf16_packed), + vreinterpretq_u8_u16(dup_even))); + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] + const uint16x8_t t1 = vandq_u16(t0, simdutf_vec(0b0011111101111111)); + // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] + const uint16x8_t t2 = vorrq_u16(t1, simdutf_vec(0b1000000000000000)); + + // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] + const uint16x8_t s0 = vshrq_n_u16(utf16_packed, 12); + // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] + const uint16x8_t s1 = + vandq_u16(utf16_packed, simdutf_vec(0b0000111111000000)); + // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000] + const uint16x8_t s1s = vshlq_n_u16(s1, 2); + // [00bb|bbbb|0000|aaaa] + const uint16x8_t s2 = vorrq_u16(s0, s1s); + // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] + const uint16x8_t s3 = vorrq_u16(s2, simdutf_vec(0b1100000011100000)); + const uint16x8_t v_07ff = vmovq_n_u16((uint16_t)0x07FF); + const uint16x8_t one_or_two_bytes_bytemask = + vcleq_u16(utf16_packed, v_07ff); + const uint16x8_t m0 = vbicq_u16(simdutf_vec(0b0100000000000000), + one_or_two_bytes_bytemask); + const uint16x8_t s4 = veorq_u16(s3, m0); +#undef simdutf_vec + + // 4. expand code units 16-bit => 32-bit + const uint8x16_t out0 = vreinterpretq_u8_u16(vzip1q_u16(t2, s4)); + const uint8x16_t out1 = vreinterpretq_u8_u16(vzip2q_u16(t2, s4)); + + // 5. 
compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle + const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F); + const uint16x8_t one_byte_bytemask = vcleq_u16(utf16_packed, v_007f); +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint16x8_t onemask = simdutf_make_uint16x8_t( + 0x0001, 0x0004, 0x0010, 0x0040, 0x0100, 0x0400, 0x1000, 0x4000); + const uint16x8_t twomask = simdutf_make_uint16x8_t( + 0x0002, 0x0008, 0x0020, 0x0080, 0x0200, 0x0800, 0x2000, 0x8000); +#else + const uint16x8_t onemask = {0x0001, 0x0004, 0x0010, 0x0040, + 0x0100, 0x0400, 0x1000, 0x4000}; + const uint16x8_t twomask = {0x0002, 0x0008, 0x0020, 0x0080, + 0x0200, 0x0800, 0x2000, 0x8000}; +#endif + const uint16x8_t combined = + vorrq_u16(vandq_u16(one_byte_bytemask, onemask), + vandq_u16(one_or_two_bytes_bytemask, twomask)); + const uint16_t mask = vaddvq_u16(combined); + // The following fast path may or may not be beneficial. + /*if(mask == 0) { + // We only have three-byte code units. Use fast path. + const uint8x16_t shuffle = {2,3,1,6,7,5,10,11,9,14,15,13,0,0,0,0}; + const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle); + const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle); + vst1q_u8(utf8_output, utf8_0); + utf8_output += 12; + vst1q_u8(utf8_output, utf8_1); + utf8_output += 12; + buf += 8; + continue; + }*/ + const uint8_t mask0 = uint8_t(mask); + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; + const uint8x16_t shuffle0 = vld1q_u8(row0 + 1); + const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle0); + + const uint8_t mask1 = static_cast(mask >> 8); + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; + const uint8x16_t shuffle1 = vld1q_u8(row1 + 1); + const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle1); + + vst1q_u8(utf8_output, utf8_0); + utf8_output += row0[0]; + vst1q_u8(utf8_output, utf8_1); + utf8_output += row1[0]; + + buf += 8; + } + // At least one 32-bit word will produce a surrogate pair in 
UTF-16 <=> + // will produce four UTF-8 bytes. + } else { + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint32_t word = buf[k]; + if ((word & 0xFFFFFF80) == 0) { + *utf8_output++ = char(word); + } else if ((word & 0xFFFFF800) == 0) { + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xFFFF0000) == 0) { + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair(nullptr, + reinterpret_cast(utf8_output)); + } + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { + if (word > 0x10FFFF) { + return std::make_pair(nullptr, + reinterpret_cast(utf8_output)); + } + *utf8_output++ = char((word >> 18) | 0b11110000); + *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } + } + buf += k; + } + } // while + + // check for invalid input + if (vmaxvq_u16(forbidden_bytemask) != 0) { + return std::make_pair(nullptr, reinterpret_cast(utf8_output)); + } + return std::make_pair(buf, reinterpret_cast(utf8_output)); +} + +std::pair +arm_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, + char *utf8_out) { + uint8_t *utf8_output = reinterpret_cast(utf8_out); + const char32_t *start = buf; + const char32_t *end = buf + len; + + const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080); + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 + + while (buf + 16 + safety_margin < end) { + 
uint32x4_t in = vld1q_u32(reinterpret_cast(buf)); + uint32x4_t nextin = vld1q_u32(reinterpret_cast(buf + 4)); + + // Check if no bits set above 16th + if (vmaxvq_u32(vorrq_u32(in, nextin)) <= 0xFFFF) { + // Pack UTF-32 to UTF-16 safely (without surrogate pairs) + // Apply UTF-16 => UTF-8 routine (arm_convert_utf16_to_utf8.cpp) + uint16x8_t utf16_packed = vcombine_u16(vmovn_u32(in), vmovn_u32(nextin)); + if (vmaxvq_u16(utf16_packed) <= 0x7F) { // ASCII fast path!!!! + // 1. pack the bytes + // obviously suboptimal. + uint8x8_t utf8_packed = vmovn_u16(utf16_packed); + // 2. store (8 bytes) + vst1_u8(utf8_output, utf8_packed); + // 3. adjust pointers + buf += 8; + utf8_output += 8; + continue; // we are done for this round! + } + + if (vmaxvq_u16(utf16_packed) <= 0x7FF) { + // 1. prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 + // expected output : [110a|aaaa|10bb|bbbb] x 8 + const uint16x8_t v_1f00 = vmovq_n_u16((int16_t)0x1f00); + const uint16x8_t v_003f = vmovq_n_u16((int16_t)0x003f); + + // t0 = [000a|aaaa|bbbb|bb00] + const uint16x8_t t0 = vshlq_n_u16(utf16_packed, 2); + // t1 = [000a|aaaa|0000|0000] + const uint16x8_t t1 = vandq_u16(t0, v_1f00); + // t2 = [0000|0000|00bb|bbbb] + const uint16x8_t t2 = vandq_u16(utf16_packed, v_003f); + // t3 = [000a|aaaa|00bb|bbbb] + const uint16x8_t t3 = vorrq_u16(t1, t2); + // t4 = [110a|aaaa|10bb|bbbb] + const uint16x8_t t4 = vorrq_u16(t3, v_c080); + // 2. merge ASCII and 2-byte codewords + const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F); + const uint16x8_t one_byte_bytemask = vcleq_u16(utf16_packed, v_007f); + const uint8x16_t utf8_unpacked = vreinterpretq_u8_u16( + vbslq_u16(one_byte_bytemask, utf16_packed, t4)); + // 3. 
prepare bitmask for 8-bit lookup +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint16x8_t mask = simdutf_make_uint16x8_t( + 0x0001, 0x0004, 0x0010, 0x0040, 0x0002, 0x0008, 0x0020, 0x0080); +#else + const uint16x8_t mask = {0x0001, 0x0004, 0x0010, 0x0040, + 0x0002, 0x0008, 0x0020, 0x0080}; +#endif + uint16_t m2 = vaddvq_u16(vandq_u16(one_byte_bytemask, mask)); + // 4. pack the bytes + const uint8_t *row = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0]; + const uint8x16_t shuffle = vld1q_u8(row + 1); + const uint8x16_t utf8_packed = vqtbl1q_u8(utf8_unpacked, shuffle); + + // 5. store bytes + vst1q_u8(utf8_output, utf8_packed); + + // 6. adjust pointers + buf += 8; + utf8_output += row[0]; + continue; + } else { + // case: code units from register produce either 1, 2 or 3 UTF-8 bytes + + // check for invalid input + const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800); + const uint16x8_t v_dfff = vmovq_n_u16((uint16_t)0xdfff); + const uint16x8_t forbidden_bytemask = vandq_u16( + vcleq_u16(utf16_packed, v_dfff), vcgeq_u16(utf16_packed, v_d800)); + if (vmaxvq_u16(forbidden_bytemask) != 0) { + return std::make_pair(result(error_code::SURROGATE, buf - start), + reinterpret_cast(utf8_output)); + } + +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint16x8_t dup_even = simdutf_make_uint16x8_t( + 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); +#else + const uint16x8_t dup_even = {0x0000, 0x0202, 0x0404, 0x0606, + 0x0808, 0x0a0a, 0x0c0c, 0x0e0e}; +#endif + /* In this branch we handle three cases: + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + single UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - + two UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + three UTF-8 bytes + + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. 
+ + We precompute byte 1 for case #1 and the common byte for cases #2 & #3 + in register t2. + + We precompute byte 1 for case #3 and -- **conditionally** -- + precompute either byte 1 for case #2 or byte 2 for case #3. Note that + they differ by exactly one bit. + + Finally from these two code units we build proper UTF-8 sequence, + taking into account the case (i.e, the number of bytes to write). + */ + /** + * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: + * t2 => [0ccc|cccc] [10cc|cccc] + * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) + */ +#define simdutf_vec(x) vmovq_n_u16(static_cast(x)) + // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] + const uint16x8_t t0 = + vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(utf16_packed), + vreinterpretq_u8_u16(dup_even))); + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] + const uint16x8_t t1 = vandq_u16(t0, simdutf_vec(0b0011111101111111)); + // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] + const uint16x8_t t2 = vorrq_u16(t1, simdutf_vec(0b1000000000000000)); + + // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] + const uint16x8_t s0 = vshrq_n_u16(utf16_packed, 12); + // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] + const uint16x8_t s1 = + vandq_u16(utf16_packed, simdutf_vec(0b0000111111000000)); + // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000] + const uint16x8_t s1s = vshlq_n_u16(s1, 2); + // [00bb|bbbb|0000|aaaa] + const uint16x8_t s2 = vorrq_u16(s0, s1s); + // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] + const uint16x8_t s3 = vorrq_u16(s2, simdutf_vec(0b1100000011100000)); + const uint16x8_t v_07ff = vmovq_n_u16((uint16_t)0x07FF); + const uint16x8_t one_or_two_bytes_bytemask = + vcleq_u16(utf16_packed, v_07ff); + const uint16x8_t m0 = vbicq_u16(simdutf_vec(0b0100000000000000), + one_or_two_bytes_bytemask); + const uint16x8_t s4 = veorq_u16(s3, m0); +#undef simdutf_vec + + // 4. 
expand code units 16-bit => 32-bit + const uint8x16_t out0 = vreinterpretq_u8_u16(vzip1q_u16(t2, s4)); + const uint8x16_t out1 = vreinterpretq_u8_u16(vzip2q_u16(t2, s4)); + + // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle + const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F); + const uint16x8_t one_byte_bytemask = vcleq_u16(utf16_packed, v_007f); +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint16x8_t onemask = simdutf_make_uint16x8_t( + 0x0001, 0x0004, 0x0010, 0x0040, 0x0100, 0x0400, 0x1000, 0x4000); + const uint16x8_t twomask = simdutf_make_uint16x8_t( + 0x0002, 0x0008, 0x0020, 0x0080, 0x0200, 0x0800, 0x2000, 0x8000); +#else + const uint16x8_t onemask = {0x0001, 0x0004, 0x0010, 0x0040, + 0x0100, 0x0400, 0x1000, 0x4000}; + const uint16x8_t twomask = {0x0002, 0x0008, 0x0020, 0x0080, + 0x0200, 0x0800, 0x2000, 0x8000}; +#endif + const uint16x8_t combined = + vorrq_u16(vandq_u16(one_byte_bytemask, onemask), + vandq_u16(one_or_two_bytes_bytemask, twomask)); + const uint16_t mask = vaddvq_u16(combined); + // The following fast path may or may not be beneficial. + /*if(mask == 0) { + // We only have three-byte code units. Use fast path. 
+ const uint8x16_t shuffle = {2,3,1,6,7,5,10,11,9,14,15,13,0,0,0,0}; + const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle); + const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle); + vst1q_u8(utf8_output, utf8_0); + utf8_output += 12; + vst1q_u8(utf8_output, utf8_1); + utf8_output += 12; + buf += 8; + continue; + }*/ + const uint8_t mask0 = uint8_t(mask); + + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; + const uint8x16_t shuffle0 = vld1q_u8(row0 + 1); + const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle0); + + const uint8_t mask1 = static_cast(mask >> 8); + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; + const uint8x16_t shuffle1 = vld1q_u8(row1 + 1); + const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle1); + + vst1q_u8(utf8_output, utf8_0); + utf8_output += row0[0]; + vst1q_u8(utf8_output, utf8_1); + utf8_output += row1[0]; + + buf += 8; + } + // At least one 32-bit word will produce a surrogate pair in UTF-16 <=> + // will produce four UTF-8 bytes. + } else { + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. 
+ size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint32_t word = buf[k]; + if ((word & 0xFFFFFF80) == 0) { + *utf8_output++ = char(word); + } else if ((word & 0xFFFFF800) == 0) { + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xFFFF0000) == 0) { + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair( + result(error_code::SURROGATE, buf - start + k), + reinterpret_cast(utf8_output)); + } + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { + if (word > 0x10FFFF) { + return std::make_pair( + result(error_code::TOO_LARGE, buf - start + k), + reinterpret_cast(utf8_output)); + } + *utf8_output++ = char((word >> 18) | 0b11110000); + *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } + } + buf += k; + } + } // while + + return std::make_pair(result(error_code::SUCCESS, buf - start), + reinterpret_cast(utf8_output)); +} +/* end file src/arm64/arm_convert_utf32_to_utf8.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_UTF8 + +} // unnamed namespace +} // namespace arm64 +} // namespace simdutf + +/* begin file src/generic/buf_block_reader.h */ +namespace simdutf { +namespace arm64 { +namespace { + +// Walks through a buffer in block-sized increments, loading the last part with +// spaces +template struct buf_block_reader { +public: + simdutf_really_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdutf_really_inline size_t block_index(); + simdutf_really_inline bool has_full_block() const; + simdutf_really_inline const uint8_t *full_block() const; + /** + * Get the last block, padded with spaces. 
+ * + * There will always be a last block, with at least 1 byte, unless len == 0 + * (in which case this function fills the buffer with spaces and returns 0. In + * particular, if len == STEP_SIZE there will be 0 full_blocks and 1 remainder + * block with STEP_SIZE bytes and no spaces for padding. + * + * @return the number of effective characters in the last block. + */ + simdutf_really_inline size_t get_remainder(uint8_t *dst) const; + simdutf_really_inline void advance(); + +private: + const uint8_t *buf; + const size_t len; + const size_t lenminusstep; + size_t idx; +}; + +// Routines to print masks and text for debugging bitmask operations +simdutf_unused static char *format_input_text_64(const uint8_t *text) { + static char *buf = + reinterpret_cast(malloc(sizeof(simd8x64) + 1)); + for (size_t i = 0; i < sizeof(simd8x64); i++) { + buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +// Routines to print masks and text for debugging bitmask operations +simdutf_unused static char *format_input_text(const simd8x64 &in) { + static char *buf = + reinterpret_cast(malloc(sizeof(simd8x64) + 1)); + in.store(reinterpret_cast(buf)); + for (size_t i = 0; i < sizeof(simd8x64); i++) { + if (buf[i] < ' ') { + buf[i] = '_'; + } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdutf_unused static char *format_mask(uint64_t mask) { + static char *buf = reinterpret_cast(malloc(64 + 1)); + for (size_t i = 0; i < 64; i++) { + buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; + } + buf[64] = '\0'; + return buf; +} + +template +simdutf_really_inline +buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) + : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 
0 : len - STEP_SIZE}, + idx{0} {} + +template +simdutf_really_inline size_t buf_block_reader::block_index() { + return idx; +} + +template +simdutf_really_inline bool buf_block_reader::has_full_block() const { + return idx < lenminusstep; +} + +template +simdutf_really_inline const uint8_t * +buf_block_reader::full_block() const { + return &buf[idx]; +} + +template +simdutf_really_inline size_t +buf_block_reader::get_remainder(uint8_t *dst) const { + if (len == idx) { + return 0; + } // memcpy(dst, null, 0) will trigger an error with some sanitizers + std::memset(dst, 0x20, + STEP_SIZE); // std::memset STEP_SIZE because it is more efficient + // to write out 8 or 16 bytes at once. + std::memcpy(dst, buf + idx, len - idx); + return len - idx; +} + +template +simdutf_really_inline void buf_block_reader::advance() { + idx += STEP_SIZE; +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdutf +/* end file src/generic/buf_block_reader.h */ +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +/* begin file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ +namespace simdutf { +namespace arm64 { +namespace { +namespace utf8_validation { + +using namespace simd; + +simdutf_really_inline simd8 +check_special_cases(const simd8 input, const simd8 prev1) { + // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) + // Bit 1 = Too Long (ASCII followed by continuation) + // Bit 2 = Overlong 3-byte + // Bit 4 = Surrogate + // Bit 5 = Overlong 2-byte + // Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1 
<< 3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); + constexpr const uint8_t CARRY = + TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . + const simd8 byte_1_low = + (prev1 & 0x0F) + .lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | + OVERLONG_4, + 
// ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); + return (byte_1_high & byte_1_low & byte_2_high); +} +simdutf_really_inline simd8 +check_multibyte_lengths(const simd8 input, + const simd8 prev_input, + const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = + simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; +} + +// +// Return nonzero if there are incomplete multibyte characters at the end of the +// block: e.g. if there is a 4-byte character, but it is 3 bytes from the end. +// +simdutf_really_inline simd8 is_incomplete(const simd8 input) { + // If the previous input's last 3 bytes match this, they're too short (they + // ended at EOF): + // ... 1111____ 111_____ 11______ + static const uint8_t max_array[32] = {255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 0b11110000u - 1, + 0b11100000u - 1, + 0b11000000u - 1}; + const simd8 max_value( + &max_array[sizeof(max_array) - sizeof(simd8)]); + return input.gt_bits(max_value); +} + +struct utf8_checker { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + // The last input we received + simd8 prev_input_block; + // Whether the last input we received was incomplete (used for ASCII fast + // path) + simd8 prev_incomplete; + + // + // Check whether the current bytes are valid UTF-8. 
+ // + simdutf_really_inline void check_utf8_bytes(const simd8 input, + const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ + // lead bytes (2, 3, 4-byte leads become large positive numbers instead of + // small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + // The only problem that can happen at EOF is that a multibyte character is + // too short or a byte value too large in the last bytes: check_special_cases + // only checks for bytes too large in the first of two bytes. + simdutf_really_inline void check_eof() { + // If the previous block had incomplete UTF-8 characters at the end, an + // ASCII block can't possibly finish them. + this->error |= this->prev_incomplete; + } + + simdutf_really_inline void check_next_input(const simd8x64 &input) { + if (simdutf_likely(is_ascii(input))) { + this->error |= this->prev_incomplete; + } else { + // you might think that a for-loop would work, but under Visual Studio, it + // is not good enough. + static_assert((simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + this->prev_incomplete = + is_incomplete(input.chunks[simd8x64::NUM_CHUNKS - 1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS - 1]; + } + } + + // do not forget to call check_eof! 
+ simdutf_really_inline bool errors() const { + return this->error.any_bits_set_anywhere(); + } + +}; // struct utf8_checker +} // namespace utf8_validation + +using utf8_validation::utf8_checker; + +} // unnamed namespace +} // namespace arm64 +} // namespace simdutf +/* end file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ +/* begin file src/generic/utf8_validation/utf8_validator.h */ +namespace simdutf { +namespace arm64 { +namespace { +namespace utf8_validation { + +/** + * Validates that the string is actual UTF-8. + */ +template +bool generic_validate_utf8(const uint8_t *input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + return !c.errors(); +} + +bool generic_validate_utf8(const char *input, size_t length) { + return generic_validate_utf8( + reinterpret_cast(input), length); +} + +/** + * Validates that the string is actual UTF-8 and stops on errors. 
+ */ +template +result generic_validate_utf8_with_errors(const uint8_t *input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + size_t count{0}; + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + if (c.errors()) { + if (count != 0) { + count--; + } // Sometimes the error is only detected in the next chunk + result res = scalar::utf8::rewind_and_validate_with_errors( + reinterpret_cast(input), + reinterpret_cast(input + count), length - count); + res.count += count; + return res; + } + reader.advance(); + count += 64; + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + if (c.errors()) { + if (count != 0) { + count--; + } // Sometimes the error is only detected in the next chunk + result res = scalar::utf8::rewind_and_validate_with_errors( + reinterpret_cast(input), + reinterpret_cast(input) + count, length - count); + res.count += count; + return res; + } else { + return result(error_code::SUCCESS, length); + } +} + +result generic_validate_utf8_with_errors(const char *input, size_t length) { + return generic_validate_utf8_with_errors( + reinterpret_cast(input), length); +} + +} // namespace utf8_validation +} // unnamed namespace +} // namespace arm64 +} // namespace simdutf +/* end file src/generic/utf8_validation/utf8_validator.h */ +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_ASCII +/* begin file src/generic/ascii_validation.h */ +namespace simdutf { +namespace arm64 { +namespace { +namespace ascii_validation { + +result generic_validate_ascii_with_errors(const char *input, size_t length) { + buf_block_reader<64> reader(reinterpret_cast(input), length); + size_t count{0}; + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + if (!in.is_ascii()) { + result res = scalar::ascii::validate_with_errors( + 
reinterpret_cast(input + count), length - count); + return result(res.error, count + res.count); + } + reader.advance(); + + count += 64; + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + if (!in.is_ascii()) { + result res = scalar::ascii::validate_with_errors( + reinterpret_cast(input + count), length - count); + return result(res.error, count + res.count); + } else { + return result(error_code::SUCCESS, length); + } +} + +bool generic_validate_ascii(const char *input, size_t length) { + buf_block_reader<64> reader(reinterpret_cast(input), length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + if (!in.is_ascii()) { + return false; + } + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + return in.is_ascii(); +} + +} // namespace ascii_validation +} // unnamed namespace +} // namespace arm64 +} // namespace simdutf +/* end file src/generic/ascii_validation.h */ +#endif // SIMDUTF_FEATURE_ASCII +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + // transcoding from UTF-8 to UTF-16 +/* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */ +namespace simdutf { +namespace arm64 { +namespace { +namespace utf8_to_utf16 { +using namespace simd; + +simdutf_really_inline simd8 +check_special_cases(const simd8 input, const simd8 prev1) { + // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) + // Bit 1 = Too Long (ASCII followed by continuation) + // Bit 2 = Overlong 3-byte + // Bit 4 = Surrogate + // Bit 5 = Overlong 2-byte + // Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ + constexpr const uint8_t 
TWO_CONTS = 1 << 7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); + constexpr const uint8_t CARRY = + TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . 
+ const simd8 byte_1_low = + (prev1 & 0x0F) + .lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | + OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); + return (byte_1_high & byte_1_low & byte_2_high); +} +simdutf_really_inline simd8 +check_multibyte_lengths(const simd8 input, + const simd8 prev_input, + const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = + simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; +} + +struct validating_transcoder { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + + validating_transcoder() : error(uint8_t(0)) {} + // + // Check whether the current bytes are valid UTF-8. 
+ // + simdutf_really_inline void check_utf8_bytes(const simd8 input, + const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ + // lead bytes (2, 3, 4-byte leads become large positive numbers instead of + // small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + template + simdutf_really_inline size_t convert(const char *in, size_t size, + char16_t *utf16_output) { + size_t pos = 0; + char16_t *start{utf16_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store_ascii_as_utf16(utf16_output); + utf16_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. 
+ static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (utf8_continuation_mask & 1) { + return 0; // error + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_utf16( + in + pos, utf8_end_of_code_point_mask, utf16_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. 
So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + return 0; + } + if (pos < size) { + size_t howmany = scalar::utf8_to_utf16::convert( + in + pos, size - pos, utf16_output); + if (howmany == 0) { + return 0; + } + utf16_output += howmany; + } + return utf16_output - start; + } + + template + simdutf_really_inline result convert_with_errors(const char *in, size_t size, + char16_t *utf16_output) { + size_t pos = 0; + char16_t *start{utf16_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store_ascii_as_utf16(utf16_output); + utf16_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. 
+ static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (errors() || (utf8_continuation_mask & 1)) { + // rewind_and_convert_with_errors will seek a potential error from + // in+pos onward, with the ability to go back up to pos bytes, and + // read size-pos bytes forward. + result res = + scalar::utf8_to_utf16::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf16_output); + res.count += pos; + return res; + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. 
+ size_t consumed = convert_masked_utf8_to_utf16( + in + pos, utf8_end_of_code_point_mask, utf16_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. + result res = + scalar::utf8_to_utf16::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf16_output); + res.count += pos; + return res; + } + if (pos < size) { + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. + result res = + scalar::utf8_to_utf16::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf16_output); + if (res.error) { // In case of error, we want the error position + res.count += pos; + return res; + } else { // In case of success, we want the number of word written + utf16_output += res.count; + } + } + return result(error_code::SUCCESS, utf16_output - start); + } + + simdutf_really_inline bool errors() const { + return this->error.any_bits_set_anywhere(); + } + +}; // struct utf8_checker +} // namespace utf8_to_utf16 +} // unnamed namespace +} // namespace arm64 +} // namespace simdutf +/* end file src/generic/utf8_to_utf16/utf8_to_utf16.h */ +/* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ +namespace simdutf { +namespace arm64 { +namespace { +namespace utf8_to_utf16 { + +using namespace simd; + +template +simdutf_warn_unused size_t convert_valid(const char *input, size_t size, + char16_t *utf16_output) noexcept { + // The implementation is not specific to haswell and should be moved to the + // 
generic directory. + size_t pos = 0; + char16_t *start{utf16_output}; + const size_t safety_margin = 16; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + // this loop could be unrolled further. For example, we could process the + // mask far more than 64 bytes. + simd8x64 in(reinterpret_cast(input + pos)); + if (in.is_ascii()) { + in.store_ascii_as_utf16(utf16_output); + utf16_output += 64; + pos += 64; + } else { + // Slow path. We hope that the compiler will recognize that this is a slow + // path. Anything that is not a continuation mask is a 'leading byte', + // that is, the start of a new code point. + uint64_t utf8_continuation_mask = in.lt(-65 + 1); + // -65 is 0b10111111 in two-complement's, so largest possible continuation + // byte + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + // The *start* of code points is not so useful, rather, we want the *end* + // of code points. + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times when using solely + // the slow/regular path, and at least four times if there are fast paths. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. 
+ // + // Thus we may allow convert_masked_utf8_to_utf16 to process + // more bytes at a time under a fast-path mode where 16 bytes + // are consumed at once (e.g., when encountering ASCII). + size_t consumed = convert_masked_utf8_to_utf16( + input + pos, utf8_end_of_code_point_mask, utf16_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + utf16_output += scalar::utf8_to_utf16::convert_valid( + input + pos, size - pos, utf16_output); + return utf16_output - start; +} + +} // namespace utf8_to_utf16 +} // unnamed namespace +} // namespace arm64 +} // namespace simdutf +/* end file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + // transcoding from UTF-8 to UTF-32 +/* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */ +namespace simdutf { +namespace arm64 { +namespace { +namespace utf8_to_utf32 { +using namespace simd; + +simdutf_really_inline simd8 +check_special_cases(const simd8 input, const simd8 prev1) { + // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) + // Bit 1 = Too Long (ASCII followed by continuation) + // Bit 2 = Overlong 3-byte + // Bit 4 = Surrogate + // Bit 5 = Overlong 2-byte + // Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ + constexpr const uint8_t 
TOO_LARGE = 1 << 3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); + constexpr const uint8_t CARRY = + TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . + const simd8 byte_1_low = + (prev1 & 0x0F) + .lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | + 
OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); + return (byte_1_high & byte_1_low & byte_2_high); +} +simdutf_really_inline simd8 +check_multibyte_lengths(const simd8 input, + const simd8 prev_input, + const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = + simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; +} + +struct validating_transcoder { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + + validating_transcoder() : error(uint8_t(0)) {} + // + // Check whether the current bytes are valid UTF-8. + // + simdutf_really_inline void check_utf8_bytes(const simd8 input, + const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ + // lead bytes (2, 3, 4-byte leads become large positive numbers instead of + // small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + simdutf_really_inline size_t convert(const char *in, size_t size, + char32_t *utf32_output) { + size_t pos = 0; + char32_t *start{utf32_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 words when calling convert_masked_utf8_to_utf32. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 16 leading bytes, to give us a good margin. 
+ size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, the loop stops just after counting the + // eighth leading byte from the end, which is then at index margin. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store_ascii_as_utf32(utf32_output); + utf32_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (utf8_continuation_mask & 1) { + return 0; // we have an error + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical.
If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_utf32( + in + pos, utf8_end_of_code_point_mask, utf32_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + return 0; + } + if (pos < size) { + size_t howmany = + scalar::utf8_to_utf32::convert(in + pos, size - pos, utf32_output); + if (howmany == 0) { + return 0; + } + utf32_output += howmany; + } + return utf32_output - start; + } + + simdutf_really_inline result convert_with_errors(const char *in, size_t size, + char32_t *utf32_output) { + size_t pos = 0; + char32_t *start{utf32_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, the loop stops just after counting the + // eighth leading byte from the end, which is then at index margin.
+ const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store_ascii_as_utf32(utf32_output); + utf32_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (errors() || (utf8_continuation_mask & 1)) { + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf32_output); + res.count += pos; + return res; + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. 
+ // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_utf32( + in + pos, utf8_end_of_code_point_mask, utf32_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf32_output); + res.count += pos; + return res; + } + if (pos < size) { + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf32_output); + if (res.error) { // In case of error, we want the error position + res.count += pos; + return res; + } else { // In case of success, we want the number of words written + utf32_output += res.count; + } + } + return result(error_code::SUCCESS, utf32_output - start); + } + + simdutf_really_inline bool errors() const { + return this->error.any_bits_set_anywhere(); + } + +}; // struct validating_transcoder +} // namespace utf8_to_utf32 +} // unnamed namespace +} // namespace arm64 +} // namespace simdutf +/* end file src/generic/utf8_to_utf32/utf8_to_utf32.h */ +/* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ +namespace simdutf { +namespace arm64 { +namespace { +namespace utf8_to_utf32 { + +using namespace simd; + +simdutf_warn_unused size_t convert_valid(const char *input, size_t size, + char32_t *utf32_output) noexcept { + size_t pos = 0; + char32_t *start{utf32_output}; + const size_t safety_margin = 16; // to avoid overruns!
+ while (pos + 64 + safety_margin <= size) { + simd8x64 in(reinterpret_cast(input + pos)); + if (in.is_ascii()) { + in.store_ascii_as_utf32(utf32_output); + utf32_output += 64; + pos += 64; + } else { + // -65 is 0b10111111 in two-complement's, so largest possible continuation + // byte + uint64_t utf8_continuation_mask = in.lt(-65 + 1); + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + size_t max_starting_point = (pos + 64) - 12; + while (pos < max_starting_point) { + size_t consumed = convert_masked_utf8_to_utf32( + input + pos, utf8_end_of_code_point_mask, utf32_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + } + } + utf32_output += scalar::utf8_to_utf32::convert_valid(input + pos, size - pos, + utf32_output); + return utf32_output - start; +} + +} // namespace utf8_to_utf32 +} // unnamed namespace +} // namespace arm64 +} // namespace simdutf +/* end file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +// other functions +#if SIMDUTF_FEATURE_UTF16 +/* begin file src/generic/utf16.h */ +namespace simdutf { +namespace arm64 { +namespace { +namespace utf16 { + +template +simdutf_really_inline size_t count_code_points(const char16_t *in, + size_t size) { + size_t pos = 0; + size_t count = 0; + for (; pos < size / 32 * 32; pos += 32) { + simd16x32 input(reinterpret_cast(in + pos)); + if simdutf_constexpr (!match_system(big_endian)) { + input.swap_bytes(); + } + uint64_t not_pair = input.not_in_range(0xDC00, 0xDFFF); + count += count_ones(not_pair) / 2; + } + return count + + scalar::utf16::count_code_points(in + pos, size - pos); +} + +template +simdutf_really_inline size_t utf8_length_from_utf16(const char16_t *in, + size_t size) { + size_t pos = 0; + size_t count = 0; + // This algorithm could no doubt be improved! 
+ for (; pos < size / 32 * 32; pos += 32) { + simd16x32 input(reinterpret_cast(in + pos)); + if simdutf_constexpr (!match_system(big_endian)) { + input.swap_bytes(); + } + uint64_t ascii_mask = input.lteq(0x7F); + uint64_t twobyte_mask = input.lteq(0x7FF); + uint64_t not_pair_mask = input.not_in_range(0xD800, 0xDFFF); + + size_t ascii_count = count_ones(ascii_mask) / 2; + size_t twobyte_count = count_ones(twobyte_mask & ~ascii_mask) / 2; + size_t threebyte_count = count_ones(not_pair_mask & ~twobyte_mask) / 2; + size_t fourbyte_count = 32 - count_ones(not_pair_mask) / 2; + count += 2 * fourbyte_count + 3 * threebyte_count + 2 * twobyte_count + + ascii_count; + } + return count + scalar::utf16::utf8_length_from_utf16(in + pos, + size - pos); +} + +template +simdutf_really_inline size_t utf32_length_from_utf16(const char16_t *in, + size_t size) { + return count_code_points(in, size); +} + +simdutf_really_inline void +change_endianness_utf16(const char16_t *in, size_t size, char16_t *output) { + size_t pos = 0; + + while (pos < size / 32 * 32) { + simd16x32 input(reinterpret_cast(in + pos)); + input.swap_bytes(); + input.store(reinterpret_cast(output)); + pos += 32; + output += 32; + } + + scalar::utf16::change_endianness_utf16(in + pos, size - pos, output); +} + +} // namespace utf16 +} // unnamed namespace +} // namespace arm64 +} // namespace simdutf +/* end file src/generic/utf16.h */ +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 +/* begin file src/generic/utf8.h */ +namespace simdutf { +namespace arm64 { +namespace { +namespace utf8 { + +using namespace simd; + +simdutf_really_inline size_t count_code_points(const char *in, size_t size) { + size_t pos = 0; + size_t count = 0; + for (; pos + 64 <= size; pos += 64) { + simd8x64 input(reinterpret_cast(in + pos)); + uint64_t utf8_continuation_mask = input.gt(-65); + count += count_ones(utf8_continuation_mask); + } + return count + scalar::utf8::count_code_points(in + pos, size - pos); +} + +#ifdef 
SIMDUTF_SIMD_HAS_BYTEMASK +simdutf_really_inline size_t count_code_points_bytemask(const char *in, + size_t size) { + using vector_i8 = simd8; + using vector_u8 = simd8; + using vector_u64 = simd64; + + constexpr size_t N = vector_i8::SIZE; + constexpr size_t max_iterations = 255 / 4; + + size_t pos = 0; + size_t count = 0; + + auto counters = vector_u64::zero(); + auto local = vector_u8::zero(); + size_t iterations = 0; + for (; pos + 4 * N <= size; pos += 4 * N) { + const auto input0 = + simd8::load(reinterpret_cast(in + pos + 0 * N)); + const auto input1 = + simd8::load(reinterpret_cast(in + pos + 1 * N)); + const auto input2 = + simd8::load(reinterpret_cast(in + pos + 2 * N)); + const auto input3 = + simd8::load(reinterpret_cast(in + pos + 3 * N)); + const auto mask0 = input0 > int8_t(-65); + const auto mask1 = input1 > int8_t(-65); + const auto mask2 = input2 > int8_t(-65); + const auto mask3 = input3 > int8_t(-65); + + local -= vector_u8(mask0); + local -= vector_u8(mask1); + local -= vector_u8(mask2); + local -= vector_u8(mask3); + + iterations += 1; + if (iterations == max_iterations) { + counters += sum_8bytes(local); + local = vector_u8::zero(); + iterations = 0; + } + } + + if (iterations > 0) { + count += local.sum_bytes(); + } + + count += counters.sum(); + + return count + scalar::utf8::count_code_points(in + pos, size - pos); +} +#endif // SIMDUTF_SIMD_HAS_BYTEMASK + +simdutf_really_inline size_t utf16_length_from_utf8(const char *in, + size_t size) { + size_t pos = 0; + size_t count = 0; + // This algorithm could no doubt be improved! + for (; pos + 64 <= size; pos += 64) { + simd8x64 input(reinterpret_cast(in + pos)); + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + // We count one word for anything that is not a continuation (so + // leading bytes). 
+ count += 64 - count_ones(utf8_continuation_mask); + int64_t utf8_4byte = input.gteq_unsigned(240); + count += count_ones(utf8_4byte); + } + return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos); +} + +} // namespace utf8 +} // unnamed namespace +} // namespace arm64 +} // namespace simdutf +/* end file src/generic/utf8.h */ +#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + // transcoding from UTF-8 to Latin 1 +/* begin file src/generic/utf8_to_latin1/utf8_to_latin1.h */ +namespace simdutf { +namespace arm64 { +namespace { +namespace utf8_to_latin1 { +using namespace simd; + +simdutf_really_inline simd8 +check_special_cases(const simd8 input, const simd8 prev1) { + // For UTF-8 to Latin 1, we can allow any ASCII character, and any + // continuation byte, but the non-ASCII leading bytes must be 0b11000011 or + // 0b11000010 and nothing else. + // + // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) + // Bit 1 = Too Long (ASCII followed by continuation) + // Bit 2 = Overlong 3-byte + // Bit 4 = Surrogate + // Bit 5 = Overlong 2-byte + // Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ + 
constexpr const uint8_t FORBIDDEN = 0xff; + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + FORBIDDEN, + // 1110____ ________ + FORBIDDEN, + // 1111____ ________ + FORBIDDEN); + constexpr const uint8_t CARRY = + TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . + const simd8 byte_1_low = + (prev1 & 0x0F) + .lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, CARRY, + + // ____0100 ________ + FORBIDDEN, + // ____0101 ________ + FORBIDDEN, + // ____011_ ________ + FORBIDDEN, FORBIDDEN, + + // ____1___ ________ + FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN, + // ____1101 ________ + FORBIDDEN, FORBIDDEN, FORBIDDEN); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | + OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); + return (byte_1_high & byte_1_low & byte_2_high); +} + +struct validating_transcoder { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + + validating_transcoder() : error(uint8_t(0)) {} + // + // Check whether the current bytes are valid UTF-8. 
+ // + simdutf_really_inline void check_utf8_bytes(const simd8 input, + const simd8 prev_input) { + // prev1 = input.prev<1>(prev_input) (presumably each byte's predecessor). + // The special-case lookup on (input, prev1) is OR-ed into the accumulated + // error; no separate multi-byte length check is done here. + simd8 prev1 = input.prev<1>(prev_input); + this->error |= check_special_cases(input, prev1); + } + + simdutf_really_inline size_t convert(const char *in, size_t size, + char *latin1_output) { + size_t pos = 0; + char *start{latin1_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 16 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 16; margin--) { + leading_byte += (int8_t(in[margin - 1]) > + -65); // twos complement of -65 is 1011 1111 ... + } + // If the input is long enough, the loop stops just after counting the + // sixteenth leading byte from the end, which is then at index margin. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store((int8_t *)latin1_output); + latin1_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough.
+ static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = + input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in + // this case, we also have ASCII to account for. + if (utf8_continuation_mask & 1) { + return 0; // error + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. 
+ size_t consumed = convert_masked_utf8_to_latin1( + in + pos, utf8_end_of_code_point_mask, latin1_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + return 0; + } + if (pos < size) { + size_t howmany = + scalar::utf8_to_latin1::convert(in + pos, size - pos, latin1_output); + if (howmany == 0) { + return 0; + } + latin1_output += howmany; + } + return latin1_output - start; + } + + simdutf_really_inline result convert_with_errors(const char *in, size_t size, + char *latin1_output) { + size_t pos = 0; + char *start{latin1_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, the loop stops just after counting the + // eighth leading byte from the end, which is then at index margin. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store((int8_t *)latin1_output); + latin1_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough.
+ static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + if (errors()) { + // rewind_and_convert_with_errors will seek a potential error from + // in+pos onward, with the ability to go back up to pos bytes, and + // read size-pos bytes forward. + result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( + pos, in + pos, size - pos, latin1_output); + res.count += pos; + return res; + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. 
+ size_t consumed = convert_masked_utf8_to_latin1( + in + pos, utf8_end_of_code_point_mask, latin1_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. + result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( + pos, in + pos, size - pos, latin1_output); + res.count += pos; + return res; + } + if (pos < size) { + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. + result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( + pos, in + pos, size - pos, latin1_output); + if (res.error) { // In case of error, we want the error position + res.count += pos; + return res; + } else { // In case of success, we want the number of characters written + latin1_output += res.count; + } + } + return result(error_code::SUCCESS, latin1_output - start); + } + + simdutf_really_inline bool errors() const { + return this->error.any_bits_set_anywhere(); + } + +}; // struct validating_transcoder +} // namespace utf8_to_latin1 +} // unnamed namespace +} // namespace arm64 +} // namespace simdutf +/* end file src/generic/utf8_to_latin1/utf8_to_latin1.h */ +/* begin file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ +namespace simdutf { +namespace arm64 { +namespace { +namespace utf8_to_latin1 { +using namespace simd; + +simdutf_really_inline size_t convert_valid(const char *in, size_t size, + char *latin1_output) { + size_t pos = 0; + char *start{latin1_output}; + // In the worst case, we have the haswell kernel
which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the last + // 16 bytes, and if the data is valid, then it is entirely safe because 16 + // UTF-8 bytes generate much more than 8 bytes. However, you cannot generally + // assume that you have valid UTF-8 input, so we are going to go back from the + // end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > + -65); // twos complement of -65 is 1011 1111 ... + } + // If the input is long enough, the loop stops just after counting the + // eighth leading byte from the end, which is then at index margin. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store((int8_t *)latin1_output); + latin1_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, it + // is not good enough. + uint64_t utf8_continuation_mask = + input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in + // this case, we also have ASCII to account for. + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles.
+ // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_latin1( + in + pos, utf8_end_of_code_point_mask, latin1_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (pos < size) { + size_t howmany = scalar::utf8_to_latin1::convert_valid(in + pos, size - pos, + latin1_output); + latin1_output += howmany; + } + return latin1_output - start; +} + +} // namespace utf8_to_latin1 +} // namespace +} // namespace arm64 +} // namespace simdutf + // namespace simdutf +/* end file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +// +// Implementation-specific overrides +// +namespace simdutf { +namespace arm64 { + +#if SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused int +implementation::detect_encodings(const char *input, + size_t length) const noexcept { + // If there is a BOM, then we trust it. + auto bom_encoding = simdutf::BOM::check_bom(input, length); + if (bom_encoding != encoding_type::unspecified) { + return bom_encoding; + } + // todo: reimplement as a one-pass algorithm. 
+ int out = 0; + if (validate_utf8(input, length)) { + out |= encoding_type::UTF8; + } + if ((length % 2) == 0) { + if (validate_utf16le(reinterpret_cast(input), + length / 2)) { + out |= encoding_type::UTF16_LE; + } + } + if ((length % 4) == 0) { + if (validate_utf32(reinterpret_cast(input), length / 4)) { + out |= encoding_type::UTF32_LE; + } + } + return out; +} +#endif // SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused bool +implementation::validate_utf8(const char *buf, size_t len) const noexcept { + return arm64::utf8_validation::generic_validate_utf8(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 +simdutf_warn_unused result implementation::validate_utf8_with_errors( + const char *buf, size_t len) const noexcept { + return arm64::utf8_validation::generic_validate_utf8_with_errors(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_ASCII +simdutf_warn_unused bool +implementation::validate_ascii(const char *buf, size_t len) const noexcept { + return arm64::ascii_validation::generic_validate_ascii(buf, len); +} + +simdutf_warn_unused result implementation::validate_ascii_with_errors( + const char *buf, size_t len) const noexcept { + return arm64::ascii_validation::generic_validate_ascii_with_errors(buf, len); +} +#endif // SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII +simdutf_warn_unused bool +implementation::validate_utf16le_as_ascii(const char16_t *buf, + size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + // empty input is valid. protected the implementation from nullptr. 
+ return true; + } + const char16_t *tail = + arm_validate_utf16_as_ascii(buf, len); + if (tail) { + return scalar::utf16::validate_as_ascii( + tail, len - (tail - buf)); + } else { + return false; + } +} + +simdutf_warn_unused bool +implementation::validate_utf16be_as_ascii(const char16_t *buf, + size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + // empty input is valid. protected the implementation from nullptr. + return true; + } + const char16_t *tail = arm_validate_utf16_as_ascii(buf, len); + if (tail) { + return scalar::utf16::validate_as_ascii( + tail, len - (tail - buf)); + } else { + return false; + } +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused bool +implementation::validate_utf16le(const char16_t *buf, + size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + // empty input is valid. protected the implementation from nullptr. + return true; + } + const char16_t *tail = arm_validate_utf16(buf, len); + if (tail) { + return scalar::utf16::validate(tail, + len - (tail - buf)); + } else { + return false; + } +} +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused bool +implementation::validate_utf16be(const char16_t *buf, + size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + // empty input is valid. protected the implementation from nullptr. 
+ return true; + } + const char16_t *tail = arm_validate_utf16(buf, len); + if (tail) { + return scalar::utf16::validate(tail, len - (tail - buf)); + } else { + return false; + } +} + +simdutf_warn_unused result implementation::validate_utf16le_with_errors( + const char16_t *buf, size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + return result(error_code::SUCCESS, 0); + } + result res = arm_validate_utf16_with_errors(buf, len); + if (res.count != len) { + result scalar_res = scalar::utf16::validate_with_errors( + buf + res.count, len - res.count); + return result(scalar_res.error, res.count + scalar_res.count); + } else { + return res; + } +} + +simdutf_warn_unused result implementation::validate_utf16be_with_errors( + const char16_t *buf, size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + return result(error_code::SUCCESS, 0); + } + result res = arm_validate_utf16_with_errors(buf, len); + if (res.count != len) { + result scalar_res = scalar::utf16::validate_with_errors( + buf + res.count, len - res.count); + return result(scalar_res.error, res.count + scalar_res.count); + } else { + return res; + } +} + +void implementation::to_well_formed_utf16le(const char16_t *input, size_t len, + char16_t *output) const noexcept { + return utf16fix_neon_64bits(input, len, output); +} + +void implementation::to_well_formed_utf16be(const char16_t *input, size_t len, + char16_t *output) const noexcept { + return utf16fix_neon_64bits(input, len, output); +} +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused bool +implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + // empty input is valid. protects the implementation from nullptr. 
+ return true; + } + const char32_t *tail = arm_validate_utf32le(buf, len); + if (tail) { + return scalar::utf32::validate(tail, len - (tail - buf)); + } else { + return false; + } +} +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused result implementation::validate_utf32_with_errors( + const char32_t *buf, size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + return result(error_code::SUCCESS, 0); + } + result res = arm_validate_utf32le_with_errors(buf, len); + if (res.count != len) { + result scalar_res = + scalar::utf32::validate_with_errors(buf + res.count, len - res.count); + return result(scalar_res.error, res.count + scalar_res.count); + } else { + return res; + } +} +#endif // SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( + const char *buf, size_t len, char *utf8_output) const noexcept { + std::pair ret = + arm_convert_latin1_to_utf8(buf, len, utf8_output); + size_t converted_chars = ret.second - utf8_output; + + if (ret.first != buf + len) { + const size_t scalar_converted_chars = scalar::latin1_to_utf8::convert( + ret.first, len - (ret.first - buf), ret.second); + converted_chars += scalar_converted_chars; + } + return converted_chars; +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + std::pair ret = + arm_convert_latin1_to_utf16(buf, len, utf16_output); + size_t converted_chars = ret.second - utf16_output; + if (ret.first != buf + len) { + const size_t scalar_converted_chars = + scalar::latin1_to_utf16::convert( + ret.first, len - (ret.first - buf), ret.second); + converted_chars += scalar_converted_chars; + } + return converted_chars; +} + +simdutf_warn_unused size_t 
implementation::convert_latin1_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + std::pair ret = + arm_convert_latin1_to_utf16(buf, len, utf16_output); + size_t converted_chars = ret.second - utf16_output; + if (ret.first != buf + len) { + const size_t scalar_converted_chars = + scalar::latin1_to_utf16::convert( + ret.first, len - (ret.first - buf), ret.second); + converted_chars += scalar_converted_chars; + } + return converted_chars; +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept { + std::pair ret = + arm_convert_latin1_to_utf32(buf, len, utf32_output); + size_t converted_chars = ret.second - utf32_output; + if (ret.first != buf + len) { + const size_t scalar_converted_chars = scalar::latin1_to_utf32::convert( + ret.first, len - (ret.first - buf), ret.second); + converted_chars += scalar_converted_chars; + } + return converted_chars; +} +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept { + utf8_to_latin1::validating_transcoder converter; + return converter.convert(buf, len, latin1_output); +} + +simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors( + const char *buf, size_t len, char *latin1_output) const noexcept { + utf8_to_latin1::validating_transcoder converter; + return converter.convert_with_errors(buf, len, latin1_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept { + return arm64::utf8_to_latin1::convert_valid(buf, len, latin1_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && 
SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16::validating_transcoder converter; + return converter.convert(buf, len, utf16_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16::validating_transcoder converter; + return converter.convert(buf, len, utf16_output); +} + +simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16::validating_transcoder converter; + return converter.convert_with_errors(buf, len, + utf16_output); +} + +simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16::validating_transcoder converter; + return converter.convert_with_errors(buf, len, utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le( + const char *input, size_t size, char16_t *utf16_output) const noexcept { + return utf8_to_utf16::convert_valid(input, size, + utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be( + const char *input, size_t size, char16_t *utf16_output) const noexcept { + return utf8_to_utf16::convert_valid(input, size, + utf16_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::convert_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept { + utf8_to_utf32::validating_transcoder converter; + return 
converter.convert(buf, len, utf32_output); +} + +simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors( + const char *buf, size_t len, char32_t *utf32_output) const noexcept { + utf8_to_utf32::validating_transcoder converter; + return converter.convert_with_errors(buf, len, utf32_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( + const char *input, size_t size, char32_t *utf32_output) const noexcept { + return utf8_to_utf32::convert_valid(input, size, utf32_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + arm_convert_utf16_to_latin1(buf, len, latin1_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - latin1_output; + + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_latin1::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + arm_convert_utf16_to_latin1(buf, len, latin1_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - latin1_output; + + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_latin1::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused result 
+implementation::convert_utf16le_to_latin1_with_errors( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + arm_convert_utf16_to_latin1_with_errors( + buf, len, latin1_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_latin1::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + latin1_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused result +implementation::convert_utf16be_to_latin1_with_errors( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + arm_convert_utf16_to_latin1_with_errors(buf, len, + latin1_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_latin1::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + latin1_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + // optimization opportunity: implement a custom function. 
+ return convert_utf16be_to_latin1(buf, len, latin1_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + // optimization opportunity: implement a custom function. + return convert_utf16le_to_latin1(buf, len, latin1_output); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + std::pair ret = + arm_convert_utf16_to_utf8(buf, len, utf8_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf8_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_utf8::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + std::pair ret = + arm_convert_utf16_to_utf8(buf, len, utf8_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf8_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_utf8::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + arm_convert_utf16_to_utf8_with_errors(buf, len, + utf8_output); + if (ret.first.error) { + return 
ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_utf8::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf8_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + arm_convert_utf16_to_utf8_with_errors(buf, len, + utf8_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_utf8::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf8_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + return convert_utf16le_to_utf8(buf, len, utf8_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + return convert_utf16be_to_utf8(buf, len, utf8_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 
+ +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + if (simdutf_unlikely(len == 0)) { + return 0; + } + std::pair ret = + arm_convert_utf32_to_utf8(buf, len, utf8_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf8_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = scalar::utf32_to_utf8::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + if (simdutf_unlikely(len == 0)) { + return result(error_code::SUCCESS, 0); + } + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + arm_convert_utf32_to_utf8_with_errors(buf, len, utf8_output); + if (ret.first.count != len) { + result scalar_res = scalar::utf32_to_utf8::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf8_output; // Set count to the number of 8-bit code units written + return ret.first; +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + std::pair ret = + arm_convert_utf16_to_utf32(buf, len, utf32_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf32_output; + if (ret.first != buf + len) { + const 
size_t scalar_saved_bytes = + scalar::utf16_to_utf32::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + std::pair ret = + arm_convert_utf16_to_utf32(buf, len, utf32_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf32_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_utf32::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + arm_convert_utf16_to_utf32_with_errors(buf, len, + utf32_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_utf32::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf32_output; // Set count to the number of 32-bit code units written + return ret.first; +} + +simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + // ret.first.count is always the position in the 
buffer, not the number of + // code units written even if finished + std::pair ret = + arm_convert_utf16_to_utf32_with_errors(buf, len, + utf32_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_utf32::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf32_output; // Set count to the number of 32-bit code units written + return ret.first; +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + arm_convert_utf32_to_latin1(buf, len, latin1_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - latin1_output; + + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = scalar::utf32_to_latin1::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + arm_convert_utf32_to_latin1_with_errors(buf, len, latin1_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = scalar::utf32_to_latin1::convert_with_errors( + buf + ret.first.count, len - 
ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + latin1_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + arm_convert_utf32_to_latin1(buf, len, latin1_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - latin1_output; + + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = scalar::utf32_to_latin1::convert_valid( + ret.first, len - (ret.first - buf), ret.second); + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + // optimization opportunity: implement a custom function. 
+ return convert_utf32_to_utf8(buf, len, utf8_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + std::pair ret = + arm_convert_utf32_to_utf16(buf, len, utf16_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf16_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf32_to_utf16::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + std::pair ret = + arm_convert_utf32_to_utf16(buf, len, utf16_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf16_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf32_to_utf16::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + arm_convert_utf32_to_utf16_with_errors(buf, len, + utf16_output); + if (ret.first.count != len) { + result scalar_res = + scalar::utf32_to_utf16::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + 
} + ret.first.count = + ret.second - + utf16_output; // Set count to the number of 16-bit code units written + return ret.first; +} + +simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + arm_convert_utf32_to_utf16_with_errors(buf, len, + utf16_output); + if (ret.first.count != len) { + result scalar_res = + scalar::utf32_to_utf16::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf16_output; // Set count to the number of 16-bit code units written + return ret.first; +} + +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + return convert_utf32_to_utf16le(buf, len, utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + return convert_utf32_to_utf16be(buf, len, utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + return convert_utf16le_to_utf32(buf, len, utf32_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + return convert_utf16be_to_utf32(buf, len, utf32_output); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 +void implementation::change_endianness_utf16(const char16_t *input, + size_t length, + char16_t *output) const noexcept { 
+ utf16::change_endianness_utf16(input, length, output); +} + +simdutf_warn_unused size_t implementation::count_utf16le( + const char16_t *input, size_t length) const noexcept { + return utf16::count_code_points(input, length); +} + +simdutf_warn_unused size_t implementation::count_utf16be( + const char16_t *input, size_t length) const noexcept { + return utf16::count_code_points(input, length); +} +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 +simdutf_warn_unused size_t +implementation::count_utf8(const char *input, size_t length) const noexcept { + return utf8::count_code_points(input, length); +} +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::latin1_length_from_utf8( + const char *buf, size_t len) const noexcept { + return count_utf8(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::utf8_length_from_latin1( + const char *input, size_t length) const noexcept { + // See + // https://lemire.me/blog/2023/05/15/computing-the-utf-8-size-of-a-latin-1-string-quickly-arm-neon-edition/ + // credit to Pete Cawley + const uint8_t *data = reinterpret_cast(input); + uint64_t result = 0; + const int lanes = sizeof(uint8x16_t); + uint8_t rem = length % lanes; + const uint8_t *simd_end = data + (length / lanes) * lanes; + const uint8x16_t threshold = vdupq_n_u8(0x80); + for (; data < simd_end; data += lanes) { + // load 16 bytes + uint8x16_t input_vec = vld1q_u8(data); + // compare to threshold (0x80) + uint8x16_t withhighbit = vcgeq_u8(input_vec, threshold); + // vertical addition + result -= vaddvq_s8(vreinterpretq_s8_u8(withhighbit)); + } + return result + (length / lanes) * lanes + + scalar::latin1::utf8_length_from_latin1((const char *)simd_end, rem); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && 
SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t implementation::utf8_length_from_utf16le( + const char16_t *input, size_t length) const noexcept { + return arm64_utf8_length_from_utf16_bytemask(input, + length); +} + +simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( + const char16_t *input, size_t length) const noexcept { + return arm64_utf8_length_from_utf16_bytemask(input, length); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::utf32_length_from_utf16le( + const char16_t *input, size_t length) const noexcept { + return utf16::utf32_length_from_utf16(input, length); +} + +simdutf_warn_unused size_t implementation::utf32_length_from_utf16be( + const char16_t *input, size_t length) const noexcept { + return utf16::utf32_length_from_utf16(input, length); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t implementation::utf16_length_from_utf8( + const char *input, size_t length) const noexcept { + return utf8::utf16_length_from_utf8(input, length); +} +simdutf_warn_unused result +implementation::utf8_length_from_utf16le_with_replacement( + const char16_t *input, size_t length) const noexcept { + return arm64_utf8_length_from_utf16_with_replacement( + input, length); +} + +simdutf_warn_unused result +implementation::utf8_length_from_utf16be_with_replacement( + const char16_t *input, size_t length) const noexcept { + return arm64_utf8_length_from_utf16_with_replacement(input, + length); +} + +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::utf8_length_from_utf32( + const char32_t *input, size_t length) const noexcept { + const uint32x4_t v_7f = vmovq_n_u32((uint32_t)0x7f); + const uint32x4_t v_7ff = vmovq_n_u32((uint32_t)0x7ff); + const 
uint32x4_t v_ffff = vmovq_n_u32((uint32_t)0xffff); + const uint32x4_t v_1 = vmovq_n_u32((uint32_t)0x1); + size_t pos = 0; + size_t count = 0; + for (; pos + 4 <= length; pos += 4) { + uint32x4_t in = vld1q_u32(reinterpret_cast(input + pos)); + const uint32x4_t ascii_bytes_bytemask = vcleq_u32(in, v_7f); + const uint32x4_t one_two_bytes_bytemask = vcleq_u32(in, v_7ff); + const uint32x4_t two_bytes_bytemask = + veorq_u32(one_two_bytes_bytemask, ascii_bytes_bytemask); + const uint32x4_t three_bytes_bytemask = + veorq_u32(vcleq_u32(in, v_ffff), one_two_bytes_bytemask); + + const uint16x8_t reduced_ascii_bytes_bytemask = + vreinterpretq_u16_u32(vandq_u32(ascii_bytes_bytemask, v_1)); + const uint16x8_t reduced_two_bytes_bytemask = + vreinterpretq_u16_u32(vandq_u32(two_bytes_bytemask, v_1)); + const uint16x8_t reduced_three_bytes_bytemask = + vreinterpretq_u16_u32(vandq_u32(three_bytes_bytemask, v_1)); + + const uint16x8_t compressed_bytemask0 = + vpaddq_u16(reduced_ascii_bytes_bytemask, reduced_two_bytes_bytemask); + const uint16x8_t compressed_bytemask1 = + vpaddq_u16(reduced_three_bytes_bytemask, reduced_three_bytes_bytemask); + + size_t ascii_count = count_ones( + vgetq_lane_u64(vreinterpretq_u64_u16(compressed_bytemask0), 0)); + size_t two_bytes_count = count_ones( + vgetq_lane_u64(vreinterpretq_u64_u16(compressed_bytemask0), 1)); + size_t three_bytes_count = count_ones( + vgetq_lane_u64(vreinterpretq_u64_u16(compressed_bytemask1), 0)); + + count += 16 - 3 * ascii_count - 2 * two_bytes_count - three_bytes_count; + } + return count + + scalar::utf32::utf8_length_from_utf32(input + pos, length - pos); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::utf16_length_from_utf32( + const char32_t *input, size_t length) const noexcept { + const uint32x4_t v_ffff = vmovq_n_u32((uint32_t)0xffff); + const uint32x4_t v_1 = vmovq_n_u32((uint32_t)0x1); + size_t pos = 0; + 
size_t count = 0; + for (; pos + 4 <= length; pos += 4) { + uint32x4_t in = vld1q_u32(reinterpret_cast(input + pos)); + const uint32x4_t surrogate_bytemask = vcgtq_u32(in, v_ffff); + const uint16x8_t reduced_bytemask = + vreinterpretq_u16_u32(vandq_u32(surrogate_bytemask, v_1)); + const uint16x8_t compressed_bytemask = + vpaddq_u16(reduced_bytemask, reduced_bytemask); + size_t surrogate_count = count_ones( + vgetq_lane_u64(vreinterpretq_u64_u16(compressed_bytemask), 0)); + count += 4 + surrogate_count; + } + return count + + scalar::utf32::utf16_length_from_utf32(input + pos, length - pos); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::utf32_length_from_utf8( + const char *input, size_t length) const noexcept { + return utf8::count_code_points(input, length); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_BASE64 +simdutf_warn_unused result implementation::base64_to_binary( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + if (options & base64_default_or_url) { + if (options == base64_options::base64_default_or_url_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else if (options & base64_url) { + if (options == base64_options::base64_url_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else { + if (options == base64_options::base64_default_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, 
length, options, last_chunk_options); + } + } +} + +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + if (options & base64_default_or_url) { + if (options == base64_options::base64_default_or_url_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else if (options & base64_url) { + if (options == base64_options::base64_url_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else { + if (options == base64_options::base64_default_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } +} + +simdutf_warn_unused result implementation::base64_to_binary( + const char16_t *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + if (options & base64_default_or_url) { + if (options == base64_options::base64_default_or_url_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else if (options & base64_url) { + if (options == base64_options::base64_url_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else { + if (options == 
base64_options::base64_default_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } +} + +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char16_t *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + if (options & base64_default_or_url) { + if (options == base64_options::base64_default_or_url_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else if (options & base64_url) { + if (options == base64_options::base64_url_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else { + if (options == base64_options::base64_default_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } +} + +size_t implementation::binary_to_base64(const char *input, size_t length, + char *output, + base64_options options) const noexcept { + return encode_base64(output, input, length, options); +} + +size_t implementation::binary_to_base64_with_lines( + const char *input, size_t length, char *output, size_t line_length, + base64_options options) const noexcept { + return encode_base64_impl(output, input, length, options, line_length); +} + +const char *implementation::find(const char *start, const char *end, + char character) const noexcept { + return util_find(start, end, character); +} + +const char16_t *implementation::find(const char16_t *start, 
const char16_t *end, + char16_t character) const noexcept { + return util_find(start, end, character); +} +#endif // SIMDUTF_FEATURE_BASE64 + +} // namespace arm64 +} // namespace simdutf + +/* begin file src/simdutf/arm64/end.h */ +#undef SIMDUTF_SIMD_HAS_BYTEMASK +/* end file src/simdutf/arm64/end.h */ +/* end file src/arm64/implementation.cpp */ +#endif +#if SIMDUTF_IMPLEMENTATION_FALLBACK +/* begin file src/fallback/implementation.cpp */ +/* begin file src/simdutf/fallback/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "fallback" +// #define SIMDUTF_IMPLEMENTATION fallback +/* end file src/simdutf/fallback/begin.h */ + +namespace simdutf { +namespace fallback { + +#if SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused int +implementation::detect_encodings(const char *input, + size_t length) const noexcept { + // If there is a BOM, then we trust it. + auto bom_encoding = simdutf::BOM::check_bom(input, length); + if (bom_encoding != encoding_type::unspecified) { + return bom_encoding; + } + int out = 0; + // todo: reimplement as a one-pass algorithm. 
+ if (validate_utf8(input, length)) { + out |= encoding_type::UTF8; + } + if ((length % 2) == 0) { + if (validate_utf16le(reinterpret_cast(input), + length / 2)) { + out |= encoding_type::UTF16_LE; + } + } + if ((length % 4) == 0) { + if (validate_utf32(reinterpret_cast(input), length / 4)) { + out |= encoding_type::UTF32_LE; + } + } + return out; +} +#endif // SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused bool +implementation::validate_utf8(const char *buf, size_t len) const noexcept { + return scalar::utf8::validate(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 +simdutf_warn_unused result implementation::validate_utf8_with_errors( + const char *buf, size_t len) const noexcept { + return scalar::utf8::validate_with_errors(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_ASCII +simdutf_warn_unused bool +implementation::validate_ascii(const char *buf, size_t len) const noexcept { + return scalar::ascii::validate(buf, len); +} + +simdutf_warn_unused result implementation::validate_ascii_with_errors( + const char *buf, size_t len) const noexcept { + return scalar::ascii::validate_with_errors(buf, len); +} +#endif // SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII +simdutf_warn_unused bool +implementation::validate_utf16le_as_ascii(const char16_t *buf, + size_t len) const noexcept { + return scalar::utf16::validate_as_ascii(buf, len); +} + +simdutf_warn_unused bool +implementation::validate_utf16be_as_ascii(const char16_t *buf, + size_t len) const noexcept { + return scalar::utf16::validate_as_ascii(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused bool +implementation::validate_utf16le(const char16_t *buf, + size_t len) const noexcept { + return scalar::utf16::validate(buf, len); +} 
+#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused bool +implementation::validate_utf16be(const char16_t *buf, + size_t len) const noexcept { + return scalar::utf16::validate(buf, len); +} + +simdutf_warn_unused result implementation::validate_utf16le_with_errors( + const char16_t *buf, size_t len) const noexcept { + return scalar::utf16::validate_with_errors(buf, len); +} + +simdutf_warn_unused result implementation::validate_utf16be_with_errors( + const char16_t *buf, size_t len) const noexcept { + return scalar::utf16::validate_with_errors(buf, len); +} + +void implementation::to_well_formed_utf16le(const char16_t *input, size_t len, + char16_t *output) const noexcept { + return scalar::utf16::to_well_formed_utf16(input, len, + output); +} + +void implementation::to_well_formed_utf16be(const char16_t *input, size_t len, + char16_t *output) const noexcept { + return scalar::utf16::to_well_formed_utf16(input, len, + output); +} +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused bool +implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { + return scalar::utf32::validate(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused result implementation::validate_utf32_with_errors( + const char32_t *buf, size_t len) const noexcept { + return scalar::utf32::validate_with_errors(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( + const char *buf, size_t len, char *utf8_output) const noexcept { + return scalar::latin1_to_utf8::convert(buf, len, utf8_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t 
implementation::convert_latin1_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + return scalar::latin1_to_utf16::convert(buf, len, + utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + return scalar::latin1_to_utf16::convert(buf, len, + utf16_output); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept { + return scalar::latin1_to_utf32::convert(buf, len, utf32_output); +} +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept { + return scalar::utf8_to_latin1::convert(buf, len, latin1_output); +} + +simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors( + const char *buf, size_t len, char *latin1_output) const noexcept { + return scalar::utf8_to_latin1::convert_with_errors(buf, len, latin1_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept { + return scalar::utf8_to_latin1::convert_valid(buf, len, latin1_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + return scalar::utf8_to_utf16::convert(buf, len, + utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + return 
scalar::utf8_to_utf16::convert(buf, len, + utf16_output); +} + +simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + return scalar::utf8_to_utf16::convert_with_errors( + buf, len, utf16_output); +} + +simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + return scalar::utf8_to_utf16::convert_with_errors( + buf, len, utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + return scalar::utf8_to_utf16::convert_valid(buf, len, + utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + return scalar::utf8_to_utf16::convert_valid(buf, len, + utf16_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::convert_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept { + return scalar::utf8_to_utf32::convert(buf, len, utf32_output); +} + +simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors( + const char *buf, size_t len, char32_t *utf32_output) const noexcept { + return scalar::utf8_to_utf32::convert_with_errors(buf, len, utf32_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( + const char *input, size_t size, char32_t *utf32_output) const noexcept { + return scalar::utf8_to_utf32::convert_valid(input, size, utf32_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( + const char16_t *buf, size_t len, char 
*latin1_output) const noexcept { + return scalar::utf16_to_latin1::convert(buf, len, + latin1_output); +} + +simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + return scalar::utf16_to_latin1::convert(buf, len, + latin1_output); +} + +simdutf_warn_unused result +implementation::convert_utf16le_to_latin1_with_errors( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + return scalar::utf16_to_latin1::convert_with_errors( + buf, len, latin1_output); +} + +simdutf_warn_unused result +implementation::convert_utf16be_to_latin1_with_errors( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + return scalar::utf16_to_latin1::convert_with_errors( + buf, len, latin1_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + return scalar::utf16_to_latin1::convert_valid( + buf, len, latin1_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + return scalar::utf16_to_latin1::convert_valid(buf, len, + latin1_output); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + return scalar::utf16_to_utf8::convert(buf, len, + utf8_output); +} + +simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + return scalar::utf16_to_utf8::convert(buf, len, utf8_output); +} + +simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + return 
scalar::utf16_to_utf8::convert_with_errors( + buf, len, utf8_output); +} + +simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + return scalar::utf16_to_utf8::convert_with_errors( + buf, len, utf8_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + return scalar::utf16_to_utf8::convert_valid(buf, len, + utf8_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + return scalar::utf16_to_utf8::convert_valid(buf, len, + utf8_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + return scalar::utf32_to_latin1::convert(buf, len, latin1_output); +} + +simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + return scalar::utf32_to_latin1::convert_with_errors(buf, len, latin1_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + return scalar::utf32_to_latin1::convert_valid(buf, len, latin1_output); +} +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + return scalar::utf32_to_utf8::convert(buf, len, utf8_output); +} + +simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( + const char32_t *buf, size_t len, char *utf8_output) 
const noexcept { + return scalar::utf32_to_utf8::convert_with_errors(buf, len, utf8_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + return scalar::utf32_to_utf8::convert_valid(buf, len, utf8_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + return scalar::utf32_to_utf16::convert(buf, len, + utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + return scalar::utf32_to_utf16::convert(buf, len, + utf16_output); +} + +simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + return scalar::utf32_to_utf16::convert_with_errors( + buf, len, utf16_output); +} + +simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + return scalar::utf32_to_utf16::convert_with_errors( + buf, len, utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + return scalar::utf32_to_utf16::convert_valid( + buf, len, utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + return scalar::utf32_to_utf16::convert_valid(buf, len, + utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + return 
scalar::utf16_to_utf32::convert(buf, len, + utf32_output); +} + +simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + return scalar::utf16_to_utf32::convert(buf, len, + utf32_output); +} + +simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + return scalar::utf16_to_utf32::convert_with_errors( + buf, len, utf32_output); +} + +simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + return scalar::utf16_to_utf32::convert_with_errors( + buf, len, utf32_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + return scalar::utf16_to_utf32::convert_valid( + buf, len, utf32_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + return scalar::utf16_to_utf32::convert_valid(buf, len, + utf32_output); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 +void implementation::change_endianness_utf16(const char16_t *input, + size_t length, + char16_t *output) const noexcept { + scalar::utf16::change_endianness_utf16(input, length, output); +} + +simdutf_warn_unused size_t implementation::count_utf16le( + const char16_t *input, size_t length) const noexcept { + return scalar::utf16::count_code_points(input, length); +} + +simdutf_warn_unused size_t implementation::count_utf16be( + const char16_t *input, size_t length) const noexcept { + return scalar::utf16::count_code_points(input, length); +} +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 +simdutf_warn_unused size_t +implementation::count_utf8(const char 
*input, size_t length) const noexcept { + return scalar::utf8::count_code_points(input, length); +} +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::latin1_length_from_utf8( + const char *buf, size_t len) const noexcept { + return scalar::utf8::count_code_points(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::utf8_length_from_latin1( + const char *input, size_t length) const noexcept { + return scalar::latin1_to_utf8::utf8_length_from_latin1(input, length); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t implementation::utf8_length_from_utf16le( + const char16_t *input, size_t length) const noexcept { + return scalar::utf16::utf8_length_from_utf16(input, + length); +} + +simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( + const char16_t *input, size_t length) const noexcept { + return scalar::utf16::utf8_length_from_utf16(input, length); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::utf32_length_from_utf16le( + const char16_t *input, size_t length) const noexcept { + return scalar::utf16::utf32_length_from_utf16(input, + length); +} + +simdutf_warn_unused size_t implementation::utf32_length_from_utf16be( + const char16_t *input, size_t length) const noexcept { + return scalar::utf16::utf32_length_from_utf16(input, length); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t implementation::utf16_length_from_utf8( + const char *input, size_t length) const noexcept { + return scalar::utf8::utf16_length_from_utf8(input, length); +} +simdutf_warn_unused result 
+implementation::utf8_length_from_utf16le_with_replacement( + const char16_t *input, size_t length) const noexcept { + return scalar::utf16::utf8_length_from_utf16_with_replacement< + endianness::LITTLE>(input, length); +} + +simdutf_warn_unused result +implementation::utf8_length_from_utf16be_with_replacement( + const char16_t *input, size_t length) const noexcept { + return scalar::utf16::utf8_length_from_utf16_with_replacement< + endianness::BIG>(input, length); +} + +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::utf8_length_from_utf32( + const char32_t *input, size_t length) const noexcept { + return scalar::utf32::utf8_length_from_utf32(input, length); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::utf16_length_from_utf32( + const char32_t *input, size_t length) const noexcept { + return scalar::utf32::utf16_length_from_utf32(input, length); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::utf32_length_from_utf8( + const char *input, size_t length) const noexcept { + return scalar::utf8::count_code_points(input, length); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_BASE64 + +simdutf_warn_unused result implementation::base64_to_binary( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + return simdutf::scalar::base64::base64_to_binary_details_impl( + input, length, output, options, last_chunk_options); +} + +simdutf_warn_unused result implementation::base64_to_binary( + const char16_t *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + 
return simdutf::scalar::base64::base64_to_binary_details_impl( + input, length, output, options, last_chunk_options); +} + +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + return simdutf::scalar::base64::base64_to_binary_details_impl( + input, length, output, options, last_chunk_options); +} + +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char16_t *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + return simdutf::scalar::base64::base64_to_binary_details_impl( + input, length, output, options, last_chunk_options); +} + +size_t implementation::binary_to_base64(const char *input, size_t length, + char *output, + base64_options options) const noexcept { + return scalar::base64::tail_encode_base64(output, input, length, options); +} + +size_t implementation::binary_to_base64_with_lines( + const char *input, size_t length, char *output, size_t line_length, + base64_options options) const noexcept { + return scalar::base64::tail_encode_base64_impl(output, input, length, + options, line_length); +} + +const char *implementation::find(const char *start, const char *end, + char character) const noexcept { + return std::find(start, end, character); +} + +const char16_t *implementation::find(const char16_t *start, const char16_t *end, + char16_t character) const noexcept { + return std::find(start, end, character); +} +#endif // SIMDUTF_FEATURE_BASE64 + +} // namespace fallback +} // namespace simdutf + +/* begin file src/simdutf/fallback/end.h */ +/* end file src/simdutf/fallback/end.h */ +/* end file src/fallback/implementation.cpp */ +#endif +#if SIMDUTF_IMPLEMENTATION_ICELAKE +/* begin file src/icelake/implementation.cpp */ +#include +#include + +/* begin file src/simdutf/icelake/begin.h */ 
+// redefining SIMDUTF_IMPLEMENTATION to "icelake" +// #define SIMDUTF_IMPLEMENTATION icelake + +#if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE +// nothing needed. +#else +SIMDUTF_TARGET_ICELAKE +#endif + +#if SIMDUTF_GCC11ORMORE // workaround for + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 +// clang-format off +SIMDUTF_DISABLE_GCC_WARNING(-Wmaybe-uninitialized) +// clang-format on +#endif // end of workaround +/* end file src/simdutf/icelake/begin.h */ +namespace simdutf { +namespace icelake { +namespace { +#ifndef SIMDUTF_ICELAKE_H + #error "icelake.h must be included" +#endif +using namespace simd; + +/* begin file src/icelake/icelake_macros.inl.cpp */ + +/* + This upcoming macro (SIMDUTF_ICELAKE_TRANSCODE16) takes 16 + 4 bytes (of a + UTF-8 string) and loads all possible 4-byte substring into an AVX512 + register. + + For example if we have bytes abcdefgh... we create following 32-bit lanes + + [abcd|bcde|cdef|defg|efgh|...] + ^ ^ + byte 0 of reg byte 63 of reg +*/ +/** pshufb + # lane{0,1,2} have got bytes: [ 0, 1, 2, 3, 4, 5, 6, 8, 9, 10, + 11, 12, 13, 14, 15] # lane3 has got bytes: [ 16, 17, 18, 19, 4, 5, + 6, 8, 9, 10, 11, 12, 13, 14, 15] + + expand_ver2 = [ + # lane 0: + 0, 1, 2, 3, + 1, 2, 3, 4, + 2, 3, 4, 5, + 3, 4, 5, 6, + + # lane 1: + 4, 5, 6, 7, + 5, 6, 7, 8, + 6, 7, 8, 9, + 7, 8, 9, 10, + + # lane 2: + 8, 9, 10, 11, + 9, 10, 11, 12, + 10, 11, 12, 13, + 11, 12, 13, 14, + + # lane 3 order: 13, 14, 15, 16 14, 15, 16, 17, 15, 16, 17, 18, 16, + 17, 18, 19 12, 13, 14, 15, 13, 14, 15, 0, 14, 15, 0, 1, 15, 0, 1, 2, + ] +*/ + +#define SIMDUTF_ICELAKE_TRANSCODE16(LANE0, LANE1, MASKED) \ + { \ + const __m512i merged = _mm512_mask_mov_epi32(LANE0, 0x1000, LANE1); \ + const __m512i expand_ver2 = _mm512_setr_epi64( \ + 0x0403020103020100, 0x0605040305040302, 0x0807060507060504, \ + 0x0a09080709080706, 0x0c0b0a090b0a0908, 0x0e0d0c0b0d0c0b0a, \ + 0x000f0e0d0f0e0d0c, 0x0201000f01000f0e); \ + const __m512i input = _mm512_shuffle_epi8(merged, expand_ver2); \ + \ + 
__mmask16 leading_bytes; \ + const __m512i v_0000_00c0 = _mm512_set1_epi32(0xc0); \ + const __m512i t0 = _mm512_and_si512(input, v_0000_00c0); \ + const __m512i v_0000_0080 = _mm512_set1_epi32(0x80); \ + leading_bytes = _mm512_cmpneq_epu32_mask(t0, v_0000_0080); \ + \ + __m512i char_class; \ + char_class = _mm512_srli_epi32(input, 4); \ + /* char_class = ((input >> 4) & 0x0f) | 0x80808000 */ \ + const __m512i v_0000_000f = _mm512_set1_epi32(0x0f); \ + const __m512i v_8080_8000 = _mm512_set1_epi32(0x80808000); \ + char_class = \ + _mm512_ternarylogic_epi32(char_class, v_0000_000f, v_8080_8000, 0xea); \ + \ + const int valid_count = static_cast(count_ones(leading_bytes)); \ + const __m512i utf32 = expanded_utf8_to_utf32(char_class, input); \ + \ + const __m512i out = _mm512_mask_compress_epi32(_mm512_setzero_si512(), \ + leading_bytes, utf32); \ + \ + if (UTF32) { \ + if (MASKED) { \ + const __mmask16 valid = uint16_t((1 << valid_count) - 1); \ + _mm512_mask_storeu_epi32((__m512i *)output, valid, out); \ + } else { \ + _mm512_storeu_si512((__m512i *)output, out); \ + } \ + output += valid_count; \ + } else { \ + if (MASKED) { \ + output += utf32_to_utf16_masked( \ + byteflip, out, valid_count, reinterpret_cast(output)); \ + } else { \ + output += utf32_to_utf16( \ + byteflip, out, valid_count, reinterpret_cast(output)); \ + } \ + } \ + } + +#define SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(INPUT, VALID_COUNT, MASKED) \ + { \ + if (UTF32) { \ + if (MASKED) { \ + const __mmask16 valid_mask = uint16_t((1 << VALID_COUNT) - 1); \ + _mm512_mask_storeu_epi32((__m512i *)output, valid_mask, INPUT); \ + } else { \ + _mm512_storeu_si512((__m512i *)output, INPUT); \ + } \ + output += VALID_COUNT; \ + } else { \ + if (MASKED) { \ + output += utf32_to_utf16_masked( \ + byteflip, INPUT, VALID_COUNT, \ + reinterpret_cast(output)); \ + } else { \ + output += \ + utf32_to_utf16(byteflip, INPUT, VALID_COUNT, \ + reinterpret_cast(output)); \ + } \ + } \ + } + +#define 
SIMDUTF_ICELAKE_STORE_ASCII(UTF32, utf8, output) \ + if (UTF32) { \ + const __m128i t0 = _mm512_castsi512_si128(utf8); \ + const __m128i t1 = _mm512_extracti32x4_epi32(utf8, 1); \ + const __m128i t2 = _mm512_extracti32x4_epi32(utf8, 2); \ + const __m128i t3 = _mm512_extracti32x4_epi32(utf8, 3); \ + _mm512_storeu_si512((__m512i *)(output + 0 * 16), \ + _mm512_cvtepu8_epi32(t0)); \ + _mm512_storeu_si512((__m512i *)(output + 1 * 16), \ + _mm512_cvtepu8_epi32(t1)); \ + _mm512_storeu_si512((__m512i *)(output + 2 * 16), \ + _mm512_cvtepu8_epi32(t2)); \ + _mm512_storeu_si512((__m512i *)(output + 3 * 16), \ + _mm512_cvtepu8_epi32(t3)); \ + } else { \ + const __m256i h0 = _mm512_castsi512_si256(utf8); \ + const __m256i h1 = _mm512_extracti64x4_epi64(utf8, 1); \ + if (big_endian) { \ + _mm512_storeu_si512( \ + (__m512i *)(output + 0 * 16), \ + _mm512_shuffle_epi8(_mm512_cvtepu8_epi16(h0), byteflip)); \ + _mm512_storeu_si512( \ + (__m512i *)(output + 2 * 16), \ + _mm512_shuffle_epi8(_mm512_cvtepu8_epi16(h1), byteflip)); \ + } else { \ + _mm512_storeu_si512((__m512i *)(output + 0 * 16), \ + _mm512_cvtepu8_epi16(h0)); \ + _mm512_storeu_si512((__m512i *)(output + 2 * 16), \ + _mm512_cvtepu8_epi16(h1)); \ + } \ + } +/* end file src/icelake/icelake_macros.inl.cpp */ +/* begin file src/icelake/icelake_common.inl.cpp */ +// file included directly +/** + * Store the last N bytes of previous followed by 512-N bytes from input. 
+ */ +template __m512i prev(__m512i input, __m512i previous) { + static_assert(N <= 32, "N must be no larger than 32"); + const __m512i movemask = + _mm512_setr_epi32(28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); + const __m512i rotated = _mm512_permutex2var_epi32(input, movemask, previous); +#if SIMDUTF_GCC8 || SIMDUTF_GCC9 + constexpr int shift = 16 - N; // workaround for GCC8,9 + return _mm512_alignr_epi8(input, rotated, shift); +#else + return _mm512_alignr_epi8(input, rotated, 16 - N); +#endif // SIMDUTF_GCC8 || SIMDUTF_GCC9 +} + +template +__m512i shuffle_epi128(__m512i v) { + static_assert((idx0 >= 0 && idx0 <= 3), "idx0 must be in range 0..3"); + static_assert((idx1 >= 0 && idx1 <= 3), "idx1 must be in range 0..3"); + static_assert((idx2 >= 0 && idx2 <= 3), "idx2 must be in range 0..3"); + static_assert((idx3 >= 0 && idx3 <= 3), "idx3 must be in range 0..3"); + + constexpr unsigned shuffle = idx0 | (idx1 << 2) | (idx2 << 4) | (idx3 << 6); + return _mm512_shuffle_i32x4(v, v, shuffle); +} + +template constexpr __m512i broadcast_epi128(__m512i v) { + return shuffle_epi128(v); +} + +simdutf_really_inline __m512i broadcast_128bit_lane(__m128i lane) { + const __m512i tmp = _mm512_castsi128_si512(lane); + + return broadcast_epi128<0>(tmp); +} +/* end file src/icelake/icelake_common.inl.cpp */ +#if SIMDUTF_FEATURE_UTF8 +/* begin file src/icelake/icelake_utf8_common.inl.cpp */ +// Common procedures for both validating and non-validating conversions from +// UTF-8. +enum block_processing_mode { SIMDUTF_FULL, SIMDUTF_TAIL }; + +using utf8_to_utf16_result = std::pair; +using utf8_to_utf32_result = std::pair; + +/* + process_block_utf8_to_utf16 converts up to 64 bytes from 'in' from UTF-8 + to UTF-16. When tail = SIMDUTF_FULL, then the full input buffer (64 bytes) + might be used. When tail = SIMDUTF_TAIL, we take into account 'gap' which + indicates how many input bytes are relevant. + + Returns true when the result is correct, otherwise it returns false. 
+ + The provided in and out pointers are advanced according to how many input + bytes have been processed, upon success. +*/ +template +simdutf_really_inline bool +process_block_utf8_to_utf16(const char *&in, char16_t *&out, size_t gap) { + // constants + __m512i mask_identity = _mm512_set_epi8( + 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, + 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, + 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, + 8, 7, 6, 5, 4, 3, 2, 1, 0); + __m512i mask_c0c0c0c0 = _mm512_set1_epi32(0xc0c0c0c0); + __m512i mask_80808080 = _mm512_set1_epi32(0x80808080); + __m512i mask_f0f0f0f0 = _mm512_set1_epi32(0xf0f0f0f0); + __m512i mask_dfdfdfdf_tail = _mm512_set_epi64( + 0xffffdfdfdfdfdfdf, 0xdfdfdfdfdfdfdfdf, 0xdfdfdfdfdfdfdfdf, + 0xdfdfdfdfdfdfdfdf, 0xdfdfdfdfdfdfdfdf, 0xdfdfdfdfdfdfdfdf, + 0xdfdfdfdfdfdfdfdf, 0xdfdfdfdfdfdfdfdf); + __m512i mask_c2c2c2c2 = _mm512_set1_epi32(0xc2c2c2c2); + __m512i mask_ffffffff = _mm512_set1_epi32(0xffffffff); + __m512i mask_d7c0d7c0 = _mm512_set1_epi32(0xd7c0d7c0); + __m512i mask_dc00dc00 = _mm512_set1_epi32(0xdc00dc00); + __m512i byteflip = _mm512_setr_epi64(0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809); + // Note that 'tail' is a compile-time constant ! + __mmask64 b = + (tail == SIMDUTF_FULL) ? 0xFFFFFFFFFFFFFFFF : (uint64_t(1) << gap) - 1; + __m512i input = (tail == SIMDUTF_FULL) ? _mm512_loadu_si512(in) + : _mm512_maskz_loadu_epi8(b, in); + __mmask64 m1 = (tail == SIMDUTF_FULL) + ? 
_mm512_cmplt_epu8_mask(input, mask_80808080) + : _mm512_mask_cmplt_epu8_mask(b, input, mask_80808080); + if (_ktestc_mask64_u8(m1, + b)) { // NOT(m1) AND b -- if all zeroes, then all ASCII + // alternatively, we could do 'if (m1 == b) { ' + if (tail == SIMDUTF_FULL) { + in += 64; // consumed 64 bytes + // we convert a full 64-byte block, writing 128 bytes. + __m512i input1 = _mm512_cvtepu8_epi16(_mm512_castsi512_si256(input)); + if (big_endian) { + input1 = _mm512_shuffle_epi8(input1, byteflip); + } + _mm512_storeu_si512(out, input1); + out += 32; + __m512i input2 = + _mm512_cvtepu8_epi16(_mm512_extracti64x4_epi64(input, 1)); + if (big_endian) { + input2 = _mm512_shuffle_epi8(input2, byteflip); + } + _mm512_storeu_si512(out, input2); + out += 32; + return true; // we are done + } else { + in += gap; + if (gap <= 32) { + __m512i input1 = _mm512_cvtepu8_epi16(_mm512_castsi512_si256(input)); + if (big_endian) { + input1 = _mm512_shuffle_epi8(input1, byteflip); + } + _mm512_mask_storeu_epi16(out, __mmask32((uint64_t(1) << (gap)) - 1), + input1); + out += gap; + } else { + __m512i input1 = _mm512_cvtepu8_epi16(_mm512_castsi512_si256(input)); + if (big_endian) { + input1 = _mm512_shuffle_epi8(input1, byteflip); + } + _mm512_storeu_si512(out, input1); + out += 32; + __m512i input2 = + _mm512_cvtepu8_epi16(_mm512_extracti64x4_epi64(input, 1)); + if (big_endian) { + input2 = _mm512_shuffle_epi8(input2, byteflip); + } + _mm512_mask_storeu_epi16( + out, __mmask32((uint32_t(1) << (gap - 32)) - 1), input2); + out += gap - 32; + } + return true; // we are done + } + } + // classify characters further + __mmask64 m234 = _mm512_cmp_epu8_mask( + mask_c0c0c0c0, input, + _MM_CMPINT_LE); // 0xc0 <= input, 2, 3, or 4 leading byte + __mmask64 m34 = + _mm512_cmp_epu8_mask(mask_dfdfdfdf_tail, input, + _MM_CMPINT_LT); // 0xdf < input, 3 or 4 leading byte + + __mmask64 milltwobytes = _mm512_mask_cmp_epu8_mask( + m234, input, mask_c2c2c2c2, + _MM_CMPINT_LT); // 0xc0 <= input < 0xc2 (illegal 
two byte sequence) + // Overlong 2-byte sequence + if (_ktestz_mask64_u8(milltwobytes, milltwobytes) == 0) { + // Overlong 2-byte sequence + return false; + } + if (_ktestz_mask64_u8(m34, m34) == 0) { + // We have a 3-byte sequence and/or a 2-byte sequence, or possibly even a + // 4-byte sequence! + __mmask64 m4 = _mm512_cmp_epu8_mask( + input, mask_f0f0f0f0, + _MM_CMPINT_NLT); // 0xf0 <= zmm0 (4 byte start bytes) + + __mmask64 mask_not_ascii = (tail == SIMDUTF_FULL) + ? _knot_mask64(m1) + : _kand_mask64(_knot_mask64(m1), b); + + __mmask64 mp1 = _kshiftli_mask64(m234, 1); + __mmask64 mp2 = _kshiftli_mask64(m34, 2); + // We could do it as follows... + // if (_kortestz_mask64_u8(m4,m4)) { // compute the bitwise OR of the 64-bit + // masks a and b and return 1 if all zeroes but GCC generates better code + // when we do: + if (m4 == 0) { // compute the bitwise OR of the 64-bit masks a and b and + // return 1 if all zeroes + // Fast path with 1,2,3 bytes + __mmask64 mc = _kor_mask64(mp1, mp2); // expected continuation bytes + __mmask64 m1234 = _kor_mask64(m1, m234); + // mismatched continuation bytes: + if (tail == SIMDUTF_FULL) { + __mmask64 xnormcm1234 = _kxnor_mask64( + mc, + m1234); // XNOR of mc and m1234 should be all zero if they differ + // the presence of a 1 bit indicates that they overlap. + // _kortestz_mask64_u8: compute the bitwise OR of 64-bit masksand return + // 1 if all zeroes. 
+ if (!_kortestz_mask64_u8(xnormcm1234, xnormcm1234)) { + return false; + } + } else { + __mmask64 bxorm1234 = _kxor_mask64(b, m1234); + if (mc != bxorm1234) { + return false; + } + } + // mend: identifying the last bytes of each sequence to be decoded + __mmask64 mend = _kshiftri_mask64(m1234, 1); + if (tail != SIMDUTF_FULL) { + mend = _kor_mask64(mend, (uint64_t(1) << (gap - 1))); + } + + __m512i last_and_third = _mm512_maskz_compress_epi8(mend, mask_identity); + __m512i last_and_thirdu16 = + _mm512_cvtepu8_epi16(_mm512_castsi512_si256(last_and_third)); + + __m512i nonasciitags = _mm512_maskz_mov_epi8( + mask_not_ascii, mask_c0c0c0c0); // ASCII: 00000000 other: 11000000 + __m512i clearedbytes = _mm512_andnot_si512( + nonasciitags, input); // high two bits cleared where not ASCII + __m512i lastbytes = _mm512_maskz_permutexvar_epi8( + 0x5555555555555555, last_and_thirdu16, + clearedbytes); // the last byte of each character + + __mmask64 mask_before_non_ascii = _kshiftri_mask64( + mask_not_ascii, 1); // bytes that precede non-ASCII bytes + __m512i indexofsecondlastbytes = _mm512_add_epi16( + mask_ffffffff, last_and_thirdu16); // indices of the second last bytes + __m512i beforeasciibytes = + _mm512_maskz_mov_epi8(mask_before_non_ascii, clearedbytes); + __m512i secondlastbytes = _mm512_maskz_permutexvar_epi8( + 0x5555555555555555, indexofsecondlastbytes, + beforeasciibytes); // the second last bytes (of two, three byte seq, + // surrogates) + secondlastbytes = + _mm512_slli_epi16(secondlastbytes, 6); // shifted into position + + __m512i indexofthirdlastbytes = _mm512_add_epi16( + mask_ffffffff, + indexofsecondlastbytes); // indices of the second last bytes + __m512i thirdlastbyte = + _mm512_maskz_mov_epi8(m34, + clearedbytes); // only those that are the third + // last byte of a sequence + __m512i thirdlastbytes = _mm512_maskz_permutexvar_epi8( + 0x5555555555555555, indexofthirdlastbytes, + thirdlastbyte); // the third last bytes (of three byte sequences, hi + // 
surrogate) + thirdlastbytes = + _mm512_slli_epi16(thirdlastbytes, 12); // shifted into position + __m512i Wout = _mm512_ternarylogic_epi32(lastbytes, secondlastbytes, + thirdlastbytes, 254); + // the elements of Wout excluding the last element if it happens to be a + // high surrogate: + + __mmask64 mprocessed = + (tail == SIMDUTF_FULL) + ? _pdep_u64(0xFFFFFFFF, mend) + : _pdep_u64( + 0xFFFFFFFF, + _kand_mask64( + mend, b)); // we adjust mend at the end of the output. + + // Encodings out of range... + { + // the location of 3-byte sequence start bytes in the input + __mmask64 m3 = m34 & (b ^ m4); + // code units in Wout corresponding to 3-byte sequences. + __mmask32 M3 = __mmask32(_pext_u64(m3 << 2, mend)); + __m512i mask_08000800 = _mm512_set1_epi32(0x08000800); + __mmask32 Msmall800 = + _mm512_mask_cmplt_epu16_mask(M3, Wout, mask_08000800); + __m512i mask_d800d800 = _mm512_set1_epi32(0xd800d800); + __m512i Moutminusd800 = _mm512_sub_epi16(Wout, mask_d800d800); + __mmask32 M3s = + _mm512_mask_cmplt_epu16_mask(M3, Moutminusd800, mask_08000800); + if (_kor_mask32(Msmall800, M3s)) { + return false; + } + } + int64_t nout = _mm_popcnt_u64(mprocessed); + in += 64 - _lzcnt_u64(mprocessed); + if (big_endian) { + Wout = _mm512_shuffle_epi8(Wout, byteflip); + } + _mm512_mask_storeu_epi16(out, __mmask32((uint64_t(1) << nout) - 1), Wout); + out += nout; + return true; // ok + } + // + // We have a 4-byte sequence, this is the general case. + // Slow! 
+ __mmask64 mp3 = _kshiftli_mask64(m4, 3); + __mmask64 mc = + _kor_mask64(_kor_mask64(mp1, mp2), mp3); // expected continuation bytes + __mmask64 m1234 = _kor_mask64(m1, m234); + + // mend: identifying the last bytes of each sequence to be decoded + __mmask64 mend = + _kor_mask64(_kshiftri_mask64(_kor_mask64(mp3, m1234), 1), mp3); + if (tail != SIMDUTF_FULL) { + mend = _kor_mask64(mend, __mmask64(uint64_t(1) << (gap - 1))); + } + __m512i last_and_third = _mm512_maskz_compress_epi8(mend, mask_identity); + __m512i last_and_thirdu16 = + _mm512_cvtepu8_epi16(_mm512_castsi512_si256(last_and_third)); + + __m512i nonasciitags = _mm512_maskz_mov_epi8( + mask_not_ascii, mask_c0c0c0c0); // ASCII: 00000000 other: 11000000 + __m512i clearedbytes = _mm512_andnot_si512( + nonasciitags, input); // high two bits cleared where not ASCII + __m512i lastbytes = _mm512_maskz_permutexvar_epi8( + 0x5555555555555555, last_and_thirdu16, + clearedbytes); // the last byte of each character + + __mmask64 mask_before_non_ascii = _kshiftri_mask64( + mask_not_ascii, 1); // bytes that precede non-ASCII bytes + __m512i indexofsecondlastbytes = _mm512_add_epi16( + mask_ffffffff, last_and_thirdu16); // indices of the second last bytes + __m512i beforeasciibytes = + _mm512_maskz_mov_epi8(mask_before_non_ascii, clearedbytes); + __m512i secondlastbytes = _mm512_maskz_permutexvar_epi8( + 0x5555555555555555, indexofsecondlastbytes, + beforeasciibytes); // the second last bytes (of two, three byte seq, + // surrogates) + secondlastbytes = + _mm512_slli_epi16(secondlastbytes, 6); // shifted into position + + __m512i indexofthirdlastbytes = _mm512_add_epi16( + mask_ffffffff, + indexofsecondlastbytes); // indices of the second last bytes + __m512i thirdlastbyte = _mm512_maskz_mov_epi8( + m34, + clearedbytes); // only those that are the third last byte of a sequence + __m512i thirdlastbytes = _mm512_maskz_permutexvar_epi8( + 0x5555555555555555, indexofthirdlastbytes, + thirdlastbyte); // the third last bytes 
(of three byte sequences, hi + // surrogate) + thirdlastbytes = + _mm512_slli_epi16(thirdlastbytes, 12); // shifted into position + __m512i thirdsecondandlastbytes = _mm512_ternarylogic_epi32( + lastbytes, secondlastbytes, thirdlastbytes, 254); + uint64_t Mlo_uint64 = _pext_u64(mp3, mend); + __mmask32 Mlo = __mmask32(Mlo_uint64); + __mmask32 Mhi = __mmask32(Mlo_uint64 >> 1); + __m512i lo_surr_mask = _mm512_maskz_mov_epi16( + Mlo, + mask_dc00dc00); // lo surr: 1101110000000000, other: 0000000000000000 + __m512i shifted4_thirdsecondandlastbytes = + _mm512_srli_epi16(thirdsecondandlastbytes, + 4); // hi surr: 00000WVUTSRQPNML vuts = WVUTS - 1 + __m512i tagged_lo_surrogates = _mm512_or_si512( + thirdsecondandlastbytes, + lo_surr_mask); // lo surr: 110111KJHGFEDCBA, other: unchanged + __m512i Wout = _mm512_mask_add_epi16( + tagged_lo_surrogates, Mhi, shifted4_thirdsecondandlastbytes, + mask_d7c0d7c0); // hi sur: 110110vutsRQPNML, other: unchanged + // the elements of Wout excluding the last element if it happens to be a + // high surrogate: + __mmask32 Mout = ~(Mhi & 0x80000000); + __mmask64 mprocessed = + (tail == SIMDUTF_FULL) + ? _pdep_u64(Mout, mend) + : _pdep_u64( + Mout, + _kand_mask64(mend, + b)); // we adjust mend at the end of the output. + + // mismatched continuation bytes: + if (tail == SIMDUTF_FULL) { + __mmask64 xnormcm1234 = _kxnor_mask64( + mc, m1234); // XNOR of mc and m1234 should be all zero if they differ + // the presence of a 1 bit indicates that they overlap. + // _kortestz_mask64_u8: compute the bitwise OR of 64-bit masksand return 1 + // if all zeroes. + if (!_kortestz_mask64_u8(xnormcm1234, xnormcm1234)) { + return false; + } + } else { + __mmask64 bxorm1234 = _kxor_mask64(b, m1234); + if (mc != bxorm1234) { + return false; + } + } + // Encodings out of range... + { + // the location of 3-byte sequence start bytes in the input + __mmask64 m3 = m34 & (b ^ m4); + // code units in Wout corresponding to 3-byte sequences. 
+ __mmask32 M3 = __mmask32(_pext_u64(m3 << 2, mend)); + __m512i mask_08000800 = _mm512_set1_epi32(0x08000800); + __mmask32 Msmall800 = + _mm512_mask_cmplt_epu16_mask(M3, Wout, mask_08000800); + __m512i mask_d800d800 = _mm512_set1_epi32(0xd800d800); + __m512i Moutminusd800 = _mm512_sub_epi16(Wout, mask_d800d800); + __mmask32 M3s = + _mm512_mask_cmplt_epu16_mask(M3, Moutminusd800, mask_08000800); + __m512i mask_04000400 = _mm512_set1_epi32(0x04000400); + __mmask32 M4s = + _mm512_mask_cmpge_epu16_mask(Mhi, Moutminusd800, mask_04000400); + if (!_kortestz_mask32_u8(M4s, _kor_mask32(Msmall800, M3s))) { + return false; + } + } + in += 64 - _lzcnt_u64(mprocessed); + int64_t nout = _mm_popcnt_u64(mprocessed); + if (big_endian) { + Wout = _mm512_shuffle_epi8(Wout, byteflip); + } + _mm512_mask_storeu_epi16(out, __mmask32((uint64_t(1) << nout) - 1), Wout); + out += nout; + return true; // ok + } + // Fast path 2: all ASCII or 2 byte + __mmask64 continuation_or_ascii = (tail == SIMDUTF_FULL) + ? _knot_mask64(m234) + : _kand_mask64(_knot_mask64(m234), b); + // on top of -0xc0 we subtract -2 which we get back later of the + // continuation byte tags + __m512i leading2byte = _mm512_maskz_sub_epi8(m234, input, mask_c2c2c2c2); + __mmask64 leading = tail == (tail == SIMDUTF_FULL) + ? _kor_mask64(m1, m234) + : _kand_mask64(_kor_mask64(m1, m234), + b); // first bytes of each sequence + if (tail == SIMDUTF_FULL) { + __mmask64 xnor234leading = + _kxnor_mask64(_kshiftli_mask64(m234, 1), leading); + if (!_kortestz_mask64_u8(xnor234leading, xnor234leading)) { + return false; + } + } else { + __mmask64 bxorleading = _kxor_mask64(b, leading); + if (_kshiftli_mask64(m234, 1) != bxorleading) { + return false; + } + } + // + if (tail == SIMDUTF_FULL) { + // In the two-byte/ASCII scenario, we are easily latency bound, so we want + // to increment the input buffer as quickly as possible. 
+ // We process 32 bytes unless the byte at index 32 is a continuation byte, + // in which case we include it as well for a total of 33 bytes. + // Note that if x is an ASCII byte, then the following is false: + // int8_t(x) <= int8_t(0xc0) under two's complement. + in += 32; + if (int8_t(*in) <= int8_t(0xc0)) + in++; + // The alternative is to do + // in += 64 - _lzcnt_u64(_pdep_u64(0xFFFFFFFF, continuation_or_ascii)); + // but it requires loading the input, doing the mask computation, and + // converting back the mask to a general register. It just takes too long, + // leaving the processor likely to be idle. + } else { + in += 64 - _lzcnt_u64(_pdep_u64(0xFFFFFFFF, continuation_or_ascii)); + } + __m512i lead = _mm512_maskz_compress_epi8( + leading, leading2byte); // will contain zero for ascii, and the data + lead = _mm512_cvtepu8_epi16( + _mm512_castsi512_si256(lead)); // ... zero extended into code units + __m512i follow = _mm512_maskz_compress_epi8( + continuation_or_ascii, input); // the last bytes of each sequence + follow = _mm512_cvtepu8_epi16( + _mm512_castsi512_si256(follow)); // ... zero extended into code units + lead = _mm512_slli_epi16(lead, 6); // shifted into position + __m512i final = _mm512_add_epi16(follow, lead); // combining lead and follow + + if (big_endian) { + final = _mm512_shuffle_epi8(final, byteflip); + } + if (tail == SIMDUTF_FULL) { + // Next part is UTF-16 specific and can be generalized to UTF-32. + int nout = _mm_popcnt_u32(uint32_t(leading)); + _mm512_mask_storeu_epi16(out, __mmask32((uint64_t(1) << nout) - 1), final); + out += nout; // UTF-8 to UTF-16 is only expansionary in this case. + } else { + int nout = int(_mm_popcnt_u64(_pdep_u64(0xFFFFFFFF, leading))); + _mm512_mask_storeu_epi16(out, __mmask32((uint64_t(1) << nout) - 1), final); + out += nout; // UTF-8 to UTF-16 is only expansionary in this case. + } + + return true; // we are fine. 
+} + +/* + utf32_to_utf16_masked converts `count` lower UTF-32 code units + from input `utf32` into UTF-16. It differs from utf32_to_utf16 + in that it 'masks' the writes. + + Returns how many 16-bit code units were stored. + + byteflip is used for flipping 16-bit code units, and it should be + __m512i byteflip = _mm512_setr_epi64( + 0x0607040502030001, + 0x0e0f0c0d0a0b0809, + 0x0607040502030001, + 0x0e0f0c0d0a0b0809, + 0x0607040502030001, + 0x0e0f0c0d0a0b0809, + 0x0607040502030001, + 0x0e0f0c0d0a0b0809 + ); + We pass it to the (always inlined) function to encourage the compiler to + keep the value in a (constant) register. +*/ +template +simdutf_really_inline size_t utf32_to_utf16_masked(const __m512i byteflip, + __m512i utf32, + unsigned int count, + char16_t *output) { + + const __mmask16 valid = uint16_t((1 << count) - 1); + // 1. check if we have any surrogate pairs + const __m512i v_0000_ffff = _mm512_set1_epi32(0x0000ffff); + const __mmask16 sp_mask = + _mm512_mask_cmpgt_epu32_mask(valid, utf32, v_0000_ffff); + + if (sp_mask == 0) { + if (big_endian) { + _mm256_mask_storeu_epi16( + (__m256i *)output, valid, + _mm256_shuffle_epi8(_mm512_cvtepi32_epi16(utf32), + _mm512_castsi512_si256(byteflip))); + + } else { + _mm256_mask_storeu_epi16((__m256i *)output, valid, + _mm512_cvtepi32_epi16(utf32)); + } + return count; + } + + { + // build surrogate pair code units in 32-bit lanes + + // t0 = 8 x [000000000000aaaa|aaaaaabbbbbbbbbb] + const __m512i v_0001_0000 = _mm512_set1_epi32(0x00010000); + const __m512i t0 = _mm512_sub_epi32(utf32, v_0001_0000); + + // t1 = 8 x [000000aaaaaaaaaa|bbbbbbbbbb000000] + const __m512i t1 = _mm512_slli_epi32(t0, 6); + + // t2 = 8 x [000000aaaaaaaaaa|aaaaaabbbbbbbbbb] -- copy hi word from t1 + // to t0 + // 0xe4 = (t1 and v_ffff_0000) or (t0 and not v_ffff_0000) + const __m512i v_ffff_0000 = _mm512_set1_epi32(0xffff0000); + const __m512i t2 = _mm512_ternarylogic_epi32(t1, t0, v_ffff_0000, 0xe4); + + // t2 = 8 x 
[110110aaaaaaaaaa|110111bbbbbbbbbb] -- copy hi word from t1 + // to t0 + // 0xba = (t2 and not v_fc00_fc000) or v_d800_dc00 + const __m512i v_fc00_fc00 = _mm512_set1_epi32(0xfc00fc00); + const __m512i v_d800_dc00 = _mm512_set1_epi32(0xd800dc00); + const __m512i t3 = + _mm512_ternarylogic_epi32(t2, v_fc00_fc00, v_d800_dc00, 0xba); + const __m512i t4 = _mm512_mask_blend_epi32(sp_mask, utf32, t3); + __m512i t5 = _mm512_ror_epi32(t4, 16); + // Here we want to trim all of the upper 16-bit code units from the 2-byte + // characters represented as 4-byte values. We can compute it from + // sp_mask or the following... It can be more optimized! + const __mmask32 nonzero = _kor_mask32( + 0xaaaaaaaa, _mm512_cmpneq_epi16_mask(t5, _mm512_setzero_si512())); + const __mmask32 nonzero_masked = + _kand_mask32(nonzero, __mmask32((uint64_t(1) << (2 * count)) - 1)); + if (big_endian) { + t5 = _mm512_shuffle_epi8(t5, byteflip); + } + // we deliberately avoid _mm512_mask_compressstoreu_epi16 for portability + // (AMD Zen4 has terrible performance with it, it is effectively broken) + __m512i compressed = _mm512_maskz_compress_epi16(nonzero_masked, t5); + _mm512_mask_storeu_epi16( + output, _bzhi_u32(0xFFFFFFFF, count + _mm_popcnt_u32(sp_mask)), + compressed); + //_mm512_mask_compressstoreu_epi16(output, nonzero_masked, t5); + } + + return count + static_cast(count_ones(sp_mask)); +} + +/* + utf32_to_utf16 converts `count` lower UTF-32 code units + from input `utf32` into UTF-16. It may overflow. + + Returns how many 16-bit code units were stored. + + byteflip is used for flipping 16-bit code units, and it should be + __m512i byteflip = _mm512_setr_epi64( + 0x0607040502030001, + 0x0e0f0c0d0a0b0809, + 0x0607040502030001, + 0x0e0f0c0d0a0b0809, + 0x0607040502030001, + 0x0e0f0c0d0a0b0809, + 0x0607040502030001, + 0x0e0f0c0d0a0b0809 + ); + We pass it to the (always inlined) function to encourage the compiler to + keep the value in a (constant) register. 
+*/ +template +simdutf_really_inline size_t utf32_to_utf16(const __m512i byteflip, + __m512i utf32, unsigned int count, + char16_t *output) { + // check if we have any surrogate pairs + const __m512i v_0000_ffff = _mm512_set1_epi32(0x0000ffff); + const __mmask16 sp_mask = _mm512_cmpgt_epu32_mask(utf32, v_0000_ffff); + + if (sp_mask == 0) { + // technically, it should be _mm256_storeu_epi16 + if (big_endian) { + _mm256_storeu_si256( + (__m256i *)output, + _mm256_shuffle_epi8(_mm512_cvtepi32_epi16(utf32), + _mm512_castsi512_si256(byteflip))); + } else { + _mm256_storeu_si256((__m256i *)output, _mm512_cvtepi32_epi16(utf32)); + } + return count; + } + + { + // build surrogate pair code units in 32-bit lanes + + // t0 = 8 x [000000000000aaaa|aaaaaabbbbbbbbbb] + const __m512i v_0001_0000 = _mm512_set1_epi32(0x00010000); + const __m512i t0 = _mm512_sub_epi32(utf32, v_0001_0000); + + // t1 = 8 x [000000aaaaaaaaaa|bbbbbbbbbb000000] + const __m512i t1 = _mm512_slli_epi32(t0, 6); + + // t2 = 8 x [000000aaaaaaaaaa|aaaaaabbbbbbbbbb] -- copy hi word from t1 + // to t0 + // 0xe4 = (t1 and v_ffff_0000) or (t0 and not v_ffff_0000) + const __m512i v_ffff_0000 = _mm512_set1_epi32(0xffff0000); + const __m512i t2 = _mm512_ternarylogic_epi32(t1, t0, v_ffff_0000, 0xe4); + + // t2 = 8 x [110110aaaaaaaaaa|110111bbbbbbbbbb] -- copy hi word from t1 + // to t0 + // 0xba = (t2 and not v_fc00_fc000) or v_d800_dc00 + const __m512i v_fc00_fc00 = _mm512_set1_epi32(0xfc00fc00); + const __m512i v_d800_dc00 = _mm512_set1_epi32(0xd800dc00); + const __m512i t3 = + _mm512_ternarylogic_epi32(t2, v_fc00_fc00, v_d800_dc00, 0xba); + const __m512i t4 = _mm512_mask_blend_epi32(sp_mask, utf32, t3); + __m512i t5 = _mm512_ror_epi32(t4, 16); + const __mmask32 nonzero = _kor_mask32( + 0xaaaaaaaa, _mm512_cmpneq_epi16_mask(t5, _mm512_setzero_si512())); + if (big_endian) { + t5 = _mm512_shuffle_epi8(t5, byteflip); + } + // we deliberately avoid _mm512_mask_compressstoreu_epi16 for portability + // (zen4) + __m512i 
compressed = _mm512_maskz_compress_epi16(nonzero, t5); + _mm512_mask_storeu_epi16( + output, + (1 << (count + static_cast(count_ones(sp_mask)))) - 1, + compressed); + //_mm512_mask_compressstoreu_epi16(output, nonzero, t5); + } + + return count + static_cast(count_ones(sp_mask)); +} + +/* + expanded_utf8_to_utf32 converts expanded UTF-8 characters (`utf8`) + stored at separate 32-bit lanes. + + For each lane we have also a character class (`char_class), given in form + 0x8080800N, where N is 4 highest bits from the leading byte; 0x80 resets + corresponding bytes during pshufb. +*/ +simdutf_really_inline __m512i expanded_utf8_to_utf32(__m512i char_class, + __m512i utf8) { + /* + Input: + - utf8: bytes stored at separate 32-bit code units + - valid: which code units have valid UTF-8 characters + + Bit layout of single word. We show 4 cases for each possible + UTF-8 character encoding. The `?` denotes bits we must not + assume their value. + + |10dd.dddd|10cc.cccc|10bb.bbbb|1111.0aaa| 4-byte char + |????.????|10cc.cccc|10bb.bbbb|1110.aaaa| 3-byte char + |????.????|????.????|10bb.bbbb|110a.aaaa| 2-byte char + |????.????|????.????|????.????|0aaa.aaaa| ASCII char + byte 3 byte 2 byte 1 byte 0 + */ + + /* 1. Reset control bits of continuation bytes and the MSB + of the leading byte; this makes all bytes unsigned (and + does not alter ASCII char). + + |00dd.dddd|00cc.cccc|00bb.bbbb|0111.0aaa| 4-byte char + |00??.????|00cc.cccc|00bb.bbbb|0110.aaaa| 3-byte char + |00??.????|00??.????|00bb.bbbb|010a.aaaa| 2-byte char + |00??.????|00??.????|00??.????|0aaa.aaaa| ASCII char + ^^ ^^ ^^ ^ + */ + __m512i values; + const __m512i v_3f3f_3f7f = _mm512_set1_epi32(0x3f3f3f7f); + values = _mm512_and_si512(utf8, v_3f3f_3f7f); + + /* 2. 
Swap and join fields A-B and C-D + + |0000.cccc|ccdd.dddd|0001.110a|aabb.bbbb| 4-byte char + |0000.cccc|cc??.????|0001.10aa|aabb.bbbb| 3-byte char + |0000.????|????.????|0001.0aaa|aabb.bbbb| 2-byte char + |0000.????|????.????|000a.aaaa|aa??.????| ASCII char */ + const __m512i v_0140_0140 = _mm512_set1_epi32(0x01400140); + values = _mm512_maddubs_epi16(values, v_0140_0140); + + /* 3. Swap and join fields AB & CD + + |0000.0001|110a.aabb|bbbb.cccc|ccdd.dddd| 4-byte char + |0000.0001|10aa.aabb|bbbb.cccc|cc??.????| 3-byte char + |0000.0001|0aaa.aabb|bbbb.????|????.????| 2-byte char + |0000.000a|aaaa.aa??|????.????|????.????| ASCII char */ + const __m512i v_0001_1000 = _mm512_set1_epi32(0x00011000); + values = _mm512_madd_epi16(values, v_0001_1000); + + /* 4. Shift left the values by variable amounts to reset highest UTF-8 bits + |aaab.bbbb|bccc.cccd|dddd.d000|0000.0000| 4-byte char -- by 11 + |aaaa.bbbb|bbcc.cccc|????.??00|0000.0000| 3-byte char -- by 10 + |aaaa.abbb|bbb?.????|????.???0|0000.0000| 2-byte char -- by 9 + |aaaa.aaa?|????.????|????.????|?000.0000| ASCII char -- by 7 */ + { + /** pshufb + + continuation = 0 + ascii = 7 + _2_bytes = 9 + _3_bytes = 10 + _4_bytes = 11 + + shift_left_v3 = 4 * [ + ascii, # 0000 + ascii, # 0001 + ascii, # 0010 + ascii, # 0011 + ascii, # 0100 + ascii, # 0101 + ascii, # 0110 + ascii, # 0111 + continuation, # 1000 + continuation, # 1001 + continuation, # 1010 + continuation, # 1011 + _2_bytes, # 1100 + _2_bytes, # 1101 + _3_bytes, # 1110 + _4_bytes, # 1111 + ] */ + const __m512i shift_left_v3 = _mm512_setr_epi64( + 0x0707070707070707, 0x0b0a090900000000, 0x0707070707070707, + 0x0b0a090900000000, 0x0707070707070707, 0x0b0a090900000000, + 0x0707070707070707, 0x0b0a090900000000); + + const __m512i shift = _mm512_shuffle_epi8(shift_left_v3, char_class); + values = _mm512_sllv_epi32(values, shift); + } + + /* 5. 
Shift right the values by variable amounts to reset lowest bits + |0000.0000|000a.aabb|bbbb.cccc|ccdd.dddd| 4-byte char -- by 11 + |0000.0000|0000.0000|aaaa.bbbb|bbcc.cccc| 3-byte char -- by 16 + |0000.0000|0000.0000|0000.0aaa|aabb.bbbb| 2-byte char -- by 21 + |0000.0000|0000.0000|0000.0000|0aaa.aaaa| ASCII char -- by 25 */ + { + // 4 * [25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 21, 21, 16, 11] + const __m512i shift_right = _mm512_setr_epi64( + 0x1919191919191919, 0x0b10151500000000, 0x1919191919191919, + 0x0b10151500000000, 0x1919191919191919, 0x0b10151500000000, + 0x1919191919191919, 0x0b10151500000000); + + const __m512i shift = _mm512_shuffle_epi8(shift_right, char_class); + values = _mm512_srlv_epi32(values, shift); + } + + return values; +} + +simdutf_really_inline __m512i expand_and_identify(__m512i lane0, __m512i lane1, + int &count) { + const __m512i merged = _mm512_mask_mov_epi32(lane0, 0x1000, lane1); + const __m512i expand_ver2 = _mm512_setr_epi64( + 0x0403020103020100, 0x0605040305040302, 0x0807060507060504, + 0x0a09080709080706, 0x0c0b0a090b0a0908, 0x0e0d0c0b0d0c0b0a, + 0x000f0e0d0f0e0d0c, 0x0201000f01000f0e); + const __m512i input = _mm512_shuffle_epi8(merged, expand_ver2); + const __m512i v_0000_00c0 = _mm512_set1_epi32(0xc0); + const __m512i t0 = _mm512_and_si512(input, v_0000_00c0); + const __m512i v_0000_0080 = _mm512_set1_epi32(0x80); + const __mmask16 leading_bytes = _mm512_cmpneq_epu32_mask(t0, v_0000_0080); + count = static_cast(count_ones(leading_bytes)); + return _mm512_mask_compress_epi32(_mm512_setzero_si512(), leading_bytes, + input); +} + +simdutf_really_inline __m512i expand_utf8_to_utf32(__m512i input) { + __m512i char_class = _mm512_srli_epi32(input, 4); + /* char_class = ((input >> 4) & 0x0f) | 0x80808000 */ + const __m512i v_0000_000f = _mm512_set1_epi32(0x0f); + const __m512i v_8080_8000 = _mm512_set1_epi32(0x80808000); + char_class = + _mm512_ternarylogic_epi32(char_class, v_0000_000f, v_8080_8000, 0xea); + return 
expanded_utf8_to_utf32(char_class, input); +} +/* end file src/icelake/icelake_utf8_common.inl.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +/* begin file src/icelake/icelake_utf8_validation.inl.cpp */ +// file included directly + +simdutf_really_inline __m512i check_special_cases(__m512i input, + const __m512i prev1) { + __m512i mask1 = _mm512_setr_epi64(0x0202020202020202, 0x4915012180808080, + 0x0202020202020202, 0x4915012180808080, + 0x0202020202020202, 0x4915012180808080, + 0x0202020202020202, 0x4915012180808080); + const __m512i v_0f = _mm512_set1_epi8(0x0f); + __m512i index1 = _mm512_and_si512(_mm512_srli_epi16(prev1, 4), v_0f); + + __m512i byte_1_high = _mm512_shuffle_epi8(mask1, index1); + __m512i mask2 = _mm512_setr_epi64(0xcbcbcb8b8383a3e7, 0xcbcbdbcbcbcbcbcb, + 0xcbcbcb8b8383a3e7, 0xcbcbdbcbcbcbcbcb, + 0xcbcbcb8b8383a3e7, 0xcbcbdbcbcbcbcbcb, + 0xcbcbcb8b8383a3e7, 0xcbcbdbcbcbcbcbcb); + __m512i index2 = _mm512_and_si512(prev1, v_0f); + + __m512i byte_1_low = _mm512_shuffle_epi8(mask2, index2); + __m512i mask3 = + _mm512_setr_epi64(0x101010101010101, 0x1010101babaaee6, 0x101010101010101, + 0x1010101babaaee6, 0x101010101010101, 0x1010101babaaee6, + 0x101010101010101, 0x1010101babaaee6); + __m512i index3 = _mm512_and_si512(_mm512_srli_epi16(input, 4), v_0f); + __m512i byte_2_high = _mm512_shuffle_epi8(mask3, index3); + return _mm512_ternarylogic_epi64(byte_1_high, byte_1_low, byte_2_high, 128); +} + +simdutf_really_inline __m512i check_multibyte_lengths(const __m512i input, + const __m512i prev_input, + const __m512i sc) { + __m512i prev2 = prev<2>(input, prev_input); + __m512i prev3 = prev<3>(input, prev_input); + __m512i is_third_byte = _mm512_subs_epu8( + prev2, _mm512_set1_epi8(0b11100000u - 1)); // Only 111_____ will be > 0 + __m512i is_fourth_byte = _mm512_subs_epu8( + prev3, _mm512_set1_epi8(0b11110000u - 1)); // Only 1111____ will be > 0 + __m512i is_third_or_fourth_byte = + 
_mm512_or_si512(is_third_byte, is_fourth_byte); + const __m512i v_7f = _mm512_set1_epi8(char(0x7f)); + is_third_or_fourth_byte = _mm512_adds_epu8(v_7f, is_third_or_fourth_byte); + // We want to compute (is_third_or_fourth_byte AND v80) XOR sc. + const __m512i v_80 = _mm512_set1_epi8(char(0x80)); + return _mm512_ternarylogic_epi32(is_third_or_fourth_byte, v_80, sc, + 0b1101010); + //__m512i is_third_or_fourth_byte_mask = + //_mm512_and_si512(is_third_or_fourth_byte, v_80); return + // _mm512_xor_si512(is_third_or_fourth_byte_mask, sc); +} +// +// Return nonzero if there are incomplete multibyte characters at the end of the +// block: e.g. if there is a 4-byte character, but it is 3 bytes from the end. +// +simdutf_really_inline __m512i is_incomplete(const __m512i input) { + // If the previous input's last 3 bytes match this, they're too short (they + // ended at EOF): + // ... 1111____ 111_____ 11______ + __m512i max_value = _mm512_setr_epi64(0xffffffffffffffff, 0xffffffffffffffff, + 0xffffffffffffffff, 0xffffffffffffffff, + 0xffffffffffffffff, 0xffffffffffffffff, + 0xffffffffffffffff, 0xbfdfefffffffffff); + return _mm512_subs_epu8(input, max_value); +} + +struct avx512_utf8_checker { + // If this is nonzero, there has been a UTF-8 error. + __m512i error{}; + + // The last input we received + __m512i prev_input_block{}; + // Whether the last input we received was incomplete (used for ASCII fast + // path) + __m512i prev_incomplete{}; + + // + // Check whether the current bytes are valid UTF-8. 
+ // + simdutf_really_inline void check_utf8_bytes(const __m512i input, + const __m512i prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ + // lead bytes (2, 3, 4-byte leads become large positive numbers instead of + // small negative numbers) + __m512i prev1 = prev<1>(input, prev_input); + __m512i sc = check_special_cases(input, prev1); + this->error = _mm512_or_si512( + check_multibyte_lengths(input, prev_input, sc), this->error); + } + + // The only problem that can happen at EOF is that a multibyte character is + // too short or a byte value too large in the last bytes: check_special_cases + // only checks for bytes too large in the first of two bytes. + simdutf_really_inline void check_eof() { + // If the previous block had incomplete UTF-8 characters at the end, an + // ASCII block can't possibly finish them. + this->error = _mm512_or_si512(this->error, this->prev_incomplete); + } + + // returns true if ASCII. + simdutf_really_inline bool check_next_input(const __m512i input) { + const __m512i v_80 = _mm512_set1_epi8(char(0x80)); + const __mmask64 ascii = _mm512_test_epi8_mask(input, v_80); + if (ascii == 0) { + this->error = _mm512_or_si512(this->error, this->prev_incomplete); + return true; + } else { + this->check_utf8_bytes(input, this->prev_input_block); + this->prev_incomplete = is_incomplete(input); + this->prev_input_block = input; + return false; + } + } + // do not forget to call check_eof! 
+ simdutf_really_inline bool errors() const { + return _mm512_test_epi8_mask(this->error, this->error) != 0; + } +}; // struct avx512_utf8_checker +/* end file src/icelake/icelake_utf8_validation.inl.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 && \ + (SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_LATIN1) +/* begin file src/icelake/icelake_from_valid_utf8.inl.cpp */ +// file included directly + +// File contains conversion procedure from VALID UTF-8 strings. + +/* + valid_utf8_to_fixed_length converts a valid UTF-8 string into UTF-32. + + The `OUTPUT` template type decides what to do with UTF-32: store + it directly or convert into UTF-16 (with AVX512). + + Input: + - str - valid UTF-8 string + - len - string length + - out_buffer - output buffer + + Result: + - pair.first - the first unprocessed input byte + - pair.second - the first unprocessed output word +*/ +template +std::pair +valid_utf8_to_fixed_length(const char *str, size_t len, OUTPUT *dwords) { + constexpr bool UTF32 = std::is_same::value; + constexpr bool UTF16 = std::is_same::value; + static_assert( + UTF32 or UTF16, + "output type has to be uint32_t (for UTF-32) or char16_t (for UTF-16)"); + static_assert(!(UTF32 and big_endian), + "we do not currently support big-endian UTF-32"); + + __m512i byteflip = _mm512_setr_epi64(0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809); + const char *ptr = str; + const char *end = ptr + len; + + OUTPUT *output = dwords; + /** + * In the main loop, we consume 64 bytes per iteration, + * but we access 64 + 4 bytes. + * We check for ptr + 64 + 64 <= end because + * we want to be do maskless writes without overruns. 
+ */ + while (end - ptr >= 64 + 4) { + const __m512i utf8 = _mm512_loadu_si512((const __m512i *)ptr); + const __m512i v_80 = _mm512_set1_epi8(char(0x80)); + const __mmask64 ascii = _mm512_test_epi8_mask(utf8, v_80); + if (ascii == 0) { + SIMDUTF_ICELAKE_STORE_ASCII(UTF32, utf8, output) + output += 64; + ptr += 64; + continue; + } + + const __m512i lane0 = broadcast_epi128<0>(utf8); + const __m512i lane1 = broadcast_epi128<1>(utf8); + int valid_count0; + __m512i vec0 = expand_and_identify(lane0, lane1, valid_count0); + const __m512i lane2 = broadcast_epi128<2>(utf8); + int valid_count1; + __m512i vec1 = expand_and_identify(lane1, lane2, valid_count1); + if (valid_count0 + valid_count1 <= 16) { + vec0 = _mm512_mask_expand_epi32( + vec0, __mmask16(((1 << valid_count1) - 1) << valid_count0), vec1); + valid_count0 += valid_count1; + vec0 = expand_utf8_to_utf32(vec0); + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) + } else { + vec0 = expand_utf8_to_utf32(vec0); + vec1 = expand_utf8_to_utf32(vec1); + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec1, valid_count1, true) + } + const __m512i lane3 = broadcast_epi128<3>(utf8); + int valid_count2; + __m512i vec2 = expand_and_identify(lane2, lane3, valid_count2); + uint32_t tmp1; + ::memcpy(&tmp1, ptr + 64, sizeof(tmp1)); + const __m512i lane4 = _mm512_set1_epi32(tmp1); + int valid_count3; + __m512i vec3 = expand_and_identify(lane3, lane4, valid_count3); + if (valid_count2 + valid_count3 <= 16) { + vec2 = _mm512_mask_expand_epi32( + vec2, __mmask16(((1 << valid_count3) - 1) << valid_count2), vec3); + valid_count2 += valid_count3; + vec2 = expand_utf8_to_utf32(vec2); + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec2, valid_count2, true) + } else { + vec2 = expand_utf8_to_utf32(vec2); + vec3 = expand_utf8_to_utf32(vec3); + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec2, valid_count2, true) + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec3, valid_count3, true) + } + ptr += 
4 * 16; + } + + if (end - ptr >= 64) { + const __m512i utf8 = _mm512_loadu_si512((const __m512i *)ptr); + const __m512i v_80 = _mm512_set1_epi8(char(0x80)); + const __mmask64 ascii = _mm512_test_epi8_mask(utf8, v_80); + if (ascii == 0) { + SIMDUTF_ICELAKE_STORE_ASCII(UTF32, utf8, output) + output += 64; + ptr += 64; + } else { + const __m512i lane0 = broadcast_epi128<0>(utf8); + const __m512i lane1 = broadcast_epi128<1>(utf8); + int valid_count0; + __m512i vec0 = expand_and_identify(lane0, lane1, valid_count0); + const __m512i lane2 = broadcast_epi128<2>(utf8); + int valid_count1; + __m512i vec1 = expand_and_identify(lane1, lane2, valid_count1); + if (valid_count0 + valid_count1 <= 16) { + vec0 = _mm512_mask_expand_epi32( + vec0, __mmask16(((1 << valid_count1) - 1) << valid_count0), vec1); + valid_count0 += valid_count1; + vec0 = expand_utf8_to_utf32(vec0); + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) + } else { + vec0 = expand_utf8_to_utf32(vec0); + vec1 = expand_utf8_to_utf32(vec1); + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec1, valid_count1, true) + } + + const __m512i lane3 = broadcast_epi128<3>(utf8); + SIMDUTF_ICELAKE_TRANSCODE16(lane2, lane3, true) + + ptr += 3 * 16; + } + } + return {ptr, output}; +} + +using utf8_to_utf16_result = std::pair; +/* end file src/icelake/icelake_from_valid_utf8.inl.cpp */ +/* begin file src/icelake/icelake_from_utf8.inl.cpp */ +// file included directly + +// File contains conversion procedure from possibly invalid UTF-8 strings. + +template +// todo: replace with the utf-8 to utf-16 routine adapted to utf-32. This code +// is legacy. 
+std::pair +validating_utf8_to_fixed_length(const char *str, size_t len, OUTPUT *dwords) { + constexpr bool UTF32 = std::is_same::value; + constexpr bool UTF16 = std::is_same::value; + static_assert( + UTF32 or UTF16, + "output type has to be uint32_t (for UTF-32) or char16_t (for UTF-16)"); + static_assert(!(UTF32 and big_endian), + "we do not currently support big-endian UTF-32"); + + const char *ptr = str; + const char *end = ptr + len; + __m512i byteflip = _mm512_setr_epi64(0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809); + OUTPUT *output = dwords; + avx512_utf8_checker checker{}; + /** + * In the main loop, we consume 64 bytes per iteration, + * but we access 64 + 4 bytes. + * We use masked writes to avoid overruns, see + * https://github.com/simdutf/simdutf/issues/471 + */ + while (end - ptr >= 64 + 4) { + const __m512i utf8 = _mm512_loadu_si512((const __m512i *)ptr); + if (checker.check_next_input(utf8)) { + SIMDUTF_ICELAKE_STORE_ASCII(UTF32, utf8, output) + output += 64; + ptr += 64; + continue; + } + const __m512i lane0 = broadcast_epi128<0>(utf8); + const __m512i lane1 = broadcast_epi128<1>(utf8); + int valid_count0; + __m512i vec0 = expand_and_identify(lane0, lane1, valid_count0); + const __m512i lane2 = broadcast_epi128<2>(utf8); + int valid_count1; + __m512i vec1 = expand_and_identify(lane1, lane2, valid_count1); + if (valid_count0 + valid_count1 <= 16) { + vec0 = _mm512_mask_expand_epi32( + vec0, __mmask16(((1 << valid_count1) - 1) << valid_count0), vec1); + valid_count0 += valid_count1; + vec0 = expand_utf8_to_utf32(vec0); + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) + } else { + vec0 = expand_utf8_to_utf32(vec0); + vec1 = expand_utf8_to_utf32(vec1); + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec1, valid_count1, true) + } + const __m512i lane3 = 
broadcast_epi128<3>(utf8); + int valid_count2; + __m512i vec2 = expand_and_identify(lane2, lane3, valid_count2); + uint32_t tmp1; + ::memcpy(&tmp1, ptr + 64, sizeof(tmp1)); + const __m512i lane4 = _mm512_set1_epi32(tmp1); + int valid_count3; + __m512i vec3 = expand_and_identify(lane3, lane4, valid_count3); + if (valid_count2 + valid_count3 <= 16) { + vec2 = _mm512_mask_expand_epi32( + vec2, __mmask16(((1 << valid_count3) - 1) << valid_count2), vec3); + valid_count2 += valid_count3; + vec2 = expand_utf8_to_utf32(vec2); + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec2, valid_count2, true) + } else { + vec2 = expand_utf8_to_utf32(vec2); + vec3 = expand_utf8_to_utf32(vec3); + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec2, valid_count2, true) + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec3, valid_count3, true) + } + ptr += 4 * 16; + } + const char *validatedptr = ptr; // validated up to ptr + + // For the final pass, we validate 64 bytes, but we only transcode + // 3*16 bytes, so we may end up double-validating 16 bytes. 
+ if (end - ptr >= 64) { + const __m512i utf8 = _mm512_loadu_si512((const __m512i *)ptr); + if (checker.check_next_input(utf8)) { + SIMDUTF_ICELAKE_STORE_ASCII(UTF32, utf8, output) + output += 64; + ptr += 64; + } else { + const __m512i lane0 = broadcast_epi128<0>(utf8); + const __m512i lane1 = broadcast_epi128<1>(utf8); + int valid_count0; + __m512i vec0 = expand_and_identify(lane0, lane1, valid_count0); + const __m512i lane2 = broadcast_epi128<2>(utf8); + int valid_count1; + __m512i vec1 = expand_and_identify(lane1, lane2, valid_count1); + if (valid_count0 + valid_count1 <= 16) { + vec0 = _mm512_mask_expand_epi32( + vec0, __mmask16(((1 << valid_count1) - 1) << valid_count0), vec1); + valid_count0 += valid_count1; + vec0 = expand_utf8_to_utf32(vec0); + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) + } else { + vec0 = expand_utf8_to_utf32(vec0); + vec1 = expand_utf8_to_utf32(vec1); + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec1, valid_count1, true) + } + + const __m512i lane3 = broadcast_epi128<3>(utf8); + SIMDUTF_ICELAKE_TRANSCODE16(lane2, lane3, true) + + ptr += 3 * 16; + } + validatedptr += 4 * 16; + } + if (end != validatedptr) { + const __m512i utf8 = + _mm512_maskz_loadu_epi8(~UINT64_C(0) >> (64 - (end - validatedptr)), + (const __m512i *)validatedptr); + checker.check_next_input(utf8); + } + checker.check_eof(); + if (checker.errors()) { + return {ptr, nullptr}; // We found an error. + } + return {ptr, output}; +} + +// Like validating_utf8_to_fixed_length but returns as soon as an error is +// identified todo: replace with the utf-8 to utf-16 routine adapted to utf-32. +// This code is legacy. 
+template +std::tuple +validating_utf8_to_fixed_length_with_constant_checks(const char *str, + size_t len, + OUTPUT *dwords) { + constexpr bool UTF32 = std::is_same::value; + constexpr bool UTF16 = std::is_same::value; + static_assert( + UTF32 or UTF16, + "output type has to be uint32_t (for UTF-32) or char16_t (for UTF-16)"); + static_assert(!(UTF32 and big_endian), + "we do not currently support big-endian UTF-32"); + + const char *ptr = str; + const char *end = ptr + len; + __m512i byteflip = _mm512_setr_epi64(0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809); + OUTPUT *output = dwords; + avx512_utf8_checker checker{}; + /** + * In the main loop, we consume 64 bytes per iteration, + * but we access 64 + 4 bytes. + */ + while (end - ptr >= 4 + 64) { + const __m512i utf8 = _mm512_loadu_si512((const __m512i *)ptr); + bool ascii = checker.check_next_input(utf8); + if (checker.errors()) { + return {ptr, output, false}; // We found an error. 
+ } + if (ascii) { + SIMDUTF_ICELAKE_STORE_ASCII(UTF32, utf8, output) + output += 64; + ptr += 64; + continue; + } + const __m512i lane0 = broadcast_epi128<0>(utf8); + const __m512i lane1 = broadcast_epi128<1>(utf8); + int valid_count0; + __m512i vec0 = expand_and_identify(lane0, lane1, valid_count0); + const __m512i lane2 = broadcast_epi128<2>(utf8); + int valid_count1; + __m512i vec1 = expand_and_identify(lane1, lane2, valid_count1); + if (valid_count0 + valid_count1 <= 16) { + vec0 = _mm512_mask_expand_epi32( + vec0, __mmask16(((1 << valid_count1) - 1) << valid_count0), vec1); + valid_count0 += valid_count1; + vec0 = expand_utf8_to_utf32(vec0); + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) + } else { + vec0 = expand_utf8_to_utf32(vec0); + vec1 = expand_utf8_to_utf32(vec1); + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec1, valid_count1, true) + } + const __m512i lane3 = broadcast_epi128<3>(utf8); + int valid_count2; + __m512i vec2 = expand_and_identify(lane2, lane3, valid_count2); + uint32_t tmp1; + ::memcpy(&tmp1, ptr + 64, sizeof(tmp1)); + const __m512i lane4 = _mm512_set1_epi32(tmp1); + int valid_count3; + __m512i vec3 = expand_and_identify(lane3, lane4, valid_count3); + if (valid_count2 + valid_count3 <= 16) { + vec2 = _mm512_mask_expand_epi32( + vec2, __mmask16(((1 << valid_count3) - 1) << valid_count2), vec3); + valid_count2 += valid_count3; + vec2 = expand_utf8_to_utf32(vec2); + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec2, valid_count2, true) + } else { + vec2 = expand_utf8_to_utf32(vec2); + vec3 = expand_utf8_to_utf32(vec3); + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec2, valid_count2, true) + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec3, valid_count3, true) + } + ptr += 4 * 16; + } + const char *validatedptr = ptr; // validated up to ptr + + // For the final pass, we validate 64 bytes, but we only transcode + // 3*16 bytes, so we may end up double-validating 16 bytes. 
+ if (end - ptr >= 64) { + const __m512i utf8 = _mm512_loadu_si512((const __m512i *)ptr); + bool ascii = checker.check_next_input(utf8); + if (checker.errors()) { + return {ptr, output, false}; // We found an error. + } + if (ascii) { + SIMDUTF_ICELAKE_STORE_ASCII(UTF32, utf8, output) + output += 64; + ptr += 64; + } else { + const __m512i lane0 = broadcast_epi128<0>(utf8); + const __m512i lane1 = broadcast_epi128<1>(utf8); + int valid_count0; + __m512i vec0 = expand_and_identify(lane0, lane1, valid_count0); + const __m512i lane2 = broadcast_epi128<2>(utf8); + int valid_count1; + __m512i vec1 = expand_and_identify(lane1, lane2, valid_count1); + if (valid_count0 + valid_count1 <= 16) { + vec0 = _mm512_mask_expand_epi32( + vec0, __mmask16(((1 << valid_count1) - 1) << valid_count0), vec1); + valid_count0 += valid_count1; + vec0 = expand_utf8_to_utf32(vec0); + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) + } else { + vec0 = expand_utf8_to_utf32(vec0); + vec1 = expand_utf8_to_utf32(vec1); + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec1, valid_count1, true) + } + + const __m512i lane3 = broadcast_epi128<3>(utf8); + SIMDUTF_ICELAKE_TRANSCODE16(lane2, lane3, true) + + ptr += 3 * 16; + } + validatedptr += 4 * 16; + } + if (end != validatedptr) { + const __m512i utf8 = + _mm512_maskz_loadu_epi8(~UINT64_C(0) >> (64 - (end - validatedptr)), + (const __m512i *)validatedptr); + checker.check_next_input(utf8); + } + checker.check_eof(); + if (checker.errors()) { + return {ptr, output, false}; // We found an error. + } + return {ptr, output, true}; +} +/* end file src/icelake/icelake_from_utf8.inl.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && (SIMDUTF_FEATURE_UTF16 || + // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_LATIN1) + +#if SIMDUTF_FEATURE_UTF16 +/* begin file src/icelake/icelake_utf16fix.cpp */ +#include + +/* + * Process one block of 32 characters. 
If in_place is false, + * copy the block from in to out. If there is a sequencing + * error in the block, overwrite the illsequenced characters + * with the replacement character. This function reads one + * character before the beginning of the buffer as a lookback. + * If that character is illsequenced, it too is overwritten. + */ +template +simdutf_really_inline void utf16fix_block(char16_t *out, const char16_t *in) { + const char16_t replacement = scalar::utf16::replacement(); + __m512i lookback, block, lb_masked, block_masked; + __mmask32 lb_is_high, block_is_low, illseq; + auto swap_if_needed = [](uint16_t x) simdutf_constexpr -> uint16_t { + return scalar::utf16::swap_if_needed(x); + }; + + lookback = _mm512_loadu_si512((const __m512i *)(in - 1)); + block = _mm512_loadu_si512((const __m512i *)in); + lb_masked = + _mm512_and_epi32(lookback, _mm512_set1_epi16(swap_if_needed(0xfc00U))); + block_masked = + _mm512_and_epi32(block, _mm512_set1_epi16(swap_if_needed(0xfc00U))); + + lb_is_high = _mm512_cmpeq_epi16_mask( + lb_masked, _mm512_set1_epi16(swap_if_needed(0xd800U))); + block_is_low = _mm512_cmpeq_epi16_mask( + block_masked, _mm512_set1_epi16(swap_if_needed(0xdc00U))); + illseq = _kxor_mask32(lb_is_high, block_is_low); + if (!_ktestz_mask32_u8(illseq, illseq)) { + __mmask32 lb_illseq, block_illseq; + + /* compute the cause of the illegal sequencing */ + lb_illseq = _kandn_mask32(block_is_low, lb_is_high); + block_illseq = _kor_mask32(_kandn_mask32(lb_is_high, block_is_low), + _kshiftri_mask32(lb_illseq, 1)); + + /* fix illegal sequencing in the lookback */ + lb_illseq = _kand_mask32(lb_illseq, _cvtu32_mask32(1)); + _mm512_mask_storeu_epi16(out - 1, lb_illseq, + _mm512_set1_epi16(replacement)); + + /* fix illegal sequencing in the main block */ + if (in_place) { + _mm512_mask_storeu_epi16(out, block_illseq, + _mm512_set1_epi16(replacement)); + } else { + _mm512_storeu_epi32( + out, _mm512_mask_blend_epi16(block_illseq, block, + 
_mm512_set1_epi16(replacement))); + } + } else if (!in_place) { + _mm512_storeu_si512((__m512i *)out, block); + } +} + +/* + * Special case for inputs of 0--32 bytes. Works for both in-place and + * out-of-place operation. + */ +template +void utf16fix_short(const char16_t *in, size_t n, char16_t *out) { + const char16_t replacement = scalar::utf16::replacement(); + __m512i lookback, block, lb_masked, block_masked; + __mmask32 lb_is_high, block_is_low, illseq; + uint32_t mask = 0xFFFFFFFF >> (32 - n); + auto swap_if_needed = [](uint16_t x) simdutf_constexpr -> uint16_t { + return scalar::utf16::swap_if_needed(x); + }; + lookback = _mm512_maskz_loadu_epi16(_cvtmask32_u32(mask << 1), + (const uint16_t *)(in - 1)); + block = _mm512_maskz_loadu_epi16(_cvtmask32_u32(mask), (const uint16_t *)in); + lb_masked = + _mm512_and_epi32(lookback, _mm512_set1_epi16(swap_if_needed(0xfc00u))); + block_masked = + _mm512_and_epi32(block, _mm512_set1_epi16(swap_if_needed(0xfc00u))); + + lb_is_high = _mm512_cmpeq_epi16_mask( + lb_masked, _mm512_set1_epi16(swap_if_needed(0xd800u))); + block_is_low = _mm512_cmpeq_epi16_mask( + block_masked, _mm512_set1_epi16(swap_if_needed(0xdc00u))); + illseq = _kxor_mask32(lb_is_high, block_is_low); + if (!_ktestz_mask32_u8(illseq, illseq)) { + __mmask32 lb_illseq, block_illseq; + + /* compute the cause of the illegal sequencing */ + lb_illseq = _kandn_mask32(block_is_low, lb_is_high); + block_illseq = _kor_mask32(_kandn_mask32(lb_is_high, block_is_low), + _kshiftri_mask32(lb_illseq, 1)); + + /* fix illegal sequencing in the main block */ + _mm512_mask_storeu_epi16( + (uint16_t *)out, _cvtmask32_u32(mask), + _mm512_mask_blend_epi16(block_illseq, block, + _mm512_set1_epi16(replacement))); + } else { + _mm512_mask_storeu_epi16((uint16_t *)out, _cvtmask32_u32(mask), block); + } + out[n - 1] = scalar::utf16::is_high_surrogate(out[n - 1]) + ? 
replacement + : out[n - 1]; +} + +template +void utf16fix_avx512(const char16_t *in, size_t n, char16_t *out) { + const char16_t replacement = scalar::utf16::replacement(); + size_t i; + + if (n == 0) + return; + else if (n < 33) { + utf16fix_short(in, n, out); + return; + } + out[0] = + scalar::utf16::is_low_surrogate(in[0]) ? replacement : in[0]; + + /* duplicate code to have the compiler specialise utf16fix_block() */ + if (in == out) { + for (i = 1; i + 32 < n; i += 32) { + utf16fix_block(out + i, in + i); + } + + utf16fix_block(out + n - 32, in + n - 32); + } else { + for (i = 1; i + 32 < n; i += 32) { + utf16fix_block(out + i, in + i); + } + + utf16fix_block(out + n - 32, in + n - 32); + } + + out[n - 1] = scalar::utf16::is_high_surrogate(out[n - 1]) + ? replacement + : out[n - 1]; +} +/* end file src/icelake/icelake_utf16fix.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/icelake/icelake_convert_utf8_to_latin1.inl.cpp */ +// file included directly + +// File contains conversion procedure from possibly invalid UTF-8 strings. + +template +simdutf_really_inline size_t process_block_from_utf8_to_latin1( + const char *buf, size_t len, char *latin_output, __m512i minus64, + __m512i one, __mmask64 *next_leading_ptr, __mmask64 *next_bit6_ptr) { + __mmask64 load_mask = + is_remaining ? _bzhi_u64(~0ULL, (unsigned int)len) : ~0ULL; + __m512i input = _mm512_maskz_loadu_epi8(load_mask, (__m512i *)buf); + __mmask64 nonascii = _mm512_movepi8_mask(input); + if (nonascii == 0) { + if (*next_leading_ptr) { // If we ended with a leading byte, it is an error. + return 0; // Indicates error + } + is_remaining + ? 
_mm512_mask_storeu_epi8((__m512i *)latin_output, load_mask, input) + : _mm512_storeu_si512((__m512i *)latin_output, input); + return len; + } + + const __mmask64 leading = _mm512_cmpge_epu8_mask(input, minus64); + + __m512i highbits = _mm512_xor_si512(input, _mm512_set1_epi8(-62)); + __mmask64 invalid_leading_bytes = + _mm512_mask_cmpgt_epu8_mask(leading, highbits, one); + + if (invalid_leading_bytes) { + return 0; // Indicates error + } + + __mmask64 leading_shift = (leading << 1) | *next_leading_ptr; + + if ((nonascii ^ leading) != leading_shift) { + return 0; // Indicates error + } + + const __mmask64 bit6 = _mm512_cmpeq_epi8_mask(highbits, one); + input = + _mm512_mask_sub_epi8(input, (bit6 << 1) | *next_bit6_ptr, input, minus64); + + __mmask64 retain = ~leading & load_mask; + __m512i output = _mm512_maskz_compress_epi8(retain, input); + int64_t written_out = count_ones(retain); + if (written_out == 0) { + return 0; // Indicates error + } + *next_bit6_ptr = bit6 >> 63; + *next_leading_ptr = leading >> 63; + + __mmask64 store_mask = ~UINT64_C(0) >> (64 - written_out); + + _mm512_mask_storeu_epi8((__m512i *)latin_output, store_mask, output); + + return written_out; +} + +size_t utf8_to_latin1_avx512(const char *&inbuf, size_t len, + char *&inlatin_output) { + const char *buf = inbuf; + char *latin_output = inlatin_output; + char *start = latin_output; + size_t pos = 0; + __m512i minus64 = _mm512_set1_epi8(-64); // 11111111111 ... 
1100 0000 + __m512i one = _mm512_set1_epi8(1); + __mmask64 next_leading = 0; + __mmask64 next_bit6 = 0; + + while (pos + 64 <= len) { + size_t written = process_block_from_utf8_to_latin1( + buf + pos, 64, latin_output, minus64, one, &next_leading, &next_bit6); + if (written == 0) { + inlatin_output = latin_output; + inbuf = buf + pos - next_leading; + return 0; // Indicates error at pos or after, or just before pos (too + // short error) + } + latin_output += written; + pos += 64; + } + + if (pos < len) { + size_t remaining = len - pos; + size_t written = process_block_from_utf8_to_latin1( + buf + pos, remaining, latin_output, minus64, one, &next_leading, + &next_bit6); + if (written == 0) { + inbuf = buf + pos - next_leading; + inlatin_output = latin_output; + return 0; // Indicates error at pos or after, or just before pos (too + // short error) + } + latin_output += written; + } + if (next_leading) { + inbuf = buf + len - next_leading; + inlatin_output = latin_output; + return 0; // Indicates error at end of buffer + } + inlatin_output = latin_output; + inbuf += len; + return size_t(latin_output - start); +} +/* end file src/icelake/icelake_convert_utf8_to_latin1.inl.cpp */ +/* begin file src/icelake/icelake_convert_valid_utf8_to_latin1.inl.cpp */ +// file included directly + +// File contains conversion procedure from valid UTF-8 strings. + +template +simdutf_really_inline size_t process_valid_block_from_utf8_to_latin1( + const char *buf, size_t len, char *latin_output, __m512i minus64, + __m512i one, __mmask64 *next_leading_ptr, __mmask64 *next_bit6_ptr) { + __mmask64 load_mask = + is_remaining ? _bzhi_u64(~0ULL, (unsigned int)len) : ~0ULL; + __m512i input = _mm512_maskz_loadu_epi8(load_mask, (__m512i *)buf); + __mmask64 nonascii = _mm512_movepi8_mask(input); + + if (nonascii == 0) { + is_remaining + ? 
_mm512_mask_storeu_epi8((__m512i *)latin_output, load_mask, input) + : _mm512_storeu_si512((__m512i *)latin_output, input); + return len; + } + + __mmask64 leading = _mm512_cmpge_epu8_mask(input, minus64); + + __m512i highbits = _mm512_xor_si512(input, _mm512_set1_epi8(-62)); + + *next_leading_ptr = leading >> 63; + + __mmask64 bit6 = _mm512_cmpeq_epi8_mask(highbits, one); + input = + _mm512_mask_sub_epi8(input, (bit6 << 1) | *next_bit6_ptr, input, minus64); + *next_bit6_ptr = bit6 >> 63; + + __mmask64 retain = ~leading & load_mask; + __m512i output = _mm512_maskz_compress_epi8(retain, input); + int64_t written_out = count_ones(retain); + if (written_out == 0) { + return 0; // Indicates error + } + __mmask64 store_mask = ~UINT64_C(0) >> (64 - written_out); + // Optimization opportunity: sometimes, masked writes are not needed. + _mm512_mask_storeu_epi8((__m512i *)latin_output, store_mask, output); + return written_out; +} + +size_t valid_utf8_to_latin1_avx512(const char *buf, size_t len, + char *latin_output) { + char *start = latin_output; + size_t pos = 0; + __m512i minus64 = _mm512_set1_epi8(-64); // 11111111111 ... 
1100 0000 + __m512i one = _mm512_set1_epi8(1); + __mmask64 next_leading = 0; + __mmask64 next_bit6 = 0; + + while (pos + 64 <= len) { + size_t written = process_valid_block_from_utf8_to_latin1( + buf + pos, 64, latin_output, minus64, one, &next_leading, &next_bit6); + latin_output += written; + pos += 64; + } + + if (pos < len) { + size_t remaining = len - pos; + size_t written = process_valid_block_from_utf8_to_latin1( + buf + pos, remaining, latin_output, minus64, one, &next_leading, + &next_bit6); + latin_output += written; + } + + return (size_t)(latin_output - start); +} +/* end file src/icelake/icelake_convert_valid_utf8_to_latin1.inl.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 +/* begin file src/icelake/icelake_convert_utf16_to_latin1.inl.cpp */ +// file included directly +template +size_t icelake_convert_utf16_to_latin1(const char16_t *buf, size_t len, + char *latin1_output) { + const char16_t *end = buf + len; + __m512i v_0xFF = _mm512_set1_epi16(0xff); + __m512i byteflip = _mm512_setr_epi64(0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809); + __m512i shufmask = _mm512_set_epi8( + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, + 36, 34, 32, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0); + while (end - buf >= 32) { + __m512i in = _mm512_loadu_si512((__m512i *)buf); + if (big_endian) { + in = _mm512_shuffle_epi8(in, byteflip); + } + if (_mm512_cmpgt_epu16_mask(in, v_0xFF)) { + return 0; + } + _mm256_storeu_si256( + (__m256i *)latin1_output, + _mm512_castsi512_si256(_mm512_permutexvar_epi8(shufmask, in))); + latin1_output += 32; + buf += 32; + } + if (buf < end) { + uint32_t mask(uint32_t(1 << (end - buf)) - 1); + __m512i in = _mm512_maskz_loadu_epi16(mask, buf); + if (big_endian) { 
+ in = _mm512_shuffle_epi8(in, byteflip); + } + if (_mm512_cmpgt_epu16_mask(in, v_0xFF)) { + return 0; + } + _mm256_mask_storeu_epi8( + latin1_output, mask, + _mm512_castsi512_si256(_mm512_permutexvar_epi8(shufmask, in))); + } + return len; +} + +template +std::pair +icelake_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, + char *latin1_output) { + const char16_t *end = buf + len; + const char16_t *start = buf; + __m512i byteflip = _mm512_setr_epi64(0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809); + __m512i v_0xFF = _mm512_set1_epi16(0xff); + __m512i shufmask = _mm512_set_epi8( + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, + 36, 34, 32, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0); + while (end - buf >= 32) { + __m512i in = _mm512_loadu_si512((__m512i *)buf); + if (big_endian) { + in = _mm512_shuffle_epi8(in, byteflip); + } + if (_mm512_cmpgt_epu16_mask(in, v_0xFF)) { + uint16_t word; + while ((word = scalar::utf16::swap_if_needed( + uint16_t(*buf))) <= 0xff) { + *latin1_output++ = uint8_t(word); + buf++; + } + return std::make_pair(result(error_code::TOO_LARGE, buf - start), + latin1_output); + } + _mm256_storeu_si256( + (__m256i *)latin1_output, + _mm512_castsi512_si256(_mm512_permutexvar_epi8(shufmask, in))); + latin1_output += 32; + buf += 32; + } + if (buf < end) { + uint32_t mask(uint32_t(1 << (end - buf)) - 1); + __m512i in = _mm512_maskz_loadu_epi16(mask, buf); + if (big_endian) { + in = _mm512_shuffle_epi8(in, byteflip); + } + if (_mm512_cmpgt_epu16_mask(in, v_0xFF)) { + + uint16_t word; + while ((word = scalar::utf16::swap_if_needed( + uint16_t(*buf))) <= 0xff) { + *latin1_output++ = uint8_t(word); + buf++; + } + return std::make_pair(result(error_code::TOO_LARGE, buf - start), + latin1_output); + } + 
_mm256_mask_storeu_epi8( + latin1_output, mask, + _mm512_castsi512_si256(_mm512_permutexvar_epi8(shufmask, in))); + } + return std::make_pair(result(error_code::SUCCESS, len), latin1_output); +} +/* end file src/icelake/icelake_convert_utf16_to_latin1.inl.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +/* begin file src/icelake/icelake_convert_utf16_to_utf8.inl.cpp */ +// file included directly + +/** + * This function converts the input (inbuf, inlen), assumed to be valid + * UTF16 (little endian) into UTF-8 (to outbuf). The number of code units + * written is written to 'outlen' and the function reports the number of input + * word consumed. + */ +template +size_t utf16_to_utf8_avx512i(const char16_t *inbuf, size_t inlen, + unsigned char *outbuf, size_t *outlen) { + __m512i in; + __mmask32 inmask = _cvtu32_mask32(0x7fffffff); + __m512i byteflip = _mm512_setr_epi64(0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809); + const char16_t *const inbuf_orig = inbuf; + const unsigned char *const outbuf_orig = outbuf; + int adjust = 0; + int carry = 0; + + while (inlen >= 32) { + in = _mm512_loadu_si512(inbuf); + if (big_endian) { + in = _mm512_shuffle_epi8(in, byteflip); + } + inlen -= 31; + lastiteration: + inbuf += 31; + + failiteration: + const __mmask32 is234byte = _mm512_mask_cmp_epu16_mask( + inmask, in, _mm512_set1_epi16(0x0080), _MM_CMPINT_NLT); + + if (_ktestz_mask32_u8(inmask, is234byte)) { + // fast path for ASCII only + _mm512_mask_cvtepi16_storeu_epi8(outbuf, inmask, in); + outbuf += 31; + carry = 0; + + if (inlen < 32) { + goto tail; + } else { + continue; + } + } + + const __mmask32 is12byte = + _mm512_cmp_epu16_mask(in, _mm512_set1_epi16(0x0800), _MM_CMPINT_LT); + + if (_ktestc_mask32_u8(is12byte, inmask)) { + // fast path for 1 and 2 byte only + + const __m512i twobytes = 
_mm512_ternarylogic_epi32( + _mm512_slli_epi16(in, 8), _mm512_srli_epi16(in, 6), + _mm512_set1_epi16(0x3f3f), 0xa8); // (A|B)&C + in = _mm512_mask_add_epi16(in, is234byte, twobytes, + _mm512_set1_epi16(int16_t(0x80c0))); + const __m512i cmpmask = + _mm512_mask_blend_epi16(inmask, _mm512_set1_epi16(int16_t(0xffff)), + _mm512_set1_epi16(0x0800)); + const __mmask64 smoosh = + _mm512_cmp_epu8_mask(in, cmpmask, _MM_CMPINT_NLT); + const __m512i out = _mm512_maskz_compress_epi8(smoosh, in); + _mm512_mask_storeu_epi8(outbuf, + _cvtu64_mask64(_pext_u64(_cvtmask64_u64(smoosh), + _cvtmask64_u64(smoosh))), + out); + outbuf += 31 + _mm_popcnt_u32(_cvtmask32_u32(is234byte)); + carry = 0; + + if (inlen < 32) { + goto tail; + } else { + continue; + } + } + __m512i lo = _mm512_cvtepu16_epi32(_mm512_castsi512_si256(in)); + __m512i hi = _mm512_cvtepu16_epi32(_mm512_extracti32x8_epi32(in, 1)); + + __m512i taglo = _mm512_set1_epi32(0x8080e000); + __m512i taghi = taglo; + + const __m512i fc00masked = + _mm512_and_epi32(in, _mm512_set1_epi16(int16_t(0xfc00))); + const __mmask32 hisurr = _mm512_mask_cmp_epu16_mask( + inmask, fc00masked, _mm512_set1_epi16(int16_t(0xd800)), _MM_CMPINT_EQ); + const __mmask32 losurr = _mm512_cmp_epu16_mask( + fc00masked, _mm512_set1_epi16(int16_t(0xdc00)), _MM_CMPINT_EQ); + + int carryout = 0; + if (!_kortestz_mask32_u8(hisurr, losurr)) { + // handle surrogates + + __m512i los = _mm512_alignr_epi32(hi, lo, 1); + __m512i his = _mm512_alignr_epi32(lo, hi, 1); + + const __mmask32 hisurrhi = _kshiftri_mask32(hisurr, 16); + taglo = _mm512_mask_mov_epi32(taglo, __mmask16(hisurr), + _mm512_set1_epi32(0x808080f0)); + taghi = _mm512_mask_mov_epi32(taghi, __mmask16(hisurrhi), + _mm512_set1_epi32(0x808080f0)); + + lo = _mm512_mask_slli_epi32(lo, __mmask16(hisurr), lo, 10); + hi = _mm512_mask_slli_epi32(hi, __mmask16(hisurrhi), hi, 10); + los = _mm512_add_epi32(los, _mm512_set1_epi32(0xfca02400)); + his = _mm512_add_epi32(his, _mm512_set1_epi32(0xfca02400)); + lo = 
_mm512_mask_add_epi32(lo, __mmask16(hisurr), lo, los); + hi = _mm512_mask_add_epi32(hi, __mmask16(hisurrhi), hi, his); + + carryout = _cvtu32_mask32(_kshiftri_mask32(hisurr, 30)); + + const uint32_t h = _cvtmask32_u32(hisurr); + const uint32_t l = _cvtmask32_u32(losurr); + // check for mismatched surrogates + if ((h + h + carry) ^ l) { + const uint32_t lonohi = l & ~(h + h + carry); + const uint32_t hinolo = h & ~(l >> 1); + inlen = _tzcnt_u32(hinolo | lonohi); + inmask = __mmask32(0x7fffffff & ((1U << inlen) - 1)); + in = _mm512_maskz_mov_epi16(inmask, in); + adjust = (int)inlen - 31; + inlen = 0; + goto failiteration; + } + } + + hi = _mm512_maskz_mov_epi32(_cvtu32_mask16(0x7fff), hi); + carry = carryout; + + __m512i mslo = + _mm512_multishift_epi64_epi8(_mm512_set1_epi64(0x20262c3200060c12), lo); + + __m512i mshi = + _mm512_multishift_epi64_epi8(_mm512_set1_epi64(0x20262c3200060c12), hi); + + const __mmask32 outmask = __mmask32(_kandn_mask64(losurr, inmask)); + const __mmask64 outmhi = _kshiftri_mask64(outmask, 16); + + const __mmask32 is1byte = __mmask32(_knot_mask64(is234byte)); + const __mmask64 is1bhi = _kshiftri_mask64(is1byte, 16); + const __mmask64 is12bhi = _kshiftri_mask64(is12byte, 16); + + taglo = _mm512_mask_mov_epi32(taglo, __mmask16(is12byte), + _mm512_set1_epi32(0x80c00000)); + taghi = _mm512_mask_mov_epi32(taghi, __mmask16(is12bhi), + _mm512_set1_epi32(0x80c00000)); + __m512i magiclo = _mm512_mask_blend_epi32(__mmask16(outmask), + _mm512_set1_epi32(0xffffffff), + _mm512_set1_epi32(0x00010101)); + __m512i magichi = _mm512_mask_blend_epi32(__mmask16(outmhi), + _mm512_set1_epi32(0xffffffff), + _mm512_set1_epi32(0x00010101)); + + magiclo = _mm512_mask_blend_epi32(__mmask16(outmask), + _mm512_set1_epi32(0xffffffff), + _mm512_set1_epi32(0x00010101)); + magichi = _mm512_mask_blend_epi32(__mmask16(outmhi), + _mm512_set1_epi32(0xffffffff), + _mm512_set1_epi32(0x00010101)); + + mslo = _mm512_ternarylogic_epi32(mslo, _mm512_set1_epi32(0x3f3f3f3f), taglo, + 
0xea); // A&B|C + mshi = _mm512_ternarylogic_epi32(mshi, _mm512_set1_epi32(0x3f3f3f3f), taghi, + 0xea); + mslo = _mm512_mask_slli_epi32(mslo, __mmask16(is1byte), lo, 24); + + mshi = _mm512_mask_slli_epi32(mshi, __mmask16(is1bhi), hi, 24); + + const __mmask64 wantlo = + _mm512_cmp_epu8_mask(mslo, magiclo, _MM_CMPINT_NLT); + const __mmask64 wanthi = + _mm512_cmp_epu8_mask(mshi, magichi, _MM_CMPINT_NLT); + const __m512i outlo = _mm512_maskz_compress_epi8(wantlo, mslo); + const __m512i outhi = _mm512_maskz_compress_epi8(wanthi, mshi); + const uint64_t wantlo_uint64 = _cvtmask64_u64(wantlo); + const uint64_t wanthi_uint64 = _cvtmask64_u64(wanthi); + + uint64_t advlo = _mm_popcnt_u64(wantlo_uint64); + uint64_t advhi = _mm_popcnt_u64(wanthi_uint64); + + _mm512_mask_storeu_epi8( + outbuf, _cvtu64_mask64(_pext_u64(wantlo_uint64, wantlo_uint64)), outlo); + _mm512_mask_storeu_epi8( + outbuf + advlo, _cvtu64_mask64(_pext_u64(wanthi_uint64, wanthi_uint64)), + outhi); + outbuf += advlo + advhi; + } + outbuf += -adjust; + +tail: + if (inlen != 0) { + // We must have inlen < 31. + inmask = _cvtu32_mask32((1U << inlen) - 1); + in = _mm512_maskz_loadu_epi16(inmask, inbuf); + if (big_endian) { + in = _mm512_shuffle_epi8(in, byteflip); + } + adjust = (int)inlen - 31; + inlen = 0; + goto lastiteration; + } + *outlen = (outbuf - outbuf_orig) + adjust; + return ((inbuf - inbuf_orig) + adjust); +} +/* end file src/icelake/icelake_convert_utf16_to_utf8.inl.cpp */ +/* begin file src/icelake/icelake_convert_utf8_to_utf16.inl.cpp */ +// file included directly + +// File contains conversion procedure from possibly invalid UTF-8 strings. + +/** + * Attempts to convert up to len 1-byte code units from in (in UTF-8 format) to + * out. + * Returns the position of the input and output after the processing is + * completed. Upon error, the output is set to null. 
+ */ + +template +utf8_to_utf16_result +fast_avx512_convert_utf8_to_utf16(const char *in, size_t len, char16_t *out) { + const char *const final_in = in + len; + bool result = true; + while (result) { + if (final_in - in >= 64) { + result = process_block_utf8_to_utf16( + in, out, final_in - in); + } else if (in < final_in) { + result = process_block_utf8_to_utf16( + in, out, final_in - in); + } else { + break; + } + } + if (!result) { + out = nullptr; + } + return std::make_pair(in, out); +} + +template +simdutf::result fast_avx512_convert_utf8_to_utf16_with_errors(const char *in, + size_t len, + char16_t *out) { + const char *const init_in = in; + const char16_t *const init_out = out; + const char *const final_in = in + len; + bool result = true; + while (result) { + if (final_in - in >= 64) { + result = process_block_utf8_to_utf16( + in, out, final_in - in); + } else if (in < final_in) { + result = process_block_utf8_to_utf16( + in, out, final_in - in); + } else { + break; + } + } + if (!result) { + size_t pos = size_t(in - init_in); + if (pos < len && (init_in[pos] & 0xc0) == 0x80 && pos >= 64) { + // We must check whether we are the fourth continuation byte + bool c1 = (init_in[pos - 1] & 0xc0) == 0x80; + bool c2 = (init_in[pos - 2] & 0xc0) == 0x80; + bool c3 = (init_in[pos - 3] & 0xc0) == 0x80; + if (c1 && c2 && c3) { + return {simdutf::TOO_LONG, pos}; + } + } + // rewind_and_convert_with_errors will seek a potential error from in + // onward, with the ability to go back up to in - init_in bytes, and read + // final_in - in bytes forward. 
+ simdutf::result res = + scalar::utf8_to_utf16::rewind_and_convert_with_errors( + in - init_in, in, final_in - in, out); + res.count += (in - init_in); + return res; + } else { + return simdutf::result(error_code::SUCCESS, out - init_out); + } +} +/* end file src/icelake/icelake_convert_utf8_to_utf16.inl.cpp */ +/* begin file src/icelake/icelake_utf8_length_from_utf16.inl.cpp */ +template +simdutf_really_inline size_t icelake_utf8_length_from_utf16(const char16_t *in, + size_t size) { + + using vector_u16 = simd16; + constexpr size_t N = vector_u16::ELEMENTS; // 32 on AVX-512 + if (N + 1 > size) { + result scalar_result = + scalar::utf16::utf8_length_from_utf16_with_replacement( + in, size); + return scalar_result.count; + } // special case for short inputs + size_t pos = 0; + + const __m512i byteflip = _mm512_setr_epi64( + 0x0607040502030001, 0x0e0f0c0d0a0b0809, 0x0607040502030001, + 0x0e0f0c0d0a0b0809, 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809); + + size_t count = 0; + + for (; pos < size / (2 * N) * (2 * N); pos += 2 * N) { + + __m512i input1 = + _mm512_loadu_si512(reinterpret_cast(in + pos)); + __m512i input2 = + _mm512_loadu_si512(reinterpret_cast(in + pos + N)); + + if simdutf_constexpr (!match_system(big_endian)) { + input1 = _mm512_shuffle_epi8(input1, byteflip); + input2 = _mm512_shuffle_epi8(input2, byteflip); + } + // 0xd800 .. 0xdbff - low surrogate + // 0xdc00 .. 
0xdfff - high surrogate + __mmask32 is_surrogate1 = _mm512_cmpeq_epi16_mask( + _mm512_and_si512(input1, _mm512_set1_epi16(uint16_t(0xf800))), + _mm512_set1_epi16(uint16_t(0xd800))); + __mmask32 is_surrogate2 = _mm512_cmpeq_epi16_mask( + _mm512_and_si512(input2, _mm512_set1_epi16(uint16_t(0xf800))), + _mm512_set1_epi16(uint16_t(0xd800))); + // c0 - chars that yield 2- or 3-byte UTF-8 codes + __mmask32 c01 = + _mm512_test_epi16_mask(input1, _mm512_set1_epi16(uint16_t(0xff80))); + __mmask32 c02 = + _mm512_test_epi16_mask(input2, _mm512_set1_epi16(uint16_t(0xff80))); + + // c1 - chars that yield 3-byte UTF-8 codes (including surrogates) + __mmask32 c11 = + _mm512_test_epi16_mask(input1, _mm512_set1_epi16(uint16_t(0xf800))); + __mmask32 c12 = + _mm512_test_epi16_mask(input2, _mm512_set1_epi16(uint16_t(0xf800))); + count += count_ones32(c01); + count += count_ones32(c11); + count -= count_ones32(is_surrogate1); + count += count_ones32(c02); + count += count_ones32(c12); + count -= count_ones32(is_surrogate2); + } + if (pos + N <= size) { + __m512i input = + _mm512_loadu_si512(reinterpret_cast(in + pos)); + if simdutf_constexpr (!match_system(big_endian)) { + input = _mm512_shuffle_epi8(input, byteflip); + } + // 0xd800 .. 0xdbff - low surrogate + // 0xdc00 .. 0xdfff - high surrogate + __mmask32 is_surrogate = _mm512_cmpeq_epi16_mask( + _mm512_and_si512(input, _mm512_set1_epi16(uint16_t(0xf800))), + _mm512_set1_epi16(uint16_t(0xd800))); + + // c0 - chars that yield 2- or 3-byte UTF-8 codes + __mmask32 c0 = + _mm512_test_epi16_mask(input, _mm512_set1_epi16(uint16_t(0xff80))); + + // c1 - chars that yield 3-byte UTF-8 codes (including surrogates) + __mmask32 c1 = + _mm512_test_epi16_mask(input, _mm512_set1_epi16(uint16_t(0xf800))); + count += count_ones32(c0); + count += count_ones32(c1); + count -= count_ones32(is_surrogate); + pos += N; + } + // At this point, we have processed 'pos' char16 values and we have less than + // N remaining. 
+ __mmask32 remaining_mask = + 0xFFFFFFFFULL >> + (32 - (size - pos)); // mask for the remaining char16 values + __m512i input = _mm512_maskz_loadu_epi16(remaining_mask, in + pos); + if simdutf_constexpr (!match_system(big_endian)) { + input = _mm512_shuffle_epi8(input, byteflip); + } + // 0xd800 .. 0xdbff - low surrogate + // 0xdc00 .. 0xdfff - high surrogate + __mmask32 is_surrogate = _mm512_cmpeq_epi16_mask( + _mm512_and_si512(input, _mm512_set1_epi16(uint16_t(0xf800))), + _mm512_set1_epi16(uint16_t(0xd800))); + + // c0 - chars that yield 2- or 3-byte UTF-8 codes + __mmask32 c0 = + _mm512_test_epi16_mask(input, _mm512_set1_epi16(uint16_t(0xff80))); + + // c1 - chars that yield 3-byte UTF-8 codes (including surrogates) + __mmask32 c1 = + _mm512_test_epi16_mask(input, _mm512_set1_epi16(uint16_t(0xf800))); + count += count_ones32(c0); + count += count_ones32(c1); + count -= count_ones32(is_surrogate); + pos = size; + + count += pos; + return count; +} + +template +simdutf_really_inline result icelake_utf8_length_from_utf16_with_replacement( + const char16_t *in, size_t size) { + /////// + // We repeat 3 times the same algorithm. + // First, we proceed with an unrolled loop of 2*N char16 values (for speed). + // Second, we process N char16 values. + // Finally, we process the remaining char16 values (less than N). + /////// + using vector_u16 = simd16; + constexpr size_t N = vector_u16::ELEMENTS; // 32 on AVX-512 + if (N + 1 > size) { + return scalar::utf16::utf8_length_from_utf16_with_replacement( + in, size); + } // special case for short inputs + size_t pos = 0; + + const __m512i byteflip = _mm512_setr_epi64( + 0x0607040502030001, 0x0e0f0c0d0a0b0809, 0x0607040502030001, + 0x0e0f0c0d0a0b0809, 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809); + + constexpr uint32_t straddle_mask = + match_system(big_endian) ? 0xfc00fc00 : 0x00fc00fc; + constexpr uint32_t straddle_pair = + match_system(big_endian) ? 
0xdc00d800 : 0x00dc00d8; + + size_t count = 0; + bool any_surrogates = false; + // We assume all surrogates are mismatched and count here the matched + // ones. + size_t matches = 0; + + for (; pos < (size - 1) / (2 * N) * (2 * N); pos += 2 * N) { + __m512i current1 = + _mm512_loadu_si512(reinterpret_cast(in + pos)); + if simdutf_constexpr (!match_system(big_endian)) { + current1 = _mm512_shuffle_epi8(current1, byteflip); + } + __m512i current2 = + _mm512_loadu_si512(reinterpret_cast(in + pos + N)); + if simdutf_constexpr (!match_system(big_endian)) { + current2 = _mm512_shuffle_epi8(current2, byteflip); + } + + __mmask32 is_surrogate1 = _mm512_cmpeq_epi16_mask( + _mm512_and_si512(current1, _mm512_set1_epi16(uint16_t(0xf800))), + _mm512_set1_epi16(uint16_t(0xd800))); + __mmask32 is_surrogate2 = _mm512_cmpeq_epi16_mask( + _mm512_and_si512(current2, _mm512_set1_epi16(uint16_t(0xf800))), + _mm512_set1_epi16(uint16_t(0xd800))); + __mmask32 c01 = + _mm512_test_epi16_mask(current1, _mm512_set1_epi16(uint16_t(0xff80))); + __mmask32 c11 = + _mm512_test_epi16_mask(current1, _mm512_set1_epi16(uint16_t(0xf800))); + __mmask32 c02 = + _mm512_test_epi16_mask(current2, _mm512_set1_epi16(uint16_t(0xff80))); + __mmask32 c12 = + _mm512_test_epi16_mask(current2, _mm512_set1_epi16(uint16_t(0xf800))); + count += count_ones32(c01); + count += count_ones32(c11); + count += count_ones32(c02); + count += count_ones32(c12); + if (_kor_mask32(is_surrogate1, is_surrogate2)) { + any_surrogates = true; + __m512i lb_masked1 = + _mm512_and_si512(current1, _mm512_set1_epi16(uint16_t(0xfc00))); + __mmask32 hi_surrogates1 = _mm512_cmpeq_epi16_mask( + lb_masked1, _mm512_set1_epi16(uint16_t(0xd800))); + __mmask32 lo_surrogates1 = _mm512_cmpeq_epi16_mask( + lb_masked1, _mm512_set1_epi16(uint16_t(0xdc00))); + __m512i lb_masked2 = + _mm512_and_si512(current2, _mm512_set1_epi16(uint16_t(0xfc00))); + __mmask32 hi_surrogates2 = _mm512_cmpeq_epi16_mask( + lb_masked2, _mm512_set1_epi16(uint16_t(0xd800))); + 
__mmask32 lo_surrogates2 = _mm512_cmpeq_epi16_mask( + lb_masked2, _mm512_set1_epi16(uint16_t(0xdc00))); + matches += count_ones32( + _kand_mask32(_kshiftli_mask32(hi_surrogates1, 1), lo_surrogates1)); + matches += count_ones32( + _kand_mask32(_kshiftli_mask32(hi_surrogates2, 1), lo_surrogates2)); + uint32_t straddle1, straddle2; + memcpy(&straddle1, in + pos + 1 * N - 1, sizeof(uint32_t)); + memcpy(&straddle2, in + pos + 2 * N - 1, sizeof(uint32_t)); + matches += ((straddle1 & straddle_mask) == straddle_pair) + + ((straddle2 & straddle_mask) == straddle_pair); + } + } + if (pos + N + 1 <= size) { + __m512i input = + _mm512_loadu_si512(reinterpret_cast(in + pos)); + if simdutf_constexpr (!match_system(big_endian)) { + input = _mm512_shuffle_epi8(input, byteflip); + } + + __mmask32 is_surrogate = _mm512_cmpeq_epi16_mask( + _mm512_and_si512(input, _mm512_set1_epi16(uint16_t(0xf800))), + _mm512_set1_epi16(uint16_t(0xd800))); + __mmask32 c0 = + _mm512_test_epi16_mask(input, _mm512_set1_epi16(uint16_t(0xff80))); + __mmask32 c1 = + _mm512_test_epi16_mask(input, _mm512_set1_epi16(uint16_t(0xf800))); + count += count_ones32(c0); + count += count_ones32(c1); + if (is_surrogate) { + any_surrogates = true; + __m512i lb_masked = + _mm512_and_si512(input, _mm512_set1_epi16(uint16_t(0xfc00))); + __mmask32 hi_surrogates = _mm512_cmpeq_epi16_mask( + lb_masked, _mm512_set1_epi16(uint16_t(0xd800))); + __mmask32 lo_surrogates = _mm512_cmpeq_epi16_mask( + lb_masked, _mm512_set1_epi16(uint16_t(0xdc00))); + matches += count_ones32( + _kand_mask32(_kshiftli_mask32(hi_surrogates, 1), lo_surrogates)); + uint32_t straddle; + memcpy(&straddle, in + pos + N - 1, sizeof(uint32_t)); + matches += (straddle & straddle_mask) == straddle_pair; + } + pos += N; + } + + size_t overshoot = 32 - (size - pos); + __mmask32 remaining_mask(uint32_t(0xFFFFFFFFULL << overshoot)); + __m512i input = + _mm512_maskz_loadu_epi16(remaining_mask, in + pos - overshoot); + if simdutf_constexpr 
(!match_system(big_endian)) { + input = _mm512_shuffle_epi8(input, byteflip); + } + + __mmask32 is_surrogate = _mm512_cmpeq_epi16_mask( + _mm512_and_si512(input, _mm512_set1_epi16(uint16_t(0xf800))), + _mm512_set1_epi16(uint16_t(0xd800))); + __mmask32 c0 = + _mm512_test_epi16_mask(input, _mm512_set1_epi16(uint16_t(0xff80))); + __mmask32 c1 = + _mm512_test_epi16_mask(input, _mm512_set1_epi16(uint16_t(0xf800))); + + count += count_ones32(c0); + count += count_ones32(c1); + if (is_surrogate) { + any_surrogates = true; + __m512i lb_masked = + _mm512_and_si512(input, _mm512_set1_epi16(uint16_t(0xfc00))); + __mmask32 hi_surrogates = + _mm512_cmpeq_epi16_mask(lb_masked, _mm512_set1_epi16(uint16_t(0xd800))); + __mmask32 lo_surrogates = + _mm512_cmpeq_epi16_mask(lb_masked, _mm512_set1_epi16(uint16_t(0xdc00))); + matches += count_ones32( + _kand_mask32(_kshiftli_mask32(hi_surrogates, 1), lo_surrogates)); + } + pos = size; + count += pos; + + count -= 2 * matches; + return {any_surrogates ? SURROGATE : SUCCESS, count}; +} +/* end file src/icelake/icelake_utf8_length_from_utf16.inl.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +/* begin file src/icelake/icelake_convert_utf16_to_utf32.inl.cpp */ +// file included directly + +/* + Returns a pair: the first unprocessed byte from buf and utf32_output + A scalar routing should carry on the conversion of the tail. 
+*/ +template +std::tuple +convert_utf16_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_output) { + const char16_t *end = buf + len; + const __m512i v_fc00 = _mm512_set1_epi16((uint16_t)0xfc00); + const __m512i v_d800 = _mm512_set1_epi16((uint16_t)0xd800); + const __m512i v_dc00 = _mm512_set1_epi16((uint16_t)0xdc00); + __mmask32 carry{0}; + const __m512i byteflip = _mm512_setr_epi64( + 0x0607040502030001, 0x0e0f0c0d0a0b0809, 0x0607040502030001, + 0x0e0f0c0d0a0b0809, 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809); + while (std::distance(buf, end) >= 32) { + // Always safe because buf + 32 <= end so that end - buf >= 32 bytes: + __m512i in = _mm512_loadu_si512((__m512i *)buf); + if (big_endian) { + in = _mm512_shuffle_epi8(in, byteflip); + } + + // H - bitmask for high surrogates + const __mmask32 H = + _mm512_cmpeq_epi16_mask(_mm512_and_si512(in, v_fc00), v_d800); + // H - bitmask for low surrogates + const __mmask32 L = + _mm512_cmpeq_epi16_mask(_mm512_and_si512(in, v_fc00), v_dc00); + + if ((H | L)) { + // surrogate pair(s) in a register + const __mmask32 V = + (L ^ + (carry | (H << 1))); // A high surrogate must be followed by low one + // and a low one must be preceded by a high one. + // If valid, V should be equal to 0 + + if (V == 0) { + // valid case + /* + Input surrogate pair: + |1101.11aa.aaaa.aaaa|1101.10bb.bbbb.bbbb| + low surrogate high surrogate + */ + /* 1. Expand all code units to 32-bit code units + in + |0000.0000.0000.0000.1101.11aa.aaaa.aaaa|0000.0000.0000.0000.1101.10bb.bbbb.bbbb| + */ + const __m512i first = _mm512_cvtepu16_epi32(_mm512_castsi512_si256(in)); + const __m512i second = + _mm512_cvtepu16_epi32(_mm512_extracti32x8_epi32(in, 1)); + + /* 2. 
Shift by one 16-bit word to align low surrogates with high + surrogates in + |0000.0000.0000.0000.1101.11aa.aaaa.aaaa|0000.0000.0000.0000.1101.10bb.bbbb.bbbb| + shifted + |????.????.????.????.????.????.????.????|0000.0000.0000.0000.1101.11aa.aaaa.aaaa| + */ + const __m512i shifted_first = _mm512_alignr_epi32(second, first, 1); + const __m512i shifted_second = + _mm512_alignr_epi32(_mm512_setzero_si512(), second, 1); + + /* 3. Align all high surrogates in first and second by shifting to the + left by 10 bits + |0000.0000.0000.0000.1101.11aa.aaaa.aaaa|0000.0011.0110.bbbb.bbbb.bb00.0000.0000| + */ + const __m512i aligned_first = + _mm512_mask_slli_epi32(first, (__mmask16)H, first, 10); + const __m512i aligned_second = + _mm512_mask_slli_epi32(second, (__mmask16)(H >> 16), second, 10); + + /* 4. Remove surrogate prefixes and add offset 0x10000 by adding in, + shifted and constant in + |0000.0000.0000.0000.1101.11aa.aaaa.aaaa|0000.0011.0110.bbbb.bbbb.bb00.0000.0000| + shifted + |????.????.????.????.????.????.????.????|0000.0000.0000.0000.1101.11aa.aaaa.aaaa| + constant|1111.1100.1010.0000.0010.0100.0000.0000|1111.1100.1010.0000.0010.0100.0000.0000| + */ + const __m512i constant = _mm512_set1_epi32((uint32_t)0xfca02400); + const __m512i added_first = _mm512_mask_add_epi32( + aligned_first, (__mmask16)H, aligned_first, shifted_first); + const __m512i utf32_first = _mm512_mask_add_epi32( + added_first, (__mmask16)H, added_first, constant); + + const __m512i added_second = + _mm512_mask_add_epi32(aligned_second, (__mmask16)(H >> 16), + aligned_second, shifted_second); + const __m512i utf32_second = _mm512_mask_add_epi32( + added_second, (__mmask16)(H >> 16), added_second, constant); + + // 5. Store all valid UTF-32 code units (low surrogate positions and + // 32nd word are invalid) + const __mmask32 valid = ~L & 0x7fffffff; + // We deliberately do a _mm512_maskz_compress_epi32 followed by + // storeu_epi32 to ease performance portability to Zen 4. 
+ const __m512i compressed_first = + _mm512_maskz_compress_epi32((__mmask16)(valid), utf32_first); + const size_t howmany1 = count_ones((uint16_t)(valid)); + _mm512_storeu_si512((__m512i *)utf32_output, compressed_first); + utf32_output += howmany1; + const __m512i compressed_second = + _mm512_maskz_compress_epi32((__mmask16)(valid >> 16), utf32_second); + const size_t howmany2 = count_ones((uint16_t)(valid >> 16)); + // The following could be unsafe in some cases? + //_mm512_storeu_epi32((__m512i *) utf32_output, compressed_second); + _mm512_mask_storeu_epi32((__m512i *)utf32_output, + __mmask16((1 << howmany2) - 1), + compressed_second); + utf32_output += howmany2; + // Only process 31 code units, but keep track if the 31st word is a high + // surrogate as a carry + buf += 31; + carry = (H >> 30) & 0x1; + } else { + // invalid case + return std::make_tuple(buf + carry, utf32_output, false); + } + } else { + // no surrogates + // extend all thirty-two 16-bit code units to thirty-two 32-bit code units + _mm512_storeu_si512((__m512i *)(utf32_output), + _mm512_cvtepu16_epi32(_mm512_castsi512_si256(in))); + _mm512_storeu_si512( + (__m512i *)(utf32_output) + 1, + _mm512_cvtepu16_epi32(_mm512_extracti32x8_epi32(in, 1))); + utf32_output += 32; + buf += 32; + carry = 0; + } + } // while + return std::make_tuple(buf + carry, utf32_output, true); +} +/* end file src/icelake/icelake_convert_utf16_to_utf32.inl.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF32 +/* begin file src/icelake/icelake_convert_utf32_to_latin1.inl.cpp */ +// file included directly +size_t icelake_convert_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) { + const char32_t *end = buf + len; + __m512i v_0xFF = _mm512_set1_epi32(0xff); + __m512i shufmask = _mm512_set_epi8( + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60, + 56, 52, 48, 44, 
40, 36, 32, 28, 24, 20, 16, 12, 8, 4, 0); + while (end - buf >= 16) { + __m512i in = _mm512_loadu_si512((__m512i *)buf); + if (_mm512_cmpgt_epu32_mask(in, v_0xFF)) { + return 0; + } + _mm_storeu_si128( + (__m128i *)latin1_output, + _mm512_castsi512_si128(_mm512_permutexvar_epi8(shufmask, in))); + latin1_output += 16; + buf += 16; + } + if (buf < end) { + uint16_t mask = uint16_t((1 << (end - buf)) - 1); + __m512i in = _mm512_maskz_loadu_epi32(mask, buf); + if (_mm512_cmpgt_epu32_mask(in, v_0xFF)) { + return 0; + } + _mm_mask_storeu_epi8( + latin1_output, mask, + _mm512_castsi512_si128(_mm512_permutexvar_epi8(shufmask, in))); + } + return len; +} + +std::pair +icelake_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, + char *latin1_output) { + const char32_t *end = buf + len; + const char32_t *start = buf; + __m512i v_0xFF = _mm512_set1_epi32(0xff); + __m512i shufmask = _mm512_set_epi8( + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60, + 56, 52, 48, 44, 40, 36, 32, 28, 24, 20, 16, 12, 8, 4, 0); + while (end - buf >= 16) { + __m512i in = _mm512_loadu_si512((__m512i *)buf); + if (_mm512_cmpgt_epu32_mask(in, v_0xFF)) { + while (uint32_t(*buf) <= 0xff) { + *latin1_output++ = uint8_t(*buf++); + } + return std::make_pair(result(error_code::TOO_LARGE, buf - start), + latin1_output); + } + _mm_storeu_si128( + (__m128i *)latin1_output, + _mm512_castsi512_si128(_mm512_permutexvar_epi8(shufmask, in))); + latin1_output += 16; + buf += 16; + } + if (buf < end) { + uint16_t mask = uint16_t((1 << (end - buf)) - 1); + __m512i in = _mm512_maskz_loadu_epi32(mask, buf); + if (_mm512_cmpgt_epu32_mask(in, v_0xFF)) { + while (uint32_t(*buf) <= 0xff) { + *latin1_output++ = uint8_t(*buf++); + } + return std::make_pair(result(error_code::TOO_LARGE, buf - start), + latin1_output); + } + _mm_mask_storeu_epi8( + latin1_output, mask, + 
_mm512_castsi512_si128(_mm512_permutexvar_epi8(shufmask, in))); + } + return std::make_pair(result(error_code::SUCCESS, len), latin1_output); +} +/* end file src/icelake/icelake_convert_utf32_to_latin1.inl.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +/* begin file src/icelake/icelake_convert_utf32_to_utf8.inl.cpp */ +// file included directly + +// Todo: currently, this is just the haswell code, optimize for icelake kernel. +std::pair +avx512_convert_utf32_to_utf8(const char32_t *buf, size_t len, + char *utf8_output) { + const char32_t *end = buf + len; + const __m256i v_0000 = _mm256_setzero_si256(); + const __m256i v_ffff0000 = _mm256_set1_epi32((uint32_t)0xffff0000); + const __m256i v_ff80 = _mm256_set1_epi16((uint16_t)0xff80); + const __m256i v_f800 = _mm256_set1_epi16((uint16_t)0xf800); + const __m256i v_c080 = _mm256_set1_epi16((uint16_t)0xc080); + const __m256i v_7fffffff = _mm256_set1_epi32((uint32_t)0x7fffffff); + __m256i running_max = _mm256_setzero_si256(); + __m256i forbidden_bytemask = _mm256_setzero_si256(); + + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 + + while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { + __m256i in = _mm256_loadu_si256((__m256i *)buf); + __m256i nextin = _mm256_loadu_si256((__m256i *)buf + 1); + running_max = _mm256_max_epu32(_mm256_max_epu32(in, running_max), nextin); + + // Pack 32-bit UTF-32 code units to 16-bit UTF-16 code units with unsigned + // saturation + __m256i in_16 = _mm256_packus_epi32(_mm256_and_si256(in, v_7fffffff), + _mm256_and_si256(nextin, v_7fffffff)); + in_16 = _mm256_permute4x64_epi64(in_16, 0b11011000); + + // Try to apply UTF-16 => UTF-8 routine on 256 bits + // (haswell/avx2_convert_utf16_to_utf8.cpp) + + if (_mm256_testz_si256(in_16, v_ff80)) { // ASCII fast path!!!! + // 1. 
pack the bytes + const __m128i utf8_packed = _mm_packus_epi16( + _mm256_castsi256_si128(in_16), _mm256_extractf128_si256(in_16, 1)); + // 2. store (16 bytes) + _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! + } + // no bits set above 7th bit + const __m256i one_byte_bytemask = + _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_ff80), v_0000); + const uint32_t one_byte_bitmask = + static_cast(_mm256_movemask_epi8(one_byte_bytemask)); + + // no bits set above 11th bit + const __m256i one_or_two_bytes_bytemask = + _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_0000); + const uint32_t one_or_two_bytes_bitmask = + static_cast(_mm256_movemask_epi8(one_or_two_bytes_bytemask)); + if (one_or_two_bytes_bitmask == 0xffffffff) { + // 1. prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 + // expected output : [110a|aaaa|10bb|bbbb] x 8 + const __m256i v_1f00 = _mm256_set1_epi16((int16_t)0x1f00); + const __m256i v_003f = _mm256_set1_epi16((int16_t)0x003f); + + // t0 = [000a|aaaa|bbbb|bb00] + const __m256i t0 = _mm256_slli_epi16(in_16, 2); + // t1 = [000a|aaaa|0000|0000] + const __m256i t1 = _mm256_and_si256(t0, v_1f00); + // t2 = [0000|0000|00bb|bbbb] + const __m256i t2 = _mm256_and_si256(in_16, v_003f); + // t3 = [000a|aaaa|00bb|bbbb] + const __m256i t3 = _mm256_or_si256(t1, t2); + // t4 = [110a|aaaa|10bb|bbbb] + const __m256i t4 = _mm256_or_si256(t3, v_c080); + + // 2. merge ASCII and 2-byte codewords + const __m256i utf8_unpacked = + _mm256_blendv_epi8(t4, in_16, one_byte_bytemask); + + // 3. prepare bitmask for 8-bit lookup + const uint32_t M0 = one_byte_bitmask & 0x55555555; + const uint32_t M1 = M0 >> 7; + const uint32_t M2 = (M1 | M0) & 0x00ff00ff; + // 4. 
pack the bytes + + const uint8_t *row = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0]; + const uint8_t *row_2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2 >> + 16)][0]; + + const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1)); + const __m128i shuffle_2 = _mm_loadu_si128((__m128i *)(row_2 + 1)); + + const __m256i utf8_packed = _mm256_shuffle_epi8( + utf8_unpacked, _mm256_setr_m128i(shuffle, shuffle_2)); + // 5. store bytes + _mm_storeu_si128((__m128i *)utf8_output, + _mm256_castsi256_si128(utf8_packed)); + utf8_output += row[0]; + _mm_storeu_si128((__m128i *)utf8_output, + _mm256_extractf128_si256(utf8_packed, 1)); + utf8_output += row_2[0]; + + // 6. adjust pointers + buf += 16; + continue; + } + // Must check for overflow in packing + const __m256i saturation_bytemask = _mm256_cmpeq_epi32( + _mm256_and_si256(_mm256_or_si256(in, nextin), v_ffff0000), v_0000); + const uint32_t saturation_bitmask = + static_cast(_mm256_movemask_epi8(saturation_bytemask)); + if (saturation_bitmask == 0xffffffff) { + // case: code units from register produce either 1, 2 or 3 UTF-8 bytes + const __m256i v_d800 = _mm256_set1_epi16((uint16_t)0xd800); + forbidden_bytemask = _mm256_or_si256( + forbidden_bytemask, + _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_d800)); + + const __m256i dup_even = _mm256_setr_epi16( + 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e, + 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); + + /* In this branch we handle three cases: + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + single UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two + UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + three UTF-8 bytes + + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. 
+ + We precompute byte 1 for case #1 and the common byte for cases #2 & #3 + in register t2. + + We precompute byte 1 for case #3 and -- **conditionally** -- precompute + either byte 1 for case #2 or byte 2 for case #3. Note that they + differ by exactly one bit. + + Finally from these two code units we build proper UTF-8 sequence, taking + into account the case (i.e, the number of bytes to write). + */ + /** + * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: + * t2 => [0ccc|cccc] [10cc|cccc] + * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) + */ +#define simdutf_vec(x) _mm256_set1_epi16(static_cast(x)) + // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] + const __m256i t0 = _mm256_shuffle_epi8(in_16, dup_even); + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] + const __m256i t1 = _mm256_and_si256(t0, simdutf_vec(0b0011111101111111)); + // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] + const __m256i t2 = _mm256_or_si256(t1, simdutf_vec(0b1000000000000000)); + + // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] + const __m256i s0 = _mm256_srli_epi16(in_16, 4); + // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] + const __m256i s1 = _mm256_and_si256(s0, simdutf_vec(0b0000111111111100)); + // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] + const __m256i s2 = _mm256_maddubs_epi16(s1, simdutf_vec(0x0140)); + // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] + const __m256i s3 = _mm256_or_si256(s2, simdutf_vec(0b1100000011100000)); + const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, + simdutf_vec(0b0100000000000000)); + const __m256i s4 = _mm256_xor_si256(s3, m0); +#undef simdutf_vec + + // 4. expand code units 16-bit => 32-bit + const __m256i out0 = _mm256_unpacklo_epi16(t2, s4); + const __m256i out1 = _mm256_unpackhi_epi16(t2, s4); + + // 5. 
compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle + const uint32_t mask = (one_byte_bitmask & 0x55555555) | + (one_or_two_bytes_bitmask & 0xaaaaaaaa); + // Due to the wider registers, the following path is less likely to be + // useful. + /*if(mask == 0) { + // We only have three-byte code units. Use fast path. + const __m256i shuffle = + _mm256_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1, + 2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1); const __m256i utf8_0 = + _mm256_shuffle_epi8(out0, shuffle); const __m256i utf8_1 = + _mm256_shuffle_epi8(out1, shuffle); + _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_0)); + utf8_output += 12; + _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_1)); + utf8_output += 12; + _mm_storeu_si128((__m128i*)utf8_output, + _mm256_extractf128_si256(utf8_0,1)); utf8_output += 12; + _mm_storeu_si128((__m128i*)utf8_output, + _mm256_extractf128_si256(utf8_1,1)); utf8_output += 12; buf += 16; + continue; + }*/ + const uint8_t mask0 = uint8_t(mask); + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; + const __m128i shuffle0 = _mm_loadu_si128((__m128i *)(row0 + 1)); + const __m128i utf8_0 = + _mm_shuffle_epi8(_mm256_castsi256_si128(out0), shuffle0); + + const uint8_t mask1 = static_cast(mask >> 8); + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; + const __m128i shuffle1 = _mm_loadu_si128((__m128i *)(row1 + 1)); + const __m128i utf8_1 = + _mm_shuffle_epi8(_mm256_castsi256_si128(out1), shuffle1); + + const uint8_t mask2 = static_cast(mask >> 16); + const uint8_t *row2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask2][0]; + const __m128i shuffle2 = _mm_loadu_si128((__m128i *)(row2 + 1)); + const __m128i utf8_2 = + _mm_shuffle_epi8(_mm256_extractf128_si256(out0, 1), shuffle2); + + const uint8_t mask3 = static_cast(mask >> 24); + const uint8_t *row3 = + 
&simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask3][0]; + const __m128i shuffle3 = _mm_loadu_si128((__m128i *)(row3 + 1)); + const __m128i utf8_3 = + _mm_shuffle_epi8(_mm256_extractf128_si256(out1, 1), shuffle3); + + _mm_storeu_si128((__m128i *)utf8_output, utf8_0); + utf8_output += row0[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_1); + utf8_output += row1[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_2); + utf8_output += row2[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_3); + utf8_output += row3[0]; + buf += 16; + } else { + // case: at least one 32-bit word is larger than 0xFFFF <=> it will + // produce four UTF-8 bytes. Let us do a scalar fallback. It may seem + // wasteful to use scalar code, but being efficient with SIMD may require + // large, non-trivial tables? + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint32_t word = buf[k]; + if ((word & 0xFFFFFF80) == 0) { // 1-byte (ASCII) + *utf8_output++ = char(word); + } else if ((word & 0xFFFFF800) == 0) { // 2-byte + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xFFFF0000) == 0) { // 3-byte + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair(nullptr, utf8_output); + } + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { // 4-byte + if (word > 0x10FFFF) { + return std::make_pair(nullptr, utf8_output); + } + *utf8_output++ = char((word >> 18) | 0b11110000); + *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } + } + buf += k; + } + } // while + + // check for invalid input + const __m256i v_10ffff = 
_mm256_set1_epi32((uint32_t)0x10ffff); + if (static_cast(_mm256_movemask_epi8(_mm256_cmpeq_epi32( + _mm256_max_epu32(running_max, v_10ffff), v_10ffff))) != 0xffffffff) { + return std::make_pair(nullptr, utf8_output); + } + + if (static_cast(_mm256_movemask_epi8(forbidden_bytemask)) != 0) { + return std::make_pair(nullptr, utf8_output); + } + + return std::make_pair(buf, utf8_output); +} + +// Todo: currently, this is just the haswell code, optimize for icelake kernel. +std::pair +avx512_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, + char *utf8_output) { + const char32_t *end = buf + len; + const char32_t *start = buf; + + const __m256i v_0000 = _mm256_setzero_si256(); + const __m256i v_ffff0000 = _mm256_set1_epi32((uint32_t)0xffff0000); + const __m256i v_ff80 = _mm256_set1_epi16((uint16_t)0xff80); + const __m256i v_f800 = _mm256_set1_epi16((uint16_t)0xf800); + const __m256i v_c080 = _mm256_set1_epi16((uint16_t)0xc080); + const __m256i v_7fffffff = _mm256_set1_epi32((uint32_t)0x7fffffff); + const __m256i v_10ffff = _mm256_set1_epi32((uint32_t)0x10ffff); + + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 + + while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { + __m256i in = _mm256_loadu_si256((__m256i *)buf); + __m256i nextin = _mm256_loadu_si256((__m256i *)buf + 1); + // Check for too large input + const __m256i max_input = + _mm256_max_epu32(_mm256_max_epu32(in, nextin), v_10ffff); + if (static_cast(_mm256_movemask_epi8( + _mm256_cmpeq_epi32(max_input, v_10ffff))) != 0xffffffff) { + return std::make_pair(result(error_code::TOO_LARGE, buf - start), + utf8_output); + } + + // Pack 32-bit UTF-32 code units to 16-bit UTF-16 code units with unsigned + // saturation + __m256i in_16 = _mm256_packus_epi32(_mm256_and_si256(in, v_7fffffff), + _mm256_and_si256(nextin, v_7fffffff)); + in_16 = _mm256_permute4x64_epi64(in_16, 0b11011000); + + // Try to apply UTF-16 => UTF-8 routine 
on 256 bits + // (haswell/avx2_convert_utf16_to_utf8.cpp) + + if (_mm256_testz_si256(in_16, v_ff80)) { // ASCII fast path!!!! + // 1. pack the bytes + const __m128i utf8_packed = _mm_packus_epi16( + _mm256_castsi256_si128(in_16), _mm256_extractf128_si256(in_16, 1)); + // 2. store (16 bytes) + _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! + } + // no bits set above 7th bit + const __m256i one_byte_bytemask = + _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_ff80), v_0000); + const uint32_t one_byte_bitmask = + static_cast(_mm256_movemask_epi8(one_byte_bytemask)); + + // no bits set above 11th bit + const __m256i one_or_two_bytes_bytemask = + _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_0000); + const uint32_t one_or_two_bytes_bitmask = + static_cast(_mm256_movemask_epi8(one_or_two_bytes_bytemask)); + if (one_or_two_bytes_bitmask == 0xffffffff) { + // 1. prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 + // expected output : [110a|aaaa|10bb|bbbb] x 8 + const __m256i v_1f00 = _mm256_set1_epi16((int16_t)0x1f00); + const __m256i v_003f = _mm256_set1_epi16((int16_t)0x003f); + + // t0 = [000a|aaaa|bbbb|bb00] + const __m256i t0 = _mm256_slli_epi16(in_16, 2); + // t1 = [000a|aaaa|0000|0000] + const __m256i t1 = _mm256_and_si256(t0, v_1f00); + // t2 = [0000|0000|00bb|bbbb] + const __m256i t2 = _mm256_and_si256(in_16, v_003f); + // t3 = [000a|aaaa|00bb|bbbb] + const __m256i t3 = _mm256_or_si256(t1, t2); + // t4 = [110a|aaaa|10bb|bbbb] + const __m256i t4 = _mm256_or_si256(t3, v_c080); + + // 2. merge ASCII and 2-byte codewords + const __m256i utf8_unpacked = + _mm256_blendv_epi8(t4, in_16, one_byte_bytemask); + + // 3. prepare bitmask for 8-bit lookup + const uint32_t M0 = one_byte_bitmask & 0x55555555; + const uint32_t M1 = M0 >> 7; + const uint32_t M2 = (M1 | M0) & 0x00ff00ff; + // 4. 
pack the bytes + + const uint8_t *row = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0]; + const uint8_t *row_2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2 >> + 16)][0]; + + const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1)); + const __m128i shuffle_2 = _mm_loadu_si128((__m128i *)(row_2 + 1)); + + const __m256i utf8_packed = _mm256_shuffle_epi8( + utf8_unpacked, _mm256_setr_m128i(shuffle, shuffle_2)); + // 5. store bytes + _mm_storeu_si128((__m128i *)utf8_output, + _mm256_castsi256_si128(utf8_packed)); + utf8_output += row[0]; + _mm_storeu_si128((__m128i *)utf8_output, + _mm256_extractf128_si256(utf8_packed, 1)); + utf8_output += row_2[0]; + + // 6. adjust pointers + buf += 16; + continue; + } + // Must check for overflow in packing + const __m256i saturation_bytemask = _mm256_cmpeq_epi32( + _mm256_and_si256(_mm256_or_si256(in, nextin), v_ffff0000), v_0000); + const uint32_t saturation_bitmask = + static_cast(_mm256_movemask_epi8(saturation_bytemask)); + if (saturation_bitmask == 0xffffffff) { + // case: code units from register produce either 1, 2 or 3 UTF-8 bytes + + // Check for illegal surrogate code units + const __m256i v_d800 = _mm256_set1_epi16((uint16_t)0xd800); + const __m256i forbidden_bytemask = + _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_d800); + if (static_cast(_mm256_movemask_epi8(forbidden_bytemask)) != + 0x0) { + return std::make_pair(result(error_code::SURROGATE, buf - start), + utf8_output); + } + + const __m256i dup_even = _mm256_setr_epi16( + 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e, + 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); + + /* In this branch we handle three cases: + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + single UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two + UTF-8 bytes + 3. 
[aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + three UTF-8 bytes + + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. + + We precompute byte 1 for case #1 and the common byte for cases #2 & #3 + in register t2. + + We precompute byte 1 for case #3 and -- **conditionally** -- precompute + either byte 1 for case #2 or byte 2 for case #3. Note that they + differ by exactly one bit. + + Finally from these two code units we build proper UTF-8 sequence, taking + into account the case (i.e, the number of bytes to write). + */ + /** + * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: + * t2 => [0ccc|cccc] [10cc|cccc] + * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) + */ +#define simdutf_vec(x) _mm256_set1_epi16(static_cast(x)) + // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] + const __m256i t0 = _mm256_shuffle_epi8(in_16, dup_even); + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] + const __m256i t1 = _mm256_and_si256(t0, simdutf_vec(0b0011111101111111)); + // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] + const __m256i t2 = _mm256_or_si256(t1, simdutf_vec(0b1000000000000000)); + + // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] + const __m256i s0 = _mm256_srli_epi16(in_16, 4); + // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] + const __m256i s1 = _mm256_and_si256(s0, simdutf_vec(0b0000111111111100)); + // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] + const __m256i s2 = _mm256_maddubs_epi16(s1, simdutf_vec(0x0140)); + // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] + const __m256i s3 = _mm256_or_si256(s2, simdutf_vec(0b1100000011100000)); + const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, + simdutf_vec(0b0100000000000000)); + const __m256i s4 = _mm256_xor_si256(s3, m0); +#undef simdutf_vec + + // 4. 
expand code units 16-bit => 32-bit + const __m256i out0 = _mm256_unpacklo_epi16(t2, s4); + const __m256i out1 = _mm256_unpackhi_epi16(t2, s4); + + // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle + const uint32_t mask = (one_byte_bitmask & 0x55555555) | + (one_or_two_bytes_bitmask & 0xaaaaaaaa); + // Due to the wider registers, the following path is less likely to be + // useful. + /*if(mask == 0) { + // We only have three-byte code units. Use fast path. + const __m256i shuffle = + _mm256_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1, + 2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1); const __m256i utf8_0 = + _mm256_shuffle_epi8(out0, shuffle); const __m256i utf8_1 = + _mm256_shuffle_epi8(out1, shuffle); + _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_0)); + utf8_output += 12; + _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_1)); + utf8_output += 12; + _mm_storeu_si128((__m128i*)utf8_output, + _mm256_extractf128_si256(utf8_0,1)); utf8_output += 12; + _mm_storeu_si128((__m128i*)utf8_output, + _mm256_extractf128_si256(utf8_1,1)); utf8_output += 12; buf += 16; + continue; + }*/ + const uint8_t mask0 = uint8_t(mask); + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; + const __m128i shuffle0 = _mm_loadu_si128((__m128i *)(row0 + 1)); + const __m128i utf8_0 = + _mm_shuffle_epi8(_mm256_castsi256_si128(out0), shuffle0); + + const uint8_t mask1 = static_cast(mask >> 8); + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; + const __m128i shuffle1 = _mm_loadu_si128((__m128i *)(row1 + 1)); + const __m128i utf8_1 = + _mm_shuffle_epi8(_mm256_castsi256_si128(out1), shuffle1); + + const uint8_t mask2 = static_cast(mask >> 16); + const uint8_t *row2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask2][0]; + const __m128i shuffle2 = _mm_loadu_si128((__m128i *)(row2 + 1)); + const __m128i utf8_2 = + 
_mm_shuffle_epi8(_mm256_extractf128_si256(out0, 1), shuffle2); + + const uint8_t mask3 = static_cast(mask >> 24); + const uint8_t *row3 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask3][0]; + const __m128i shuffle3 = _mm_loadu_si128((__m128i *)(row3 + 1)); + const __m128i utf8_3 = + _mm_shuffle_epi8(_mm256_extractf128_si256(out1, 1), shuffle3); + + _mm_storeu_si128((__m128i *)utf8_output, utf8_0); + utf8_output += row0[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_1); + utf8_output += row1[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_2); + utf8_output += row2[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_3); + utf8_output += row3[0]; + buf += 16; + } else { + // case: at least one 32-bit word is larger than 0xFFFF <=> it will + // produce four UTF-8 bytes. Let us do a scalar fallback. It may seem + // wasteful to use scalar code, but being efficient with SIMD may require + // large, non-trivial tables? + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint32_t word = buf[k]; + if ((word & 0xFFFFFF80) == 0) { // 1-byte (ASCII) + *utf8_output++ = char(word); + } else if ((word & 0xFFFFF800) == 0) { // 2-byte + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xFFFF0000) == 0) { // 3-byte + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair( + result(error_code::SURROGATE, buf - start + k), utf8_output); + } + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { // 4-byte + if (word > 0x10FFFF) { + return std::make_pair( + result(error_code::TOO_LARGE, buf - start + k), utf8_output); + } + *utf8_output++ = char((word >> 18) | 0b11110000); + *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); + *utf8_output++ 
= char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } + } + buf += k; + } + } // while + + return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output); +} +/* end file src/icelake/icelake_convert_utf32_to_utf8.inl.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +/* begin file src/icelake/icelake_convert_utf32_to_utf16.inl.cpp */ +// file included directly + +template +std::pair +avx512_convert_utf32_to_utf16(const char32_t *buf, size_t len, + char16_t *utf16_output) { + const char32_t *end = buf + len; + __mmask32 forbidden_bytemask = 0; + const __m512i v_00000000 = _mm512_setzero_si512(); + const __m512i v_ffff0000 = _mm512_set1_epi32((int32_t)0xffff0000); + const __m512i v_f800 = _mm512_set1_epi32((uint32_t)0xf800); + const __m512i v_d800 = _mm512_set1_epi32((uint32_t)0xd800); + const __m512i v_10ffff = _mm512_set1_epi32(0x10FFFF); + const __m512i v_10000 = _mm512_set1_epi32(0x10000); + const __m512i v_3ff0000 = _mm512_set1_epi32(0x3FF0000); + const __m512i v_3ff = _mm512_set1_epi32(0x3FF); + const __m512i v_dc00d800 = _mm512_set1_epi32((int32_t)0xDC00D800); + + while (end - buf >= std::ptrdiff_t(16)) { + __m512i in = _mm512_loadu_si512(buf); + + // no bits set above 16th bit <=> can pack to UTF16 without surrogate pairs + const __mmask16 saturation_bitmask = + _mm512_cmpeq_epi32_mask(_mm512_and_si512(in, v_ffff0000), v_00000000); + + if (saturation_bitmask == 0xffff) { + forbidden_bytemask |= + _mm512_cmpeq_epi32_mask(_mm512_and_si512(in, v_f800), v_d800); + + __m256i utf16_packed = _mm512_cvtepi32_epi16(in); + if (big_endian) { + const __m256i swap = _mm256_setr_epi8( + 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 1, 0, 3, 2, 5, + 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + utf16_packed = _mm256_shuffle_epi8(utf16_packed, swap); + } + _mm256_storeu_si256((__m256i *)utf16_output, utf16_packed); + utf16_output += 16; 
+ buf += 16; + } else { + // saturation_bitmask == 1 words will generate 1 utf16 char, + // and saturation_bitmask == 0 words will generate 2 utf16 chars assuming + // no errors. Thus we need a output_mask which has the structure b_2i = 1, + // b_2i+1 = !saturation_bitmask_i + const __mmask32 output_mask = ~_pdep_u32(saturation_bitmask, 0xAAAAAAAA); + const __mmask16 surrogate_bitmask = __mmask16(~saturation_bitmask); + __mmask32 error = _mm512_mask_cmpeq_epi32_mask( + saturation_bitmask, _mm512_and_si512(in, v_f800), v_d800); + error |= _mm512_mask_cmpgt_epu32_mask(surrogate_bitmask, in, v_10ffff); + if (simdutf_unlikely(error)) { + return std::make_pair(nullptr, utf16_output); + } + __m512i v1, v2, v; + // for the bits saturation_bitmask == 0, we need to unpack the 32-bit word + // into two 16 bit words corresponding to high_surrogate and + // low_surrogate. Once the bits are unpacked and merged, the output will + // be compressed as per output_mask. + in = _mm512_mask_sub_epi32(in, surrogate_bitmask, in, v_10000); + v1 = _mm512_mask_slli_epi32(in, surrogate_bitmask, in, 16); + v1 = _mm512_mask_and_epi32(in, surrogate_bitmask, v1, v_3ff0000); + v2 = _mm512_mask_srli_epi32(in, surrogate_bitmask, in, 10); + v2 = _mm512_mask_and_epi32(in, surrogate_bitmask, v2, v_3ff); + v = _mm512_or_si512(v1, v2); + in = _mm512_mask_add_epi32(in, surrogate_bitmask, v, v_dc00d800); + if (big_endian) { + const __m512i swap_512 = _mm512_set_epi8( + 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, + 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, + 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, + 2, 3, 0, 1); + in = _mm512_shuffle_epi8(in, swap_512); + } + // we deliberately avoid _mm512_mask_compressstoreu_epi16 for portability + // (AMD Zen4 has terrible performance with it, it is effectively broken) + __m512i compressed = _mm512_maskz_compress_epi16(output_mask, in); + auto written_out = _mm_popcnt_u32(output_mask); + 
_mm512_mask_storeu_epi16(utf16_output, _bzhi_u32(0xFFFFFFFF, written_out), + compressed); + //_mm512_mask_compressstoreu_epi16(utf16_output, output_mask, in); + utf16_output += written_out; + buf += 16; + } + } + + size_t remaining_len = size_t(end - buf); + if (remaining_len) { + __mmask16 input_mask = __mmask16((1 << remaining_len) - 1); + __m512i in = _mm512_maskz_loadu_epi32(input_mask, buf); + const __mmask16 saturation_bitmask = + _mm512_cmpeq_epi32_mask(_mm512_and_si512(in, v_ffff0000), v_00000000) & + input_mask; + if (saturation_bitmask == input_mask) { + forbidden_bytemask |= + _mm512_cmpeq_epi32_mask(_mm512_and_si512(in, v_f800), v_d800); + + __m256i utf16_packed = _mm512_cvtepi32_epi16(in); + if (big_endian) { + const __m256i swap = _mm256_setr_epi8( + 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 1, 0, 3, 2, 5, + 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + utf16_packed = _mm256_shuffle_epi8(utf16_packed, swap); + } + _mm256_mask_storeu_epi16(utf16_output, input_mask, utf16_packed); + utf16_output += remaining_len; + buf += remaining_len; + } else { + const __mmask32 output_max_mask = (1 << (remaining_len * 2)) - 1; + const __mmask32 output_mask = + (~_pdep_u32(saturation_bitmask, 0xAAAAAAAA)) & output_max_mask; + const __mmask16 surrogate_bitmask = + __mmask16(~saturation_bitmask) & input_mask; + __mmask32 error = _mm512_mask_cmpeq_epi32_mask( + saturation_bitmask, _mm512_and_si512(in, v_f800), v_d800); + error |= _mm512_mask_cmpgt_epu32_mask(surrogate_bitmask, in, v_10ffff); + if (simdutf_unlikely(error)) { + return std::make_pair(nullptr, utf16_output); + } + __m512i v1, v2, v; + in = _mm512_mask_sub_epi32(in, surrogate_bitmask, in, v_10000); + v1 = _mm512_mask_slli_epi32(in, surrogate_bitmask, in, 16); + v1 = _mm512_mask_and_epi32(in, surrogate_bitmask, v1, v_3ff0000); + v2 = _mm512_mask_srli_epi32(in, surrogate_bitmask, in, 10); + v2 = _mm512_mask_and_epi32(in, surrogate_bitmask, v2, v_3ff); + v = _mm512_or_si512(v1, v2); + in = 
_mm512_mask_add_epi32(in, surrogate_bitmask, v, v_dc00d800); + if (big_endian) { + const __m512i swap_512 = _mm512_set_epi8( + 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, + 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, + 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, + 2, 3, 0, 1); + in = _mm512_shuffle_epi8(in, swap_512); + } + // we deliberately avoid _mm512_mask_compressstoreu_epi16 for portability + // (AMD Zen4 has terrible performance with it, it is effectively broken) + __m512i compressed = _mm512_maskz_compress_epi16(output_mask, in); + auto written_out = _mm_popcnt_u32(output_mask); + _mm512_mask_storeu_epi16(utf16_output, _bzhi_u32(0xFFFFFFFF, written_out), + compressed); + //_mm512_mask_compressstoreu_epi16(utf16_output, output_mask, in); + utf16_output += written_out; + buf += remaining_len; + } + } + + // check for invalid input + if (forbidden_bytemask != 0) { + return std::make_pair(nullptr, utf16_output); + } + + return std::make_pair(buf, utf16_output); +} + +template +std::pair +avx512_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, + char16_t *utf16_output) { + const char32_t *start = buf; + const char32_t *end = buf + len; + const __m512i v_00000000 = _mm512_setzero_si512(); + const __m512i v_ffff0000 = _mm512_set1_epi32((int32_t)0xffff0000); + const __m512i v_f800 = _mm512_set1_epi32((uint32_t)0xf800); + const __m512i v_d800 = _mm512_set1_epi32((uint32_t)0xd800); + const __m512i v_10ffff = _mm512_set1_epi32(0x10FFFF); + const __m512i v_10000 = _mm512_set1_epi32(0x10000); + const __m512i v_3ff0000 = _mm512_set1_epi32(0x3FF0000); + const __m512i v_3ff = _mm512_set1_epi32(0x3FF); + const __m512i v_dc00d800 = _mm512_set1_epi32((int32_t)0xDC00D800); + int error_idx = 0; + error_code code = error_code::SUCCESS; + bool err = false; + + while (end - buf >= std::ptrdiff_t(16)) { + __m512i in = _mm512_loadu_si512(buf); + + // no bits set above 16th bit <=> can pack to 
UTF16 without surrogate pairs + const __mmask16 saturation_bitmask = + _mm512_cmpeq_epi32_mask(_mm512_and_si512(in, v_ffff0000), v_00000000); + + if (saturation_bitmask == 0xffff) { + __mmask32 forbidden_bytemask = + _mm512_cmpeq_epi32_mask(_mm512_and_si512(in, v_f800), v_d800); + + __m256i utf16_packed = _mm512_cvtepi32_epi16(in); + if (big_endian) { + const __m256i swap = _mm256_setr_epi8( + 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 1, 0, 3, 2, 5, + 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + utf16_packed = _mm256_shuffle_epi8(utf16_packed, swap); + } + if (simdutf_unlikely(forbidden_bytemask)) { + int idx = _tzcnt_u32(forbidden_bytemask); + _mm256_mask_storeu_epi16( + utf16_output, __mmask16(_blsmsk_u32(forbidden_bytemask) >> 1), + utf16_packed); + return std::make_pair(result(error_code::SURROGATE, buf - start + idx), + utf16_output + idx); + } + _mm256_storeu_si256((__m256i *)utf16_output, utf16_packed); + utf16_output += 16; + } else { + __mmask32 output_mask = ~_pdep_u32(saturation_bitmask, 0xAAAAAAAA); + const __mmask16 surrogate_bitmask = __mmask16(~saturation_bitmask); + __mmask32 error_surrogate = _mm512_mask_cmpeq_epi32_mask( + saturation_bitmask, _mm512_and_si512(in, v_f800), v_d800); + __mmask32 error_too_large = + _mm512_mask_cmpgt_epu32_mask(surrogate_bitmask, in, v_10ffff); + if (simdutf_unlikely(error_surrogate || error_too_large)) { + // Need to find the lowest set bit between the two error masks + // Need to also write the partial chunk until the error index to output. 
+ int large_idx = _tzcnt_u32(error_too_large); + int surrogate_idx = _tzcnt_u32(error_surrogate); + err = true; + if (large_idx < surrogate_idx) { + code = error_code::TOO_LARGE; + error_idx = large_idx; + } else { + code = error_code::SURROGATE; + error_idx = surrogate_idx; + } + output_mask &= ((1 << (2 * error_idx)) - 1); + } + __m512i v1, v2, v; + in = _mm512_mask_sub_epi32(in, surrogate_bitmask, in, v_10000); + v1 = _mm512_mask_slli_epi32(in, surrogate_bitmask, in, 16); + v1 = _mm512_mask_and_epi32(in, surrogate_bitmask, v1, v_3ff0000); + v2 = _mm512_mask_srli_epi32(in, surrogate_bitmask, in, 10); + v2 = _mm512_mask_and_epi32(in, surrogate_bitmask, v2, v_3ff); + v = _mm512_or_si512(v1, v2); + in = _mm512_mask_add_epi32(in, surrogate_bitmask, v, v_dc00d800); + if (big_endian) { + const __m512i swap_512 = _mm512_set_epi8( + 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, + 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, + 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, + 2, 3, 0, 1); + in = _mm512_shuffle_epi8(in, swap_512); + } + // we deliberately avoid _mm512_mask_compressstoreu_epi16 for portability + // (AMD Zen4 has terrible performance with it, it is effectively broken) + __m512i compressed = _mm512_maskz_compress_epi16(output_mask, in); + auto written_out = _mm_popcnt_u32(output_mask); + _mm512_mask_storeu_epi16(utf16_output, _bzhi_u32(0xFFFFFFFF, written_out), + compressed); + //_mm512_mask_compressstoreu_epi16(utf16_output, output_mask, in); + utf16_output += written_out; + if (simdutf_unlikely(err)) { + return std::make_pair(result(code, buf - start + error_idx), + utf16_output); + } + } + buf += 16; + } + + size_t remaining_len = size_t(end - buf); + if (remaining_len) { + __mmask16 input_mask = __mmask16((1 << remaining_len) - 1); + __m512i in = _mm512_maskz_loadu_epi32(input_mask, buf); + const __mmask16 saturation_bitmask = + _mm512_cmpeq_epi32_mask(_mm512_and_si512(in, v_ffff0000), 
v_00000000) & + input_mask; + if (saturation_bitmask == input_mask) { + __mmask32 forbidden_bytemask = + _mm512_cmpeq_epi32_mask(_mm512_and_si512(in, v_f800), v_d800); + __m256i utf16_packed = _mm512_cvtepi32_epi16(in); + if (big_endian) { + const __m256i swap = _mm256_setr_epi8( + 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 1, 0, 3, 2, 5, + 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + utf16_packed = _mm256_shuffle_epi8(utf16_packed, swap); + } + if (simdutf_unlikely(forbidden_bytemask)) { + int idx = _tzcnt_u32(forbidden_bytemask); + _mm256_mask_storeu_epi16( + utf16_output, __mmask16(_blsmsk_u32(forbidden_bytemask) >> 1), + utf16_packed); + return std::make_pair(result(error_code::SURROGATE, buf - start + idx), + utf16_output + idx); + } + _mm256_mask_storeu_epi16(utf16_output, input_mask, utf16_packed); + utf16_output += remaining_len; + } else { + const __mmask32 output_max_mask = (1 << (remaining_len * 2)) - 1; + __mmask32 output_mask = + (~_pdep_u32(saturation_bitmask, 0xAAAAAAAA)) & output_max_mask; + const __mmask16 surrogate_bitmask = + __mmask16(~saturation_bitmask) & input_mask; + __mmask32 error_surrogate = _mm512_mask_cmpeq_epi32_mask( + saturation_bitmask, _mm512_and_si512(in, v_f800), v_d800); + __mmask32 error_too_large = + _mm512_mask_cmpgt_epu32_mask(surrogate_bitmask, in, v_10ffff); + if (simdutf_unlikely(error_surrogate || error_too_large)) { + int large_idx = _tzcnt_u32(error_too_large); + int surrogate_idx = _tzcnt_u32(error_surrogate); + err = true; + if (large_idx < surrogate_idx) { + code = error_code::TOO_LARGE; + error_idx = large_idx; + } else { + code = error_code::SURROGATE; + error_idx = surrogate_idx; + } + output_mask &= ((1 << (2 * error_idx)) - 1); + } + __m512i v1, v2, v; + in = _mm512_mask_sub_epi32(in, surrogate_bitmask, in, v_10000); + v1 = _mm512_mask_slli_epi32(in, surrogate_bitmask, in, 16); + v1 = _mm512_mask_and_epi32(in, surrogate_bitmask, v1, v_3ff0000); + v2 = _mm512_mask_srli_epi32(in, surrogate_bitmask, in, 
10); + v2 = _mm512_mask_and_epi32(in, surrogate_bitmask, v2, v_3ff); + v = _mm512_or_si512(v1, v2); + in = _mm512_mask_add_epi32(in, surrogate_bitmask, v, v_dc00d800); + if (big_endian) { + const __m512i swap_512 = _mm512_set_epi8( + 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, + 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, + 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, + 2, 3, 0, 1); + in = _mm512_shuffle_epi8(in, swap_512); + } + // we deliberately avoid _mm512_mask_compressstoreu_epi16 for portability + // (AMD Zen4 has terrible performance with it, it is effectively broken) + __m512i compressed = _mm512_maskz_compress_epi16(output_mask, in); + auto written_out = _mm_popcnt_u32(output_mask); + _mm512_mask_storeu_epi16(utf16_output, _bzhi_u32(0xFFFFFFFF, written_out), + compressed); + //_mm512_mask_compressstoreu_epi16(utf16_output, output_mask, in); + utf16_output += written_out; + if (simdutf_unlikely(err)) { + return std::make_pair(result(code, buf - start + error_idx), + utf16_output); + } + } + buf += remaining_len; + } + + return std::make_pair(result(error_code::SUCCESS, buf - start), utf16_output); +} +/* end file src/icelake/icelake_convert_utf32_to_utf16.inl.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_ASCII +/* begin file src/icelake/icelake_ascii_validation.inl.cpp */ +// file included directly + +bool validate_ascii(const char *buf, size_t len) { + const char *end = buf + len; + const __m512i ascii = _mm512_set1_epi8((uint8_t)0x80); + __m512i running_or = _mm512_setzero_si512(); + for (; end - buf >= 64; buf += 64) { + const __m512i utf8 = _mm512_loadu_si512((const __m512i *)buf); + running_or = _mm512_ternarylogic_epi32(running_or, utf8, ascii, + 0xf8); // running_or | (utf8 & ascii) + } + if (buf < end) { + const __m512i utf8 = _mm512_maskz_loadu_epi8( + (uint64_t(1) << (end - buf)) - 1, (const __m512i *)buf); + running_or = 
_mm512_ternarylogic_epi32(running_or, utf8, ascii, + 0xf8); // running_or | (utf8 & ascii) + } + return (_mm512_test_epi8_mask(running_or, running_or) == 0); +} +/* end file src/icelake/icelake_ascii_validation.inl.cpp */ +#endif // SIMDUTF_FEATURE_ASCII +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +/* begin file src/icelake/icelake_utf32_validation.inl.cpp */ +// file included directly + +bool validate_utf32(const char32_t *buf, size_t len) { + if (simdutf_unlikely(len == 0)) { + return true; + } + const char32_t *end = buf + len; + + const __m512i offset = _mm512_set1_epi32((uint32_t)0xffff2000); + __m512i currentmax = _mm512_setzero_si512(); + __m512i currentoffsetmax = _mm512_setzero_si512(); + + // Optimized: Process 32 values (2x 512-bit) per iteration for better + // throughput + while (end - buf >= 32) { + __m512i utf32_1 = _mm512_loadu_si512((const __m512i *)buf); + __m512i utf32_2 = _mm512_loadu_si512((const __m512i *)(buf + 16)); + buf += 32; + + // Process both blocks in parallel to maximize instruction-level parallelism + __m512i offsetmax_1 = _mm512_add_epi32(utf32_1, offset); + __m512i offsetmax_2 = _mm512_add_epi32(utf32_2, offset); + + currentoffsetmax = _mm512_max_epu32(offsetmax_1, currentoffsetmax); + currentmax = _mm512_max_epu32(utf32_1, currentmax); + + currentoffsetmax = _mm512_max_epu32(offsetmax_2, currentoffsetmax); + currentmax = _mm512_max_epu32(utf32_2, currentmax); + } + + // Handle remaining 16-31 values + if (end - buf >= 16) { + __m512i utf32 = _mm512_loadu_si512((const __m512i *)buf); + buf += 16; + currentoffsetmax = + _mm512_max_epu32(_mm512_add_epi32(utf32, offset), currentoffsetmax); + currentmax = _mm512_max_epu32(utf32, currentmax); + } + + // Handle remaining 0-15 values with masked load + if (buf < end) { + __m512i utf32 = + _mm512_maskz_loadu_epi32(__mmask16((1 << (end - buf)) - 1), buf); + currentoffsetmax = + _mm512_max_epu32(_mm512_add_epi32(utf32, offset), currentoffsetmax); + currentmax = 
_mm512_max_epu32(utf32, currentmax); + } + + const __m512i standardmax = _mm512_set1_epi32((uint32_t)0x10ffff); + const __m512i standardoffsetmax = _mm512_set1_epi32((uint32_t)0xfffff7ff); + const auto outside_range = _mm512_cmpgt_epu32_mask(currentmax, standardmax); + if (outside_range != 0) { + return false; + } + + const auto surrogate = + _mm512_cmpgt_epu32_mask(currentoffsetmax, standardoffsetmax); + if (surrogate != 0) { + return false; + } + + return true; +} +/* end file src/icelake/icelake_utf32_validation.inl.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 +/* begin file src/icelake/icelake_convert_latin1_to_utf8.inl.cpp */ +// file included directly + +static inline size_t latin1_to_utf8_avx512_vec(__m512i input, size_t input_len, + char *utf8_output, + int mask_output) { + __mmask64 nonascii = _mm512_movepi8_mask(input); + size_t output_size = input_len + (size_t)count_ones(nonascii); + + // Mask to denote whether the byte is a leading byte that is not ascii + __mmask64 sixth = _mm512_cmpge_epu8_mask( + input, _mm512_set1_epi8(-64)); // binary representation of -64: 1100 0000 + + const uint64_t alternate_bits = UINT64_C(0x5555555555555555); + uint64_t ascii = ~nonascii; + // the bits in ascii are inverted and zeros are interspersed in between them + uint64_t maskA = ~_pdep_u64(ascii, alternate_bits); + uint64_t maskB = ~_pdep_u64(ascii >> 32, alternate_bits); + + // interleave bytes from top and bottom halves (abcd...ABCD -> aAbBcCdD) + __m512i input_interleaved = _mm512_permutexvar_epi8( + _mm512_set_epi32(0x3f1f3e1e, 0x3d1d3c1c, 0x3b1b3a1a, 0x39193818, + 0x37173616, 0x35153414, 0x33133212, 0x31113010, + 0x2f0f2e0e, 0x2d0d2c0c, 0x2b0b2a0a, 0x29092808, + 0x27072606, 0x25052404, 0x23032202, 0x21012000), + input); + + // double size of each byte, and insert the leading byte 1100 0010 + + /* + upscale the bytes to 16-bit value, adding the 0b11000000 leading byte in the + process. 
We adjust for the bytes that have their two most significant bits. + This takes care of the first 32 bytes, assuming we interleaved the bytes. */ + __m512i outputA = + _mm512_shldi_epi16(input_interleaved, _mm512_set1_epi8(-62), 8); + outputA = _mm512_mask_add_epi16( + outputA, (__mmask32)sixth, outputA, + _mm512_set1_epi16(1 - 0x4000)); // 1- 0x4000 = 1100 0000 0000 0001???? + + // in the second 32-bit half, set first or second option based on whether + // original input is leading byte (second case) or not (first case) + __m512i leadingB = + _mm512_mask_blend_epi16((__mmask32)(sixth >> 32), + _mm512_set1_epi16(0x00c2), // 0000 0000 1101 0010 + _mm512_set1_epi16(0x40c3)); // 0100 0000 1100 0011 + __m512i outputB = _mm512_ternarylogic_epi32( + input_interleaved, leadingB, _mm512_set1_epi16((short)0xff00), + (240 & 170) ^ 204); // (input_interleaved & 0xff00) ^ leadingB + + // prune redundant bytes + outputA = _mm512_maskz_compress_epi8(maskA, outputA); + outputB = _mm512_maskz_compress_epi8(maskB, outputB); + + size_t output_sizeA = (size_t)count_ones((uint32_t)nonascii) + 32; + + if (mask_output) { + if (input_len > 32) { // is the second half of the input vector used? 
+ __mmask64 write_mask = _bzhi_u64(~0ULL, (unsigned int)output_sizeA); + _mm512_mask_storeu_epi8(utf8_output, write_mask, outputA); + utf8_output += output_sizeA; + write_mask = _bzhi_u64(~0ULL, (unsigned int)(output_size - output_sizeA)); + _mm512_mask_storeu_epi8(utf8_output, write_mask, outputB); + } else { + __mmask64 write_mask = _bzhi_u64(~0ULL, (unsigned int)output_size); + _mm512_mask_storeu_epi8(utf8_output, write_mask, outputA); + } + } else { + _mm512_storeu_si512(utf8_output, outputA); + utf8_output += output_sizeA; + _mm512_storeu_si512(utf8_output, outputB); + } + return output_size; +} + +static inline size_t latin1_to_utf8_avx512_branch(__m512i input, + char *utf8_output) { + __mmask64 nonascii = _mm512_movepi8_mask(input); + if (nonascii) { + return latin1_to_utf8_avx512_vec(input, 64, utf8_output, 0); + } else { + _mm512_storeu_si512(utf8_output, input); + return 64; + } +} + +size_t latin1_to_utf8_avx512_start(const char *buf, size_t len, + char *utf8_output) { + char *start = utf8_output; + size_t pos = 0; + // if there's at least 128 bytes remaining, we don't need to mask the output + for (; pos + 128 <= len; pos += 64) { + __m512i input = _mm512_loadu_si512((__m512i *)(buf + pos)); + utf8_output += latin1_to_utf8_avx512_branch(input, utf8_output); + } + // in the last 128 bytes, the first 64 may require masking the output + if (pos + 64 <= len) { + __m512i input = _mm512_loadu_si512((__m512i *)(buf + pos)); + utf8_output += latin1_to_utf8_avx512_vec(input, 64, utf8_output, 1); + pos += 64; + } + // with the last 64 bytes, the input also needs to be masked + if (pos < len) { + __mmask64 load_mask = _bzhi_u64(~0ULL, (unsigned int)(len - pos)); + __m512i input = _mm512_maskz_loadu_epi8(load_mask, (__m512i *)(buf + pos)); + utf8_output += latin1_to_utf8_avx512_vec(input, len - pos, utf8_output, 1); + } + return (size_t)(utf8_output - start); +} +/* end file src/icelake/icelake_convert_latin1_to_utf8.inl.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 +#if 
SIMDUTF_FEATURE_UTF16 +/* begin file src/icelake/icelake_convert_latin1_to_utf16.inl.cpp */ +// file included directly +template +size_t icelake_convert_latin1_to_utf16(const char *latin1_input, size_t len, + char16_t *utf16_output) { + size_t rounded_len = len & ~0x1F; // Round down to nearest multiple of 32 + + __m512i byteflip = _mm512_setr_epi64(0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809); + for (size_t i = 0; i < rounded_len; i += 32) { + // Load 32 Latin1 characters into a 256-bit register + __m256i in = _mm256_loadu_si256((__m256i *)&latin1_input[i]); + // Zero extend each set of 8 Latin1 characters to 32 16-bit integers + __m512i out = _mm512_cvtepu8_epi16(in); + if (big_endian) { + out = _mm512_shuffle_epi8(out, byteflip); + } + // Store the results back to memory + _mm512_storeu_si512((__m512i *)&utf16_output[i], out); + } + if (rounded_len != len) { + uint32_t mask = uint32_t(1 << (len - rounded_len)) - 1; + __m256i in = _mm256_maskz_loadu_epi8(mask, latin1_input + rounded_len); + + // Zero extend each set of 8 Latin1 characters to 32 16-bit integers + __m512i out = _mm512_cvtepu8_epi16(in); + if (big_endian) { + out = _mm512_shuffle_epi8(out, byteflip); + } + // Store the results back to memory + _mm512_mask_storeu_epi16(utf16_output + rounded_len, mask, out); + } + + return len; +} +/* end file src/icelake/icelake_convert_latin1_to_utf16.inl.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF32 +/* begin file src/icelake/icelake_convert_latin1_to_utf32.inl.cpp */ +void avx512_convert_latin1_to_utf32(const char *buf, size_t len, + char32_t *utf32_output) { + while (len >= 16) { + // Load 16 Latin1 characters into a 128-bit register + __m128i in = _mm_loadu_si128((__m128i *)buf); + + // Zero extend each set of 8 Latin1 characters to 16 32-bit integers using + // vpmovzxbd + __m512i out = _mm512_cvtepu8_epi32(in); + + // 
Store the results back to memory + _mm512_storeu_si512((__m512i *)utf32_output, out); + + len -= 16; + buf += 16; + utf32_output += 16; + } + + __mmask16 mask = __mmask16((1 << len) - 1); + __m128i in = _mm_maskz_loadu_epi8(mask, buf); + __m512i out = _mm512_cvtepu8_epi32(in); + _mm512_mask_storeu_epi32((__m512i *)utf32_output, mask, out); +} +/* end file src/icelake/icelake_convert_latin1_to_utf32.inl.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_BASE64 +/* begin file src/icelake/icelake_base64.inl.cpp */ +// file included directly +/** + * References and further reading: + * + * Wojciech Muła, Daniel Lemire, Base64 encoding and decoding at almost the + * speed of a memory copy, Software: Practice and Experience 50 (2), 2020. + * https://arxiv.org/abs/1910.05109 + * + * Wojciech Muła, Daniel Lemire, Faster Base64 Encoding and Decoding using AVX2 + * Instructions, ACM Transactions on the Web 12 (3), 2018. + * https://arxiv.org/abs/1704.00605 + * + * Simon Josefsson. 2006. The Base16, Base32, and Base64 Data Encodings. + * https://tools.ietf.org/html/rfc4648. (2006). Internet Engineering Task Force, + * Request for Comments: 4648. + * + * Alfred Klomp. 2014a. Fast Base64 encoding/decoding with SSE vectorization. + * http://www.alfredklomp.com/programming/sse-base64/. (2014). + * + * Alfred Klomp. 2014b. Fast Base64 stream encoder/decoder in C99, with SIMD + * acceleration. https://github.com/aklomp/base64. (2014). + * + * Hanson Char. 2014. A Fast and Correct Base 64 Codec. (2014). + * https://aws.amazon.com/blogs/developer/a-fast-and-correct-base-64-codec/ + * + * Nick Kopp. 2013. Base64 Encoding on a GPU. + * https://www.codeproject.com/Articles/276993/Base-Encoding-on-a-GPU. (2013). 
+ */ + +struct block64 { + __m512i chunks[1]; +}; + +template +size_t encode_base64_impl(char *dst, const char *src, size_t srclen, + base64_options options, + size_t line_length = simdutf::default_line_length) { + size_t offset = 0; + if (line_length < 4) { + line_length = 4; // We do not support line_length less than 4 + } + // credit: Wojciech Muła + const uint8_t *input = (const uint8_t *)src; + + uint8_t *out = (uint8_t *)dst; + static const char *lookup_tbl = + base64_url + ? "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" + : "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + const __m512i shuffle_input = _mm512_setr_epi32( + 0x01020001, 0x04050304, 0x07080607, 0x0a0b090a, 0x0d0e0c0d, 0x10110f10, + 0x13141213, 0x16171516, 0x191a1819, 0x1c1d1b1c, 0x1f201e1f, 0x22232122, + 0x25262425, 0x28292728, 0x2b2c2a2b, 0x2e2f2d2e); + const __m512i lookup = + _mm512_loadu_si512(reinterpret_cast(lookup_tbl)); + const __m512i multi_shifts = _mm512_set1_epi64(UINT64_C(0x3036242a1016040a)); + size_t size = srclen; + __mmask64 input_mask = 0xffffffffffff; // (1 << 48) - 1 + // We want that input == end_input means that we must stop. 
+ const uint8_t *end_input = input + (size - (size % 48)); + while (input != end_input) { + const __m512i v = _mm512_maskz_loadu_epi8( + input_mask, reinterpret_cast(input)); + const __m512i in = _mm512_permutexvar_epi8(shuffle_input, v); + const __m512i indices = _mm512_multishift_epi64_epi8(multi_shifts, in); + const __m512i result = _mm512_permutexvar_epi8(indices, lookup); + if (use_lines) { + if (offset + 64 > line_length) { + if (line_length >= 64) { + __m512i expanded = _mm512_mask_expand_epi8( + _mm512_set1_epi8('\n'), ~(1ULL << ((line_length - offset))), + result); + _mm512_storeu_si512(reinterpret_cast<__m512i *>(out), expanded); + __m128i last_lane = + _mm512_extracti32x4_epi32(result, 3); // Lane 3 (bytes 48-63) + uint8_t last_byte = + static_cast(_mm_extract_epi8(last_lane, 15)); + out[64] = last_byte; + out += 65; + offset = 64 - (line_length - offset); + } else { // slow path + alignas(64) uint8_t local_buffer[64]; + _mm512_storeu_si512(reinterpret_cast<__m512i *>(local_buffer), + result); + size_t out_pos = 0; + size_t local_offset = offset; + for (size_t j = 0; j < 64;) { + if (local_offset == line_length) { + out[out_pos++] = '\n'; + local_offset = 0; + } + out[out_pos++] = local_buffer[j++]; + local_offset++; + } + offset = local_offset; + out += out_pos; + } + } else { + _mm512_storeu_si512(reinterpret_cast<__m512i *>(out), result); + offset += 64; + out += 64; + } + } else { + _mm512_storeu_si512(reinterpret_cast<__m512i *>(out), result); + out += 64; + } + input += 48; + } + size = size % 48; + + input_mask = ((__mmask64)1 << size) - 1; + const __m512i v = _mm512_maskz_loadu_epi8( + input_mask, reinterpret_cast(input)); + const __m512i in = _mm512_permutexvar_epi8(shuffle_input, v); + const __m512i indices = _mm512_multishift_epi64_epi8(multi_shifts, in); + bool padding_needed = + (((options & base64_url) == 0) ^ + ((options & base64_reverse_padding) == base64_reverse_padding)); + size_t padding_amount = ((size % 3) > 0) ? 
(3 - (size % 3)) : 0; + size_t output_len = ((size + 2) / 3) * 4; + size_t non_padded_output_len = output_len - padding_amount; + if (!padding_needed) { + output_len = non_padded_output_len; + } + // If no output, we are done. + if (output_len == 0) { + return (size_t)(out - (uint8_t *)dst); + } + __mmask64 output_mask = 0xFFFFFFFFFFFFFFFF >> (64 - output_len); + __m512i result = _mm512_mask_permutexvar_epi8( + _mm512_set1_epi8('='), ((__mmask64)1 << non_padded_output_len) - 1, + indices, lookup); + if (use_lines) { + if (offset + output_len > line_length) { + if (line_length >= 64) { + __m512i expanded = _mm512_mask_expand_epi8( + _mm512_set1_epi8('\n'), ~(1ULL << ((line_length - offset))), + result); + if (output_len == 64) { + _mm512_storeu_si512(reinterpret_cast<__m512i *>(out), expanded); + out += 64; + _mm512_mask_storeu_epi8(reinterpret_cast<__m512i *>(out - 63), + 1ULL << 63, result); + out++; + } else { + output_mask = 0xFFFFFFFFFFFFFFFF >> (64 - output_len - 1); + _mm512_mask_storeu_epi8(reinterpret_cast<__m512i *>(out), output_mask, + expanded); + out += output_len + 1; + } + } else { + alignas(64) uint8_t local_buffer[64]; + _mm512_storeu_si512(reinterpret_cast<__m512i *>(local_buffer), result); + size_t out_pos = 0; + size_t local_offset = offset; + for (size_t j = 0; j < output_len;) { + if (local_offset == line_length) { + out[out_pos++] = '\n'; + local_offset = 0; + } + out[out_pos++] = local_buffer[j++]; + local_offset++; + } + offset = local_offset; + out += out_pos; + } + } else { + _mm512_mask_storeu_epi8(reinterpret_cast<__m512i *>(out), output_mask, + result); + out += output_len; + } + } else { + _mm512_mask_storeu_epi8(reinterpret_cast<__m512i *>(out), output_mask, + result); + out += output_len; + } + return (size_t)(out - (uint8_t *)dst); +} + +template +size_t encode_base64(char *dst, const char *src, size_t srclen, + base64_options options) { + return encode_base64_impl(dst, src, srclen, options); +} + +template +static inline uint64_t 
to_base64_mask(block64 *b, uint64_t *error, + uint64_t input_mask = UINT64_MAX) { + __m512i input = b->chunks[0]; + const __m512i ascii_space_tbl = _mm512_set_epi8( + 0, 0, 13, 12, 0, 10, 9, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 13, 12, 0, 10, + 9, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 13, 12, 0, 10, 9, 0, 0, 0, 0, 0, 0, + 0, 0, 32, 0, 0, 13, 12, 0, 10, 9, 0, 0, 0, 0, 0, 0, 0, 0, 32); + __m512i lookup0; + if (default_or_url) { + lookup0 = _mm512_set_epi8( + -128, -128, -128, -128, -128, -128, 61, 60, 59, 58, 57, 56, 55, 54, 53, + 52, 63, -128, 62, -128, 62, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -1, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -1, -128, + -128, -1, -1, -128, -128, -128, -128, -128, -128, -128, -128, -1); + } else if (base64_url) { + lookup0 = _mm512_set_epi8( + -128, -128, -128, -128, -128, -128, 61, 60, 59, 58, 57, 56, 55, 54, 53, + 52, -128, -128, 62, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -1, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -1, + -128, -128, -1, -1, -128, -128, -128, -128, -128, -128, -128, -128, -1); + } else { + lookup0 = _mm512_set_epi8( + -128, -128, -128, -128, -128, -128, 61, 60, 59, 58, 57, 56, 55, 54, 53, + 52, 63, -128, -128, -128, 62, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -1, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -1, -128, + -128, -1, -1, -128, -128, -128, -128, -128, -128, -128, -128, -128); + } + __m512i lookup1; + if (default_or_url) { + lookup1 = _mm512_set_epi8( + -128, -128, -128, -128, -128, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, + 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, -128, + 63, -128, -128, -128, -128, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, + 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, -128); + } else if 
(base64_url) { + lookup1 = _mm512_set_epi8( + -128, -128, -128, -128, -128, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, + 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, -128, + 63, -128, -128, -128, -128, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, + 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, -128); + } else { + lookup1 = _mm512_set_epi8( + -128, -128, -128, -128, -128, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, + 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, -128, + -128, -128, -128, -128, -128, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, -128); + } + + const __m512i translated = _mm512_permutex2var_epi8(lookup0, input, lookup1); + const __m512i combined = _mm512_or_si512(translated, input); + const __mmask64 mask = _mm512_movepi8_mask(combined) & input_mask; + if (!ignore_garbage && mask) { + const __mmask64 spaces = + _mm512_cmpeq_epi8_mask(_mm512_shuffle_epi8(ascii_space_tbl, input), + input) & + input_mask; + *error = (mask ^ spaces); + } + b->chunks[0] = translated; + + return mask | (~input_mask); +} + +static inline void copy_block(block64 *b, char *output) { + _mm512_storeu_si512(reinterpret_cast<__m512i *>(output), b->chunks[0]); +} + +static inline uint64_t compress_block(block64 *b, uint64_t mask, char *output) { + uint64_t nmask = ~mask; + __m512i c = _mm512_maskz_compress_epi8(nmask, b->chunks[0]); + _mm512_storeu_si512(reinterpret_cast<__m512i *>(output), c); + return _mm_popcnt_u64(nmask); +} + +// The caller of this function is responsible to ensure that there are 64 bytes +// available from reading at src. The data is read into a block64 structure. 
+static inline void load_block(block64 *b, const char *src) { + b->chunks[0] = _mm512_loadu_si512(reinterpret_cast(src)); +} + +static inline void load_block_partial(block64 *b, const char *src, + __mmask64 input_mask) { + b->chunks[0] = _mm512_maskz_loadu_epi8( + input_mask, reinterpret_cast(src)); +} + +// The caller of this function is responsible to ensure that there are 128 bytes +// available from reading at src. The data is read into a block64 structure. +static inline void load_block(block64 *b, const char16_t *src) { + __m512i m1 = _mm512_loadu_si512(reinterpret_cast(src)); + __m512i m2 = _mm512_loadu_si512(reinterpret_cast(src + 32)); + __m512i p = _mm512_packus_epi16(m1, m2); + b->chunks[0] = + _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 2, 4, 6, 1, 3, 5, 7), p); +} + +static inline void load_block_partial(block64 *b, const char16_t *src, + __mmask64 input_mask) { + __m512i m1 = _mm512_maskz_loadu_epi16((__mmask32)input_mask, + reinterpret_cast(src)); + __m512i m2 = + _mm512_maskz_loadu_epi16((__mmask32)(input_mask >> 32), + reinterpret_cast(src + 32)); + __m512i p = _mm512_packus_epi16(m1, m2); + b->chunks[0] = + _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 2, 4, 6, 1, 3, 5, 7), p); +} + +static inline void base64_decode(char *out, __m512i str) { + const __m512i merge_ab_and_bc = + _mm512_maddubs_epi16(str, _mm512_set1_epi32(0x01400140)); + const __m512i merged = + _mm512_madd_epi16(merge_ab_and_bc, _mm512_set1_epi32(0x00011000)); + const __m512i pack = _mm512_set_epi8( + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60, 61, 62, 56, 57, 58, + 52, 53, 54, 48, 49, 50, 44, 45, 46, 40, 41, 42, 36, 37, 38, 32, 33, 34, + 28, 29, 30, 24, 25, 26, 20, 21, 22, 16, 17, 18, 12, 13, 14, 8, 9, 10, 4, + 5, 6, 0, 1, 2); + const __m512i shuffled = _mm512_permutexvar_epi8(pack, merged); + _mm512_mask_storeu_epi8( + (__m512i *)out, 0xffffffffffff, + shuffled); // mask would be 0xffffffffffff since we write 48 bytes. 
+} +// decode 64 bytes and output 48 bytes +static inline void base64_decode_block(char *out, const char *src) { + base64_decode(out, + _mm512_loadu_si512(reinterpret_cast(src))); +} +static inline void base64_decode_block(char *out, block64 *b) { + base64_decode(out, b->chunks[0]); +} + +template +full_result +compress_decode_base64(char *dst, const chartype *src, size_t srclen, + base64_options options, + last_chunk_handling_options last_chunk_options) { + (void)options; + const uint8_t *to_base64 = + default_or_url ? tables::base64::to_base64_default_or_url_value + : (base64_url ? tables::base64::to_base64_url_value + : tables::base64::to_base64_value); + auto ri = simdutf::scalar::base64::find_end(src, srclen, options); + size_t equallocation = ri.equallocation; + size_t padding_characters = ri.equalsigns; + srclen = ri.srclen; + size_t full_input_length = ri.full_input_length; + if (srclen == 0) { + if (!ignore_garbage && padding_characters > 0) { + return {INVALID_BASE64_CHARACTER, equallocation, 0}; + } + return {SUCCESS, full_input_length, 0}; + } + const chartype *const srcinit = src; + const char *const dstinit = dst; + const chartype *const srcend = src + srclen; + + // figure out why block_size == 2 is sometimes best??? + constexpr size_t block_size = 6; + char buffer[block_size * 64]; + char *bufferptr = buffer; + if (srclen >= 64) { + const chartype *const srcend64 = src + srclen - 64; + while (src <= srcend64) { + block64 b; + load_block(&b, src); + src += 64; + uint64_t error = 0; + uint64_t badcharmask = + to_base64_mask(&b, + &error); + if (!ignore_garbage && error) { + src -= 64; + size_t error_offset = _tzcnt_u64(error); + return {error_code::INVALID_BASE64_CHARACTER, + size_t(src - srcinit + error_offset), size_t(dst - dstinit)}; + } + if (badcharmask != 0) { + // optimization opportunity: check for simple masks like those made of + // continuous 1s followed by continuous 0s. And masks containing a + // single bad character. 
+ bufferptr += compress_block(&b, badcharmask, bufferptr); + } else if (bufferptr != buffer) { + copy_block(&b, bufferptr); + bufferptr += 64; + } else { + base64_decode_block(dst, &b); + dst += 48; + } + if (bufferptr >= (block_size - 1) * 64 + buffer) { + for (size_t i = 0; i < (block_size - 1); i++) { + base64_decode_block(dst, buffer + i * 64); + dst += 48; + } + std::memcpy(buffer, buffer + (block_size - 1) * 64, + 64); // 64 might be too much + bufferptr -= (block_size - 1) * 64; + } + } + } + + int last_block_len = (int)(srcend - src); + if (last_block_len != 0) { + __mmask64 input_mask = ((__mmask64)1 << last_block_len) - 1; + block64 b; + load_block_partial(&b, src, input_mask); + uint64_t error = 0; + uint64_t badcharmask = + to_base64_mask(&b, &error, + input_mask); + if (!ignore_garbage && error) { + size_t error_offset = _tzcnt_u64(error); + return {error_code::INVALID_BASE64_CHARACTER, + size_t(src - srcinit + error_offset), size_t(dst - dstinit)}; + } + src += last_block_len; + bufferptr += compress_block(&b, badcharmask, bufferptr); + } + + char *buffer_start = buffer; + for (; buffer_start + 64 <= bufferptr; buffer_start += 64) { + base64_decode_block(dst, buffer_start); + dst += 48; + } + if ((bufferptr - buffer_start) != 0) { + // For efficiency reasons, we end up reproducing much of the code + // in base64_tail_decode_impl. Better engineering would be to + // refactor the code so that we can call it without a performance hit. 
+ size_t rem = (bufferptr - buffer_start); + int idx = rem % 4; + __mmask64 mask = ((__mmask64)1 << rem) - 1; + __m512i input = _mm512_maskz_loadu_epi8(mask, buffer_start); + size_t output_len = (rem / 4) * 3; + __mmask64 output_mask = mask >> (rem - output_len); + const __m512i merge_ab_and_bc = + _mm512_maddubs_epi16(input, _mm512_set1_epi32(0x01400140)); + const __m512i merged = + _mm512_madd_epi16(merge_ab_and_bc, _mm512_set1_epi32(0x00011000)); + const __m512i pack = _mm512_set_epi8( + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60, 61, 62, 56, 57, 58, + 52, 53, 54, 48, 49, 50, 44, 45, 46, 40, 41, 42, 36, 37, 38, 32, 33, 34, + 28, 29, 30, 24, 25, 26, 20, 21, 22, 16, 17, 18, 12, 13, 14, 8, 9, 10, 4, + 5, 6, 0, 1, 2); + const __m512i shuffled = _mm512_permutexvar_epi8(pack, merged); + // We never should have that the number of base64 characters + the + // number of padding characters is more than 4. + if (!ignore_garbage && (idx + padding_characters > 4)) { + return {INVALID_BASE64_CHARACTER, size_t(src - srcinit), + size_t(dst - dstinit), true}; + } + // The idea here is that in loose mode, + // if there is padding at all, it must be used + // to form 4-wise chunk. However, in loose mode, + // we do accept no padding at all. + if (!ignore_garbage && + last_chunk_options == last_chunk_handling_options::loose && + (idx >= 2) && padding_characters > 0 && + ((idx + padding_characters) & 3) != 0) { + return {INVALID_BASE64_CHARACTER, size_t(src - srcinit), + size_t(dst - dstinit), true}; + } else + // The idea here is that in strict mode, we do not want to accept + // incomplete base64 chunks. So if the chunk was otherwise valid, we + // return BASE64_INPUT_REMAINDER. 
+ if (!ignore_garbage && + last_chunk_options == last_chunk_handling_options::strict && + (idx >= 2) && ((idx + padding_characters) & 3) != 0) { + // The partial chunk was at src - idx + _mm512_mask_storeu_epi8((__m512i *)dst, output_mask, shuffled); + dst += output_len; + return {BASE64_INPUT_REMAINDER, equallocation, size_t(dst - dstinit)}; + } else + // If there is a partial chunk with insufficient padding, with + // stop_before_partial, we need to just ignore it. In "only full" mode, + // skip the minute there are padding characters. + if ((last_chunk_options == + last_chunk_handling_options::stop_before_partial && + (padding_characters + idx < 4) && (idx != 0) && + (idx >= 2 || padding_characters == 0)) || + (last_chunk_options == + last_chunk_handling_options::only_full_chunks && + (idx >= 2 || padding_characters == 0))) { + _mm512_mask_storeu_epi8((__m512i *)dst, output_mask, shuffled); + dst += output_len; + // we need to rewind src to before the partial chunk + size_t characters_to_skip = idx; + while (characters_to_skip > 0) { + src--; + auto c = *src; + uint8_t code = to_base64[uint8_t(c)]; + if (simdutf::scalar::base64::is_eight_byte(c) && code <= 63) { + characters_to_skip--; + } + } + // And then we need to skip ignored characters + // See https://github.com/simdutf/simdutf/issues/824 + while (src > srcinit) { + auto c = *(src - 1); + uint8_t code = to_base64[uint8_t(c)]; + if (simdutf::scalar::base64::is_eight_byte(c) && code <= 63) { + break; + } + src--; + } + return {SUCCESS, size_t(src - srcinit), size_t(dst - dstinit)}; + } else { + if (idx == 2) { + if (!ignore_garbage && + last_chunk_options == last_chunk_handling_options::strict) { + uint32_t triple = (uint32_t(bufferptr[-2]) << 3 * 6) + + (uint32_t(bufferptr[-1]) << 2 * 6); + if (triple & 0xffff) { + _mm512_mask_storeu_epi8((__m512i *)dst, output_mask, shuffled); + dst += output_len; + return {BASE64_EXTRA_BITS, size_t(src - srcinit), + size_t(dst - dstinit)}; + } + } + output_mask = 
(output_mask << 1) | 1; + output_len += 1; + _mm512_mask_storeu_epi8((__m512i *)dst, output_mask, shuffled); + dst += output_len; + } else if (idx == 3) { + if (!ignore_garbage && + last_chunk_options == last_chunk_handling_options::strict) { + uint32_t triple = (uint32_t(bufferptr[-3]) << 3 * 6) + + (uint32_t(bufferptr[-2]) << 2 * 6) + + (uint32_t(bufferptr[-1]) << 1 * 6); + if (triple & 0xff) { + _mm512_mask_storeu_epi8((__m512i *)dst, output_mask, shuffled); + dst += output_len; + return {BASE64_EXTRA_BITS, size_t(src - srcinit), + size_t(dst - dstinit)}; + } + } + output_mask = (output_mask << 2) | 3; + output_len += 2; + _mm512_mask_storeu_epi8((__m512i *)dst, output_mask, shuffled); + dst += output_len; + } else if (!ignore_garbage && idx == 1 && + (!is_partial(last_chunk_options) || + (is_partial(last_chunk_options) && + padding_characters > 0))) { + _mm512_mask_storeu_epi8((__m512i *)dst, output_mask, shuffled); + dst += output_len; + return {BASE64_INPUT_REMAINDER, size_t(src - srcinit), + size_t(dst - dstinit)}; + } else if (!ignore_garbage && idx == 0 && padding_characters > 0) { + _mm512_mask_storeu_epi8((__m512i *)dst, output_mask, shuffled); + dst += output_len; + return {INVALID_BASE64_CHARACTER, equallocation, + size_t(dst - dstinit)}; + } else { + _mm512_mask_storeu_epi8((__m512i *)dst, output_mask, shuffled); + dst += output_len; + } + } + if (!ignore_garbage && !is_partial(last_chunk_options) && + padding_characters > 0) { + size_t output_count = size_t(dst - dstinit); + if ((output_count % 3 == 0) || + ((output_count % 3) + 1 + padding_characters != 4)) { + return {INVALID_BASE64_CHARACTER, equallocation, output_count}; + } + } + return {SUCCESS, full_input_length, size_t(dst - dstinit)}; + } + + if (!ignore_garbage && padding_characters > 0) { + if ((size_t(dst - dstinit) % 3 == 0) || + ((size_t(dst - dstinit) % 3) + 1 + padding_characters != 4)) { + return {INVALID_BASE64_CHARACTER, equallocation, size_t(dst - dstinit)}; + } + } + return 
{SUCCESS, srclen, size_t(dst - dstinit)}; +} +/* end file src/icelake/icelake_base64.inl.cpp */ +/* begin file src/icelake/icelake_find.inl.cpp */ +simdutf_really_inline const char *util_find(const char *start, const char *end, + char character) noexcept { + // Handle empty or invalid range + if (start >= end) + return end; + const size_t step = 64; + __m512i char_vec = _mm512_set1_epi8(character); + + // Handle unaligned beginning with a masked load + uintptr_t misalignment = reinterpret_cast(start) % step; + if (misalignment != 0) { + size_t adjustment = step - misalignment; + if (size_t(end - start) < adjustment) { + adjustment = end - start; + } + __mmask64 load_mask = 0xFFFFFFFFFFFFFFFF >> (64 - adjustment); + __m512i data = _mm512_maskz_loadu_epi8( + load_mask, reinterpret_cast(start)); + __mmask64 match_mask = _mm512_cmpeq_epi8_mask(data, char_vec); + + if (match_mask != 0) { + size_t index = _tzcnt_u64(match_mask); + return start + index; + } + start += adjustment; + } + // Process 64 bytes (512 bits) at a time with AVX-512 + // Main loop for full 128-byte chunks + while (size_t(end - start) >= 2 * step) { + __m512i data1 = + _mm512_loadu_si512(reinterpret_cast(start)); + __mmask64 mask1 = _mm512_cmpeq_epi8_mask(data1, char_vec); + + __m512i data2 = + _mm512_loadu_si512(reinterpret_cast(start + step)); + __mmask64 mask2 = _mm512_cmpeq_epi8_mask(data2, char_vec); + if (!_kortestz_mask64_u8(mask1, mask2)) { + if (mask1 != 0) { + // Found a match, return the first one + size_t index = _tzcnt_u64(mask1); + return start + index; + } + size_t index = _tzcnt_u64(mask2); + return start + index + step; + } + start += 2 * step; + } + + // Main loop for full 64-byte chunks + while (size_t(end - start) >= step) { + __m512i data = _mm512_loadu_si512(reinterpret_cast(start)); + __mmask64 mask = _mm512_cmpeq_epi8_mask(data, char_vec); + + if (mask != 0) { + // Found a match, return the first one + size_t index = _tzcnt_u64(mask); + return start + index; + } + + start += 
step; + } + + // Handle remaining bytes with masked load + size_t remaining = end - start; + if (remaining > 0) { + // Create a mask for the remaining bytes using shifted 0xFFFFFFFFFFFFFFFF + __mmask64 load_mask = 0xFFFFFFFFFFFFFFFF >> (64 - remaining); + __m512i data = _mm512_maskz_loadu_epi8( + load_mask, reinterpret_cast(start)); + __mmask64 match_mask = _mm512_cmpeq_epi8_mask(data, char_vec); + + // Apply load mask to avoid false positives + match_mask &= load_mask; + + if (match_mask != 0) { + // Found a match in the remaining bytes + size_t index = _tzcnt_u64(match_mask); + return start + index; + } + } + + return end; +} + +simdutf_really_inline const char16_t *util_find(const char16_t *start, + const char16_t *end, + char16_t character) noexcept { + // Handle empty or invalid range + if (start >= end) + return end; + + // Process 32 char16_t (64 bytes, 512 bits) at a time with AVX-512 + const size_t step = 32; + __m512i char_vec = _mm512_set1_epi16(character); + + // Handle unaligned beginning with a masked load + uintptr_t misalignment = + reinterpret_cast(start) % (step * sizeof(char16_t)); + if (misalignment != 0 && misalignment % 2 == 0) { + size_t adjustment = + (step * sizeof(char16_t) - misalignment) / sizeof(char16_t); + if (size_t(end - start) < adjustment) { + adjustment = end - start; + } + __mmask32 load_mask = 0xFFFFFFFF >> (32 - adjustment); + __m512i data = _mm512_maskz_loadu_epi16( + load_mask, reinterpret_cast(start)); + __mmask32 match_mask = _mm512_cmpeq_epi16_mask(data, char_vec); + + if (match_mask != 0) { + size_t index = _tzcnt_u32(match_mask); + return start + index; + } + start += adjustment; + } + + // Main loop for full 32-element chunks + while (size_t(end - start) >= step) { + __m512i data = _mm512_loadu_si512(reinterpret_cast(start)); + __mmask32 mask = _mm512_cmpeq_epi16_mask(data, char_vec); + + if (mask != 0) { + // Found a match, return the first one + size_t index = _tzcnt_u32(mask); + return start + index; + } + + start 
+= step; + } + + // Handle remaining elements with masked load + size_t remaining = end - start; + if (remaining > 0) { + __mmask32 load_mask = 0xFFFFFFFF >> (32 - remaining); + __m512i data = _mm512_maskz_loadu_epi16( + load_mask, reinterpret_cast(start)); + __mmask32 match_mask = _mm512_cmpeq_epi16_mask(data, char_vec); + + if (match_mask != 0) { + size_t index = _tzcnt_u32(match_mask); + return start + index; + } + } + + return end; +} +/* end file src/icelake/icelake_find.inl.cpp */ +#endif // SIMDUTF_FEATURE_BASE64 + +#include + +} // namespace +} // namespace icelake +} // namespace simdutf + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +/* begin file src/generic/utf32.h */ +#include + +namespace simdutf { +namespace icelake { +namespace { +namespace utf32 { + +template T min(T a, T b) { return a <= b ? a : b; } + +simdutf_really_inline size_t utf8_length_from_utf32(const char32_t *input, + size_t length) { + using vector_u32 = simd32; + + const char32_t *start = input; + + // we add up to three ones in a single iteration (see the vectorized loop in + // section #2 below) + const size_t max_increment = 3; + + const size_t N = vector_u32::ELEMENTS; + +#if SIMDUTF_SIMD_HAS_UNSIGNED_CMP + const auto v_0000007f = vector_u32::splat(0x0000007f); + const auto v_000007ff = vector_u32::splat(0x000007ff); + const auto v_0000ffff = vector_u32::splat(0x0000ffff); +#else + const auto v_ffffff80 = vector_u32::splat(0xffffff80); + const auto v_fffff800 = vector_u32::splat(0xfffff800); + const auto v_ffff0000 = vector_u32::splat(0xffff0000); + const auto one = vector_u32::splat(1); +#endif // SIMDUTF_SIMD_HAS_UNSIGNED_CMP + + size_t counter = 0; + + // 1. 
vectorized loop unrolled 4 times + { + // we use vector of uint32 counters, this is why this limit is used + const size_t max_iterations = + std::numeric_limits::max() / (max_increment * 4); + size_t blocks = length / (N * 4); + length -= blocks * (N * 4); + while (blocks != 0) { + const size_t iterations = min(blocks, max_iterations); + blocks -= iterations; + + simd32 acc = vector_u32::zero(); + for (size_t i = 0; i < iterations; i++) { + const auto in0 = vector_u32(input + 0 * N); + const auto in1 = vector_u32(input + 1 * N); + const auto in2 = vector_u32(input + 2 * N); + const auto in3 = vector_u32(input + 3 * N); + +#if SIMDUTF_SIMD_HAS_UNSIGNED_CMP + acc -= as_vector_u32(in0 > v_0000007f); + acc -= as_vector_u32(in1 > v_0000007f); + acc -= as_vector_u32(in2 > v_0000007f); + acc -= as_vector_u32(in3 > v_0000007f); + + acc -= as_vector_u32(in0 > v_000007ff); + acc -= as_vector_u32(in1 > v_000007ff); + acc -= as_vector_u32(in2 > v_000007ff); + acc -= as_vector_u32(in3 > v_000007ff); + + acc -= as_vector_u32(in0 > v_0000ffff); + acc -= as_vector_u32(in1 > v_0000ffff); + acc -= as_vector_u32(in2 > v_0000ffff); + acc -= as_vector_u32(in3 > v_0000ffff); +#else + acc += min(one, in0 & v_ffffff80); + acc += min(one, in1 & v_ffffff80); + acc += min(one, in2 & v_ffffff80); + acc += min(one, in3 & v_ffffff80); + + acc += min(one, in0 & v_fffff800); + acc += min(one, in1 & v_fffff800); + acc += min(one, in2 & v_fffff800); + acc += min(one, in3 & v_fffff800); + + acc += min(one, in0 & v_ffff0000); + acc += min(one, in1 & v_ffff0000); + acc += min(one, in2 & v_ffff0000); + acc += min(one, in3 & v_ffff0000); +#endif // SIMDUTF_SIMD_HAS_UNSIGNED_CMP + + input += 4 * N; + } + + counter += acc.sum(); + } + } + + // 2. 
vectorized loop for tail + { + const size_t max_iterations = + std::numeric_limits::max() / max_increment; + size_t blocks = length / N; + length -= blocks * N; + while (blocks != 0) { + const size_t iterations = min(blocks, max_iterations); + blocks -= iterations; + + auto acc = vector_u32::zero(); + for (size_t i = 0; i < iterations; i++) { + const auto in = vector_u32(input); + +#if SIMDUTF_SIMD_HAS_UNSIGNED_CMP + acc -= as_vector_u32(in > v_0000007f); + acc -= as_vector_u32(in > v_000007ff); + acc -= as_vector_u32(in > v_0000ffff); +#else + acc += min(one, in & v_ffffff80); + acc += min(one, in & v_fffff800); + acc += min(one, in & v_ffff0000); +#endif // SIMDUTF_SIMD_HAS_UNSIGNED_CMP + + input += N; + } + + counter += acc.sum(); + } + } + + const size_t consumed = input - start; + if (consumed != 0) { + // We don't count 0th bytes in the vectorized loops above, this + // is why we need to count them in the end. + counter += consumed; + } + + return counter + scalar::utf32::utf8_length_from_utf32(input, length); +} + +} // namespace utf32 +} // unnamed namespace +} // namespace icelake +} // namespace simdutf +/* end file src/generic/utf32.h */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +namespace simdutf { +namespace icelake { + +#if SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused int +implementation::detect_encodings(const char *input, + size_t length) const noexcept { + // If there is a BOM, then we trust it. 
+ auto bom_encoding = simdutf::BOM::check_bom(input, length); + if (bom_encoding != encoding_type::unspecified) { + return bom_encoding; + } + + int out = 0; + uint32_t utf16_err = (length % 2); + uint32_t utf32_err = (length % 4); + uint32_t ends_with_high = 0; + avx512_utf8_checker checker{}; + const __m512i offset = _mm512_set1_epi32((uint32_t)0xffff2000); + __m512i currentmax = _mm512_setzero_si512(); + __m512i currentoffsetmax = _mm512_setzero_si512(); + const char *ptr = input; + const char *end = ptr + length; + for (; end - ptr >= 64; ptr += 64) { + // utf8 checks + const __m512i data = _mm512_loadu_si512((const __m512i *)ptr); + checker.check_next_input(data); + + // utf16le_checks + __m512i diff = _mm512_sub_epi16(data, _mm512_set1_epi16(uint16_t(0xD800))); + __mmask32 surrogates = + _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); + __mmask32 highsurrogates = + _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400))); + __mmask32 lowsurrogates = surrogates ^ highsurrogates; + utf16_err |= (((highsurrogates << 1) | ends_with_high) != lowsurrogates); + ends_with_high = ((highsurrogates & 0x80000000) != 0); + + // utf32le checks + currentoffsetmax = + _mm512_max_epu32(_mm512_add_epi32(data, offset), currentoffsetmax); + currentmax = _mm512_max_epu32(data, currentmax); + } + + // last block with 0 <= len < 64 + __mmask64 read_mask = (__mmask64(1) << (end - ptr)) - 1; + const __m512i data = _mm512_maskz_loadu_epi8(read_mask, (const __m512i *)ptr); + checker.check_next_input(data); + + __m512i diff = _mm512_sub_epi16(data, _mm512_set1_epi16(uint16_t(0xD800))); + __mmask32 surrogates = + _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); + __mmask32 highsurrogates = + _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400))); + __mmask32 lowsurrogates = surrogates ^ highsurrogates; + utf16_err |= (((highsurrogates << 1) | ends_with_high) != lowsurrogates); + + currentoffsetmax = + 
_mm512_max_epu32(_mm512_add_epi32(data, offset), currentoffsetmax); + currentmax = _mm512_max_epu32(data, currentmax); + + const __m512i standardmax = _mm512_set1_epi32((uint32_t)0x10ffff); + const __m512i standardoffsetmax = _mm512_set1_epi32((uint32_t)0xfffff7ff); + __m512i is_zero = + _mm512_xor_si512(_mm512_max_epu32(currentmax, standardmax), standardmax); + utf32_err |= (_mm512_test_epi8_mask(is_zero, is_zero) != 0); + is_zero = _mm512_xor_si512( + _mm512_max_epu32(currentoffsetmax, standardoffsetmax), standardoffsetmax); + utf32_err |= (_mm512_test_epi8_mask(is_zero, is_zero) != 0); + checker.check_eof(); + bool is_valid_utf8 = !checker.errors(); + if (is_valid_utf8) { + out |= encoding_type::UTF8; + } + if (utf16_err == 0) { + out |= encoding_type::UTF16_LE; + } + if (utf32_err == 0) { + out |= encoding_type::UTF32_LE; + } + return out; +} +#endif // SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused bool +implementation::validate_utf8(const char *buf, size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + return true; + } + avx512_utf8_checker checker{}; + const char *ptr = buf; + const char *end = ptr + len; + for (; end - ptr >= 64; ptr += 64) { + const __m512i utf8 = _mm512_loadu_si512((const __m512i *)ptr); + checker.check_next_input(utf8); + } + if (end != ptr) { + const __m512i utf8 = _mm512_maskz_loadu_epi8( + ~UINT64_C(0) >> (64 - (end - ptr)), (const __m512i *)ptr); + checker.check_next_input(utf8); + } + checker.check_eof(); + return !checker.errors(); +} +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 +simdutf_warn_unused result implementation::validate_utf8_with_errors( + const char *buf, size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + return result(error_code::SUCCESS, len); + } + avx512_utf8_checker checker{}; + const char *ptr = buf; + const char *end = ptr + len; + size_t count{0}; + for (; end - ptr 
>= 64; ptr += 64) { + const __m512i utf8 = _mm512_loadu_si512((const __m512i *)ptr); + checker.check_next_input(utf8); + if (checker.errors()) { + if (count != 0) { + count--; + } // Sometimes the error is only detected in the next chunk + result res = scalar::utf8::rewind_and_validate_with_errors( + reinterpret_cast(buf), + reinterpret_cast(buf + count), len - count); + res.count += count; + return res; + } + count += 64; + } + if (end != ptr) { + const __m512i utf8 = _mm512_maskz_loadu_epi8( + ~UINT64_C(0) >> (64 - (end - ptr)), (const __m512i *)ptr); + checker.check_next_input(utf8); + } + checker.check_eof(); + if (checker.errors()) { + if (count != 0) { + count--; + } // Sometimes the error is only detected in the next chunk + result res = scalar::utf8::rewind_and_validate_with_errors( + reinterpret_cast(buf), + reinterpret_cast(buf + count), len - count); + res.count += count; + return res; + } + return result(error_code::SUCCESS, len); +} +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_ASCII +simdutf_warn_unused bool +implementation::validate_ascii(const char *buf, size_t len) const noexcept { + return icelake::validate_ascii(buf, len); +} + +simdutf_warn_unused result implementation::validate_ascii_with_errors( + const char *buf, size_t len) const noexcept { + const char *buf_orig = buf; + const char *end = buf + len; + const __m512i ascii = _mm512_set1_epi8((uint8_t)0x80); + for (; end - buf >= 64; buf += 64) { + const __m512i input = _mm512_loadu_si512((const __m512i *)buf); + __mmask64 notascii = _mm512_cmp_epu8_mask(input, ascii, _MM_CMPINT_NLT); + if (notascii) { + return result(error_code::TOO_LARGE, + buf - buf_orig + _tzcnt_u64(notascii)); + } + } + if (end != buf) { + const __m512i input = _mm512_maskz_loadu_epi8( + ~UINT64_C(0) >> (64 - (end - buf)), (const __m512i *)buf); + __mmask64 notascii = _mm512_cmp_epu8_mask(input, ascii, _MM_CMPINT_NLT); + if (notascii) { + return result(error_code::TOO_LARGE, + buf - buf_orig + 
_tzcnt_u64(notascii)); + } + } + return result(error_code::SUCCESS, len); +} +#endif // SIMDUTF_FEATURE_ASCII +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII +simdutf_warn_unused bool +implementation::validate_utf16le_as_ascii(const char16_t *buf, + size_t len) const noexcept { + const char16_t *end = buf + len; + __m512i limit = _mm512_set1_epi16(uint16_t(0x007F)); + for (; end - buf >= 32;) { + __m512i in = _mm512_loadu_si512((__m512i *)buf); + auto mask = _mm512_cmpgt_epu16_mask(in, limit); + if (mask) { + return false; + } + buf += 32; + } + if (buf < end) { + __m512i in = + _mm512_maskz_loadu_epi16((1U << (end - buf)) - 1, (__m512i *)buf); + auto mask = _mm512_cmpgt_epu16_mask(in, limit); + if (mask) { + return false; + } + } + return true; +} + +simdutf_warn_unused bool +implementation::validate_utf16be_as_ascii(const char16_t *buf, + size_t len) const noexcept { + const char16_t *end = buf + len; + const __m512i byteflip = _mm512_setr_epi64( + 0x0607040502030001, 0x0e0f0c0d0a0b0809, 0x0607040502030001, + 0x0e0f0c0d0a0b0809, 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809); + __m512i limit = _mm512_set1_epi16(uint16_t(0x007F)); + for (; end - buf >= 32;) { + __m512i in = _mm512_loadu_si512((__m512i *)buf); + in = _mm512_shuffle_epi8(in, byteflip); + auto mask = _mm512_cmpgt_epu16_mask(in, limit); + if (mask) { + return false; + } + buf += 32; + } + if (buf < end) { + __m512i in = + _mm512_maskz_loadu_epi16((1U << (end - buf)) - 1, (__m512i *)buf); + in = _mm512_shuffle_epi8(in, byteflip); + auto mask = _mm512_cmpgt_epu16_mask(in, limit); + if (mask) { + return false; + } + } + return true; +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused bool +implementation::validate_utf16le(const char16_t *buf, + size_t len) const noexcept { + const char16_t *end = buf + len; + + // Optimized: Process 64 code units (2x 512-bit) per iteration + 
const __m512i surr_base = _mm512_set1_epi16(uint16_t(0xD800)); + const __m512i surr_range = _mm512_set1_epi16(uint16_t(0x0800)); + const __m512i high_range = _mm512_set1_epi16(uint16_t(0x0400)); + + for (; end - buf >= 64;) { + __m512i in_1 = _mm512_loadu_si512((__m512i *)buf); + __m512i in_2 = _mm512_loadu_si512((__m512i *)(buf + 32)); + + __m512i diff_1 = _mm512_sub_epi16(in_1, surr_base); + __m512i diff_2 = _mm512_sub_epi16(in_2, surr_base); + + __mmask32 surrogates_1 = _mm512_cmplt_epu16_mask(diff_1, surr_range); + __mmask32 surrogates_2 = _mm512_cmplt_epu16_mask(diff_2, surr_range); + + if (surrogates_1 | surrogates_2) { + __mmask32 highsurrogates_1 = _mm512_cmplt_epu16_mask(diff_1, high_range); + __mmask32 lowsurrogates_1 = surrogates_1 ^ highsurrogates_1; + + __mmask32 highsurrogates_2 = _mm512_cmplt_epu16_mask(diff_2, high_range); + __mmask32 lowsurrogates_2 = surrogates_2 ^ highsurrogates_2; + + // Validate first block: high must be followed by low + if ((highsurrogates_1 << 1) != lowsurrogates_1) { + return false; + } + + // Check boundary between blocks: if first block ends with high, second + // must start with low + bool ends_with_high_1 = ((highsurrogates_1 & 0x80000000) != 0); + bool starts_with_low_2 = ((lowsurrogates_2 & 0x1) != 0); + if (ends_with_high_1 && !starts_with_low_2) { + return false; + } + + // Validate second block (shift by 1 if first ended with high) + __mmask32 expected_low_2 = ends_with_high_1 + ? 
(highsurrogates_2 << 1) | 0x1 + : (highsurrogates_2 << 1); + if (expected_low_2 != lowsurrogates_2) { + return false; + } + + bool ends_with_high_2 = ((highsurrogates_2 & 0x80000000) != 0); + if (ends_with_high_2) { + buf += 63; // advance by 63 to start with high surrogate next round + } else { + buf += 64; + } + } else { + buf += 64; + } + } + + // Handle remaining 32-63 code units + for (; end - buf >= 32;) { + __m512i in = _mm512_loadu_si512((__m512i *)buf); + __m512i diff = _mm512_sub_epi16(in, surr_base); + __mmask32 surrogates = _mm512_cmplt_epu16_mask(diff, surr_range); + if (surrogates) { + __mmask32 highsurrogates = _mm512_cmplt_epu16_mask(diff, high_range); + __mmask32 lowsurrogates = surrogates ^ highsurrogates; + // high must be followed by low + if ((highsurrogates << 1) != lowsurrogates) { + return false; + } + bool ends_with_high = ((highsurrogates & 0x80000000) != 0); + if (ends_with_high) { + buf += 31; // advance only by 31 code units so that we start with the + // high surrogate on the next round. 
+ } else { + buf += 32; + } + } else { + buf += 32; + } + } + if (buf < end) { + __m512i in = + _mm512_maskz_loadu_epi16((1U << (end - buf)) - 1, (__m512i *)buf); + __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800))); + __mmask32 surrogates = + _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); + if (surrogates) { + __mmask32 highsurrogates = + _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400))); + __mmask32 lowsurrogates = surrogates ^ highsurrogates; + // high must be followed by low + if ((highsurrogates << 1) != lowsurrogates) { + return false; + } + } + } + return true; +} +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused bool +implementation::validate_utf16be(const char16_t *buf, + size_t len) const noexcept { + const char16_t *end = buf + len; + + for (; end - buf >= 32;) { + __m512i in = _mm512_slli_epi32(_mm512_loadu_si512((__m512i *)buf), 8); + __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800))); + __mmask32 surrogates = + _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); + if (surrogates) { + __mmask32 highsurrogates = + _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400))); + __mmask32 lowsurrogates = surrogates ^ highsurrogates; + // high must be followed by low + if ((highsurrogates << 1) != lowsurrogates) { + return false; + } + bool ends_with_high = ((highsurrogates & 0x80000000) != 0); + if (ends_with_high) { + buf += 31; // advance only by 31 code units so that we start with the + // high surrogate on the next round. 
+ } else { + buf += 32; + } + } else { + buf += 32; + } + } + if (buf < end) { + __m512i in = _mm512_slli_epi16( + _mm512_maskz_loadu_epi16((1U << (end - buf)) - 1, (__m512i *)buf), 8); + __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800))); + __mmask32 surrogates = + _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); + if (surrogates) { + __mmask32 highsurrogates = + _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400))); + __mmask32 lowsurrogates = surrogates ^ highsurrogates; + // high must be followed by low + if ((highsurrogates << 1) != lowsurrogates) { + return false; + } + } + } + return true; +} + +simdutf_warn_unused result implementation::validate_utf16le_with_errors( + const char16_t *buf, size_t len) const noexcept { + const char16_t *start_buf = buf; + const char16_t *end = buf + len; + for (; end - buf >= 32;) { + __m512i in = _mm512_loadu_si512((__m512i *)buf); + __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800))); + __mmask32 surrogates = + _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); + if (surrogates) { + __mmask32 highsurrogates = + _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400))); + __mmask32 lowsurrogates = surrogates ^ highsurrogates; + // high must be followed by low + if ((highsurrogates << 1) != lowsurrogates) { + uint32_t extra_low = _tzcnt_u32(lowsurrogates & ~(highsurrogates << 1)); + uint32_t extra_high = + _tzcnt_u32(highsurrogates & ~(lowsurrogates >> 1)); + return result(error_code::SURROGATE, + (buf - start_buf) + + (extra_low < extra_high ? extra_low : extra_high)); + } + bool ends_with_high = ((highsurrogates & 0x80000000) != 0); + if (ends_with_high) { + buf += 31; // advance only by 31 code units so that we start with the + // high surrogate on the next round. 
+ } else { + buf += 32; + } + } else { + buf += 32; + } + } + if (buf < end) { + __m512i in = + _mm512_maskz_loadu_epi16((1U << (end - buf)) - 1, (__m512i *)buf); + __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800))); + __mmask32 surrogates = + _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); + if (surrogates) { + __mmask32 highsurrogates = + _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400))); + __mmask32 lowsurrogates = surrogates ^ highsurrogates; + // high must be followed by low + if ((highsurrogates << 1) != lowsurrogates) { + uint32_t extra_low = _tzcnt_u32(lowsurrogates & ~(highsurrogates << 1)); + uint32_t extra_high = + _tzcnt_u32(highsurrogates & ~(lowsurrogates >> 1)); + return result(error_code::SURROGATE, + (buf - start_buf) + + (extra_low < extra_high ? extra_low : extra_high)); + } + } + } + return result(error_code::SUCCESS, len); +} + +simdutf_warn_unused result implementation::validate_utf16be_with_errors( + const char16_t *buf, size_t len) const noexcept { + const char16_t *start_buf = buf; + const char16_t *end = buf + len; + + for (; end - buf >= 32;) { + __m512i in = _mm512_slli_epi16(_mm512_loadu_si512((__m512i *)buf), 8); + __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800))); + __mmask32 surrogates = + _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); + if (surrogates) { + __mmask32 highsurrogates = + _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400))); + __mmask32 lowsurrogates = surrogates ^ highsurrogates; + // high must be followed by low + if ((highsurrogates << 1) != lowsurrogates) { + uint32_t extra_low = _tzcnt_u32(lowsurrogates & ~(highsurrogates << 1)); + uint32_t extra_high = + _tzcnt_u32(highsurrogates & ~(lowsurrogates >> 1)); + return result(error_code::SURROGATE, + (buf - start_buf) + + (extra_low < extra_high ? 
extra_low : extra_high)); + } + bool ends_with_high = ((highsurrogates & 0x80000000) != 0); + if (ends_with_high) { + buf += 31; // advance only by 31 code units so that we start with the + // high surrogate on the next round. + } else { + buf += 32; + } + } else { + buf += 32; + } + } + if (buf < end) { + __m512i in = _mm512_slli_epi16( + _mm512_maskz_loadu_epi16((1U << (end - buf)) - 1, (__m512i *)buf), 8); + __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800))); + __mmask32 surrogates = + _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); + if (surrogates) { + __mmask32 highsurrogates = + _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400))); + __mmask32 lowsurrogates = surrogates ^ highsurrogates; + // high must be followed by low + if ((highsurrogates << 1) != lowsurrogates) { + uint32_t extra_low = _tzcnt_u32(lowsurrogates & ~(highsurrogates << 1)); + uint32_t extra_high = + _tzcnt_u32(highsurrogates & ~(lowsurrogates >> 1)); + return result(error_code::SURROGATE, + (buf - start_buf) + + (extra_low < extra_high ? 
extra_low : extra_high)); + } + } + } + return result(error_code::SUCCESS, len); +} + +void implementation::to_well_formed_utf16le(const char16_t *input, size_t len, + char16_t *output) const noexcept { + return utf16fix_avx512(input, len, output); +} + +void implementation::to_well_formed_utf16be(const char16_t *input, size_t len, + char16_t *output) const noexcept { + return utf16fix_avx512(input, len, output); +} +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused bool +implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { + return icelake::validate_utf32(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused result implementation::validate_utf32_with_errors( + const char32_t *buf, size_t len) const noexcept { + const char32_t *buf_orig = buf; + if (len >= 16) { + const char32_t *end = buf + len - 16; + while (buf <= end) { + __m512i utf32 = _mm512_loadu_si512((const __m512i *)buf); + __mmask16 outside_range = _mm512_cmp_epu32_mask( + utf32, _mm512_set1_epi32(0x10ffff), _MM_CMPINT_GT); + + __m512i utf32_off = + _mm512_add_epi32(utf32, _mm512_set1_epi32(0xffff2000)); + + __mmask16 surrogate_range = _mm512_cmp_epu32_mask( + utf32_off, _mm512_set1_epi32(0xfffff7ff), _MM_CMPINT_GT); + if ((outside_range | surrogate_range)) { + auto outside_idx = _tzcnt_u32(outside_range); + auto surrogate_idx = _tzcnt_u32(surrogate_range); + + if (outside_idx < surrogate_idx) { + return result(error_code::TOO_LARGE, buf - buf_orig + outside_idx); + } + + return result(error_code::SURROGATE, buf - buf_orig + surrogate_idx); + } + + buf += 16; + } + } + if (len > 0) { + __m512i utf32 = _mm512_maskz_loadu_epi32( + __mmask16((1U << (buf_orig + len - buf)) - 1), (const __m512i *)buf); + __mmask16 outside_range = _mm512_cmp_epu32_mask( + utf32, _mm512_set1_epi32(0x10ffff), _MM_CMPINT_GT); + __m512i utf32_off = 
_mm512_add_epi32(utf32, _mm512_set1_epi32(0xffff2000)); + + __mmask16 surrogate_range = _mm512_cmp_epu32_mask( + utf32_off, _mm512_set1_epi32(0xfffff7ff), _MM_CMPINT_GT); + if ((outside_range | surrogate_range)) { + auto outside_idx = _tzcnt_u32(outside_range); + auto surrogate_idx = _tzcnt_u32(surrogate_range); + + if (outside_idx < surrogate_idx) { + return result(error_code::TOO_LARGE, buf - buf_orig + outside_idx); + } + + return result(error_code::SURROGATE, buf - buf_orig + surrogate_idx); + } + } + + return result(error_code::SUCCESS, len); +} +#endif // SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( + const char *buf, size_t len, char *utf8_output) const noexcept { + return icelake::latin1_to_utf8_avx512_start(buf, len, utf8_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + return icelake_convert_latin1_to_utf16(buf, len, + utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + return icelake_convert_latin1_to_utf16(buf, len, + utf16_output); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept { + avx512_convert_latin1_to_utf32(buf, len, utf32_output); + return len; +} +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept { + return 
icelake::utf8_to_latin1_avx512(buf, len, latin1_output); +} + +simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors( + const char *buf, size_t len, char *latin1_output) const noexcept { + // First, try to convert as much as possible using the SIMD implementation. + const char *obuf = buf; + char *olatin1_output = latin1_output; + size_t written = icelake::utf8_to_latin1_avx512(obuf, len, olatin1_output); + + // If we have completely converted the string + if (obuf == buf + len) { + return {simdutf::SUCCESS, written}; + } + size_t pos = obuf - buf; + result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( + pos, buf + pos, len - pos, latin1_output); + res.count += pos; + return res; +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept { + return icelake::valid_utf8_to_latin1_avx512(buf, len, latin1_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16_result ret = + fast_avx512_convert_utf8_to_utf16(buf, len, + utf16_output); + if (ret.second == nullptr) { + return 0; + } + return ret.second - utf16_output; +} + +simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16_result ret = fast_avx512_convert_utf8_to_utf16( + buf, len, utf16_output); + if (ret.second == nullptr) { + return 0; + } + return ret.second - utf16_output; +} + +simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + return fast_avx512_convert_utf8_to_utf16_with_errors( + buf, len, utf16_output); +} + +simdutf_warn_unused result 
implementation::convert_utf8_to_utf16be_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + return fast_avx512_convert_utf8_to_utf16_with_errors( + buf, len, utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16_result ret = + icelake::valid_utf8_to_fixed_length( + buf, len, utf16_output); + size_t saved_bytes = ret.second - utf16_output; + const char *end = buf + len; + if (ret.first == end) { + return saved_bytes; + } + + // Note: AVX512 procedure looks up 4 bytes forward, and + // correctly converts multi-byte chars even if their + // continuation bytes lie outsiede 16-byte window. + // It meas, we have to skip continuation bytes from + // the beginning ret.first, as they were already consumed. + while (ret.first != end && ((uint8_t(*ret.first) & 0xc0) == 0x80)) { + ret.first += 1; + } + + if (ret.first != end) { + const size_t scalar_saved_bytes = + scalar::utf8_to_utf16::convert_valid( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + + return saved_bytes; +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16_result ret = + icelake::valid_utf8_to_fixed_length( + buf, len, utf16_output); + size_t saved_bytes = ret.second - utf16_output; + const char *end = buf + len; + if (ret.first == end) { + return saved_bytes; + } + + // Note: AVX512 procedure looks up 4 bytes forward, and + // correctly converts multi-byte chars even if their + // continuation bytes lie outsiede 16-byte window. + // It meas, we have to skip continuation bytes from + // the beginning ret.first, as they were already consumed. 
+ while (ret.first != end && ((uint8_t(*ret.first) & 0xc0) == 0x80)) { + ret.first += 1; + } + + if (ret.first != end) { + const size_t scalar_saved_bytes = + scalar::utf8_to_utf16::convert_valid( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + + return saved_bytes; +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::convert_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_out) const noexcept { + uint32_t *utf32_output = reinterpret_cast(utf32_out); + utf8_to_utf32_result ret = + icelake::validating_utf8_to_fixed_length( + buf, len, utf32_output); + if (ret.second == nullptr) + return 0; + + size_t saved_bytes = ret.second - utf32_output; + const char *end = buf + len; + if (ret.first == end) { + return saved_bytes; + } + + // Note: the AVX512 procedure looks up 4 bytes forward, and + // correctly converts multi-byte chars even if their + // continuation bytes lie outside 16-byte window. + // It means, we have to skip continuation bytes from + // the beginning ret.first, as they were already consumed. 
+ while (ret.first != end && ((uint8_t(*ret.first) & 0xc0) == 0x80)) { + ret.first += 1; + } + if (ret.first != end) { + const size_t scalar_saved_bytes = scalar::utf8_to_utf32::convert( + ret.first, len - (ret.first - buf), utf32_out + saved_bytes); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + + return saved_bytes; +} + +simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors( + const char *buf, size_t len, char32_t *utf32) const noexcept { + if (simdutf_unlikely(len == 0)) { + return {error_code::SUCCESS, 0}; + } + uint32_t *utf32_output = reinterpret_cast(utf32); + auto ret = icelake::validating_utf8_to_fixed_length_with_constant_checks< + endianness::LITTLE, uint32_t>(buf, len, utf32_output); + + if (!std::get<2>(ret)) { + size_t pos = std::get<0>(ret) - buf; + // We might have an error that occurs right before pos. + // This is only a concern if buf[pos] is not a continuation byte. + if ((buf[pos] & 0xc0) != 0x80 && pos >= 64) { + pos -= 1; + } else if ((buf[pos] & 0xc0) == 0x80 && pos >= 64) { + // We must check whether we are the fourth continuation byte + bool c1 = (buf[pos - 1] & 0xc0) == 0x80; + bool c2 = (buf[pos - 2] & 0xc0) == 0x80; + bool c3 = (buf[pos - 3] & 0xc0) == 0x80; + if (c1 && c2 && c3) { + return {simdutf::TOO_LONG, pos}; + } + } + // todo: we reset the output to utf32 instead of using std::get<2.(ret) as + // you'd expect. that is because + // validating_utf8_to_fixed_length_with_constant_checks may have processed + // data beyond the error. 
+ result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( + pos, buf + pos, len - pos, utf32); + res.count += pos; + return res; + } + size_t saved_bytes = std::get<1>(ret) - utf32_output; + const char *end = buf + len; + if (std::get<0>(ret) == end) { + return {simdutf::SUCCESS, saved_bytes}; + } + + // Note: the AVX512 procedure looks up 4 bytes forward, and + // correctly converts multi-byte chars even if their + // continuation bytes lie outside 16-byte window. + // It means, we have to skip continuation bytes from + // the beginning ret.first, as they were already consumed. + while (std::get<0>(ret) != end and + ((uint8_t(*std::get<0>(ret)) & 0xc0) == 0x80)) { + std::get<0>(ret) += 1; + } + + if (std::get<0>(ret) != end) { + auto scalar_result = scalar::utf8_to_utf32::convert_with_errors( + std::get<0>(ret), len - (std::get<0>(ret) - buf), + reinterpret_cast(utf32_output) + saved_bytes); + if (scalar_result.error != simdutf::SUCCESS) { + scalar_result.count += (std::get<0>(ret) - buf); + } else { + scalar_result.count += saved_bytes; + } + return scalar_result; + } + + return {simdutf::SUCCESS, size_t(std::get<1>(ret) - utf32_output)}; +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_out) const noexcept { + uint32_t *utf32_output = reinterpret_cast(utf32_out); + utf8_to_utf32_result ret = + icelake::valid_utf8_to_fixed_length( + buf, len, utf32_output); + size_t saved_bytes = ret.second - utf32_output; + const char *end = buf + len; + if (ret.first == end) { + return saved_bytes; + } + + // Note: AVX512 procedure looks up 4 bytes forward, and + // correctly converts multi-byte chars even if their + // continuation bytes lie outsiede 16-byte window. + // It meas, we have to skip continuation bytes from + // the beginning ret.first, as they were already consumed. 
+ while (ret.first != end && ((uint8_t(*ret.first) & 0xc0) == 0x80)) { + ret.first += 1; + } + + if (ret.first != end) { + const size_t scalar_saved_bytes = scalar::utf8_to_utf32::convert_valid( + ret.first, len - (ret.first - buf), utf32_out + saved_bytes); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + + return saved_bytes; +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + return icelake_convert_utf16_to_latin1(buf, len, + latin1_output); +} + +simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + return icelake_convert_utf16_to_latin1(buf, len, + latin1_output); +} + +simdutf_warn_unused result +implementation::convert_utf16le_to_latin1_with_errors( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + return icelake_convert_utf16_to_latin1_with_errors( + buf, len, latin1_output) + .first; +} + +simdutf_warn_unused result +implementation::convert_utf16be_to_latin1_with_errors( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + return icelake_convert_utf16_to_latin1_with_errors( + buf, len, latin1_output) + .first; +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + // optimization opportunity: implement custom function + return convert_utf16be_to_latin1(buf, len, latin1_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + // optimization opportunity: implement custom function + return convert_utf16le_to_latin1(buf, len, latin1_output); +} +#endif // 
SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + size_t outlen; + size_t inlen = utf16_to_utf8_avx512i( + buf, len, (unsigned char *)utf8_output, &outlen); + if (inlen != len) { + return 0; + } + return outlen; +} + +simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + size_t outlen; + size_t inlen = utf16_to_utf8_avx512i( + buf, len, (unsigned char *)utf8_output, &outlen); + if (inlen != len) { + return 0; + } + return outlen; +} + +simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + size_t outlen; + size_t inlen = utf16_to_utf8_avx512i( + buf, len, (unsigned char *)utf8_output, &outlen); + if (inlen != len) { + result res = scalar::utf16_to_utf8::convert_with_errors( + buf + inlen, len - inlen, utf8_output + outlen); + res.count += inlen; + return res; + } + return {simdutf::SUCCESS, outlen}; +} + +simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + size_t outlen; + size_t inlen = utf16_to_utf8_avx512i( + buf, len, (unsigned char *)utf8_output, &outlen); + if (inlen != len) { + result res = scalar::utf16_to_utf8::convert_with_errors( + buf + inlen, len - inlen, utf8_output + outlen); + res.count += inlen; + return res; + } + return {simdutf::SUCCESS, outlen}; +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + return convert_utf16le_to_utf8(buf, len, utf8_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( + const char16_t *buf, size_t len, char 
*utf8_output) const noexcept { + return convert_utf16be_to_utf8(buf, len, utf8_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + return icelake_convert_utf32_to_latin1(buf, len, latin1_output); +} + +simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + return icelake_convert_utf32_to_latin1_with_errors(buf, len, latin1_output) + .first; +} + +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + return icelake_convert_utf32_to_latin1(buf, len, latin1_output); +} +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + std::pair ret = + avx512_convert_utf32_to_utf8(buf, len, utf8_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf8_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = scalar::utf32_to_utf8::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + icelake::avx512_convert_utf32_to_utf8_with_errors(buf, len, utf8_output); + if (ret.first.count != len) { + result scalar_res = 
scalar::utf32_to_utf8::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf8_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + return convert_utf32_to_utf8(buf, len, utf8_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + std::pair ret = + avx512_convert_utf32_to_utf16(buf, len, utf16_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf16_output; + return saved_bytes; +} + +simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + std::pair ret = + avx512_convert_utf32_to_utf16(buf, len, utf16_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf16_output; + return saved_bytes; +} + +simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + avx512_convert_utf32_to_utf16_with_errors( + buf, len, utf16_output); + if (ret.first.error) { + return ret.first; + } + ret.first.count = + ret.second - + utf16_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused result 
implementation::convert_utf32_to_utf16be_with_errors( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + avx512_convert_utf32_to_utf16_with_errors(buf, len, + utf16_output); + if (ret.first.error) { + return ret.first; + } + ret.first.count = + ret.second - + utf16_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + return convert_utf32_to_utf16le(buf, len, utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + return convert_utf32_to_utf16be(buf, len, utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + std::tuple ret = + icelake::convert_utf16_to_utf32(buf, len, + utf32_output); + if (!std::get<2>(ret)) { + return 0; + } + size_t saved_bytes = std::get<1>(ret) - utf32_output; + if (std::get<0>(ret) != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_utf32::convert( + std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret)); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + std::tuple ret = + icelake::convert_utf16_to_utf32(buf, len, utf32_output); + if (!std::get<2>(ret)) { + return 0; + } + size_t saved_bytes = std::get<1>(ret) - utf32_output; + if (std::get<0>(ret) != buf + len) { + const size_t scalar_saved_bytes = + 
scalar::utf16_to_utf32::convert( + std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret)); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + std::tuple ret = + icelake::convert_utf16_to_utf32(buf, len, + utf32_output); + if (!std::get<2>(ret)) { + result scalar_res = + scalar::utf16_to_utf32::convert_with_errors( + std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret)); + scalar_res.count += (std::get<0>(ret) - buf); + return scalar_res; + } + size_t saved_bytes = std::get<1>(ret) - utf32_output; + if (std::get<0>(ret) != buf + len) { + result scalar_res = + scalar::utf16_to_utf32::convert_with_errors( + std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret)); + if (scalar_res.error) { + scalar_res.count += (std::get<0>(ret) - buf); + return scalar_res; + } else { + scalar_res.count += saved_bytes; + return scalar_res; + } + } + return simdutf::result(simdutf::SUCCESS, saved_bytes); +} + +simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + std::tuple ret = + icelake::convert_utf16_to_utf32(buf, len, utf32_output); + if (!std::get<2>(ret)) { + result scalar_res = + scalar::utf16_to_utf32::convert_with_errors( + std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret)); + scalar_res.count += (std::get<0>(ret) - buf); + return scalar_res; + } + size_t saved_bytes = std::get<1>(ret) - utf32_output; + if (std::get<0>(ret) != buf + len) { + result scalar_res = + scalar::utf16_to_utf32::convert_with_errors( + std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret)); + if (scalar_res.error) { + scalar_res.count += (std::get<0>(ret) - buf); + return scalar_res; + } else { + 
scalar_res.count += saved_bytes; + return scalar_res; + } + } + return simdutf::result(simdutf::SUCCESS, saved_bytes); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + std::tuple ret = + icelake::convert_utf16_to_utf32(buf, len, + utf32_output); + if (!std::get<2>(ret)) { + return 0; + } + size_t saved_bytes = std::get<1>(ret) - utf32_output; + if (std::get<0>(ret) != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_utf32::convert( + std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret)); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + std::tuple ret = + icelake::convert_utf16_to_utf32(buf, len, utf32_output); + if (!std::get<2>(ret)) { + return 0; + } + size_t saved_bytes = std::get<1>(ret) - utf32_output; + if (std::get<0>(ret) != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_utf32::convert( + std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret)); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 +void implementation::change_endianness_utf16(const char16_t *input, + size_t length, + char16_t *output) const noexcept { + size_t pos = 0; + const __m512i byteflip = _mm512_setr_epi64( + 0x0607040502030001, 0x0e0f0c0d0a0b0809, 0x0607040502030001, + 0x0e0f0c0d0a0b0809, 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809); + while (pos + 32 <= length) { + __m512i utf16 = _mm512_loadu_si512((const __m512i *)(input + pos)); + utf16 = _mm512_shuffle_epi8(utf16, byteflip); + _mm512_storeu_si512(output + 
pos, utf16); + pos += 32; + } + if (pos < length) { + __mmask32 m((1U << (length - pos)) - 1); + __m512i utf16 = _mm512_maskz_loadu_epi16(m, (const __m512i *)(input + pos)); + utf16 = _mm512_shuffle_epi8(utf16, byteflip); + _mm512_mask_storeu_epi16(output + pos, m, utf16); + } +} + +simdutf_warn_unused size_t implementation::count_utf16le( + const char16_t *input, size_t length) const noexcept { + const char16_t *ptr = input; + size_t count{0}; + + if (length >= 32) { + const char16_t *end = input + length - 32; + + const __m512i low = _mm512_set1_epi16((uint16_t)0xdc00); + const __m512i high = _mm512_set1_epi16((uint16_t)0xdfff); + + while (ptr <= end) { + __m512i utf16 = _mm512_loadu_si512((const __m512i *)ptr); + ptr += 32; + uint64_t not_high_surrogate = + static_cast(_mm512_cmpgt_epu16_mask(utf16, high) | + _mm512_cmplt_epu16_mask(utf16, low)); + count += count_ones(not_high_surrogate); + } + } + + return count + scalar::utf16::count_code_points( + ptr, length - (ptr - input)); +} + +simdutf_warn_unused size_t implementation::count_utf16be( + const char16_t *input, size_t length) const noexcept { + const char16_t *ptr = input; + size_t count{0}; + if (length >= 32) { + + const char16_t *end = input + length - 32; + + const __m512i low = _mm512_set1_epi16((uint16_t)0xdc00); + const __m512i high = _mm512_set1_epi16((uint16_t)0xdfff); + + const __m512i byteflip = _mm512_setr_epi64( + 0x0607040502030001, 0x0e0f0c0d0a0b0809, 0x0607040502030001, + 0x0e0f0c0d0a0b0809, 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809); + while (ptr <= end) { + __m512i utf16 = + _mm512_shuffle_epi8(_mm512_loadu_si512((__m512i *)ptr), byteflip); + ptr += 32; + uint64_t not_high_surrogate = + static_cast(_mm512_cmpgt_epu16_mask(utf16, high) | + _mm512_cmplt_epu16_mask(utf16, low)); + count += count_ones(not_high_surrogate); + } + } + + return count + scalar::utf16::count_code_points( + ptr, length - (ptr - input)); +} +#endif // SIMDUTF_FEATURE_UTF16 + 
+#if SIMDUTF_FEATURE_UTF8 +simdutf_warn_unused size_t +implementation::count_utf8(const char *input, size_t length) const noexcept { + const uint8_t *str = reinterpret_cast(input); + size_t answer = + length / sizeof(__m512i) * + sizeof(__m512i); // Number of 512-bit chunks that fits into the length. + size_t i = 0; + __m512i unrolled_popcount{0}; + + const __m512i continuation = _mm512_set1_epi8(char(0b10111111)); + + while (i + sizeof(__m512i) <= length) { + size_t iterations = (length - i) / sizeof(__m512i); + + size_t max_i = i + iterations * sizeof(__m512i) - sizeof(__m512i); + for (; i + 8 * sizeof(__m512i) <= max_i; i += 8 * sizeof(__m512i)) { + __m512i input1 = _mm512_loadu_si512((const __m512i *)(str + i)); + __m512i input2 = + _mm512_loadu_si512((const __m512i *)(str + i + sizeof(__m512i))); + __m512i input3 = + _mm512_loadu_si512((const __m512i *)(str + i + 2 * sizeof(__m512i))); + __m512i input4 = + _mm512_loadu_si512((const __m512i *)(str + i + 3 * sizeof(__m512i))); + __m512i input5 = + _mm512_loadu_si512((const __m512i *)(str + i + 4 * sizeof(__m512i))); + __m512i input6 = + _mm512_loadu_si512((const __m512i *)(str + i + 5 * sizeof(__m512i))); + __m512i input7 = + _mm512_loadu_si512((const __m512i *)(str + i + 6 * sizeof(__m512i))); + __m512i input8 = + _mm512_loadu_si512((const __m512i *)(str + i + 7 * sizeof(__m512i))); + + __mmask64 mask1 = _mm512_cmple_epi8_mask(input1, continuation); + __mmask64 mask2 = _mm512_cmple_epi8_mask(input2, continuation); + __mmask64 mask3 = _mm512_cmple_epi8_mask(input3, continuation); + __mmask64 mask4 = _mm512_cmple_epi8_mask(input4, continuation); + __mmask64 mask5 = _mm512_cmple_epi8_mask(input5, continuation); + __mmask64 mask6 = _mm512_cmple_epi8_mask(input6, continuation); + __mmask64 mask7 = _mm512_cmple_epi8_mask(input7, continuation); + __mmask64 mask8 = _mm512_cmple_epi8_mask(input8, continuation); + + __m512i mask_register = _mm512_set_epi64(mask8, mask7, mask6, mask5, + mask4, mask3, mask2, mask1); + + 
unrolled_popcount = _mm512_add_epi64(unrolled_popcount, + _mm512_popcnt_epi64(mask_register)); + } + + for (; i <= max_i; i += sizeof(__m512i)) { + __m512i more_input = _mm512_loadu_si512((const __m512i *)(str + i)); + uint64_t continuation_bitmask = static_cast( + _mm512_cmple_epi8_mask(more_input, continuation)); + answer -= count_ones(continuation_bitmask); + } + } + + answer -= _mm512_reduce_add_epi64(unrolled_popcount); + + return answer + scalar::utf8::count_code_points( + reinterpret_cast(str + i), length - i); +} +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::latin1_length_from_utf8( + const char *buf, size_t len) const noexcept { + return count_utf8(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t implementation::utf8_length_from_utf16le( + const char16_t *input, size_t length) const noexcept { + return icelake_utf8_length_from_utf16(input, length); +} + +simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( + const char16_t *input, size_t length) const noexcept { + return icelake_utf8_length_from_utf16(input, length); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::utf32_length_from_utf16le( + const char16_t *input, size_t length) const noexcept { + return implementation::count_utf16le(input, length); +} + +simdutf_warn_unused size_t implementation::utf32_length_from_utf16be( + const char16_t *input, size_t length) const noexcept { + return implementation::count_utf16be(input, length); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::utf8_length_from_latin1( + const char *input, size_t length) const noexcept { + const uint8_t *str = 
reinterpret_cast(input); + size_t answer = length / sizeof(__m512i) * sizeof(__m512i); + size_t i = 0; + if (answer >= 2048) // long strings optimization + { + unsigned char v_0xFF = 0xff; + __m512i eight_64bits = _mm512_setzero_si512(); + while (i + sizeof(__m512i) <= length) { + __m512i runner = _mm512_setzero_si512(); + size_t iterations = (length - i) / sizeof(__m512i); + if (iterations > 255) { + iterations = 255; + } + size_t max_i = i + iterations * sizeof(__m512i) - sizeof(__m512i); + for (; i + 4 * sizeof(__m512i) <= max_i; i += 4 * sizeof(__m512i)) { + // Load four __m512i vectors + __m512i input1 = _mm512_loadu_si512((const __m512i *)(str + i)); + __m512i input2 = + _mm512_loadu_si512((const __m512i *)(str + i + sizeof(__m512i))); + __m512i input3 = _mm512_loadu_si512( + (const __m512i *)(str + i + 2 * sizeof(__m512i))); + __m512i input4 = _mm512_loadu_si512( + (const __m512i *)(str + i + 3 * sizeof(__m512i))); + + // Generate four masks + __mmask64 mask1 = + _mm512_cmpgt_epi8_mask(_mm512_setzero_si512(), input1); + __mmask64 mask2 = + _mm512_cmpgt_epi8_mask(_mm512_setzero_si512(), input2); + __mmask64 mask3 = + _mm512_cmpgt_epi8_mask(_mm512_setzero_si512(), input3); + __mmask64 mask4 = + _mm512_cmpgt_epi8_mask(_mm512_setzero_si512(), input4); + // Apply the masks and subtract from the runner + __m512i not_ascii1 = + _mm512_mask_set1_epi8(_mm512_setzero_si512(), mask1, v_0xFF); + __m512i not_ascii2 = + _mm512_mask_set1_epi8(_mm512_setzero_si512(), mask2, v_0xFF); + __m512i not_ascii3 = + _mm512_mask_set1_epi8(_mm512_setzero_si512(), mask3, v_0xFF); + __m512i not_ascii4 = + _mm512_mask_set1_epi8(_mm512_setzero_si512(), mask4, v_0xFF); + + runner = _mm512_sub_epi8(runner, not_ascii1); + runner = _mm512_sub_epi8(runner, not_ascii2); + runner = _mm512_sub_epi8(runner, not_ascii3); + runner = _mm512_sub_epi8(runner, not_ascii4); + } + + for (; i <= max_i; i += sizeof(__m512i)) { + __m512i more_input = _mm512_loadu_si512((const __m512i *)(str + i)); + + 
__mmask64 mask = + _mm512_cmpgt_epi8_mask(_mm512_setzero_si512(), more_input); + __m512i not_ascii = + _mm512_mask_set1_epi8(_mm512_setzero_si512(), mask, v_0xFF); + runner = _mm512_sub_epi8(runner, not_ascii); + } + + eight_64bits = _mm512_add_epi64( + eight_64bits, _mm512_sad_epu8(runner, _mm512_setzero_si512())); + } + + answer += _mm512_reduce_add_epi64(eight_64bits); + } else if (answer > 0) { + for (; i + sizeof(__m512i) <= length; i += sizeof(__m512i)) { + __m512i latin = _mm512_loadu_si512((const __m512i *)(str + i)); + uint64_t non_ascii = _mm512_movepi8_mask(latin); + answer += count_ones(non_ascii); + } + } + return answer + scalar::latin1::utf8_length_from_latin1( + reinterpret_cast(str + i), length - i); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t implementation::utf16_length_from_utf8( + const char *input, size_t length) const noexcept { + size_t pos = 0; + + // UTF-16 char length based on the four most significant bits of UTF-8 bytes + const __m128i utf8_length_128 = _mm_setr_epi8( + // ASCII chars + /* 0000 */ 1, + /* 0001 */ 1, + /* 0010 */ 1, + /* 0011 */ 1, + /* 0100 */ 1, + /* 0101 */ 1, + /* 0110 */ 1, + /* 0111 */ 1, + + // continuation bytes + /* 1000 */ 0, + /* 1001 */ 0, + /* 1010 */ 0, + /* 1011 */ 0, + + // leading bytes + /* 1100 */ 1, // 2-byte UTF-8 char => 1 UTF-16 word + /* 1101 */ 1, // 2-byte UTF-8 char => 1 UTF-16 word + /* 1110 */ 1, // 3-byte UTF-8 char => 1 UTF-16 word + /* 1111 */ 2 // 4-byte UTF-8 char => 2 UTF-16 words (surrogate pair) + ); + + const __m512i char_length = broadcast_128bit_lane(utf8_length_128); + + constexpr size_t max_iterations = 255 / 2; + + size_t iterations = 0; + const auto zero = _mm512_setzero_si512(); + __m512i local = _mm512_setzero_si512(); // byte-wise counters + __m512i counters = _mm512_setzero_si512(); // 64-bit counters + for (; pos + 64 <= length; pos += 64) { + __m512i utf8 = 
_mm512_loadu_si512((const __m512i *)(input + pos)); + const auto t0 = _mm512_srli_epi32(utf8, 4); + const auto t1 = _mm512_and_si512(t0, _mm512_set1_epi8(0xf)); + const auto t2 = _mm512_shuffle_epi8(char_length, t1); + local = _mm512_add_epi8(local, t2); + + iterations += 1; + if (iterations == max_iterations) { + counters = _mm512_add_epi64(counters, _mm512_sad_epu8(local, zero)); + local = zero; + iterations = 0; + } + } + + size_t count = 0; + + if (pos > 0) { + // don't waste time for short strings + if (iterations > 0) { + counters = _mm512_add_epi64(counters, _mm512_sad_epu8(local, zero)); + } + + const auto l0 = _mm512_extracti32x4_epi32(counters, 0); + const auto l1 = _mm512_extracti32x4_epi32(counters, 1); + const auto l2 = _mm512_extracti32x4_epi32(counters, 2); + const auto l3 = _mm512_extracti32x4_epi32(counters, 3); + + const auto sum = + _mm_add_epi64(_mm_add_epi64(l0, l1), _mm_add_epi64(l2, l3)); + + count = uint64_t(_mm_extract_epi64(sum, 0)) + + uint64_t(_mm_extract_epi64(sum, 1)); + } + + return count + + scalar::utf8::utf16_length_from_utf8(input + pos, length - pos); +} +simdutf_warn_unused result +implementation::utf8_length_from_utf16le_with_replacement( + const char16_t *input, size_t length) const noexcept { + return icelake_utf8_length_from_utf16_with_replacement( + input, length); +} + +simdutf_warn_unused result +implementation::utf8_length_from_utf16be_with_replacement( + const char16_t *input, size_t length) const noexcept { + return icelake_utf8_length_from_utf16_with_replacement( + input, length); +} + +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::utf8_length_from_utf32( + const char32_t *input, size_t length) const noexcept { + return utf32::utf8_length_from_utf32(input, length); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t 
implementation::utf16_length_from_utf32( + const char32_t *input, size_t length) const noexcept { + const char32_t *ptr = input; + size_t count{0}; + + if (length >= 16) { + const char32_t *end = input + length - 16; + + const __m512i v_0000_ffff = _mm512_set1_epi32((uint32_t)0x0000ffff); + + while (ptr <= end) { + __m512i utf32 = _mm512_loadu_si512((const __m512i *)ptr); + ptr += 16; + __mmask16 surrogates_bitmask = + _mm512_cmpgt_epu32_mask(utf32, v_0000_ffff); + + count += 16 + count_ones(surrogates_bitmask); + } + } + + return count + + scalar::utf32::utf16_length_from_utf32(ptr, length - (ptr - input)); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::utf32_length_from_utf8( + const char *input, size_t length) const noexcept { + return implementation::count_utf8(input, length); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_BASE64 +simdutf_warn_unused result implementation::base64_to_binary( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + if (options & base64_default_or_url) { + if (options == base64_options::base64_default_or_url_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else if (options & base64_url) { + if (options == base64_options::base64_url_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else { + if (options == base64_options::base64_default_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( 
+ output, input, length, options, last_chunk_options); + } + } +} + +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + if (options & base64_default_or_url) { + if (options == base64_options::base64_default_or_url_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else if (options & base64_url) { + if (options == base64_options::base64_url_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else { + if (options == base64_options::base64_default_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } +} + +simdutf_warn_unused result implementation::base64_to_binary( + const char16_t *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + if (options & base64_default_or_url) { + if (options == base64_options::base64_default_or_url_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else if (options & base64_url) { + if (options == base64_options::base64_url_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else { + if (options == 
base64_options::base64_default_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } +} + +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char16_t *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + if (options & base64_default_or_url) { + if (options == base64_options::base64_default_or_url_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else if (options & base64_url) { + if (options == base64_options::base64_url_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else { + if (options == base64_options::base64_default_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } +} + +size_t implementation::binary_to_base64(const char *input, size_t length, + char *output, + base64_options options) const noexcept { + if (options & base64_url) { + return encode_base64(output, input, length, options); + } else { + return encode_base64(output, input, length, options); + } +} + +size_t implementation::binary_to_base64_with_lines( + const char *input, size_t length, char *output, size_t line_length, + base64_options options) const noexcept { + if (options & base64_url) { + return encode_base64_impl(output, input, length, options, + line_length); + } else { + return encode_base64_impl(output, input, length, options, + line_length); + } 
+} + +const char *implementation::find(const char *start, const char *end, + char character) const noexcept { + return util_find(start, end, character); +} +const char16_t *implementation::find(const char16_t *start, const char16_t *end, + char16_t character) const noexcept { + return util_find(start, end, character); +} +#endif // SIMDUTF_FEATURE_BASE64 + +} // namespace icelake +} // namespace simdutf + +/* begin file src/simdutf/icelake/end.h */ +#if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE +// nothing needed. +#else +SIMDUTF_UNTARGET_REGION +#endif + + +#if SIMDUTF_GCC11ORMORE // workaround for + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 +SIMDUTF_POP_DISABLE_WARNINGS +#endif // end of workaround +/* end file src/simdutf/icelake/end.h */ +/* end file src/icelake/implementation.cpp */ +#endif +#if SIMDUTF_IMPLEMENTATION_HASWELL +/* begin file src/haswell/implementation.cpp */ +/* begin file src/simdutf/haswell/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "haswell" +// #define SIMDUTF_IMPLEMENTATION haswell +#define SIMDUTF_SIMD_HAS_BYTEMASK 1 + +#if SIMDUTF_CAN_ALWAYS_RUN_HASWELL +// nothing needed. 
+#else +SIMDUTF_TARGET_HASWELL +#endif + +#if SIMDUTF_GCC11ORMORE // workaround for + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 +// clang-format off +SIMDUTF_DISABLE_GCC_WARNING(-Wmaybe-uninitialized) +// clang-format on +#endif // end of workaround +/* end file src/simdutf/haswell/begin.h */ + +namespace simdutf { +namespace haswell { +namespace { +#ifndef SIMDUTF_HASWELL_H + #error "haswell.h must be included" +#endif +using namespace simd; + +#if SIMDUTF_FEATURE_ASCII || SIMDUTF_FEATURE_DETECT_ENCODING || \ + SIMDUTF_FEATURE_UTF8 +simdutf_really_inline bool is_ascii(const simd8x64 &input) { + return input.reduce_or().is_ascii(); +} +#endif // SIMDUTF_FEATURE_ASCII || SIMDUTF_FEATURE_DETECT_ENCODING || + // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_really_inline simd8 +must_be_2_3_continuation(const simd8 prev2, + const simd8 prev3) { + simd8 is_third_byte = + prev2.saturating_sub(0xe0u - 0x80); // Only 111_____ will be > 0x80 + simd8 is_fourth_byte = + prev3.saturating_sub(0xf0u - 0x80); // Only 1111____ will be > 0x80 + return simd8(is_third_byte | is_fourth_byte); +} +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +namespace utf16 { +/* begin file src/haswell/avx2_validate_utf16.cpp */ +template +simd8 utf16_gather_high_bytes(const simd16 &in0, + const simd16 &in1) { + if (big_endian) { + // we want lower bytes + const auto mask = simd16(0x00ff); + const auto t0 = in0 & mask; + const auto t1 = in1 & mask; + + return simd16::pack(t0, t1); + } else { + const auto t0 = in0.shr<8>(); + const auto t1 = in1.shr<8>(); + + return simd16::pack(t0, t1); + } +} +/* end file src/haswell/avx2_validate_utf16.cpp */ +} +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF16 +/* begin file src/haswell/avx2_utf16fix.cpp */ +/* + * Process one block of 16 characters. 
If in_place is false, + * copy the block from in to out. If there is a sequencing + * error in the block, overwrite the illsequenced characters + * with the replacement character. This function reads one + * character before the beginning of the buffer as a lookback. + * If that character is illsequenced, it too is overwritten. + */ +template +void utf16fix_block(char16_t *out, const char16_t *in) { + auto swap_if_needed = [](uint16_t x) simdutf_constexpr -> uint16_t { + return scalar::utf16::swap_if_needed(x); + }; + const char16_t replacement = scalar::utf16::replacement(); + __m256i lookback, block, lb_masked, block_masked, lb_is_high, block_is_low; + __m256i illseq, lb_illseq, block_illseq, lb_illseq_shifted; + + lookback = _mm256_loadu_si256((const __m256i *)(in - 1)); + block = _mm256_loadu_si256((const __m256i *)in); + lb_masked = + _mm256_and_si256(lookback, _mm256_set1_epi16(swap_if_needed(0xfc00u))); + block_masked = + _mm256_and_si256(block, _mm256_set1_epi16(swap_if_needed(0xfc00u))); + lb_is_high = + _mm256_cmpeq_epi16(lb_masked, _mm256_set1_epi16(swap_if_needed(0xd800u))); + block_is_low = _mm256_cmpeq_epi16(block_masked, + _mm256_set1_epi16(swap_if_needed(0xdc00u))); + + illseq = _mm256_xor_si256(lb_is_high, block_is_low); + if (!_mm256_testz_si256(illseq, illseq)) { + int lb; + + /* compute the cause of the illegal sequencing */ + lb_illseq = _mm256_andnot_si256(block_is_low, lb_is_high); +#if SIMDUTF_GCC9OROLDER + // Old GCC versions are missing _mm256_zextsi128_si256, so we emulate it. 
+ __m128i tmp_legacygcc = + _mm_bslli_si128(_mm256_extracti128_si256(lb_illseq, 1), 14); + __m256i tmp_legacygcc256 = + _mm256_set_m128i(_mm_setzero_si128(), tmp_legacygcc); + lb_illseq_shifted = + _mm256_or_si256(_mm256_bsrli_epi128(lb_illseq, 2), tmp_legacygcc256); +#else + lb_illseq_shifted = + _mm256_or_si256(_mm256_bsrli_epi128(lb_illseq, 2), + _mm256_zextsi128_si256(_mm_bslli_si128( + _mm256_extracti128_si256(lb_illseq, 1), 14))); +#endif // SIMDUTF_GCC9OROLDER + block_illseq = _mm256_or_si256( + _mm256_andnot_si256(lb_is_high, block_is_low), lb_illseq_shifted); + + /* fix illegal sequencing in the lookback */ +#if SIMDUTF_GCC10 || SIMDUTF_GCC9OROLDER + // GCC 10 is missing important intrinsics. + lb = _mm_cvtsi128_si32(_mm256_extractf128_si256(lb_illseq, 0)); +#else + lb = _mm256_cvtsi256_si32(lb_illseq); +#endif + lb = (lb & replacement) | (~lb & out[-1]); + out[-1] = char16_t(lb); + + /* fix illegal sequencing in the main block */ + block = + _mm256_blendv_epi8(block, _mm256_set1_epi16(replacement), block_illseq); + _mm256_storeu_si256((__m256i *)out, block); + } else if (!in_place) { + _mm256_storeu_si256((__m256i *)out, block); + } +} + +template +void utf16fix_block_sse(char16_t *out, const char16_t *in) { + auto swap_if_needed = [](uint16_t x) simdutf_constexpr -> uint16_t { + return scalar::utf16::swap_if_needed(x); + }; + const char16_t replacement = scalar::utf16::replacement(); + __m128i lookback, block, lb_masked, block_masked, lb_is_high, block_is_low; + __m128i illseq, lb_illseq, block_illseq; + + lookback = _mm_loadu_si128((const __m128i *)(in - 1)); + block = _mm_loadu_si128((const __m128i *)in); + lb_masked = _mm_and_si128(lookback, _mm_set1_epi16(swap_if_needed(0xfc00U))); + block_masked = _mm_and_si128(block, _mm_set1_epi16(swap_if_needed(0xfc00U))); + lb_is_high = + _mm_cmpeq_epi16(lb_masked, _mm_set1_epi16(swap_if_needed(0xd800U))); + block_is_low = + _mm_cmpeq_epi16(block_masked, _mm_set1_epi16(swap_if_needed(0xdc00U))); + + illseq = 
_mm_xor_si128(lb_is_high, block_is_low); + if (_mm_movemask_epi8(illseq) != 0) { + /* compute the cause of the illegal sequencing */ + lb_illseq = _mm_andnot_si128(block_is_low, lb_is_high); + block_illseq = _mm_or_si128(_mm_andnot_si128(lb_is_high, block_is_low), + _mm_bsrli_si128(lb_illseq, 2)); + /* fix illegal sequencing in the lookback */ + int lb = _mm_cvtsi128_si32(lb_illseq); + lb = (lb & replacement) | (~lb & out[-1]); + out[-1] = char16_t(lb); + /* fix illegal sequencing in the main block */ + block = + _mm_or_si128(_mm_andnot_si128(block_illseq, block), + _mm_and_si128(block_illseq, _mm_set1_epi16(replacement))); + _mm_storeu_si128((__m128i *)out, block); + } else if (!in_place) { + _mm_storeu_si128((__m128i *)out, block); + } +} + +template +void utf16fix_sse(const char16_t *in, size_t n, char16_t *out) { + const char16_t replacement = scalar::utf16::replacement(); + size_t i; + + if (n < 9) { + scalar::utf16::to_well_formed_utf16(in, n, out); + return; + } + + out[0] = + scalar::utf16::is_low_surrogate(in[0]) ? replacement : in[0]; + + /* duplicate code to have the compiler specialise utf16fix_block() */ + if (in == out) { + for (i = 1; i + 8 < n; i += 8) { + utf16fix_block_sse(out + i, in + i); + } + + utf16fix_block_sse(out + n - 8, in + n - 8); + } else { + for (i = 1; i + 8 < n; i += 8) { + utf16fix_block_sse(out + i, in + i); + } + + utf16fix_block_sse(out + n - 8, in + n - 8); + } + + out[n - 1] = scalar::utf16::is_high_surrogate(out[n - 1]) + ? replacement + : out[n - 1]; +} + +template +void utf16fix_avx(const char16_t *in, size_t n, char16_t *out) { + const char16_t replacement = scalar::utf16::replacement(); + size_t i; + + if (n < 17) { + utf16fix_sse(in, n, out); + return; + } + + out[0] = + scalar::utf16::is_low_surrogate(in[0]) ? 
replacement : in[0]; + + /* duplicate code to have the compiler specialise utf16fix_block() */ + if (in == out) { + for (i = 1; i + 16 < n; i += 16) { + utf16fix_block(out + i, in + i); + } + + utf16fix_block(out + n - 16, in + n - 16); + } else { + for (i = 1; i + 16 < n; i += 16) { + utf16fix_block(out + i, in + i); + } + + utf16fix_block(out + n - 16, in + n - 16); + } + + out[n - 1] = scalar::utf16::is_high_surrogate(out[n - 1]) + ? replacement + : out[n - 1]; +} +/* end file src/haswell/avx2_utf16fix.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/haswell/avx2_convert_latin1_to_utf8.cpp */ +std::pair +avx2_convert_latin1_to_utf8(const char *latin1_input, size_t len, + char *utf8_output) { + const char *end = latin1_input + len; + const __m256i v_0000 = _mm256_setzero_si256(); + const __m256i v_c080 = _mm256_set1_epi16((int16_t)0xc080); + const __m256i v_ff80 = _mm256_set1_epi16((int16_t)0xff80); + const size_t safety_margin = 12; + + while (end - latin1_input >= std::ptrdiff_t(16 + safety_margin)) { + __m128i in8 = _mm_loadu_si128((__m128i *)latin1_input); + // a single 16-bit UTF-16 word can yield 1, 2 or 3 UTF-8 bytes + const __m128i v_80 = _mm_set1_epi8((char)0x80); + if (_mm_testz_si128(in8, v_80)) { // ASCII fast path!!!! + // 1. store (16 bytes) + _mm_storeu_si128((__m128i *)utf8_output, in8); + // 2. adjust pointers + latin1_input += 16; + utf8_output += 16; + continue; // we are done for this round! + } + // We proceed only with the first 16 bytes. + const __m256i in = _mm256_cvtepu8_epi16((in8)); + + // 1. 
prepare 2-byte values + // input 16-bit word : [0000|0000|aabb|bbbb] x 8 + // expected output : [1100|00aa|10bb|bbbb] x 8 + const __m256i v_1f00 = _mm256_set1_epi16((int16_t)0x1f00); + const __m256i v_003f = _mm256_set1_epi16((int16_t)0x003f); + + // t0 = [0000|00aa|bbbb|bb00] + const __m256i t0 = _mm256_slli_epi16(in, 2); + // t1 = [0000|00aa|0000|0000] + const __m256i t1 = _mm256_and_si256(t0, v_1f00); + // t2 = [0000|0000|00bb|bbbb] + const __m256i t2 = _mm256_and_si256(in, v_003f); + // t3 = [000a|aaaa|00bb|bbbb] + const __m256i t3 = _mm256_or_si256(t1, t2); + // t4 = [1100|00aa|10bb|bbbb] + const __m256i t4 = _mm256_or_si256(t3, v_c080); + + // 2. merge ASCII and 2-byte codewords + + // no bits set above 7th bit + const __m256i one_byte_bytemask = + _mm256_cmpeq_epi16(_mm256_and_si256(in, v_ff80), v_0000); + const uint32_t one_byte_bitmask = + static_cast(_mm256_movemask_epi8(one_byte_bytemask)); + + const __m256i utf8_unpacked = _mm256_blendv_epi8(t4, in, one_byte_bytemask); + + // 3. prepare bitmask for 8-bit lookup + const uint32_t M0 = one_byte_bitmask & 0x55555555; + const uint32_t M1 = M0 >> 7; + const uint32_t M2 = (M1 | M0) & 0x00ff00ff; + // 4. pack the bytes + + const uint8_t *row = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0]; + const uint8_t *row_2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2 >> 16)] + [0]; + + const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1)); + const __m128i shuffle_2 = _mm_loadu_si128((__m128i *)(row_2 + 1)); + + const __m256i utf8_packed = _mm256_shuffle_epi8( + utf8_unpacked, _mm256_setr_m128i(shuffle, shuffle_2)); + // 5. store bytes + _mm_storeu_si128((__m128i *)utf8_output, + _mm256_castsi256_si128(utf8_packed)); + utf8_output += row[0]; + _mm_storeu_si128((__m128i *)utf8_output, + _mm256_extractf128_si256(utf8_packed, 1)); + utf8_output += row_2[0]; + + // 6. 
adjust pointers + latin1_input += 16; + continue; + + } // while + return std::make_pair(latin1_input, utf8_output); +} +/* end file src/haswell/avx2_convert_latin1_to_utf8.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/haswell/avx2_convert_latin1_to_utf16.cpp */ +template +std::pair +avx2_convert_latin1_to_utf16(const char *latin1_input, size_t len, + char16_t *utf16_output) { + size_t rounded_len = len & ~0xF; // Round down to nearest multiple of 16 + + size_t i = 0; + for (; i < rounded_len; i += 16) { + // Load 16 bytes from the address (input + i) into a xmm register + const __m128i latin1 = + _mm_loadu_si128(reinterpret_cast(latin1_input + i)); + + // Zero extend each byte in `in` to word + __m256i utf16 = _mm256_cvtepu8_epi16(latin1); + + if (big_endian) { + const __m128i swap128 = + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + const __m256i swap = _mm256_set_m128i(swap128, swap128); + utf16 = _mm256_shuffle_epi8(utf16, swap); + } + + // Store the contents of xmm1 into the address pointed by (output + i) + _mm256_storeu_si256(reinterpret_cast<__m256i *>(utf16_output + i), utf16); + } + + return std::make_pair(latin1_input + rounded_len, utf16_output + rounded_len); +} +/* end file src/haswell/avx2_convert_latin1_to_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/haswell/avx2_convert_latin1_to_utf32.cpp */ +std::pair +avx2_convert_latin1_to_utf32(const char *buf, size_t len, + char32_t *utf32_output) { + size_t rounded_len = ((len | 7) ^ 7); // Round down to nearest multiple of 8 + + for (size_t i = 0; i < rounded_len; i += 8) { + // Load 8 Latin1 characters into a 64-bit register + __m128i in = _mm_loadl_epi64((__m128i *)&buf[i]); + + // Zero extend each set of 8 Latin1 characters to 8 32-bit integers using + // vpmovzxbd + __m256i out = 
_mm256_cvtepu8_epi32(in); + + // Store the results back to memory + _mm256_storeu_si256((__m256i *)&utf32_output[i], out); + } + + // return pointers pointing to where we left off + return std::make_pair(buf + rounded_len, utf32_output + rounded_len); +} +/* end file src/haswell/avx2_convert_latin1_to_utf32.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +/* begin file src/haswell/avx2_convert_utf8_to_utf16.cpp */ +// depends on "tables/utf8_to_utf16_tables.h" + +// Convert up to 12 bytes from utf8 to utf16 using a mask indicating the +// end of the code points. Only the least significant 12 bits of the mask +// are accessed. +// It returns how many bytes were consumed (up to 12). +template +size_t convert_masked_utf8_to_utf16(const char *input, + uint64_t utf8_end_of_code_point_mask, + char16_t *&utf16_output) { + // we use an approach where we try to process up to 12 input bytes. + // Why 12 input bytes and not 16? Because we are concerned with the size of + // the lookup tables. Also 12 is nicely divisible by two and three. + // + // + // Optimization note: our main path below is load-latency dependent. Thus it + // is maybe beneficial to have fast paths that depend on branch prediction but + // have less latency. This results in more instructions but, potentially, also + // higher speeds. + // + // We first try a few fast paths. + const __m128i swap = + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + const __m128i in = _mm_loadu_si128((__m128i *)input); + const uint16_t input_utf8_end_of_code_point_mask = + utf8_end_of_code_point_mask & 0xfff; + if (utf8_end_of_code_point_mask == 0xfff) { + // We process the data in chunks of 12 bytes. 
+ __m256i ascii = _mm256_cvtepu8_epi16(in); + if (big_endian) { + const __m256i swap256 = _mm256_setr_epi8( + 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18, + 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30); + ascii = _mm256_shuffle_epi8(ascii, swap256); + } + _mm256_storeu_si256(reinterpret_cast<__m256i *>(utf16_output), ascii); + utf16_output += 12; // We wrote 12 16-bit characters. + return 12; // We consumed 12 bytes. + } + if (((utf8_end_of_code_point_mask & 0xffff) == 0xaaaa)) { + // We want to take 8 2-byte UTF-8 code units and turn them into 8 2-byte + // UTF-16 code units. There is probably a more efficient sequence, but the + // following might do. + const __m128i sh = + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + const __m128i perm = _mm_shuffle_epi8(in, sh); + const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f)); + const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00)); + __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2)); + if (big_endian) + composed = _mm_shuffle_epi8(composed, swap); + _mm_storeu_si128((__m128i *)utf16_output, composed); + utf16_output += 8; // We wrote 16 bytes, 8 code points. + return 16; + } + if (input_utf8_end_of_code_point_mask == 0x924) { + // We want to take 4 3-byte UTF-8 code units and turn them into 4 2-byte + // UTF-16 code units. There is probably a more efficient sequence, but the + // following might do. 
+ const __m128i sh = + _mm_setr_epi8(2, 1, 0, -1, 5, 4, 3, -1, 8, 7, 6, -1, 11, 10, 9, -1); + const __m128i perm = _mm_shuffle_epi8(in, sh); + const __m128i ascii = + _mm_and_si128(perm, _mm_set1_epi32(0x7f)); // 7 or 6 bits + const __m128i middlebyte = + _mm_and_si128(perm, _mm_set1_epi32(0x3f00)); // 5 or 6 bits + const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2); + const __m128i highbyte = + _mm_and_si128(perm, _mm_set1_epi32(0x0f0000)); // 4 bits + const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 4); + const __m128i composed = + _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), highbyte_shifted); + __m128i composed_repacked = _mm_packus_epi32(composed, composed); + if (big_endian) + composed_repacked = _mm_shuffle_epi8(composed_repacked, swap); + _mm_storeu_si128((__m128i *)utf16_output, composed_repacked); + utf16_output += 4; + return 12; + } + + const uint8_t idx = simdutf::tables::utf8_to_utf16::utf8bigindex + [input_utf8_end_of_code_point_mask][0]; + const uint8_t consumed = simdutf::tables::utf8_to_utf16::utf8bigindex + [input_utf8_end_of_code_point_mask][1]; + if (idx < 64) { + // SIX (6) input code-code units + // this is a relatively easy scenario + // we process SIX (6) input code-code units. The max length in bytes of six + // code code units spanning between 1 and 2 bytes each is 12 bytes. On + // processors where pdep/pext is fast, we might be able to use a small + // lookup table. + const __m128i sh = _mm_loadu_si128( + (const __m128i *)simdutf::tables::utf8_to_utf16::shufutf8[idx]); + const __m128i perm = _mm_shuffle_epi8(in, sh); + const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f)); + const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00)); + __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2)); + if (big_endian) + composed = _mm_shuffle_epi8(composed, swap); + _mm_storeu_si128((__m128i *)utf16_output, composed); + utf16_output += 6; // We wrote 12 bytes, 6 code points. 
There is a potential + // overflow of 4 bytes. + } else if (idx < 145) { + // FOUR (4) input code-code units + const __m128i sh = _mm_loadu_si128( + (const __m128i *)simdutf::tables::utf8_to_utf16::shufutf8[idx]); + const __m128i perm = _mm_shuffle_epi8(in, sh); + const __m128i ascii = + _mm_and_si128(perm, _mm_set1_epi32(0x7f)); // 7 or 6 bits + const __m128i middlebyte = + _mm_and_si128(perm, _mm_set1_epi32(0x3f00)); // 5 or 6 bits + const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2); + const __m128i highbyte = + _mm_and_si128(perm, _mm_set1_epi32(0x0f0000)); // 4 bits + const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 4); + const __m128i composed = + _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), highbyte_shifted); + __m128i composed_repacked = _mm_packus_epi32(composed, composed); + if (big_endian) + composed_repacked = _mm_shuffle_epi8(composed_repacked, swap); + _mm_storeu_si128((__m128i *)utf16_output, composed_repacked); + utf16_output += 4; // Here we overflow by 8 bytes. + } else if (idx < 209) { + // TWO (2) input code-code units + ////////////// + // There might be garbage inputs where a leading byte mascarades as a + // four-byte leading byte (by being followed by 3 continuation byte), but is + // not greater than 0xf0. This could trigger a buffer overflow if we only + // counted leading bytes of the form 0xf0 as generating surrogate pairs, + // without further UTF-8 validation. Thus we must be careful to ensure that + // only leading bytes at least as large as 0xf0 generate surrogate pairs. We + // do as at the cost of an extra mask. 
+ ///////////// + const __m128i sh = _mm_loadu_si128( + (const __m128i *)simdutf::tables::utf8_to_utf16::shufutf8[idx]); + const __m128i perm = _mm_shuffle_epi8(in, sh); + const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi32(0x7f)); + const __m128i middlebyte = _mm_and_si128(perm, _mm_set1_epi32(0x3f00)); + const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2); + __m128i middlehighbyte = _mm_and_si128(perm, _mm_set1_epi32(0x3f0000)); + // correct for spurious high bit + const __m128i correct = + _mm_srli_epi32(_mm_and_si128(perm, _mm_set1_epi32(0x400000)), 1); + middlehighbyte = _mm_xor_si128(correct, middlehighbyte); + const __m128i middlehighbyte_shifted = _mm_srli_epi32(middlehighbyte, 4); + // We deliberately carry the leading four bits in highbyte if they are + // present, we remove them later when computing hightenbits. + const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi32(0xff000000)); + const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 6); + // When we need to generate a surrogate pair (leading byte > 0xF0), then + // the corresponding 32-bit value in 'composed' will be greater than + // > (0xff00000>>6) or > 0x3c00000. This can be used later to identify the + // location of the surrogate pairs. 
+ const __m128i composed = + _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), + _mm_or_si128(highbyte_shifted, middlehighbyte_shifted)); + const __m128i composedminus = + _mm_sub_epi32(composed, _mm_set1_epi32(0x10000)); + const __m128i lowtenbits = + _mm_and_si128(composedminus, _mm_set1_epi32(0x3ff)); + // Notice the 0x3ff mask: + const __m128i hightenbits = + _mm_and_si128(_mm_srli_epi32(composedminus, 10), _mm_set1_epi32(0x3ff)); + const __m128i lowtenbitsadd = + _mm_add_epi32(lowtenbits, _mm_set1_epi32(0xDC00)); + const __m128i hightenbitsadd = + _mm_add_epi32(hightenbits, _mm_set1_epi32(0xD800)); + const __m128i lowtenbitsaddshifted = _mm_slli_epi32(lowtenbitsadd, 16); + __m128i surrogates = _mm_or_si128(hightenbitsadd, lowtenbitsaddshifted); + uint32_t basic_buffer[4]; + uint32_t basic_buffer_swap[4]; + if (big_endian) { + _mm_storeu_si128((__m128i *)basic_buffer_swap, + _mm_shuffle_epi8(composed, swap)); + surrogates = _mm_shuffle_epi8(surrogates, swap); + } + _mm_storeu_si128((__m128i *)basic_buffer, composed); + uint32_t surrogate_buffer[4]; + _mm_storeu_si128((__m128i *)surrogate_buffer, surrogates); + for (size_t i = 0; i < 3; i++) { + if (basic_buffer[i] > 0x3c00000) { + utf16_output[0] = uint16_t(surrogate_buffer[i] & 0xffff); + utf16_output[1] = uint16_t(surrogate_buffer[i] >> 16); + utf16_output += 2; + } else { + utf16_output[0] = big_endian ? uint16_t(basic_buffer_swap[i]) + : uint16_t(basic_buffer[i]); + utf16_output++; + } + } + } else { + // here we know that there is an error but we do not handle errors + } + return consumed; +} +/* end file src/haswell/avx2_convert_utf8_to_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +/* begin file src/haswell/avx2_convert_utf8_to_utf32.cpp */ +// depends on "tables/utf8_to_utf16_tables.h" + +// Convert up to 12 bytes from utf8 to utf32 using a mask indicating the +// end of the code points. 
Only the least significant 12 bits of the mask +// are accessed. +// It returns how many bytes were consumed (up to 12). +size_t convert_masked_utf8_to_utf32(const char *input, + uint64_t utf8_end_of_code_point_mask, + char32_t *&utf32_output) { + // we use an approach where we try to process up to 12 input bytes. + // Why 12 input bytes and not 16? Because we are concerned with the size of + // the lookup tables. Also 12 is nicely divisible by two and three. + // + // + // Optimization note: our main path below is load-latency dependent. Thus it + // is maybe beneficial to have fast paths that depend on branch prediction but + // have less latency. This results in more instructions but, potentially, also + // higher speeds. + // + // We first try a few fast paths. + const __m128i in = _mm_loadu_si128((__m128i *)input); + const uint16_t input_utf8_end_of_code_point_mask = + utf8_end_of_code_point_mask & 0xfff; + if (utf8_end_of_code_point_mask == 0xfff) { + // We process the data in chunks of 12 bytes. + _mm256_storeu_si256(reinterpret_cast<__m256i *>(utf32_output), + _mm256_cvtepu8_epi32(in)); + _mm256_storeu_si256(reinterpret_cast<__m256i *>(utf32_output + 8), + _mm256_cvtepu8_epi32(_mm_srli_si128(in, 8))); + utf32_output += 12; // We wrote 12 32-bit characters. + return 12; // We consumed 12 bytes. + } + if (((utf8_end_of_code_point_mask & 0xffff) == 0xaaaa)) { + // We want to take 8 2-byte UTF-8 code units and turn them into 8 4-byte + // UTF-32 code units. There is probably a more efficient sequence, but the + // following might do. 
+ const __m128i sh = + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + const __m128i perm = _mm_shuffle_epi8(in, sh); + const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f)); + const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00)); + const __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2)); + _mm256_storeu_si256((__m256i *)utf32_output, + _mm256_cvtepu16_epi32(composed)); + utf32_output += 8; // We wrote 16 bytes, 8 code points. + return 16; + } + if (input_utf8_end_of_code_point_mask == 0x924) { + // We want to take 4 3-byte UTF-8 code units and turn them into 4 4-byte + // UTF-32 code units. There is probably a more efficient sequence, but the + // following might do. + const __m128i sh = + _mm_setr_epi8(2, 1, 0, -1, 5, 4, 3, -1, 8, 7, 6, -1, 11, 10, 9, -1); + const __m128i perm = _mm_shuffle_epi8(in, sh); + const __m128i ascii = + _mm_and_si128(perm, _mm_set1_epi32(0x7f)); // 7 or 6 bits + const __m128i middlebyte = + _mm_and_si128(perm, _mm_set1_epi32(0x3f00)); // 5 or 6 bits + const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2); + const __m128i highbyte = + _mm_and_si128(perm, _mm_set1_epi32(0x0f0000)); // 4 bits + const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 4); + const __m128i composed = + _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), highbyte_shifted); + _mm_storeu_si128((__m128i *)utf32_output, composed); + utf32_output += 4; + return 12; + } + /// We do not have a fast path available, so we fallback. + + const uint8_t idx = + tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][0]; + const uint8_t consumed = + tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1]; + if (idx < 64) { + // SIX (6) input code-code units + // this is a relatively easy scenario + // we process SIX (6) input code-code units. The max length in bytes of six + // code code units spanning between 1 and 2 bytes each is 12 bytes. 
On + // processors where pdep/pext is fast, we might be able to use a small + // lookup table. + const __m128i sh = + _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]); + const __m128i perm = _mm_shuffle_epi8(in, sh); + const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f)); + const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00)); + const __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2)); + _mm256_storeu_si256((__m256i *)utf32_output, + _mm256_cvtepu16_epi32(composed)); + utf32_output += 6; // We wrote 24 bytes, 6 code points. There is a potential + // overflow of 32 - 24 = 8 bytes. + } else if (idx < 145) { + // FOUR (4) input code-code units + const __m128i sh = + _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]); + const __m128i perm = _mm_shuffle_epi8(in, sh); + const __m128i ascii = + _mm_and_si128(perm, _mm_set1_epi32(0x7f)); // 7 or 6 bits + const __m128i middlebyte = + _mm_and_si128(perm, _mm_set1_epi32(0x3f00)); // 5 or 6 bits + const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2); + const __m128i highbyte = + _mm_and_si128(perm, _mm_set1_epi32(0x0f0000)); // 4 bits + const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 4); + const __m128i composed = + _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), highbyte_shifted); + _mm_storeu_si128((__m128i *)utf32_output, composed); + utf32_output += 4; + } else if (idx < 209) { + // TWO (2) input code-code units + const __m128i sh = + _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]); + const __m128i perm = _mm_shuffle_epi8(in, sh); + const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi32(0x7f)); + const __m128i middlebyte = _mm_and_si128(perm, _mm_set1_epi32(0x3f00)); + const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2); + __m128i middlehighbyte = _mm_and_si128(perm, _mm_set1_epi32(0x3f0000)); + // correct for spurious high bit + const __m128i correct = + 
_mm_srli_epi32(_mm_and_si128(perm, _mm_set1_epi32(0x400000)), 1); + middlehighbyte = _mm_xor_si128(correct, middlehighbyte); + const __m128i middlehighbyte_shifted = _mm_srli_epi32(middlehighbyte, 4); + const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi32(0x07000000)); + const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 6); + const __m128i composed = + _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), + _mm_or_si128(highbyte_shifted, middlehighbyte_shifted)); + _mm_storeu_si128((__m128i *)utf32_output, composed); + utf32_output += + 3; // We wrote 3 * 4 bytes, there is a potential overflow of 4 bytes. + } else { + // here we know that there is an error but we do not handle errors + } + return consumed; +} +/* end file src/haswell/avx2_convert_utf8_to_utf32.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/haswell/avx2_convert_utf16_to_latin1.cpp */ +template +std::pair +avx2_convert_utf16_to_latin1(const char16_t *buf, size_t len, + char *latin1_output) { + const char16_t *end = buf + len; + while (end - buf >= 32) { + // Load 16 UTF-16 characters into 256-bit AVX2 register + __m256i in0 = _mm256_loadu_si256(reinterpret_cast(buf)); + __m256i in1 = + _mm256_loadu_si256(reinterpret_cast(buf + 16)); + + if simdutf_constexpr (!match_system(big_endian)) { + const __m256i swap = _mm256_setr_epi8( + 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18, + 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30); + in0 = _mm256_shuffle_epi8(in0, swap); + in1 = _mm256_shuffle_epi8(in1, swap); + } + + __m256i high_byte_mask = _mm256_set1_epi16((int16_t)0xFF00); + if (_mm256_testz_si256(_mm256_or_si256(in0, in1), high_byte_mask)) { + // Pack 16-bit characters into 8-bit and store in latin1_output + const __m256i packed = _mm256_packus_epi16(in0, in1); + + const __m256i result = _mm256_permute4x64_epi64(packed, 0b11011000); + + 
_mm256_storeu_si256(reinterpret_cast<__m256i *>(latin1_output), result); + // Adjust pointers for the next iteration + buf += 32; + latin1_output += 32; + } else { + return std::make_pair(nullptr, reinterpret_cast(latin1_output)); + } + } // while + return std::make_pair(buf, latin1_output); +} + +template +std::pair +avx2_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, + char *latin1_output) { + const char16_t *start = buf; + const char16_t *end = buf + len; + while (end - buf >= 16) { + __m256i in = _mm256_loadu_si256(reinterpret_cast(buf)); + + if simdutf_constexpr (!match_system(big_endian)) { + const __m256i swap = _mm256_setr_epi8( + 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18, + 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30); + in = _mm256_shuffle_epi8(in, swap); + } + + __m256i high_byte_mask = _mm256_set1_epi16((int16_t)0xFF00); + if (_mm256_testz_si256(in, high_byte_mask)) { + __m128i lo = _mm256_extractf128_si256(in, 0); + __m128i hi = _mm256_extractf128_si256(in, 1); + __m128i latin1_packed_lo = _mm_packus_epi16(lo, lo); + __m128i latin1_packed_hi = _mm_packus_epi16(hi, hi); + _mm_storel_epi64(reinterpret_cast<__m128i *>(latin1_output), + latin1_packed_lo); + _mm_storel_epi64(reinterpret_cast<__m128i *>(latin1_output + 8), + latin1_packed_hi); + buf += 16; + latin1_output += 16; + } else { + // Fallback to scalar code for handling errors + for (int k = 0; k < 16; k++) { + uint16_t word = scalar::utf16::swap_if_needed(buf[k]); + if (word <= 0xff) { + *latin1_output++ = char(word); + } else { + return std::make_pair( + result{error_code::TOO_LARGE, (size_t)(buf - start + k)}, + latin1_output); + } + } + buf += 16; + } + } // while + return std::make_pair(result{error_code::SUCCESS, (size_t)(buf - start)}, + latin1_output); +} +/* end file src/haswell/avx2_convert_utf16_to_latin1.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +/* begin 
file src/haswell/avx2_convert_utf16_to_utf8.cpp */ +/* + The vectorized algorithm works on single SSE register i.e., it + loads eight 16-bit code units. + + We consider three cases: + 1. an input register contains no surrogates and each value + is in range 0x0000 .. 0x07ff. + 2. an input register contains no surrogates and values are + is in range 0x0000 .. 0xffff. + 3. an input register contains surrogates --- i.e. codepoints + can have 16 or 32 bits. + + Ad 1. + + When values are less than 0x0800, it means that a 16-bit code unit + can be converted into: 1) single UTF8 byte (when it is an ASCII + char) or 2) two UTF8 bytes. + + For this case we do only some shuffle to obtain these 2-byte + codes and finally compress the whole SSE register with a single + shuffle. + + We need 256-entry lookup table to get a compression pattern + and the number of output bytes in the compressed vector register. + Each entry occupies 17 bytes. + + Ad 2. + + When values fit in 16-bit code units, but are above 0x07ff, then + a single word may produce one, two or three UTF8 bytes. + + We prepare data for all these three cases in two registers. + The first register contains lower two UTF8 bytes (used in all + cases), while the second one contains just the third byte for + the three-UTF8-bytes case. + + Finally these two registers are interleaved forming eight-element + array of 32-bit values. The array spans two SSE registers. + The bytes from the registers are compressed using two shuffles. + + We need 256-entry lookup table to get a compression pattern + and the number of output bytes in the compressed vector register. + Each entry occupies 17 bytes. + + + To summarize: + - We need two 256-entry tables that have 8704 bytes in total. +*/ + +/* + Returns a pair: the first unprocessed byte from buf and utf8_output + A scalar routing should carry on the conversion of the tail. 
+*/ +template +std::pair +avx2_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_output) { + const char16_t *end = buf + len; + const __m256i v_0000 = _mm256_setzero_si256(); + const __m256i v_f800 = _mm256_set1_epi16((int16_t)0xf800); + const __m256i v_d800 = _mm256_set1_epi16((int16_t)0xd800); + const __m256i v_c080 = _mm256_set1_epi16((int16_t)0xc080); + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 + + while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { + __m256i in = _mm256_loadu_si256((__m256i *)buf); + if (big_endian) { + const __m256i swap = _mm256_setr_epi8( + 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18, + 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30); + in = _mm256_shuffle_epi8(in, swap); + } + // a single 16-bit UTF-16 word can yield 1, 2 or 3 UTF-8 bytes + const __m256i v_ff80 = _mm256_set1_epi16((int16_t)0xff80); + if (_mm256_testz_si256(in, v_ff80)) { // ASCII fast path!!!! + // 1. pack the bytes + const __m128i utf8_packed = _mm_packus_epi16( + _mm256_castsi256_si128(in), _mm256_extractf128_si256(in, 1)); + // 2. store (16 bytes) + _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! + } + // no bits set above 7th bit + const __m256i one_byte_bytemask = + _mm256_cmpeq_epi16(_mm256_and_si256(in, v_ff80), v_0000); + const uint32_t one_byte_bitmask = + static_cast(_mm256_movemask_epi8(one_byte_bytemask)); + + // no bits set above 11th bit + const __m256i one_or_two_bytes_bytemask = + _mm256_cmpeq_epi16(_mm256_and_si256(in, v_f800), v_0000); + const uint32_t one_or_two_bytes_bitmask = + static_cast(_mm256_movemask_epi8(one_or_two_bytes_bytemask)); + if (one_or_two_bytes_bitmask == 0xffffffff) { + + // 1. 
prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 + // expected output : [110a|aaaa|10bb|bbbb] x 8 + const __m256i v_1f00 = _mm256_set1_epi16((int16_t)0x1f00); + const __m256i v_003f = _mm256_set1_epi16((int16_t)0x003f); + + // t0 = [000a|aaaa|bbbb|bb00] + const __m256i t0 = _mm256_slli_epi16(in, 2); + // t1 = [000a|aaaa|0000|0000] + const __m256i t1 = _mm256_and_si256(t0, v_1f00); + // t2 = [0000|0000|00bb|bbbb] + const __m256i t2 = _mm256_and_si256(in, v_003f); + // t3 = [000a|aaaa|00bb|bbbb] + const __m256i t3 = _mm256_or_si256(t1, t2); + // t4 = [110a|aaaa|10bb|bbbb] + const __m256i t4 = _mm256_or_si256(t3, v_c080); + + // 2. merge ASCII and 2-byte codewords + const __m256i utf8_unpacked = + _mm256_blendv_epi8(t4, in, one_byte_bytemask); + + // 3. prepare bitmask for 8-bit lookup + const uint32_t M0 = one_byte_bitmask & 0x55555555; + const uint32_t M1 = M0 >> 7; + const uint32_t M2 = (M1 | M0) & 0x00ff00ff; + // 4. pack the bytes + + const uint8_t *row = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0]; + const uint8_t *row_2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2 >> + 16)][0]; + + const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1)); + const __m128i shuffle_2 = _mm_loadu_si128((__m128i *)(row_2 + 1)); + + const __m256i utf8_packed = _mm256_shuffle_epi8( + utf8_unpacked, _mm256_setr_m128i(shuffle, shuffle_2)); + // 5. store bytes + _mm_storeu_si128((__m128i *)utf8_output, + _mm256_castsi256_si128(utf8_packed)); + utf8_output += row[0]; + _mm_storeu_si128((__m128i *)utf8_output, + _mm256_extractf128_si256(utf8_packed, 1)); + utf8_output += row_2[0]; + + // 6. adjust pointers + buf += 16; + continue; + } + // 1. Check if there are any surrogate word in the input chunk. + // We have also deal with situation when there is a surrogate word + // at the end of a chunk. 
+ const __m256i surrogates_bytemask = + _mm256_cmpeq_epi16(_mm256_and_si256(in, v_f800), v_d800); + + // bitmask = 0x0000 if there are no surrogates + // = 0xc000 if the last word is a surrogate + const uint32_t surrogates_bitmask = + static_cast(_mm256_movemask_epi8(surrogates_bytemask)); + // It might seem like checking for surrogates_bitmask == 0xc000 could help. + // However, it is likely an uncommon occurrence. + if (surrogates_bitmask == 0x00000000) { + // case: code units from register produce either 1, 2 or 3 UTF-8 bytes + const __m256i dup_even = _mm256_setr_epi16( + 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e, + 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); + + /* In this branch we handle three cases: + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + single UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two + UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + three UTF-8 bytes + + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. + + We precompute byte 1 for case #1 and the common byte for cases #2 & #3 + in register t2. + + We precompute byte 1 for case #3 and -- **conditionally** -- precompute + either byte 1 for case #2 or byte 2 for case #3. Note that they + differ by exactly one bit. + + Finally from these two code units we build proper UTF-8 sequence, taking + into account the case (i.e, the number of bytes to write). 
+ */ + /** + * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: + * t2 => [0ccc|cccc] [10cc|cccc] + * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) + */ +#define simdutf_vec(x) _mm256_set1_epi16(static_cast(x)) + // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] + const __m256i t0 = _mm256_shuffle_epi8(in, dup_even); + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] + const __m256i t1 = _mm256_and_si256(t0, simdutf_vec(0b0011111101111111)); + // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] + const __m256i t2 = _mm256_or_si256(t1, simdutf_vec(0b1000000000000000)); + + // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] + const __m256i s0 = _mm256_srli_epi16(in, 4); + // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] + const __m256i s1 = _mm256_and_si256(s0, simdutf_vec(0b0000111111111100)); + // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] + const __m256i s2 = _mm256_maddubs_epi16(s1, simdutf_vec(0x0140)); + // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] + const __m256i s3 = _mm256_or_si256(s2, simdutf_vec(0b1100000011100000)); + const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, + simdutf_vec(0b0100000000000000)); + const __m256i s4 = _mm256_xor_si256(s3, m0); +#undef simdutf_vec + + // 4. expand code units 16-bit => 32-bit + const __m256i out0 = _mm256_unpacklo_epi16(t2, s4); + const __m256i out1 = _mm256_unpackhi_epi16(t2, s4); + + // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle + const uint32_t mask = (one_byte_bitmask & 0x55555555) | + (one_or_two_bytes_bitmask & 0xaaaaaaaa); + // Due to the wider registers, the following path is less likely to be + // useful. + /*if(mask == 0) { + // We only have three-byte code units. Use fast path. 
+ const __m256i shuffle = + _mm256_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1, + 2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1); const __m256i utf8_0 = + _mm256_shuffle_epi8(out0, shuffle); const __m256i utf8_1 = + _mm256_shuffle_epi8(out1, shuffle); + _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_0)); + utf8_output += 12; + _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_1)); + utf8_output += 12; + _mm_storeu_si128((__m128i*)utf8_output, + _mm256_extractf128_si256(utf8_0,1)); utf8_output += 12; + _mm_storeu_si128((__m128i*)utf8_output, + _mm256_extractf128_si256(utf8_1,1)); utf8_output += 12; buf += 16; + continue; + }*/ + const uint8_t mask0 = uint8_t(mask); + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; + const __m128i shuffle0 = _mm_loadu_si128((__m128i *)(row0 + 1)); + const __m128i utf8_0 = + _mm_shuffle_epi8(_mm256_castsi256_si128(out0), shuffle0); + + const uint8_t mask1 = static_cast(mask >> 8); + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; + const __m128i shuffle1 = _mm_loadu_si128((__m128i *)(row1 + 1)); + const __m128i utf8_1 = + _mm_shuffle_epi8(_mm256_castsi256_si128(out1), shuffle1); + + const uint8_t mask2 = static_cast(mask >> 16); + const uint8_t *row2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask2][0]; + const __m128i shuffle2 = _mm_loadu_si128((__m128i *)(row2 + 1)); + const __m128i utf8_2 = + _mm_shuffle_epi8(_mm256_extractf128_si256(out0, 1), shuffle2); + + const uint8_t mask3 = static_cast(mask >> 24); + const uint8_t *row3 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask3][0]; + const __m128i shuffle3 = _mm_loadu_si128((__m128i *)(row3 + 1)); + const __m128i utf8_3 = + _mm_shuffle_epi8(_mm256_extractf128_si256(out1, 1), shuffle3); + + _mm_storeu_si128((__m128i *)utf8_output, utf8_0); + utf8_output += row0[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_1); + utf8_output 
+= row1[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_2); + utf8_output += row2[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_3); + utf8_output += row3[0]; + buf += 16; + // surrogate pair(s) in a register + } else { + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint16_t word = scalar::utf16::swap_if_needed(buf[k]); + if ((word & 0xFF80) == 0) { + *utf8_output++ = char(word); + } else if ((word & 0xF800) == 0) { + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xF800) != 0xD800) { + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = + scalar::utf16::swap_if_needed(buf[k + 1]); + k++; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return std::make_pair(nullptr, utf8_output); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf8_output++ = char((value >> 18) | 0b11110000); + *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((value & 0b111111) | 0b10000000); + } + } + buf += k; + } + } // while + return std::make_pair(buf, utf8_output); +} + +/* + Returns a pair: a result struct and utf8_output. + If there is an error, the count field of the result is the position of the + error. Otherwise, it is the position of the first unprocessed byte in buf + (even if finished). 
A scalar routing should carry on the conversion of the + tail if needed. +*/ +template +std::pair +avx2_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, + char *utf8_output) { + const char16_t *start = buf; + const char16_t *end = buf + len; + + const __m256i v_0000 = _mm256_setzero_si256(); + const __m256i v_f800 = _mm256_set1_epi16((int16_t)0xf800); + const __m256i v_d800 = _mm256_set1_epi16((int16_t)0xd800); + const __m256i v_c080 = _mm256_set1_epi16((int16_t)0xc080); + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 + + while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { + __m256i in = _mm256_loadu_si256((__m256i *)buf); + if (big_endian) { + const __m256i swap = _mm256_setr_epi8( + 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18, + 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30); + in = _mm256_shuffle_epi8(in, swap); + } + // a single 16-bit UTF-16 word can yield 1, 2 or 3 UTF-8 bytes + const __m256i v_ff80 = _mm256_set1_epi16((int16_t)0xff80); + if (_mm256_testz_si256(in, v_ff80)) { // ASCII fast path!!!! + // 1. pack the bytes + const __m128i utf8_packed = _mm_packus_epi16( + _mm256_castsi256_si128(in), _mm256_extractf128_si256(in, 1)); + // 2. store (16 bytes) + _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! + } + // no bits set above 7th bit + const __m256i one_byte_bytemask = + _mm256_cmpeq_epi16(_mm256_and_si256(in, v_ff80), v_0000); + const uint32_t one_byte_bitmask = + static_cast(_mm256_movemask_epi8(one_byte_bytemask)); + + // no bits set above 11th bit + const __m256i one_or_two_bytes_bytemask = + _mm256_cmpeq_epi16(_mm256_and_si256(in, v_f800), v_0000); + const uint32_t one_or_two_bytes_bitmask = + static_cast(_mm256_movemask_epi8(one_or_two_bytes_bytemask)); + if (one_or_two_bytes_bitmask == 0xffffffff) { + + // 1. 
prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 + // expected output : [110a|aaaa|10bb|bbbb] x 8 + const __m256i v_1f00 = _mm256_set1_epi16((int16_t)0x1f00); + const __m256i v_003f = _mm256_set1_epi16((int16_t)0x003f); + + // t0 = [000a|aaaa|bbbb|bb00] + const __m256i t0 = _mm256_slli_epi16(in, 2); + // t1 = [000a|aaaa|0000|0000] + const __m256i t1 = _mm256_and_si256(t0, v_1f00); + // t2 = [0000|0000|00bb|bbbb] + const __m256i t2 = _mm256_and_si256(in, v_003f); + // t3 = [000a|aaaa|00bb|bbbb] + const __m256i t3 = _mm256_or_si256(t1, t2); + // t4 = [110a|aaaa|10bb|bbbb] + const __m256i t4 = _mm256_or_si256(t3, v_c080); + + // 2. merge ASCII and 2-byte codewords + const __m256i utf8_unpacked = + _mm256_blendv_epi8(t4, in, one_byte_bytemask); + + // 3. prepare bitmask for 8-bit lookup + const uint32_t M0 = one_byte_bitmask & 0x55555555; + const uint32_t M1 = M0 >> 7; + const uint32_t M2 = (M1 | M0) & 0x00ff00ff; + // 4. pack the bytes + + const uint8_t *row = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0]; + const uint8_t *row_2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2 >> + 16)][0]; + + const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1)); + const __m128i shuffle_2 = _mm_loadu_si128((__m128i *)(row_2 + 1)); + + const __m256i utf8_packed = _mm256_shuffle_epi8( + utf8_unpacked, _mm256_setr_m128i(shuffle, shuffle_2)); + // 5. store bytes + _mm_storeu_si128((__m128i *)utf8_output, + _mm256_castsi256_si128(utf8_packed)); + utf8_output += row[0]; + _mm_storeu_si128((__m128i *)utf8_output, + _mm256_extractf128_si256(utf8_packed, 1)); + utf8_output += row_2[0]; + + // 6. adjust pointers + buf += 16; + continue; + } + // 1. Check if there are any surrogate word in the input chunk. + // We have also deal with situation when there is a surrogate word + // at the end of a chunk. 
+ const __m256i surrogates_bytemask = + _mm256_cmpeq_epi16(_mm256_and_si256(in, v_f800), v_d800); + + // bitmask = 0x0000 if there are no surrogates + // = 0xc000 if the last word is a surrogate + const uint32_t surrogates_bitmask = + static_cast(_mm256_movemask_epi8(surrogates_bytemask)); + // It might seem like checking for surrogates_bitmask == 0xc000 could help. + // However, it is likely an uncommon occurrence. + if (surrogates_bitmask == 0x00000000) { + // case: code units from register produce either 1, 2 or 3 UTF-8 bytes + const __m256i dup_even = _mm256_setr_epi16( + 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e, + 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); + + /* In this branch we handle three cases: + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + single UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two + UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + three UTF-8 bytes + + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. + + We precompute byte 1 for case #1 and the common byte for cases #2 & #3 + in register t2. + + We precompute byte 1 for case #3 and -- **conditionally** -- precompute + either byte 1 for case #2 or byte 2 for case #3. Note that they + differ by exactly one bit. + + Finally from these two code units we build proper UTF-8 sequence, taking + into account the case (i.e, the number of bytes to write). 
+ */ + /** + * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: + * t2 => [0ccc|cccc] [10cc|cccc] + * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) + */ +#define simdutf_vec(x) _mm256_set1_epi16(static_cast(x)) + // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] + const __m256i t0 = _mm256_shuffle_epi8(in, dup_even); + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] + const __m256i t1 = _mm256_and_si256(t0, simdutf_vec(0b0011111101111111)); + // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] + const __m256i t2 = _mm256_or_si256(t1, simdutf_vec(0b1000000000000000)); + + // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] + const __m256i s0 = _mm256_srli_epi16(in, 4); + // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] + const __m256i s1 = _mm256_and_si256(s0, simdutf_vec(0b0000111111111100)); + // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] + const __m256i s2 = _mm256_maddubs_epi16(s1, simdutf_vec(0x0140)); + // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] + const __m256i s3 = _mm256_or_si256(s2, simdutf_vec(0b1100000011100000)); + const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, + simdutf_vec(0b0100000000000000)); + const __m256i s4 = _mm256_xor_si256(s3, m0); +#undef simdutf_vec + + // 4. expand code units 16-bit => 32-bit + const __m256i out0 = _mm256_unpacklo_epi16(t2, s4); + const __m256i out1 = _mm256_unpackhi_epi16(t2, s4); + + // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle + const uint32_t mask = (one_byte_bitmask & 0x55555555) | + (one_or_two_bytes_bitmask & 0xaaaaaaaa); + // Due to the wider registers, the following path is less likely to be + // useful. + /*if(mask == 0) { + // We only have three-byte code units. Use fast path. 
+ const __m256i shuffle = + _mm256_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1, + 2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1); const __m256i utf8_0 = + _mm256_shuffle_epi8(out0, shuffle); const __m256i utf8_1 = + _mm256_shuffle_epi8(out1, shuffle); + _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_0)); + utf8_output += 12; + _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_1)); + utf8_output += 12; + _mm_storeu_si128((__m128i*)utf8_output, + _mm256_extractf128_si256(utf8_0,1)); utf8_output += 12; + _mm_storeu_si128((__m128i*)utf8_output, + _mm256_extractf128_si256(utf8_1,1)); utf8_output += 12; buf += 16; + continue; + }*/ + const uint8_t mask0 = uint8_t(mask); + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; + const __m128i shuffle0 = _mm_loadu_si128((__m128i *)(row0 + 1)); + const __m128i utf8_0 = + _mm_shuffle_epi8(_mm256_castsi256_si128(out0), shuffle0); + + const uint8_t mask1 = static_cast(mask >> 8); + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; + const __m128i shuffle1 = _mm_loadu_si128((__m128i *)(row1 + 1)); + const __m128i utf8_1 = + _mm_shuffle_epi8(_mm256_castsi256_si128(out1), shuffle1); + + const uint8_t mask2 = static_cast(mask >> 16); + const uint8_t *row2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask2][0]; + const __m128i shuffle2 = _mm_loadu_si128((__m128i *)(row2 + 1)); + const __m128i utf8_2 = + _mm_shuffle_epi8(_mm256_extractf128_si256(out0, 1), shuffle2); + + const uint8_t mask3 = static_cast(mask >> 24); + const uint8_t *row3 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask3][0]; + const __m128i shuffle3 = _mm_loadu_si128((__m128i *)(row3 + 1)); + const __m128i utf8_3 = + _mm_shuffle_epi8(_mm256_extractf128_si256(out1, 1), shuffle3); + + _mm_storeu_si128((__m128i *)utf8_output, utf8_0); + utf8_output += row0[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_1); + utf8_output 
+= row1[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_2); + utf8_output += row2[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_3); + utf8_output += row3[0]; + buf += 16; + // surrogate pair(s) in a register + } else { + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint16_t word = scalar::utf16::swap_if_needed(buf[k]); + if ((word & 0xFF80) == 0) { + *utf8_output++ = char(word); + } else if ((word & 0xF800) == 0) { + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xF800) != 0xD800) { + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = + scalar::utf16::swap_if_needed(buf[k + 1]); + k++; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return std::make_pair( + result(error_code::SURROGATE, buf - start + k - 1), + utf8_output); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf8_output++ = char((value >> 18) | 0b11110000); + *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((value & 0b111111) | 0b10000000); + } + } + buf += k; + } + } // while + return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output); +} +/* end file src/haswell/avx2_convert_utf16_to_utf8.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +/* begin file 
src/haswell/avx2_convert_utf16_to_utf32.cpp */ +/* + The vectorized algorithm works on single SSE register i.e., it + loads eight 16-bit code units. + + We consider three cases: + 1. an input register contains no surrogates and each value + is in range 0x0000 .. 0x07ff. + 2. an input register contains no surrogates and values are + in range 0x0000 .. 0xffff. + 3. an input register contains surrogates --- i.e. codepoints + can have 16 or 32 bits. + + Ad 1. + + When values are less than 0x0800, it means that a 16-bit code unit + can be converted into: 1) single UTF8 byte (when it is an ASCII + char) or 2) two UTF8 bytes. + + For this case we do only some shuffle to obtain these 2-byte + codes and finally compress the whole SSE register with a single + shuffle. + + We need 256-entry lookup table to get a compression pattern + and the number of output bytes in the compressed vector register. + Each entry occupies 17 bytes. + + Ad 2. + + When values fit in 16-bit code units, but are above 0x07ff, then + a single word may produce one, two or three UTF8 bytes. + + We prepare data for all these three cases in two registers. + The first register contains lower two UTF8 bytes (used in all + cases), while the second one contains just the third byte for + the three-UTF8-bytes case. + + Finally these two registers are interleaved forming eight-element + array of 32-bit values. The array spans two SSE registers. + The bytes from the registers are compressed using two shuffles. + + We need 256-entry lookup table to get a compression pattern + and the number of output bytes in the compressed vector register. + Each entry occupies 17 bytes. + + + To summarize: + - We need two 256-entry tables that have 8704 bytes in total. +*/ + +/* + Returns a pair: the first unprocessed byte from buf and utf32_output + A scalar routing should carry on the conversion of the tail. 
+*/ +template +std::pair +avx2_convert_utf16_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_output) { + const char16_t *end = buf + len; + const __m256i v_f800 = _mm256_set1_epi16((int16_t)0xf800); + const __m256i v_d800 = _mm256_set1_epi16((int16_t)0xd800); + + while (end - buf >= 16) { + __m256i in = _mm256_loadu_si256((__m256i *)buf); + if (big_endian) { + const __m256i swap = _mm256_setr_epi8( + 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18, + 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30); + in = _mm256_shuffle_epi8(in, swap); + } + + // 1. Check if there are any surrogate word in the input chunk. + // We have also deal with situation when there is a surrogate word + // at the end of a chunk. + const __m256i surrogates_bytemask = + _mm256_cmpeq_epi16(_mm256_and_si256(in, v_f800), v_d800); + + // bitmask = 0x0000 if there are no surrogates + // = 0xc000 if the last word is a surrogate + const uint32_t surrogates_bitmask = + static_cast(_mm256_movemask_epi8(surrogates_bytemask)); + // It might seem like checking for surrogates_bitmask == 0xc000 could help. + // However, it is likely an uncommon occurrence. + if (surrogates_bitmask == 0x00000000) { + // case: we extend all sixteen 16-bit code units to sixteen 32-bit code + // units + _mm256_storeu_si256(reinterpret_cast<__m256i *>(utf32_output), + _mm256_cvtepu16_epi32(_mm256_castsi256_si128(in))); + _mm256_storeu_si256( + reinterpret_cast<__m256i *>(utf32_output + 8), + _mm256_cvtepu16_epi32(_mm256_extractf128_si256(in, 1))); + utf32_output += 16; + buf += 16; + // surrogate pair(s) in a register + } else { + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. 
+ size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint16_t word = scalar::utf16::swap_if_needed(buf[k]); + if ((word & 0xF800) != 0xD800) { + // No surrogate pair + *utf32_output++ = char32_t(word); + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = + scalar::utf16::swap_if_needed(buf[k + 1]); + k++; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return std::make_pair(nullptr, utf32_output); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf32_output++ = char32_t(value); + } + } + buf += k; + } + } // while + return std::make_pair(buf, utf32_output); +} + +/* + Returns a pair: a result struct and utf8_output. + If there is an error, the count field of the result is the position of the + error. Otherwise, it is the position of the first unprocessed byte in buf + (even if finished). A scalar routing should carry on the conversion of the + tail if needed. +*/ +template +std::pair +avx2_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, + char32_t *utf32_output) { + const char16_t *start = buf; + const char16_t *end = buf + len; + const __m256i v_f800 = _mm256_set1_epi16((int16_t)0xf800); + const __m256i v_d800 = _mm256_set1_epi16((int16_t)0xd800); + + while (end - buf >= 16) { + __m256i in = _mm256_loadu_si256((__m256i *)buf); + if (big_endian) { + const __m256i swap = _mm256_setr_epi8( + 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18, + 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30); + in = _mm256_shuffle_epi8(in, swap); + } + + // 1. Check if there are any surrogate word in the input chunk. + // We have also deal with situation when there is a surrogate word + // at the end of a chunk. 
+ const __m256i surrogates_bytemask = + _mm256_cmpeq_epi16(_mm256_and_si256(in, v_f800), v_d800); + + // bitmask = 0x0000 if there are no surrogates + // = 0xc000 if the last word is a surrogate + const uint32_t surrogates_bitmask = + static_cast(_mm256_movemask_epi8(surrogates_bytemask)); + // It might seem like checking for surrogates_bitmask == 0xc000 could help. + // However, it is likely an uncommon occurrence. + if (surrogates_bitmask == 0x00000000) { + // case: we extend all sixteen 16-bit code units to sixteen 32-bit code + // units + _mm256_storeu_si256(reinterpret_cast<__m256i *>(utf32_output), + _mm256_cvtepu16_epi32(_mm256_castsi256_si128(in))); + _mm256_storeu_si256( + reinterpret_cast<__m256i *>(utf32_output + 8), + _mm256_cvtepu16_epi32(_mm256_extractf128_si256(in, 1))); + utf32_output += 16; + buf += 16; + // surrogate pair(s) in a register + } else { + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. 
+ size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint16_t word = scalar::utf16::swap_if_needed(buf[k]); + if ((word & 0xF800) != 0xD800) { + // No surrogate pair + *utf32_output++ = char32_t(word); + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = + scalar::utf16::swap_if_needed(buf[k + 1]); + k++; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return std::make_pair( + result(error_code::SURROGATE, buf - start + k - 1), + utf32_output); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf32_output++ = char32_t(value); + } + } + buf += k; + } + } // while + return std::make_pair(result(error_code::SUCCESS, buf - start), utf32_output); +} +/* end file src/haswell/avx2_convert_utf16_to_utf32.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/haswell/avx2_convert_utf32_to_latin1.cpp */ +std::pair +avx2_convert_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) { + const size_t rounded_len = + len & ~0x1F; // Round down to nearest multiple of 32 + + const __m256i high_bytes_mask = _mm256_set1_epi32(0xFFFFFF00); + + for (size_t i = 0; i < rounded_len; i += 4 * 8) { + __m256i a = _mm256_loadu_si256((__m256i *)(buf + 0 * 8)); + __m256i b = _mm256_loadu_si256((__m256i *)(buf + 1 * 8)); + __m256i c = _mm256_loadu_si256((__m256i *)(buf + 2 * 8)); + __m256i d = _mm256_loadu_si256((__m256i *)(buf + 3 * 8)); + + const __m256i check_combined = + _mm256_or_si256(_mm256_or_si256(a, b), _mm256_or_si256(c, d)); + + if (!_mm256_testz_si256(check_combined, high_bytes_mask)) { + return std::make_pair(nullptr, latin1_output); + } + + b = _mm256_slli_epi32(b, 1 * 8); + c = _mm256_slli_epi32(c, 2 * 8); + d = _mm256_slli_epi32(d, 3 * 8); + + // clang-format off + + // a = [.. .. .. 
a7|.. .. .. a6|.. .. .. a5|.. .. .. a4||.. .. .. a3|.. .. .. a2|.. .. .. a1|.. .. .. a0] + // b = [.. .. b7 ..|.. .. b6 ..|.. .. b5 ..|.. .. b4 ..||.. .. b3 ..|.. .. b2 ..|.. .. b1 ..|.. .. b0 ..] + // c = [.. c7 .. ..|.. c6 .. ..|.. c5 .. ..|.. c4 .. ..||.. c3 .. ..|.. c2 .. ..|.. c1 .. ..|.. c0 .. ..] + // d = [d7 .. .. ..|d6 .. .. ..|d5 .. .. ..|d4 .. .. ..||d3 .. .. ..|d2 .. .. ..|d1 .. .. ..|d0 .. .. ..] + + // t0 = [d7 c7 b7 a7|d6 c6 b6 a6|d5 c5 b5 a5|d4 c4 b4 a4||d3 c3 b3 a3|d2 c2 b2 a2|d1 c1 b1 a1|d0 c0 b0 a0] + const __m256i t0 = + _mm256_or_si256(_mm256_or_si256(a, b), _mm256_or_si256(c, d)); + + // shuffle bytes within 128-bit lanes + // t1 = [d7 d6 d5 d4|c7 c6 c5 c4|b7 b6 b5 b4|a7 a6 a5 a4||d3 d2 d1 d0|c3 c2 c1 c0|b3 b2 b1 b0|a3 a2 a1 a0] + const __m256i shuffle_bytes = + _mm256_setr_epi8(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15, + 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15); + + const __m256i t1 = _mm256_shuffle_epi8(t0, shuffle_bytes); + + // reshuffle dwords + // t2 = [d7 d6 d5 d4|d3 d2 d1 d0|c7 c6 c5 c4|c3 c2 c1 c0||b7 b6 b5 b4|b3 b2 b1 b0|a7 a6 a5 a4|a3 a2 a1 a0] + const __m256i shuffle_dwords = _mm256_setr_epi32(0, 4, 1, 5, 2, 6, 3, 7); + const __m256i t2 = _mm256_permutevar8x32_epi32(t1, shuffle_dwords); +// clang-format on + + _mm256_storeu_si256((__m256i *)latin1_output, t2); + + latin1_output += 32; + buf += 32; + } + + return std::make_pair(buf, latin1_output); +} + +std::pair +avx2_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, + char *latin1_output) { + const size_t rounded_len = + len & ~0x1F; // Round down to nearest multiple of 32 + + const char32_t *start = buf; + + const __m256i high_bytes_mask = _mm256_set1_epi32(0xFFFFFF00); + + for (size_t i = 0; i < rounded_len; i += 4 * 8) { + __m256i a = _mm256_loadu_si256((__m256i *)(buf + 0 * 8)); + __m256i b = _mm256_loadu_si256((__m256i *)(buf + 1 * 8)); + __m256i c = _mm256_loadu_si256((__m256i *)(buf + 2 * 8)); + __m256i d = 
_mm256_loadu_si256((__m256i *)(buf + 3 * 8)); + + const __m256i check_combined = + _mm256_or_si256(_mm256_or_si256(a, b), _mm256_or_si256(c, d)); + + if (!_mm256_testz_si256(check_combined, high_bytes_mask)) { + // Fallback to scalar code for handling errors + for (int k = 0; k < 4 * 8; k++) { + char32_t codepoint = buf[k]; + if (codepoint <= 0xFF) { + *latin1_output++ = static_cast(codepoint); + } else { + return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), + latin1_output); + } + } + } + + b = _mm256_slli_epi32(b, 1 * 8); + c = _mm256_slli_epi32(c, 2 * 8); + d = _mm256_slli_epi32(d, 3 * 8); + + const __m256i t0 = + _mm256_or_si256(_mm256_or_si256(a, b), _mm256_or_si256(c, d)); + + const __m256i shuffle_bytes = + _mm256_setr_epi8(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15, + 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15); + + const __m256i t1 = _mm256_shuffle_epi8(t0, shuffle_bytes); + + const __m256i shuffle_dwords = _mm256_setr_epi32(0, 4, 1, 5, 2, 6, 3, 7); + const __m256i t2 = _mm256_permutevar8x32_epi32(t1, shuffle_dwords); + + _mm256_storeu_si256((__m256i *)latin1_output, t2); + + latin1_output += 32; + buf += 32; + } + + return std::make_pair(result(error_code::SUCCESS, buf - start), + latin1_output); +} +/* end file src/haswell/avx2_convert_utf32_to_latin1.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +/* begin file src/haswell/avx2_convert_utf32_to_utf8.cpp */ +std::pair +avx2_convert_utf32_to_utf8(const char32_t *buf, size_t len, char *utf8_output) { + const char32_t *end = buf + len; + const __m256i v_0000 = _mm256_setzero_si256(); + const __m256i v_ffff0000 = _mm256_set1_epi32((uint32_t)0xffff0000); + const __m256i v_ff80 = _mm256_set1_epi16((uint16_t)0xff80); + const __m256i v_f800 = _mm256_set1_epi16((uint16_t)0xf800); + const __m256i v_c080 = _mm256_set1_epi16((uint16_t)0xc080); + const __m256i v_7fffffff = 
_mm256_set1_epi32((uint32_t)0x7fffffff); + __m256i running_max = _mm256_setzero_si256(); + __m256i forbidden_bytemask = _mm256_setzero_si256(); + + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 + + while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { + __m256i in = _mm256_loadu_si256((__m256i *)buf); + __m256i nextin = _mm256_loadu_si256((__m256i *)buf + 1); + running_max = _mm256_max_epu32(_mm256_max_epu32(in, running_max), nextin); + + // Pack 32-bit UTF-32 code units to 16-bit UTF-16 code units with unsigned + // saturation + __m256i in_16 = _mm256_packus_epi32(_mm256_and_si256(in, v_7fffffff), + _mm256_and_si256(nextin, v_7fffffff)); + in_16 = _mm256_permute4x64_epi64(in_16, 0b11011000); + + // Try to apply UTF-16 => UTF-8 routine on 256 bits + // (haswell/avx2_convert_utf16_to_utf8.cpp) + + if (_mm256_testz_si256(in_16, v_ff80)) { // ASCII fast path!!!! + // 1. pack the bytes + const __m128i utf8_packed = _mm_packus_epi16( + _mm256_castsi256_si128(in_16), _mm256_extractf128_si256(in_16, 1)); + // 2. store (16 bytes) + _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! + } + // no bits set above 7th bit + const __m256i one_byte_bytemask = + _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_ff80), v_0000); + const uint32_t one_byte_bitmask = + static_cast(_mm256_movemask_epi8(one_byte_bytemask)); + + // no bits set above 11th bit + const __m256i one_or_two_bytes_bytemask = + _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_0000); + const uint32_t one_or_two_bytes_bitmask = + static_cast(_mm256_movemask_epi8(one_or_two_bytes_bytemask)); + if (one_or_two_bytes_bitmask == 0xffffffff) { + // 1. 
prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 + // expected output : [110a|aaaa|10bb|bbbb] x 8 + const __m256i v_1f00 = _mm256_set1_epi16((int16_t)0x1f00); + const __m256i v_003f = _mm256_set1_epi16((int16_t)0x003f); + + // t0 = [000a|aaaa|bbbb|bb00] + const __m256i t0 = _mm256_slli_epi16(in_16, 2); + // t1 = [000a|aaaa|0000|0000] + const __m256i t1 = _mm256_and_si256(t0, v_1f00); + // t2 = [0000|0000|00bb|bbbb] + const __m256i t2 = _mm256_and_si256(in_16, v_003f); + // t3 = [000a|aaaa|00bb|bbbb] + const __m256i t3 = _mm256_or_si256(t1, t2); + // t4 = [110a|aaaa|10bb|bbbb] + const __m256i t4 = _mm256_or_si256(t3, v_c080); + + // 2. merge ASCII and 2-byte codewords + const __m256i utf8_unpacked = + _mm256_blendv_epi8(t4, in_16, one_byte_bytemask); + + // 3. prepare bitmask for 8-bit lookup + const uint32_t M0 = one_byte_bitmask & 0x55555555; + const uint32_t M1 = M0 >> 7; + const uint32_t M2 = (M1 | M0) & 0x00ff00ff; + // 4. pack the bytes + + const uint8_t *row = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0]; + const uint8_t *row_2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2 >> + 16)][0]; + + const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1)); + const __m128i shuffle_2 = _mm_loadu_si128((__m128i *)(row_2 + 1)); + + const __m256i utf8_packed = _mm256_shuffle_epi8( + utf8_unpacked, _mm256_setr_m128i(shuffle, shuffle_2)); + // 5. store bytes + _mm_storeu_si128((__m128i *)utf8_output, + _mm256_castsi256_si128(utf8_packed)); + utf8_output += row[0]; + _mm_storeu_si128((__m128i *)utf8_output, + _mm256_extractf128_si256(utf8_packed, 1)); + utf8_output += row_2[0]; + + // 6. 
adjust pointers + buf += 16; + continue; + } + // Must check for overflow in packing + const __m256i saturation_bytemask = _mm256_cmpeq_epi32( + _mm256_and_si256(_mm256_or_si256(in, nextin), v_ffff0000), v_0000); + const uint32_t saturation_bitmask = + static_cast(_mm256_movemask_epi8(saturation_bytemask)); + if (saturation_bitmask == 0xffffffff) { + // case: code units from register produce either 1, 2 or 3 UTF-8 bytes + const __m256i v_d800 = _mm256_set1_epi16((uint16_t)0xd800); + forbidden_bytemask = _mm256_or_si256( + forbidden_bytemask, + _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_d800)); + + const __m256i dup_even = _mm256_setr_epi16( + 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e, + 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); + + /* In this branch we handle three cases: + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + single UTF-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two + UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + three UTF-8 bytes + + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. + + We precompute byte 1 for case #1 and the common byte for cases #2 & #3 + in register t2. + + We precompute byte 1 for case #3 and -- **conditionally** -- precompute + either byte 1 for case #2 or byte 2 for case #3. Note that they + differ by exactly one bit. + + Finally from these two code units we build a proper UTF-8 sequence, taking + into account the case (i.e., the number of bytes to write). 
+ */ + /** + * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: + * t2 => [0ccc|cccc] [10cc|cccc] + * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) + */ +#define simdutf_vec(x) _mm256_set1_epi16(static_cast(x)) + // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] + const __m256i t0 = _mm256_shuffle_epi8(in_16, dup_even); + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] + const __m256i t1 = _mm256_and_si256(t0, simdutf_vec(0b0011111101111111)); + // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] + const __m256i t2 = _mm256_or_si256(t1, simdutf_vec(0b1000000000000000)); + + // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] + const __m256i s0 = _mm256_srli_epi16(in_16, 4); + // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] + const __m256i s1 = _mm256_and_si256(s0, simdutf_vec(0b0000111111111100)); + // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] + const __m256i s2 = _mm256_maddubs_epi16(s1, simdutf_vec(0x0140)); + // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] + const __m256i s3 = _mm256_or_si256(s2, simdutf_vec(0b1100000011100000)); + const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, + simdutf_vec(0b0100000000000000)); + const __m256i s4 = _mm256_xor_si256(s3, m0); +#undef simdutf_vec + + // 4. expand code units 16-bit => 32-bit + const __m256i out0 = _mm256_unpacklo_epi16(t2, s4); + const __m256i out1 = _mm256_unpackhi_epi16(t2, s4); + + // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle + const uint32_t mask = (one_byte_bitmask & 0x55555555) | + (one_or_two_bytes_bitmask & 0xaaaaaaaa); + // Due to the wider registers, the following path is less likely to be + // useful. + /*if(mask == 0) { + // We only have three-byte code units. Use fast path. 
+ const __m256i shuffle = + _mm256_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1, + 2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1); const __m256i utf8_0 = + _mm256_shuffle_epi8(out0, shuffle); const __m256i utf8_1 = + _mm256_shuffle_epi8(out1, shuffle); + _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_0)); + utf8_output += 12; + _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_1)); + utf8_output += 12; + _mm_storeu_si128((__m128i*)utf8_output, + _mm256_extractf128_si256(utf8_0,1)); utf8_output += 12; + _mm_storeu_si128((__m128i*)utf8_output, + _mm256_extractf128_si256(utf8_1,1)); utf8_output += 12; buf += 16; + continue; + }*/ + const uint8_t mask0 = uint8_t(mask); + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; + const __m128i shuffle0 = _mm_loadu_si128((__m128i *)(row0 + 1)); + const __m128i utf8_0 = + _mm_shuffle_epi8(_mm256_castsi256_si128(out0), shuffle0); + + const uint8_t mask1 = static_cast(mask >> 8); + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; + const __m128i shuffle1 = _mm_loadu_si128((__m128i *)(row1 + 1)); + const __m128i utf8_1 = + _mm_shuffle_epi8(_mm256_castsi256_si128(out1), shuffle1); + + const uint8_t mask2 = static_cast(mask >> 16); + const uint8_t *row2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask2][0]; + const __m128i shuffle2 = _mm_loadu_si128((__m128i *)(row2 + 1)); + const __m128i utf8_2 = + _mm_shuffle_epi8(_mm256_extractf128_si256(out0, 1), shuffle2); + + const uint8_t mask3 = static_cast(mask >> 24); + const uint8_t *row3 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask3][0]; + const __m128i shuffle3 = _mm_loadu_si128((__m128i *)(row3 + 1)); + const __m128i utf8_3 = + _mm_shuffle_epi8(_mm256_extractf128_si256(out1, 1), shuffle3); + + _mm_storeu_si128((__m128i *)utf8_output, utf8_0); + utf8_output += row0[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_1); + utf8_output 
+= row1[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_2); + utf8_output += row2[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_3); + utf8_output += row3[0]; + buf += 16; + } else { + // case: at least one 32-bit word is larger than 0xFFFF <=> it will + // produce four UTF-8 bytes. Let us do a scalar fallback. It may seem + // wasteful to use scalar code, but being efficient with SIMD may require + // large, non-trivial tables? + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint32_t word = buf[k]; + if ((word & 0xFFFFFF80) == 0) { // 1-byte (ASCII) + *utf8_output++ = char(word); + } else if ((word & 0xFFFFF800) == 0) { // 2-byte + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xFFFF0000) == 0) { // 3-byte + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair(nullptr, utf8_output); + } + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { // 4-byte + if (word > 0x10FFFF) { + return std::make_pair(nullptr, utf8_output); + } + *utf8_output++ = char((word >> 18) | 0b11110000); + *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } + } + buf += k; + } + } // while + + // check for invalid input + const __m256i v_10ffff = _mm256_set1_epi32((uint32_t)0x10ffff); + if (static_cast(_mm256_movemask_epi8(_mm256_cmpeq_epi32( + _mm256_max_epu32(running_max, v_10ffff), v_10ffff))) != 0xffffffff) { + return std::make_pair(nullptr, utf8_output); + } + + if (static_cast(_mm256_movemask_epi8(forbidden_bytemask)) != 0) { + return std::make_pair(nullptr, utf8_output); + } + + return std::make_pair(buf, utf8_output); +} + 
+std::pair +avx2_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, + char *utf8_output) { + const char32_t *end = buf + len; + const char32_t *start = buf; + + const __m256i v_0000 = _mm256_setzero_si256(); + const __m256i v_ffff0000 = _mm256_set1_epi32((uint32_t)0xffff0000); + const __m256i v_ff80 = _mm256_set1_epi16((uint16_t)0xff80); + const __m256i v_f800 = _mm256_set1_epi16((uint16_t)0xf800); + const __m256i v_c080 = _mm256_set1_epi16((uint16_t)0xc080); + const __m256i v_7fffffff = _mm256_set1_epi32((uint32_t)0x7fffffff); + const __m256i v_10ffff = _mm256_set1_epi32((uint32_t)0x10ffff); + + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 + + while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { + __m256i in = _mm256_loadu_si256((__m256i *)buf); + __m256i nextin = _mm256_loadu_si256((__m256i *)buf + 1); + // Check for too large input + const __m256i max_input = + _mm256_max_epu32(_mm256_max_epu32(in, nextin), v_10ffff); + if (static_cast(_mm256_movemask_epi8( + _mm256_cmpeq_epi32(max_input, v_10ffff))) != 0xffffffff) { + return std::make_pair(result(error_code::TOO_LARGE, buf - start), + utf8_output); + } + + // Pack 32-bit UTF-32 code units to 16-bit UTF-16 code units with unsigned + // saturation + __m256i in_16 = _mm256_packus_epi32(_mm256_and_si256(in, v_7fffffff), + _mm256_and_si256(nextin, v_7fffffff)); + in_16 = _mm256_permute4x64_epi64(in_16, 0b11011000); + + // Try to apply UTF-16 => UTF-8 routine on 256 bits + // (haswell/avx2_convert_utf16_to_utf8.cpp) + + if (_mm256_testz_si256(in_16, v_ff80)) { // ASCII fast path!!!! + // 1. pack the bytes + const __m128i utf8_packed = _mm_packus_epi16( + _mm256_castsi256_si128(in_16), _mm256_extractf128_si256(in_16, 1)); + // 2. store (16 bytes) + _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! 
+ } + // no bits set above 7th bit + const __m256i one_byte_bytemask = + _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_ff80), v_0000); + const uint32_t one_byte_bitmask = + static_cast(_mm256_movemask_epi8(one_byte_bytemask)); + + // no bits set above 11th bit + const __m256i one_or_two_bytes_bytemask = + _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_0000); + const uint32_t one_or_two_bytes_bitmask = + static_cast(_mm256_movemask_epi8(one_or_two_bytes_bytemask)); + if (one_or_two_bytes_bitmask == 0xffffffff) { + // 1. prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 + // expected output : [110a|aaaa|10bb|bbbb] x 8 + const __m256i v_1f00 = _mm256_set1_epi16((int16_t)0x1f00); + const __m256i v_003f = _mm256_set1_epi16((int16_t)0x003f); + + // t0 = [000a|aaaa|bbbb|bb00] + const __m256i t0 = _mm256_slli_epi16(in_16, 2); + // t1 = [000a|aaaa|0000|0000] + const __m256i t1 = _mm256_and_si256(t0, v_1f00); + // t2 = [0000|0000|00bb|bbbb] + const __m256i t2 = _mm256_and_si256(in_16, v_003f); + // t3 = [000a|aaaa|00bb|bbbb] + const __m256i t3 = _mm256_or_si256(t1, t2); + // t4 = [110a|aaaa|10bb|bbbb] + const __m256i t4 = _mm256_or_si256(t3, v_c080); + + // 2. merge ASCII and 2-byte codewords + const __m256i utf8_unpacked = + _mm256_blendv_epi8(t4, in_16, one_byte_bytemask); + + // 3. prepare bitmask for 8-bit lookup + const uint32_t M0 = one_byte_bitmask & 0x55555555; + const uint32_t M1 = M0 >> 7; + const uint32_t M2 = (M1 | M0) & 0x00ff00ff; + // 4. pack the bytes + + const uint8_t *row = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0]; + const uint8_t *row_2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2 >> + 16)][0]; + + const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1)); + const __m128i shuffle_2 = _mm_loadu_si128((__m128i *)(row_2 + 1)); + + const __m256i utf8_packed = _mm256_shuffle_epi8( + utf8_unpacked, _mm256_setr_m128i(shuffle, shuffle_2)); + // 5. 
store bytes + _mm_storeu_si128((__m128i *)utf8_output, + _mm256_castsi256_si128(utf8_packed)); + utf8_output += row[0]; + _mm_storeu_si128((__m128i *)utf8_output, + _mm256_extractf128_si256(utf8_packed, 1)); + utf8_output += row_2[0]; + + // 6. adjust pointers + buf += 16; + continue; + } + // Must check for overflow in packing + const __m256i saturation_bytemask = _mm256_cmpeq_epi32( + _mm256_and_si256(_mm256_or_si256(in, nextin), v_ffff0000), v_0000); + const uint32_t saturation_bitmask = + static_cast(_mm256_movemask_epi8(saturation_bytemask)); + if (saturation_bitmask == 0xffffffff) { + // case: code units from register produce either 1, 2 or 3 UTF-8 bytes + + // Check for illegal surrogate code units + const __m256i v_d800 = _mm256_set1_epi16((uint16_t)0xd800); + const __m256i forbidden_bytemask = + _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_d800); + if (static_cast(_mm256_movemask_epi8(forbidden_bytemask)) != + 0x0) { + return std::make_pair(result(error_code::SURROGATE, buf - start), + utf8_output); + } + + const __m256i dup_even = _mm256_setr_epi16( + 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e, + 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); + + /* In this branch we handle three cases: + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + single UTF-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two + UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + three UTF-8 bytes + + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. + + We precompute byte 1 for case #1 and the common byte for cases #2 & #3 + in register t2. + + We precompute byte 1 for case #3 and -- **conditionally** -- precompute + either byte 1 for case #2 or byte 2 for case #3. Note that they + differ by exactly one bit. 
+ + Finally from these two code units we build proper UTF-8 sequence, taking + into account the case (i.e, the number of bytes to write). + */ + /** + * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: + * t2 => [0ccc|cccc] [10cc|cccc] + * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) + */ +#define simdutf_vec(x) _mm256_set1_epi16(static_cast(x)) + // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] + const __m256i t0 = _mm256_shuffle_epi8(in_16, dup_even); + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] + const __m256i t1 = _mm256_and_si256(t0, simdutf_vec(0b0011111101111111)); + // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] + const __m256i t2 = _mm256_or_si256(t1, simdutf_vec(0b1000000000000000)); + + // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] + const __m256i s0 = _mm256_srli_epi16(in_16, 4); + // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] + const __m256i s1 = _mm256_and_si256(s0, simdutf_vec(0b0000111111111100)); + // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] + const __m256i s2 = _mm256_maddubs_epi16(s1, simdutf_vec(0x0140)); + // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] + const __m256i s3 = _mm256_or_si256(s2, simdutf_vec(0b1100000011100000)); + const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, + simdutf_vec(0b0100000000000000)); + const __m256i s4 = _mm256_xor_si256(s3, m0); +#undef simdutf_vec + + // 4. expand code units 16-bit => 32-bit + const __m256i out0 = _mm256_unpacklo_epi16(t2, s4); + const __m256i out1 = _mm256_unpackhi_epi16(t2, s4); + + // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle + const uint32_t mask = (one_byte_bitmask & 0x55555555) | + (one_or_two_bytes_bitmask & 0xaaaaaaaa); + // Due to the wider registers, the following path is less likely to be + // useful. + /*if(mask == 0) { + // We only have three-byte code units. Use fast path. 
+ const __m256i shuffle = + _mm256_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1, + 2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1); const __m256i utf8_0 = + _mm256_shuffle_epi8(out0, shuffle); const __m256i utf8_1 = + _mm256_shuffle_epi8(out1, shuffle); + _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_0)); + utf8_output += 12; + _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_1)); + utf8_output += 12; + _mm_storeu_si128((__m128i*)utf8_output, + _mm256_extractf128_si256(utf8_0,1)); utf8_output += 12; + _mm_storeu_si128((__m128i*)utf8_output, + _mm256_extractf128_si256(utf8_1,1)); utf8_output += 12; buf += 16; + continue; + }*/ + const uint8_t mask0 = uint8_t(mask); + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; + const __m128i shuffle0 = _mm_loadu_si128((__m128i *)(row0 + 1)); + const __m128i utf8_0 = + _mm_shuffle_epi8(_mm256_castsi256_si128(out0), shuffle0); + + const uint8_t mask1 = static_cast(mask >> 8); + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; + const __m128i shuffle1 = _mm_loadu_si128((__m128i *)(row1 + 1)); + const __m128i utf8_1 = + _mm_shuffle_epi8(_mm256_castsi256_si128(out1), shuffle1); + + const uint8_t mask2 = static_cast(mask >> 16); + const uint8_t *row2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask2][0]; + const __m128i shuffle2 = _mm_loadu_si128((__m128i *)(row2 + 1)); + const __m128i utf8_2 = + _mm_shuffle_epi8(_mm256_extractf128_si256(out0, 1), shuffle2); + + const uint8_t mask3 = static_cast(mask >> 24); + const uint8_t *row3 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask3][0]; + const __m128i shuffle3 = _mm_loadu_si128((__m128i *)(row3 + 1)); + const __m128i utf8_3 = + _mm_shuffle_epi8(_mm256_extractf128_si256(out1, 1), shuffle3); + + _mm_storeu_si128((__m128i *)utf8_output, utf8_0); + utf8_output += row0[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_1); + utf8_output 
+= row1[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_2); + utf8_output += row2[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_3); + utf8_output += row3[0]; + buf += 16; + } else { + // case: at least one 32-bit word is larger than 0xFFFF <=> it will + // produce four UTF-8 bytes. Let us do a scalar fallback. It may seem + // wasteful to use scalar code, but being efficient with SIMD may require + // large, non-trivial tables? + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint32_t word = buf[k]; + if ((word & 0xFFFFFF80) == 0) { // 1-byte (ASCII) + *utf8_output++ = char(word); + } else if ((word & 0xFFFFF800) == 0) { // 2-byte + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xFFFF0000) == 0) { // 3-byte + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair( + result(error_code::SURROGATE, buf - start + k), utf8_output); + } + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { // 4-byte + if (word > 0x10FFFF) { + return std::make_pair( + result(error_code::TOO_LARGE, buf - start + k), utf8_output); + } + *utf8_output++ = char((word >> 18) | 0b11110000); + *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } + } + buf += k; + } + } // while + + return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output); +} +/* end file src/haswell/avx2_convert_utf32_to_utf8.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +/* begin file src/haswell/avx2_convert_utf32_to_utf16.cpp */ +template +std::pair 
+avx2_convert_utf32_to_utf16(const char32_t *buf, size_t len, + char16_t *utf16_output) { + const char32_t *end = buf + len; + + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 + __m256i forbidden_bytemask = _mm256_setzero_si256(); + + const __m256i v_ffff0000 = _mm256_set1_epi32((int32_t)0xffff0000); + const __m256i v_f800 = _mm256_set1_epi32((uint32_t)0xf800); + const __m256i v_d800 = _mm256_set1_epi32((uint32_t)0xd800); + + while (end - buf >= std::ptrdiff_t(8 + safety_margin)) { + const __m256i in = _mm256_loadu_si256((__m256i *)buf); + + if (simdutf_likely(_mm256_testz_si256(in, v_ffff0000))) { + // no bits set above 16th bit <=> can pack to UTF16 + // without surrogate pairs + forbidden_bytemask = _mm256_or_si256( + forbidden_bytemask, + _mm256_cmpeq_epi32(_mm256_and_si256(in, v_f800), v_d800)); + + __m128i utf16_packed = _mm_packus_epi32(_mm256_castsi256_si128(in), + _mm256_extractf128_si256(in, 1)); + if (big_endian) { + const __m128i swap = + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + utf16_packed = _mm_shuffle_epi8(utf16_packed, swap); + } + _mm_storeu_si128((__m128i *)utf16_output, utf16_packed); + utf16_output += 8; + buf += 8; + } else { + size_t forward = 7; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint32_t word = buf[k]; + if ((word & 0xFFFF0000) == 0) { + // will not generate a surrogate pair + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair(nullptr, utf16_output); + } + *utf16_output++ = + big_endian + ? 
char16_t((uint16_t(word) >> 8) | (uint16_t(word) << 8)) + : char16_t(word); + } else { + // will generate a surrogate pair + if (word > 0x10FFFF) { + return std::make_pair(nullptr, utf16_output); + } + word -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); + if (big_endian) { + high_surrogate = + uint16_t((high_surrogate >> 8) | (high_surrogate << 8)); + low_surrogate = + uint16_t((low_surrogate >> 8) | (low_surrogate << 8)); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + } + } + buf += k; + } + } + + // check for invalid input + if (static_cast(_mm256_movemask_epi8(forbidden_bytemask)) != 0) { + return std::make_pair(nullptr, utf16_output); + } + + return std::make_pair(buf, utf16_output); +} + +template +std::pair +avx2_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, + char16_t *utf16_output) { + const char32_t *start = buf; + const char32_t *end = buf + len; + + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 + + const __m256i v_ffff0000 = _mm256_set1_epi32((int32_t)0xffff0000); + const __m256i v_f800 = _mm256_set1_epi32((uint32_t)0xf800); + const __m256i v_d800 = _mm256_set1_epi32((uint32_t)0xd800); + + while (end - buf >= std::ptrdiff_t(8 + safety_margin)) { + const __m256i in = _mm256_loadu_si256((__m256i *)buf); + + if (simdutf_likely(_mm256_testz_si256(in, v_ffff0000))) { + // no bits set above 16th bit <=> can pack to UTF16 without surrogate + // pairs + const __m256i forbidden_bytemask = + _mm256_cmpeq_epi32(_mm256_and_si256(in, v_f800), v_d800); + if (static_cast(_mm256_movemask_epi8(forbidden_bytemask)) != + 0x0) { + return std::make_pair(result(error_code::SURROGATE, buf - start), + utf16_output); + } + + __m128i utf16_packed = _mm_packus_epi32(_mm256_castsi256_si128(in), + _mm256_extractf128_si256(in, 1)); + if (big_endian) { + 
const __m128i swap = + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + utf16_packed = _mm_shuffle_epi8(utf16_packed, swap); + } + _mm_storeu_si128((__m128i *)utf16_output, utf16_packed); + utf16_output += 8; + buf += 8; + } else { + size_t forward = 7; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint32_t word = buf[k]; + if ((word & 0xFFFF0000) == 0) { + // will not generate a surrogate pair + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair( + result(error_code::SURROGATE, buf - start + k), utf16_output); + } + *utf16_output++ = + big_endian + ? char16_t((uint16_t(word) >> 8) | (uint16_t(word) << 8)) + : char16_t(word); + } else { + // will generate a surrogate pair + if (word > 0x10FFFF) { + return std::make_pair( + result(error_code::TOO_LARGE, buf - start + k), utf16_output); + } + word -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); + if (big_endian) { + high_surrogate = + uint16_t((high_surrogate >> 8) | (high_surrogate << 8)); + low_surrogate = + uint16_t((low_surrogate >> 8) | (low_surrogate << 8)); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + } + } + buf += k; + } + } + + return std::make_pair(result(error_code::SUCCESS, buf - start), utf16_output); +} +/* end file src/haswell/avx2_convert_utf32_to_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/haswell/avx2_convert_utf8_to_latin1.cpp */ +// depends on "tables/utf8_to_utf16_tables.h" + +// Convert up to 12 bytes from utf8 to latin1 using a mask indicating the +// end of the code points. Only the least significant 12 bits of the mask +// are accessed. +// It returns how many bytes were consumed (up to 12). 
+size_t convert_masked_utf8_to_latin1(const char *input, + uint64_t utf8_end_of_code_point_mask, + char *&latin1_output) { + // we use an approach where we try to process up to 12 input bytes. + // Why 12 input bytes and not 16? Because we are concerned with the size of + // the lookup tables. Also 12 is nicely divisible by two and three. + // + // + // Optimization note: our main path below is load-latency dependent. Thus it + // is maybe beneficial to have fast paths that depend on branch prediction but + // have less latency. This results in more instructions but, potentially, also + // higher speeds. + // + const __m128i in = _mm_loadu_si128((__m128i *)input); + + const uint16_t input_utf8_end_of_code_point_mask = + utf8_end_of_code_point_mask & + 0xfff; // we are only processing 12 bytes in case it is not all ASCII + + if (utf8_end_of_code_point_mask == 0xfff) { + // We process the data in chunks of 12 bytes. + _mm_storeu_si128(reinterpret_cast<__m128i *>(latin1_output), in); + latin1_output += 12; // We wrote 12 characters. + return 12; // We consumed 1 bytes. + } + /// We do not have a fast path available, so we fallback. + const uint8_t idx = + tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][0]; + const uint8_t consumed = + tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1]; + // this indicates an invalid input: + if (idx >= 64) { + return consumed; + } + // Here we should have (idx < 64), if not, there is a bug in the validation or + // elsewhere. SIX (6) input code-code units this is a relatively easy scenario + // we process SIX (6) input code-code units. The max length in bytes of six + // code code units spanning between 1 and 2 bytes each is 12 bytes. On + // processors where pdep/pext is fast, we might be able to use a small lookup + // table. 
+ const __m128i sh = + _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]); + const __m128i perm = _mm_shuffle_epi8(in, sh); + const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f)); + const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00)); + __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2)); + const __m128i latin1_packed = _mm_packus_epi16(composed, composed); + // writing 8 bytes even though we only care about the first 6 bytes. + // performance note: it would be faster to use _mm_storeu_si128, we should + // investigate. + _mm_storel_epi64((__m128i *)latin1_output, latin1_packed); + latin1_output += 6; // We wrote 6 bytes. + return consumed; +} +/* end file src/haswell/avx2_convert_utf8_to_latin1.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_BASE64 +/* begin file src/haswell/avx2_base64.cpp */ +/** + * References and further reading: + * + * Wojciech Muła, Daniel Lemire, Base64 encoding and decoding at almost the + * speed of a memory copy, Software: Practice and Experience 50 (2), 2020. + * https://arxiv.org/abs/1910.05109 + * + * Wojciech Muła, Daniel Lemire, Faster Base64 Encoding and Decoding using AVX2 + * Instructions, ACM Transactions on the Web 12 (3), 2018. + * https://arxiv.org/abs/1704.00605 + * + * Simon Josefsson. 2006. The Base16, Base32, and Base64 Data Encodings. + * https://tools.ietf.org/html/rfc4648. (2006). Internet Engineering Task Force, + * Request for Comments: 4648. + * + * Alfred Klomp. 2014a. Fast Base64 encoding/decoding with SSE vectorization. + * http://www.alfredklomp.com/programming/sse-base64/. (2014). + * + * Alfred Klomp. 2014b. Fast Base64 stream encoder/decoder in C99, with SIMD + * acceleration. https://github.com/aklomp/base64. (2014). + * + * Hanson Char. 2014. A Fast and Correct Base 64 Codec. (2014). + * https://aws.amazon.com/blogs/developer/a-fast-and-correct-base-64-codec/ + * + * Nick Kopp. 2013. 
Base64 Encoding on a GPU. + * https://www.codeproject.com/Articles/276993/Base-Encoding-on-a-GPU. (2013). + */ + +template +simdutf_really_inline __m256i lookup_pshufb_improved(const __m256i input) { + // Precomputed shuffle masks for K = 1 to 16 + // credit: Wojciech Muła + __m256i result = _mm256_subs_epu8(input, _mm256_set1_epi8(51)); + const __m256i less = _mm256_cmpgt_epi8(_mm256_set1_epi8(26), input); + result = + _mm256_or_si256(result, _mm256_and_si256(less, _mm256_set1_epi8(13))); + __m256i shift_LUT; + if (base64_url) { + shift_LUT = _mm256_setr_epi8( + 'a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, + '0' - 52, '0' - 52, '0' - 52, '0' - 52, '-' - 62, '_' - 63, 'A', 0, 0, + + 'a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, + '0' - 52, '0' - 52, '0' - 52, '0' - 52, '-' - 62, '_' - 63, 'A', 0, 0); + } else { + shift_LUT = _mm256_setr_epi8( + 'a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, + '0' - 52, '0' - 52, '0' - 52, '0' - 52, '+' - 62, '/' - 63, 'A', 0, 0, + + 'a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, + '0' - 52, '0' - 52, '0' - 52, '0' - 52, '+' - 62, '/' - 63, 'A', 0, 0); + } + + result = _mm256_shuffle_epi8(shift_LUT, result); + return _mm256_add_epi8(result, input); +} + +simdutf_really_inline __m256i insert_line_feed32(__m256i input, int K) { + + static const uint8_t low_table[16][32] = { + {0x80, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {0, 0x80, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {0, 1, 0x80, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {0, 1, 2, 0x80, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {0, 1, 2, 3, 0x80, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 
15}, + {0, 1, 2, 3, 4, 0x80, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {0, 1, 2, 3, 4, 5, 0x80, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {0, 1, 2, 3, 4, 5, 6, 0x80, 7, 8, 9, 10, 11, 12, 13, 14, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {0, 1, 2, 3, 4, 5, 6, 7, 0x80, 8, 9, 10, 11, 12, 13, 14, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 0x80, 9, 10, 11, 12, 13, 14, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0x80, 10, 11, 12, 13, 14, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0x80, 11, 12, 13, 14, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0x80, 12, 13, 14, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 0x80, 13, 14, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 0x80, 14, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0x80, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}; + static const uint8_t high_table[16][32] = { + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0x80, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 0x80, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 0x80, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 0x80, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 0x80, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 0x80, 5, 6, 7, 8, 
9, 10, 11, 12, 13, 14}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 0x80, 6, 7, 8, 9, 10, 11, 12, 13, 14}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 0x80, 7, 8, 9, 10, 11, 12, 13, 14}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 0x80, 8, 9, 10, 11, 12, 13, 14}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 0x80, 9, 10, 11, 12, 13, 14}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0x80, 10, 11, 12, 13, 14}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0x80, 11, 12, 13, 14}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0x80, 12, 13, 14}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 0x80, 13, 14}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 0x80, 14}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0x80}}; + + __m256i line_feed_vector = _mm256_set1_epi8('\n'); + if (K >= 16) { + __m256i mask = _mm256_loadu_si256((const __m256i *)high_table[K - 16]); + __m256i lf_pos = + _mm256_cmpeq_epi8(mask, _mm256_set1_epi8(static_cast(0x80))); + __m256i shuffled = _mm256_shuffle_epi8(input, mask); + __m256i result = _mm256_blendv_epi8(shuffled, line_feed_vector, lf_pos); + return result; + } + // Shift input right by 1 byte + __m256i shift = _mm256_alignr_epi8( + input, _mm256_permute2x128_si256(input, input, 0x21), 15); + + input = _mm256_blend_epi32(input, shift, 0xF0); + + __m256i mask = _mm256_loadu_si256((const __m256i *)low_table[K]); + + __m256i lf_pos = + _mm256_cmpeq_epi8(mask, _mm256_set1_epi8(static_cast(0x80))); + __m256i shuffled = _mm256_shuffle_epi8(input, mask); + + __m256i result = 
_mm256_blendv_epi8(shuffled, line_feed_vector, lf_pos); + return result; +} + +template +size_t +avx2_encode_base64_impl(char *dst, const char *src, size_t srclen, + base64_options options, + size_t line_length = simdutf::default_line_length) { + size_t offset = 0; + + if (line_length < 4) { + line_length = 4; // We do not support line_length less than 4 + } + // credit: Wojciech Muła + const uint8_t *input = (const uint8_t *)src; + + uint8_t *out = (uint8_t *)dst; + const __m256i shuf = + _mm256_set_epi8(10, 11, 9, 10, 7, 8, 6, 7, 4, 5, 3, 4, 1, 2, 0, 1, + + 10, 11, 9, 10, 7, 8, 6, 7, 4, 5, 3, 4, 1, 2, 0, 1); + size_t i = 0; + for (; i + 100 <= srclen; i += 96) { + const __m128i lo0 = _mm_loadu_si128( + reinterpret_cast(input + i + 4 * 3 * 0)); + const __m128i hi0 = _mm_loadu_si128( + reinterpret_cast(input + i + 4 * 3 * 1)); + const __m128i lo1 = _mm_loadu_si128( + reinterpret_cast(input + i + 4 * 3 * 2)); + const __m128i hi1 = _mm_loadu_si128( + reinterpret_cast(input + i + 4 * 3 * 3)); + const __m128i lo2 = _mm_loadu_si128( + reinterpret_cast(input + i + 4 * 3 * 4)); + const __m128i hi2 = _mm_loadu_si128( + reinterpret_cast(input + i + 4 * 3 * 5)); + const __m128i lo3 = _mm_loadu_si128( + reinterpret_cast(input + i + 4 * 3 * 6)); + const __m128i hi3 = _mm_loadu_si128( + reinterpret_cast(input + i + 4 * 3 * 7)); + + __m256i in0 = _mm256_shuffle_epi8(_mm256_set_m128i(hi0, lo0), shuf); + __m256i in1 = _mm256_shuffle_epi8(_mm256_set_m128i(hi1, lo1), shuf); + __m256i in2 = _mm256_shuffle_epi8(_mm256_set_m128i(hi2, lo2), shuf); + __m256i in3 = _mm256_shuffle_epi8(_mm256_set_m128i(hi3, lo3), shuf); + + const __m256i t0_0 = _mm256_and_si256(in0, _mm256_set1_epi32(0x0fc0fc00)); + const __m256i t0_1 = _mm256_and_si256(in1, _mm256_set1_epi32(0x0fc0fc00)); + const __m256i t0_2 = _mm256_and_si256(in2, _mm256_set1_epi32(0x0fc0fc00)); + const __m256i t0_3 = _mm256_and_si256(in3, _mm256_set1_epi32(0x0fc0fc00)); + + const __m256i t1_0 = + _mm256_mulhi_epu16(t0_0, 
_mm256_set1_epi32(0x04000040)); + const __m256i t1_1 = + _mm256_mulhi_epu16(t0_1, _mm256_set1_epi32(0x04000040)); + const __m256i t1_2 = + _mm256_mulhi_epu16(t0_2, _mm256_set1_epi32(0x04000040)); + const __m256i t1_3 = + _mm256_mulhi_epu16(t0_3, _mm256_set1_epi32(0x04000040)); + + const __m256i t2_0 = _mm256_and_si256(in0, _mm256_set1_epi32(0x003f03f0)); + const __m256i t2_1 = _mm256_and_si256(in1, _mm256_set1_epi32(0x003f03f0)); + const __m256i t2_2 = _mm256_and_si256(in2, _mm256_set1_epi32(0x003f03f0)); + const __m256i t2_3 = _mm256_and_si256(in3, _mm256_set1_epi32(0x003f03f0)); + + const __m256i t3_0 = + _mm256_mullo_epi16(t2_0, _mm256_set1_epi32(0x01000010)); + const __m256i t3_1 = + _mm256_mullo_epi16(t2_1, _mm256_set1_epi32(0x01000010)); + const __m256i t3_2 = + _mm256_mullo_epi16(t2_2, _mm256_set1_epi32(0x01000010)); + const __m256i t3_3 = + _mm256_mullo_epi16(t2_3, _mm256_set1_epi32(0x01000010)); + + const __m256i input0 = _mm256_or_si256(t1_0, t3_0); + const __m256i input1 = _mm256_or_si256(t1_1, t3_1); + const __m256i input2 = _mm256_or_si256(t1_2, t3_2); + const __m256i input3 = _mm256_or_si256(t1_3, t3_3); + + if (use_lines) { + if (line_length >= 32) { // fast path + __m256i result; + result = lookup_pshufb_improved(input0); + if (offset + 32 > line_length) { + size_t location_end = line_length - offset; + size_t to_move = 32 - location_end; + // We could do this, or extract instead. + _mm256_storeu_si256(reinterpret_cast<__m256i *>(out + 1), result); + _mm256_storeu_si256( + reinterpret_cast<__m256i *>(out), + insert_line_feed32(result, static_cast(location_end))); + offset = to_move; + out += 32 + 1; + } else { + _mm256_storeu_si256(reinterpret_cast<__m256i *>(out), result); + offset += 32; + out += 32; + } + result = lookup_pshufb_improved(input1); + + if (offset + 32 > line_length) { + size_t location_end = line_length - offset; + size_t to_move = 32 - location_end; + + // We could do this, or extract instead. 
+ _mm256_storeu_si256(reinterpret_cast<__m256i *>(out + 1), result); + _mm256_storeu_si256( + reinterpret_cast<__m256i *>(out), + insert_line_feed32(result, static_cast(location_end))); + // see above. + // out[32] = static_cast(_mm256_extract_epi8(result, 31)); + offset = to_move; + out += 32 + 1; + } else { + + _mm256_storeu_si256(reinterpret_cast<__m256i *>(out), result); + + offset += 32; + out += 32; + } + result = lookup_pshufb_improved(input2); + + if (offset + 32 > line_length) { + size_t location_end = line_length - offset; + size_t to_move = 32 - location_end; + + // We could do this, or extract instead. + _mm256_storeu_si256(reinterpret_cast<__m256i *>(out + 1), result); + _mm256_storeu_si256( + reinterpret_cast<__m256i *>(out), + insert_line_feed32(result, static_cast(location_end))); + // see above. + // out[32] = static_cast(_mm256_extract_epi8(result, 31)); + offset = to_move; + out += 32 + 1; + } else { + _mm256_storeu_si256(reinterpret_cast<__m256i *>(out), result); + offset += 32; + out += 32; + } + result = lookup_pshufb_improved(input3); + + if (offset + 32 > line_length) { + size_t location_end = line_length - offset; + size_t to_move = 32 - location_end; + + // We could do this, or extract instead. + _mm256_storeu_si256(reinterpret_cast<__m256i *>(out + 1), result); + _mm256_storeu_si256( + reinterpret_cast<__m256i *>(out), + insert_line_feed32(result, static_cast(location_end))); + // see above. 
+ // out[32] = static_cast(_mm256_extract_epi8(result, 31)); + offset = to_move; + out += 32 + 1; + } else { + _mm256_storeu_si256(reinterpret_cast<__m256i *>(out), result); + offset += 32; + out += 32; + } + } else { // slow path + // could be optimized + uint8_t buffer[128]; + _mm256_storeu_si256(reinterpret_cast<__m256i *>(buffer), + lookup_pshufb_improved(input0)); + _mm256_storeu_si256(reinterpret_cast<__m256i *>(buffer + 32), + lookup_pshufb_improved(input1)); + _mm256_storeu_si256(reinterpret_cast<__m256i *>(buffer + 64), + lookup_pshufb_improved(input2)); + _mm256_storeu_si256(reinterpret_cast<__m256i *>(buffer + 96), + lookup_pshufb_improved(input3)); + size_t out_pos = 0; + size_t local_offset = offset; + for (size_t j = 0; j < 128;) { + if (local_offset == line_length) { + out[out_pos++] = '\n'; + local_offset = 0; + } + out[out_pos++] = buffer[j++]; + local_offset++; + } + offset = local_offset; + out += out_pos; + } + } else { + _mm256_storeu_si256(reinterpret_cast<__m256i *>(out), + lookup_pshufb_improved(input0)); + _mm256_storeu_si256(reinterpret_cast<__m256i *>(out + 32), + lookup_pshufb_improved(input1)); + _mm256_storeu_si256(reinterpret_cast<__m256i *>(out + 64), + lookup_pshufb_improved(input2)); + _mm256_storeu_si256(reinterpret_cast<__m256i *>(out + 96), + lookup_pshufb_improved(input3)); + + out += 128; + } + } + for (; i + 28 <= srclen; i += 24) { + // lo = [xxxx|DDDC|CCBB|BAAA] + // hi = [xxxx|HHHG|GGFF|FEEE] + const __m128i lo = + _mm_loadu_si128(reinterpret_cast(input + i)); + const __m128i hi = + _mm_loadu_si128(reinterpret_cast(input + i + 4 * 3)); + + // bytes from groups A, B and C are needed in separate 32-bit lanes + // in = [0HHH|0GGG|0FFF|0EEE[0DDD|0CCC|0BBB|0AAA] + __m256i in = _mm256_shuffle_epi8(_mm256_set_m128i(hi, lo), shuf); + + // this part is well commented in encode.sse.cpp + + const __m256i t0 = _mm256_and_si256(in, _mm256_set1_epi32(0x0fc0fc00)); + const __m256i t1 = _mm256_mulhi_epu16(t0, 
_mm256_set1_epi32(0x04000040)); + const __m256i t2 = _mm256_and_si256(in, _mm256_set1_epi32(0x003f03f0)); + const __m256i t3 = _mm256_mullo_epi16(t2, _mm256_set1_epi32(0x01000010)); + const __m256i indices = _mm256_or_si256(t1, t3); + + if (use_lines) { + if (line_length >= 32) { // fast path + _mm256_storeu_si256(reinterpret_cast<__m256i *>(out), + lookup_pshufb_improved(indices)); + + if (offset + 32 > line_length) { + size_t location_end = line_length - offset; + size_t to_move = 32 - location_end; + std::memmove(out + location_end + 1, out + location_end, to_move); + out[location_end] = '\n'; + offset = to_move; + out += 32 + 1; + } else { + offset += 32; + out += 32; + } + } else { // slow path + // could be optimized + alignas(32) uint8_t buffer[32]; + _mm256_storeu_si256(reinterpret_cast<__m256i *>(buffer), + lookup_pshufb_improved(indices)); + std::memcpy(out, buffer, 32); + size_t out_pos = 0; + size_t local_offset = offset; + for (size_t j = 0; j < 32;) { + if (local_offset == line_length) { + out[out_pos++] = '\n'; + local_offset = 0; + } + out[out_pos++] = buffer[j++]; + local_offset++; + } + offset = local_offset; + out += out_pos; + } + } else { + _mm256_storeu_si256(reinterpret_cast<__m256i *>(out), + lookup_pshufb_improved(indices)); + + out += 32; + } + } + return ((char *)out - (char *)dst) + + scalar::base64::tail_encode_base64_impl( + (char *)out, src + i, srclen - i, options, line_length, offset); +} + +template +size_t encode_base64(char *dst, const char *src, size_t srclen, + base64_options options) { + return avx2_encode_base64_impl(dst, src, srclen, options); +} + +static inline void compress(__m128i data, uint16_t mask, char *output) { + if (mask == 0) { + _mm_storeu_si128(reinterpret_cast<__m128i *>(output), data); + return; + } + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t 
mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + + __m128i shufmask = _mm_set_epi64x(tables::base64::thintable_epi8[mask2], + tables::base64::thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask + shufmask = + _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m128i pruned = _mm_shuffle_epi8(data, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = tables::base64::BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. + __m128i compactmask = _mm_loadu_si128(reinterpret_cast( + tables::base64::pshufb_combine_table + pop1 * 8)); + __m128i answer = _mm_shuffle_epi8(pruned, compactmask); + + _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); +} + +// --- decoding ----------------------------------------------- + +template +simdutf_really_inline void compress(__m256i data, uint32_t mask, char *output) { + if (mask == 0) { + _mm256_storeu_si256(reinterpret_cast<__m256i *>(output), data); + return; + } + compress(_mm256_castsi256_si128(data), uint16_t(mask), output); + compress(_mm256_extracti128_si256(data, 1), uint16_t(mask >> 16), + output + count_ones(~mask & 0xFFFF)); +} + +template +simdutf_really_inline void base64_decode(char *out, __m256i str) { + // credit: aqrit + const __m256i pack_shuffle = + _mm256_setr_epi8(2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1, + 2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1); + const __m256i t0 = _mm256_maddubs_epi16(str, _mm256_set1_epi32(0x01400140)); + const __m256i t1 = 
_mm256_madd_epi16(t0, _mm256_set1_epi32(0x00011000)); + const __m256i t2 = _mm256_shuffle_epi8(t1, pack_shuffle); + + // Store the output: + _mm_storeu_si128((__m128i *)out, _mm256_castsi256_si128(t2)); + _mm_storeu_si128((__m128i *)(out + 12), _mm256_extracti128_si256(t2, 1)); +} + +template +simdutf_really_inline void base64_decode_block(char *out, const char *src) { + base64_decode(out, + _mm256_loadu_si256(reinterpret_cast(src))); + base64_decode(out + 24, _mm256_loadu_si256( + reinterpret_cast(src + 32))); +} + +template +simdutf_really_inline void base64_decode_block_safe(char *out, + const char *src) { + base64_decode(out, + _mm256_loadu_si256(reinterpret_cast(src))); + alignas(32) char buffer[32]; // We enforce safety with a buffer. + base64_decode( + buffer, _mm256_loadu_si256(reinterpret_cast(src + 32))); + std::memcpy(out + 24, buffer, 24); +} + +// --- decoding - base64 class -------------------------------- + +class block64 { + __m256i chunks[2]; + +public: + // The caller of this function is responsible to ensure that there are 64 + // bytes available from reading at src. + simdutf_really_inline block64(const char *src) { + chunks[0] = _mm256_loadu_si256(reinterpret_cast(src)); + chunks[1] = _mm256_loadu_si256(reinterpret_cast(src + 32)); + } + + // The caller of this function is responsible to ensure that there are 128 + // bytes available from reading at src. 
+ simdutf_really_inline block64(const char16_t *src) { + const auto m1 = _mm256_loadu_si256(reinterpret_cast(src)); + const auto m2 = + _mm256_loadu_si256(reinterpret_cast(src + 16)); + const auto m3 = + _mm256_loadu_si256(reinterpret_cast(src + 32)); + const auto m4 = + _mm256_loadu_si256(reinterpret_cast(src + 48)); + + const auto m1p = _mm256_permute2x128_si256(m1, m2, 0x20); + const auto m2p = _mm256_permute2x128_si256(m1, m2, 0x31); + const auto m3p = _mm256_permute2x128_si256(m3, m4, 0x20); + const auto m4p = _mm256_permute2x128_si256(m3, m4, 0x31); + + chunks[0] = _mm256_packus_epi16(m1p, m2p); + chunks[1] = _mm256_packus_epi16(m3p, m4p); + } + + simdutf_really_inline void copy_block(char *output) { + _mm256_storeu_si256(reinterpret_cast<__m256i *>(output), chunks[0]); + _mm256_storeu_si256(reinterpret_cast<__m256i *>(output + 32), chunks[1]); + } + + // decode 64 bytes and output 48 bytes + simdutf_really_inline void base64_decode_block(char *out) { + base64_decode(out, chunks[0]); + base64_decode(out + 24, chunks[1]); + } + + simdutf_really_inline void base64_decode_block_safe(char *out) { + base64_decode(out, chunks[0]); + alignas(32) char buffer[32]; // We enforce safety with a buffer. 
+ base64_decode(buffer, chunks[1]); + std::memcpy(out + 24, buffer, 24); + } + + template + simdutf_really_inline uint64_t to_base64_mask(uint64_t *error) { + uint32_t err0 = 0; + uint32_t err1 = 0; + uint64_t m0 = to_base64_mask( + &chunks[0], &err0); + uint64_t m1 = to_base64_mask( + &chunks[1], &err1); + if (!ignore_garbage) { + *error = err0 | ((uint64_t)err1 << 32); + } + return m0 | (m1 << 32); + } + + template + simdutf_really_inline uint32_t to_base64_mask(__m256i *src, uint32_t *error) { + const __m256i ascii_space_tbl = + _mm256_setr_epi8(0x20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x9, 0xa, + 0x0, 0xc, 0xd, 0x0, 0x0, 0x20, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x9, 0xa, 0x0, 0xc, 0xd, 0x0, 0x0); + // credit: aqrit + __m256i delta_asso; + if (default_or_url) { + delta_asso = _mm256_setr_epi8( + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x11, 0x00, 0x16, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x16); + } else if (base64_url) { + delta_asso = _mm256_setr_epi8(0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x0, + 0x0, 0x0, 0x0, 0x0, 0xF, 0x0, 0xF, 0x1, 0x1, + 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x0, 0x0, 0x0, + 0x0, 0x0, 0xF, 0x0, 0xF); + } else { + delta_asso = _mm256_setr_epi8( + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x0F, 0x00, 0x0F, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x0F); + } + + __m256i delta_values; + if (default_or_url) { + delta_values = _mm256_setr_epi8( + uint8_t(0xBF), uint8_t(0xE0), uint8_t(0xB9), uint8_t(0x13), + uint8_t(0x04), uint8_t(0xBF), uint8_t(0xBF), uint8_t(0xB9), + uint8_t(0xB9), uint8_t(0x00), uint8_t(0xFF), uint8_t(0x11), + uint8_t(0xFF), uint8_t(0xBF), uint8_t(0x10), uint8_t(0xB9), + uint8_t(0xBF), uint8_t(0xE0), uint8_t(0xB9), uint8_t(0x13), + uint8_t(0x04), uint8_t(0xBF), uint8_t(0xBF), uint8_t(0xB9), + uint8_t(0xB9), uint8_t(0x00), uint8_t(0xFF), 
uint8_t(0x11), + uint8_t(0xFF), uint8_t(0xBF), uint8_t(0x10), uint8_t(0xB9)); + } else if (base64_url) { + delta_values = _mm256_setr_epi8( + 0x0, 0x0, 0x0, 0x13, 0x4, uint8_t(0xBF), uint8_t(0xBF), uint8_t(0xB9), + uint8_t(0xB9), 0x0, 0x11, uint8_t(0xC3), uint8_t(0xBF), uint8_t(0xE0), + uint8_t(0xB9), uint8_t(0xB9), 0x0, 0x0, 0x0, 0x13, 0x4, uint8_t(0xBF), + uint8_t(0xBF), uint8_t(0xB9), uint8_t(0xB9), 0x0, 0x11, uint8_t(0xC3), + uint8_t(0xBF), uint8_t(0xE0), uint8_t(0xB9), uint8_t(0xB9)); + } else { + delta_values = _mm256_setr_epi8( + int8_t(0x00), int8_t(0x00), int8_t(0x00), int8_t(0x13), int8_t(0x04), + int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), int8_t(0xB9), int8_t(0x00), + int8_t(0x10), int8_t(0xC3), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), + int8_t(0xB9), int8_t(0x00), int8_t(0x00), int8_t(0x00), int8_t(0x13), + int8_t(0x04), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), int8_t(0xB9), + int8_t(0x00), int8_t(0x10), int8_t(0xC3), int8_t(0xBF), int8_t(0xBF), + int8_t(0xB9), int8_t(0xB9)); + } + + __m256i check_asso; + if (default_or_url) { + check_asso = _mm256_setr_epi8( + 0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x03, + 0x07, 0x0B, 0x0E, 0x0B, 0x06, 0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x03, 0x07, 0x0B, 0x0E, 0x0B, 0x06); + } else if (base64_url) { + check_asso = _mm256_setr_epi8(0xD, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, + 0x1, 0x3, 0x7, 0xB, 0xE, 0xB, 0x6, 0xD, 0x1, + 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x3, + 0x7, 0xB, 0xE, 0xB, 0x6); + } else { + check_asso = _mm256_setr_epi8( + 0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x03, + 0x07, 0x0B, 0x0B, 0x0B, 0x0F, 0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x03, 0x07, 0x0B, 0x0B, 0x0B, 0x0F); + } + __m256i check_values; + if (default_or_url) { + check_values = _mm256_setr_epi8( + uint8_t(0x80), uint8_t(0x80), uint8_t(0x80), uint8_t(0x80), + uint8_t(0xCF), uint8_t(0xBF), uint8_t(0xD5), uint8_t(0xA6), + uint8_t(0xB5), uint8_t(0xA1), 
uint8_t(0x00), uint8_t(0x80), + uint8_t(0x00), uint8_t(0x80), uint8_t(0x00), uint8_t(0x80), + uint8_t(0x80), uint8_t(0x80), uint8_t(0x80), uint8_t(0x80), + uint8_t(0xCF), uint8_t(0xBF), uint8_t(0xD5), uint8_t(0xA6), + uint8_t(0xB5), uint8_t(0xA1), uint8_t(0x00), uint8_t(0x80), + uint8_t(0x00), uint8_t(0x80), uint8_t(0x00), uint8_t(0x80)); + } else if (base64_url) { + check_values = _mm256_setr_epi8( + uint8_t(0x80), uint8_t(0x80), uint8_t(0x80), uint8_t(0x80), + uint8_t(0xCF), uint8_t(0xBF), uint8_t(0xB6), uint8_t(0xA6), + uint8_t(0xB5), uint8_t(0xA1), 0x0, uint8_t(0x80), 0x0, uint8_t(0x80), + 0x0, uint8_t(0x80), uint8_t(0x80), uint8_t(0x80), uint8_t(0x80), + uint8_t(0x80), uint8_t(0xCF), uint8_t(0xBF), uint8_t(0xB6), + uint8_t(0xA6), uint8_t(0xB5), uint8_t(0xA1), 0x0, uint8_t(0x80), 0x0, + uint8_t(0x80), 0x0, uint8_t(0x80)); + } else { + check_values = _mm256_setr_epi8( + int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0xCF), + int8_t(0xBF), int8_t(0xD5), int8_t(0xA6), int8_t(0xB5), int8_t(0x86), + int8_t(0xD1), int8_t(0x80), int8_t(0xB1), int8_t(0x80), int8_t(0x91), + int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), + int8_t(0xCF), int8_t(0xBF), int8_t(0xD5), int8_t(0xA6), int8_t(0xB5), + int8_t(0x86), int8_t(0xD1), int8_t(0x80), int8_t(0xB1), int8_t(0x80), + int8_t(0x91), int8_t(0x80)); + } + const __m256i shifted = _mm256_srli_epi32(*src, 3); + __m256i delta_hash = + _mm256_avg_epu8(_mm256_shuffle_epi8(delta_asso, *src), shifted); + if (default_or_url) { + delta_hash = _mm256_and_si256(delta_hash, _mm256_set1_epi8(0xf)); + } + const __m256i check_hash = + _mm256_avg_epu8(_mm256_shuffle_epi8(check_asso, *src), shifted); + const __m256i out = + _mm256_adds_epi8(_mm256_shuffle_epi8(delta_values, delta_hash), *src); + const __m256i chk = + _mm256_adds_epi8(_mm256_shuffle_epi8(check_values, check_hash), *src); + const int mask = _mm256_movemask_epi8(chk); + if (!ignore_garbage && mask) { + __m256i ascii_space = + 
_mm256_cmpeq_epi8(_mm256_shuffle_epi8(ascii_space_tbl, *src), *src); + *error = (mask ^ _mm256_movemask_epi8(ascii_space)); + } + *src = out; + return (uint32_t)mask; + } + + simdutf_really_inline uint64_t compress_block(uint64_t mask, char *output) { + if (is_power_of_two(mask)) { + return compress_block_single(mask, output); + } + + uint64_t nmask = ~mask; + compress(chunks[0], uint32_t(mask), output); + compress(chunks[1], uint32_t(mask >> 32), + output + count_ones(nmask & 0xFFFFFFFF)); + return count_ones(nmask); + } + + simdutf_really_inline size_t compress_block_single(uint64_t mask, + char *output) { + const size_t pos64 = trailing_zeroes(mask); + const int8_t pos = pos64 & 0xf; + switch (pos64 >> 4) { + case 0b00: { + const __m128i lane0 = _mm256_extracti128_si256(chunks[0], 0); + const __m128i lane1 = _mm256_extracti128_si256(chunks[0], 1); + + const __m128i v0 = _mm_set1_epi8(char(pos - 1)); + const __m128i v1 = + _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const __m128i v2 = _mm_cmpgt_epi8(v1, v0); + const __m128i sh = _mm_sub_epi8(v1, v2); + const __m128i compressed = _mm_shuffle_epi8(lane0, sh); + + _mm_storeu_si128((__m128i *)(output + 0 * 16), compressed); + _mm_storeu_si128((__m128i *)(output + 1 * 16 - 1), lane1); + _mm256_storeu_si256((__m256i *)(output + 2 * 16 - 1), chunks[1]); + } break; + case 0b01: { + const __m128i lane0 = _mm256_extracti128_si256(chunks[0], 0); + const __m128i lane1 = _mm256_extracti128_si256(chunks[0], 1); + _mm_storeu_si128((__m128i *)(output + 0 * 16), lane0); + + const __m128i v0 = _mm_set1_epi8(char(pos - 1)); + const __m128i v1 = + _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const __m128i v2 = _mm_cmpgt_epi8(v1, v0); + const __m128i sh = _mm_sub_epi8(v1, v2); + const __m128i compressed = _mm_shuffle_epi8(lane1, sh); + + _mm_storeu_si128((__m128i *)(output + 1 * 16), compressed); + _mm256_storeu_si256((__m256i *)(output + 2 * 16 - 1), chunks[1]); + } break; + case 
0b10: { + const __m128i lane2 = _mm256_extracti128_si256(chunks[1], 0); + const __m128i lane3 = _mm256_extracti128_si256(chunks[1], 1); + + _mm256_storeu_si256((__m256i *)(output + 0 * 16), chunks[0]); + + const __m128i v0 = _mm_set1_epi8(char(pos - 1)); + const __m128i v1 = + _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const __m128i v2 = _mm_cmpgt_epi8(v1, v0); + const __m128i sh = _mm_sub_epi8(v1, v2); + const __m128i compressed = _mm_shuffle_epi8(lane2, sh); + + _mm_storeu_si128((__m128i *)(output + 2 * 16), compressed); + _mm_storeu_si128((__m128i *)(output + 3 * 16 - 1), lane3); + } break; + case 0b11: { + const __m128i lane2 = _mm256_extracti128_si256(chunks[1], 0); + const __m128i lane3 = _mm256_extracti128_si256(chunks[1], 1); + + _mm256_storeu_si256((__m256i *)(output + 0 * 16), chunks[0]); + _mm_storeu_si128((__m128i *)(output + 2 * 16), lane2); + + const __m128i v0 = _mm_set1_epi8(char(pos - 1)); + const __m128i v1 = + _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const __m128i v2 = _mm_cmpgt_epi8(v1, v0); + const __m128i sh = _mm_sub_epi8(v1, v2); + const __m128i compressed = _mm_shuffle_epi8(lane3, sh); + + _mm_storeu_si128((__m128i *)(output + 3 * 16), compressed); + } break; + } + + return 63; + } +}; +/* end file src/haswell/avx2_base64.cpp */ +#endif // SIMDUTF_FEATURE_BASE64 + +} // unnamed namespace +} // namespace haswell +} // namespace simdutf + +/* begin file src/generic/buf_block_reader.h */ +namespace simdutf { +namespace haswell { +namespace { + +// Walks through a buffer in block-sized increments, loading the last part with +// spaces +template struct buf_block_reader { +public: + simdutf_really_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdutf_really_inline size_t block_index(); + simdutf_really_inline bool has_full_block() const; + simdutf_really_inline const uint8_t *full_block() const; + /** + * Get the last block, padded with spaces. 
+ * + * There will always be a last block, with at least 1 byte, unless len == 0 + * (in which case this function fills the buffer with spaces and returns 0. In + * particular, if len == STEP_SIZE there will be 0 full_blocks and 1 remainder + * block with STEP_SIZE bytes and no spaces for padding. + * + * @return the number of effective characters in the last block. + */ + simdutf_really_inline size_t get_remainder(uint8_t *dst) const; + simdutf_really_inline void advance(); + +private: + const uint8_t *buf; + const size_t len; + const size_t lenminusstep; + size_t idx; +}; + +// Routines to print masks and text for debugging bitmask operations +simdutf_unused static char *format_input_text_64(const uint8_t *text) { + static char *buf = + reinterpret_cast(malloc(sizeof(simd8x64) + 1)); + for (size_t i = 0; i < sizeof(simd8x64); i++) { + buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +// Routines to print masks and text for debugging bitmask operations +simdutf_unused static char *format_input_text(const simd8x64 &in) { + static char *buf = + reinterpret_cast(malloc(sizeof(simd8x64) + 1)); + in.store(reinterpret_cast(buf)); + for (size_t i = 0; i < sizeof(simd8x64); i++) { + if (buf[i] < ' ') { + buf[i] = '_'; + } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdutf_unused static char *format_mask(uint64_t mask) { + static char *buf = reinterpret_cast(malloc(64 + 1)); + for (size_t i = 0; i < 64; i++) { + buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; + } + buf[64] = '\0'; + return buf; +} + +template +simdutf_really_inline +buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) + : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 
0 : len - STEP_SIZE}, + idx{0} {} + +template +simdutf_really_inline size_t buf_block_reader::block_index() { + return idx; +} + +template +simdutf_really_inline bool buf_block_reader::has_full_block() const { + return idx < lenminusstep; +} + +template +simdutf_really_inline const uint8_t * +buf_block_reader::full_block() const { + return &buf[idx]; +} + +template +simdutf_really_inline size_t +buf_block_reader::get_remainder(uint8_t *dst) const { + if (len == idx) { + return 0; + } // memcpy(dst, null, 0) will trigger an error with some sanitizers + std::memset(dst, 0x20, + STEP_SIZE); // std::memset STEP_SIZE because it is more efficient + // to write out 8 or 16 bytes at once. + std::memcpy(dst, buf + idx, len - idx); + return len - idx; +} + +template +simdutf_really_inline void buf_block_reader::advance() { + idx += STEP_SIZE; +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdutf +/* end file src/generic/buf_block_reader.h */ +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +/* begin file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ +namespace simdutf { +namespace haswell { +namespace { +namespace utf8_validation { + +using namespace simd; + +simdutf_really_inline simd8 +check_special_cases(const simd8 input, const simd8 prev1) { + // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) + // Bit 1 = Too Long (ASCII followed by continuation) + // Bit 2 = Overlong 3-byte + // Bit 4 = Surrogate + // Bit 5 = Overlong 2-byte + // Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE 
= 1 << 3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); + constexpr const uint8_t CARRY = + TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . + const simd8 byte_1_low = + (prev1 & 0x0F) + .lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | + 
OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); + return (byte_1_high & byte_1_low & byte_2_high); +} +simdutf_really_inline simd8 +check_multibyte_lengths(const simd8 input, + const simd8 prev_input, + const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = + simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; +} + +// +// Return nonzero if there are incomplete multibyte characters at the end of the +// block: e.g. if there is a 4-byte character, but it is 3 bytes from the end. +// +simdutf_really_inline simd8 is_incomplete(const simd8 input) { + // If the previous input's last 3 bytes match this, they're too short (they + // ended at EOF): + // ... 1111____ 111_____ 11______ + static const uint8_t max_array[32] = {255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 0b11110000u - 1, + 0b11100000u - 1, + 0b11000000u - 1}; + const simd8 max_value( + &max_array[sizeof(max_array) - sizeof(simd8)]); + return input.gt_bits(max_value); +} + +struct utf8_checker { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + // The last input we received + simd8 prev_input_block; + // Whether the last input we received was incomplete (used for ASCII fast + // path) + simd8 prev_incomplete; + + // + // Check whether the current bytes are valid UTF-8. 
+ // + simdutf_really_inline void check_utf8_bytes(const simd8 input, + const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ + // lead bytes (2, 3, 4-byte leads become large positive numbers instead of + // small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + // The only problem that can happen at EOF is that a multibyte character is + // too short or a byte value too large in the last bytes: check_special_cases + // only checks for bytes too large in the first of two bytes. + simdutf_really_inline void check_eof() { + // If the previous block had incomplete UTF-8 characters at the end, an + // ASCII block can't possibly finish them. + this->error |= this->prev_incomplete; + } + + simdutf_really_inline void check_next_input(const simd8x64 &input) { + if (simdutf_likely(is_ascii(input))) { + this->error |= this->prev_incomplete; + } else { + // you might think that a for-loop would work, but under Visual Studio, it + // is not good enough. + static_assert((simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + this->prev_incomplete = + is_incomplete(input.chunks[simd8x64::NUM_CHUNKS - 1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS - 1]; + } + } + + // do not forget to call check_eof! 
+ simdutf_really_inline bool errors() const { + return this->error.any_bits_set_anywhere(); + } + +}; // struct utf8_checker +} // namespace utf8_validation + +using utf8_validation::utf8_checker; + +} // unnamed namespace +} // namespace haswell +} // namespace simdutf +/* end file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ +/* begin file src/generic/utf8_validation/utf8_validator.h */ +namespace simdutf { +namespace haswell { +namespace { +namespace utf8_validation { + +/** + * Validates that the string is actual UTF-8. + */ +template +bool generic_validate_utf8(const uint8_t *input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + return !c.errors(); +} + +bool generic_validate_utf8(const char *input, size_t length) { + return generic_validate_utf8( + reinterpret_cast(input), length); +} + +/** + * Validates that the string is actual UTF-8 and stops on errors. 
+ */ +template +result generic_validate_utf8_with_errors(const uint8_t *input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + size_t count{0}; + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + if (c.errors()) { + if (count != 0) { + count--; + } // Sometimes the error is only detected in the next chunk + result res = scalar::utf8::rewind_and_validate_with_errors( + reinterpret_cast(input), + reinterpret_cast(input + count), length - count); + res.count += count; + return res; + } + reader.advance(); + count += 64; + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + if (c.errors()) { + if (count != 0) { + count--; + } // Sometimes the error is only detected in the next chunk + result res = scalar::utf8::rewind_and_validate_with_errors( + reinterpret_cast(input), + reinterpret_cast(input) + count, length - count); + res.count += count; + return res; + } else { + return result(error_code::SUCCESS, length); + } +} + +result generic_validate_utf8_with_errors(const char *input, size_t length) { + return generic_validate_utf8_with_errors( + reinterpret_cast(input), length); +} + +} // namespace utf8_validation +} // unnamed namespace +} // namespace haswell +} // namespace simdutf +/* end file src/generic/utf8_validation/utf8_validator.h */ +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_ASCII +/* begin file src/generic/ascii_validation.h */ +namespace simdutf { +namespace haswell { +namespace { +namespace ascii_validation { + +result generic_validate_ascii_with_errors(const char *input, size_t length) { + buf_block_reader<64> reader(reinterpret_cast(input), length); + size_t count{0}; + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + if (!in.is_ascii()) { + result res = scalar::ascii::validate_with_errors( + 
reinterpret_cast(input + count), length - count); + return result(res.error, count + res.count); + } + reader.advance(); + + count += 64; + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + if (!in.is_ascii()) { + result res = scalar::ascii::validate_with_errors( + reinterpret_cast(input + count), length - count); + return result(res.error, count + res.count); + } else { + return result(error_code::SUCCESS, length); + } +} + +bool generic_validate_ascii(const char *input, size_t length) { + buf_block_reader<64> reader(reinterpret_cast(input), length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + if (!in.is_ascii()) { + return false; + } + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + return in.is_ascii(); +} + +} // namespace ascii_validation +} // unnamed namespace +} // namespace haswell +} // namespace simdutf +/* end file src/generic/ascii_validation.h */ +#endif // SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + // transcoding from UTF-8 to UTF-16 +/* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ +namespace simdutf { +namespace haswell { +namespace { +namespace utf8_to_utf16 { + +using namespace simd; + +template +simdutf_warn_unused size_t convert_valid(const char *input, size_t size, + char16_t *utf16_output) noexcept { + // The implementation is not specific to haswell and should be moved to the + // generic directory. + size_t pos = 0; + char16_t *start{utf16_output}; + const size_t safety_margin = 16; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + // this loop could be unrolled further. For example, we could process the + // mask far more than 64 bytes. + simd8x64 in(reinterpret_cast(input + pos)); + if (in.is_ascii()) { + in.store_ascii_as_utf16(utf16_output); + utf16_output += 64; + pos += 64; + } else { + // Slow path. 
We hope that the compiler will recognize that this is a slow + // path. Anything that is not a continuation mask is a 'leading byte', + // that is, the start of a new code point. + uint64_t utf8_continuation_mask = in.lt(-65 + 1); + // -65 is 0b10111111 in two-complement's, so largest possible continuation + // byte + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + // The *start* of code points is not so useful, rather, we want the *end* + // of code points. + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times when using solely + // the slow/regular path, and at least four times if there are fast paths. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + // + // Thus we may allow convert_masked_utf8_to_utf16 to process + // more bytes at a time under a fast-path mode where 16 bytes + // are consumed at once (e.g., when encountering ASCII). + size_t consumed = convert_masked_utf8_to_utf16( + input + pos, utf8_end_of_code_point_mask, utf16_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. 
So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + utf16_output += scalar::utf8_to_utf16::convert_valid( + input + pos, size - pos, utf16_output); + return utf16_output - start; +} + +} // namespace utf8_to_utf16 +} // unnamed namespace +} // namespace haswell +} // namespace simdutf +/* end file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ +/* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */ +namespace simdutf { +namespace haswell { +namespace { +namespace utf8_to_utf16 { +using namespace simd; + +simdutf_really_inline simd8 +check_special_cases(const simd8 input, const simd8 prev1) { + // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) + // Bit 1 = Too Long (ASCII followed by continuation) + // Bit 2 = Overlong 3-byte + // Bit 4 = Surrogate + // Bit 5 = Overlong 2-byte + // Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ 
________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); + constexpr const uint8_t CARRY = + TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . + const simd8 byte_1_low = + (prev1 & 0x0F) + .lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | + OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); + return (byte_1_high & byte_1_low & byte_2_high); +} +simdutf_really_inline simd8 +check_multibyte_lengths(const simd8 input, + const simd8 prev_input, + const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = + simd8(must_be_2_3_continuation(prev2, 
prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; +} + +struct validating_transcoder { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + + validating_transcoder() : error(uint8_t(0)) {} + // + // Check whether the current bytes are valid UTF-8. + // + simdutf_really_inline void check_utf8_bytes(const simd8 input, + const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ + // lead bytes (2, 3, 4-byte leads become large positive numbers instead of + // small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + template + simdutf_really_inline size_t convert(const char *in, size_t size, + char16_t *utf16_output) { + size_t pos = 0; + char16_t *start{utf16_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store_ascii_as_utf16(utf16_output); + utf16_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. 
+ static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (utf8_continuation_mask & 1) { + return 0; // error + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_utf16( + in + pos, utf8_end_of_code_point_mask, utf16_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. 
So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + return 0; + } + if (pos < size) { + size_t howmany = scalar::utf8_to_utf16::convert( + in + pos, size - pos, utf16_output); + if (howmany == 0) { + return 0; + } + utf16_output += howmany; + } + return utf16_output - start; + } + + template + simdutf_really_inline result convert_with_errors(const char *in, size_t size, + char16_t *utf16_output) { + size_t pos = 0; + char16_t *start{utf16_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store_ascii_as_utf16(utf16_output); + utf16_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. 
+ static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (errors() || (utf8_continuation_mask & 1)) { + // rewind_and_convert_with_errors will seek a potential error from + // in+pos onward, with the ability to go back up to pos bytes, and + // read size-pos bytes forward. + result res = + scalar::utf8_to_utf16::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf16_output); + res.count += pos; + return res; + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. 
+ size_t consumed = convert_masked_utf8_to_utf16( + in + pos, utf8_end_of_code_point_mask, utf16_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. + result res = + scalar::utf8_to_utf16::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf16_output); + res.count += pos; + return res; + } + if (pos < size) { + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. + result res = + scalar::utf8_to_utf16::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf16_output); + if (res.error) { // In case of error, we want the error position + res.count += pos; + return res; + } else { // In case of success, we want the number of word written + utf16_output += res.count; + } + } + return result(error_code::SUCCESS, utf16_output - start); + } + + simdutf_really_inline bool errors() const { + return this->error.any_bits_set_anywhere(); + } + +}; // struct utf8_checker +} // namespace utf8_to_utf16 +} // unnamed namespace +} // namespace haswell +} // namespace simdutf +/* end file src/generic/utf8_to_utf16/utf8_to_utf16.h */ +/* begin file src/generic/utf8/utf16_length_from_utf8_bytemask.h */ +namespace simdutf { +namespace haswell { +namespace { +namespace utf8 { + +using namespace simd; + +simdutf_really_inline size_t utf16_length_from_utf8_bytemask(const char *in, + size_t size) { + using vector_i8 = simd8; + using vector_u8 = simd8; + using vector_u64 = simd64; + + constexpr size_t N = 
vector_i8::SIZE; + constexpr size_t max_iterations = 255 / 2; + + auto counters = vector_u64::zero(); + auto local = vector_u8::zero(); + + size_t iterations = 0; + size_t pos = 0; + size_t count = 0; + for (; pos + N <= size; pos += N) { + const auto input = + vector_i8::load(reinterpret_cast(in + pos)); + + const auto continuation = input > int8_t(-65); + const auto utf_4bytes = vector_u8(input.value) >= uint8_t(240); + + local -= vector_u8(continuation); + local -= vector_u8(utf_4bytes); + + iterations += 1; + if (iterations == max_iterations) { + counters += sum_8bytes(local); + local = vector_u8::zero(); + iterations = 0; + } + } + + if (iterations > 0) { + count += local.sum_bytes(); + } + + count += counters.sum(); + + return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos); +} + +} // namespace utf8 +} // unnamed namespace +} // namespace haswell +} // namespace simdutf +/* end file src/generic/utf8/utf16_length_from_utf8_bytemask.h */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + // transcoding from UTF-8 to UTF-32 +/* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ +namespace simdutf { +namespace haswell { +namespace { +namespace utf8_to_utf32 { + +using namespace simd; + +simdutf_warn_unused size_t convert_valid(const char *input, size_t size, + char32_t *utf32_output) noexcept { + size_t pos = 0; + char32_t *start{utf32_output}; + const size_t safety_margin = 16; // to avoid overruns! 
+ while (pos + 64 + safety_margin <= size) { + simd8x64 in(reinterpret_cast(input + pos)); + if (in.is_ascii()) { + in.store_ascii_as_utf32(utf32_output); + utf32_output += 64; + pos += 64; + } else { + // -65 is 0b10111111 in two-complement's, so largest possible continuation + // byte + uint64_t utf8_continuation_mask = in.lt(-65 + 1); + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + size_t max_starting_point = (pos + 64) - 12; + while (pos < max_starting_point) { + size_t consumed = convert_masked_utf8_to_utf32( + input + pos, utf8_end_of_code_point_mask, utf32_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + } + } + utf32_output += scalar::utf8_to_utf32::convert_valid(input + pos, size - pos, + utf32_output); + return utf32_output - start; +} + +} // namespace utf8_to_utf32 +} // unnamed namespace +} // namespace haswell +} // namespace simdutf +/* end file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ +/* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */ +namespace simdutf { +namespace haswell { +namespace { +namespace utf8_to_utf32 { +using namespace simd; + +simdutf_really_inline simd8 +check_special_cases(const simd8 input, const simd8 prev1) { + // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) + // Bit 1 = Too Long (ASCII followed by continuation) + // Bit 2 = Overlong 3-byte + // Bit 4 = Surrogate + // Bit 5 = Overlong 2-byte + // Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1 << 3; // 
11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); + constexpr const uint8_t CARRY = + TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . + const simd8 byte_1_low = + (prev1 & 0x0F) + .lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | + OVERLONG_4, + // 
________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); + return (byte_1_high & byte_1_low & byte_2_high); +} +simdutf_really_inline simd8 +check_multibyte_lengths(const simd8 input, + const simd8 prev_input, + const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = + simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; +} + +struct validating_transcoder { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + + validating_transcoder() : error(uint8_t(0)) {} + // + // Check whether the current bytes are valid UTF-8. + // + simdutf_really_inline void check_utf8_bytes(const simd8 input, + const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ + // lead bytes (2, 3, 4-byte leads become large positive numbers instead of + // small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + simdutf_really_inline size_t convert(const char *in, size_t size, + char32_t *utf32_output) { + size_t pos = 0; + char32_t *start{utf32_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 words when calling convert_masked_utf8_to_utf32. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 16 leading bytes, to give us a good margin. 
+ size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough (it holds at least 8 non-continuation + // bytes), the loop above stops with in[margin] being the eighth-last + // leading byte; safety_margin is the distance from there to the end, + // plus one. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store_ascii_as_utf32(utf32_output); + utf32_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (utf8_continuation_mask & 1) { + return 0; // bit 0 of the continuation mask is set: we have an error + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. 
If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 input bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_utf32( + in + pos, utf8_end_of_code_point_mask, utf32_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + return 0; + } + if (pos < size) { + size_t howmany = + scalar::utf8_to_utf32::convert(in + pos, size - pos, utf32_output); + if (howmany == 0) { + return 0; + } + utf32_output += howmany; + } + return utf32_output - start; + } + + simdutf_really_inline result convert_with_errors(const char *in, size_t size, + char32_t *utf32_output) { + size_t pos = 0; + char32_t *start{utf32_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough (it holds at least 8 non-continuation + // bytes), the loop above stops with in[margin] being the eighth-last + // leading byte. 
+ const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store_ascii_as_utf32(utf32_output); + utf32_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (errors() || (utf8_continuation_mask & 1)) { + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf32_output); + res.count += pos; + return res; + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. 
+ // Yet we process between 6 to 12 input bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_utf32( + in + pos, utf8_end_of_code_point_mask, utf32_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf32_output); + res.count += pos; + return res; + } + if (pos < size) { + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf32_output); + if (res.error) { // In case of error, we want the error position + res.count += pos; + return res; + } else { // In case of success, we want the number of words written + utf32_output += res.count; + } + } + return result(error_code::SUCCESS, utf32_output - start); + } + + simdutf_really_inline bool errors() const { + return this->error.any_bits_set_anywhere(); + } + +}; // struct validating_transcoder +} // namespace utf8_to_utf32 +} // unnamed namespace +} // namespace haswell +} // namespace simdutf +/* end file src/generic/utf8_to_utf32/utf8_to_utf32.h */ +/* begin file src/generic/utf32.h */ +#include + +namespace simdutf { +namespace haswell { +namespace { +namespace utf32 { + +template T min(T a, T b) { return a <= b ? 
a : b; } + +simdutf_really_inline size_t utf8_length_from_utf32(const char32_t *input, + size_t length) { + using vector_u32 = simd32; + + const char32_t *start = input; + + // we add up to three ones in a single iteration (see the vectorized loop in + // section #2 below) + const size_t max_increment = 3; + + const size_t N = vector_u32::ELEMENTS; + +#if SIMDUTF_SIMD_HAS_UNSIGNED_CMP + const auto v_0000007f = vector_u32::splat(0x0000007f); + const auto v_000007ff = vector_u32::splat(0x000007ff); + const auto v_0000ffff = vector_u32::splat(0x0000ffff); +#else + const auto v_ffffff80 = vector_u32::splat(0xffffff80); + const auto v_fffff800 = vector_u32::splat(0xfffff800); + const auto v_ffff0000 = vector_u32::splat(0xffff0000); + const auto one = vector_u32::splat(1); +#endif // SIMDUTF_SIMD_HAS_UNSIGNED_CMP + + size_t counter = 0; + + // 1. vectorized loop unrolled 4 times + { + // we use vector of uint32 counters, this is why this limit is used + const size_t max_iterations = + std::numeric_limits::max() / (max_increment * 4); + size_t blocks = length / (N * 4); + length -= blocks * (N * 4); + while (blocks != 0) { + const size_t iterations = min(blocks, max_iterations); + blocks -= iterations; + + simd32 acc = vector_u32::zero(); + for (size_t i = 0; i < iterations; i++) { + const auto in0 = vector_u32(input + 0 * N); + const auto in1 = vector_u32(input + 1 * N); + const auto in2 = vector_u32(input + 2 * N); + const auto in3 = vector_u32(input + 3 * N); + +#if SIMDUTF_SIMD_HAS_UNSIGNED_CMP + acc -= as_vector_u32(in0 > v_0000007f); + acc -= as_vector_u32(in1 > v_0000007f); + acc -= as_vector_u32(in2 > v_0000007f); + acc -= as_vector_u32(in3 > v_0000007f); + + acc -= as_vector_u32(in0 > v_000007ff); + acc -= as_vector_u32(in1 > v_000007ff); + acc -= as_vector_u32(in2 > v_000007ff); + acc -= as_vector_u32(in3 > v_000007ff); + + acc -= as_vector_u32(in0 > v_0000ffff); + acc -= as_vector_u32(in1 > v_0000ffff); + acc -= as_vector_u32(in2 > v_0000ffff); + acc -= 
as_vector_u32(in3 > v_0000ffff); +#else + acc += min(one, in0 & v_ffffff80); + acc += min(one, in1 & v_ffffff80); + acc += min(one, in2 & v_ffffff80); + acc += min(one, in3 & v_ffffff80); + + acc += min(one, in0 & v_fffff800); + acc += min(one, in1 & v_fffff800); + acc += min(one, in2 & v_fffff800); + acc += min(one, in3 & v_fffff800); + + acc += min(one, in0 & v_ffff0000); + acc += min(one, in1 & v_ffff0000); + acc += min(one, in2 & v_ffff0000); + acc += min(one, in3 & v_ffff0000); +#endif // SIMDUTF_SIMD_HAS_UNSIGNED_CMP + + input += 4 * N; + } + + counter += acc.sum(); + } + } + + // 2. vectorized loop for tail + { + const size_t max_iterations = + std::numeric_limits::max() / max_increment; + size_t blocks = length / N; + length -= blocks * N; + while (blocks != 0) { + const size_t iterations = min(blocks, max_iterations); + blocks -= iterations; + + auto acc = vector_u32::zero(); + for (size_t i = 0; i < iterations; i++) { + const auto in = vector_u32(input); + +#if SIMDUTF_SIMD_HAS_UNSIGNED_CMP + acc -= as_vector_u32(in > v_0000007f); + acc -= as_vector_u32(in > v_000007ff); + acc -= as_vector_u32(in > v_0000ffff); +#else + acc += min(one, in & v_ffffff80); + acc += min(one, in & v_fffff800); + acc += min(one, in & v_ffff0000); +#endif // SIMDUTF_SIMD_HAS_UNSIGNED_CMP + + input += N; + } + + counter += acc.sum(); + } + } + + const size_t consumed = input - start; + if (consumed != 0) { + // We don't count 0th bytes in the vectorized loops above, this + // is why we need to count them in the end. 
+ counter += consumed; + } + + return counter + scalar::utf32::utf8_length_from_utf32(input, length); +} + +} // namespace utf32 +} // unnamed namespace +} // namespace haswell +} // namespace simdutf +/* end file src/generic/utf32.h */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +// other functions +#if SIMDUTF_FEATURE_UTF8 +/* begin file src/generic/utf8.h */ +namespace simdutf { +namespace haswell { +namespace { +namespace utf8 { + +using namespace simd; + +simdutf_really_inline size_t count_code_points(const char *in, size_t size) { + size_t pos = 0; + size_t count = 0; + for (; pos + 64 <= size; pos += 64) { + simd8x64 input(reinterpret_cast(in + pos)); + uint64_t utf8_continuation_mask = input.gt(-65); + count += count_ones(utf8_continuation_mask); + } + return count + scalar::utf8::count_code_points(in + pos, size - pos); +} + +#ifdef SIMDUTF_SIMD_HAS_BYTEMASK +simdutf_really_inline size_t count_code_points_bytemask(const char *in, + size_t size) { + using vector_i8 = simd8; + using vector_u8 = simd8; + using vector_u64 = simd64; + + constexpr size_t N = vector_i8::SIZE; + constexpr size_t max_iterations = 255 / 4; + + size_t pos = 0; + size_t count = 0; + + auto counters = vector_u64::zero(); + auto local = vector_u8::zero(); + size_t iterations = 0; + for (; pos + 4 * N <= size; pos += 4 * N) { + const auto input0 = + simd8::load(reinterpret_cast(in + pos + 0 * N)); + const auto input1 = + simd8::load(reinterpret_cast(in + pos + 1 * N)); + const auto input2 = + simd8::load(reinterpret_cast(in + pos + 2 * N)); + const auto input3 = + simd8::load(reinterpret_cast(in + pos + 3 * N)); + const auto mask0 = input0 > int8_t(-65); + const auto mask1 = input1 > int8_t(-65); + const auto mask2 = input2 > int8_t(-65); + const auto mask3 = input3 > int8_t(-65); + + local -= vector_u8(mask0); + local -= vector_u8(mask1); + local -= vector_u8(mask2); + local -= vector_u8(mask3); + + iterations += 1; + if (iterations == max_iterations) { + counters += 
sum_8bytes(local); + local = vector_u8::zero(); + iterations = 0; + } + } + + if (iterations > 0) { + count += local.sum_bytes(); + } + + count += counters.sum(); + + return count + scalar::utf8::count_code_points(in + pos, size - pos); +} +#endif // SIMDUTF_SIMD_HAS_BYTEMASK + +simdutf_really_inline size_t utf16_length_from_utf8(const char *in, + size_t size) { + size_t pos = 0; + size_t count = 0; + // This algorithm could no doubt be improved! + for (; pos + 64 <= size; pos += 64) { + simd8x64 input(reinterpret_cast(in + pos)); + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + // We count one word for anything that is not a continuation (so + // leading bytes). + count += 64 - count_ones(utf8_continuation_mask); + int64_t utf8_4byte = input.gteq_unsigned(240); + count += count_ones(utf8_4byte); + } + return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos); +} + +} // namespace utf8 +} // unnamed namespace +} // namespace haswell +} // namespace simdutf +/* end file src/generic/utf8.h */ +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF16 +/* begin file src/generic/utf16.h */ +namespace simdutf { +namespace haswell { +namespace { +namespace utf16 { + +template +simdutf_really_inline size_t count_code_points(const char16_t *in, + size_t size) { + size_t pos = 0; + size_t count = 0; + for (; pos < size / 32 * 32; pos += 32) { + simd16x32 input(reinterpret_cast(in + pos)); + if simdutf_constexpr (!match_system(big_endian)) { + input.swap_bytes(); + } + uint64_t not_pair = input.not_in_range(0xDC00, 0xDFFF); + count += count_ones(not_pair) / 2; + } + return count + + scalar::utf16::count_code_points(in + pos, size - pos); +} + +template +simdutf_really_inline size_t utf8_length_from_utf16(const char16_t *in, + size_t size) { + size_t pos = 0; + size_t count = 0; + // This algorithm could no doubt be improved! 
+ for (; pos < size / 32 * 32; pos += 32) { + simd16x32 input(reinterpret_cast(in + pos)); + if simdutf_constexpr (!match_system(big_endian)) { + input.swap_bytes(); + } + uint64_t ascii_mask = input.lteq(0x7F); + uint64_t twobyte_mask = input.lteq(0x7FF); + uint64_t not_pair_mask = input.not_in_range(0xD800, 0xDFFF); + + size_t ascii_count = count_ones(ascii_mask) / 2; + size_t twobyte_count = count_ones(twobyte_mask & ~ascii_mask) / 2; + size_t threebyte_count = count_ones(not_pair_mask & ~twobyte_mask) / 2; + size_t fourbyte_count = 32 - count_ones(not_pair_mask) / 2; + count += 2 * fourbyte_count + 3 * threebyte_count + 2 * twobyte_count + + ascii_count; + } + return count + scalar::utf16::utf8_length_from_utf16(in + pos, + size - pos); +} + +template +simdutf_really_inline size_t utf32_length_from_utf16(const char16_t *in, + size_t size) { + return count_code_points(in, size); +} + +simdutf_really_inline void +change_endianness_utf16(const char16_t *in, size_t size, char16_t *output) { + size_t pos = 0; + + while (pos < size / 32 * 32) { + simd16x32 input(reinterpret_cast(in + pos)); + input.swap_bytes(); + input.store(reinterpret_cast(output)); + pos += 32; + output += 32; + } + + scalar::utf16::change_endianness_utf16(in + pos, size - pos, output); +} + +} // namespace utf16 +} // unnamed namespace +} // namespace haswell +} // namespace simdutf +/* end file src/generic/utf16.h */ +/* begin file src/generic/utf16/utf8_length_from_utf16_bytemask.h */ +namespace simdutf { +namespace haswell { +namespace { +namespace utf16 { + +using namespace simd; + +template +simdutf_really_inline size_t utf8_length_from_utf16_bytemask(const char16_t *in, + size_t size) { + size_t pos = 0; + + using vector_u16 = simd16; + constexpr size_t N = vector_u16::ELEMENTS; + + const auto one = vector_u16::splat(1); + + auto v_count = vector_u16::zero(); + + // each char16 yields at least one byte + size_t count = size / N * N; + + // in a single iteration the increment is 0, 1 or 2, 
despite we have + // three additions + constexpr size_t max_iterations = 65535 / 2; + size_t iteration = max_iterations; + + for (; pos < size / N * N; pos += N) { + auto input = vector_u16::load(reinterpret_cast(in + pos)); + if simdutf_constexpr (!match_system(big_endian)) { + input = input.swap_bytes(); + } + // 0xd800 .. 0xdbff - high (leading) surrogate + // 0xdc00 .. 0xdfff - low (trailing) surrogate + const auto is_surrogate = ((input & uint16_t(0xf800)) == uint16_t(0xd800)); + + // c0 - chars that yield 2- or 3-byte UTF-8 codes + const auto c0 = min(input & uint16_t(0xff80), one); + + // c1 - chars that yield 3-byte UTF-8 codes (including surrogates) + const auto c1 = min(input & uint16_t(0xf800), one); + + /* + Explanation of how the counting works. + + In the case of a non-surrogate character we count: + * always 1 -- see how `count` is initialized above; + * c0 = 1 if the current char yields 2 or 3 bytes; + * c1 = 1 if the current char yields 3 bytes. + + Thus, we always have the correct count for the current char: + 1, 2 or 3 bytes. + + A trickier part is how we count surrogate pairs. Whenever + we encounter a surrogate (high or low), we count it as + 3 bytes (1 + c0 + c1) and then subtract 1 (`is_surrogate` is + -1 or 0), so each surrogate code unit yields 2. A surrogate + pair, that is a high surrogate followed by a low one, yields + the expected 4 bytes. + + It also correctly handles the case where the high surrogate is + processed by this loop, but the low surrogate is counted + by the scalar procedure. The scalar procedure uses exactly + the described approach, thanks to that for valid UTF-16 + strings it always counts correctly. 
+ */ + v_count += c0; + v_count += c1; + v_count += vector_u16(is_surrogate); + + iteration -= 1; + if (iteration == 0) { + count += v_count.sum(); + v_count = vector_u16::zero(); + iteration = max_iterations; + } + } + + if (iteration > 0) { + count += v_count.sum(); + } + + return count + scalar::utf16::utf8_length_from_utf16(in + pos, + size - pos); +} + +template +simdutf_really_inline result +utf8_length_from_utf16_with_replacement(const char16_t *in, size_t size) { + using vector_u16 = simd16; + constexpr size_t N = vector_u16::ELEMENTS; + if (N + 1 > size) { + return scalar::utf16::utf8_length_from_utf16_with_replacement( + in, size); + } // special case for short inputs + size_t pos = 0; + bool any_surrogates = false; + + const auto one = vector_u16::splat(1); + + auto v_count = vector_u16::zero(); + auto v_mismatched_count = vector_u16::zero(); + + size_t count = 0; + size_t mismatched_count = 0; + + // in a single iteration the increment is 0, 1 or 2, despite we have + // three additions + constexpr size_t max_iterations = 65535 / 2; + size_t iteration = max_iterations; + + if (scalar::utf16::is_low_surrogate(in[0])) { + any_surrogates = true; + mismatched_count += 1; + } + + for (; pos < (size - 1) / N * N; pos += N) { + auto input = vector_u16::load(reinterpret_cast(in + pos)); + if simdutf_constexpr (!match_system(big_endian)) { + input = input.swap_bytes(); + } + // 0xd800 .. 0xdbff - low surrogate + // 0xdc00 .. 
0xdfff - high surrogate + const auto is_surrogate = ((input & uint16_t(0xf800)) == uint16_t(0xd800)); + + // c0 - chars that yield 2- or 3-byte UTF-8 codes + const auto c0 = min(input & uint16_t(0xff80), one); + + // c1 - chars that yield 3-byte UTF-8 codes (including surrogates) + const auto c1 = min(input & uint16_t(0xf800), one); + + v_count += c0; + v_count += c1; + v_count += vector_u16(is_surrogate); + // The pairing check below only runs when this block holds a surrogate, + // or when the code unit right after the block is a low surrogate. + if (is_surrogate.to_bitmask() != 0 || + scalar::utf16::is_low_surrogate(in[pos + N])) { + any_surrogates = true; + auto input_next = + vector_u16::load(reinterpret_cast(in + pos + 1)); + if simdutf_constexpr (!match_system(big_endian)) { + input_next = input_next.swap_bytes(); + } + + // NOTE(review): the names below follow the Unicode convention, where + // (unit & 0xfc00) == 0xd800 is a high (leading) surrogate and + // (unit & 0xfc00) == 0xdc00 is a low (trailing) surrogate. The range + // labels in the comment above is_surrogate are the other way round. + const auto lb_masked = input & (0xfc00); + const auto block_masked = input_next & (0xfc00); + + const auto lb_is_high = lb_masked == (0xd800); + const auto block_is_low = block_masked == (0xdc00); + + // illseq is 1 in every lane where exactly one of (this unit is a high + // surrogate) and (the next unit is a low surrogate) holds, i.e. where + // a surrogate is not properly paired. + const auto illseq = min(vector_u16(lb_is_high ^ block_is_low), one); + + v_mismatched_count += illseq; + } + + iteration -= 1; + if (iteration == 0) { + count += v_count.sum(); + v_count = vector_u16::zero(); + mismatched_count += v_mismatched_count.sum(); + v_mismatched_count = vector_u16::zero(); + iteration = max_iterations; + } + } + + if (iteration > 0) { + count += v_count.sum(); + mismatched_count += v_mismatched_count.sum(); + } + + if (scalar::utf16::is_low_surrogate(in[pos])) { + any_surrogates = true; + if (!scalar::utf16::is_high_surrogate(in[pos - 1])) { + mismatched_count -= 1; + count += 2; + pos += 1; + } + } + count += pos; + count += mismatched_count; + if (scalar::utf16::is_high_surrogate(in[pos - 1])) { + any_surrogates = true; + if (pos == size) { + count += 2; + } else if (scalar::utf16::is_low_surrogate(in[pos])) { + pos += 1; + count += 2; + } + } + result scalar_result = + scalar::utf16::utf8_length_from_utf16_with_replacement( + in + pos, size - pos); + return {any_surrogates ? 
SURROGATE : scalar_result.error, + count + scalar_result.count}; +} + +} // namespace utf16 +} // unnamed namespace +} // namespace haswell +} // namespace simdutf +/* end file src/generic/utf16/utf8_length_from_utf16_bytemask.h */ +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +/* begin file src/generic/validate_utf16.h */ +namespace simdutf { +namespace haswell { +namespace { +namespace utf16 { +/* + UTF-16 validation + -------------------------------------------------- + + In UTF-16, code units in the range 0xD800 to 0xDFFF have special meaning. + + In a vectorized algorithm we want to examine the most significant + nibble in order to select a fast path. If none of the highest nibbles + is 0xD (13), then we are sure that the UTF-16 chunk in a vector + register is valid. + + Let us analyze what we need to check if the nibble is 0xD. The + value of the preceding nibble determines what we have: + + 0xd000 .. 0xd7ff - a valid word + 0xd800 .. 0xdbff - low surrogate + 0xdc00 .. 0xdfff - high surrogate + + (Naming note: the labels low/high used throughout this comment are the + reverse of the Unicode standard's, which calls 0xd800 .. 0xdbff high + surrogates and 0xdc00 .. 0xdfff low surrogates.) + + Other constraints we have to consider: + - there must not be two consecutive low surrogates (0xd800 .. 0xdbff) + - there must not be two consecutive high surrogates (0xdc00 .. 0xdfff) + - there must not be a sole low surrogate nor a sole high surrogate + + We are going to build three bitmasks based on the 3rd nibble: + - V = valid word, + - L = low surrogate (0xd800 .. 0xdbff) + - H = high surrogate (0xdc00 .. 
0xdfff) + + 0 1 2 3 4 5 6 7 <--- word index + [ V | L | H | L | H | V | V | L ] + 1 0 0 0 0 1 1 0 - V = valid masks + 0 1 0 1 0 0 0 1 - L = low surrogate + 0 0 1 0 1 0 0 0 - H high surrogate + + + 1 0 0 0 0 1 1 0 V = valid masks + 0 1 0 1 0 0 0 0 a = L & (H >> 1) + 0 0 1 0 1 0 0 0 b = a << 1 + 1 1 1 1 1 1 1 0 c = V | a | b + ^ + the last bit can be zero, we just consume 7 + code units and recheck this word in the next iteration +*/ +template +const result validate_utf16_with_errors(const char16_t *input, size_t size) { + if (simdutf_unlikely(size == 0)) { + return result(error_code::SUCCESS, 0); + } + + const char16_t *start = input; + const char16_t *end = input + size; + + const auto v_d8 = simd8::splat(0xd8); + const auto v_f8 = simd8::splat(0xf8); + const auto v_fc = simd8::splat(0xfc); + const auto v_dc = simd8::splat(0xdc); + + while (input + simd16::SIZE * 2 < end) { + // 0. Load data: since the validation takes into account only higher + // byte of each word, we compress the two vectors into one which + // consists only the higher bytes. + auto in0 = simd16(input); + auto in1 = + simd16(input + simd16::SIZE / sizeof(char16_t)); + + // Function `utf16_gather_high_bytes` consumes two vectors of UTF-16 + // and yields a single vector having only higher bytes of characters. + const auto in = utf16_gather_high_bytes(in0, in1); + + // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy). + const auto surrogates_wordmask = (in & v_f8) == v_d8; + const uint16_t surrogates_bitmask = + static_cast(surrogates_wordmask.to_bitmask()); + if (surrogates_bitmask == 0x0000) { + input += 16; + } else { + // 2. 
We have some surrogates that have to be distinguished: + // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF) + // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF) + // + // Fact: high surrogate has 11th bit set (3rd bit in the higher byte) + + // V - non-surrogate code units + // V = not surrogates_wordmask + const uint16_t V = static_cast(~surrogates_bitmask); + + // H - word-mask for high surrogates: the six highest bits are 0b1101'11 + const auto vH = (in & v_fc) == v_dc; + const uint16_t H = static_cast(vH.to_bitmask()); + + // L - word mask for low surrogates + // L = not H and surrogates_wordmask + const uint16_t L = static_cast(~H & surrogates_bitmask); + + const uint16_t a = static_cast( + L & (H >> 1)); // A low surrogate must be followed by high one. + // (A low surrogate placed in the 7th register's word + // is an exception we handle.) + const uint16_t b = static_cast( + a << 1); // Just mark that the opinput - startite fact is hold, + // thanks to that we have only two masks for valid case. + const uint16_t c = static_cast( + V | a | b); // Combine all the masks into the final one. + + if (c == 0xffff) { + // The whole input register contains valid UTF-16, i.e., + // either single code units or proper surrogate pairs. + input += 16; + } else if (c == 0x7fff) { + // The 15 lower code units of the input register contains valid UTF-16. + // The 15th word may be either a low or high surrogate. It the next + // iteration we 1) check if the low surrogate is followed by a high + // one, 2) reject sole high surrogate. 
+ input += 15; + } else { + return result(error_code::SURROGATE, input - start); + } + } + } + + return result(error_code::SUCCESS, input - start); +} + +template +const result validate_utf16_as_ascii_with_errors(const char16_t *input, + size_t size) { + if (simdutf_unlikely(size == 0)) { + return result(error_code::SUCCESS, 0); + } + size_t pos = 0; + for (; pos < size / 32 * 32; pos += 32) { + simd16x32 input_vec( + reinterpret_cast(input + pos)); + if simdutf_constexpr (!match_system(big_endian)) { + input_vec.swap_bytes(); + } + uint64_t matches = input_vec.lteq(uint16_t(0x7f)); + if (~matches) { + // Found a match, return the first one + int index = trailing_zeroes(~matches) / 2; + return result(error_code::TOO_LARGE, pos + index); + } + } + + // Scalar tail + while (pos < size) { + + char16_t v = scalar::utf16::swap_if_needed(input[pos]); + if (v > 0x7F) { + return result(error_code::TOO_LARGE, pos); + } + pos++; + } + return result(error_code::SUCCESS, size); +} + +} // namespace utf16 +} // unnamed namespace +} // namespace haswell +} // namespace simdutf +/* end file src/generic/validate_utf16.h */ +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + // transcoding from UTF-8 to Latin 1 +/* begin file src/generic/utf8_to_latin1/utf8_to_latin1.h */ +namespace simdutf { +namespace haswell { +namespace { +namespace utf8_to_latin1 { +using namespace simd; + +simdutf_really_inline simd8 +check_special_cases(const simd8 input, const simd8 prev1) { + // For UTF-8 to Latin 1, we can allow any ASCII character, and any + // continuation byte, but the non-ASCII leading bytes must be 0b11000011 or + // 0b11000010 and nothing else. 
+ // + // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) + // Bit 1 = Too Long (ASCII followed by continuation) + // Bit 2 = Overlong 3-byte + // Bit 4 = Surrogate + // Bit 5 = Overlong 2-byte + // Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ + constexpr const uint8_t FORBIDDEN = 0xff; + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + FORBIDDEN, + // 1110____ ________ + FORBIDDEN, + // 1111____ ________ + FORBIDDEN); + constexpr const uint8_t CARRY = + TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . 
+ const simd8 byte_1_low = + (prev1 & 0x0F) + .lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, CARRY, + + // ____0100 ________ + FORBIDDEN, + // ____0101 ________ + FORBIDDEN, + // ____011_ ________ + FORBIDDEN, FORBIDDEN, + + // ____1___ ________ + FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN, + // ____1101 ________ + FORBIDDEN, FORBIDDEN, FORBIDDEN); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | + OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); + return (byte_1_high & byte_1_low & byte_2_high); +} + +struct validating_transcoder { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + + validating_transcoder() : error(uint8_t(0)) {} + // + // Check whether the current bytes are valid UTF-8. + // + simdutf_really_inline void check_utf8_bytes(const simd8 input, + const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ + // lead bytes (2, 3, 4-byte leads become large positive numbers instead of + // small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + this->error |= check_special_cases(input, prev1); + } + + simdutf_really_inline size_t convert(const char *in, size_t size, + char *latin1_output) { + size_t pos = 0; + char *start{latin1_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_latin1. 
If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 16 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 16; margin--) { + leading_byte += (int8_t(in[margin - 1]) > + -65); // twos complement of -65 is 1011 1111 ... + } + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store((int8_t *)latin1_output); + latin1_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = + input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in + // this case, we also have ASCII to account for. 
+ if (utf8_continuation_mask & 1) { + return 0; // error + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_latin1( + in + pos, utf8_end_of_code_point_mask, latin1_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + return 0; + } + if (pos < size) { + size_t howmany = + scalar::utf8_to_latin1::convert(in + pos, size - pos, latin1_output); + if (howmany == 0) { + return 0; + } + latin1_output += howmany; + } + return latin1_output - start; + } + + simdutf_really_inline result convert_with_errors(const char *in, size_t size, + char *latin1_output) { + size_t pos = 0; + char *start{latin1_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_latin1. 
If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store((int8_t *)latin1_output); + latin1_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + if (errors()) { + // rewind_and_convert_with_errors will seek a potential error from + // in+pos onward, with the ability to go back up to pos bytes, and + // read size-pos bytes forward. 
+ result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( + pos, in + pos, size - pos, latin1_output); + res.count += pos; + return res; + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_latin1( + in + pos, utf8_end_of_code_point_mask, latin1_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. 
+ result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( + pos, in + pos, size - pos, latin1_output); + res.count += pos; + return res; + } + if (pos < size) { + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. + result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( + pos, in + pos, size - pos, latin1_output); + if (res.error) { // In case of error, we want the error position + res.count += pos; + return res; + } else { // In case of success, we want the number of word written + latin1_output += res.count; + } + } + return result(error_code::SUCCESS, latin1_output - start); + } + + simdutf_really_inline bool errors() const { + return this->error.any_bits_set_anywhere(); + } + +}; // struct utf8_checker +} // namespace utf8_to_latin1 +} // unnamed namespace +} // namespace haswell +} // namespace simdutf +/* end file src/generic/utf8_to_latin1/utf8_to_latin1.h */ +/* begin file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ +namespace simdutf { +namespace haswell { +namespace { +namespace utf8_to_latin1 { +using namespace simd; + +simdutf_really_inline size_t convert_valid(const char *in, size_t size, + char *latin1_output) { + size_t pos = 0; + char *start{latin1_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the last + // 16 bytes, and if the data is valid, then it is entirely safe because 16 + // UTF-8 bytes generate much more than 8 bytes. However, you cannot generally + // assume that you have valid UTF-8 input, so we are going to go back from the + // end counting 8 leading bytes, to give us a good margin. 
+ size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > + -65); // twos complement of -65 is 1011 1111 ... + } + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store((int8_t *)latin1_output); + latin1_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, it + // is not good enough. + uint64_t utf8_continuation_mask = + input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in + // this case, we also have ASCII to account for. + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. 
+ size_t consumed = convert_masked_utf8_to_latin1( + in + pos, utf8_end_of_code_point_mask, latin1_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (pos < size) { + size_t howmany = scalar::utf8_to_latin1::convert_valid(in + pos, size - pos, + latin1_output); + latin1_output += howmany; + } + return latin1_output - start; +} + +} // namespace utf8_to_latin1 +} // namespace +} // namespace haswell +} // namespace simdutf + // namespace simdutf +/* end file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +/* begin file src/generic/validate_utf32.h */ +namespace simdutf { +namespace haswell { +namespace { +namespace utf32 { + +simdutf_really_inline bool validate(const char32_t *input, size_t size) { + if (simdutf_unlikely(size == 0)) { + // empty input is valid UTF-32. 
protect the implementation from + // handling nullptr + return true; + } + + const char32_t *end = input + size; + + using vector_u32 = simd32; + + const auto standardmax = vector_u32::splat(0x10ffff); + const auto offset = vector_u32::splat(0xffff2000); + const auto standardoffsetmax = vector_u32::splat(0xfffff7ff); + auto currentmax = vector_u32::zero(); + auto currentoffsetmax = vector_u32::zero(); + + constexpr size_t N = vector_u32::ELEMENTS; + + while (input + N < end) { + auto in = vector_u32(input); + if simdutf_constexpr (!match_system(endianness::BIG)) { + in.swap_bytes(); + } + + currentmax = max(currentmax, in); + currentoffsetmax = max(currentoffsetmax, in + offset); + input += N; + } + + const auto too_large = currentmax > standardmax; + if (too_large.any()) { + return false; + } + + const auto surrogate = currentoffsetmax > standardoffsetmax; + if (surrogate.any()) { + return false; + } + + return scalar::utf32::validate(input, end - input); +} + +simdutf_really_inline result validate_with_errors(const char32_t *input, + size_t size) { + if (simdutf_unlikely(size == 0)) { + // empty input is valid UTF-32. 
protect the implementation from + // handling nullptr + return result(error_code::SUCCESS, 0); + } + + const char32_t *start = input; + const char32_t *end = input + size; + + using vector_u32 = simd32; + + const auto standardmax = vector_u32::splat(0x10ffff + 1); + const auto surrogate_mask = vector_u32::splat(0xfffff800); + const auto surrogate_byte = vector_u32::splat(0x0000d800); + + constexpr size_t N = vector_u32::ELEMENTS; + + while (input + N < end) { + auto in = vector_u32(input); + if simdutf_constexpr (!match_system(endianness::BIG)) { + in.swap_bytes(); + } + + const auto too_large = in >= standardmax; + const auto surrogate = (in & surrogate_mask) == surrogate_byte; + + const auto combined = too_large | surrogate; + if (simdutf_unlikely(combined.any())) { + const size_t consumed = input - start; + auto sr = scalar::utf32::validate_with_errors(input, end - input); + sr.count += consumed; + + return sr; + } + + input += N; + } + + const size_t consumed = input - start; + auto sr = scalar::utf32::validate_with_errors(input, end - input); + sr.count += consumed; + + return sr; +} + +} // namespace utf32 +} // unnamed namespace +} // namespace haswell +} // namespace simdutf +/* end file src/generic/validate_utf32.h */ +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_BASE64 +/* begin file src/generic/base64.h */ +/** + * References and further reading: + * + * Wojciech Muła, Daniel Lemire, Base64 encoding and decoding at almost the + * speed of a memory copy, Software: Practice and Experience 50 (2), 2020. + * https://arxiv.org/abs/1910.05109 + * + * Wojciech Muła, Daniel Lemire, Faster Base64 Encoding and Decoding using AVX2 + * Instructions, ACM Transactions on the Web 12 (3), 2018. + * https://arxiv.org/abs/1704.00605 + * + * Simon Josefsson. 2006. The Base16, Base32, and Base64 Data Encodings. + * https://tools.ietf.org/html/rfc4648. (2006). Internet Engineering Task Force, + * Request for Comments: 4648. 
+ * + * Alfred Klomp. 2014a. Fast Base64 encoding/decoding with SSE vectorization. + * http://www.alfredklomp.com/programming/sse-base64/. (2014). + * + * Alfred Klomp. 2014b. Fast Base64 stream encoder/decoder in C99, with SIMD + * acceleration. https://github.com/aklomp/base64. (2014). + * + * Hanson Char. 2014. A Fast and Correct Base 64 Codec. (2014). + * https://aws.amazon.com/blogs/developer/a-fast-and-correct-base-64-codec/ + * + * Nick Kopp. 2013. Base64 Encoding on a GPU. + * https://www.codeproject.com/Articles/276993/Base-Encoding-on-a-GPU. (2013). + */ +namespace simdutf { +namespace haswell { +namespace { +namespace base64 { + +/* + The following template function implements API for Base64 decoding. + + An implementation is responsible for providing the `block64` type and + associated methods that perform actual conversion. Please refer + to any vectorized implementation to learn the API of these procedures. +*/ +template +full_result +compress_decode_base64(char *dst, const chartype *src, size_t srclen, + base64_options options, + last_chunk_handling_options last_chunk_options) { + const uint8_t *to_base64 = + default_or_url ? tables::base64::to_base64_default_or_url_value + : (base64_url ? tables::base64::to_base64_url_value + : tables::base64::to_base64_value); + auto ri = simdutf::scalar::base64::find_end(src, srclen, options); + size_t equallocation = ri.equallocation; + size_t equalsigns = ri.equalsigns; + srclen = ri.srclen; + size_t full_input_length = ri.full_input_length; + if (srclen == 0) { + if (!ignore_garbage && equalsigns > 0) { + return {INVALID_BASE64_CHARACTER, equallocation, 0}; + } + return {SUCCESS, full_input_length, 0}; + } + char *end_of_safe_64byte_zone = + dst == nullptr + ? nullptr + : ((srclen + 3) / 4 * 3 >= 63 ? 
dst + (srclen + 3) / 4 * 3 - 63 + : dst); + + const chartype *const srcinit = src; + const char *const dstinit = dst; + const chartype *const srcend = src + srclen; + + constexpr size_t block_size = 6; + static_assert(block_size >= 2, "block_size must be at least two"); + char buffer[block_size * 64]; + char *bufferptr = buffer; + if (srclen >= 64) { + const chartype *const srcend64 = src + srclen - 64; + while (src <= srcend64) { + block64 b(src); + src += 64; + uint64_t error = 0; + const uint64_t badcharmask = + b.to_base64_mask(&error); + if (!ignore_garbage && error) { + src -= 64; + const size_t error_offset = trailing_zeroes(error); + return {error_code::INVALID_BASE64_CHARACTER, + size_t(src - srcinit + error_offset), size_t(dst - dstinit)}; + } + if (badcharmask != 0) { + bufferptr += b.compress_block(badcharmask, bufferptr); + } else if (bufferptr != buffer) { + b.copy_block(bufferptr); + bufferptr += 64; + } else { + if (dst >= end_of_safe_64byte_zone) { + b.base64_decode_block_safe(dst); + } else { + b.base64_decode_block(dst); + } + dst += 48; + } + if (bufferptr >= (block_size - 1) * 64 + buffer) { + for (size_t i = 0; i < (block_size - 2); i++) { + base64_decode_block(dst, buffer + i * 64); + dst += 48; + } + if (dst >= end_of_safe_64byte_zone) { + base64_decode_block_safe(dst, buffer + (block_size - 2) * 64); + } else { + base64_decode_block(dst, buffer + (block_size - 2) * 64); + } + dst += 48; + std::memcpy(buffer, buffer + (block_size - 1) * 64, + 64); // 64 might be too much + bufferptr -= (block_size - 1) * 64; + } + } + } + + char *buffer_start = buffer; + // Optimization note: if this is almost full, then it is worth our + // time, otherwise, we should just decode directly. 
+ int last_block = (int)((bufferptr - buffer_start) % 64); + if (last_block != 0 && srcend - src + last_block >= 64) { + + while ((bufferptr - buffer_start) % 64 != 0 && src < srcend) { + uint8_t val = to_base64[uint8_t(*src)]; + *bufferptr = char(val); + if (!ignore_garbage && + (!scalar::base64::is_eight_byte(*src) || val > 64)) { + return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), + size_t(dst - dstinit)}; + } + bufferptr += (val <= 63); + src++; + } + } + + for (; buffer_start + 64 <= bufferptr; buffer_start += 64) { + if (dst >= end_of_safe_64byte_zone) { + base64_decode_block_safe(dst, buffer_start); + } else { + base64_decode_block(dst, buffer_start); + } + dst += 48; + } + if ((bufferptr - buffer_start) % 64 != 0) { + while (buffer_start + 4 < bufferptr) { + uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + + (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + + (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + + (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) + << 8; +#if !SIMDUTF_IS_BIG_ENDIAN + triple = scalar::u32_swap_bytes(triple); +#endif + std::memcpy(dst, &triple, 3); + + dst += 3; + buffer_start += 4; + } + if (buffer_start + 4 <= bufferptr) { + uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + + (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + + (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + + (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) + << 8; +#if !SIMDUTF_IS_BIG_ENDIAN + triple = scalar::u32_swap_bytes(triple); +#endif + std::memcpy(dst, &triple, 3); + + dst += 3; + buffer_start += 4; + } + // we may have 1, 2 or 3 bytes left and we need to decode them so let us + // backtrack + int leftover = int(bufferptr - buffer_start); + while (leftover > 0) { + if (!ignore_garbage) { + while (to_base64[uint8_t(*(src - 1))] == 64) { + src--; + } + } else { + while (to_base64[uint8_t(*(src - 1))] >= 64) { + src--; + } + } + src--; + leftover--; + } + } + if (src < srcend + equalsigns) { + full_result r = 
scalar::base64::base64_tail_decode( + dst, src, srcend - src, equalsigns, options, last_chunk_options); + r = scalar::base64::patch_tail_result( + r, size_t(src - srcinit), size_t(dst - dstinit), equallocation, + full_input_length, last_chunk_options); + // When is_partial(last_chunk_options) is true, we must either end with + // the end of the stream (beyond whitespace) or right after a non-ignorable + // character or at the very beginning of the stream. + // See https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64 + if (is_partial(last_chunk_options) && r.error == error_code::SUCCESS && + r.input_count < full_input_length) { + // First check if we can extend the input to the end of the stream + while (r.input_count < full_input_length && + base64_ignorable(*(srcinit + r.input_count), options)) { + r.input_count++; + } + // If we are still not at the end of the stream, then we must backtrack + // to the last non-ignorable character. + if (r.input_count < full_input_length) { + while (r.input_count > 0 && + base64_ignorable(*(srcinit + r.input_count - 1), options)) { + r.input_count--; + } + } + } + return r; + } + if (!ignore_garbage && equalsigns > 0) { + if ((size_t(dst - dstinit) % 3 == 0) || + ((size_t(dst - dstinit) % 3) + 1 + equalsigns != 4)) { + return {INVALID_BASE64_CHARACTER, equallocation, size_t(dst - dstinit)}; + } + } + return {SUCCESS, srclen, size_t(dst - dstinit)}; +} + +} // namespace base64 +} // unnamed namespace +} // namespace haswell +} // namespace simdutf +/* end file src/generic/base64.h */ +/* begin file src/generic/find.h */ +namespace simdutf { +namespace haswell { +namespace { +namespace util { + +simdutf_really_inline const char *find(const char *start, const char *end, + char character) noexcept { + // Handle empty or invalid range + if (start >= end) + return end; + // Align the start pointer to 64 bytes + uintptr_t misalignment = reinterpret_cast(start) % 64; + if (misalignment != 0) { + size_t adjustment = 64 - 
misalignment; + if (size_t(std::distance(start, end)) < adjustment) { + adjustment = std::distance(start, end); + } + for (size_t i = 0; i < adjustment; i++) { + if (start[i] == character) { + return start + i; + } + } + start += adjustment; + } + + // Main loop for 64-byte aligned data + for (; std::distance(start, end) >= 64; start += 64) { + simd8x64 input(reinterpret_cast(start)); + uint64_t matches = input.eq(uint8_t(character)); + if (matches != 0) { + // Found a match, return the first one + int index = trailing_zeroes(matches); + return start + index; + } + } + return std::find(start, end, character); +} + +simdutf_really_inline const char16_t * +find(const char16_t *start, const char16_t *end, char16_t character) noexcept { + // Handle empty or invalid range + if (start >= end) + return end; + // Align the start pointer to 64 bytes if misalignment is even + uintptr_t misalignment = reinterpret_cast(start) % 64; + if (misalignment != 0 && misalignment % 2 == 0) { + size_t adjustment = (64 - misalignment) / sizeof(char16_t); + if (size_t(std::distance(start, end)) < adjustment) { + adjustment = std::distance(start, end); + } + for (size_t i = 0; i < adjustment; i++) { + if (start[i] == character) { + return start + i; + } + } + start += adjustment; + } + + // Main loop for 64-byte aligned data + for (; std::distance(start, end) >= 32; start += 32) { + simd16x32 input(reinterpret_cast(start)); + uint64_t matches = input.eq(uint16_t(character)); + if (matches != 0) { + // Found a match, return the first one + int index = trailing_zeroes(matches) / 2; + return start + index; + } + } + return std::find(start, end, character); +} + +} // namespace util +} // namespace +} // namespace haswell +} // namespace simdutf +/* end file src/generic/find.h */ +#endif // SIMDUTF_FEATURE_BASE64 + +namespace simdutf { +namespace haswell { + +#if SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused int +implementation::detect_encodings(const char *input, + size_t length) const 
noexcept { + // If there is a BOM, then we trust it. + auto bom_encoding = simdutf::BOM::check_bom(input, length); + if (bom_encoding != encoding_type::unspecified) { + return bom_encoding; + } + + int out = 0; + uint32_t utf16_err = (length % 2); + uint32_t utf32_err = (length % 4); + uint32_t ends_with_high = 0; + const auto v_d8 = simd8::splat(0xd8); + const auto v_f8 = simd8::splat(0xf8); + const auto v_fc = simd8::splat(0xfc); + const auto v_dc = simd8::splat(0xdc); + const __m256i standardmax = _mm256_set1_epi32(0x10ffff); + const __m256i offset = _mm256_set1_epi32(0xffff2000); + const __m256i standardoffsetmax = _mm256_set1_epi32(0xfffff7ff); + __m256i currentmax = _mm256_setzero_si256(); + __m256i currentoffsetmax = _mm256_setzero_si256(); + + utf8_checker c{}; + buf_block_reader<64> reader(reinterpret_cast(input), length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + // utf8 checks + c.check_next_input(in); + + // utf16le checks + auto in0 = simd16(in.chunks[0]); + auto in1 = simd16(in.chunks[1]); + const auto t0 = in0.shr<8>(); + const auto t1 = in1.shr<8>(); + const auto in2 = simd16::pack(t0, t1); + const auto surrogates_wordmask = (in2 & v_f8) == v_d8; + const uint32_t surrogates_bitmask = surrogates_wordmask.to_bitmask(); + const auto vL = (in2 & v_fc) == v_dc; + const uint32_t L = vL.to_bitmask(); + const uint32_t H = L ^ surrogates_bitmask; + utf16_err |= (((H << 1) | ends_with_high) != L); + ends_with_high = (H & 0x80000000) != 0; + + // utf32le checks + currentmax = _mm256_max_epu32(in.chunks[0], currentmax); + currentoffsetmax = _mm256_max_epu32(_mm256_add_epi32(in.chunks[0], offset), + currentoffsetmax); + currentmax = _mm256_max_epu32(in.chunks[1], currentmax); + currentoffsetmax = _mm256_max_epu32(_mm256_add_epi32(in.chunks[1], offset), + currentoffsetmax); + + reader.advance(); + } + + uint8_t block[64]{}; + size_t idx = reader.block_index(); + std::memcpy(block, &input[idx], length - idx); + simd::simd8x64 
in(block); + c.check_next_input(in); + + // utf16le last block check + auto in0 = simd16(in.chunks[0]); + auto in1 = simd16(in.chunks[1]); + const auto t0 = in0.shr<8>(); + const auto t1 = in1.shr<8>(); + const auto in2 = simd16::pack(t0, t1); + const auto surrogates_wordmask = (in2 & v_f8) == v_d8; + const uint32_t surrogates_bitmask = surrogates_wordmask.to_bitmask(); + const auto vL = (in2 & v_fc) == v_dc; + const uint32_t L = vL.to_bitmask(); + const uint32_t H = L ^ surrogates_bitmask; + utf16_err |= (((H << 1) | ends_with_high) != L); + // this is required to check for last byte ending in high and end of input + // is reached + ends_with_high = (H & 0x80000000) != 0; + utf16_err |= ends_with_high; + + // utf32le last block check + currentmax = _mm256_max_epu32(in.chunks[0], currentmax); + currentoffsetmax = _mm256_max_epu32(_mm256_add_epi32(in.chunks[0], offset), + currentoffsetmax); + currentmax = _mm256_max_epu32(in.chunks[1], currentmax); + currentoffsetmax = _mm256_max_epu32(_mm256_add_epi32(in.chunks[1], offset), + currentoffsetmax); + + reader.advance(); + + c.check_eof(); + bool is_valid_utf8 = !c.errors(); + __m256i is_zero = + _mm256_xor_si256(_mm256_max_epu32(currentmax, standardmax), standardmax); + utf32_err |= (_mm256_testz_si256(is_zero, is_zero) == 0); + + is_zero = _mm256_xor_si256( + _mm256_max_epu32(currentoffsetmax, standardoffsetmax), standardoffsetmax); + utf32_err |= (_mm256_testz_si256(is_zero, is_zero) == 0); + if (is_valid_utf8) { + out |= encoding_type::UTF8; + } + if (utf16_err == 0) { + out |= encoding_type::UTF16_LE; + } + if (utf32_err == 0) { + out |= encoding_type::UTF32_LE; + } + return out; +} +#endif // SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused bool +implementation::validate_utf8(const char *buf, size_t len) const noexcept { + return haswell::utf8_validation::generic_validate_utf8(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF8 || 
SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 +simdutf_warn_unused result implementation::validate_utf8_with_errors( + const char *buf, size_t len) const noexcept { + return haswell::utf8_validation::generic_validate_utf8_with_errors(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_ASCII +simdutf_warn_unused bool +implementation::validate_ascii(const char *buf, size_t len) const noexcept { + return haswell::ascii_validation::generic_validate_ascii(buf, len); +} + +simdutf_warn_unused result implementation::validate_ascii_with_errors( + const char *buf, size_t len) const noexcept { + return haswell::ascii_validation::generic_validate_ascii_with_errors(buf, + len); +} +#endif // SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII +simdutf_warn_unused bool +implementation::validate_utf16le_as_ascii(const char16_t *buf, + size_t len) const noexcept { + return haswell::utf16::validate_utf16_as_ascii_with_errors< + endianness::LITTLE>(buf, len) + .error == SUCCESS; +} + +simdutf_warn_unused bool +implementation::validate_utf16be_as_ascii(const char16_t *buf, + size_t len) const noexcept { + return haswell::utf16::validate_utf16_as_ascii_with_errors( + buf, len) + .error == SUCCESS; +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused bool +implementation::validate_utf16le(const char16_t *buf, + size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + // empty input is valid UTF-16. 
protect the implementation from + // handling nullptr + return true; + } + const auto res = + haswell::utf16::validate_utf16_with_errors(buf, len); + if (res.is_err()) { + return false; + } + + if (res.count == len) { + return true; + } + + return scalar::utf16::validate(buf + res.count, + len - res.count); +} +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused bool +implementation::validate_utf16be(const char16_t *buf, + size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + // empty input is valid UTF-16. protect the implementation from + // handling nullptr + return true; + } + const auto res = + haswell::utf16::validate_utf16_with_errors(buf, len); + if (res.is_err()) { + return false; + } + + if (res.count == len) { + return true; + } + + return scalar::utf16::validate(buf + res.count, + len - res.count); +} + +simdutf_warn_unused result implementation::validate_utf16le_with_errors( + const char16_t *buf, size_t len) const noexcept { + + const result res = + haswell::utf16::validate_utf16_with_errors(buf, len); + if (res.count != len) { + const result scalar_res = + scalar::utf16::validate_with_errors( + buf + res.count, len - res.count); + return result(scalar_res.error, res.count + scalar_res.count); + } else { + return res; + } +} + +simdutf_warn_unused result implementation::validate_utf16be_with_errors( + const char16_t *buf, size_t len) const noexcept { + const result res = + haswell::utf16::validate_utf16_with_errors(buf, len); + if (res.count != len) { + const result scalar_res = + scalar::utf16::validate_with_errors(buf + res.count, + len - res.count); + return result(scalar_res.error, res.count + scalar_res.count); + } else { + return res; + } +} + +void implementation::to_well_formed_utf16le(const char16_t *input, size_t len, + char16_t *output) const noexcept { + return utf16fix_avx(input, len, output); +} + +void implementation::to_well_formed_utf16be(const char16_t 
*input, size_t len, + char16_t *output) const noexcept { + return utf16fix_avx(input, len, output); +} +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused bool +implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { + return utf32::validate(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused result implementation::validate_utf32_with_errors( + const char32_t *buf, size_t len) const noexcept { + return utf32::validate_with_errors(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( + const char *buf, size_t len, char *utf8_output) const noexcept { + std::pair ret = + avx2_convert_latin1_to_utf8(buf, len, utf8_output); + size_t converted_chars = ret.second - utf8_output; + + if (ret.first != buf + len) { + const size_t scalar_converted_chars = scalar::latin1_to_utf8::convert( + ret.first, len - (ret.first - buf), ret.second); + converted_chars += scalar_converted_chars; + } + + return converted_chars; +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + std::pair ret = + avx2_convert_latin1_to_utf16(buf, len, utf16_output); + if (ret.first == nullptr) { + return 0; + } + size_t converted_chars = ret.second - utf16_output; + if (ret.first != buf + len) { + const size_t scalar_converted_chars = + scalar::latin1_to_utf16::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_converted_chars == 0) { + return 0; + } + converted_chars += scalar_converted_chars; + } + return converted_chars; +} + +simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be( + 
const char *buf, size_t len, char16_t *utf16_output) const noexcept { + std::pair ret = + avx2_convert_latin1_to_utf16(buf, len, utf16_output); + if (ret.first == nullptr) { + return 0; + } + size_t converted_chars = ret.second - utf16_output; + if (ret.first != buf + len) { + const size_t scalar_converted_chars = + scalar::latin1_to_utf16::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_converted_chars == 0) { + return 0; + } + converted_chars += scalar_converted_chars; + } + return converted_chars; +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept { + std::pair ret = + avx2_convert_latin1_to_utf32(buf, len, utf32_output); + if (ret.first == nullptr) { + return 0; + } + size_t converted_chars = ret.second - utf32_output; + if (ret.first != buf + len) { + const size_t scalar_converted_chars = scalar::latin1_to_utf32::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_converted_chars == 0) { + return 0; + } + converted_chars += scalar_converted_chars; + } + return converted_chars; +} +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept { + utf8_to_latin1::validating_transcoder converter; + return converter.convert(buf, len, latin1_output); +} + +simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors( + const char *buf, size_t len, char *latin1_output) const noexcept { + utf8_to_latin1::validating_transcoder converter; + return converter.convert_with_errors(buf, len, latin1_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( + const char *input, size_t size, char 
*latin1_output) const noexcept { + return utf8_to_latin1::convert_valid(input, size, latin1_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16::validating_transcoder converter; + return converter.convert(buf, len, utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16::validating_transcoder converter; + return converter.convert(buf, len, utf16_output); +} + +simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16::validating_transcoder converter; + return converter.convert_with_errors(buf, len, + utf16_output); +} + +simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16::validating_transcoder converter; + return converter.convert_with_errors(buf, len, utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le( + const char *input, size_t size, char16_t *utf16_output) const noexcept { + return utf8_to_utf16::convert_valid(input, size, + utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be( + const char *input, size_t size, char16_t *utf16_output) const noexcept { + return utf8_to_utf16::convert_valid(input, size, + utf16_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::convert_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept { + utf8_to_utf32::validating_transcoder converter; 
+ return converter.convert(buf, len, utf32_output); +} + +simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors( + const char *buf, size_t len, char32_t *utf32_output) const noexcept { + utf8_to_utf32::validating_transcoder converter; + return converter.convert_with_errors(buf, len, utf32_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( + const char *input, size_t size, char32_t *utf32_output) const noexcept { + return utf8_to_utf32::convert_valid(input, size, utf32_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + haswell::avx2_convert_utf16_to_latin1(buf, len, + latin1_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - latin1_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_latin1::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + haswell::avx2_convert_utf16_to_latin1(buf, len, + latin1_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - latin1_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_latin1::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused result +implementation::convert_utf16le_to_latin1_with_errors( + const char16_t *buf, size_t len, char *latin1_output) const 
noexcept { + std::pair ret = + avx2_convert_utf16_to_latin1_with_errors( + buf, len, latin1_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_latin1::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + latin1_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused result +implementation::convert_utf16be_to_latin1_with_errors( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + avx2_convert_utf16_to_latin1_with_errors(buf, len, + latin1_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_latin1::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + latin1_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + // optimization opportunity: implement a custom function + return convert_utf16be_to_latin1(buf, len, latin1_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + // 
optimization opportunity: implement a custom function + return convert_utf16le_to_latin1(buf, len, latin1_output); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + std::pair ret = + haswell::avx2_convert_utf16_to_utf8(buf, len, + utf8_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf8_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_utf8::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + std::pair ret = + haswell::avx2_convert_utf16_to_utf8(buf, len, + utf8_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf8_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_utf8::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + haswell::avx2_convert_utf16_to_utf8_with_errors( + buf, len, utf8_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + 
scalar::utf16_to_utf8::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf8_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + haswell::avx2_convert_utf16_to_utf8_with_errors( + buf, len, utf8_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_utf8::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf8_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + return convert_utf16le_to_utf8(buf, len, utf8_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + return convert_utf16be_to_utf8(buf, len, utf8_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_output) const 
noexcept { + std::pair ret = + avx2_convert_utf32_to_utf8(buf, len, utf8_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf8_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = scalar::utf32_to_utf8::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + avx2_convert_utf32_to_latin1(buf, len, latin1_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - latin1_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = scalar::utf32_to_latin1::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + avx2_convert_utf32_to_latin1_with_errors(buf, len, latin1_output); + if (ret.first.count != len) { + result scalar_res = scalar::utf32_to_latin1::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + latin1_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused size_t 
implementation::convert_valid_utf32_to_latin1( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + return convert_utf32_to_latin1(buf, len, latin1_output); +} +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + haswell::avx2_convert_utf32_to_utf8_with_errors(buf, len, utf8_output); + if (ret.first.count != len) { + result scalar_res = scalar::utf32_to_utf8::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf8_output; // Set count to the number of 8-bit code units written + return ret.first; +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + std::pair ret = + haswell::avx2_convert_utf16_to_utf32(buf, len, + utf32_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf32_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_utf32::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + std::pair ret = + haswell::avx2_convert_utf16_to_utf32(buf, 
len, + utf32_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf32_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_utf32::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + haswell::avx2_convert_utf16_to_utf32_with_errors( + buf, len, utf32_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_utf32::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf32_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + haswell::avx2_convert_utf16_to_utf32_with_errors( + buf, len, utf32_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + 
scalar::utf16_to_utf32::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf32_output; // Set count to the number of 8-bit code units written + return ret.first; +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + return convert_utf32_to_utf8(buf, len, utf8_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + std::pair ret = + avx2_convert_utf32_to_utf16(buf, len, utf16_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf16_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf32_to_utf16::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + std::pair ret = + avx2_convert_utf32_to_utf16(buf, len, utf16_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf16_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf32_to_utf16::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused result 
implementation::convert_utf32_to_utf16le_with_errors( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + haswell::avx2_convert_utf32_to_utf16_with_errors( + buf, len, utf16_output); + if (ret.first.count != len) { + result scalar_res = + scalar::utf32_to_utf16::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf16_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + haswell::avx2_convert_utf32_to_utf16_with_errors( + buf, len, utf16_output); + if (ret.first.count != len) { + result scalar_res = + scalar::utf32_to_utf16::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf16_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + return convert_utf32_to_utf16le(buf, len, utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + return convert_utf32_to_utf16be(buf, len, 
utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + return convert_utf16le_to_utf32(buf, len, utf32_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + return convert_utf16be_to_utf32(buf, len, utf32_output); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 +void implementation::change_endianness_utf16(const char16_t *input, + size_t length, + char16_t *output) const noexcept { + utf16::change_endianness_utf16(input, length, output); +} + +simdutf_warn_unused size_t implementation::count_utf16le( + const char16_t *input, size_t length) const noexcept { + return utf16::count_code_points(input, length); +} + +simdutf_warn_unused size_t implementation::count_utf16be( + const char16_t *input, size_t length) const noexcept { + return utf16::count_code_points(input, length); +} +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 +simdutf_warn_unused size_t +implementation::count_utf8(const char *in, size_t size) const noexcept { + return utf8::count_code_points_bytemask(in, size); +} +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::latin1_length_from_utf8( + const char *buf, size_t len) const noexcept { + return count_utf8(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t implementation::utf8_length_from_utf16le( + const char16_t *input, size_t length) const noexcept { + return utf16::utf8_length_from_utf16_bytemask(input, + length); +} + +simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( + const char16_t *input, size_t length) const noexcept { + return 
utf16::utf8_length_from_utf16_bytemask(input, length); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::utf32_length_from_utf16le( + const char16_t *input, size_t length) const noexcept { + return utf16::utf32_length_from_utf16(input, length); +} + +simdutf_warn_unused size_t implementation::utf32_length_from_utf16be( + const char16_t *input, size_t length) const noexcept { + return utf16::utf32_length_from_utf16(input, length); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t implementation::utf16_length_from_utf8( + const char *input, size_t length) const noexcept { + return utf8::utf16_length_from_utf8_bytemask(input, length); +} +simdutf_warn_unused result +implementation::utf8_length_from_utf16le_with_replacement( + const char16_t *input, size_t length) const noexcept { + return utf16::utf8_length_from_utf16_with_replacement( + input, length); +} + +simdutf_warn_unused result +implementation::utf8_length_from_utf16be_with_replacement( + const char16_t *input, size_t length) const noexcept { + return utf16::utf8_length_from_utf16_with_replacement( + input, length); +} + +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::utf8_length_from_latin1( + const char *input, size_t len) const noexcept { + const uint8_t *data = reinterpret_cast(input); + size_t answer = len / sizeof(__m256i) * sizeof(__m256i); + size_t i = 0; + if (answer >= 2048) { // long strings optimization + __m256i four_64bits = _mm256_setzero_si256(); + while (i + sizeof(__m256i) <= len) { + __m256i runner = _mm256_setzero_si256(); + // We can do up to 255 loops without overflow. 
+ size_t iterations = (len - i) / sizeof(__m256i); + if (iterations > 255) { + iterations = 255; + } + size_t max_i = i + iterations * sizeof(__m256i) - sizeof(__m256i); + for (; i + 4 * sizeof(__m256i) <= max_i; i += 4 * sizeof(__m256i)) { + __m256i input1 = _mm256_loadu_si256((const __m256i *)(data + i)); + __m256i input2 = + _mm256_loadu_si256((const __m256i *)(data + i + sizeof(__m256i))); + __m256i input3 = _mm256_loadu_si256( + (const __m256i *)(data + i + 2 * sizeof(__m256i))); + __m256i input4 = _mm256_loadu_si256( + (const __m256i *)(data + i + 3 * sizeof(__m256i))); + __m256i input12 = + _mm256_add_epi8(_mm256_cmpgt_epi8(_mm256_setzero_si256(), input1), + _mm256_cmpgt_epi8(_mm256_setzero_si256(), input2)); + __m256i input23 = + _mm256_add_epi8(_mm256_cmpgt_epi8(_mm256_setzero_si256(), input3), + _mm256_cmpgt_epi8(_mm256_setzero_si256(), input4)); + __m256i input1234 = _mm256_add_epi8(input12, input23); + runner = _mm256_sub_epi8(runner, input1234); + } + for (; i <= max_i; i += sizeof(__m256i)) { + __m256i input_256_chunk = + _mm256_loadu_si256((const __m256i *)(data + i)); + runner = _mm256_sub_epi8( + runner, _mm256_cmpgt_epi8(_mm256_setzero_si256(), input_256_chunk)); + } + four_64bits = _mm256_add_epi64( + four_64bits, _mm256_sad_epu8(runner, _mm256_setzero_si256())); + } + answer += _mm256_extract_epi64(four_64bits, 0) + + _mm256_extract_epi64(four_64bits, 1) + + _mm256_extract_epi64(four_64bits, 2) + + _mm256_extract_epi64(four_64bits, 3); + } else if (answer > 0) { + for (; i + sizeof(__m256i) <= len; i += sizeof(__m256i)) { + __m256i latin = _mm256_loadu_si256((const __m256i *)(data + i)); + uint32_t non_ascii = _mm256_movemask_epi8(latin); + answer += count_ones(non_ascii); + } + } + return answer + scalar::latin1::utf8_length_from_latin1( + reinterpret_cast(data + i), len - i); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t 
implementation::utf8_length_from_utf32( + const char32_t *input, size_t length) const noexcept { + return utf32::utf8_length_from_utf32(input, length); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::utf16_length_from_utf32( + const char32_t *input, size_t length) const noexcept { + const __m256i v_00000000 = _mm256_setzero_si256(); + const __m256i v_ffff0000 = _mm256_set1_epi32((uint32_t)0xffff0000); + size_t pos = 0; + size_t count = 0; + for (; pos + 8 <= length; pos += 8) { + __m256i in = _mm256_loadu_si256((__m256i *)(input + pos)); + const __m256i surrogate_bytemask = + _mm256_cmpeq_epi32(_mm256_and_si256(in, v_ffff0000), v_00000000); + const uint32_t surrogate_bitmask = + static_cast(_mm256_movemask_epi8(surrogate_bytemask)); + size_t surrogate_count = (32 - count_ones(surrogate_bitmask)) / 4; + count += 8 + surrogate_count; + } + return count + + scalar::utf32::utf16_length_from_utf32(input + pos, length - pos); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::utf32_length_from_utf8( + const char *input, size_t length) const noexcept { + return utf8::count_code_points(input, length); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_BASE64 +simdutf_warn_unused result implementation::base64_to_binary( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + if (options & base64_default_or_url) { + if (options == base64_options::base64_default_or_url_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else if (options & base64_url) { + if (options == 
base64_options::base64_url_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else { + if (options == base64_options::base64_default_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } +} + +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + if (options & base64_default_or_url) { + if (options == base64_options::base64_default_or_url_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else if (options & base64_url) { + if (options == base64_options::base64_url_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else { + if (options == base64_options::base64_default_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } +} + +simdutf_warn_unused result implementation::base64_to_binary( + const char16_t *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + if (options & base64_default_or_url) { + if (options == base64_options::base64_default_or_url_accept_garbage) 
{ + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else if (options & base64_url) { + if (options == base64_options::base64_url_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else { + if (options == base64_options::base64_default_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } +} + +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char16_t *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + if (options & base64_default_or_url) { + if (options == base64_options::base64_default_or_url_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else if (options & base64_url) { + if (options == base64_options::base64_url_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else { + if (options == base64_options::base64_default_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } +} + +size_t implementation::binary_to_base64(const char 
*input, size_t length, + char *output, + base64_options options) const noexcept { + if (options & base64_url) { + return encode_base64(output, input, length, options); + } else { + return encode_base64(output, input, length, options); + } +} + +size_t implementation::binary_to_base64_with_lines( + const char *input, size_t length, char *output, size_t line_length, + base64_options options) const noexcept { + if (options & base64_url) { + return avx2_encode_base64_impl(output, input, length, options, + line_length); + } else { + return avx2_encode_base64_impl(output, input, length, options, + line_length); + } +} + +const char *implementation::find(const char *start, const char *end, + char character) const noexcept { + return util::find(start, end, character); +} + +const char16_t *implementation::find(const char16_t *start, const char16_t *end, + char16_t character) const noexcept { + return util::find(start, end, character); +} +#endif // SIMDUTF_FEATURE_BASE64 + +} // namespace haswell +} // namespace simdutf + +/* begin file src/simdutf/haswell/end.h */ +#if SIMDUTF_CAN_ALWAYS_RUN_HASWELL +// nothing needed. 
+#else +SIMDUTF_UNTARGET_REGION +#endif + +#undef SIMDUTF_SIMD_HAS_BYTEMASK + +#if SIMDUTF_GCC11ORMORE // workaround for + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 +SIMDUTF_POP_DISABLE_WARNINGS +#endif // end of workaround +/* end file src/simdutf/haswell/end.h */ +/* end file src/haswell/implementation.cpp */ +#endif +#if SIMDUTF_IMPLEMENTATION_PPC64 +/* begin file src/ppc64/implementation.cpp */ +/* begin file src/simdutf/ppc64/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "ppc64" +// #define SIMDUTF_IMPLEMENTATION ppc64 +/* end file src/simdutf/ppc64/begin.h */ + +/* begin file src/ppc64/ppc64_utf16_to_utf8_tables.h */ +// Code generated automatically; DO NOT EDIT +// file generated by scripts/ppc64_convert_utf16_to_utf8.py +#ifndef PPC64_SIMDUTF_UTF16_TO_UTF8_TABLES_H +#define PPC64_SIMDUTF_UTF16_TO_UTF8_TABLES_H + +namespace simdutf { +namespace { +namespace tables { +namespace ppc64_utf16_to_utf8 { + +#if SIMDUTF_IS_BIG_ENDIAN +// 1 byte for length, 16 bytes for mask +const uint8_t pack_1_2_3_utf8_bytes[256][17] = { + {12, 1, 0, 16, 3, 2, 18, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80}, + {9, 3, 2, 18, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 16, 3, 2, 18, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 17, 3, 2, 18, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {9, 1, 0, 16, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {6, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 0, 16, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 17, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {11, 1, 0, 16, 2, 18, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80}, + {8, 2, 18, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {10, 0, 16, 2, 18, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {9, 17, 2, 18, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 
0x80, 0x80, + 0x80}, + {10, 1, 0, 16, 19, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {7, 19, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 0, 16, 19, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 17, 19, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 1, 0, 16, 3, 2, 18, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {6, 3, 2, 18, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 0, 16, 3, 2, 18, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 17, 3, 2, 18, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 1, 0, 16, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 0, 16, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 17, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {8, 1, 0, 16, 2, 18, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 2, 18, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 0, 16, 2, 18, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 17, 2, 18, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 1, 0, 16, 19, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 19, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 0, 16, 19, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 17, 19, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {11, 1, 0, 16, 3, 2, 18, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80}, + {8, 3, 2, 18, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {10, 0, 16, 3, 2, 18, 4, 20, 7, 6, 22, 0x80, 0x80, 
0x80, 0x80, 0x80, 0x80}, + {9, 17, 3, 2, 18, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 1, 0, 16, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 0, 16, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 17, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {10, 1, 0, 16, 2, 18, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {7, 2, 18, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 0, 16, 2, 18, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 17, 2, 18, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 1, 0, 16, 19, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 19, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 0, 16, 19, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 17, 19, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {10, 1, 0, 16, 3, 2, 18, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {7, 3, 2, 18, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 0, 16, 3, 2, 18, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 17, 3, 2, 18, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 1, 0, 16, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 0, 16, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 17, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {9, 1, 0, 16, 2, 18, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 2, 18, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 
0x80}, + {8, 0, 16, 2, 18, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 17, 2, 18, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 1, 0, 16, 19, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 19, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 0, 16, 19, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 17, 19, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {9, 1, 0, 16, 3, 2, 18, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {6, 3, 2, 18, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 0, 16, 3, 2, 18, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 17, 3, 2, 18, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 1, 0, 16, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 0, 16, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 17, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {8, 1, 0, 16, 2, 18, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 2, 18, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 0, 16, 2, 18, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 17, 2, 18, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 1, 0, 16, 19, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 19, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 0, 16, 19, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 17, 19, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {6, 1, 0, 16, 3, 2, 18, 0x80, 
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 3, 2, 18, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 0, 16, 3, 2, 18, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 17, 3, 2, 18, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {3, 1, 0, 16, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {2, 0, 16, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {1, 17, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {5, 1, 0, 16, 2, 18, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {2, 2, 18, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {4, 0, 16, 2, 18, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {3, 17, 2, 18, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {4, 1, 0, 16, 19, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {1, 19, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {3, 0, 16, 19, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {2, 17, 19, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {8, 1, 0, 16, 3, 2, 18, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 3, 2, 18, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 0, 16, 3, 2, 18, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 17, 3, 2, 18, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 1, 0, 16, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {2, 4, 20, 0x80, 0x80, 
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {4, 0, 16, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {3, 17, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {7, 1, 0, 16, 2, 18, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 2, 18, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 0, 16, 2, 18, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 17, 2, 18, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {6, 1, 0, 16, 19, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 19, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 0, 16, 19, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 17, 19, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {7, 1, 0, 16, 3, 2, 18, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 3, 2, 18, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 0, 16, 3, 2, 18, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 17, 3, 2, 18, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 1, 0, 16, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {1, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {3, 0, 16, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {2, 17, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {6, 1, 0, 16, 2, 18, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 2, 18, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 0, 16, 2, 18, 21, 0x80, 0x80, 
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 17, 2, 18, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 1, 0, 16, 19, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {2, 19, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {4, 0, 16, 19, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {3, 17, 19, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {11, 1, 0, 16, 3, 2, 18, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80}, + {8, 3, 2, 18, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {10, 0, 16, 3, 2, 18, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {9, 17, 3, 2, 18, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 1, 0, 16, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 0, 16, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 17, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {10, 1, 0, 16, 2, 18, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {7, 2, 18, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 0, 16, 2, 18, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 17, 2, 18, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 1, 0, 16, 19, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 19, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 0, 16, 19, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 17, 19, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 1, 0, 16, 3, 2, 18, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 3, 2, 18, 6, 22, 
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 0, 16, 3, 2, 18, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 17, 3, 2, 18, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 1, 0, 16, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {2, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {4, 0, 16, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {3, 17, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {7, 1, 0, 16, 2, 18, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 2, 18, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 0, 16, 2, 18, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 17, 2, 18, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {6, 1, 0, 16, 19, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 19, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 0, 16, 19, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 17, 19, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {10, 1, 0, 16, 3, 2, 18, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {7, 3, 2, 18, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 0, 16, 3, 2, 18, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 17, 3, 2, 18, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 1, 0, 16, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 0, 16, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 17, 4, 20, 6, 
22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {9, 1, 0, 16, 2, 18, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 2, 18, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 0, 16, 2, 18, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 17, 2, 18, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 1, 0, 16, 19, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 19, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 0, 16, 19, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 17, 19, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {9, 1, 0, 16, 3, 2, 18, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 3, 2, 18, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 0, 16, 3, 2, 18, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 17, 3, 2, 18, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 1, 0, 16, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 0, 16, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 17, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {8, 1, 0, 16, 2, 18, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 2, 18, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 0, 16, 2, 18, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 17, 2, 18, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 1, 0, 16, 19, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 19, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 
0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 0, 16, 19, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 17, 19, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {10, 1, 0, 16, 3, 2, 18, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {7, 3, 2, 18, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 0, 16, 3, 2, 18, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 17, 3, 2, 18, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 1, 0, 16, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 0, 16, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 17, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {9, 1, 0, 16, 2, 18, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 2, 18, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 0, 16, 2, 18, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 17, 2, 18, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 1, 0, 16, 19, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 19, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 0, 16, 19, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 17, 19, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 1, 0, 16, 3, 2, 18, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 3, 2, 18, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 0, 16, 3, 2, 18, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 17, 3, 2, 18, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 1, 0, 
16, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {1, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {3, 0, 16, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {2, 17, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {6, 1, 0, 16, 2, 18, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 2, 18, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 0, 16, 2, 18, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 17, 2, 18, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 1, 0, 16, 19, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {2, 19, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {4, 0, 16, 19, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {3, 17, 19, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {9, 1, 0, 16, 3, 2, 18, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 3, 2, 18, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 0, 16, 3, 2, 18, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 17, 3, 2, 18, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 1, 0, 16, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 0, 16, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 17, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {8, 1, 0, 16, 2, 18, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 2, 18, 4, 20, 23, 0x80, 0x80, 0x80, 
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 0, 16, 2, 18, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 17, 2, 18, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 1, 0, 16, 19, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 19, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 0, 16, 19, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 17, 19, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 1, 0, 16, 3, 2, 18, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 3, 2, 18, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 0, 16, 3, 2, 18, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 17, 3, 2, 18, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 1, 0, 16, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {2, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {4, 0, 16, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {3, 17, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {7, 1, 0, 16, 2, 18, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 2, 18, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 0, 16, 2, 18, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 17, 2, 18, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {6, 1, 0, 16, 19, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 19, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 0, 16, 19, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + 
{4, 17, 19, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, +}; +#else +// 1 byte for length, 16 bytes for mask +const uint8_t pack_1_2_3_utf8_bytes[256][17] = { + {12, 0, 1, 17, 2, 3, 19, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80}, + {9, 2, 3, 19, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 17, 2, 3, 19, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 16, 2, 3, 19, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {9, 0, 1, 17, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {6, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 1, 17, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 16, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {11, 0, 1, 17, 3, 19, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80}, + {8, 3, 19, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {10, 1, 17, 3, 19, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {9, 16, 3, 19, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {10, 0, 1, 17, 18, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {7, 18, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 1, 17, 18, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 16, 18, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 0, 1, 17, 2, 3, 19, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {6, 2, 3, 19, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 1, 17, 2, 3, 19, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 16, 2, 3, 19, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 0, 1, 17, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 
0x80, 0x80, 0x80}, + {5, 1, 17, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 16, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {8, 0, 1, 17, 3, 19, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 3, 19, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 1, 17, 3, 19, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 16, 3, 19, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 0, 1, 17, 18, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 18, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 1, 17, 18, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 16, 18, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {11, 0, 1, 17, 2, 3, 19, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80}, + {8, 2, 3, 19, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {10, 1, 17, 2, 3, 19, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {9, 16, 2, 3, 19, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 0, 1, 17, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 1, 17, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 16, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {10, 0, 1, 17, 3, 19, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {7, 3, 19, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 1, 17, 3, 19, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 16, 3, 19, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 0, 1, 17, 18, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 
0x80, + 0x80}, + {6, 18, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 1, 17, 18, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 16, 18, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {10, 0, 1, 17, 2, 3, 19, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {7, 2, 3, 19, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 1, 17, 2, 3, 19, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 16, 2, 3, 19, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 0, 1, 17, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 1, 17, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 16, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {9, 0, 1, 17, 3, 19, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 3, 19, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 1, 17, 3, 19, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 16, 3, 19, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 0, 1, 17, 18, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 18, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 1, 17, 18, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 16, 18, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {9, 0, 1, 17, 2, 3, 19, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {6, 2, 3, 19, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 1, 17, 2, 3, 19, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 16, 2, 3, 19, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 
0x80, 0x80, + 0x80}, + {6, 0, 1, 17, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 1, 17, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 16, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {8, 0, 1, 17, 3, 19, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 3, 19, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 1, 17, 3, 19, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 16, 3, 19, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 0, 1, 17, 18, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 18, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 1, 17, 18, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 16, 18, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {6, 0, 1, 17, 2, 3, 19, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 2, 3, 19, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 1, 17, 2, 3, 19, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 16, 2, 3, 19, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {3, 0, 1, 17, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {2, 1, 17, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {1, 16, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {5, 0, 1, 17, 3, 19, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {2, 3, 19, 
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {4, 1, 17, 3, 19, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {3, 16, 3, 19, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {4, 0, 1, 17, 18, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {1, 18, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {3, 1, 17, 18, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {2, 16, 18, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {8, 0, 1, 17, 2, 3, 19, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 2, 3, 19, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 1, 17, 2, 3, 19, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 16, 2, 3, 19, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 0, 1, 17, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {2, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {4, 1, 17, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {3, 16, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {7, 0, 1, 17, 3, 19, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 3, 19, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 1, 17, 3, 19, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 16, 3, 19, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {6, 0, 1, 17, 18, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 18, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 1, 17, 18, 5, 21, 0x80, 
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 16, 18, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {7, 0, 1, 17, 2, 3, 19, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 2, 3, 19, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 1, 17, 2, 3, 19, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 16, 2, 3, 19, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 0, 1, 17, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {1, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {3, 1, 17, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {2, 16, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {6, 0, 1, 17, 3, 19, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 3, 19, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 1, 17, 3, 19, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 16, 3, 19, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 0, 1, 17, 18, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {2, 18, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {4, 1, 17, 18, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {3, 16, 18, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {11, 0, 1, 17, 2, 3, 19, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80}, + {8, 2, 3, 19, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {10, 1, 17, 2, 3, 19, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {9, 16, 2, 3, 19, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 
0x80}, + {8, 0, 1, 17, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 1, 17, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 16, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {10, 0, 1, 17, 3, 19, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {7, 3, 19, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 1, 17, 3, 19, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 16, 3, 19, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 0, 1, 17, 18, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 18, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 1, 17, 18, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 16, 18, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 0, 1, 17, 2, 3, 19, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 2, 3, 19, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 1, 17, 2, 3, 19, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 16, 2, 3, 19, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 0, 1, 17, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {2, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {4, 1, 17, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {3, 16, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {7, 0, 1, 17, 3, 19, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 3, 19, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 1, 17, 3, 19, 7, 23, 0x80, 0x80, 
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 16, 3, 19, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {6, 0, 1, 17, 18, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 18, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 1, 17, 18, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 16, 18, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {10, 0, 1, 17, 2, 3, 19, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {7, 2, 3, 19, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 1, 17, 2, 3, 19, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 16, 2, 3, 19, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 0, 1, 17, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 1, 17, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 16, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {9, 0, 1, 17, 3, 19, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 3, 19, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 1, 17, 3, 19, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 16, 3, 19, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 0, 1, 17, 18, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 18, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 1, 17, 18, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 16, 18, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {9, 0, 1, 17, 2, 3, 19, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 
0x80}, + {6, 2, 3, 19, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 1, 17, 2, 3, 19, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 16, 2, 3, 19, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 0, 1, 17, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 1, 17, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 16, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {8, 0, 1, 17, 3, 19, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 3, 19, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 1, 17, 3, 19, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 16, 3, 19, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 0, 1, 17, 18, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 18, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 1, 17, 18, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 16, 18, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {10, 0, 1, 17, 2, 3, 19, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {7, 2, 3, 19, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 1, 17, 2, 3, 19, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 16, 2, 3, 19, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 0, 1, 17, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 1, 17, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 16, 4, 5, 21, 
22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {9, 0, 1, 17, 3, 19, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 3, 19, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 1, 17, 3, 19, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 16, 3, 19, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 0, 1, 17, 18, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 18, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 1, 17, 18, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 16, 18, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 0, 1, 17, 2, 3, 19, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 2, 3, 19, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 1, 17, 2, 3, 19, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 16, 2, 3, 19, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 0, 1, 17, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {1, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {3, 1, 17, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {2, 16, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {6, 0, 1, 17, 3, 19, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 3, 19, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 1, 17, 3, 19, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 16, 3, 19, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 0, 1, 17, 18, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 
0x80}, + {2, 18, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {4, 1, 17, 18, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {3, 16, 18, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {9, 0, 1, 17, 2, 3, 19, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 2, 3, 19, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 1, 17, 2, 3, 19, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 16, 2, 3, 19, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 0, 1, 17, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 1, 17, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 16, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {8, 0, 1, 17, 3, 19, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 3, 19, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 1, 17, 3, 19, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 16, 3, 19, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 0, 1, 17, 18, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 18, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 1, 17, 18, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 16, 18, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 0, 1, 17, 2, 3, 19, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 2, 3, 19, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 1, 17, 2, 3, 19, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 
0x80, 0x80, + 0x80}, + {6, 16, 2, 3, 19, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 0, 1, 17, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {2, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {4, 1, 17, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {3, 16, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {7, 0, 1, 17, 3, 19, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 3, 19, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 1, 17, 3, 19, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 16, 3, 19, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {6, 0, 1, 17, 18, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 18, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 1, 17, 18, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 16, 18, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, +}; +#endif // SIMDUTF_IS_BIG_ENDIAN +} // namespace ppc64_utf16_to_utf8 +} // namespace tables +} // unnamed namespace +} // namespace simdutf + +#endif // PPC64_SIMDUTF_UTF16_TO_UTF8_TABLES_H +/* end file src/ppc64/ppc64_utf16_to_utf8_tables.h */ + +namespace simdutf { +namespace ppc64 { +namespace { +#ifndef SIMDUTF_PPC64_H + #error "ppc64.h must be included" +#endif +using namespace simd; + +simdutf_really_inline bool is_ascii(const simd8x64 &input) { + // careful: 0x80 is not ascii. 
+ return input.reduce_or().saturating_sub(0b01111111u).bits_not_set_anywhere(); +} + +simdutf_really_inline simd8 +must_be_2_3_continuation(const simd8 prev2, + const simd8 prev3) { + simd8 is_third_byte = + prev2.saturating_sub(0xe0u - 0x80); // Only 111_____ will be >= 0x80 + simd8 is_fourth_byte = + prev3.saturating_sub(0xf0u - 0x80); // Only 1111____ will be >= 0x80 + // Caller requires a bool (all 1's). All values resulting from the subtraction + // will be <= 64, so signed comparison is fine. + return simd8(is_third_byte | is_fourth_byte); +} + +/// ErrorReporting describes behaviour of a vectorized procedure regarding error +/// checking +enum class ErrorReporting { + precise, // the procedure will report *approximate* or *precise* error + // position + at_the_end, // the procedure will only inform about an error after scanning + // the whole input (or its significant portion) + none, // no error checking is done, we assume valid inputs +}; + +#if SIMDUTF_FEATURE_UTF16 +/* begin file src/ppc64/ppc64_validate_utf16.cpp */ +template +simd8 utf16_gather_high_bytes(const simd16 in0, + const simd16 in1) { + if (big_endian) { + const vec_u8_t pack_high = { + 0, 2, 4, 6, 8, 10, 12, 14, // in0 + 16, 18, 20, 22, 24, 26, 28, 30 // in1 + }; + + return vec_perm(vec_u8_t(in0.value), vec_u8_t(in1.value), pack_high); + } else { + const vec_u8_t pack_high = { + 1, 3, 5, 7, 9, 11, 13, 15, // in0 + 17, 19, 21, 23, 25, 27, 29, 31 // in1 + }; + + return vec_perm(vec_u8_t(in0.value), vec_u8_t(in1.value), pack_high); + } +} +/* end file src/ppc64/ppc64_validate_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_LATIN1 && SIMDUTF_FEATURE_UTF8 +/* begin file src/ppc64/ppc64_convert_latin1_to_utf8.cpp */ +/* + * reads a vector of uint16 values + * bits after 11th are ignored + * first 11 bits are encoded into utf8 + * !important! 
utf8_output must have at least 16 writable bytes + */ +simdutf_really_inline void +write_v_u16_11bits_to_utf8(const vector_u16 v_u16, char *&utf8_output, + const vector_u8 one_byte_bytemask, + const uint16_t one_byte_bitmask) { + + // 0b1100_0000_1000_0000 + const auto v_c080 = vector_u16(0xc080); + // 0b0011_1111_0000_0000 + const auto v_1f00 = vector_u16(0x1f00); + // 0b0000_0000_0011_1111 + const auto v_003f = vector_u16(0x003f); + + // 1. prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 + // expected output : [110a|aaaa|10bb|bbbb] x 8 + + // t0 = [0000|0000|00bb|bbbb] + const auto t0 = v_u16 & v_003f; + // t1 = [000a|aaaa|bbbb|bb00] + const auto t1 = v_u16.shl<2>(); + // t2 = [000a|aaaa|00bb|bbbb] + const auto t2 = select(v_1f00, t1, t0); + // t3 = [110a|aaaa|10bb|bbbb] + const auto t3 = t2 | v_c080; + + // 2. merge ASCII and 2-byte codewords + const auto utf8_unpacked1 = + select(one_byte_bytemask, as_vector_u8(v_u16), as_vector_u8(t3)); + +#if SIMDUTF_IS_BIG_ENDIAN + const auto tmp = as_vector_u16(utf8_unpacked1).swap_bytes(); +#else + const auto tmp = as_vector_u16(utf8_unpacked1); +#endif // SIMDUTF_IS_BIG_ENDIAN + const auto utf8_unpacked = as_vector_u8(tmp); + + // 3. prepare bitmask for 8-bit lookup + // one_byte_bitmask = hhggffeeddccbbaa -- the bits are doubled (h - MSB, a + // - LSB) + const uint16_t m0 = one_byte_bitmask & 0x5555; // m0 = 0h0g0f0e0d0c0b0a + const uint16_t m1 = static_cast(m0 >> 7); // m1 = 00000000h0g0f0e0 + const uint8_t m2 = static_cast((m0 | m1) & 0xff); // m2 = hdgcfbea + // 4. pack the bytes + const uint8_t *row = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0]; + const auto shuffle = vector_u8::load(row + 1); + const auto utf8_packed = shuffle.lookup_16(utf8_unpacked); + + // 5. store bytes + utf8_packed.store(utf8_output); + + // 6. 
adjust pointers + utf8_output += row[0]; +} + +inline void write_v_u16_11bits_to_utf8(const vector_u16 v_u16, + char *&utf8_output, + const vector_u16 v_0000, + const vector_u16 v_ff80) { + // no bits set above 7th bit + const auto one_byte_bytemask = (v_u16 & v_ff80) == v_0000; + const uint16_t one_byte_bitmask = one_byte_bytemask.to_bitmask(); + + write_v_u16_11bits_to_utf8(v_u16, utf8_output, + as_vector_u8(one_byte_bytemask), one_byte_bitmask); +} + +std::pair +ppc64_convert_latin1_to_utf8(const char *latin_input, + const size_t latin_input_length, + char *utf8_output) { + const char *end = latin_input + latin_input_length; + + const auto v_0000 = vector_u16::zero(); + const auto v_00 = vector_u8::zero(); + + // 0b1111_1111_1000_0000 + const auto v_ff80 = vector_u16(0xff80); + +#if SIMDUTF_IS_BIG_ENDIAN + const auto latin_1_half_into_u16_byte_mask = + vector_u8(16, 0, 16, 1, 16, 2, 16, 3, 16, 4, 16, 5, 16, 6, 16, 7); + const auto latin_2_half_into_u16_byte_mask = + vector_u8(16, 8, 16, 9, 16, 10, 16, 11, 16, 12, 16, 13, 16, 14, 16, 15); +#else + const auto latin_1_half_into_u16_byte_mask = + vector_u8(0, 16, 1, 16, 2, 16, 3, 16, 4, 16, 5, 16, 6, 16, 7, 16); + const auto latin_2_half_into_u16_byte_mask = + vector_u8(8, 16, 9, 16, 10, 16, 11, 16, 12, 16, 13, 16, 14, 16, 15, 16); +#endif // SIMDUTF_IS_BIG_ENDIAN + + // each latin1 takes 1-2 utf8 bytes + // slow path writes useful 8-15 bytes twice (eagerly writes 16 bytes and then + // adjust the pointer) so the last write can exceed the utf8_output size by + // 8-1 bytes by reserving 8 extra input bytes, we expect the output to have + // 8-16 bytes free + while (end - latin_input >= 16 + 8) { + // Load 16 Latin1 characters (16 bytes) into a 128-bit register + const auto v_latin = vector_u8::load(latin_input); + + if (v_latin.is_ascii()) { // ASCII fast path!!!! 
+ v_latin.store(utf8_output); + latin_input += 16; + utf8_output += 16; + continue; + } + + // assuming a/b are bytes and A/B are uint16 of the same value + // aaaa_aaaa_bbbb_bbbb -> AAAA_AAAA + const vector_u16 v_u16_latin_1_half = + as_vector_u16(latin_1_half_into_u16_byte_mask.lookup_32(v_latin, v_00)); + + // aaaa_aaaa_bbbb_bbbb -> BBBB_BBBB + const vector_u16 v_u16_latin_2_half = + as_vector_u16(latin_2_half_into_u16_byte_mask.lookup_32(v_latin, v_00)); + + write_v_u16_11bits_to_utf8(v_u16_latin_1_half, utf8_output, v_0000, v_ff80); + write_v_u16_11bits_to_utf8(v_u16_latin_2_half, utf8_output, v_0000, v_ff80); + latin_input += 16; + } + + if (end - latin_input >= 16) { + // Load 16 Latin1 characters (16 bytes) into a 128-bit register + const auto v_latin = vector_u8::load(latin_input); + + if (v_latin.is_ascii()) { // ASCII fast path!!!! + v_latin.store(utf8_output); + latin_input += 16; + utf8_output += 16; + } else { + // assuming a/b are bytes and A/B are uint16 of the same value + // aaaa_aaaa_bbbb_bbbb -> AAAA_AAAA + const auto v_u16_latin_1_half = as_vector_u16( + latin_1_half_into_u16_byte_mask.lookup_32(v_latin, v_00)); + + write_v_u16_11bits_to_utf8(v_u16_latin_1_half, utf8_output, v_0000, + v_ff80); + latin_input += 8; + } + } + + return std::make_pair(latin_input, utf8_output); +} +/* end file src/ppc64/ppc64_convert_latin1_to_utf8.cpp */ +#endif // SIMDUTF_FEATURE_LATIN1 && SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_LATIN1 && SIMDUTF_FEATURE_UTF16 +/* begin file src/ppc64/ppc64_convert_latin1_to_utf16.cpp */ +template +size_t ppc64_convert_latin1_to_utf16(const char *latin1_input, size_t len, + char16_t *utf16_output) { + const size_t rounded_len = align_down(len); + + for (size_t i = 0; i < rounded_len; i += vector_u8::ELEMENTS) { + const auto in = vector_u8::load(&latin1_input[i]); + in.store_bytes_as_utf16(&utf16_output[i]); + } + + return rounded_len; +} +/* end file src/ppc64/ppc64_convert_latin1_to_utf16.cpp */ +#endif // 
SIMDUTF_FEATURE_LATIN1 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_LATIN1 && SIMDUTF_FEATURE_UTF32 +/* begin file src/ppc64/ppc64_convert_latin1_to_utf32.cpp */ +std::pair +ppc64_convert_latin1_to_utf32(const char *buf, size_t len, + char32_t *utf32_output) { + const size_t rounded_len = align_down(len); + + for (size_t i = 0; i < rounded_len; i += vector_u8::ELEMENTS) { + const auto in = vector_u8::load(&buf[i]); + in.store_bytes_as_utf32(&utf32_output[i]); + } + + return std::make_pair(buf + rounded_len, utf32_output + rounded_len); +} +/* end file src/ppc64/ppc64_convert_latin1_to_utf32.cpp */ +#endif // SIMDUTF_FEATURE_LATIN1 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/ppc64/ppc64_convert_utf8_to_latin1.cpp */ +// depends on "tables/utf8_to_utf16_tables.h" + +// Convert up to 12 bytes from utf8 to latin1 using a mask indicating the +// end of the code points. Only the least significant 12 bits of the mask +// are accessed. +// It returns how many bytes were consumed (up to 12). +size_t convert_masked_utf8_to_latin1(const char *input, + uint64_t utf8_end_of_code_point_mask, + char *&latin1_output) { + // we use an approach where we try to process up to 12 input bytes. + // Why 12 input bytes and not 16? Because we are concerned with the size of + // the lookup tables. Also 12 is nicely divisible by two and three. + // + // + // Optimization note: our main path below is load-latency dependent. Thus it + // is maybe beneficial to have fast paths that depend on branch prediction but + // have less latency. This results in more instructions but, potentially, also + // higher speeds. + // + const auto in = vector_u8::load(input); + const uint16_t input_utf8_end_of_code_point_mask = + utf8_end_of_code_point_mask & + 0xfff; // we are only processing 12 bytes in case it is not all ASCII + if (utf8_end_of_code_point_mask == 0xfff) { + // We process the data in chunks of 12 bytes. 
+ in.store(latin1_output); + latin1_output += 12; // We wrote 12 characters. + return 12; // We consumed 12 bytes. + } + /// We do not have a fast path available, so we fallback. + const uint8_t idx = + tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][0]; + const uint8_t consumed = + tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1]; + // this indicates an invalid input: + if (idx >= 64) { + return consumed; + } + // Here we should have (idx < 64), if not, there is a bug in the validation or + // elsewhere. SIX (6) input code-code units this is a relatively easy scenario + // we process SIX (6) input code-code units. The max length in bytes of six + // code code units spanning between 1 and 2 bytes each is 12 bytes. On + // processors where pdep/pext is fast, we might be able to use a small lookup + // table. + + const auto reshuffle = vector_u8::load(&tables::utf8_to_utf16::shufutf8[idx]); + const auto perm8 = reshuffle.lookup_32(in, vector_u8::zero()); +#if SIMDUTF_IS_BIG_ENDIAN + const auto perm16 = as_vector_u16(perm8).swap_bytes(); +#else + const auto perm16 = as_vector_u16(perm8); +#endif // SIMDUTF_IS_BIG_ENDIAN + const auto ascii = perm16 & uint16_t(0x7f); + const auto highbyte = perm16 & uint16_t(0x1f00); + const auto composed = ascii | highbyte.shr<2>(); + + const auto latin1_packed = vector_u16::pack(composed, composed); +#if defined(__clang__) + __attribute__((aligned(16))) char buf[16]; + latin1_packed.store(buf); + memcpy(latin1_output, buf, 6); +#else + // writing 8 bytes even though we only care about the first 6 bytes. + const auto tmp = vec_u64_t(latin1_packed.value); + memcpy(latin1_output, &tmp[0], 8); +#endif + latin1_output += 6; // We wrote 6 bytes. 
+ return consumed; +} +/* end file src/ppc64/ppc64_convert_utf8_to_latin1.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +/* begin file src/ppc64/ppc64_convert_utf8_to_utf16.cpp */ +// depends on "tables/utf8_to_utf16_tables.h" + +// Convert up to 12 bytes from utf8 to utf16 using a mask indicating the +// end of the code points. Only the least significant 12 bits of the mask +// are accessed. +// It returns how many bytes were consumed (up to 12). +template +size_t convert_masked_utf8_to_utf16(const char *input, + uint64_t utf8_end_of_code_point_mask, + char16_t *&utf16_output) { + // we use an approach where we try to process up to 12 input bytes. + // Why 12 input bytes and not 16? Because we are concerned with the size of + // the lookup tables. Also 12 is nicely divisible by two and three. + // + // + // Optimization note: our main path below is load-latency dependent. Thus it + // is maybe beneficial to have fast paths that depend on branch prediction but + // have less latency. This results in more instructions but, potentially, also + // higher speeds. + // + // We first try a few fast paths. + const auto in = vector_u8::load(input); + const uint16_t input_utf8_end_of_code_point_mask = + utf8_end_of_code_point_mask & 0xfff; + if (utf8_end_of_code_point_mask == 0xfff) { + // We process the data in chunks of 12 bytes. + // Note: using 16 bytes is unsafe, see issue_ossfuzz_71218 + in.store_bytes_as_utf16(utf16_output); + utf16_output += 12; // We wrote 12 16-bit characters. + return 12; // We consumed 12 bytes. + } + if (((utf8_end_of_code_point_mask & 0xFFFF) == 0xaaaa)) { + // We want to take 8 2-byte UTF-8 code units and turn them into 8 2-byte + // UTF-16 code units. 
+#if SIMDUTF_IS_BIG_ENDIAN + const auto in16 = as_vector_u16(in); +#else + const auto in16 = as_vector_u16(in).swap_bytes(); +#endif // SIMDUTF_IS_BIG_ENDIAN + const auto lo = in16 & uint16_t(0x007f); + const auto hi = in16.shr<2>(); + + auto composed = select(uint16_t(0x1f00 >> 2), hi, lo); + if simdutf_constexpr (!match_system(big_endian)) { + composed = composed.swap_bytes(); + } + + composed.store(utf16_output); + utf16_output += 8; // We wrote 16 bytes, 8 code points. + return 16; + } + if (input_utf8_end_of_code_point_mask == 0x924) { + // We want to take 4 3-byte UTF-8 code units and turn them into 4 2-byte + // UTF-16 code units. There is probably a more efficient sequence, but the + // following might do. + + // AltiVec: it might be done better, for now SSE translation + + const auto sh = + vector_u8(2, 1, 0, 16, 5, 4, 3, 16, 8, 7, 6, 16, 11, 10, 9, 16); +#if SIMDUTF_IS_BIG_ENDIAN + const auto perm = + as_vector_u32(sh.lookup_32(in, vector_u8::zero())).swap_bytes(); +#else + const auto perm = as_vector_u32(sh.lookup_32(in, vector_u8::zero())); +#endif // SIMDUTF_IS_BIG_ENDIAN + const auto b0 = perm & uint32_t(0x0000007f); + const auto b1 = select(uint32_t(0x00003f00 >> 2), perm.shr<2>(), b0); + const auto b2 = select(uint32_t(0x000f0000 >> 4), perm.shr<4>(), b1); + const auto composed = b2; + auto packed = vector_u32::pack(composed, composed); + + if simdutf_constexpr (!match_system(big_endian)) { + packed = packed.swap_bytes(); + } + + packed.store(utf16_output); + utf16_output += 4; + return 12; + } + /// We do not have a fast path available, so we fallback. + + const uint8_t idx = + tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][0]; + const uint8_t consumed = + tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1]; + + if (idx < 64) { + // SIX (6) input code-code units + // this is a relatively easy scenario + // we process SIX (6) input code-code units. 
The max length in bytes of six + // code code units spanning between 1 and 2 bytes each is 12 bytes. On + // processors where pdep/pext is fast, we might be able to use a small + // lookup table. + const auto sh = vector_u8::load(&tables::utf8_to_utf16::shufutf8[idx]); +#if SIMDUTF_IS_BIG_ENDIAN + const auto perm = + as_vector_u16(sh.lookup_32(in, vector_u8::zero())).swap_bytes(); +#else + const auto perm = as_vector_u16(sh.lookup_32(in, vector_u8::zero())); +#endif // SIMDUTF_IS_BIG_ENDIAN + const auto b0 = perm & uint16_t(0x007f); + const auto b1 = perm & uint16_t(0x1f00); + + auto composed = b0 | b1.shr<2>(); + + if simdutf_constexpr (!match_system(big_endian)) { + composed = composed.swap_bytes(); + } + + composed.store(utf16_output); + utf16_output += 6; // We wrote 12 bytes, 6 code points. + } else if (idx < 145) { + // FOUR (4) input code-code units + const auto sh = vector_u8::load(&tables::utf8_to_utf16::shufutf8[idx]); +#if SIMDUTF_IS_BIG_ENDIAN + const auto perm = + as_vector_u32(sh.lookup_32(in, vector_u8::zero())).swap_bytes(); +#else + const auto perm = as_vector_u32(sh.lookup_32(in, vector_u8::zero())); +#endif // SIMDUTF_IS_BIG_ENDIAN + const auto b0 = perm & uint32_t(0x0000007f); + const auto b1 = perm & uint32_t(0x00003f00); + const auto b2 = perm & uint32_t(0x000f0000); + + const auto composed = b0 | b1.shr<2>() | b2.shr<4>(); + + auto packed = vector_u32::pack(composed, composed); + + if simdutf_constexpr (!match_system(big_endian)) { + packed = packed.swap_bytes(); + } + + packed.store(utf16_output); + utf16_output += 4; + } else if (idx < 209) { + // TWO (2) input code-code units + ////////////// + // There might be garbage inputs where a leading byte mascarades as a + // four-byte leading byte (by being followed by 3 continuation byte), but is + // not greater than 0xf0. This could trigger a buffer overflow if we only + // counted leading bytes of the form 0xf0 as generating surrogate pairs, + // without further UTF-8 validation. 
Thus we must be careful to ensure that + // only leading bytes at least as large as 0xf0 generate surrogate pairs. We + // do as at the cost of an extra mask. + ///////////// + const auto sh = vector_u8::load(&tables::utf8_to_utf16::shufutf8[idx]); +#if SIMDUTF_IS_BIG_ENDIAN + const auto perm = + as_vector_u32(sh.lookup_32(in, vector_u8::zero())).swap_bytes(); +#else + const auto perm = as_vector_u32(sh.lookup_32(in, vector_u8::zero())); +#endif // SIMDUTF_IS_BIG_ENDIAN + const auto ascii = perm & uint32_t(0x00000007f); + const auto middlebyte = perm & uint32_t(0x00003f00); + const auto middlebyte_shifted = middlebyte.shr<2>(); + + auto middlehighbyte = perm & uint32_t(0x003f0000); + // correct for spurious high bit + + const auto correct = (perm & uint32_t(0x00400000)).shr<1>(); + middlehighbyte = correct ^ middlehighbyte; + const auto middlehighbyte_shifted = middlehighbyte.shr<4>(); + // We deliberately carry the leading four bits in highbyte if they are + // present, we remove them later when computing hightenbits. + const auto highbyte = perm & uint32_t(0xff000000); + const auto highbyte_shifted = highbyte.shr<6>(); + // When we need to generate a surrogate pair (leading byte > 0xF0), then + // the corresponding 32-bit value in 'composed' will be greater than + // > (0xff00000>>6) or > 0x3c00000. This can be used later to identify the + // location of the surrogate pairs. 
+ const auto composed = + ascii | middlebyte_shifted | highbyte_shifted | middlehighbyte_shifted; + + const auto composedminus = composed - uint32_t(0x10000); + const auto lowtenbits = composedminus & uint32_t(0x3ff); + // Notice the 0x3ff mask: + const auto hightenbits = composedminus.shr<10>() & uint32_t(0x3ff); + const auto lowtenbitsadd = lowtenbits + uint32_t(0xDC00); + const auto hightenbitsadd = hightenbits + uint32_t(0xD800); + const auto lowtenbitsaddshifted = lowtenbitsadd.shl<16>(); + auto surrogates = hightenbitsadd | lowtenbitsaddshifted; + + uint32_t basic_buffer[4]; + composed.store(basic_buffer); + uint32_t surrogate_buffer[4]; + surrogates.swap_bytes().store(surrogate_buffer); + + for (size_t i = 0; i < 3; i++) { + if (basic_buffer[i] > 0x3c00000) { + const auto ch0 = uint16_t(surrogate_buffer[i] & 0xffff); + const auto ch1 = uint16_t(surrogate_buffer[i] >> 16); + if (match_system(big_endian)) { + utf16_output[1] = scalar::u16_swap_bytes(ch0); + utf16_output[0] = scalar::u16_swap_bytes(ch1); + } else { + utf16_output[1] = ch0; + utf16_output[0] = ch1; + } + utf16_output += 2; + } else { + const auto chr = uint16_t(basic_buffer[i]); + utf16_output[0] = scalar::utf16::swap_if_needed(chr); + utf16_output++; + } + } + } else { + // here we know that there is an error but we do not handle errors + } + return consumed; +} +/* end file src/ppc64/ppc64_convert_utf8_to_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +/* begin file src/ppc64/ppc64_convert_utf8_to_utf32.cpp */ +// depends on "tables/utf8_to_utf16_tables.h" + +// Convert up to 12 bytes from utf8 to utf32 using a mask indicating the +// end of the code points. Only the least significant 12 bits of the mask +// are accessed. +// It returns how many bytes were consumed (up to 12). 
+size_t convert_masked_utf8_to_utf32(const char *input, + uint64_t utf8_end_of_code_point_mask, + char32_t *&utf32_output) { + // we use an approach where we try to process up to 12 input bytes. + // Why 12 input bytes and not 16? Because we are concerned with the size of + // the lookup tables. Also 12 is nicely divisible by two and three. + // + // + // Optimization note: our main path below is load-latency dependent. Thus it + // is maybe beneficial to have fast paths that depend on branch prediction but + // have less latency. This results in more instructions but, potentially, also + // higher speeds. + // + // We first try a few fast paths. + const auto in = vector_u8::load(input); + const uint16_t input_utf8_end_of_code_point_mask = + utf8_end_of_code_point_mask & 0xfff; + if (utf8_end_of_code_point_mask == 0xfff) { + // We process the data in chunks of 12 bytes. + in.store_bytes_as_utf32(utf32_output); + utf32_output += 12; // We wrote 12 32-bit characters. + return 12; // We consumed 12 bytes. + } + if (((utf8_end_of_code_point_mask & 0xffff) == 0xaaaa)) { + // We want to take 8 2-byte UTF-8 code units and turn them into 8 4-byte + // UTF-32 code units. +#if SIMDUTF_IS_BIG_ENDIAN + const auto perm = as_vector_u16(in); +#else + const auto perm = as_vector_u16(in).swap_bytes(); +#endif // SIMDUTF_IS_BIG_ENDIAN + // in = [110aaaaa|10bbbbbb] + // t0 = [00000000|00bbbbbb] + const auto t0 = perm & uint16_t(0x007f); + + // t1 = [00110aaa|aabbbbbb] + const auto t1 = perm.shr<2>(); + const auto composed = select(uint16_t(0x1f00 >> 2), t1, t0); + + const auto composed8 = as_vector_u8(composed); + composed8.store_words_as_utf32(utf32_output); + + utf32_output += 8; // We wrote 32 bytes, 8 code points. + return 16; + } + if (input_utf8_end_of_code_point_mask == 0x924) { + // We want to take 4 3-byte UTF-8 code units and turn them into 4 4-byte + // UTF-32 code units. 
+#if SIMDUTF_IS_BIG_ENDIAN + const auto sh = + vector_u8(-1, 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11); +#else + const auto sh = + vector_u8(2, 1, 0, -1, 5, 4, 3, -1, 8, 7, 6, -1, 11, 10, 9, -1); +#endif // SIMDUTF_IS_BIG_ENDIAN + const auto perm = as_vector_u32(sh.lookup_32(in, vector_u8::zero())); + + // in = [1110aaaa|10bbbbbb|10cccccc] + + // t0 = [00000000|00000000|00cccccc] + const auto t0 = perm & uint32_t(0x0000007f); + + // t2 = [00000000|0000bbbb|bbcccccc] + const auto t1 = perm.shr<2>(); + const auto t2 = select(uint32_t(0x00003f00 >> 2), t1, t0); + + // t4 = [00000000|aaaabbbb|bbcccccc] + const auto t3 = perm.shr<4>(); + const auto t4 = select(uint32_t(0x0f0000 >> 4), t3, t2); + + t4.store(utf32_output); + utf32_output += 4; + return 12; + } + /// We do not have a fast path available, so we fallback. + + const uint8_t idx = + tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][0]; + const uint8_t consumed = + tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1]; + if (idx < 64) { + // SIX (6) input code-code units + // this is a relatively easy scenario + // we process SIX (6) input code-code units. The max length in bytes of six + // code code units spanning between 1 and 2 bytes each is 12 bytes. On + // processors where pdep/pext is fast, we might be able to use a small + // lookup table. + const auto sh = vector_u8::load(&tables::utf8_to_utf16::shufutf8[idx]); +#if SIMDUTF_IS_BIG_ENDIAN + const auto perm = + as_vector_u16(sh.lookup_32(in, vector_u8::zero())).swap_bytes(); +#else + const auto perm = as_vector_u16(sh.lookup_32(in, vector_u8::zero())); +#endif // SIMDUTF_IS_BIG_ENDIAN + const auto ascii = perm & uint16_t(0x7f); + const auto highbyte = perm & uint16_t(0x1f00); + const auto composed = ascii | highbyte.shr<2>(); + + as_vector_u8(composed).store_words_as_utf32(utf32_output); + utf32_output += 6; // We wrote 12 bytes, 6 code points. 
+ } else if (idx < 145) { + // FOUR (4) input code-code units + const auto sh = vector_u8::load(&tables::utf8_to_utf16::shufutf8[idx]); +#if SIMDUTF_IS_BIG_ENDIAN + const auto perm = + as_vector_u32(sh.lookup_32(in, vector_u8::zero())).swap_bytes(); +#else + const auto perm = as_vector_u32(sh.lookup_32(in, vector_u8::zero())); +#endif // SIMDUTF_IS_BIG_ENDIAN + const auto ascii = perm & uint32_t(0x7f); + const auto middlebyte = perm & uint32_t(0x3f00); + const auto middlebyte_shifted = middlebyte.shr<2>(); + const auto highbyte = perm & uint32_t(0x0f0000); + const auto highbyte_shifted = highbyte.shr<4>(); + const auto composed = ascii | middlebyte_shifted | highbyte_shifted; + + composed.store(utf32_output); + utf32_output += 4; + } else if (idx < 209) { + // TWO (2) input code-code units + const auto sh = vector_u8::load(&tables::utf8_to_utf16::shufutf8[idx]); +#if SIMDUTF_IS_BIG_ENDIAN + const auto perm = + as_vector_u32(sh.lookup_32(in, vector_u8::zero())).swap_bytes(); +#else + const auto perm = as_vector_u32(sh.lookup_32(in, vector_u8::zero())); +#endif // SIMDUTF_IS_BIG_ENDIAN + const auto ascii = perm & uint32_t(0x0000007f); + const auto middlebyte = perm & uint32_t(0x3f00); + const auto middlebyte_shifted = middlebyte.shr<2>(); + auto middlehighbyte = perm & uint32_t(0x003f0000); + // correct for spurious high bit + const auto correct0 = perm & uint32_t(0x00400000); + const auto correct = correct0.shr<1>(); + middlehighbyte = correct ^ middlehighbyte; + const auto middlehighbyte_shifted = middlehighbyte.shr<4>(); + const auto highbyte = perm & uint32_t(0x07000000); + const auto highbyte_shifted = highbyte.shr<6>(); + const auto composed = + ascii | middlebyte_shifted | highbyte_shifted | middlehighbyte_shifted; + composed.store(utf32_output); + utf32_output += 3; + } else { + // here we know that there is an error but we do not handle errors + } + return consumed; +} +/* end file src/ppc64/ppc64_convert_utf8_to_utf32.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 
&& SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/ppc64/ppc64_convert_utf16_to_latin1.cpp */ +struct utf16_to_latin1_t { + error_code err; + const char16_t *input; + char *output; +}; + +template +utf16_to_latin1_t ppc64_convert_utf16_to_latin1(const char16_t *buf, size_t len, + char *latin1_output) { + const char16_t *end = buf + len; + while (end - buf >= 8) { + // Load 8 x UTF-16 characters + auto in = vector_u8::load(buf); + + // Move low bytes of UTF-16 chars to lower half of `in` + // and upper bytes to upper half of `in`. + if simdutf_constexpr (!match_system(big_endian)) { + const auto perm = + vector_u8(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15); + in = perm.lookup_16(in); + } else { + const auto perm = + vector_u8(1, 3, 5, 7, 9, 11, 13, 15, 0, 2, 4, 6, 8, 10, 12, 14); + in = perm.lookup_16(in); + } + + // AltiVec-specific +#if defined(__clang__) + __attribute__((aligned(16))) uint64_t tmp[8]; + in.store(tmp); + #if SIMDUTF_IS_BIG_ENDIAN + memcpy(latin1_output, &tmp[0], 8); + const uint64_t upper = tmp[1]; + #else + memcpy(latin1_output, &tmp[1], 8); + const uint64_t upper = tmp[0]; + #endif // SIMDUTF_IS_BIG_ENDIAN +#else + const auto tmp = vec_u64_t(in.value); + #if SIMDUTF_IS_BIG_ENDIAN + memcpy(latin1_output, &tmp[0], 8); + const uint64_t upper = tmp[1]; + #else + memcpy(latin1_output, &tmp[1], 8); + const uint64_t upper = tmp[0]; + #endif // SIMDUTF_IS_BIG_ENDIAN +#endif // defined(__clang__) + // AltiVec + + if (simdutf_unlikely(upper)) { + uint8_t bytes[8]; + memcpy(bytes, &upper, 8); + for (size_t k = 0; k < 8; k++) { + if (bytes[k] != 0) { + return utf16_to_latin1_t{error_code::TOO_LARGE, buf + k, + latin1_output}; + } + } + } else { + // Adjust pointers for next iteration + buf += 8; + latin1_output += 8; + } + } // while + + return utf16_to_latin1_t{error_code::SUCCESS, buf, latin1_output}; +} +/* end file src/ppc64/ppc64_convert_utf16_to_latin1.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 
&& SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF8 +/* begin file src/ppc64/ppc64_convert_utf16_to_utf8.cpp */ +/* + The vectorized algorithm works on single SSE register i.e., it + loads eight 16-bit code units. + + We consider three cases: + 1. an input register contains no surrogates and each value + is in range 0x0000 .. 0x07ff. + 2. an input register contains no surrogates and values are + is in range 0x0000 .. 0xffff. + 3. an input register contains surrogates --- i.e. codepoints + can have 16 or 32 bits. + + Ad 1. + + When values are less than 0x0800, it means that a 16-bit code unit + can be converted into: 1) single UTF8 byte (when it is an ASCII + char) or 2) two UTF8 bytes. + + For this case we do only some shuffle to obtain these 2-byte + codes and finally compress the whole SSE register with a single + shuffle. + + We need 256-entry lookup table to get a compression pattern + and the number of output bytes in the compressed vector register. + Each entry occupies 17 bytes. + + Ad 2. + + When values fit in 16-bit code units, but are above 0x07ff, then + a single word may produce one, two or three UTF8 bytes. + + We prepare data for all these three cases in two registers. + The first register contains lower two UTF8 bytes (used in all + cases), while the second one contains just the third byte for + the three-UTF8-bytes case. + + Finally these two registers are interleaved forming eight-element + array of 32-bit values. The array spans two SSE registers. + The bytes from the registers are compressed using two shuffles. + + We need 256-entry lookup table to get a compression pattern + and the number of output bytes in the compressed vector register. + Each entry occupies 17 bytes. + + + To summarize: + - We need two 256-entry tables that have 8704 bytes in total. +*/ + +// Auxiliary procedure used by UTF-16 and UTF-32 into UTF-8. +// Note the pointer is passed by reference, it is updated by the procedure. 
+template +simdutf_really_inline void ppc64_convert_utf16_to_1_2_3_bytes_of_utf8( + const vector_u16 in, uint16_t one_byte_bitmask, + const T one_or_two_bytes_bytemask, uint16_t one_or_two_bytes_bitmask, + char *&utf8_output) { + // case: code units from register produce either 1, 2 or 3 UTF-8 bytes +#if SIMDUTF_IS_BIG_ENDIAN + const auto dup_lsb = + vector_u8(1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15); +#else + const auto dup_lsb = + vector_u8(0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14); +#endif // SIMDUTF_IS_BIG_ENDIAN + + /* In this branch we handle three cases: + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + single UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two + UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + three UTF-8 bytes + + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. + + We precompute byte 1 for case #1 and the common byte for cases #2 & #3 + in register t2. + + We precompute byte 1 for case #3 and -- **conditionally** -- precompute + either byte 1 for case #2 or byte 2 for case #3. Note that they + differ by exactly one bit. + + Finally from these two code units we build proper UTF-8 sequence, taking + into account the case (i.e, the number of bytes to write). 
+ */ + /** + * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: + * t2 => [0ccc|cccc] [10cc|cccc] + * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) + */ + // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] + const auto t0 = as_vector_u16(dup_lsb.lookup_16(as_vector_u8(in))); + + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] + const auto t1 = t0 & uint16_t(0b0011111101111111); + // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] + const auto t2 = t1 | uint16_t(0b1000000000000000); + + // in = [aaaa|bbbb|bbcc|cccc] + // a0 = [0000|0000|0000|aaaa] + const auto a0 = in.shr<12>(); + // b0 = [aabb|bbbb|cccc|cc00] + const auto b0 = in.shl<2>(); + // s0 = [00bb|bbbb|00cc|cccc] + const auto s0 = select(uint16_t(0x3f00), b0, a0); + + // s3 = [11bb|bbbb|1110|aaaa] + const auto s3 = s0 | uint16_t(0b1100000011100000); + + const auto m0 = + ~as_vector_u16(one_or_two_bytes_bytemask) & uint16_t(0b0100000000000000); + const auto s4 = s3 ^ m0; + + // 4. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle + const uint16_t mask = + (one_byte_bitmask & 0x5555) | (one_or_two_bytes_bitmask & 0xaaaa); + if (mask == 0) { + // We only have three-byte code units. Use fast path. 
+#if SIMDUTF_IS_BIG_ENDIAN + // Lookups produced by scripts/ppc64_convert_utf16_to_utf8.py + const auto shuffle0 = + vector_u8(1, 0, 16, 3, 2, 18, 5, 4, 20, 7, 6, 22, 9, 8, 24, 11); + const auto shuffle1 = vector_u8(10, 26, 13, 12, 28, 15, 14, 30, -1, -1, -1, + -1, -1, -1, -1, -1); +#else + const auto shuffle0 = + vector_u8(0, 1, 17, 2, 3, 19, 4, 5, 21, 6, 7, 23, 8, 9, 25, 10); + const auto shuffle1 = vector_u8(11, 27, 12, 13, 29, 14, 15, 31, -1, -1, -1, + -1, -1, -1, -1, -1); +#endif // SIMDUTF_IS_BIG_ENDIAN + const auto utf8_0 = shuffle0.lookup_32(as_vector_u8(s4), as_vector_u8(t2)); + const auto utf8_1 = shuffle1.lookup_32(as_vector_u8(s4), as_vector_u8(t2)); + + utf8_0.store(utf8_output); + utf8_output += 16; + utf8_1.store(utf8_output); + utf8_output += 8; + return; + } + + const uint8_t mask0 = uint8_t(mask); + + const uint8_t *row0 = + &simdutf::tables::ppc64_utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; + const auto shuffle0 = vector_u8::load(row0 + 1); + + const auto utf8_0 = shuffle0.lookup_32(as_vector_u8(s4), as_vector_u8(t2)); + const uint8_t mask1 = static_cast(mask >> 8); + + const uint8_t *row1 = + &simdutf::tables::ppc64_utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; + const auto shuffle1 = vector_u8::load(row1 + 1) + uint8_t(8); + const auto utf8_1 = shuffle1.lookup_32(as_vector_u8(s4), as_vector_u8(t2)); + + utf8_0.store(utf8_output); + utf8_output += row0[0]; + utf8_1.store(utf8_output); + utf8_output += row1[0]; +} + +struct utf16_to_utf8_t { + error_code err; + const char16_t *input; + char *output; +}; + +/* + Returns utf16_to_utf8_t value + A scalar routine should carry on the conversion of the tail, + iff there was no error. 
+*/ +template +utf16_to_utf8_t ppc64_convert_utf16_to_utf8(const char16_t *buf, size_t len, + char *utf8_output) { + + const char16_t *end = buf + len; + + const auto v_f800 = vector_u16(0xf800); + const auto v_d800 = vector_u16(0xd800); + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 + + while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { + auto in = vector_u16::load(buf); + if (not match_system(big_endian)) { + in = in.swap_bytes(); + } + // a single 16-bit UTF-16 word can yield 1, 2 or 3 UTF-8 bytes + if (in.is_ascii()) { + auto nextin = vector_u16::load(buf + vector_u16::ELEMENTS); + if (not match_system(big_endian)) { + nextin = nextin.swap_bytes(); + } + + if (nextin.is_ascii()) { + // 1. pack the bytes + const auto utf8_packed = vector_u16::pack(in, nextin); + // 2. store (16 bytes) + utf8_packed.store(utf8_output); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! + } + + // next block is not ASCII + const auto utf8_packed = vector_u16::pack(in, in); + // 2. store (16 bytes) + utf8_packed.store(utf8_output); + // 3. adjust pointers + buf += 8; + utf8_output += 8; + in = nextin; + // fallback + } + + // no bits set above 7th bit + const auto one_byte_bytemask = in < uint16_t(1 << 7); + const uint16_t one_byte_bitmask = one_byte_bytemask.to_bitmask(); + + // no bits set above 11th bit + const auto one_or_two_bytes_bytemask = in < uint16_t(1 << 11); + const uint16_t one_or_two_bytes_bitmask = + one_or_two_bytes_bytemask.to_bitmask(); + + if (one_or_two_bytes_bitmask == 0xffff) { + write_v_u16_11bits_to_utf8( + in, utf8_output, as_vector_u8(one_byte_bytemask), one_byte_bitmask); + buf += 8; + continue; + } + + // 1. Check if there are any surrogate word in the input chunk. + // We have also to deal with situation when there is a surrogate word + // at the end of a chunk. 
+ const auto surrogates_bytemask = (in & v_f800) == v_d800; + + // bitmask = 0x0000 if there are no surrogates + // = 0xc000 if the last word is a surrogate + const uint16_t surrogates_bitmask = surrogates_bytemask.to_bitmask(); + // It might seem like checking for surrogates_bitmask == 0xc000 could help. + // However, it is likely an uncommon occurrence. + if (surrogates_bitmask == 0x0000) { + ppc64_convert_utf16_to_1_2_3_bytes_of_utf8( + in, one_byte_bitmask, one_or_two_bytes_bytemask, + one_or_two_bytes_bitmask, utf8_output); + + buf += 8; + // surrogate pair(s) in a register + } else { + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint16_t word = scalar::utf16::swap_if_needed(buf[k]); + if ((word & 0xFF80) == 0) { + *utf8_output++ = uint8_t(word); + } else if ((word & 0xF800) == 0) { + *utf8_output++ = uint8_t((word >> 6) | 0b11000000); + *utf8_output++ = uint8_t((word & 0b111111) | 0b10000000); + } else if ((word & 0xF800) != 0xD800) { + *utf8_output++ = uint8_t((word >> 12) | 0b11100000); + *utf8_output++ = uint8_t(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = uint8_t((word & 0b111111) | 0b10000000); + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = + scalar::utf16::swap_if_needed(buf[k + 1]); + k++; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return utf16_to_utf8_t{error_code::SURROGATE, buf + k - 1, + utf8_output}; + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf8_output++ = uint8_t((value >> 18) | 0b11110000); + *utf8_output++ = uint8_t(((value >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = uint8_t(((value >> 6) & 0b111111) | 0b10000000); + 
*utf8_output++ = uint8_t((value & 0b111111) | 0b10000000); + } + } + buf += k; + } + } // while + + return utf16_to_utf8_t{error_code::SUCCESS, buf, utf8_output}; +} +/* end file src/ppc64/ppc64_convert_utf16_to_utf8.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +/* begin file src/ppc64/ppc64_convert_utf16_to_utf32.cpp */ +struct utf16_to_utf32_t { + error_code err; // error code + const char16_t *input; // last position in input buffer + char32_t *output; // last position in output buffer +}; + +template +utf16_to_utf32_t ppc64_convert_utf16_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_output) { + const char16_t *end = buf + len; + + const auto v_f800 = vector_u16::splat(0xf800); + const auto v_d800 = vector_u16::splat(0xd800); + const auto zero = vector_u8::zero(); + + while (end - buf >= vector_u16::ELEMENTS) { + auto in = vector_u16::load(buf); + if (not match_system(big_endian)) { + in = in.swap_bytes(); + } + + // 1. Check if there are any surrogate word in the input chunk. + // We have also deal with situation when there is a surrogate word + // at the end of a chunk. + const auto surrogates_bytemask = (in & v_f800) == v_d800; + + // bitmask = 0x0000 if there are no surrogates + const uint16_t surrogates_bitmask = surrogates_bytemask.to_bitmask(); + + // It might seem like checking for surrogates_bitmask == 0xc000 could help. + // However, it is likely an uncommon occurrence. 
+ if (surrogates_bitmask == 0x0000) { + // case: no surrogate pairs, extend 16-bit code units to 32-bit code units +#if SIMDUTF_IS_BIG_ENDIAN + const auto lo = + vector_u8(16, 16, 0, 1, 16, 16, 2, 3, 16, 16, 4, 5, 16, 16, 6, 7); + const auto hi = vector_u8(16, 16, 8 + 0, 8 + 1, 16, 16, 8 + 2, 8 + 3, 16, + 16, 8 + 4, 8 + 5, 16, 16, 8 + 6, 8 + 7); +#else + const auto lo = + vector_u8(0, 1, 16, 16, 2, 3, 16, 16, 4, 5, 16, 16, 6, 7, 16, 16); + const auto hi = vector_u8(8 + 0, 8 + 1, 16, 16, 8 + 2, 8 + 3, 16, 16, + 8 + 4, 8 + 5, 16, 16, 8 + 6, 8 + 7, 16, 16); +#endif // SIMDUTF_IS_BIG_ENDIAN + + const auto utf32_0 = lo.lookup_32(as_vector_u8(in), zero); + const auto utf32_1 = hi.lookup_32(as_vector_u8(in), zero); + + utf32_0.store(utf32_output); + utf32_1.store(utf32_output + 4); + utf32_output += 8; + buf += 8; + // surrogate pair(s) in a register + } else { + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. 
+ size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + const uint16_t word = scalar::utf16::swap_if_needed(buf[k]); + if ((word & 0xF800) != 0xD800) { + *utf32_output++ = char32_t(word); + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = + scalar::utf16::swap_if_needed(buf[k + 1]); + k++; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return utf16_to_utf32_t{error_code::SURROGATE, buf + k - 1, + utf32_output}; + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf32_output++ = char32_t(value); + } + } + buf += k; + } + } // while + + return utf16_to_utf32_t{error_code::SUCCESS, buf, utf32_output}; +} +/* end file src/ppc64/ppc64_convert_utf16_to_utf32.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/ppc64/ppc64_convert_utf32_to_latin1.cpp */ +enum class ErrorChecking { disabled, enabled }; + +struct utf32_to_latin1_t { + error_code err; + const char32_t *input; + char *output; +}; + +template +utf32_to_latin1_t simdutf_really_inline ppc64_convert_utf32_to_latin1( + const char32_t *buf, size_t len, char *latin1_output) { + constexpr size_t N = vector_u32::ELEMENTS; + const size_t rounded_len = align_down<4 * N>(len); + + const auto high_bytes_mask = vector_u32::splat(0xFFFFFF00); + + for (size_t i = 0; i < rounded_len; i += 4 * N) { + const auto in1 = vector_u32::load(buf + 0 * N); + const auto in2 = vector_u32::load(buf + 1 * N); + const auto in3 = vector_u32::load(buf + 2 * N); + const auto in4 = vector_u32::load(buf + 3 * N); + + if (ec == ErrorChecking::enabled) { + const auto combined = in1 | in2 | in3 | in4; + const auto too_big = (combined & high_bytes_mask) != uint32_t(0); + + if (simdutf_unlikely(too_big.any())) { + // Scalar code will carry on from the beginning of the current 
block + // and report the exact error position. + return utf32_to_latin1_t{error_code::OTHER, buf, latin1_output}; + } + } + + // Note: element #1 contains 0, and is used to mask-out elements +#if SIMDUTF_IS_BIG_ENDIAN + const auto shlo = vector_u8(0 + 3, 4 + 3, 8 + 3, 12 + 3, 16 + 3, 20 + 3, + 24 + 3, 28 + 3, 1, 1, 1, 1, 1, 1, 1, 1); + const auto shhi = vector_u8(1, 1, 1, 1, 1, 1, 1, 1, 0 + 3, 4 + 3, 8 + 3, + 12 + 3, 16 + 3, 20 + 3, 24 + 3, 28 + 3); +#else + const auto shlo = + vector_u8(0, 4, 8, 12, 16, 20, 24, 28, 1, 1, 1, 1, 1, 1, 1, 1); + const auto shhi = + vector_u8(1, 1, 1, 1, 1, 1, 1, 1, 0, 4, 8, 12, 16, 20, 24, 28); +#endif // SIMDUTF_IS_BIG_ENDIAN + const auto lo = shlo.lookup_32(as_vector_u8(in1), as_vector_u8(in2)); + const auto hi = shhi.lookup_32(as_vector_u8(in3), as_vector_u8(in4)); + + const auto merged = lo | hi; + + merged.store(latin1_output); + latin1_output += 4 * N; + buf += 4 * N; + } + + return utf32_to_latin1_t{error_code::SUCCESS, buf, latin1_output}; +} +/* end file src/ppc64/ppc64_convert_utf32_to_latin1.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_UTF16 +/* begin file src/ppc64/ppc64_convert_utf32_to_utf16.cpp */ +struct utf32_to_utf16_t { + error_code err; + const char32_t *input; + char16_t *output; +}; + +template +utf32_to_utf16_t ppc64_convert_utf32_to_utf16(const char32_t *buf, size_t len, + char16_t *utf16_output) { + + const char32_t *end = buf + len; + + const auto zero = vector_u32::zero(); + const auto v_ffff0000 = vector_u32::splat(0xffff0000); + + auto forbidden_global = simd16(); + + while (end - buf >= 8) { + const auto in0 = vector_u32::load(buf); + const auto in1 = vector_u32::load(buf + vector_u32::ELEMENTS); + + const auto any_surrogate = ((in0 | in1) & v_ffff0000) != zero; + + // Check if no bits set above 15th + if (any_surrogate.is_zero()) { + // Pack UTF-32 to UTF-16 +#if SIMDUTF_IS_BIG_ENDIAN + const auto sh = big_endian ? 
vector_u8(2, 3, 6, 7, 10, 11, 14, 15, 18, 19, + 22, 23, 26, 27, 30, 31) + : vector_u8(3, 2, 7, 6, 11, 10, 15, 14, 19, 18, + 23, 22, 27, 26, 31, 30); +#else + const auto sh = big_endian ? vector_u8(1, 0, 5, 4, 9, 8, 13, 12, 17, 16, + 21, 20, 25, 24, 29, 28) + : vector_u8(0, 1, 4, 5, 8, 9, 12, 13, 16, 17, + 20, 21, 24, 25, 28, 29); +#endif // SIMDUTF_IS_BIG_ENDIAN + const auto packed0 = sh.lookup_32(as_vector_u8(in0), as_vector_u8(in1)); + const auto packed = as_vector_u16(packed0); + +#if SIMDUTF_IS_BIG_ENDIAN + const auto v_f800 = + big_endian ? vector_u16::splat(0xf800) : vector_u16::splat(0x00f8); + const auto v_d800 = + big_endian ? vector_u16::splat(0xd800) : vector_u16::splat(0x00d8); +#else + const auto v_f800 = + big_endian ? vector_u16::splat(0x00f8) : vector_u16::splat(0xf800); + const auto v_d800 = + big_endian ? vector_u16::splat(0x00d8) : vector_u16::splat(0xd800); +#endif // SIMDUTF_IS_BIG_ENDIAN + const auto forbidden = (packed & v_f800) == v_d800; + + switch (er) { + case ErrorReporting::precise: + if (not forbidden.is_zero()) { + // scalar procedure will rescan the portion of buffer we've just + // analysed + return utf32_to_utf16_t{error_code::OTHER, buf, utf16_output}; + } + break; + case ErrorReporting::at_the_end: + forbidden_global |= forbidden; + break; + case ErrorReporting::none: + break; + } + + packed.store(utf16_output); + utf16_output += 8; + buf += 8; + } else { + size_t forward = 7; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint32_t word = buf[k]; + if ((word & 0xFFFF0000) == 0) { + // will not generate a surrogate pair + if (word >= 0xD800 && word <= 0xDFFF) { + return utf32_to_utf16_t{error_code::SURROGATE, buf + k, + utf16_output}; + } + *utf16_output++ = + scalar::utf16::swap_if_needed(uint16_t(word)); + } else { + // will generate a surrogate pair + if (word > 0x10FFFF) { + return utf32_to_utf16_t{error_code::TOO_LARGE, buf + k, + utf16_output}; + 
} + word -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); + high_surrogate = + scalar::utf16::swap_if_needed(high_surrogate); + low_surrogate = + scalar::utf16::swap_if_needed(low_surrogate); + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + } + } + buf += k; + } + } + + if (er == ErrorReporting::at_the_end) { + // check for invalid input + if (not forbidden_global.is_zero()) { + return utf32_to_utf16_t{error_code::SURROGATE, buf, utf16_output}; + } + } + + return utf32_to_utf16_t{error_code::SUCCESS, buf, utf16_output}; +} +/* end file src/ppc64/ppc64_convert_utf32_to_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_UTF32 +/* begin file src/ppc64/ppc64_convert_utf32_to_utf8.cpp */ +struct utf32_to_utf8_t { + error_code err; + const char32_t *input; + char *output; +}; + +template +utf32_to_utf8_t ppc64_convert_utf32_to_utf8(const char32_t *buf, size_t len, + char *utf8_output) { + const char32_t *end = buf + len; + + const auto v_f800 = vector_u16::splat(0xf800); + const auto v_d800 = vector_u16::splat(0xd800); + + const auto v_ffff0000 = vector_u32::splat(0xffff0000); + const auto v_00000000 = vector_u32::zero(); + auto forbidden_bytemask = simd16(); + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 + + while (end - buf >= + std::ptrdiff_t( + 16 + safety_margin)) { // buf is a char32_t pointer, each char32_t + // has 4 bytes or 32 bits, thus buf + 16 * + // char_32t = 512 bits = 64 bytes + // We load two 16 bytes registers for a total of 32 bytes or 16 characters. 
+ // These two values can hold only 8 UTF32 chars + auto in0 = vector_u32::load(buf); + auto in1 = vector_u32::load(buf + vector_u32::ELEMENTS); + + // Pack 32-bit UTF-32 code units to 16-bit UTF-16 code units with unsigned + // saturation + auto in = vector_u32::pack(in0, in1); + + // Try to apply UTF-16 => UTF-8 from ./ppc64_convert_utf16_to_utf8.cpp + + // Check for ASCII fast path + + // ASCII fast path!!!! + // We eagerly load another 32 bytes, hoping that they will be ASCII too. + // The intuition is that we try to collect 16 ASCII characters which + // requires a total of 64 bytes of input. If we fail, we just pass thirdin + // and fourthin as our new inputs. + if (in.is_ascii()) { // if the first two blocks are ASCII + const auto in2 = vector_u32::load(buf + 2 * vector_u32::ELEMENTS); + const auto in3 = vector_u32::load(buf + 3 * vector_u32::ELEMENTS); + + const auto next = vector_u32::pack(in2, in3); + if (next.is_ascii()) { + // 1. pack the bytes + const auto utf8_packed = vector_u16::pack(in, next); + // 2. store (16 bytes) + utf8_packed.store(utf8_output); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! + } + + // `next` is not ASCII, write `in` and carry on with next + + // 1. pack the bytes + const auto utf8_packed = vector_u16::pack(in, in); + utf8_packed.store(utf8_output); + // 3. 
adjust pointers + buf += 8; + utf8_output += 8; + + // Proceed with next input + in = next; + in0 = in2; + in1 = in3; + } + + // no bits set above 7th bit + const auto one_byte_bytemask = in < uint16_t(1 << 7); + const uint16_t one_byte_bitmask = one_byte_bytemask.to_bitmask(); + + // no bits set above 11th bit + const auto one_or_two_bytes_bytemask = in < uint16_t(1 << 11); + const uint16_t one_or_two_bytes_bitmask = + one_or_two_bytes_bytemask.to_bitmask(); + + if (one_or_two_bytes_bitmask == 0xffff) { + write_v_u16_11bits_to_utf8( + in, utf8_output, as_vector_u8(one_byte_bytemask), one_byte_bitmask); + buf += 8; + continue; + } + + // Check for overflow in packing + const auto saturation_bytemask = ((in0 | in1) & v_ffff0000) == v_00000000; + const uint16_t saturation_bitmask = saturation_bytemask.to_bitmask(); + if (saturation_bitmask == 0xffff) { + switch (er) { + case ErrorReporting::precise: { + const auto forbidden = (in & v_f800) == v_d800; + if (forbidden.any()) { + // We return no error code, instead we force the scalar procedure + // to rescan the portion of input where we've just found an error. + return utf32_to_utf8_t{error_code::SUCCESS, buf, utf8_output}; + } + } break; + case ErrorReporting::at_the_end: + forbidden_bytemask |= (in & v_f800) == v_d800; + break; + case ErrorReporting::none: + break; + } + + ppc64_convert_utf16_to_1_2_3_bytes_of_utf8( + in, one_byte_bitmask, one_or_two_bytes_bytemask, + one_or_two_bytes_bitmask, utf8_output); + buf += 8; + } else { + // case: at least one 32-bit word produce a surrogate pair in UTF-16 <=> + // will produce four UTF-8 bytes Let us do a scalar fallback. It may seem + // wasteful to use scalar code, but being efficient with SIMD in the + // presence of surrogate pairs may require non-trivial tables. 
+ size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint32_t word = buf[k]; + if ((word & 0xFFFFFF80) == 0) { + *utf8_output++ = char(word); + } else if ((word & 0xFFFFF800) == 0) { + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xFFFF0000) == 0) { + if (er != ErrorReporting::none and + (word >= 0xD800 && word <= 0xDFFF)) { + return utf32_to_utf8_t{error_code::SURROGATE, buf + k, utf8_output}; + } + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { + if (er != ErrorReporting::none and (word > 0x10FFFF)) { + return utf32_to_utf8_t{error_code::TOO_LARGE, buf + k, utf8_output}; + } + *utf8_output++ = char((word >> 18) | 0b11110000); + *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } + } + buf += k; + } + } // while + + if (er == ErrorReporting::at_the_end) { + if (forbidden_bytemask.any()) { + return utf32_to_utf8_t{error_code::SURROGATE, buf, utf8_output}; + } + } + + return utf32_to_utf8_t{ + error_code::SUCCESS, + buf, + utf8_output, + }; +} +/* end file src/ppc64/ppc64_convert_utf32_to_utf8.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/ppc64/ppc64_utf8_length_from_latin1.cpp */ +template T min(T a, T b) { return a <= b ? 
a : b; } + +std::pair ppc64_utf8_length_from_latin1(const char *input, + size_t length) { + constexpr size_t N = vector_u8::ELEMENTS; + length = (length / N); + + size_t count = length * N; + while (length != 0) { + vector_u32 partial = vector_u32::zero(); + + // partial accumulator has 32 bits => this yields (2^31 / 16) + size_t chunk = min(length, size_t(0xffffffff / N)); + length -= chunk; + while (chunk != 0) { + auto local = vector_u8::zero(); + // local accumulator has 8 bits => this yields 255 max (we increment by 1 + // in each iteration) + const size_t n = min(chunk, size_t(255)); + chunk -= n; + for (size_t i = 0; i < n; i++) { + const auto in = vector_i8::load(input); + input += N; + + local -= as_vector_u8(in < vector_i8::splat(0)); + } + + partial = sum4bytes(local, partial); + } + + for (int i = 0; i < vector_u32::ELEMENTS; i++) { + count += size_t(partial.value[i]); + } + } + + return std::make_pair(input, count); +} +/* end file src/ppc64/ppc64_utf8_length_from_latin1.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_BASE64 +/* begin file src/ppc64/ppc64_base64.cpp */ +/* + * References and further reading: + * + * Wojciech Muła, Daniel Lemire, Base64 encoding and decoding at almost the + * speed of a memory copy, Software: Practice and Experience 50 (2), 2020. + * https://arxiv.org/abs/1910.05109 + * + * Wojciech Muła, Daniel Lemire, Faster Base64 Encoding and Decoding using AVX2 + * Instructions, ACM Transactions on the Web 12 (3), 2018. + * https://arxiv.org/abs/1704.00605 + * + * Simon Josefsson. 2006. The Base16, Base32, and Base64 Data Encodings. + * https://tools.ietf.org/html/rfc4648. (2006). Internet Engineering Task Force, + * Request for Comments: 4648. + * + * Alfred Klomp. 2014a. Fast Base64 encoding/decoding with SSE vectorization. + * http://www.alfredklomp.com/programming/sse-base64/. (2014). + * + * Alfred Klomp. 2014b. Fast Base64 stream encoder/decoder in C99, with SIMD + * acceleration. 
https://github.com/aklomp/base64. (2014). + * + * Hanson Char. 2014. A Fast and Correct Base 64 Codec. (2014). + * https://aws.amazon.com/blogs/developer/a-fast-and-correct-base-64-codec/ + * + * Nick Kopp. 2013. Base64 Encoding on a GPU. + * https://www.codeproject.com/Articles/276993/Base-Encoding-on-a-GPU. (2013). + * + * AMD XOP specific: http://0x80.pl/notesen/2016-01-12-sse-base64-encoding.html + * Altivec has capabilities of AMD XOP (or vice versa): shuffle using 2 vectors + * and variable shifts, thus this implementation shares some code solution + * (modulo intrinsic function names). + */ + +constexpr bool with_base64_std = false; +constexpr bool with_base64_url = true; +constexpr bool with_ignore_errors = true; +constexpr bool with_ignore_garbage = true; +constexpr bool with_strict_checking = false; + +// --- encoding ----------------------------------------------- + +/* + Procedure translates vector of bytes having 6-bit values + into ASCII counterparts. +*/ +template +vector_u8 encoding_translate_6bit_values(const vector_u8 input) { + // credit: Wojciech Muła + // reduce 0..51 -> 0 + // 52..61 -> 1 .. 10 + // 62 -> 11 + // 63 -> 12 + auto result = input.saturating_sub(vector_u8::splat(51)); + + // distinguish between ranges 0..25 and 26..51: + // 0 .. 25 -> remains 13 + // 26 .. 51 -> becomes 0 + const auto lt = input < vector_u8::splat(26); + result = select(as_vector_u8(lt), vector_u8::splat(13), result); + + const auto shift_LUT = + base64_url ? 
vector_u8('a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, + '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, + '0' - 52, '-' - 62, '_' - 63, 'A', 0, 0) + : vector_u8('a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, + '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, + '0' - 52, '+' - 62, '/' - 63, 'A', 0, 0); + // read shift + result = result.lookup_16(shift_LUT); + + return input + result; +} + +/* + Procedure expands 12 bytes (4*3 bytes) into 16 bytes, + each byte stores 6 bits of data +*/ +template +simdutf_really_inline vector_u8 encoding_expand_6bit_fields(vector_u8 input) { +#if SIMDUTF_IS_BIG_ENDIAN + #define indices4(dx) (dx + 0), (dx + 1), (dx + 1), (dx + 2) + const auto expand_3_to_4 = vector_u8(indices4(0 * 3), indices4(1 * 3), + indices4(2 * 3), indices4(3 * 3)); + #undef indices4 + + // input = [........|ccdddddd|bbbbcccc|aaaaaabb] as uint8_t + // 3 2 1 0 + // + // in' = [aaaaaabb|bbbbcccc|bbbbcccc|ccdddddd] as uint32_t + // 0 1 1 2 + const auto in = as_vector_u32(expand_3_to_4.lookup_16(input)); + + // t0 = [00000000|00000000|00000000|00dddddd] + const auto t0 = in & uint32_t(0x0000003f); + + // t1 = [00000000|00000000|00cccccc|00dddddd] + const auto t1 = select(uint32_t(0x00003f00), in.shl<2>(), t0); + + // t2 = [00000000|00bbbbbb|00cccccc|00dddddd] + const auto t2 = select(uint32_t(0x003f0000), in.shr<4>(), t1); + + // t3 = [00aaaaaa|00bbbbbb|00cccccc|00dddddd] + const auto t3 = select(uint32_t(0x3f000000), in.shr<2>(), t2); + + return as_vector_u8(t3); +#else + #define indices4(dx) (dx + 1), (dx + 0), (dx + 2), (dx + 1) + const auto expand_3_to_4 = vector_u8(indices4(0 * 3), indices4(1 * 3), + indices4(2 * 3), indices4(3 * 3)); + #undef indices4 + + // input = [........|ccdddddd|bbbbcccc|aaaaaabb] as uint8_t + // 3 2 1 0 + // + // in' = [bbbbcccc|ccdddddd|aaaaaabb|bbbbcccc] as uint32_t + // 1 2 0 1 + const auto in = as_vector_u32(expand_3_to_4.lookup_16(input)); + + // t0 = [00dddddd|00000000|00000000|00000000] + const auto t0 = in.shl<8>() & 
uint32_t(0x3f000000); + + // t1 = [00dddddd|00cccccc|00000000|00000000] + const auto t1 = select(uint32_t(0x003f0000), in.shr<6>(), t0); + + // t2 = [00dddddd|00cccccc|00bbbbbb|00000000] + const auto t2 = select(uint32_t(0x00003f00), in.shl<4>(), t1); + + // t3 = [00dddddd|00cccccc|00bbbbbb|00aaaaaa] + const auto t3 = select(uint32_t(0x0000003f), in.shr<10>(), t2); + + return as_vector_u8(t3); +#endif // SIMDUTF_IS_BIG_ENDIAN +} + +template +size_t encode_base64(char *dst, const char *src, size_t srclen, + base64_options options) { + + const uint8_t *input = (const uint8_t *)src; + + uint8_t *out = (uint8_t *)dst; + + size_t i = 0; + for (; i + 52 <= srclen; i += 48) { + const auto in0 = vector_u8::load(input + i + 12 * 0); + const auto in1 = vector_u8::load(input + i + 12 * 1); + const auto in2 = vector_u8::load(input + i + 12 * 2); + const auto in3 = vector_u8::load(input + i + 12 * 3); + + const auto expanded0 = encoding_expand_6bit_fields(in0); + const auto expanded1 = encoding_expand_6bit_fields(in1); + const auto expanded2 = encoding_expand_6bit_fields(in2); + const auto expanded3 = encoding_expand_6bit_fields(in3); + + const auto base64_0 = + encoding_translate_6bit_values(expanded0); + const auto base64_1 = + encoding_translate_6bit_values(expanded1); + const auto base64_2 = + encoding_translate_6bit_values(expanded2); + const auto base64_3 = + encoding_translate_6bit_values(expanded3); + + base64_0.store(out); + out += 16; + + base64_1.store(out); + out += 16; + + base64_2.store(out); + out += 16; + + base64_3.store(out); + out += 16; + } + for (; i + 16 <= srclen; i += 12) { + const auto in = vector_u8::load(input + i); + const auto expanded = encoding_expand_6bit_fields(in); + const auto base64 = encoding_translate_6bit_values(expanded); + + base64.store(out); + out += 16; + } + + return i / 3 * 4 + scalar::base64::tail_encode_base64((char *)out, src + i, + srclen - i, options); +} + +// --- decoding ----------------------------------------------- + 
+static simdutf_really_inline void compress(const vector_u8 data, uint16_t mask, + char *output) { + if (mask == 0) { + data.store(output); + return; + } + + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + +#if SIMDUTF_IS_BIG_ENDIAN + vec_u64_t tmp = { + tables::base64::thintable_epi8[mask2], + tables::base64::thintable_epi8[mask1], + }; + + auto shufmask = vector_u8(vec_reve(vec_u8_t(tmp))); + + // we increment by 0x08 the second half of the mask + shufmask = + shufmask + vector_u8(0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8); +#else + vec_u64_t tmp = { + tables::base64::thintable_epi8[mask1], + tables::base64::thintable_epi8[mask2], + }; + + auto shufmask = vector_u8(vec_u8_t(tmp)); + + // we increment by 0x08 the second half of the mask + shufmask = + shufmask + vector_u8(0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8); +#endif // SIMDUTF_IS_BIG_ENDIAN + + // this is the version "nearly pruned" + const auto pruned = shufmask.lookup_16(data); + // we still need to put the two halves together. + // we compute the popcount of the first half: + const int pop1 = tables::base64::BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. 
+ const auto compactmask = + vector_u8::load(tables::base64::pshufb_combine_table + pop1 * 8); + + const auto answer = compactmask.lookup_16(pruned); + + answer.store(output); +} + +static simdutf_really_inline vector_u8 decoding_pack(vector_u8 input) { +#if SIMDUTF_IS_BIG_ENDIAN + // in = [00aaaaaa|00bbbbbb|00cccccc|00dddddd] + // want = [00000000|aaaaaabb|bbbbcccc|ccdddddd] + + auto in = as_vector_u16(input); + // t0 = [00??aaaa|aabbbbbb|00??cccc|ccdddddd] + const auto t0 = in.shr<2>(); + const auto t1 = select(uint16_t(0x0fc0), t0, in); + + // t0 = [00??????|aaaaaabb|bbbbcccc|ccdddddd] + const auto t2 = as_vector_u32(t1); + const auto t3 = t2.shr<4>(); + const auto t4 = select(uint32_t(0x00fff000), t3, t2); + + const auto tmp = as_vector_u8(t4); + + const auto shuffle = + vector_u8(1, 2, 3, 5, 6, 7, 9, 10, 11, 13, 14, 15, 0, 0, 0, 0); + + const auto t = shuffle.lookup_16(tmp); + + return t; +#else + // in = [00dddddd|00cccccc|00bbbbbb|00aaaaaa] + // want = [00000000|aaaaaabb|bbbbcccc|ccdddddd] + + auto u = as_vector_u32(input).swap_bytes(); + + auto in = vector_u16((vec_u16_t)u.value); + // t0 = [00??aaaa|aabbbbbb|00??cccc|ccdddddd] + const auto t0 = in.shr<2>(); + const auto t1 = select(uint16_t(0x0fc0), t0, in); + + // t0 = [00??????|aaaaaabb|bbbbcccc|ccdddddd] + const auto t2 = as_vector_u32(t1); + const auto t3 = t2.shr<4>(); + const auto t4 = select(uint32_t(0x00fff000), t3, t2); + + const auto tmp = as_vector_u8(t4); + + const auto shuffle = + vector_u8(2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, 0, 0, 0, 0); + + const auto t = shuffle.lookup_16(tmp); + + return t; +#endif // SIMDUTF_IS_BIG_ENDIAN +} +static simdutf_really_inline void base64_decode(char *out, vector_u8 input) { + const auto expanded = decoding_pack(input); + expanded.store(out); +} + +static simdutf_really_inline void base64_decode_block(char *out, + const char *src) { + base64_decode(out + 12 * 0, vector_u8::load(src + 0 * 16)); + base64_decode(out + 12 * 1, vector_u8::load(src + 1 * 16)); + 
base64_decode(out + 12 * 2, vector_u8::load(src + 2 * 16)); + base64_decode(out + 12 * 3, vector_u8::load(src + 3 * 16)); +} + +static simdutf_really_inline void base64_decode_block_safe(char *out, + const char *src) { + base64_decode(out + 12 * 0, vector_u8::load(src + 0 * 16)); + base64_decode(out + 12 * 1, vector_u8::load(src + 1 * 16)); + base64_decode(out + 12 * 2, vector_u8::load(src + 2 * 16)); + + char buffer[16]; + base64_decode(buffer, vector_u8::load(src + 3 * 16)); + std::memcpy(out + 36, buffer, 12); +} + +// ---base64 decoding::block64 class -------------------------- + +class block64 { + simd8x64 b; + +public: + simdutf_really_inline block64(const char *src) : b(load_block(src)) {} + simdutf_really_inline block64(const char16_t *src) : b(load_block(src)) {} + +private: + // The caller of this function is responsible to ensure that there are 64 + // bytes available from reading at src. The data is read into a block64 + // structure. + static simdutf_really_inline simd8x64 load_block(const char *src) { + const auto v0 = vector_u8::load(src + 16 * 0); + const auto v1 = vector_u8::load(src + 16 * 1); + const auto v2 = vector_u8::load(src + 16 * 2); + const auto v3 = vector_u8::load(src + 16 * 3); + + return simd8x64(v0, v1, v2, v3); + } + + // The caller of this function is responsible to ensure that there are 128 + // bytes available from reading at src. The data is read into a block64 + // structure. 
+ static simdutf_really_inline simd8x64 + load_block(const char16_t *src) { + const auto m1 = vector_u16::load(src + 8 * 0); + const auto m2 = vector_u16::load(src + 8 * 1); + const auto m3 = vector_u16::load(src + 8 * 2); + const auto m4 = vector_u16::load(src + 8 * 3); + const auto m5 = vector_u16::load(src + 8 * 4); + const auto m6 = vector_u16::load(src + 8 * 5); + const auto m7 = vector_u16::load(src + 8 * 6); + const auto m8 = vector_u16::load(src + 8 * 7); + + return simd8x64(vector_u16::pack(m1, m2), vector_u16::pack(m3, m4), + vector_u16::pack(m5, m6), + vector_u16::pack(m7, m8)); + } + +public: + template + static inline uint16_t to_base64_mask(vector_u8 &src, uint16_t &error) { + const auto ascii_space_tbl = + vector_u8(0x20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x9, 0xa, 0x0, + 0xc, 0xd, 0x0, 0x0); + + // credit: aqrit + const auto delta_asso = + default_or_url + ? vector_u8(0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x16) + : vector_u8(0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x0F); + + const auto delta_values = + default_or_url + ? vector_u8(0xBF, 0xE0, 0xB9, 0x13, 0x04, 0xBF, 0xBF, 0xB9, 0xB9, + 0x00, 0xFF, 0x11, 0xFF, 0xBF, 0x10, 0xB9) + : (base64_url + ? vector_u8(0x0, 0x0, 0x0, 0x13, 0x4, 0xBF, 0xBF, 0xB9, 0xB9, + 0x0, 0x11, 0xC3, 0xBF, 0xE0, 0xB9, 0xB9) + : vector_u8(0x00, 0x00, 0x00, 0x13, 0x04, 0xBF, 0xBF, 0xB9, + 0xB9, 0x00, 0x10, 0xC3, 0xBF, 0xBF, 0xB9, 0xB9)); + + const auto check_asso = + default_or_url + ? vector_u8(0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x03, 0x07, 0x0B, 0x0E, 0x0B, 0x06) + : (base64_url + ? vector_u8(0xD, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, + 0x3, 0x7, 0xB, 0xE, 0xB, 0x6) + : vector_u8(0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x03, 0x07, 0x0B, 0x0B, 0x0B, 0x0F)); + + const auto check_values = + default_or_url + ? 
vector_u8(0x80, 0x80, 0x80, 0x80, 0xCF, 0xBF, 0xD5, 0xA6, 0xB5, + 0xA1, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80) + : (base64_url + ? vector_u8(0x80, 0x80, 0x80, 0x80, 0xCF, 0xBF, 0xB6, 0xA6, + 0xB5, 0xA1, 0x0, 0x80, 0x0, 0x80, 0x0, 0x80) + : vector_u8(0x80, 0x80, 0x80, 0x80, 0xCF, 0xBF, 0xD5, 0xA6, + 0xB5, 0x86, 0xD1, 0x80, 0xB1, 0x80, 0x91, 0x80)); + + const auto shifted = src.shr<3>(); + + const auto delta_hash = avg(src.lookup_16(delta_asso), shifted); + const auto check_hash = avg(src.lookup_16(check_asso), shifted); + + const auto out = as_vector_i8(delta_hash.lookup_16(delta_values)) + .saturating_add(as_vector_i8(src)); + const auto chk = as_vector_i8(check_hash.lookup_16(check_values)) + .saturating_add(as_vector_i8(src)); + + const uint16_t mask = chk.to_bitmask(); + if (!ignore_garbage && mask) { + const auto ascii = src.lookup_16(ascii_space_tbl); + const auto ascii_space = (ascii == src); + error = (mask ^ ascii_space.to_bitmask()); + } + src = out; + + return mask; + } + + template + simdutf_really_inline uint64_t to_base64_mask(uint64_t *error) { + uint16_t err0 = 0; + uint16_t err1 = 0; + uint16_t err2 = 0; + uint16_t err3 = 0; + uint64_t m0 = to_base64_mask( + b.chunks[0], err0); + uint64_t m1 = to_base64_mask( + b.chunks[1], err1); + uint64_t m2 = to_base64_mask( + b.chunks[2], err2); + uint64_t m3 = to_base64_mask( + b.chunks[3], err3); + + if (!ignore_garbage) { + *error = (err0) | ((uint64_t)err1 << 16) | ((uint64_t)err2 << 32) | + ((uint64_t)err3 << 48); + } + return m0 | (m1 << 16) | (m2 << 32) | (m3 << 48); + } + + simdutf_really_inline void copy_block(char *output) { + b.store(reinterpret_cast(output)); + } + + simdutf_really_inline uint64_t compress_block(uint64_t mask, char *output) { + uint64_t nmask = ~mask; + compress(b.chunks[0], uint16_t(mask), output); + compress(b.chunks[1], uint16_t(mask >> 16), + output + count_ones(nmask & 0xFFFF)); + compress(b.chunks[2], uint16_t(mask >> 32), + output + count_ones(nmask & 0xFFFFFFFF)); + 
compress(b.chunks[3], uint16_t(mask >> 48), + output + count_ones(nmask & 0xFFFFFFFFFFFFULL)); + return count_ones(nmask); + } + + simdutf_really_inline void base64_decode_block(char *out) { + base64_decode(out + 12 * 0, b.chunks[0]); + base64_decode(out + 12 * 1, b.chunks[1]); + base64_decode(out + 12 * 2, b.chunks[2]); + base64_decode(out + 12 * 3, b.chunks[3]); + } + + simdutf_really_inline void base64_decode_block_safe(char *out) { + base64_decode(out + 12 * 0, b.chunks[0]); + base64_decode(out + 12 * 1, b.chunks[1]); + base64_decode(out + 12 * 2, b.chunks[2]); + char buffer[16]; + base64_decode(buffer, b.chunks[3]); + std::memcpy(out + 12 * 3, buffer, 12); + } +}; +/* end file src/ppc64/ppc64_base64.cpp */ +#endif // SIMDUTF_FEATURE_BASE64 + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdutf + +#if SIMDUTF_FEATURE_UTF8 +/* begin file src/generic/buf_block_reader.h */ +namespace simdutf { +namespace ppc64 { +namespace { + +// Walks through a buffer in block-sized increments, loading the last part with +// spaces +template struct buf_block_reader { +public: + simdutf_really_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdutf_really_inline size_t block_index(); + simdutf_really_inline bool has_full_block() const; + simdutf_really_inline const uint8_t *full_block() const; + /** + * Get the last block, padded with spaces. + * + * There will always be a last block, with at least 1 byte, unless len == 0 + * (in which case this function fills the buffer with spaces and returns 0. In + * particular, if len == STEP_SIZE there will be 0 full_blocks and 1 remainder + * block with STEP_SIZE bytes and no spaces for padding. + * + * @return the number of effective characters in the last block. 
+ */ + simdutf_really_inline size_t get_remainder(uint8_t *dst) const; + simdutf_really_inline void advance(); + +private: + const uint8_t *buf; + const size_t len; + const size_t lenminusstep; + size_t idx; +}; + +// Routines to print masks and text for debugging bitmask operations +simdutf_unused static char *format_input_text_64(const uint8_t *text) { + static char *buf = + reinterpret_cast(malloc(sizeof(simd8x64) + 1)); + for (size_t i = 0; i < sizeof(simd8x64); i++) { + buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +// Routines to print masks and text for debugging bitmask operations +simdutf_unused static char *format_input_text(const simd8x64 &in) { + static char *buf = + reinterpret_cast(malloc(sizeof(simd8x64) + 1)); + in.store(reinterpret_cast(buf)); + for (size_t i = 0; i < sizeof(simd8x64); i++) { + if (buf[i] < ' ') { + buf[i] = '_'; + } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdutf_unused static char *format_mask(uint64_t mask) { + static char *buf = reinterpret_cast(malloc(64 + 1)); + for (size_t i = 0; i < 64; i++) { + buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; + } + buf[64] = '\0'; + return buf; +} + +template +simdutf_really_inline +buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) + : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 
0 : len - STEP_SIZE}, + idx{0} {} + +template +simdutf_really_inline size_t buf_block_reader::block_index() { + return idx; +} + +template +simdutf_really_inline bool buf_block_reader::has_full_block() const { + return idx < lenminusstep; +} + +template +simdutf_really_inline const uint8_t * +buf_block_reader::full_block() const { + return &buf[idx]; +} + +template +simdutf_really_inline size_t +buf_block_reader::get_remainder(uint8_t *dst) const { + if (len == idx) { + return 0; + } // memcpy(dst, null, 0) will trigger an error with some sanitizers + std::memset(dst, 0x20, + STEP_SIZE); // std::memset STEP_SIZE because it is more efficient + // to write out 8 or 16 bytes at once. + std::memcpy(dst, buf + idx, len - idx); + return len - idx; +} + +template +simdutf_really_inline void buf_block_reader::advance() { + idx += STEP_SIZE; +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdutf +/* end file src/generic/buf_block_reader.h */ +/* begin file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ +namespace simdutf { +namespace ppc64 { +namespace { +namespace utf8_validation { + +using namespace simd; + +simdutf_really_inline simd8 +check_special_cases(const simd8 input, const simd8 prev1) { + // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) + // Bit 1 = Too Long (ASCII followed by continuation) + // Bit 2 = Overlong 3-byte + // Bit 4 = Surrogate + // Bit 5 = Overlong 2-byte + // Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ + // 11110100 101_____ + // 
11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); + constexpr const uint8_t CARRY = + TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . + const simd8 byte_1_low = + (prev1 & 0x0F) + .lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | + OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | 
TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); + return (byte_1_high & byte_1_low & byte_2_high); +} +simdutf_really_inline simd8 +check_multibyte_lengths(const simd8 input, + const simd8 prev_input, + const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = + simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; +} + +// +// Return nonzero if there are incomplete multibyte characters at the end of the +// block: e.g. if there is a 4-byte character, but it is 3 bytes from the end. +// +simdutf_really_inline simd8 is_incomplete(const simd8 input) { + // If the previous input's last 3 bytes match this, they're too short (they + // ended at EOF): + // ... 1111____ 111_____ 11______ + static const uint8_t max_array[32] = {255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 0b11110000u - 1, + 0b11100000u - 1, + 0b11000000u - 1}; + const simd8 max_value( + &max_array[sizeof(max_array) - sizeof(simd8)]); + return input.gt_bits(max_value); +} + +struct utf8_checker { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + // The last input we received + simd8 prev_input_block; + // Whether the last input we received was incomplete (used for ASCII fast + // path) + simd8 prev_incomplete; + + // + // Check whether the current bytes are valid UTF-8. 
+ // + simdutf_really_inline void check_utf8_bytes(const simd8 input, + const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ + // lead bytes (2, 3, 4-byte leads become large positive numbers instead of + // small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + // The only problem that can happen at EOF is that a multibyte character is + // too short or a byte value too large in the last bytes: check_special_cases + // only checks for bytes too large in the first of two bytes. + simdutf_really_inline void check_eof() { + // If the previous block had incomplete UTF-8 characters at the end, an + // ASCII block can't possibly finish them. + this->error |= this->prev_incomplete; + } + + simdutf_really_inline void check_next_input(const simd8x64 &input) { + if (simdutf_likely(is_ascii(input))) { + this->error |= this->prev_incomplete; + } else { + // you might think that a for-loop would work, but under Visual Studio, it + // is not good enough. + static_assert((simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + this->prev_incomplete = + is_incomplete(input.chunks[simd8x64::NUM_CHUNKS - 1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS - 1]; + } + } + + // do not forget to call check_eof! 
+ simdutf_really_inline bool errors() const { + return this->error.any_bits_set_anywhere(); + } + +}; // struct utf8_checker +} // namespace utf8_validation + +using utf8_validation::utf8_checker; + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdutf +/* end file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ +/* begin file src/generic/utf8_validation/utf8_validator.h */ +namespace simdutf { +namespace ppc64 { +namespace { +namespace utf8_validation { + +/** + * Validates that the string is actual UTF-8. + */ +template +bool generic_validate_utf8(const uint8_t *input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + return !c.errors(); +} + +bool generic_validate_utf8(const char *input, size_t length) { + return generic_validate_utf8( + reinterpret_cast(input), length); +} + +/** + * Validates that the string is actual UTF-8 and stops on errors. 
+ */ +template +result generic_validate_utf8_with_errors(const uint8_t *input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + size_t count{0}; + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + if (c.errors()) { + if (count != 0) { + count--; + } // Sometimes the error is only detected in the next chunk + result res = scalar::utf8::rewind_and_validate_with_errors( + reinterpret_cast(input), + reinterpret_cast(input + count), length - count); + res.count += count; + return res; + } + reader.advance(); + count += 64; + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + if (c.errors()) { + if (count != 0) { + count--; + } // Sometimes the error is only detected in the next chunk + result res = scalar::utf8::rewind_and_validate_with_errors( + reinterpret_cast(input), + reinterpret_cast(input) + count, length - count); + res.count += count; + return res; + } else { + return result(error_code::SUCCESS, length); + } +} + +result generic_validate_utf8_with_errors(const char *input, size_t length) { + return generic_validate_utf8_with_errors( + reinterpret_cast(input), length); +} + +} // namespace utf8_validation +} // unnamed namespace +} // namespace ppc64 +} // namespace simdutf +/* end file src/generic/utf8_validation/utf8_validator.h */ +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +/* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */ +namespace simdutf { +namespace ppc64 { +namespace { +namespace utf8_to_utf16 { +using namespace simd; + +simdutf_really_inline simd8 +check_special_cases(const simd8 input, const simd8 prev1) { + // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) + // Bit 1 = Too Long (ASCII followed by continuation) + // Bit 2 = Overlong 3-byte + // Bit 4 = Surrogate + // Bit 5 = Overlong 2-byte + // Bit 7 = 
Two Continuations + constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); + constexpr const uint8_t CARRY = + TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . 
+ const simd8 byte_1_low = + (prev1 & 0x0F) + .lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | + OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); + return (byte_1_high & byte_1_low & byte_2_high); +} +simdutf_really_inline simd8 +check_multibyte_lengths(const simd8 input, + const simd8 prev_input, + const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = + simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; +} + +struct validating_transcoder { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + + validating_transcoder() : error(uint8_t(0)) {} + // + // Check whether the current bytes are valid UTF-8. 
+ // + simdutf_really_inline void check_utf8_bytes(const simd8 input, + const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ + // lead bytes (2, 3, 4-byte leads become large positive numbers instead of + // small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + template + simdutf_really_inline size_t convert(const char *in, size_t size, + char16_t *utf16_output) { + size_t pos = 0; + char16_t *start{utf16_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, the loop above stops with in[margin] being + // the eighth-last leading byte (margin is decremented once more after that + // byte has been counted). + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store_ascii_as_utf16(utf16_output); + utf16_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. 
+ static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (utf8_continuation_mask & 1) { + return 0; // error + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_utf16( + in + pos, utf8_end_of_code_point_mask, utf16_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. 
So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + return 0; + } + if (pos < size) { + size_t howmany = scalar::utf8_to_utf16::convert( + in + pos, size - pos, utf16_output); + if (howmany == 0) { + return 0; + } + utf16_output += howmany; + } + return utf16_output - start; + } + + template + simdutf_really_inline result convert_with_errors(const char *in, size_t size, + char16_t *utf16_output) { + size_t pos = 0; + char16_t *start{utf16_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, the loop above stops with in[margin] being + // the eighth-last leading byte (margin is decremented once more after that + // byte has been counted). + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store_ascii_as_utf16(utf16_output); + utf16_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. 
+ static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (errors() || (utf8_continuation_mask & 1)) { + // rewind_and_convert_with_errors will seek a potential error from + // in+pos onward, with the ability to go back up to pos bytes, and + // read size-pos bytes forward. + result res = + scalar::utf8_to_utf16::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf16_output); + res.count += pos; + return res; + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. 
+ size_t consumed = convert_masked_utf8_to_utf16( + in + pos, utf8_end_of_code_point_mask, utf16_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. + result res = + scalar::utf8_to_utf16::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf16_output); + res.count += pos; + return res; + } + if (pos < size) { + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. + result res = + scalar::utf8_to_utf16::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf16_output); + if (res.error) { // In case of error, we want the error position + res.count += pos; + return res; + } else { // In case of success, we want the number of words written + utf16_output += res.count; + } + } + return result(error_code::SUCCESS, utf16_output - start); + } + + simdutf_really_inline bool errors() const { + return this->error.any_bits_set_anywhere(); + } + +}; // struct validating_transcoder +} // namespace utf8_to_utf16 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdutf +/* end file src/generic/utf8_to_utf16/utf8_to_utf16.h */ +/* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ +namespace simdutf { +namespace ppc64 { +namespace { +namespace utf8_to_utf16 { + +using namespace simd; + +template +simdutf_warn_unused size_t convert_valid(const char *input, size_t size, + char16_t *utf16_output) noexcept { + // The implementation is not specific to haswell and should be moved to the + // 
generic directory. + size_t pos = 0; + char16_t *start{utf16_output}; + const size_t safety_margin = 16; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + // this loop could be unrolled further. For example, we could process the + // mask far more than 64 bytes. + simd8x64 in(reinterpret_cast(input + pos)); + if (in.is_ascii()) { + in.store_ascii_as_utf16(utf16_output); + utf16_output += 64; + pos += 64; + } else { + // Slow path. We hope that the compiler will recognize that this is a slow + // path. Anything that is not a continuation mask is a 'leading byte', + // that is, the start of a new code point. + uint64_t utf8_continuation_mask = in.lt(-65 + 1); + // -65 is 0b10111111 in two-complement's, so largest possible continuation + // byte + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + // The *start* of code points is not so useful, rather, we want the *end* + // of code points. + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times when using solely + // the slow/regular path, and at least four times if there are fast paths. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. 
+ // + // Thus we may allow convert_masked_utf8_to_utf16 to process + // more bytes at a time under a fast-path mode where 16 bytes + // are consumed at once (e.g., when encountering ASCII). + size_t consumed = convert_masked_utf8_to_utf16( + input + pos, utf8_end_of_code_point_mask, utf16_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + utf16_output += scalar::utf8_to_utf16::convert_valid( + input + pos, size - pos, utf16_output); + return utf16_output - start; +} + +} // namespace utf8_to_utf16 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdutf +/* end file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +/* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */ +namespace simdutf { +namespace ppc64 { +namespace { +namespace utf8_to_utf32 { +using namespace simd; + +simdutf_really_inline simd8 +check_special_cases(const simd8 input, const simd8 prev1) { + // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) + // Bit 1 = Too Long (ASCII followed by continuation) + // Bit 2 = Overlong 3-byte + // Bit 4 = Surrogate + // Bit 5 = Overlong 2-byte + // Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ + 
// 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); + constexpr const uint8_t CARRY = + TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . + const simd8 byte_1_low = + (prev1 & 0x0F) + .lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | + OVERLONG_4, + // ________ 1001____ + 
TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); + return (byte_1_high & byte_1_low & byte_2_high); +} +simdutf_really_inline simd8 +check_multibyte_lengths(const simd8 input, + const simd8 prev_input, + const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = + simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; +} + +struct validating_transcoder { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + + validating_transcoder() : error(uint8_t(0)) {} + // + // Check whether the current bytes are valid UTF-8. + // + simdutf_really_inline void check_utf8_bytes(const simd8 input, + const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ + // lead bytes (2, 3, 4-byte leads become large positive numbers instead of + // small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + simdutf_really_inline size_t convert(const char *in, size_t size, + char32_t *utf32_output) { + size_t pos = 0; + char32_t *start{utf32_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 words when calling convert_masked_utf8_to_utf32. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes (the bound used by the loop + // below), to give us a good margin. 
+ size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, the loop above stops with in[margin] being + // the eighth-last leading byte (margin is decremented once more after that + // byte has been counted). + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store_ascii_as_utf32(utf32_output); + utf32_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (utf8_continuation_mask & 1) { + return 0; // we have an error + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. 
If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_utf32( + in + pos, utf8_end_of_code_point_mask, utf32_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + return 0; + } + if (pos < size) { + size_t howmany = + scalar::utf8_to_utf32::convert(in + pos, size - pos, utf32_output); + if (howmany == 0) { + return 0; + } + utf32_output += howmany; + } + return utf32_output - start; + } + + simdutf_really_inline result convert_with_errors(const char *in, size_t size, + char32_t *utf32_output) { + size_t pos = 0; + char32_t *start{utf32_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, the loop above stops with in[margin] being + // the eighth-last leading byte (margin is decremented once more after that + // byte has been counted). 
+ const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store_ascii_as_utf32(utf32_output); + utf32_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (errors() || (utf8_continuation_mask & 1)) { + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf32_output); + res.count += pos; + return res; + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. 
+ // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_utf32( + in + pos, utf8_end_of_code_point_mask, utf32_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf32_output); + res.count += pos; + return res; + } + if (pos < size) { + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf32_output); + if (res.error) { // In case of error, we want the error position + res.count += pos; + return res; + } else { // In case of success, we want the number of word written + utf32_output += res.count; + } + } + return result(error_code::SUCCESS, utf32_output - start); + } + + simdutf_really_inline bool errors() const { + return this->error.any_bits_set_anywhere(); + } + +}; // struct utf8_checker +} // namespace utf8_to_utf32 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdutf +/* end file src/generic/utf8_to_utf32/utf8_to_utf32.h */ +/* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ +namespace simdutf { +namespace ppc64 { +namespace { +namespace utf8_to_utf32 { + +using namespace simd; + +simdutf_warn_unused size_t convert_valid(const char *input, size_t size, + char32_t *utf32_output) noexcept { + size_t pos = 0; + char32_t *start{utf32_output}; + const size_t safety_margin = 16; // to avoid overruns! 
+ while (pos + 64 + safety_margin <= size) { + simd8x64 in(reinterpret_cast(input + pos)); + if (in.is_ascii()) { + in.store_ascii_as_utf32(utf32_output); + utf32_output += 64; + pos += 64; + } else { + // -65 is 0b10111111 in two-complement's, so largest possible continuation + // byte + uint64_t utf8_continuation_mask = in.lt(-65 + 1); + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + size_t max_starting_point = (pos + 64) - 12; + while (pos < max_starting_point) { + size_t consumed = convert_masked_utf8_to_utf32( + input + pos, utf8_end_of_code_point_mask, utf32_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + } + } + utf32_output += scalar::utf8_to_utf32::convert_valid(input + pos, size - pos, + utf32_output); + return utf32_output - start; +} + +} // namespace utf8_to_utf32 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdutf +/* end file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 +/* begin file src/generic/utf8.h */ +namespace simdutf { +namespace ppc64 { +namespace { +namespace utf8 { + +using namespace simd; + +simdutf_really_inline size_t count_code_points(const char *in, size_t size) { + size_t pos = 0; + size_t count = 0; + for (; pos + 64 <= size; pos += 64) { + simd8x64 input(reinterpret_cast(in + pos)); + uint64_t utf8_continuation_mask = input.gt(-65); + count += count_ones(utf8_continuation_mask); + } + return count + scalar::utf8::count_code_points(in + pos, size - pos); +} + +#ifdef SIMDUTF_SIMD_HAS_BYTEMASK +simdutf_really_inline size_t count_code_points_bytemask(const char *in, + size_t size) { + using vector_i8 = simd8; + using vector_u8 = simd8; + using vector_u64 = simd64; + + constexpr size_t N = vector_i8::SIZE; + constexpr size_t max_iterations = 255 / 4; + + size_t pos = 0; + size_t count = 0; + + auto counters = 
vector_u64::zero(); + auto local = vector_u8::zero(); + size_t iterations = 0; + for (; pos + 4 * N <= size; pos += 4 * N) { + const auto input0 = + simd8::load(reinterpret_cast(in + pos + 0 * N)); + const auto input1 = + simd8::load(reinterpret_cast(in + pos + 1 * N)); + const auto input2 = + simd8::load(reinterpret_cast(in + pos + 2 * N)); + const auto input3 = + simd8::load(reinterpret_cast(in + pos + 3 * N)); + const auto mask0 = input0 > int8_t(-65); + const auto mask1 = input1 > int8_t(-65); + const auto mask2 = input2 > int8_t(-65); + const auto mask3 = input3 > int8_t(-65); + + local -= vector_u8(mask0); + local -= vector_u8(mask1); + local -= vector_u8(mask2); + local -= vector_u8(mask3); + + iterations += 1; + if (iterations == max_iterations) { + counters += sum_8bytes(local); + local = vector_u8::zero(); + iterations = 0; + } + } + + if (iterations > 0) { + count += local.sum_bytes(); + } + + count += counters.sum(); + + return count + scalar::utf8::count_code_points(in + pos, size - pos); +} +#endif // SIMDUTF_SIMD_HAS_BYTEMASK + +simdutf_really_inline size_t utf16_length_from_utf8(const char *in, + size_t size) { + size_t pos = 0; + size_t count = 0; + // This algorithm could no doubt be improved! + for (; pos + 64 <= size; pos += 64) { + simd8x64 input(reinterpret_cast(in + pos)); + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + // We count one word for anything that is not a continuation (so + // leading bytes). 
+ count += 64 - count_ones(utf8_continuation_mask); + int64_t utf8_4byte = input.gteq_unsigned(240); + count += count_ones(utf8_4byte); + } + return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos); +} + +} // namespace utf8 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdutf +/* end file src/generic/utf8.h */ +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF16 +/* begin file src/generic/utf16.h */ +namespace simdutf { +namespace ppc64 { +namespace { +namespace utf16 { + +template +simdutf_really_inline size_t count_code_points(const char16_t *in, + size_t size) { + size_t pos = 0; + size_t count = 0; + for (; pos < size / 32 * 32; pos += 32) { + simd16x32 input(reinterpret_cast(in + pos)); + if simdutf_constexpr (!match_system(big_endian)) { + input.swap_bytes(); + } + uint64_t not_pair = input.not_in_range(0xDC00, 0xDFFF); + count += count_ones(not_pair) / 2; + } + return count + + scalar::utf16::count_code_points(in + pos, size - pos); +} + +template +simdutf_really_inline size_t utf8_length_from_utf16(const char16_t *in, + size_t size) { + size_t pos = 0; + size_t count = 0; + // This algorithm could no doubt be improved! 
+ for (; pos < size / 32 * 32; pos += 32) { + simd16x32 input(reinterpret_cast(in + pos)); + if simdutf_constexpr (!match_system(big_endian)) { + input.swap_bytes(); + } + uint64_t ascii_mask = input.lteq(0x7F); + uint64_t twobyte_mask = input.lteq(0x7FF); + uint64_t not_pair_mask = input.not_in_range(0xD800, 0xDFFF); + + size_t ascii_count = count_ones(ascii_mask) / 2; + size_t twobyte_count = count_ones(twobyte_mask & ~ascii_mask) / 2; + size_t threebyte_count = count_ones(not_pair_mask & ~twobyte_mask) / 2; + size_t fourbyte_count = 32 - count_ones(not_pair_mask) / 2; + count += 2 * fourbyte_count + 3 * threebyte_count + 2 * twobyte_count + + ascii_count; + } + return count + scalar::utf16::utf8_length_from_utf16(in + pos, + size - pos); +} + +template +simdutf_really_inline size_t utf32_length_from_utf16(const char16_t *in, + size_t size) { + return count_code_points(in, size); +} + +simdutf_really_inline void +change_endianness_utf16(const char16_t *in, size_t size, char16_t *output) { + size_t pos = 0; + + while (pos < size / 32 * 32) { + simd16x32 input(reinterpret_cast(in + pos)); + input.swap_bytes(); + input.store(reinterpret_cast(output)); + pos += 32; + output += 32; + } + + scalar::utf16::change_endianness_utf16(in + pos, size - pos, output); +} + +} // namespace utf16 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdutf +/* end file src/generic/utf16.h */ +/* begin file src/generic/validate_utf16.h */ +namespace simdutf { +namespace ppc64 { +namespace { +namespace utf16 { +/* + UTF-16 validation + -------------------------------------------------- + + In UTF-16 code units in range 0xD800 to 0xDFFF have special meaning. + + In a vectorized algorithm we want to examine the most significant + nibble in order to select a fast path. If none of highest nibbles + are 0xD (13), than we are sure that UTF-16 chunk in a vector + register is valid. + + Let us analyze what we need to check if the nibble is 0xD. 
The + value of the preceding nibble determines what we have: + + 0xd000 .. 0xd7ff - a valid word + 0xd800 .. 0xdbff - low surrogate + 0xdc00 .. 0xdfff - high surrogate + + Other constraints we have to consider: + - there must not be two consecutive low surrogates (0xd800 .. 0xdbff) + - there must not be two consecutive high surrogates (0xdc00 .. 0xdfff) + - there must not be sole low surrogate nor high surrogate + + We are going to build three bitmasks based on the 3rd nibble: + - V = valid word, + - L = low surrogate (0xd800 .. 0xdbff) + - H = high surrogate (0xdc00 .. 0xdfff) + + 0 1 2 3 4 5 6 7 <--- word index + [ V | L | H | L | H | V | V | L ] + 1 0 0 0 0 1 1 0 - V = valid masks + 0 1 0 1 0 0 0 1 - L = low surrogate + 0 0 1 0 1 0 0 0 - H high surrogate + + + 1 0 0 0 0 1 1 0 V = valid masks + 0 1 0 1 0 0 0 0 a = L & (H >> 1) + 0 0 1 0 1 0 0 0 b = a << 1 + 1 1 1 1 1 1 1 0 c = V | a | b + ^ + the last bit can be zero, we just consume 7 + code units and recheck this word in the next iteration +*/ +template +const result validate_utf16_with_errors(const char16_t *input, size_t size) { + if (simdutf_unlikely(size == 0)) { + return result(error_code::SUCCESS, 0); + } + + const char16_t *start = input; + const char16_t *end = input + size; + + const auto v_d8 = simd8::splat(0xd8); + const auto v_f8 = simd8::splat(0xf8); + const auto v_fc = simd8::splat(0xfc); + const auto v_dc = simd8::splat(0xdc); + + while (input + simd16::SIZE * 2 < end) { + // 0. Load data: since the validation takes into account only higher + // byte of each word, we compress the two vectors into one which + // consists only the higher bytes. + auto in0 = simd16(input); + auto in1 = + simd16(input + simd16::SIZE / sizeof(char16_t)); + + // Function `utf16_gather_high_bytes` consumes two vectors of UTF-16 + // and yields a single vector having only higher bytes of characters. + const auto in = utf16_gather_high_bytes(in0, in1); + + // 1. 
Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy). + const auto surrogates_wordmask = (in & v_f8) == v_d8; + const uint16_t surrogates_bitmask = + static_cast(surrogates_wordmask.to_bitmask()); + if (surrogates_bitmask == 0x0000) { + input += 16; + } else { + // 2. We have some surrogates that have to be distinguished: + // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF) + // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF) + // + // Fact: high surrogate has 11th bit set (3rd bit in the higher byte) + + // V - non-surrogate code units + // V = not surrogates_wordmask + const uint16_t V = static_cast(~surrogates_bitmask); + + // H - word-mask for high surrogates: the six highest bits are 0b1101'11 + const auto vH = (in & v_fc) == v_dc; + const uint16_t H = static_cast(vH.to_bitmask()); + + // L - word mask for low surrogates + // L = not H and surrogates_wordmask + const uint16_t L = static_cast(~H & surrogates_bitmask); + + const uint16_t a = static_cast( + L & (H >> 1)); // A low surrogate must be followed by high one. + // (A low surrogate placed in the 7th register's word + // is an exception we handle.) + const uint16_t b = static_cast( + a << 1); // Just mark that the opinput - startite fact is hold, + // thanks to that we have only two masks for valid case. + const uint16_t c = static_cast( + V | a | b); // Combine all the masks into the final one. + + if (c == 0xffff) { + // The whole input register contains valid UTF-16, i.e., + // either single code units or proper surrogate pairs. + input += 16; + } else if (c == 0x7fff) { + // The 15 lower code units of the input register contains valid UTF-16. + // The 15th word may be either a low or high surrogate. It the next + // iteration we 1) check if the low surrogate is followed by a high + // one, 2) reject sole high surrogate. 
+ input += 15; + } else { + return result(error_code::SURROGATE, input - start); + } + } + } + + return result(error_code::SUCCESS, input - start); +} + +template +const result validate_utf16_as_ascii_with_errors(const char16_t *input, + size_t size) { + if (simdutf_unlikely(size == 0)) { + return result(error_code::SUCCESS, 0); + } + size_t pos = 0; + for (; pos < size / 32 * 32; pos += 32) { + simd16x32 input_vec( + reinterpret_cast(input + pos)); + if simdutf_constexpr (!match_system(big_endian)) { + input_vec.swap_bytes(); + } + uint64_t matches = input_vec.lteq(uint16_t(0x7f)); + if (~matches) { + // Found a match, return the first one + int index = trailing_zeroes(~matches) / 2; + return result(error_code::TOO_LARGE, pos + index); + } + } + + // Scalar tail + while (pos < size) { + + char16_t v = scalar::utf16::swap_if_needed(input[pos]); + if (v > 0x7F) { + return result(error_code::TOO_LARGE, pos); + } + pos++; + } + return result(error_code::SUCCESS, size); +} + +} // namespace utf16 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdutf +/* end file src/generic/validate_utf16.h */ +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF32 +/* begin file src/generic/utf32.h */ +#include + +namespace simdutf { +namespace ppc64 { +namespace { +namespace utf32 { + +template T min(T a, T b) { return a <= b ? 
a : b; } + +simdutf_really_inline size_t utf8_length_from_utf32(const char32_t *input, + size_t length) { + using vector_u32 = simd32; + + const char32_t *start = input; + + // we add up to three ones in a single iteration (see the vectorized loop in + // section #2 below) + const size_t max_increment = 3; + + const size_t N = vector_u32::ELEMENTS; + +#if SIMDUTF_SIMD_HAS_UNSIGNED_CMP + const auto v_0000007f = vector_u32::splat(0x0000007f); + const auto v_000007ff = vector_u32::splat(0x000007ff); + const auto v_0000ffff = vector_u32::splat(0x0000ffff); +#else + const auto v_ffffff80 = vector_u32::splat(0xffffff80); + const auto v_fffff800 = vector_u32::splat(0xfffff800); + const auto v_ffff0000 = vector_u32::splat(0xffff0000); + const auto one = vector_u32::splat(1); +#endif // SIMDUTF_SIMD_HAS_UNSIGNED_CMP + + size_t counter = 0; + + // 1. vectorized loop unrolled 4 times + { + // we use vector of uint32 counters, this is why this limit is used + const size_t max_iterations = + std::numeric_limits::max() / (max_increment * 4); + size_t blocks = length / (N * 4); + length -= blocks * (N * 4); + while (blocks != 0) { + const size_t iterations = min(blocks, max_iterations); + blocks -= iterations; + + simd32 acc = vector_u32::zero(); + for (size_t i = 0; i < iterations; i++) { + const auto in0 = vector_u32(input + 0 * N); + const auto in1 = vector_u32(input + 1 * N); + const auto in2 = vector_u32(input + 2 * N); + const auto in3 = vector_u32(input + 3 * N); + +#if SIMDUTF_SIMD_HAS_UNSIGNED_CMP + acc -= as_vector_u32(in0 > v_0000007f); + acc -= as_vector_u32(in1 > v_0000007f); + acc -= as_vector_u32(in2 > v_0000007f); + acc -= as_vector_u32(in3 > v_0000007f); + + acc -= as_vector_u32(in0 > v_000007ff); + acc -= as_vector_u32(in1 > v_000007ff); + acc -= as_vector_u32(in2 > v_000007ff); + acc -= as_vector_u32(in3 > v_000007ff); + + acc -= as_vector_u32(in0 > v_0000ffff); + acc -= as_vector_u32(in1 > v_0000ffff); + acc -= as_vector_u32(in2 > v_0000ffff); + acc -= 
as_vector_u32(in3 > v_0000ffff); +#else + acc += min(one, in0 & v_ffffff80); + acc += min(one, in1 & v_ffffff80); + acc += min(one, in2 & v_ffffff80); + acc += min(one, in3 & v_ffffff80); + + acc += min(one, in0 & v_fffff800); + acc += min(one, in1 & v_fffff800); + acc += min(one, in2 & v_fffff800); + acc += min(one, in3 & v_fffff800); + + acc += min(one, in0 & v_ffff0000); + acc += min(one, in1 & v_ffff0000); + acc += min(one, in2 & v_ffff0000); + acc += min(one, in3 & v_ffff0000); +#endif // SIMDUTF_SIMD_HAS_UNSIGNED_CMP + + input += 4 * N; + } + + counter += acc.sum(); + } + } + + // 2. vectorized loop for tail + { + const size_t max_iterations = + std::numeric_limits::max() / max_increment; + size_t blocks = length / N; + length -= blocks * N; + while (blocks != 0) { + const size_t iterations = min(blocks, max_iterations); + blocks -= iterations; + + auto acc = vector_u32::zero(); + for (size_t i = 0; i < iterations; i++) { + const auto in = vector_u32(input); + +#if SIMDUTF_SIMD_HAS_UNSIGNED_CMP + acc -= as_vector_u32(in > v_0000007f); + acc -= as_vector_u32(in > v_000007ff); + acc -= as_vector_u32(in > v_0000ffff); +#else + acc += min(one, in & v_ffffff80); + acc += min(one, in & v_fffff800); + acc += min(one, in & v_ffff0000); +#endif // SIMDUTF_SIMD_HAS_UNSIGNED_CMP + + input += N; + } + + counter += acc.sum(); + } + } + + const size_t consumed = input - start; + if (consumed != 0) { + // We don't count 0th bytes in the vectorized loops above, this + // is why we need to count them in the end. 
+ counter += consumed; + } + + return counter + scalar::utf32::utf8_length_from_utf32(input, length); +} + +} // namespace utf32 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdutf +/* end file src/generic/utf32.h */ +/* begin file src/generic/validate_utf32.h */ +namespace simdutf { +namespace ppc64 { +namespace { +namespace utf32 { + +simdutf_really_inline bool validate(const char32_t *input, size_t size) { + if (simdutf_unlikely(size == 0)) { + // empty input is valid UTF-32. protect the implementation from + // handling nullptr + return true; + } + + const char32_t *end = input + size; + + using vector_u32 = simd32; + + const auto standardmax = vector_u32::splat(0x10ffff); + const auto offset = vector_u32::splat(0xffff2000); + const auto standardoffsetmax = vector_u32::splat(0xfffff7ff); + auto currentmax = vector_u32::zero(); + auto currentoffsetmax = vector_u32::zero(); + + constexpr size_t N = vector_u32::ELEMENTS; + + while (input + N < end) { + auto in = vector_u32(input); + if simdutf_constexpr (!match_system(endianness::BIG)) { + in.swap_bytes(); + } + + currentmax = max(currentmax, in); + currentoffsetmax = max(currentoffsetmax, in + offset); + input += N; + } + + const auto too_large = currentmax > standardmax; + if (too_large.any()) { + return false; + } + + const auto surrogate = currentoffsetmax > standardoffsetmax; + if (surrogate.any()) { + return false; + } + + return scalar::utf32::validate(input, end - input); +} + +simdutf_really_inline result validate_with_errors(const char32_t *input, + size_t size) { + if (simdutf_unlikely(size == 0)) { + // empty input is valid UTF-32. 
protect the implementation from + // handling nullptr + return result(error_code::SUCCESS, 0); + } + + const char32_t *start = input; + const char32_t *end = input + size; + + using vector_u32 = simd32; + + const auto standardmax = vector_u32::splat(0x10ffff + 1); + const auto surrogate_mask = vector_u32::splat(0xfffff800); + const auto surrogate_byte = vector_u32::splat(0x0000d800); + + constexpr size_t N = vector_u32::ELEMENTS; + + while (input + N < end) { + auto in = vector_u32(input); + if simdutf_constexpr (!match_system(endianness::BIG)) { + in.swap_bytes(); + } + + const auto too_large = in >= standardmax; + const auto surrogate = (in & surrogate_mask) == surrogate_byte; + + const auto combined = too_large | surrogate; + if (simdutf_unlikely(combined.any())) { + const size_t consumed = input - start; + auto sr = scalar::utf32::validate_with_errors(input, end - input); + sr.count += consumed; + + return sr; + } + + input += N; + } + + const size_t consumed = input - start; + auto sr = scalar::utf32::validate_with_errors(input, end - input); + sr.count += consumed; + + return sr; +} + +} // namespace utf32 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdutf +/* end file src/generic/validate_utf32.h */ +#endif // SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_ASCII +/* begin file src/generic/ascii_validation.h */ +namespace simdutf { +namespace ppc64 { +namespace { +namespace ascii_validation { + +result generic_validate_ascii_with_errors(const char *input, size_t length) { + buf_block_reader<64> reader(reinterpret_cast(input), length); + size_t count{0}; + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + if (!in.is_ascii()) { + result res = scalar::ascii::validate_with_errors( + reinterpret_cast(input + count), length - count); + return result(res.error, count + res.count); + } + reader.advance(); + + count += 64; + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + if 
(!in.is_ascii()) { + result res = scalar::ascii::validate_with_errors( + reinterpret_cast(input + count), length - count); + return result(res.error, count + res.count); + } else { + return result(error_code::SUCCESS, length); + } +} + +bool generic_validate_ascii(const char *input, size_t length) { + buf_block_reader<64> reader(reinterpret_cast(input), length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + if (!in.is_ascii()) { + return false; + } + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + return in.is_ascii(); +} + +} // namespace ascii_validation +} // unnamed namespace +} // namespace ppc64 +} // namespace simdutf +/* end file src/generic/ascii_validation.h */ +#endif // SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/generic/utf8_to_latin1/utf8_to_latin1.h */ +namespace simdutf { +namespace ppc64 { +namespace { +namespace utf8_to_latin1 { +using namespace simd; + +simdutf_really_inline simd8 +check_special_cases(const simd8 input, const simd8 prev1) { + // For UTF-8 to Latin 1, we can allow any ASCII character, and any + // continuation byte, but the non-ASCII leading bytes must be 0b11000011 or + // 0b11000010 and nothing else. 
+ // + // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) + // Bit 1 = Too Long (ASCII followed by continuation) + // Bit 2 = Overlong 3-byte + // Bit 4 = Surrogate + // Bit 5 = Overlong 2-byte + // Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ + constexpr const uint8_t FORBIDDEN = 0xff; + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + FORBIDDEN, + // 1110____ ________ + FORBIDDEN, + // 1111____ ________ + FORBIDDEN); + constexpr const uint8_t CARRY = + TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . 
+ const simd8 byte_1_low = + (prev1 & 0x0F) + .lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, CARRY, + + // ____0100 ________ + FORBIDDEN, + // ____0101 ________ + FORBIDDEN, + // ____011_ ________ + FORBIDDEN, FORBIDDEN, + + // ____1___ ________ + FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN, + // ____1101 ________ + FORBIDDEN, FORBIDDEN, FORBIDDEN); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | + OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); + return (byte_1_high & byte_1_low & byte_2_high); +} + +struct validating_transcoder { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + + validating_transcoder() : error(uint8_t(0)) {} + // + // Check whether the current bytes are valid UTF-8. + // + simdutf_really_inline void check_utf8_bytes(const simd8 input, + const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ + // lead bytes (2, 3, 4-byte leads become large positive numbers instead of + // small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + this->error |= check_special_cases(input, prev1); + } + + simdutf_really_inline size_t convert(const char *in, size_t size, + char *latin1_output) { + size_t pos = 0; + char *start{latin1_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_latin1. 
If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 16 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 16; margin--) { + leading_byte += (int8_t(in[margin - 1]) > + -65); // twos complement of -65 is 1011 1111 ... + } + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store((int8_t *)latin1_output); + latin1_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = + input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in + // this case, we also have ASCII to account for. 
+ if (utf8_continuation_mask & 1) { + return 0; // error + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_latin1( + in + pos, utf8_end_of_code_point_mask, latin1_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + return 0; + } + if (pos < size) { + size_t howmany = + scalar::utf8_to_latin1::convert(in + pos, size - pos, latin1_output); + if (howmany == 0) { + return 0; + } + latin1_output += howmany; + } + return latin1_output - start; + } + + simdutf_really_inline result convert_with_errors(const char *in, size_t size, + char *latin1_output) { + size_t pos = 0; + char *start{latin1_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_latin1. 
If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store((int8_t *)latin1_output); + latin1_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + if (errors()) { + // rewind_and_convert_with_errors will seek a potential error from + // in+pos onward, with the ability to go back up to pos bytes, and + // read size-pos bytes forward. 
+ result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( + pos, in + pos, size - pos, latin1_output); + res.count += pos; + return res; + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_latin1( + in + pos, utf8_end_of_code_point_mask, latin1_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. 
+ result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( + pos, in + pos, size - pos, latin1_output); + res.count += pos; + return res; + } + if (pos < size) { + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. + result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( + pos, in + pos, size - pos, latin1_output); + if (res.error) { // In case of error, we want the error position + res.count += pos; + return res; + } else { // In case of success, we want the number of word written + latin1_output += res.count; + } + } + return result(error_code::SUCCESS, latin1_output - start); + } + + simdutf_really_inline bool errors() const { + return this->error.any_bits_set_anywhere(); + } + +}; // struct utf8_checker +} // namespace utf8_to_latin1 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdutf +/* end file src/generic/utf8_to_latin1/utf8_to_latin1.h */ +/* begin file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ +namespace simdutf { +namespace ppc64 { +namespace { +namespace utf8_to_latin1 { +using namespace simd; + +simdutf_really_inline size_t convert_valid(const char *in, size_t size, + char *latin1_output) { + size_t pos = 0; + char *start{latin1_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the last + // 16 bytes, and if the data is valid, then it is entirely safe because 16 + // UTF-8 bytes generate much more than 8 bytes. However, you cannot generally + // assume that you have valid UTF-8 input, so we are going to go back from the + // end counting 8 leading bytes, to give us a good margin. 
+ size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > + -65); // twos complement of -65 is 1011 1111 ... + } + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store((int8_t *)latin1_output); + latin1_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, it + // is not good enough. + uint64_t utf8_continuation_mask = + input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in + // this case, we also have ASCII to account for. + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. 
+ size_t consumed = convert_masked_utf8_to_latin1( + in + pos, utf8_end_of_code_point_mask, latin1_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (pos < size) { + size_t howmany = scalar::utf8_to_latin1::convert_valid(in + pos, size - pos, + latin1_output); + latin1_output += howmany; + } + return latin1_output - start; +} + +} // namespace utf8_to_latin1 +} // namespace +} // namespace ppc64 +} // namespace simdutf + // namespace simdutf +/* end file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_BASE64 +/* begin file src/generic/base64.h */ +/** + * References and further reading: + * + * Wojciech Muła, Daniel Lemire, Base64 encoding and decoding at almost the + * speed of a memory copy, Software: Practice and Experience 50 (2), 2020. + * https://arxiv.org/abs/1910.05109 + * + * Wojciech Muła, Daniel Lemire, Faster Base64 Encoding and Decoding using AVX2 + * Instructions, ACM Transactions on the Web 12 (3), 2018. + * https://arxiv.org/abs/1704.00605 + * + * Simon Josefsson. 2006. The Base16, Base32, and Base64 Data Encodings. + * https://tools.ietf.org/html/rfc4648. (2006). Internet Engineering Task Force, + * Request for Comments: 4648. + * + * Alfred Klomp. 2014a. Fast Base64 encoding/decoding with SSE vectorization. + * http://www.alfredklomp.com/programming/sse-base64/. (2014). + * + * Alfred Klomp. 2014b. Fast Base64 stream encoder/decoder in C99, with SIMD + * acceleration. https://github.com/aklomp/base64. (2014). + * + * Hanson Char. 2014. A Fast and Correct Base 64 Codec. (2014). + * https://aws.amazon.com/blogs/developer/a-fast-and-correct-base-64-codec/ + * + * Nick Kopp. 2013. 
Base64 Encoding on a GPU. + * https://www.codeproject.com/Articles/276993/Base-Encoding-on-a-GPU. (2013). + */ +namespace simdutf { +namespace ppc64 { +namespace { +namespace base64 { + +/* + The following template function implements API for Base64 decoding. + + An implementation is responsible for providing the `block64` type and + associated methods that perform actual conversion. Please refer + to any vectorized implementation to learn the API of these procedures. +*/ +template +full_result +compress_decode_base64(char *dst, const chartype *src, size_t srclen, + base64_options options, + last_chunk_handling_options last_chunk_options) { + const uint8_t *to_base64 = + default_or_url ? tables::base64::to_base64_default_or_url_value + : (base64_url ? tables::base64::to_base64_url_value + : tables::base64::to_base64_value); + auto ri = simdutf::scalar::base64::find_end(src, srclen, options); + size_t equallocation = ri.equallocation; + size_t equalsigns = ri.equalsigns; + srclen = ri.srclen; + size_t full_input_length = ri.full_input_length; + if (srclen == 0) { + if (!ignore_garbage && equalsigns > 0) { + return {INVALID_BASE64_CHARACTER, equallocation, 0}; + } + return {SUCCESS, full_input_length, 0}; + } + char *end_of_safe_64byte_zone = + dst == nullptr + ? nullptr + : ((srclen + 3) / 4 * 3 >= 63 ? 
dst + (srclen + 3) / 4 * 3 - 63 + : dst); + + const chartype *const srcinit = src; + const char *const dstinit = dst; + const chartype *const srcend = src + srclen; + + constexpr size_t block_size = 6; + static_assert(block_size >= 2, "block_size must be at least two"); + char buffer[block_size * 64]; + char *bufferptr = buffer; + if (srclen >= 64) { + const chartype *const srcend64 = src + srclen - 64; + while (src <= srcend64) { + block64 b(src); + src += 64; + uint64_t error = 0; + const uint64_t badcharmask = + b.to_base64_mask(&error); + if (!ignore_garbage && error) { + src -= 64; + const size_t error_offset = trailing_zeroes(error); + return {error_code::INVALID_BASE64_CHARACTER, + size_t(src - srcinit + error_offset), size_t(dst - dstinit)}; + } + if (badcharmask != 0) { + bufferptr += b.compress_block(badcharmask, bufferptr); + } else if (bufferptr != buffer) { + b.copy_block(bufferptr); + bufferptr += 64; + } else { + if (dst >= end_of_safe_64byte_zone) { + b.base64_decode_block_safe(dst); + } else { + b.base64_decode_block(dst); + } + dst += 48; + } + if (bufferptr >= (block_size - 1) * 64 + buffer) { + for (size_t i = 0; i < (block_size - 2); i++) { + base64_decode_block(dst, buffer + i * 64); + dst += 48; + } + if (dst >= end_of_safe_64byte_zone) { + base64_decode_block_safe(dst, buffer + (block_size - 2) * 64); + } else { + base64_decode_block(dst, buffer + (block_size - 2) * 64); + } + dst += 48; + std::memcpy(buffer, buffer + (block_size - 1) * 64, + 64); // 64 might be too much + bufferptr -= (block_size - 1) * 64; + } + } + } + + char *buffer_start = buffer; + // Optimization note: if this is almost full, then it is worth our + // time, otherwise, we should just decode directly. 
+ int last_block = (int)((bufferptr - buffer_start) % 64); + if (last_block != 0 && srcend - src + last_block >= 64) { + + while ((bufferptr - buffer_start) % 64 != 0 && src < srcend) { + uint8_t val = to_base64[uint8_t(*src)]; + *bufferptr = char(val); + if (!ignore_garbage && + (!scalar::base64::is_eight_byte(*src) || val > 64)) { + return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), + size_t(dst - dstinit)}; + } + bufferptr += (val <= 63); + src++; + } + } + + for (; buffer_start + 64 <= bufferptr; buffer_start += 64) { + if (dst >= end_of_safe_64byte_zone) { + base64_decode_block_safe(dst, buffer_start); + } else { + base64_decode_block(dst, buffer_start); + } + dst += 48; + } + if ((bufferptr - buffer_start) % 64 != 0) { + while (buffer_start + 4 < bufferptr) { + uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + + (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + + (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + + (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) + << 8; +#if !SIMDUTF_IS_BIG_ENDIAN + triple = scalar::u32_swap_bytes(triple); +#endif + std::memcpy(dst, &triple, 3); + + dst += 3; + buffer_start += 4; + } + if (buffer_start + 4 <= bufferptr) { + uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + + (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + + (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + + (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) + << 8; +#if !SIMDUTF_IS_BIG_ENDIAN + triple = scalar::u32_swap_bytes(triple); +#endif + std::memcpy(dst, &triple, 3); + + dst += 3; + buffer_start += 4; + } + // we may have 1, 2 or 3 bytes left and we need to decode them so let us + // backtrack + int leftover = int(bufferptr - buffer_start); + while (leftover > 0) { + if (!ignore_garbage) { + while (to_base64[uint8_t(*(src - 1))] == 64) { + src--; + } + } else { + while (to_base64[uint8_t(*(src - 1))] >= 64) { + src--; + } + } + src--; + leftover--; + } + } + if (src < srcend + equalsigns) { + full_result r = 
scalar::base64::base64_tail_decode( + dst, src, srcend - src, equalsigns, options, last_chunk_options); + r = scalar::base64::patch_tail_result( + r, size_t(src - srcinit), size_t(dst - dstinit), equallocation, + full_input_length, last_chunk_options); + // When is_partial(last_chunk_options) is true, we must either end with + // the end of the stream (beyond whitespace) or right after a non-ignorable + // character or at the very beginning of the stream. + // See https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64 + if (is_partial(last_chunk_options) && r.error == error_code::SUCCESS && + r.input_count < full_input_length) { + // First check if we can extend the input to the end of the stream + while (r.input_count < full_input_length && + base64_ignorable(*(srcinit + r.input_count), options)) { + r.input_count++; + } + // If we are still not at the end of the stream, then we must backtrack + // to the last non-ignorable character. + if (r.input_count < full_input_length) { + while (r.input_count > 0 && + base64_ignorable(*(srcinit + r.input_count - 1), options)) { + r.input_count--; + } + } + } + return r; + } + if (!ignore_garbage && equalsigns > 0) { + if ((size_t(dst - dstinit) % 3 == 0) || + ((size_t(dst - dstinit) % 3) + 1 + equalsigns != 4)) { + return {INVALID_BASE64_CHARACTER, equallocation, size_t(dst - dstinit)}; + } + } + return {SUCCESS, srclen, size_t(dst - dstinit)}; +} + +} // namespace base64 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdutf +/* end file src/generic/base64.h */ +/* begin file src/generic/find.h */ +namespace simdutf { +namespace ppc64 { +namespace { +namespace util { + +simdutf_really_inline const char *find(const char *start, const char *end, + char character) noexcept { + // Handle empty or invalid range + if (start >= end) + return end; + // Align the start pointer to 64 bytes + uintptr_t misalignment = reinterpret_cast(start) % 64; + if (misalignment != 0) { + size_t adjustment = 64 - 
misalignment; + if (size_t(std::distance(start, end)) < adjustment) { + adjustment = std::distance(start, end); + } + for (size_t i = 0; i < adjustment; i++) { + if (start[i] == character) { + return start + i; + } + } + start += adjustment; + } + + // Main loop for 64-byte aligned data + for (; std::distance(start, end) >= 64; start += 64) { + simd8x64 input(reinterpret_cast(start)); + uint64_t matches = input.eq(uint8_t(character)); + if (matches != 0) { + // Found a match, return the first one + int index = trailing_zeroes(matches); + return start + index; + } + } + return std::find(start, end, character); +} + +simdutf_really_inline const char16_t * +find(const char16_t *start, const char16_t *end, char16_t character) noexcept { + // Handle empty or invalid range + if (start >= end) + return end; + // Align the start pointer to 64 bytes if misalignment is even + uintptr_t misalignment = reinterpret_cast(start) % 64; + if (misalignment != 0 && misalignment % 2 == 0) { + size_t adjustment = (64 - misalignment) / sizeof(char16_t); + if (size_t(std::distance(start, end)) < adjustment) { + adjustment = std::distance(start, end); + } + for (size_t i = 0; i < adjustment; i++) { + if (start[i] == character) { + return start + i; + } + } + start += adjustment; + } + + // Main loop for 64-byte aligned data + for (; std::distance(start, end) >= 32; start += 32) { + simd16x32 input(reinterpret_cast(start)); + uint64_t matches = input.eq(uint16_t(character)); + if (matches != 0) { + // Found a match, return the first one + int index = trailing_zeroes(matches) / 2; + return start + index; + } + } + return std::find(start, end, character); +} + +} // namespace util +} // namespace +} // namespace ppc64 +} // namespace simdutf +/* end file src/generic/find.h */ +#endif // SIMDUTF_FEATURE_BASE64 + +/* begin file src/ppc64/templates.cpp */ +/* + Template `convert_impl` implements generic conversion routine between + different encodings. 
Procedure returns the number of written elements, + or zero in the case of error. + + Parameters: + * VectorizedConvert - vectorized procedure that returns structure having + three fields: error_code (err), const Source* (input), Destination* + (output) + * ScalarConvert - scalar procedure that carries on conversion of tail + * Source - type of input char (like char16_t, char) + * Destination - type of input char +*/ +template +size_t convert_impl(VectorizedConvert vectorized_convert, + ScalarConvert scalar_convert, const Source *buf, size_t len, + Destination *output) { + const auto vr = vectorized_convert(buf, len, output); + const size_t consumed = vr.input - buf; + const size_t written = vr.output - output; + if (vr.err != simdutf::error_code::SUCCESS) { + if (vr.err == simdutf::error_code::OTHER) { + // Vectorized procedure detected an error, but does not know + // exact position. The scalar procedure rescan the portion of + // input and figure out where the error is located. + return scalar_convert(vr.input, len - consumed, vr.output); + } + return 0; + } + + if (consumed == len) { + return written; + } + + const auto ret = scalar_convert(vr.input, len - consumed, vr.output); + if (ret == 0) { + return 0; + } + + return written + ret; +} + +/* + Template `convert_with_errors_impl` implements generic conversion routine + between different encodings. Procedure returns a `result` instance --- + please refer to its documentation for details. 
+ + Parameters: + * VectorizedConvert - vectorized procedure that returns structure having + three fields: error_code (err), const Source* (input), Destination* + (output) + * ScalarConvert - scalar procedure that carries on conversion of tail + * Source - type of input char (like char16_t, char) + * Destination - type of input char +*/ +template +simdutf::result convert_with_errors_impl(VectorizedConvert vectorized_convert, + ScalarConvert scalar_convert, + const Source *buf, size_t len, + Destination *output) { + + const auto vr = vectorized_convert(buf, len, output); + const size_t consumed = vr.input - buf; + const size_t written = vr.output - output; + if (vr.err != simdutf::error_code::SUCCESS) { + if (vr.err == simdutf::error_code::OTHER) { + // Vectorized procedure detected an error, but does not know + // exact position. The scalar procedure rescan the portion of + // input and figure out where the error is located. + auto sr = scalar_convert(vr.input, len - consumed, vr.output); + sr.count += consumed; + return sr; + } + return simdutf::result(vr.err, consumed); + } + + if (consumed == len) { + return simdutf::result(simdutf::error_code::SUCCESS, written); + } + + simdutf::result sr = scalar_convert(vr.input, len - consumed, vr.output); + if (sr.is_ok()) { + sr.count += written; + } else { + sr.count += consumed; + } + + return sr; +} +/* end file src/ppc64/templates.cpp */ + +#ifdef SIMDUTF_INTERNAL_TESTS + #if SIMDUTF_FEATURE_BASE64 + #include "ppc64_base64_internal_tests.cpp" + #endif // SIMDUTF_FEATURE_BASE64 +#endif // SIMDUTF_INTERNAL_TESTS +// +// Implementation-specific overrides +// +namespace simdutf { +namespace ppc64 { + +#if SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused int +implementation::detect_encodings(const char *input, + size_t length) const noexcept { + // If there is a BOM, then we trust it. 
+ auto bom_encoding = simdutf::BOM::check_bom(input, length); + if (bom_encoding != encoding_type::unspecified) { + return bom_encoding; + } + int out = 0; + // todo: reimplement as a one-pass algorithm. + if (validate_utf8(input, length)) { + out |= encoding_type::UTF8; + } + if ((length % 2) == 0) { + if (validate_utf16le(reinterpret_cast(input), + length / 2)) { + out |= encoding_type::UTF16_LE; + } + } + if ((length % 4) == 0) { + if (validate_utf32(reinterpret_cast(input), length / 4)) { + out |= encoding_type::UTF32_LE; + } + } + return out; +} +#endif // SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused bool +implementation::validate_utf8(const char *buf, size_t len) const noexcept { + return ppc64::utf8_validation::generic_validate_utf8(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 +simdutf_warn_unused result implementation::validate_utf8_with_errors( + const char *buf, size_t len) const noexcept { + return ppc64::utf8_validation::generic_validate_utf8_with_errors(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_ASCII +simdutf_warn_unused bool +implementation::validate_ascii(const char *buf, size_t len) const noexcept { + return ppc64::ascii_validation::generic_validate_ascii(buf, len); +} + +simdutf_warn_unused result implementation::validate_ascii_with_errors( + const char *buf, size_t len) const noexcept { + return ppc64::ascii_validation::generic_validate_ascii_with_errors(buf, len); +} +#endif // SIMDUTF_FEATURE_ASCII +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII +simdutf_warn_unused bool +implementation::validate_utf16le_as_ascii(const char16_t *buf, + size_t len) const noexcept { + return ppc64::utf16::validate_utf16_as_ascii_with_errors( + buf, len) + .error == SUCCESS; +} + +simdutf_warn_unused bool +implementation::validate_utf16be_as_ascii(const char16_t *buf, + size_t len) const noexcept { + 
return ppc64::utf16::validate_utf16_as_ascii_with_errors(buf, + len) + .error == SUCCESS; +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused bool +implementation::validate_utf16le(const char16_t *buf, + size_t len) const noexcept { + const auto res = + ppc64::utf16::validate_utf16_with_errors(buf, len); + if (res.is_err()) { + return false; + } + + if (res.count != len) { + return scalar::utf16::validate(buf + res.count, + len - res.count); + } + + return true; +} +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused bool +implementation::validate_utf16be(const char16_t *buf, + size_t len) const noexcept { + return validate_utf16be_with_errors(buf, len).is_ok(); +} + +void implementation::to_well_formed_utf16le(const char16_t *input, size_t len, + char16_t *output) const noexcept { + return scalar::utf16::to_well_formed_utf16(input, len, + output); +} + +void implementation::to_well_formed_utf16be(const char16_t *input, size_t len, + char16_t *output) const noexcept { + return scalar::utf16::to_well_formed_utf16(input, len, + output); +} + +simdutf_warn_unused result implementation::validate_utf16le_with_errors( + const char16_t *buf, size_t len) const noexcept { + const auto res = + ppc64::utf16::validate_utf16_with_errors(buf, len); + if (res.count != len) { + auto scalar = scalar::utf16::validate_with_errors( + buf + res.count, len - res.count); + scalar.count += res.count; + return scalar; + } + + return res; +} + +simdutf_warn_unused result implementation::validate_utf16be_with_errors( + const char16_t *buf, size_t len) const noexcept { + const auto res = + ppc64::utf16::validate_utf16_with_errors(buf, len); + if (res.count != len) { + auto scalar = scalar::utf16::validate_with_errors( + buf + res.count, len - res.count); + scalar.count += res.count; + return scalar; + } + + return res; +} +#endif // 
SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused bool +implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { + return utf32::validate(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused result implementation::validate_utf32_with_errors( + const char32_t *buf, size_t len) const noexcept { + return utf32::validate_with_errors(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( + const char *buf, size_t len, char *utf8_output) const noexcept { + const auto ret = ppc64_convert_latin1_to_utf8(buf, len, utf8_output); + size_t converted_chars = ret.second - utf8_output; + + if (ret.first != buf + len) { + const size_t scalar_converted_chars = scalar::latin1_to_utf8::convert( + ret.first, len - (ret.first - buf), ret.second); + converted_chars += scalar_converted_chars; + } + + return converted_chars; +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + size_t n = + ppc64_convert_latin1_to_utf16(buf, len, utf16_output); + if (n < len) { + n += scalar::latin1_to_utf16::convert(buf + n, len - n, + utf16_output + n); + } + + return n; +} + +simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + size_t n = + ppc64_convert_latin1_to_utf16(buf, len, utf16_output); + if (n < len) { + n += scalar::latin1_to_utf16::convert(buf + n, len - n, + utf16_output + n); + } + + return n; +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused 
size_t implementation::convert_latin1_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept { + const auto ret = ppc64_convert_latin1_to_utf32(buf, len, utf32_output); + if (ret.first != buf + len) { + const size_t processed = ret.first - buf; + scalar::latin1_to_utf32::convert(ret.first, len - processed, ret.second); + } + + return len; +} +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept { + utf8_to_latin1::validating_transcoder converter; + return converter.convert(buf, len, latin1_output); +} + +simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors( + const char *buf, size_t len, char *latin1_output) const noexcept { + utf8_to_latin1::validating_transcoder converter; + return converter.convert_with_errors(buf, len, latin1_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept { + return ppc64::utf8_to_latin1::convert_valid(buf, len, latin1_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16::validating_transcoder converter; + return converter.convert(buf, len, utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16::validating_transcoder converter; + return converter.convert(buf, len, utf16_output); +} + +simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + 
utf8_to_utf16::validating_transcoder converter; + return converter.convert_with_errors(buf, len, + utf16_output); +} + +simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16::validating_transcoder converter; + return converter.convert_with_errors(buf, len, utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + return utf8_to_utf16::convert_valid(buf, len, + utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + return utf8_to_utf16::convert_valid(buf, len, utf16_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::convert_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept { + utf8_to_utf32::validating_transcoder converter; + return converter.convert(buf, len, utf32_output); +} + +simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors( + const char *buf, size_t len, char32_t *utf32_output) const noexcept { + utf8_to_utf32::validating_transcoder converter; + return converter.convert_with_errors(buf, len, utf32_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( + const char *input, size_t size, char32_t *utf32_output) const noexcept { + return utf8_to_utf32::convert_valid(input, size, utf32_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + + return convert_impl( + ppc64_convert_utf16_to_latin1, + 
scalar::utf16_to_latin1::convert, + buf, len, latin1_output); +} + +simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + + return convert_impl( + ppc64_convert_utf16_to_latin1, + scalar::utf16_to_latin1::convert, + buf, len, latin1_output); +} + +simdutf_warn_unused result +implementation::convert_utf16le_to_latin1_with_errors( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + + return convert_with_errors_impl( + ppc64_convert_utf16_to_latin1, + scalar::utf16_to_latin1::convert_with_errors, + buf, len, latin1_output); +} + +simdutf_warn_unused result +implementation::convert_utf16be_to_latin1_with_errors( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + + return convert_with_errors_impl( + ppc64_convert_utf16_to_latin1, + scalar::utf16_to_latin1::convert_with_errors, + buf, len, latin1_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + // optimization opportunity: we could provide an optimized function. + return convert_utf16be_to_latin1(buf, len, latin1_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + // optimization opportunity: we could provide an optimized function. 
+ return convert_utf16le_to_latin1(buf, len, latin1_output); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + + return convert_impl(ppc64_convert_utf16_to_utf8, + scalar::utf16_to_utf8::convert, + buf, len, utf8_output); +} + +simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + + return convert_impl( + ppc64_convert_utf16_to_utf8, + scalar::utf16_to_utf8::convert, + buf, len, utf8_output); +} + +simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + + return convert_with_errors_impl( + ppc64_convert_utf16_to_utf8, + scalar::utf16_to_utf8::simple_convert_with_errors, + buf, len, utf8_output); +} + +simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + + return convert_with_errors_impl( + ppc64_convert_utf16_to_utf8, + scalar::utf16_to_utf8::simple_convert_with_errors, buf, + len, utf8_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + return convert_utf16le_to_utf8(buf, len, utf8_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + return convert_utf16be_to_utf8(buf, len, utf8_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + return 
convert_impl(ppc64_convert_utf32_to_latin1, + scalar::utf32_to_latin1::convert, buf, len, + latin1_output); +} + +simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + return convert_with_errors_impl( + ppc64_convert_utf32_to_latin1, + scalar::utf32_to_latin1::convert_with_errors, buf, len, latin1_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + return convert_impl(ppc64_convert_utf32_to_latin1, + scalar::utf32_to_latin1::convert, buf, len, + latin1_output); +} +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + return convert_impl(ppc64_convert_utf32_to_utf8, + scalar::utf32_to_utf8::convert, + buf, len, utf8_output); +} + +simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + return convert_with_errors_impl( + ppc64_convert_utf32_to_utf8, + scalar::utf32_to_utf8::convert_with_errors, buf, + len, utf8_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + return convert_impl(ppc64_convert_utf32_to_utf8, + scalar::utf32_to_utf8::convert, + buf, len, utf8_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + + return convert_impl(ppc64_convert_utf32_to_utf16, + scalar::utf32_to_utf16::convert, buf, + len, utf16_output); +} + +simdutf_warn_unused 
size_t implementation::convert_utf32_to_utf16be( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + + return convert_impl( + ppc64_convert_utf32_to_utf16, + scalar::utf32_to_utf16::convert, buf, len, utf16_output); +} + +simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + + return convert_with_errors_impl( + ppc64_convert_utf32_to_utf16, + scalar::utf32_to_utf16::convert_with_errors, buf, len, + utf16_output); +} + +simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + + return convert_with_errors_impl( + ppc64_convert_utf32_to_utf16, + scalar::utf32_to_utf16::convert_with_errors, buf, len, + utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + + return convert_impl( + ppc64_convert_utf32_to_utf16, + scalar::utf32_to_utf16::convert, buf, len, + utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + + return convert_impl( + ppc64_convert_utf32_to_utf16, + scalar::utf32_to_utf16::convert, buf, len, utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + return convert_impl(ppc64_convert_utf16_to_utf32, + scalar::utf16_to_utf32::convert, buf, + len, utf32_output); +} + +simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + return convert_impl(ppc64_convert_utf16_to_utf32, + scalar::utf16_to_utf32::convert, buf, + len, utf32_output); +} + +simdutf_warn_unused result 
implementation::convert_utf16le_to_utf32_with_errors( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + return convert_with_errors_impl( + ppc64_convert_utf16_to_utf32, + scalar::utf16_to_utf32::convert_with_errors, buf, len, + utf32_output); +} + +simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + return convert_with_errors_impl( + ppc64_convert_utf16_to_utf32, + scalar::utf16_to_utf32::convert_with_errors, buf, len, + utf32_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + return convert_utf16le_to_utf32(buf, len, utf32_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + return convert_utf16be_to_utf32(buf, len, utf32_output); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 +void implementation::change_endianness_utf16(const char16_t *input, + size_t length, + char16_t *output) const noexcept { + utf16::change_endianness_utf16(input, length, output); +} + +simdutf_warn_unused size_t implementation::count_utf16le( + const char16_t *input, size_t length) const noexcept { + return utf16::count_code_points(input, length); +} + +simdutf_warn_unused size_t implementation::count_utf16be( + const char16_t *input, size_t length) const noexcept { + return utf16::count_code_points(input, length); +} +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 +simdutf_warn_unused size_t +implementation::count_utf8(const char *input, size_t length) const noexcept { + return utf8::count_code_points(input, length); +} +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::latin1_length_from_utf8( + 
const char *buf, size_t len) const noexcept { + return count_utf8(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::utf8_length_from_latin1( + const char *input, size_t length) const noexcept { + const auto ret = ppc64_utf8_length_from_latin1(input, length); + const size_t consumed = ret.first - input; + + if (consumed == length) { + return ret.second; + } + + const auto scalar = + scalar::latin1::utf8_length_from_latin1(ret.first, length - consumed); + return scalar + ret.second; +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t implementation::utf8_length_from_utf16le( + const char16_t *input, size_t length) const noexcept { + return utf16::utf8_length_from_utf16(input, length); +} + +simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( + const char16_t *input, size_t length) const noexcept { + return utf16::utf8_length_from_utf16(input, length); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::utf32_length_from_utf16le( + const char16_t *input, size_t length) const noexcept { + return utf16::utf32_length_from_utf16(input, length); +} + +simdutf_warn_unused size_t implementation::utf32_length_from_utf16be( + const char16_t *input, size_t length) const noexcept { + return utf16::utf32_length_from_utf16(input, length); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t implementation::utf16_length_from_utf8( + const char *input, size_t length) const noexcept { + return utf8::utf16_length_from_utf8(input, length); +} +simdutf_warn_unused result +implementation::utf8_length_from_utf16le_with_replacement( + const char16_t *input, size_t length) 
const noexcept { + return scalar::utf16::utf8_length_from_utf16_with_replacement< + endianness::LITTLE>(input, length); +} + +simdutf_warn_unused result +implementation::utf8_length_from_utf16be_with_replacement( + const char16_t *input, size_t length) const noexcept { + return scalar::utf16::utf8_length_from_utf16_with_replacement< + endianness::BIG>(input, length); +} + +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::utf8_length_from_utf32( + const char32_t *input, size_t length) const noexcept { + return utf32::utf8_length_from_utf32(input, length); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::utf16_length_from_utf32( + const char32_t *input, size_t length) const noexcept { + return scalar::utf32::utf16_length_from_utf32(input, length); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::utf32_length_from_utf8( + const char *input, size_t length) const noexcept { + return utf8::count_code_points(input, length); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_BASE64 +simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( + const char *input, size_t length) const noexcept { + return scalar::base64::maximal_binary_length_from_base64(input, length); +} + +simdutf_warn_unused result implementation::base64_to_binary( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + if (options & base64_default_or_url) { + if (options == base64_options::base64_default_or_url_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return 
base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else if (options & base64_url) { + if (options == base64_options::base64_url_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else { + if (options == base64_options::base64_default_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } +} + +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + if (options & base64_default_or_url) { + if (options == base64_options::base64_default_or_url_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else if (options & base64_url) { + if (options == base64_options::base64_url_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else { + if (options == base64_options::base64_default_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } +} + +simdutf_warn_unused result implementation::base64_to_binary( + const char16_t *input, size_t length, char *output, base64_options options, + last_chunk_handling_options 
last_chunk_options) const noexcept { + if (options & base64_default_or_url) { + if (options == base64_options::base64_default_or_url_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else if (options & base64_url) { + if (options == base64_options::base64_url_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else { + if (options == base64_options::base64_default_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } +} + +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char16_t *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + if (options & base64_default_or_url) { + if (options == base64_options::base64_default_or_url_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else if (options & base64_url) { + if (options == base64_options::base64_url_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else { + if (options == base64_options::base64_default_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return 
base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } +} + +size_t implementation::binary_to_base64(const char *input, size_t length, + char *output, + base64_options options) const noexcept { + if (options & base64_url) { + return encode_base64(output, input, length, options); + } else { + return encode_base64(output, input, length, options); + } +} + +size_t implementation::binary_to_base64_with_lines( + const char *input, size_t length, char *output, size_t line_length, + base64_options options) const noexcept { + return scalar::base64::tail_encode_base64_impl(output, input, length, + options, line_length); +} + +const char *implementation::find(const char *start, const char *end, + char character) const noexcept { + return util::find(start, end, character); +} + +const char16_t *implementation::find(const char16_t *start, const char16_t *end, + char16_t character) const noexcept { + return util::find(start, end, character); +} +#endif // SIMDUTF_FEATURE_BASE64 + +#ifdef SIMDUTF_INTERNAL_TESTS +std::vector +implementation::internal_tests() const { + #define entry(proc) \ + TestProcedure { #proc, proc } + return {entry(base64_encoding_translate_6bit_values), + entry(base64_encoding_expand_6bit_fields), + entry(base64_decoding_valid), + entry(base64_decoding_invalid_ignore_errors), + entry(base64url_decoding_invalid_ignore_errors), + entry(base64_decoding_invalid_strict_errors), + entry(base64url_decoding_invalid_strict_errors), + entry(base64_decoding_pack), + entry(base64_compress)}; + #undef entry +} +#endif + +} // namespace ppc64 +} // namespace simdutf + +/* begin file src/simdutf/ppc64/end.h */ +/* end file src/simdutf/ppc64/end.h */ +/* end file src/ppc64/implementation.cpp */ +#endif +#if SIMDUTF_IMPLEMENTATION_RVV +/* begin file src/rvv/implementation.cpp */ +/* begin file src/simdutf/rvv/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "rvv" +// #define SIMDUTF_IMPLEMENTATION rvv + +#if 
SIMDUTF_CAN_ALWAYS_RUN_RVV +// nothing needed. +#else +SIMDUTF_TARGET_RVV +#endif +/* end file src/simdutf/rvv/begin.h */ +namespace simdutf { +namespace rvv { +namespace { +#ifndef SIMDUTF_RVV_H + #error "rvv.h must be included" +#endif + +} // unnamed namespace +} // namespace rvv +} // namespace simdutf + +// +// Implementation-specific overrides +// +namespace simdutf { +namespace rvv { +/* begin file src/rvv/rvv_helpers.inl.cpp */ +template +simdutf_really_inline static size_t +rvv_utf32_store_utf16_m4(uint16_t *dst, vuint32m4_t utf32, size_t vl, + vbool4_t m4even) { + /* convert [000000000000aaaa|aaaaaabbbbbbbbbb] + * to [110111bbbbbbbbbb|110110aaaaaaaaaa] */ + vuint32m4_t sur = __riscv_vsub_vx_u32m4(utf32, 0x10000, vl); + sur = __riscv_vor_vv_u32m4(__riscv_vsll_vx_u32m4(sur, 16, vl), + __riscv_vsrl_vx_u32m4(sur, 10, vl), vl); + sur = __riscv_vand_vx_u32m4(sur, 0x3FF03FF, vl); + sur = __riscv_vor_vx_u32m4(sur, 0xDC00D800, vl); + /* merge 1 byte utf32 and 2 byte sur */ + vbool8_t m4 = __riscv_vmsgtu_vx_u32m4_b8(utf32, 0xFFFF, vl); + vuint16m4_t utf32_16 = __riscv_vreinterpret_v_u32m4_u16m4( + __riscv_vmerge_vvm_u32m4(utf32, sur, m4, vl)); + /* compress and store */ + vbool4_t mOut = __riscv_vmor_mm_b4( + __riscv_vmsne_vx_u16m4_b4(utf32_16, 0, vl * 2), m4even, vl * 2); + vuint16m4_t vout = __riscv_vcompress_vm_u16m4(utf32_16, mOut, vl * 2); + vl = __riscv_vcpop_m_b4(mOut, vl * 2); + __riscv_vse16_v_u16m4(dst, simdutf_byteflip(vout, vl), vl); + return vl; +}; +/* end file src/rvv/rvv_helpers.inl.cpp */ + +/* begin file src/rvv/rvv_length_from.inl.cpp */ +#if SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t +implementation::count_utf16le(const char16_t *src, size_t len) const noexcept { + return utf32_length_from_utf16le(src, len); +} + +simdutf_warn_unused size_t +implementation::count_utf16be(const char16_t *src, size_t len) const noexcept { + return utf32_length_from_utf16be(src, len); +} +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 
+simdutf_warn_unused size_t +implementation::count_utf8(const char *src, size_t len) const noexcept { + return utf32_length_from_utf8(src, len); +} +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::latin1_length_from_utf8( + const char *src, size_t len) const noexcept { + return utf32_length_from_utf8(src, len); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::utf32_length_from_utf8( + const char *src, size_t len) const noexcept { + size_t count = 0; + for (size_t vl; len > 0; len -= vl, src += vl) { + vl = __riscv_vsetvl_e8m8(len); + vint8m8_t v = __riscv_vle8_v_i8m8((int8_t *)src, vl); + vbool1_t mask = __riscv_vmsgt_vx_i8m8_b1(v, -65, vl); + count += __riscv_vcpop_m_b1(mask, vl); + } + return count; +} +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32 +template +simdutf_really_inline static size_t +rvv_utf32_length_from_utf16(const char16_t *src, size_t len) { + size_t count = 0; + for (size_t vl; len > 0; len -= vl, src += vl) { + vl = __riscv_vsetvl_e16m8(len); + vuint16m8_t v = __riscv_vle16_v_u16m8((uint16_t *)src, vl); + v = simdutf_byteflip(v, vl); + vbool2_t notHigh = + __riscv_vmor_mm_b2(__riscv_vmsgtu_vx_u16m8_b2(v, 0xDFFF, vl), + __riscv_vmsltu_vx_u16m8_b2(v, 0xDC00, vl), vl); + count += __riscv_vcpop_m_b2(notHigh, vl); + } + return count; +} + +simdutf_warn_unused size_t implementation::utf32_length_from_utf16le( + const char16_t *src, size_t len) const noexcept { + return rvv_utf32_length_from_utf16(src, len); +} + +simdutf_warn_unused size_t implementation::utf32_length_from_utf16be( + const char16_t *src, size_t len) const noexcept { + if (supports_zvbb()) + return rvv_utf32_length_from_utf16(src, len); + else + return rvv_utf32_length_from_utf16(src, len); +} +#endif // SIMDUTF_FEATURE_UTF16 || 
SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::utf8_length_from_latin1( + const char *src, size_t len) const noexcept { + size_t count = len; + for (size_t vl; len > 0; len -= vl, src += vl) { + vl = __riscv_vsetvl_e8m8(len); + vint8m8_t v = __riscv_vle8_v_i8m8((int8_t *)src, vl); + count += __riscv_vcpop_m_b1(__riscv_vmslt_vx_i8m8_b1(v, 0, vl), vl); + } + return count; +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +template +simdutf_really_inline static size_t +rvv_utf8_length_from_utf16(const char16_t *src, size_t len) { + size_t count = 0; + for (size_t vl; len > 0; len -= vl, src += vl) { + vl = __riscv_vsetvl_e16m8(len); + vuint16m8_t v = __riscv_vle16_v_u16m8((uint16_t *)src, vl); + v = simdutf_byteflip(v, vl); + vbool2_t m234 = __riscv_vmsgtu_vx_u16m8_b2(v, 0x7F, vl); + vbool2_t m34 = __riscv_vmsgtu_vx_u16m8_b2(v, 0x7FF, vl); + vbool2_t notSur = + __riscv_vmor_mm_b2(__riscv_vmsltu_vx_u16m8_b2(v, 0xD800, vl), + __riscv_vmsgtu_vx_u16m8_b2(v, 0xDFFF, vl), vl); + vbool2_t m3 = __riscv_vmand_mm_b2(m34, notSur, vl); + count += vl + __riscv_vcpop_m_b2(m234, vl) + __riscv_vcpop_m_b2(m3, vl); + } + return count; +} + +simdutf_warn_unused size_t implementation::utf8_length_from_utf16le( + const char16_t *src, size_t len) const noexcept { + return rvv_utf8_length_from_utf16(src, len); +} + +simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( + const char16_t *src, size_t len) const noexcept { + if (supports_zvbb()) + return rvv_utf8_length_from_utf16(src, len); + else + return rvv_utf8_length_from_utf16(src, len); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::utf8_length_from_utf32( + const char32_t *src, size_t len) const noexcept { + size_t count = 0; + for (size_t vl; len > 0; len -= vl, src += vl) { + 
vl = __riscv_vsetvl_e32m8(len); + vuint32m8_t v = __riscv_vle32_v_u32m8((uint32_t *)src, vl); + vbool4_t m234 = __riscv_vmsgtu_vx_u32m8_b4(v, 0x7F, vl); + vbool4_t m34 = __riscv_vmsgtu_vx_u32m8_b4(v, 0x7FF, vl); + vbool4_t m4 = __riscv_vmsgtu_vx_u32m8_b4(v, 0xFFFF, vl); + count += vl + __riscv_vcpop_m_b4(m234, vl) + __riscv_vcpop_m_b4(m34, vl) + + __riscv_vcpop_m_b4(m4, vl); + } + return count; +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t implementation::utf16_length_from_utf8( + const char *src, size_t len) const noexcept { + size_t count = 0; + for (size_t vl; len > 0; len -= vl, src += vl) { + vl = __riscv_vsetvl_e8m8(len); + vint8m8_t v = __riscv_vle8_v_i8m8((int8_t *)src, vl); + vbool1_t m1234 = __riscv_vmsgt_vx_i8m8_b1(v, -65, vl); + vbool1_t m4 = __riscv_vmsgtu_vx_u8m8_b1(__riscv_vreinterpret_u8m8(v), + (uint8_t)0b11101111, vl); + count += __riscv_vcpop_m_b1(m1234, vl) + __riscv_vcpop_m_b1(m4, vl); + } + return count; +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::utf16_length_from_utf32( + const char32_t *src, size_t len) const noexcept { + size_t count = 0; + for (size_t vl; len > 0; len -= vl, src += vl) { + vl = __riscv_vsetvl_e32m8(len); + vuint32m8_t v = __riscv_vle32_v_u32m8((uint32_t *)src, vl); + vbool4_t m4 = __riscv_vmsgtu_vx_u32m8_b4(v, 0xFFFF, vl); + count += vl + __riscv_vcpop_m_b4(m4, vl); + } + return count; +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +/* end file src/rvv/rvv_length_from.inl.cpp */ +/* begin file src/rvv/rvv_validate.inl.cpp */ +#if SIMDUTF_FEATURE_ASCII +simdutf_warn_unused bool +implementation::validate_ascii(const char *src, size_t len) const noexcept { + size_t vlmax = __riscv_vsetvlmax_e8m8(); + vint8m8_t mask = __riscv_vmv_v_x_i8m8(0, vlmax); + for (size_t vl; len > 0; len -= vl, src += vl) { + vl = 
__riscv_vsetvl_e8m8(len); + vint8m8_t v = __riscv_vle8_v_i8m8((int8_t *)src, vl); + mask = __riscv_vor_vv_i8m8_tu(mask, mask, v, vl); + } + return __riscv_vfirst_m_b1(__riscv_vmslt_vx_i8m8_b1(mask, 0, vlmax), vlmax) < + 0; +} + +simdutf_warn_unused result implementation::validate_ascii_with_errors( + const char *src, size_t len) const noexcept { + const char *beg = src; + for (size_t vl; len > 0; len -= vl, src += vl) { + vl = __riscv_vsetvl_e8m8(len); + vint8m8_t v = __riscv_vle8_v_i8m8((int8_t *)src, vl); + long idx = __riscv_vfirst_m_b1(__riscv_vmslt_vx_i8m8_b1(v, 0, vl), vl); + if (idx >= 0) + return result(error_code::TOO_LARGE, src - beg + idx); + } + return result(error_code::SUCCESS, src - beg); +} +#endif // SIMDUTF_FEATURE_ASCII +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII +template +simdutf_really_inline bool rvv_validate_utf16_as_ascii(const char16_t *buf, + size_t len) noexcept { + const char16_t *src = buf; + for (size_t vl; len > 0; len -= vl, src += vl) { + vl = __riscv_vsetvl_e16m8(len); + vuint16m8_t v = __riscv_vle16_v_u16m8((uint16_t *)src, vl); + v = simdutf_byteflip(v, vl); + long idx = __riscv_vfirst_m_b2(__riscv_vmsgtu_vx_u16m8_b2(v, 0x7f, vl), vl); + if (idx >= 0) + return false; + } + return true; +} +simdutf_warn_unused bool +implementation::validate_utf16le_as_ascii(const char16_t *buf, + size_t len) const noexcept { + return rvv_validate_utf16_as_ascii(buf, len); +} + +simdutf_warn_unused bool +implementation::validate_utf16be_as_ascii(const char16_t *buf, + size_t len) const noexcept { + if (supports_zvbb()) + return rvv_validate_utf16_as_ascii(buf, len); + else + return rvv_validate_utf16_as_ascii(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +/* Returns a close estimation of the number of valid UTF-8 bytes up to the + * first invalid one, but never overestimating. 
*/ +simdutf_really_inline static size_t rvv_count_valid_utf8(const char *src, + size_t len) { + const char *beg = src; + if (len < 32) + return 0; + + /* validate first three bytes */ + { + size_t idx = 3; + while (idx < len && (uint8_t(src[idx]) >> 6) == 0b10) + ++idx; + if (idx > 3 + 3 || !scalar::utf8::validate(src, idx)) + return 0; + } + + static const uint64_t err1m[] = {0x0202020202020202, 0x4915012180808080}; + static const uint64_t err2m[] = {0xCBCBCB8B8383A3E7, 0xCBCBDBCBCBCBCBCB}; + static const uint64_t err3m[] = {0x0101010101010101, 0X01010101BABAAEE6}; + + const vuint8m1_t err1tbl = + __riscv_vreinterpret_v_u64m1_u8m1(__riscv_vle64_v_u64m1(err1m, 2)); + const vuint8m1_t err2tbl = + __riscv_vreinterpret_v_u64m1_u8m1(__riscv_vle64_v_u64m1(err2m, 2)); + const vuint8m1_t err3tbl = + __riscv_vreinterpret_v_u64m1_u8m1(__riscv_vle64_v_u64m1(err3m, 2)); + + size_t tail = 3; + size_t n = len - tail; + + for (size_t vl; n > 0; n -= vl, src += vl) { + vl = __riscv_vsetvl_e8m4(n); + vuint8m4_t v0 = __riscv_vle8_v_u8m4((uint8_t const *)src, vl); + + uint8_t next0 = src[vl + 0]; + uint8_t next1 = src[vl + 1]; + uint8_t next2 = src[vl + 2]; + + /* fast path: ASCII */ + if (__riscv_vfirst_m_b2(__riscv_vmsgtu_vx_u8m4_b2(v0, 0b01111111, vl), vl) < + 0 && + (next0 | next1 | next2) < 0b10000000) + continue; + + /* see "Validating UTF-8 In Less Than One Instruction Per Byte" + * https://arxiv.org/abs/2010.03090 */ + vuint8m4_t v1 = __riscv_vslide1down_vx_u8m4(v0, next0, vl); + vuint8m4_t v2 = __riscv_vslide1down_vx_u8m4(v1, next1, vl); + + vuint8m4_t v2_hi_nibble = __riscv_vsrl_vx_u8m4(v2, 4, vl); + vuint8m4_t v3_hi_nibble = + __riscv_vslide1down_vx_u8m4(v2_hi_nibble, next2 >> 4, vl); + + vuint8m4_t idx2 = __riscv_vand_vx_u8m4(v2, 0xF, vl); + vuint8m4_t idx1 = v2_hi_nibble; + vuint8m4_t idx3 = v3_hi_nibble; + + vuint8m4_t err1 = simdutf_vrgather_u8m1x4(err1tbl, idx1); + vuint8m4_t err2 = simdutf_vrgather_u8m1x4(err2tbl, idx2); + vuint8m4_t err3 = 
simdutf_vrgather_u8m1x4(err3tbl, idx3); + vint8m4_t errs = __riscv_vreinterpret_v_u8m4_i8m4( + __riscv_vand_vv_u8m4(__riscv_vand_vv_u8m4(err1, err2, vl), err3, vl)); + + vbool2_t is_3 = __riscv_vmsgtu_vx_u8m4_b2(v1, 0b11100000 - 1, vl); + vbool2_t is_4 = __riscv_vmsgtu_vx_u8m4_b2(v0, 0b11110000 - 1, vl); + vbool2_t is_34 = __riscv_vmor_mm_b2(is_3, is_4, vl); + vbool2_t err34 = + __riscv_vmxor_mm_b2(is_34, __riscv_vmslt_vx_i8m4_b2(errs, 0, vl), vl); + vbool2_t errm = + __riscv_vmor_mm_b2(__riscv_vmsgt_vx_i8m4_b2(errs, 0, vl), err34, vl); + if (__riscv_vfirst_m_b2(errm, vl) >= 0) + break; + } + + /* we need to validate the last character */ + while (tail < len && (uint8_t(src[0]) >> 6) == 0b10) + --src, ++tail; + return src - beg; +} + +simdutf_warn_unused bool +implementation::validate_utf8(const char *src, size_t len) const noexcept { + size_t count = rvv_count_valid_utf8(src, len); + return scalar::utf8::validate(src + count, len - count); +} +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 +simdutf_warn_unused result implementation::validate_utf8_with_errors( + const char *src, size_t len) const noexcept { + size_t count = rvv_count_valid_utf8(src, len); + result res = scalar::utf8::validate_with_errors(src + count, len - count); + return result(res.error, count + res.count); +} +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +template +simdutf_really_inline static result +rvv_validate_utf16_with_errors(const char16_t *src, size_t len) { + const char16_t *beg = src; + + const uint16_t mask = simdutf_byteflip(0xfc00); + const uint16_t hi_surrogate = simdutf_byteflip(0xd800); + const uint16_t lo_surrogate = simdutf_byteflip(0xdc00); + + uint16_t last = 0; + for (size_t vl; len > 0; len -= vl, src += vl, last = src[-1]) { + vl = __riscv_vsetvl_e16m8(len); + vuint16m8_t v1 = __riscv_vle16_v_u16m8((const uint16_t *)src, vl); + vuint16m8_t v0 = __riscv_vslide1up_vx_u16m8(v1, 
last, vl); + + vbool2_t surhi = __riscv_vmseq_vx_u16m8_b2( + __riscv_vand_vx_u16m8(v0, mask, vl), hi_surrogate, vl); + vbool2_t surlo = __riscv_vmseq_vx_u16m8_b2( + __riscv_vand_vx_u16m8(v1, mask, vl), lo_surrogate, vl); + + long idx = __riscv_vfirst_m_b2(__riscv_vmxor_mm_b2(surhi, surlo, vl), vl); + if (idx >= 0) { + last = simdutf_byteflip(idx > 0 ? src[idx - 1] : last); + return result(error_code::SURROGATE, + src - beg + idx - (last - 0xD800u < 0x400u)); + break; + } + } + if (simdutf_byteflip(last) - 0xD800u < 0x400u) { + return result(error_code::SURROGATE, + src - beg - 1); /* end on high surrogate */ + } else { + return result(error_code::SUCCESS, src - beg); + } +} +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused bool +implementation::validate_utf16le(const char16_t *src, + size_t len) const noexcept { + return rvv_validate_utf16_with_errors(src, len) + .error == error_code::SUCCESS; +} +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused bool +implementation::validate_utf16be(const char16_t *src, + size_t len) const noexcept { + return validate_utf16be_with_errors(src, len).error == error_code::SUCCESS; +} +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused result implementation::validate_utf16le_with_errors( + const char16_t *src, size_t len) const noexcept { + return rvv_validate_utf16_with_errors(src, len); +} + +simdutf_warn_unused result implementation::validate_utf16be_with_errors( + const char16_t *src, size_t len) const noexcept { + if (supports_zvbb()) + return rvv_validate_utf16_with_errors(src, len); + else + return rvv_validate_utf16_with_errors(src, len); +} +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused bool +implementation::validate_utf32(const char32_t *src, size_t len) 
const noexcept { + size_t vlmax = __riscv_vsetvlmax_e32m8(); + vuint32m8_t max = __riscv_vmv_v_x_u32m8(0x10FFFF, vlmax); + vuint32m8_t maxOff = __riscv_vmv_v_x_u32m8(0xFFFFF7FF, vlmax); + for (size_t vl; len > 0; len -= vl, src += vl) { + vl = __riscv_vsetvl_e32m8(len); + vuint32m8_t v = __riscv_vle32_v_u32m8((uint32_t *)src, vl); + vuint32m8_t off = __riscv_vadd_vx_u32m8(v, 0xFFFF2000, vl); + max = __riscv_vmaxu_vv_u32m8_tu(max, max, v, vl); + maxOff = __riscv_vmaxu_vv_u32m8_tu(maxOff, maxOff, off, vl); + } + return __riscv_vfirst_m_b4( + __riscv_vmor_mm_b4( + __riscv_vmsne_vx_u32m8_b4(max, 0x10FFFF, vlmax), + __riscv_vmsne_vx_u32m8_b4(maxOff, 0xFFFFF7FF, vlmax), vlmax), + vlmax) < 0; +} +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused result implementation::validate_utf32_with_errors( + const char32_t *src, size_t len) const noexcept { + const char32_t *beg = src; + for (size_t vl; len > 0; len -= vl, src += vl) { + vl = __riscv_vsetvl_e32m8(len); + vuint32m8_t v = __riscv_vle32_v_u32m8((uint32_t *)src, vl); + vuint32m8_t off = __riscv_vadd_vx_u32m8(v, 0xFFFF2000, vl); + long idx1 = + __riscv_vfirst_m_b4(__riscv_vmsgtu_vx_u32m8_b4(v, 0x10FFFF, vl), vl); + long idx2 = __riscv_vfirst_m_b4( + __riscv_vmsgtu_vx_u32m8_b4(off, 0xFFFFF7FF, vl), vl); + if (idx1 >= 0 && idx2 >= 0) { + if (idx1 <= idx2) { + return result(error_code::TOO_LARGE, src - beg + idx1); + } else { + return result(error_code::SURROGATE, src - beg + idx2); + } + } + if (idx1 >= 0) { + return result(error_code::TOO_LARGE, src - beg + idx1); + } + if (idx2 >= 0) { + return result(error_code::SURROGATE, src - beg + idx2); + } + } + return result(error_code::SUCCESS, src - beg); +} +#endif // SIMDUTF_FEATURE_UTF32 +/* end file src/rvv/rvv_validate.inl.cpp */ + +/* begin file src/rvv/rvv_latin1_to.inl.cpp */ +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( + const 
char *src, size_t len, char *dst) const noexcept { + char *beg = dst; + for (size_t vl, vlOut; len > 0; len -= vl, src += vl, dst += vlOut) { + vl = __riscv_vsetvl_e8m2(len); + vuint8m2_t v1 = __riscv_vle8_v_u8m2((uint8_t *)src, vl); + vbool4_t nascii = + __riscv_vmslt_vx_i8m2_b4(__riscv_vreinterpret_v_u8m2_i8m2(v1), 0, vl); + size_t cnt = __riscv_vcpop_m_b4(nascii, vl); + vlOut = vl + cnt; + if (cnt == 0) { + __riscv_vse8_v_u8m2((uint8_t *)dst, v1, vlOut); + continue; + } + + vuint8m2_t v0 = + __riscv_vor_vx_u8m2(__riscv_vsrl_vx_u8m2(v1, 6, vl), 0b11000000, vl); + v1 = __riscv_vand_vx_u8m2_mu(nascii, v1, v1, 0b10111111, vl); + + vuint8m4_t wide = + __riscv_vreinterpret_v_u16m4_u8m4(__riscv_vwmaccu_vx_u16m4( + __riscv_vwaddu_vv_u16m4(v0, v1, vl), 0xFF, v1, vl)); + vbool2_t mask = __riscv_vmsgtu_vx_u8m4_b2( + __riscv_vsub_vx_u8m4(wide, 0b11000000, vl * 2), 1, vl * 2); + vuint8m4_t comp = __riscv_vcompress_vm_u8m4(wide, mask, vl * 2); + + __riscv_vse8_v_u8m4((uint8_t *)dst, comp, vlOut); + } + return dst - beg; +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( + const char *src, size_t len, char16_t *dst) const noexcept { + char16_t *beg = dst; + for (size_t vl; len > 0; len -= vl, src += vl, dst += vl) { + vl = __riscv_vsetvl_e8m4(len); + vuint8m4_t v = __riscv_vle8_v_u8m4((uint8_t *)src, vl); + __riscv_vse16_v_u16m8((uint16_t *)dst, __riscv_vzext_vf2_u16m8(v, vl), vl); + } + return dst - beg; +} + +simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be( + const char *src, size_t len, char16_t *dst) const noexcept { + char16_t *beg = dst; + for (size_t vl; len > 0; len -= vl, src += vl, dst += vl) { + vl = __riscv_vsetvl_e8m4(len); + vuint8m4_t v = __riscv_vle8_v_u8m4((uint8_t *)src, vl); + __riscv_vse16_v_u16m8( + (uint16_t *)dst, + __riscv_vsll_vx_u16m8(__riscv_vzext_vf2_u16m8(v, vl), 8, vl), vl); + } + return dst - 
beg; +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( + const char *src, size_t len, char32_t *dst) const noexcept { + char32_t *beg = dst; + for (size_t vl; len > 0; len -= vl, src += vl, dst += vl) { + vl = __riscv_vsetvl_e8m2(len); + vuint8m2_t v = __riscv_vle8_v_u8m2((uint8_t *)src, vl); + __riscv_vse32_v_u32m8((uint32_t *)dst, __riscv_vzext_vf4_u32m8(v, vl), vl); + } + return dst - beg; +} +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +/* end file src/rvv/rvv_latin1_to.inl.cpp */ +/* begin file src/rvv/rvv_utf16_to.inl.cpp */ +#if SIMDUTF_FEATURE_UTF16 +template +simdutf_really_inline static result +rvv_utf16_to_latin1_with_errors(const char16_t *src, size_t len, char *dst) { + const char16_t *const beg = src; + for (size_t vl; len > 0; len -= vl, src += vl, dst += vl) { + vl = __riscv_vsetvl_e16m8(len); + vuint16m8_t v = __riscv_vle16_v_u16m8((uint16_t *)src, vl); + v = simdutf_byteflip(v, vl); + long idx = __riscv_vfirst_m_b2(__riscv_vmsgtu_vx_u16m8_b2(v, 255, vl), vl); + if (idx >= 0) + return result(error_code::TOO_LARGE, src - beg + idx); + __riscv_vse8_v_u8m4((uint8_t *)dst, __riscv_vncvt_x_x_w_u8m4(v, vl), vl); + } + return result(error_code::SUCCESS, src - beg); +} +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( + const char16_t *src, size_t len, char *dst) const noexcept { + result res = convert_utf16le_to_latin1_with_errors(src, len, dst); + return res.error == error_code::SUCCESS ? res.count : 0; +} + +simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1( + const char16_t *src, size_t len, char *dst) const noexcept { + result res = convert_utf16be_to_latin1_with_errors(src, len, dst); + return res.error == error_code::SUCCESS ? 
res.count : 0; +} + +simdutf_warn_unused result +implementation::convert_utf16le_to_latin1_with_errors( + const char16_t *src, size_t len, char *dst) const noexcept { + return rvv_utf16_to_latin1_with_errors(src, len, dst); +} + +simdutf_warn_unused result +implementation::convert_utf16be_to_latin1_with_errors( + const char16_t *src, size_t len, char *dst) const noexcept { + if (supports_zvbb()) + return rvv_utf16_to_latin1_with_errors(src, len, + dst); + else + return rvv_utf16_to_latin1_with_errors(src, len, dst); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1( + const char16_t *src, size_t len, char *dst) const noexcept { + const char16_t *const beg = src; + for (size_t vl; len > 0; len -= vl, src += vl, dst += vl) { + vl = __riscv_vsetvl_e16m8(len); + vuint16m8_t v = __riscv_vle16_v_u16m8((uint16_t *)src, vl); + __riscv_vse8_v_u8m4((uint8_t *)dst, __riscv_vncvt_x_x_w_u8m4(v, vl), vl); + } + return src - beg; +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1( + const char16_t *src, size_t len, char *dst) const noexcept { + const char16_t *const beg = src; + for (size_t vl; len > 0; len -= vl, src += vl, dst += vl) { + vl = __riscv_vsetvl_e16m8(len); + vuint16m8_t v = __riscv_vle16_v_u16m8((uint16_t *)src, vl); + __riscv_vse8_v_u8m4((uint8_t *)dst, __riscv_vnsrl_wx_u8m4(v, 8, vl), vl); + } + return src - beg; +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +template +simdutf_really_inline static result +rvv_utf16_to_utf8_with_errors(const char16_t *src, size_t len, char *dst) { + size_t n = len; + const char16_t *srcBeg = src; + const char *dstBeg = dst; + size_t vl8m4 = __riscv_vsetvlmax_e8m4(); + vbool2_t m4mulp2 = __riscv_vmseq_vx_u8m4_b2( + __riscv_vand_vx_u8m4(__riscv_vid_v_u8m4(vl8m4), 3, vl8m4), 2, vl8m4); + + for (size_t vl, vlOut; n > 0;) { + vl = __riscv_vsetvl_e16m2(n); + + vuint16m2_t v = __riscv_vle16_v_u16m2((uint16_t 
const *)src, vl); + v = simdutf_byteflip(v, vl); + vbool8_t m234 = __riscv_vmsgtu_vx_u16m2_b8(v, 0x80 - 1, vl); + + if (__riscv_vfirst_m_b8(m234, vl) < 0) { /* 1 byte utf8 */ + vlOut = vl; + __riscv_vse8_v_u8m1((uint8_t *)dst, __riscv_vncvt_x_x_w_u8m1(v, vlOut), + vlOut); + n -= vl, src += vl, dst += vlOut; + continue; + } + + vbool8_t m34 = __riscv_vmsgtu_vx_u16m2_b8(v, 0x800 - 1, vl); + + if (__riscv_vfirst_m_b8(m34, vl) < 0) { /* 1/2 byte utf8 */ + /* 0: [ aaa|aabbbbbb] + * 1: [aabbbbbb| ] vsll 8 + * 2: [ | aaaaa] vsrl 6 + * 3: [00111111|00011111] + * 4: [ bbbbbb|000aaaaa] (1|2)&3 + * 5: [11000000|11000000] + * 6: [10bbbbbb|110aaaaa] 4|5 */ + vuint16m2_t twoByte = __riscv_vand_vx_u16m2( + __riscv_vor_vv_u16m2(__riscv_vsll_vx_u16m2(v, 8, vl), + __riscv_vsrl_vx_u16m2(v, 6, vl), vl), + 0b0011111100011111, vl); + vuint16m2_t vout16 = + __riscv_vor_vx_u16m2_mu(m234, v, twoByte, 0b1000000011000000, vl); + vuint8m2_t vout = __riscv_vreinterpret_v_u16m2_u8m2(vout16); + + /* Every high byte that is zero should be compressed + * low bytes should never be compressed, so we set them + * to all ones, and then create a non-zero bytes mask */ + vbool4_t mcomp = + __riscv_vmsne_vx_u8m2_b4(__riscv_vreinterpret_v_u16m2_u8m2( + __riscv_vor_vx_u16m2(vout16, 0xFF, vl)), + 0, vl * 2); + vlOut = __riscv_vcpop_m_b4(mcomp, vl * 2); + + vout = __riscv_vcompress_vm_u8m2(vout, mcomp, vl * 2); + __riscv_vse8_v_u8m2((uint8_t *)dst, vout, vlOut); + + n -= vl, src += vl, dst += vlOut; + continue; + } + + vbool8_t sur = __riscv_vmseq_vx_u16m2_b8( + __riscv_vand_vx_u16m2(v, 0xF800, vl), 0xD800, vl); + long first = __riscv_vfirst_m_b8(sur, vl); + size_t tail = vl - first; + vl = first < 0 ? 
vl : first; + + if (vl > 0) { /* 1/2/3 byte utf8 */ + /* in: [aaaabbbb|bbcccccc] + * v1: [0bcccccc| ] vsll 8 + * v1: [10cccccc| ] vsll 8 & 0b00111111 | 0b10000000 + * v2: [ |110bbbbb] vsrl 6 & 0b00111111 | 0b11000000 + * v2: [ |10bbbbbb] vsrl 6 & 0b00111111 | 0b10000000 + * v3: [ |1110aaaa] vsrl 12 | 0b11100000 + * 1: [00000000|0bcccccc|00000000|00000000] => [0bcccccc] + * 2: [00000000|10cccccc|110bbbbb|00000000] => [110bbbbb] [10cccccc] + * 3: [00000000|10cccccc|10bbbbbb|1110aaaa] => [1110aaaa] [10bbbbbb] + * [10cccccc] + */ + vuint16m2_t v1, v2, v3, v12; + v1 = __riscv_vor_vx_u16m2_mu( + m234, v, __riscv_vand_vx_u16m2(v, 0b00111111, vl), 0b10000000, vl); + v1 = __riscv_vsll_vx_u16m2(v1, 8, vl); + + v2 = __riscv_vor_vx_u16m2( + __riscv_vand_vx_u16m2(__riscv_vsrl_vx_u16m2(v, 6, vl), 0b00111111, + vl), + 0b10000000, vl); + v2 = __riscv_vor_vx_u16m2_mu(__riscv_vmnot_m_b8(m34, vl), v2, v2, + 0b01000000, vl); + v3 = __riscv_vor_vx_u16m2(__riscv_vsrl_vx_u16m2(v, 12, vl), 0b11100000, + vl); + v12 = __riscv_vor_vv_u16m2_mu(m234, v1, v1, v2, vl); + + vuint32m4_t w12 = __riscv_vwmulu_vx_u32m4(v12, 1 << 8, vl); + vuint32m4_t w123 = __riscv_vwaddu_wv_u32m4_mu(m34, w12, w12, v3, vl); + vuint8m4_t vout = __riscv_vreinterpret_v_u32m4_u8m4(w123); + + vbool2_t mcomp = __riscv_vmor_mm_b2( + m4mulp2, __riscv_vmsne_vx_u8m4_b2(vout, 0, vl * 4), vl * 4); + vlOut = __riscv_vcpop_m_b2(mcomp, vl * 4); + + vout = __riscv_vcompress_vm_u8m4(vout, mcomp, vl * 4); + __riscv_vse8_v_u8m4((uint8_t *)dst, vout, vlOut); + + n -= vl, src += vl, dst += vlOut; + } + + if (tail) + while (n) { + uint16_t word = simdutf_byteflip(src[0]); + if ((word & 0xFF80) == 0) { + break; + } else if ((word & 0xF800) == 0) { + break; + } else if ((word & 0xF800) != 0xD800) { + break; + } else { + // must be a surrogate pair + if (n <= 1) + return result(error_code::SURROGATE, src - srcBeg); + uint16_t diff = word - 0xD800; + if (diff > 0x3FF) + return result(error_code::SURROGATE, src - srcBeg); + uint16_t diff2 = 
simdutf_byteflip(src[1]) - 0xDC00; + if (diff2 > 0x3FF) + return result(error_code::SURROGATE, src - srcBeg); + + uint32_t value = ((diff + 0x40) << 10) + diff2; + + // will generate four UTF-8 bytes + // we have 0b11110XXX 0b10XXXXXX 0b10XXXXXX 0b10XXXXXX + *dst++ = (char)((value >> 18) | 0b11110000); + *dst++ = (char)(((value >> 12) & 0b111111) | 0b10000000); + *dst++ = (char)(((value >> 6) & 0b111111) | 0b10000000); + *dst++ = (char)((value & 0b111111) | 0b10000000); + src += 2; + n -= 2; + } + } + } + + return result(error_code::SUCCESS, dst - dstBeg); +} + +simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( + const char16_t *src, size_t len, char *dst) const noexcept { + result res = convert_utf16le_to_utf8_with_errors(src, len, dst); + return res.error == error_code::SUCCESS ? res.count : 0; +} + +simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8( + const char16_t *src, size_t len, char *dst) const noexcept { + result res = convert_utf16be_to_utf8_with_errors(src, len, dst); + return res.error == error_code::SUCCESS ? 
res.count : 0; +} + +simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors( + const char16_t *src, size_t len, char *dst) const noexcept { + return rvv_utf16_to_utf8_with_errors(src, len, dst); +} + +simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors( + const char16_t *src, size_t len, char *dst) const noexcept { + if (supports_zvbb()) + return rvv_utf16_to_utf8_with_errors(src, len, dst); + else + return rvv_utf16_to_utf8_with_errors(src, len, dst); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8( + const char16_t *src, size_t len, char *dst) const noexcept { + return convert_utf16le_to_utf8(src, len, dst); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( + const char16_t *src, size_t len, char *dst) const noexcept { + return convert_utf16be_to_utf8(src, len, dst); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +template +simdutf_really_inline static result +rvv_utf16_to_utf32_with_errors(const char16_t *src, size_t len, char32_t *dst) { + const char16_t *const srcBeg = src; + char32_t *const dstBeg = dst; + + constexpr const uint16_t ANY_SURROGATE_MASK = 0xf800; + constexpr const uint16_t ANY_SURROGATE_VALUE = 0xd800; + constexpr const uint16_t LO_SURROGATE_MASK = 0xfc00; + constexpr const uint16_t LO_SURROGATE_VALUE = 0xdc00; + constexpr const uint16_t HI_SURROGATE_MASK = 0xfc00; + constexpr const uint16_t HI_SURROGATE_VALUE = 0xd800; + + uint16_t last = 0; + while (len > 0) { + size_t vl = __riscv_vsetvl_e16m2(len); + vuint16m2_t v0 = __riscv_vle16_v_u16m2((uint16_t const *)src, vl); + v0 = simdutf_byteflip(v0, vl); + + { // check fast-path + const vuint16m2_t v = __riscv_vand_vx_u16m2(v0, ANY_SURROGATE_MASK, vl); + const vbool8_t any_surrogate = + __riscv_vmseq_vx_u16m2_b8(v, ANY_SURROGATE_VALUE, vl); + if (__riscv_vfirst_m_b8(any_surrogate, vl) < 0) { + /* no surrogates */ + 
__riscv_vse32_v_u32m4((uint32_t *)dst, __riscv_vzext_vf2_u32m4(v0, vl), + vl); + len -= vl; + src += vl; + dst += vl; + continue; + } + } + + if ((simdutf_byteflip(src[0]) & LO_SURROGATE_MASK) == + LO_SURROGATE_VALUE) { + return result(error_code::SURROGATE, src - srcBeg); + } + + // decode surrogates + vuint16m2_t v1 = __riscv_vslide1down_vx_u16m2(v0, 0, vl); + vl = __riscv_vsetvl_e16m2(vl - 1); + if (vl == 0) { + return result(error_code::SURROGATE, src - srcBeg); + } + + const vbool8_t surhi = __riscv_vmseq_vx_u16m2_b8( + __riscv_vand_vx_u16m2(v0, HI_SURROGATE_MASK, vl), HI_SURROGATE_VALUE, + vl); + const vbool8_t surlo = __riscv_vmseq_vx_u16m2_b8( + __riscv_vand_vx_u16m2(v1, LO_SURROGATE_MASK, vl), LO_SURROGATE_VALUE, + vl); + + // compress everything but lo surrogates + const vbool8_t compress = __riscv_vmsne_vx_u16m2_b8( + __riscv_vand_vx_u16m2(v0, LO_SURROGATE_MASK, vl), LO_SURROGATE_VALUE, + vl); + + { + const vbool8_t diff = __riscv_vmxor_mm_b8(surhi, surlo, vl); + const long idx = __riscv_vfirst_m_b8(diff, vl); + if (idx >= 0) { + uint16_t word = simdutf_byteflip(src[idx]); + if (word < 0xD800 || word > 0xDBFF) { + return result(error_code::SURROGATE, src - srcBeg + idx + 1); + } + return result(error_code::SURROGATE, src - srcBeg + idx); + } + } + + last = simdutf_byteflip(src[vl]); + vuint32m4_t utf32 = __riscv_vzext_vf2_u32m4(v0, vl); + + // v0 = 110110yyyyyyyyyy (0xd800 + yyyyyyyyyy) --- hi surrogate + // v1 = 110111xxxxxxxxxx (0xdc00 + xxxxxxxxxx) --- lo surrogate + + // t0 = u16( 0000_00yy_yyyy_yyyy) + const vuint32m4_t t0 = + __riscv_vzext_vf2_u32m4(__riscv_vand_vx_u16m2(v0, 0x03ff, vl), vl); + // t1 = u32(0000_0000_0000_yyyy_yyyy_yy00_0000_0000) + const vuint32m4_t t1 = __riscv_vsll_vx_u32m4(t0, 10, vl); + + // t2 = u32(0000_0000_0000_0000_0000_00xx_xxxx_xxxx) + const vuint32m4_t t2 = + __riscv_vzext_vf2_u32m4(__riscv_vand_vx_u16m2(v1, 0x03ff, vl), vl); + + // t3 = u32(0000_0000_0000_yyyy_yyyy_yyxx_xxxx_xxxx) + const vuint32m4_t t3 = 
__riscv_vor_vv_u32m4(t1, t2, vl); + + // t4 = utf32 from surrogate pairs + const vuint32m4_t t4 = __riscv_vadd_vx_u32m4(t3, 0x10000, vl); + + const vuint32m4_t result = __riscv_vmerge_vvm_u32m4(utf32, t4, surhi, vl); + + const vuint32m4_t comp = __riscv_vcompress_vm_u32m4(result, compress, vl); + const size_t vlOut = __riscv_vcpop_m_b8(compress, vl); + __riscv_vse32_v_u32m4((uint32_t *)dst, comp, vlOut); + + len -= vl; + src += vl; + dst += vlOut; + + if ((last & LO_SURROGATE_MASK) == LO_SURROGATE_VALUE) { + // last item is lo surrogate and got already consumed + len -= 1; + src += 1; + } + } + + return result(error_code::SUCCESS, dst - dstBeg); +} + +simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( + const char16_t *src, size_t len, char32_t *dst) const noexcept { + result res = convert_utf16le_to_utf32_with_errors(src, len, dst); + return res.error == error_code::SUCCESS ? res.count : 0; +} + +simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32( + const char16_t *src, size_t len, char32_t *dst) const noexcept { + result res = convert_utf16be_to_utf32_with_errors(src, len, dst); + return res.error == error_code::SUCCESS ? 
res.count : 0; +} + +simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors( + const char16_t *src, size_t len, char32_t *dst) const noexcept { + return rvv_utf16_to_utf32_with_errors(src, len, dst); +} + +simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( + const char16_t *src, size_t len, char32_t *dst) const noexcept { + if (supports_zvbb()) + return rvv_utf16_to_utf32_with_errors(src, len, + dst); + else + return rvv_utf16_to_utf32_with_errors(src, len, dst); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32( + const char16_t *src, size_t len, char32_t *dst) const noexcept { + return convert_utf16le_to_utf32(src, len, dst); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32( + const char16_t *src, size_t len, char32_t *dst) const noexcept { + return convert_utf16be_to_utf32(src, len, dst); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +/* end file src/rvv/rvv_utf16_to.inl.cpp */ + +/* begin file src/rvv/rvv_utf32_to.inl.cpp */ +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( + const char32_t *src, size_t len, char *dst) const noexcept { + result res = convert_utf32_to_latin1_with_errors(src, len, dst); + return res.error == error_code::SUCCESS ? res.count : 0; +} + +simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors( + const char32_t *src, size_t len, char *dst) const noexcept { + const char32_t *const beg = src; + for (size_t vl; len > 0; len -= vl, src += vl, dst += vl) { + vl = __riscv_vsetvl_e32m8(len); + vuint32m8_t v = __riscv_vle32_v_u32m8((uint32_t *)src, vl); + long idx = __riscv_vfirst_m_b4(__riscv_vmsgtu_vx_u32m8_b4(v, 255, vl), vl); + if (idx >= 0) + return result(error_code::TOO_LARGE, src - beg + idx); + /* We don't use vcompress here, because its performance varies widely on + * current platforms. 
This might be worth reconsidering once there is more + * hardware available. */ + __riscv_vse8_v_u8m2( + (uint8_t *)dst, + __riscv_vncvt_x_x_w_u8m2(__riscv_vncvt_x_x_w_u16m4(v, vl), vl), vl); + } + return result(error_code::SUCCESS, src - beg); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( + const char32_t *src, size_t len, char *dst) const noexcept { + return convert_utf32_to_latin1(src, len, dst); +} +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +template +simdutf_warn_unused result convert_utf32_to_utf8_aux(const char32_t *src, + size_t len, + char *dst) noexcept { + size_t n = len; + const char32_t *srcBeg = src; + const char *dstBeg = dst; + size_t vl8m4 = __riscv_vsetvlmax_e8m4(); + vbool2_t m4mulp2 = __riscv_vmseq_vx_u8m4_b2( + __riscv_vand_vx_u8m4(__riscv_vid_v_u8m4(vl8m4), 3, vl8m4), 2, vl8m4); + + for (size_t vl, vlOut; n > 0;) { + vl = __riscv_vsetvl_e32m4(n); + + vuint32m4_t v = __riscv_vle32_v_u32m4((uint32_t const *)src, vl); + vbool8_t m234 = __riscv_vmsgtu_vx_u32m4_b8(v, 0x80 - 1, vl); + vuint16m2_t vn = __riscv_vncvt_x_x_w_u16m2(v, vl); + + if (__riscv_vfirst_m_b8(m234, vl) < 0) { /* 1 byte utf8 */ + vlOut = vl; + __riscv_vse8_v_u8m1((uint8_t *)dst, __riscv_vncvt_x_x_w_u8m1(vn, vlOut), + vlOut); + n -= vl, src += vl, dst += vlOut; + continue; + } + + vbool8_t m34 = __riscv_vmsgtu_vx_u32m4_b8(v, 0x800 - 1, vl); + + if (__riscv_vfirst_m_b8(m34, vl) < 0) { /* 1/2 byte utf8 */ + /* 0: [ aaa|aabbbbbb] + * 1: [aabbbbbb| ] vsll 8 + * 2: [ | aaaaa] vsrl 6 + * 3: [00111111|00111111] + * 4: [ bbbbbb|000aaaaa] (1|2)&3 + * 5: [10000000|11000000] + * 6: [10bbbbbb|110aaaaa] 4|5 */ + vuint16m2_t twoByte = __riscv_vand_vx_u16m2( + __riscv_vor_vv_u16m2(__riscv_vsll_vx_u16m2(vn, 8, vl), + __riscv_vsrl_vx_u16m2(vn, 6, vl), vl), + 0b0011111100111111, vl); + vuint16m2_t vout16 = + __riscv_vor_vx_u16m2_mu(m234, vn, twoByte, 0b1000000011000000, vl); + vuint8m2_t vout = 
__riscv_vreinterpret_v_u16m2_u8m2(vout16); + + /* Every high byte that is zero should be compressed + * low bytes should never be compressed, so we set them + * to all ones, and then create a non-zero bytes mask */ + vbool4_t mcomp = + __riscv_vmsne_vx_u8m2_b4(__riscv_vreinterpret_v_u16m2_u8m2( + __riscv_vor_vx_u16m2(vout16, 0xFF, vl)), + 0, vl * 2); + vlOut = __riscv_vcpop_m_b4(mcomp, vl * 2); + + vout = __riscv_vcompress_vm_u8m2(vout, mcomp, vl * 2); + __riscv_vse8_v_u8m2((uint8_t *)dst, vout, vlOut); + + n -= vl, src += vl, dst += vlOut; + continue; + } + + if (with_validation) { + const long idx1 = + __riscv_vfirst_m_b8(__riscv_vmsgtu_vx_u32m4_b8(v, 0x10FFFF, vl), vl); + vbool8_t sur = __riscv_vmseq_vx_u32m4_b8( + __riscv_vand_vx_u32m4(v, 0xFFFFF800, vl), 0xD800, vl); + const long idx2 = __riscv_vfirst_m_b8(sur, vl); + if (idx1 >= 0 || idx2 >= 0) { + if (static_cast(idx1) <= + static_cast(idx2)) { + return result(error_code::TOO_LARGE, src - srcBeg + idx1); + } else { + return result(error_code::SURROGATE, src - srcBeg + idx2); + } + } + } + + vbool8_t m4 = __riscv_vmsgtu_vx_u32m4_b8(v, 0x10000 - 1, vl); + long first = __riscv_vfirst_m_b8(m4, vl); + size_t tail = vl - first; + vl = first < 0 ? 
vl : first; + + if (vl > 0) { /* 1/2/3 byte utf8 */ + /* vn: [aaaabbbb|bbcccccc] + * v1: [0bcccccc| ] vsll 8 + * v1: [10cccccc| ] vsll 8 & 0b00111111 | 0b10000000 + * v2: [ |110bbbbb] vsrl 6 & 0b00111111 | 0b11000000 + * v2: [ |10bbbbbb] vsrl 6 & 0b00111111 | 0b10000000 + * v3: [ |1110aaaa] vsrl 12 | 0b11100000 + * 1: [00000000|0bcccccc|00000000|00000000] => [0bcccccc] + * 2: [00000000|10cccccc|110bbbbb|00000000] => [110bbbbb] [10cccccc] + * 3: [00000000|10cccccc|10bbbbbb|1110aaaa] => [1110aaaa] [10bbbbbb] + * [10cccccc] + */ + vuint16m2_t v1, v2, v3, v12; + v1 = __riscv_vor_vx_u16m2_mu( + m234, vn, __riscv_vand_vx_u16m2(vn, 0b00111111, vl), 0b10000000, vl); + v1 = __riscv_vsll_vx_u16m2(v1, 8, vl); + + v2 = __riscv_vor_vx_u16m2( + __riscv_vand_vx_u16m2(__riscv_vsrl_vx_u16m2(vn, 6, vl), 0b00111111, + vl), + 0b10000000, vl); + v2 = __riscv_vor_vx_u16m2_mu(__riscv_vmnot_m_b8(m34, vl), v2, v2, + 0b01000000, vl); + v3 = __riscv_vor_vx_u16m2(__riscv_vsrl_vx_u16m2(vn, 12, vl), 0b11100000, + vl); + v12 = __riscv_vor_vv_u16m2_mu(m234, v1, v1, v2, vl); + + vuint32m4_t w12 = __riscv_vwmulu_vx_u32m4(v12, 1 << 8, vl); + vuint32m4_t w123 = __riscv_vwaddu_wv_u32m4_mu(m34, w12, w12, v3, vl); + vuint8m4_t vout = __riscv_vreinterpret_v_u32m4_u8m4(w123); + + vbool2_t mcomp = __riscv_vmor_mm_b2( + m4mulp2, __riscv_vmsne_vx_u8m4_b2(vout, 0, vl * 4), vl * 4); + vlOut = __riscv_vcpop_m_b2(mcomp, vl * 4); + + vout = __riscv_vcompress_vm_u8m4(vout, mcomp, vl * 4); + __riscv_vse8_v_u8m4((uint8_t *)dst, vout, vlOut); + + n -= vl, src += vl, dst += vlOut; + } + + if (tail) + while (n) { + uint32_t word = src[0]; + if (word < 0x10000) + break; + if (word > 0x10FFFF) + return result(error_code::TOO_LARGE, src - srcBeg); + *dst++ = (uint8_t)((word >> 18) | 0b11110000); + *dst++ = (uint8_t)(((word >> 12) & 0b111111) | 0b10000000); + *dst++ = (uint8_t)(((word >> 6) & 0b111111) | 0b10000000); + *dst++ = (uint8_t)((word & 0b111111) | 0b10000000); + ++src; + --n; + } + } + + return 
result(error_code::SUCCESS, dst - dstBeg); +} + +simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( + const char32_t *src, size_t len, char *dst) const noexcept { + constexpr bool with_validation = true; + return convert_utf32_to_utf8_aux(src, len, dst); +} + +simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( + const char32_t *src, size_t len, char *dst) const noexcept { + result res = convert_utf32_to_utf8_with_errors(src, len, dst); + return res.error == error_code::SUCCESS ? res.count : 0; +} + +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( + const char32_t *src, size_t len, char *dst) const noexcept { + constexpr bool with_validation = false; + const auto res = convert_utf32_to_utf8_aux(src, len, dst); + return res.count; +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +template +simdutf_really_inline static result +rvv_convert_utf32_to_utf16_with_errors(const char32_t *src, size_t len, + char16_t *dst) { + size_t vl8m2 = __riscv_vsetvlmax_e8m2(); + vbool4_t m4even = __riscv_vmseq_vx_u8m2_b4( + __riscv_vand_vx_u8m2(__riscv_vid_v_u8m2(vl8m2), 1, vl8m2), 0, vl8m2); + const char16_t *dstBeg = dst; + const char32_t *srcBeg = src; + for (size_t vl, vlOut; len > 0; len -= vl, src += vl, dst += vlOut) { + vl = __riscv_vsetvl_e32m4(len); + vuint32m4_t v = __riscv_vle32_v_u32m4((uint32_t *)src, vl); + vuint32m4_t off = __riscv_vadd_vx_u32m4(v, 0xFFFF2000, vl); + const long err_surrogate_idx = __riscv_vfirst_m_b8( + __riscv_vmsgtu_vx_u32m4_b8(off, 0xFFFFF7FF, vl), vl); + const long idx = + __riscv_vfirst_m_b8(__riscv_vmsgtu_vx_u32m4_b8(v, 0xFFFF, vl), vl); + if (idx < 0) { + if (err_surrogate_idx >= 0) { + return result(error_code::SURROGATE, src - srcBeg + err_surrogate_idx); + } + + vlOut = vl; + vuint16m2_t n = + simdutf_byteflip(__riscv_vncvt_x_x_w_u16m2(v, vlOut), vlOut); + __riscv_vse16_v_u16m2((uint16_t *)dst, n, vlOut); + 
continue; + } + + const long err_too_big_idx = + __riscv_vfirst_m_b8(__riscv_vmsgtu_vx_u32m4_b8(v, 0x10FFFF, vl), vl); + if (err_too_big_idx >= 0 || err_surrogate_idx >= 0) { + if (static_cast(err_too_big_idx) <= + static_cast(err_surrogate_idx)) { + return result(error_code::TOO_LARGE, src - srcBeg + err_too_big_idx); + } else { + return result(error_code::SURROGATE, src - srcBeg + err_surrogate_idx); + } + } + + vlOut = rvv_utf32_store_utf16_m4((uint16_t *)dst, v, vl, m4even); + } + return result(error_code::SUCCESS, dst - dstBeg); +} + +simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( + const char32_t *src, size_t len, char16_t *dst) const noexcept { + result res = convert_utf32_to_utf16le_with_errors(src, len, dst); + return res.error == error_code::SUCCESS ? res.count : 0; +} + +simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be( + const char32_t *src, size_t len, char16_t *dst) const noexcept { + result res = convert_utf32_to_utf16be_with_errors(src, len, dst); + return res.error == error_code::SUCCESS ? 
res.count : 0; +} + +simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors( + const char32_t *src, size_t len, char16_t *dst) const noexcept { + return rvv_convert_utf32_to_utf16_with_errors( + src, len, dst); +} + +simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors( + const char32_t *src, size_t len, char16_t *dst) const noexcept { + if (supports_zvbb()) + return rvv_convert_utf32_to_utf16_with_errors( + src, len, dst); + else + return rvv_convert_utf32_to_utf16_with_errors(src, len, + dst); +} + +template +simdutf_really_inline static size_t +rvv_convert_valid_utf32_to_utf16(const char32_t *src, size_t len, + char16_t *dst) { + size_t vl8m2 = __riscv_vsetvlmax_e8m2(); + vbool4_t m4even = __riscv_vmseq_vx_u8m2_b4( + __riscv_vand_vx_u8m2(__riscv_vid_v_u8m2(vl8m2), 1, vl8m2), 0, vl8m2); + char16_t *dstBeg = dst; + for (size_t vl, vlOut; len > 0; len -= vl, src += vl, dst += vlOut) { + vl = __riscv_vsetvl_e32m4(len); + vuint32m4_t v = __riscv_vle32_v_u32m4((uint32_t *)src, vl); + if (__riscv_vfirst_m_b8(__riscv_vmsgtu_vx_u32m4_b8(v, 0xFFFF, vl), vl) < + 0) { + vlOut = vl; + vuint16m2_t n = + simdutf_byteflip(__riscv_vncvt_x_x_w_u16m2(v, vlOut), vlOut); + __riscv_vse16_v_u16m2((uint16_t *)dst, n, vlOut); + continue; + } + vlOut = rvv_utf32_store_utf16_m4((uint16_t *)dst, v, vl, m4even); + } + return dst - dstBeg; +} + +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le( + const char32_t *src, size_t len, char16_t *dst) const noexcept { + return rvv_convert_valid_utf32_to_utf16(src, len, + dst); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be( + const char32_t *src, size_t len, char16_t *dst) const noexcept { + if (supports_zvbb()) + return rvv_convert_valid_utf32_to_utf16(src, len, + dst); + else + return rvv_convert_valid_utf32_to_utf16(src, len, dst); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +/* end file src/rvv/rvv_utf32_to.inl.cpp */ 
+/* begin file src/rvv/rvv_utf8_to.inl.cpp */ +#if SIMDUTF_FEATURE_UTF8 && (SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32) +template +simdutf_really_inline static size_t rvv_utf8_to_common(char const *src, + size_t len, Tdst *dst) { + static_assert(std::is_same() || + std::is_same(), + "invalid type"); + constexpr bool is16 = std::is_same(); + constexpr endianness endian = + bflip == simdutf_ByteFlip::NONE ? endianness::LITTLE : endianness::BIG; + const auto scalar = [](char const *in, size_t count, Tdst *out) { + return is16 ? scalar::utf8_to_utf16::convert(in, count, + (char16_t *)out) + : scalar::utf8_to_utf32::convert(in, count, (char32_t *)out); + }; + + if (len < 32) + return scalar(src, len, dst); + + /* validate first three bytes */ + if (validate) { + size_t idx = 3; + while (idx < len && (uint8_t(src[idx]) >> 6) == 0b10) + ++idx; + if (idx > 3 + 3 || !scalar::utf8::validate(src, idx)) + return 0; + } + + size_t tail = 3; + size_t n = len - tail; + Tdst *beg = dst; + + static const uint64_t err1m[] = {0x0202020202020202, 0x4915012180808080}; + static const uint64_t err2m[] = {0xCBCBCB8B8383A3E7, 0xCBCBDBCBCBCBCBCB}; + static const uint64_t err3m[] = {0x0101010101010101, 0X01010101BABAAEE6}; + + const vuint8m1_t err1tbl = + __riscv_vreinterpret_v_u64m1_u8m1(__riscv_vle64_v_u64m1(err1m, 2)); + const vuint8m1_t err2tbl = + __riscv_vreinterpret_v_u64m1_u8m1(__riscv_vle64_v_u64m1(err2m, 2)); + const vuint8m1_t err3tbl = + __riscv_vreinterpret_v_u64m1_u8m1(__riscv_vle64_v_u64m1(err3m, 2)); + + size_t vl8m1 = __riscv_vsetvlmax_e8m1(); + size_t vl8m2 = __riscv_vsetvlmax_e8m2(); + vbool4_t m4even = __riscv_vmseq_vx_u8m2_b4( + __riscv_vand_vx_u8m2(__riscv_vid_v_u8m2(vl8m2), 1, vl8m2), 0, vl8m2); + + for (size_t vl, vlOut; n > 0; n -= vl, src += vl, dst += vlOut) { + vl = __riscv_vsetvl_e8m2(n); + + vuint8m2_t v0 = __riscv_vle8_v_u8m2((uint8_t const *)src, vl); + uint64_t max = __riscv_vmv_x_s_u8m1_u8( + __riscv_vredmaxu_vs_u8m2_u8m1(v0, __riscv_vmv_s_x_u8m1(0, vl), 
vl)); + + uint8_t next0 = src[vl + 0]; + uint8_t next1 = src[vl + 1]; + uint8_t next2 = src[vl + 2]; + + /* fast path: ASCII */ + if ((max | next0 | next1 | next2) < 0b10000000) { + vlOut = vl; + if (is16) + __riscv_vse16_v_u16m4( + (uint16_t *)dst, + simdutf_byteflip(__riscv_vzext_vf2_u16m4(v0, vlOut), vlOut), + vlOut); + else + __riscv_vse32_v_u32m8((uint32_t *)dst, + __riscv_vzext_vf4_u32m8(v0, vlOut), vlOut); + continue; + } + + /* see "Validating UTF-8 In Less Than One Instruction Per Byte" + * https://arxiv.org/abs/2010.03090 */ + vuint8m2_t v1 = __riscv_vslide1down_vx_u8m2(v0, next0, vl); + vuint8m2_t v2 = __riscv_vslide1down_vx_u8m2(v1, next1, vl); + vuint8m2_t v3 = __riscv_vslide1down_vx_u8m2(v2, next2, vl); + + if (validate) { + vuint8m2_t idx2 = __riscv_vand_vx_u8m2(v2, 0xF, vl); + vuint8m2_t idx1 = __riscv_vsrl_vx_u8m2(v2, 4, vl); + vuint8m2_t idx3 = __riscv_vsrl_vx_u8m2(v3, 4, vl); + + vuint8m2_t err1 = simdutf_vrgather_u8m1x2(err1tbl, idx1); + vuint8m2_t err2 = simdutf_vrgather_u8m1x2(err2tbl, idx2); + vuint8m2_t err3 = simdutf_vrgather_u8m1x2(err3tbl, idx3); + vint8m2_t errs = __riscv_vreinterpret_v_u8m2_i8m2( + __riscv_vand_vv_u8m2(__riscv_vand_vv_u8m2(err1, err2, vl), err3, vl)); + + vbool4_t is_3 = __riscv_vmsgtu_vx_u8m2_b4(v1, 0b11100000 - 1, vl); + vbool4_t is_4 = __riscv_vmsgtu_vx_u8m2_b4(v0, 0b11110000 - 1, vl); + vbool4_t is_34 = __riscv_vmor_mm_b4(is_3, is_4, vl); + vbool4_t err34 = + __riscv_vmxor_mm_b4(is_34, __riscv_vmslt_vx_i8m2_b4(errs, 0, vl), vl); + vbool4_t errm = + __riscv_vmor_mm_b4(__riscv_vmsgt_vx_i8m2_b4(errs, 0, vl), err34, vl); + if (__riscv_vfirst_m_b4(errm, vl) >= 0) + return 0; + } + + /* decoding */ + + /* mask of non continuation bytes */ + vbool4_t m = + __riscv_vmsgt_vx_i8m2_b4(__riscv_vreinterpret_v_u8m2_i8m2(v0), -65, vl); + vlOut = __riscv_vcpop_m_b4(m, vl); + + /* extract first and second bytes */ + vuint8m2_t b1 = __riscv_vcompress_vm_u8m2(v0, m, vl); + vuint8m2_t b2 = __riscv_vcompress_vm_u8m2(v1, m, vl); + + /* 
fast path: one and two byte */ + if (max < 0b11100000) { + b2 = __riscv_vand_vx_u8m2(b2, 0b00111111, vlOut); + + vbool4_t m1 = __riscv_vmsgtu_vx_u8m2_b4(b1, 0b10111111, vlOut); + b1 = __riscv_vand_vx_u8m2_mu(m1, b1, b1, 63, vlOut); + + vuint16m4_t b12 = __riscv_vwmulu_vv_u16m4( + b1, + __riscv_vmerge_vxm_u8m2(__riscv_vmv_v_x_u8m2(1, vlOut), 1 << 6, m1, + vlOut), + vlOut); + b12 = __riscv_vwaddu_wv_u16m4_mu(m1, b12, b12, b2, vlOut); + if (is16) + __riscv_vse16_v_u16m4((uint16_t *)dst, + simdutf_byteflip(b12, vlOut), vlOut); + else + __riscv_vse32_v_u32m8((uint32_t *)dst, + __riscv_vzext_vf2_u32m8(b12, vlOut), vlOut); + continue; + } + + /* fast path: one, two and three byte */ + if (max < 0b11110000) { + vuint8m2_t b3 = __riscv_vcompress_vm_u8m2(v2, m, vl); + + b2 = __riscv_vand_vx_u8m2(b2, 0b00111111, vlOut); + b3 = __riscv_vand_vx_u8m2(b3, 0b00111111, vlOut); + + vbool4_t m1 = __riscv_vmsgtu_vx_u8m2_b4(b1, 0b10111111, vlOut); + vbool4_t m3 = __riscv_vmsgtu_vx_u8m2_b4(b1, 0b11011111, vlOut); + + vuint8m2_t t1 = __riscv_vand_vx_u8m2_mu(m1, b1, b1, 63, vlOut); + b1 = __riscv_vand_vx_u8m2_mu(m3, t1, b1, 15, vlOut); + + vuint16m4_t b12 = __riscv_vwmulu_vv_u16m4( + b1, + __riscv_vmerge_vxm_u8m2(__riscv_vmv_v_x_u8m2(1, vlOut), 1 << 6, m1, + vlOut), + vlOut); + b12 = __riscv_vwaddu_wv_u16m4_mu(m1, b12, b12, b2, vlOut); + vuint16m4_t b123 = __riscv_vwaddu_wv_u16m4_mu( + m3, b12, __riscv_vsll_vx_u16m4_mu(m3, b12, b12, 6, vlOut), b3, vlOut); + if (is16) + __riscv_vse16_v_u16m4((uint16_t *)dst, + simdutf_byteflip(b123, vlOut), vlOut); + else + __riscv_vse32_v_u32m8((uint32_t *)dst, + __riscv_vzext_vf2_u32m8(b123, vlOut), vlOut); + continue; + } + + /* extract third and fourth bytes */ + vuint8m2_t b3 = __riscv_vcompress_vm_u8m2(v2, m, vl); + vuint8m2_t b4 = __riscv_vcompress_vm_u8m2(v3, m, vl); + + /* remove prefix from leading bytes + * + * We could also use vrgather here, but it increases register pressure, + * and its performance varies widely on current platforms. 
It might be + * worth reconsidering, though, once there is more hardware available. + * Same goes for the __riscv_vsrl_vv_u32m4 correction step. + * + * We shift left and then right by the number of bits in the prefix, + * which can be calculated as follows: + * x max(x-10, 0) + * 0xxx -> 0000-0111 -> shift by 0 or 1 -> 0 + * 10xx -> 1000-1011 -> don't care + * 110x -> 1100,1101 -> shift by 3 -> 2,3 + * 1110 -> 1110 -> shift by 4 -> 4 + * 1111 -> 1111 -> shift by 5 -> 5 + * + * vssubu.vx v, 10, (max(x-10, 0)) almost gives us what we want, we + * just need to manually detect and handle the one special case: + */ + #define SIMDUTF_RVV_UTF8_TO_COMMON_M1(idx) \ + vuint8m1_t c1 = __riscv_vget_v_u8m2_u8m1(b1, idx); \ + vuint8m1_t c2 = __riscv_vget_v_u8m2_u8m1(b2, idx); \ + vuint8m1_t c3 = __riscv_vget_v_u8m2_u8m1(b3, idx); \ + vuint8m1_t c4 = __riscv_vget_v_u8m2_u8m1(b4, idx); \ + /* remove prefix from trailing bytes */ \ + c2 = __riscv_vand_vx_u8m1(c2, 0b00111111, vlOut); \ + c3 = __riscv_vand_vx_u8m1(c3, 0b00111111, vlOut); \ + c4 = __riscv_vand_vx_u8m1(c4, 0b00111111, vlOut); \ + vuint8m1_t shift = __riscv_vsrl_vx_u8m1(c1, 4, vlOut); \ + shift = __riscv_vmerge_vxm_u8m1( \ + __riscv_vssubu_vx_u8m1(shift, 10, vlOut), 3, \ + __riscv_vmseq_vx_u8m1_b8(shift, 12, vlOut), vlOut); \ + c1 = __riscv_vsll_vv_u8m1(c1, shift, vlOut); \ + c1 = __riscv_vsrl_vv_u8m1(c1, shift, vlOut); \ + /* unconditionally widen and combine to c1234 */ \ + vuint16m2_t c34 = __riscv_vwaddu_wv_u16m2( \ + __riscv_vwmulu_vx_u16m2(c3, 1 << 6, vlOut), c4, vlOut); \ + vuint16m2_t c12 = __riscv_vwaddu_wv_u16m2( \ + __riscv_vwmulu_vx_u16m2(c1, 1 << 6, vlOut), c2, vlOut); \ + vuint32m4_t c1234 = __riscv_vwaddu_wv_u32m4( \ + __riscv_vwmulu_vx_u32m4(c12, 1 << 12, vlOut), c34, vlOut); \ + /* derive required right-shift amount from `shift` to reduce \ + * c1234 to the required number of bytes */ \ + c1234 = __riscv_vsrl_vv_u32m4( \ + c1234, \ + __riscv_vzext_vf4_u32m4( \ + __riscv_vmul_vx_u8m1( \ + 
__riscv_vrsub_vx_u8m1(__riscv_vssubu_vx_u8m1(shift, 2, vlOut), \ + 3, vlOut), \ + 6, vlOut), \ + vlOut), \ + vlOut); \ + /* store result in desired format */ \ + if (is16) \ + vlDst = rvv_utf32_store_utf16_m4((uint16_t *)dst, c1234, vlOut, \ + m4even); \ + else \ + vlDst = vlOut, __riscv_vse32_v_u32m4((uint32_t *)dst, c1234, vlOut); + + /* Unrolling this manually reduces register pressure and allows + * us to terminate early. */ + { + size_t vlOutm2 = vlOut, vlDst; + vlOut = __riscv_vsetvl_e8m1(vlOut < vl8m1 ? vlOut : vl8m1); + SIMDUTF_RVV_UTF8_TO_COMMON_M1(0) + if (vlOutm2 == vlOut) { + vlOut = vlDst; + continue; + } + + dst += vlDst; + vlOut = vlOutm2 - vlOut; + } + { + size_t vlDst; + SIMDUTF_RVV_UTF8_TO_COMMON_M1(1) + vlOut = vlDst; + } + + #undef SIMDUTF_RVV_UTF8_TO_COMMON_M1 + } + + /* validate the last character and reparse it + tail */ + if (len > tail) { + if ((uint8_t(src[0]) >> 6) == 0b10) + --dst; + while ((uint8_t(src[0]) >> 6) == 0b10 && tail < len) + --src, ++tail; + if (is16) { + /* go back one more, when on high surrogate */ + if (simdutf_byteflip((uint16_t)dst[-1]) >= 0xD800 && + simdutf_byteflip((uint16_t)dst[-1]) <= 0xDBFF) + --dst; + } + } + size_t ret = scalar(src, tail, dst); + if (ret == 0) + return 0; + return (size_t)(dst - beg) + ret; +} +#endif // SIMDUTF_FEATURE_UTF8 && (SIMDUTF_FEATURE_UTF16 || + // SIMDUTF_FEATURE_UTF32) + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_utf8_to_latin1( + const char *src, size_t len, char *dst) const noexcept { + const char *beg = dst; + uint8_t last = 0; + for (size_t vl, vlOut; len > 0; + len -= vl, src += vl, dst += vlOut, last = src[-1]) { + vl = __riscv_vsetvl_e8m2(len); + vuint8m2_t v1 = __riscv_vle8_v_u8m2((uint8_t *)src, vl); + // check which bytes are ASCII + vbool4_t ascii = __riscv_vmsltu_vx_u8m2_b4(v1, 0b10000000, vl); + // count ASCII bytes + vlOut = __riscv_vcpop_m_b4(ascii, vl); + // The original code would only enter the next block 
after this check: + // vbool4_t m = __riscv_vmsltu_vx_u8m2_b4(v1, 0b11000000, vl); + // vlOut = __riscv_vcpop_m_b4(m, vl); + // if (vlOut != vl || last > 0b01111111) {...} + // So when everything was ASCII or continuation bytes, we just proceeded + // without any processing, going straight to __riscv_vse8_v_u8m2. + // But you need the __riscv_vslide1up_vx_u8m2 whenever there is a non-ASCII + // byte. + if (vlOut != vl) { // If not pure ASCII + // Non-ASCII characters + // We now want to mark the ascii and continuation bytes + vbool4_t m = __riscv_vmsltu_vx_u8m2_b4(v1, 0b11000000, vl); + // We count them, that's our new vlOut (output vector length) + vlOut = __riscv_vcpop_m_b4(m, vl); + + vuint8m2_t v0 = __riscv_vslide1up_vx_u8m2(v1, last, vl); + + vbool4_t leading0 = __riscv_vmsgtu_vx_u8m2_b4(v0, 0b10111111, vl); + vbool4_t trailing1 = __riscv_vmslt_vx_i8m2_b4( + __riscv_vreinterpret_v_u8m2_i8m2(v1), (uint8_t)0b11000000, vl); + // -62 is 0b11000010, so we check whether any of v0 is too big + vbool4_t tobig = __riscv_vmand_mm_b4( + leading0, + __riscv_vmsgtu_vx_u8m2_b4(__riscv_vxor_vx_u8m2(v0, (uint8_t)-62, vl), + 1, vl), + vl); + if (__riscv_vfirst_m_b4( + __riscv_vmor_mm_b4( + tobig, __riscv_vmxor_mm_b4(leading0, trailing1, vl), vl), + vl) >= 0) + return 0; + + v1 = __riscv_vor_vx_u8m2_mu(__riscv_vmseq_vx_u8m2_b4(v0, 0b11000011, vl), + v1, v1, 0b01000000, vl); + v1 = __riscv_vcompress_vm_u8m2(v1, m, vl); + } else if (last >= 0b11000000) { // If last byte is a leading byte and we + // got only ASCII, error! 
+ return 0; + } + __riscv_vse8_v_u8m2((uint8_t *)dst, v1, vlOut); + } + if (last > 0b10111111) + return 0; + return dst - beg; +} + +simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors( + const char *src, size_t len, char *dst) const noexcept { + size_t res = convert_utf8_to_latin1(src, len, dst); + if (res) + return result(error_code::SUCCESS, res); + return scalar::utf8_to_latin1::convert_with_errors(src, len, dst); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( + const char *src, size_t len, char *dst) const noexcept { + const char *beg = dst; + uint8_t last = 0; + for (size_t vl, vlOut; len > 0; + len -= vl, src += vl, dst += vlOut, last = src[-1]) { + vl = __riscv_vsetvl_e8m2(len); + vuint8m2_t v1 = __riscv_vle8_v_u8m2((uint8_t *)src, vl); + vbool4_t ascii = __riscv_vmsltu_vx_u8m2_b4(v1, 0b10000000, vl); + vlOut = __riscv_vcpop_m_b4(ascii, vl); + if (vlOut != vl) { // If not pure ASCII + vbool4_t m = __riscv_vmsltu_vx_u8m2_b4(v1, 0b11000000, vl); + vlOut = __riscv_vcpop_m_b4(m, vl); + vuint8m2_t v0 = __riscv_vslide1up_vx_u8m2(v1, last, vl); + v1 = __riscv_vor_vx_u8m2_mu(__riscv_vmseq_vx_u8m2_b4(v0, 0b11000011, vl), + v1, v1, 0b01000000, vl); + v1 = __riscv_vcompress_vm_u8m2(v1, m, vl); + } + __riscv_vse8_v_u8m2((uint8_t *)dst, v1, vlOut); + } + return dst - beg; +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le( + const char *src, size_t len, char16_t *dst) const noexcept { + return rvv_utf8_to_common(src, len, + (uint16_t *)dst); +} + +simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be( + const char *src, size_t len, char16_t *dst) const noexcept { + if (supports_zvbb()) + return rvv_utf8_to_common( + src, len, (uint16_t *)dst); + else + return rvv_utf8_to_common(src, len, + (uint16_t *)dst); +} + +simdutf_warn_unused result 
implementation::convert_utf8_to_utf16le_with_errors( + const char *src, size_t len, char16_t *dst) const noexcept { + size_t res = convert_utf8_to_utf16le(src, len, dst); + if (res) + return result(error_code::SUCCESS, res); + return scalar::utf8_to_utf16::convert_with_errors( + src, len, dst); +} + +simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors( + const char *src, size_t len, char16_t *dst) const noexcept { + size_t res = convert_utf8_to_utf16be(src, len, dst); + if (res) + return result(error_code::SUCCESS, res); + return scalar::utf8_to_utf16::convert_with_errors(src, len, + dst); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le( + const char *src, size_t len, char16_t *dst) const noexcept { + return rvv_utf8_to_common( + src, len, (uint16_t *)dst); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be( + const char *src, size_t len, char16_t *dst) const noexcept { + if (supports_zvbb()) + return rvv_utf8_to_common( + src, len, (uint16_t *)dst); + else + return rvv_utf8_to_common( + src, len, (uint16_t *)dst); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::convert_utf8_to_utf32( + const char *src, size_t len, char32_t *dst) const noexcept { + return rvv_utf8_to_common(src, len, + (uint32_t *)dst); +} + +simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors( + const char *src, size_t len, char32_t *dst) const noexcept { + size_t res = convert_utf8_to_utf32(src, len, dst); + if (res) + return result(error_code::SUCCESS, res); + return scalar::utf8_to_utf32::convert_with_errors(src, len, dst); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( + const char *src, size_t len, char32_t *dst) const noexcept { + return rvv_utf8_to_common( + src, len, (uint32_t *)dst); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 
+/* end file src/rvv/rvv_utf8_to.inl.cpp */ + +#if SIMDUTF_FEATURE_BASE64 +/* begin file src/rvv/rvv_find.cpp */ +const char *implementation::find(const char *start, const char *end, + char character) const noexcept { + const char *src = start; + for (size_t len = end - start, vl; len > 0; len -= vl, src += vl) { + vl = __riscv_vsetvl_e8m8(len); + vuint8m8_t v = __riscv_vle8_v_u8m8((uint8_t *)src, vl); + long idx = + __riscv_vfirst_m_b1(__riscv_vmseq_vx_u8m8_b1(v, character, vl), vl); + if (idx >= 0) + return src + idx; + } + return end; +} + +const char16_t *implementation::find(const char16_t *start, const char16_t *end, + char16_t character) const noexcept { + const char16_t *src = start; + for (size_t len = end - start, vl; len > 0; len -= vl, src += vl) { + vl = __riscv_vsetvl_e16m8(len); + vuint16m8_t v = __riscv_vle16_v_u16m8((uint16_t *)src, vl); + long idx = + __riscv_vfirst_m_b2(__riscv_vmseq_vx_u16m8_b2(v, character, vl), vl); + if (idx >= 0) + return src + idx; + } + return end; +} +/* end file src/rvv/rvv_find.cpp */ +#endif // SIMDUTF_FEATURE_BASE64 + +#if SIMDUTF_FEATURE_UTF16 +/* begin file src/rvv/rvv_utf16fix.cpp */ +template +simdutf_really_inline void utf16fix_block_rvv(char16_t *out, const char16_t *in, + size_t vl) { + const char16_t replacement = scalar::utf16::replacement(); + vuint16m8_t block = __riscv_vle16_v_u16m8((const uint16_t *)in, vl); + vuint16m8_t lookback = __riscv_vslide1up_vx_u16m8(block, in[-1], vl); + vuint16m8_t lb_masked = __riscv_vand_vx_u16m8( + lookback, scalar::utf16::swap_if_needed(0xfc00U), vl); + vuint16m8_t block_masked = __riscv_vand_vx_u16m8( + block, scalar::utf16::swap_if_needed(0xfc00U), vl); + vbool2_t lb_is_high = __riscv_vmseq_vx_u16m8_b2( + lb_masked, scalar::utf16::swap_if_needed(0xd800U), vl); + vbool2_t block_is_low = __riscv_vmseq_vx_u16m8_b2( + block_masked, scalar::utf16::swap_if_needed(0xdc00U), vl); + + vbool2_t illseq = __riscv_vmxor_mm_b2(lb_is_high, block_is_low, vl); + if 
(__riscv_vfirst_m_b2(illseq, vl) >= 0) { + vbool2_t lb_illseq = __riscv_vmandn_mm_b2(lb_is_high, block_is_low, vl); + + vbool2_t lb_illseq_right_shifted; + if (vlmax) { + /* right shift mask register directly via reinterpret at vlmax */ + size_t vlm = __riscv_vsetvlmax_e8mf2(); + vuint8mf2_t vlb_illseq = + __riscv_vlmul_trunc_u8mf2(__riscv_vreinterpret_u8m1(lb_illseq)); + lb_illseq_right_shifted = + __riscv_vreinterpret_b2(__riscv_vlmul_ext_u8m1(__riscv_vmacc_vx_u8mf2( + __riscv_vsrl_vx_u8mf2(vlb_illseq, 1, vlm), 1 << 7, + __riscv_vslide1down_vx_u8mf2(vlb_illseq, 0, vlm), vlm))); + } else { + lb_illseq_right_shifted = __riscv_vmandn_mm_b2( + __riscv_vmseq_vx_u16m8_b2( + __riscv_vslide1down_vx_u16m8(lb_masked, 0, vl), + scalar::utf16::swap_if_needed(0xd800U), vl), + __riscv_vmseq_vx_u16m8_b2( + __riscv_vslide1down_vx_u16m8(block_masked, 0, vl), + scalar::utf16::swap_if_needed(0xdc00U), vl), + vl); + } + + char16_t last = out[-1]; /* allow compiler to generate branchless code */ + out[-1] = __riscv_vfirst_m_b2(lb_illseq, vl) == 0 ? replacement : last; + vbool2_t block_illseq = + __riscv_vmor_mm_b2(__riscv_vmandn_mm_b2(block_is_low, lb_is_high, vl), + lb_illseq_right_shifted, vl); + block = __riscv_vmerge_vxm_u16m8(block, replacement, block_illseq, vl); + __riscv_vse16_v_u16m8((uint16_t *)out, block, vl); + } else if (!in_place) { + __riscv_vse16_v_u16m8((uint16_t *)out, block, vl); + } +} + +template +void rvv_to_well_formed_utf16(const char16_t *in, size_t n, char16_t *out) { + const char16_t replacement = scalar::utf16::replacement(); + const size_t VL = __riscv_vsetvlmax_e16m8(); + if (n == 0) + return; + + out[0] = + scalar::utf16::is_low_surrogate(in[0]) ? 
replacement : in[0]; + n -= 1; + in += 1; + out += 1; + + /* duplicate code to have the compiler specialise utf16fix_block() */ + if (in == out) { + for (; n > VL; n -= VL, in += VL, out += VL) { + utf16fix_block_rvv(out, in, VL); + } + utf16fix_block_rvv(out, in, n); + } else { + for (; n > VL; n -= VL, in += VL, out += VL) { + utf16fix_block_rvv(out, in, VL); + } + utf16fix_block_rvv(out, in, n); + } + + out[n - 1] = scalar::utf16::is_high_surrogate(out[n - 1]) + ? replacement + : out[n - 1]; +} + +void implementation::to_well_formed_utf16le(const char16_t *input, size_t len, + char16_t *output) const noexcept { + return rvv_to_well_formed_utf16(input, len, output); +} + +void implementation::to_well_formed_utf16be(const char16_t *input, size_t len, + char16_t *output) const noexcept { + return rvv_to_well_formed_utf16(input, len, output); +} + +template +simdutf_really_inline static void +rvv_change_endianness_utf16(const char16_t *src, size_t len, char16_t *dst) { + for (size_t vl; len > 0; len -= vl, src += vl, dst += vl) { + vl = __riscv_vsetvl_e16m8(len); + vuint16m8_t v = __riscv_vle16_v_u16m8((uint16_t *)src, vl); + __riscv_vse16_v_u16m8((uint16_t *)dst, simdutf_byteflip(v, vl), vl); + } +} + +void implementation::change_endianness_utf16(const char16_t *src, size_t len, + char16_t *dst) const noexcept { + if (supports_zvbb()) + return rvv_change_endianness_utf16(src, len, dst); + else + return rvv_change_endianness_utf16(src, len, dst); +} +/* end file src/rvv/rvv_utf16fix.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused int +implementation::detect_encodings(const char *input, + size_t length) const noexcept { + // If there is a BOM, then we trust it. + auto bom_encoding = simdutf::BOM::check_bom(input, length); + if (bom_encoding != encoding_type::unspecified) + return bom_encoding; + // todo: reimplement as a one-pass algorithm. 
+ int out = 0; + if (validate_utf8(input, length)) + out |= encoding_type::UTF8; + if (length % 2 == 0) { + if (validate_utf16le(reinterpret_cast(input), length / 2)) + out |= encoding_type::UTF16_LE; + } + if (length % 4 == 0) { + if (validate_utf32(reinterpret_cast(input), length / 4)) + out |= encoding_type::UTF32_LE; + } + + return out; +} +#endif // SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_BASE64 +simdutf_warn_unused result implementation::base64_to_binary( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + return simdutf::scalar::base64::base64_to_binary_details_impl( + input, length, output, options, last_chunk_options); +} + +simdutf_warn_unused result implementation::base64_to_binary( + const char16_t *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + return simdutf::scalar::base64::base64_to_binary_details_impl( + input, length, output, options, last_chunk_options); +} + +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + return simdutf::scalar::base64::base64_to_binary_details_impl( + input, length, output, options, last_chunk_options); +} + +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char16_t *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + return simdutf::scalar::base64::base64_to_binary_details_impl( + input, length, output, options, last_chunk_options); +} + +size_t implementation::binary_to_base64(const char *input, size_t length, + char *output, + base64_options options) const noexcept { + return scalar::base64::tail_encode_base64(output, input, length, options); +} + +size_t 
implementation::binary_to_base64_with_lines( + const char *input, size_t length, char *output, size_t line_length, + base64_options options) const noexcept { + return scalar::base64::tail_encode_base64_impl(output, input, length, + options, line_length); +} +#endif // SIMDUTF_FEATURE_BASE64 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused result +implementation::utf8_length_from_utf16le_with_replacement( + const char16_t *input, size_t length) const noexcept { + return scalar::utf16::utf8_length_from_utf16_with_replacement< + endianness::LITTLE>(input, length); +} + +simdutf_warn_unused result +implementation::utf8_length_from_utf16be_with_replacement( + const char16_t *input, size_t length) const noexcept { + return scalar::utf16::utf8_length_from_utf16_with_replacement< + endianness::BIG>(input, length); +} + +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +} // namespace rvv +} // namespace simdutf + +/* begin file src/simdutf/rvv/end.h */ +#if SIMDUTF_CAN_ALWAYS_RUN_RVV +// nothing needed. +#else +SIMDUTF_UNTARGET_REGION +#endif + +/* end file src/simdutf/rvv/end.h */ +/* end file src/rvv/implementation.cpp */ +#endif +#if SIMDUTF_IMPLEMENTATION_WESTMERE +/* begin file src/westmere/implementation.cpp */ +/* begin file src/simdutf/westmere/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "westmere" +// #define SIMDUTF_IMPLEMENTATION westmere +#define SIMDUTF_SIMD_HAS_BYTEMASK 1 + +#if SIMDUTF_CAN_ALWAYS_RUN_WESTMERE +// nothing needed. 
+#else +SIMDUTF_TARGET_WESTMERE +#endif +/* end file src/simdutf/westmere/begin.h */ + +namespace simdutf { +namespace westmere { +namespace { +#ifndef SIMDUTF_WESTMERE_H + #error "westmere.h must be included" +#endif +using namespace simd; + +#if SIMDUTF_FEATURE_ASCII || SIMDUTF_FEATURE_DETECT_ENCODING || \ + SIMDUTF_FEATURE_UTF8 +simdutf_really_inline bool is_ascii(const simd8x64 &input) { + return input.reduce_or().is_ascii(); +} +#endif // SIMDUTF_FEATURE_ASCII || SIMDUTF_FEATURE_DETECT_ENCODING || + // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_really_inline simd8 +must_be_2_3_continuation(const simd8 prev2, + const simd8 prev3) { + simd8 is_third_byte = + prev2.saturating_sub(0xe0u - 0x80); // Only 111_____ will be >= 0x80 + simd8 is_fourth_byte = + prev3.saturating_sub(0xf0u - 0x80); // Only 1111____ will be >= 0x80 + return simd8(is_third_byte | is_fourth_byte); +} +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 +/* begin file src/westmere/internal/loader.cpp */ +namespace internal { +namespace westmere { + +/* begin file src/westmere/internal/write_v_u16_11bits_to_utf8.cpp */ +/* + * reads a vector of uint16 values + * bits after 11th are ignored + * first 11 bits are encoded into utf8 + * !important! utf8_output must have at least 16 writable bytes + */ + +inline void write_v_u16_11bits_to_utf8(const __m128i v_u16, char *&utf8_output, + const __m128i one_byte_bytemask, + const uint16_t one_byte_bitmask) { + // 0b1100_0000_1000_0000 + const __m128i v_c080 = _mm_set1_epi16((int16_t)0xc080); + // 0b0001_1111_0000_0000 + const __m128i v_1f00 = _mm_set1_epi16((int16_t)0x1f00); + // 0b0000_0000_0011_1111 + const __m128i v_003f = _mm_set1_epi16((int16_t)0x003f); + + // 1. 
prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 + // expected output : [110a|aaaa|10bb|bbbb] x 8 + + // t0 = [000a|aaaa|bbbb|bb00] + const __m128i t0 = _mm_slli_epi16(v_u16, 2); + // t1 = [000a|aaaa|0000|0000] + const __m128i t1 = _mm_and_si128(t0, v_1f00); + // t2 = [0000|0000|00bb|bbbb] + const __m128i t2 = _mm_and_si128(v_u16, v_003f); + // t3 = [000a|aaaa|00bb|bbbb] + const __m128i t3 = _mm_or_si128(t1, t2); + // t4 = [110a|aaaa|10bb|bbbb] + const __m128i t4 = _mm_or_si128(t3, v_c080); + + // 2. merge ASCII and 2-byte codewords + const __m128i utf8_unpacked = _mm_blendv_epi8(t4, v_u16, one_byte_bytemask); + + // 3. prepare bitmask for 8-bit lookup + // one_byte_bitmask = hhggffeeddccbbaa -- the bits are doubled (h - MSB, a + // - LSB) + const uint16_t m0 = one_byte_bitmask & 0x5555; // m0 = 0h0g0f0e0d0c0b0a + const uint16_t m1 = static_cast(m0 >> 7); // m1 = 00000000h0g0f0e0 + const uint8_t m2 = static_cast((m0 | m1) & 0xff); // m2 = hdgcfbea + // 4. pack the bytes + const uint8_t *row = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0]; + const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1)); + const __m128i utf8_packed = _mm_shuffle_epi8(utf8_unpacked, shuffle); + + // 5. store bytes + _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); + + // 6. 
adjust pointers + utf8_output += row[0]; +} + +inline void write_v_u16_11bits_to_utf8(const __m128i v_u16, char *&utf8_output, + const __m128i v_0000, + const __m128i v_ff80) { + // no bits set above 7th bit + const __m128i one_byte_bytemask = + _mm_cmpeq_epi16(_mm_and_si128(v_u16, v_ff80), v_0000); + const uint16_t one_byte_bitmask = + static_cast(_mm_movemask_epi8(one_byte_bytemask)); + + write_v_u16_11bits_to_utf8(v_u16, utf8_output, one_byte_bytemask, + one_byte_bitmask); +} +/* end file src/westmere/internal/write_v_u16_11bits_to_utf8.cpp */ + +} // namespace westmere +} // namespace internal +/* end file src/westmere/internal/loader.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF16 +/* begin file src/westmere/sse_utf16fix.cpp */ +/* + * Process one block of 8 characters. If in_place is false, + * copy the block from in to out. If there is a sequencing + * error in the block, overwrite the illsequenced characters + * with the replacement character. This function reads one + * character before the beginning of the buffer as a lookback. + * If that character is illsequenced, it too is overwritten. 
+ */ +template +simdutf_really_inline void utf16fix_block_sse(char16_t *out, + const char16_t *in) { + auto swap_if_needed = [](uint16_t x) simdutf_constexpr -> uint16_t { + return scalar::utf16::swap_if_needed(x); + }; + const char16_t replacement = scalar::utf16::replacement(); + + __m128i lookback, block, lb_masked, block_masked, lb_is_high, block_is_low; + __m128i illseq, lb_illseq, block_illseq; + + lookback = _mm_loadu_si128((const __m128i *)(in - 1)); + block = _mm_loadu_si128((const __m128i *)in); + lb_masked = _mm_and_si128(lookback, _mm_set1_epi16(swap_if_needed(0xfc00U))); + block_masked = _mm_and_si128(block, _mm_set1_epi16(swap_if_needed(0xfc00U))); + lb_is_high = + _mm_cmpeq_epi16(lb_masked, _mm_set1_epi16(swap_if_needed(0xd800U))); + block_is_low = + _mm_cmpeq_epi16(block_masked, _mm_set1_epi16(swap_if_needed(0xdc00U))); + + illseq = _mm_xor_si128(lb_is_high, block_is_low); + if (_mm_movemask_epi8(illseq) != 0) { + int lb; + + /* compute the cause of the illegal sequencing */ + lb_illseq = _mm_andnot_si128(block_is_low, lb_is_high); + block_illseq = _mm_or_si128(_mm_andnot_si128(lb_is_high, block_is_low), + _mm_bsrli_si128(lb_illseq, 2)); + + /* fix illegal sequencing in the lookback */ + lb = _mm_cvtsi128_si32(lb_illseq); + lb = (lb & replacement) | (~lb & out[-1]); + out[-1] = char16_t(lb); + /* fix illegal sequencing in the main block */ + block = + _mm_or_si128(_mm_andnot_si128(block_illseq, block), + _mm_and_si128(block_illseq, _mm_set1_epi16(replacement))); + _mm_storeu_si128((__m128i *)out, block); + } else if (!in_place) { + _mm_storeu_si128((__m128i *)out, block); + } +} + +template +void utf16fix_sse(const char16_t *in, size_t n, char16_t *out) { + const char16_t replacement = scalar::utf16::replacement(); + size_t i; + if (n < 9) { + scalar::utf16::to_well_formed_utf16(in, n, out); + return; + } + + out[0] = + scalar::utf16::is_low_surrogate(in[0]) ? 
replacement : in[0]; + + /* duplicate code to have the compiler specialise utf16fix_block() */ + if (in == out) { + for (i = 1; i + 8 < n; i += 8) { + utf16fix_block_sse(out + i, in + i); + } + + utf16fix_block_sse(out + n - 8, in + n - 8); + } else { + for (i = 1; i + 8 < n; i += 8) { + utf16fix_block_sse(out + i, in + i); + } + + utf16fix_block_sse(out + n - 8, in + n - 8); + } + + out[n - 1] = scalar::utf16::is_high_surrogate(out[n - 1]) + ? replacement + : out[n - 1]; +} +/* end file src/westmere/sse_utf16fix.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +/* begin file src/westmere/sse_validate_utf16.cpp */ +template +simd8 utf16_gather_high_bytes(const simd16 in0, + const simd16 in1) { + if (big_endian) { + // we want lower bytes + const auto mask = simd16(0x00ff); + const auto t0 = in0 & mask; + const auto t1 = in1 & mask; + + return simd16::pack(t0, t1); + } else { + const auto t0 = in0.shr<8>(); + const auto t1 = in1.shr<8>(); + + return simd16::pack(t0, t1); + } +} +/* end file src/westmere/sse_validate_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/westmere/sse_convert_latin1_to_utf8.cpp */ +std::pair +sse_convert_latin1_to_utf8(const char *latin_input, + const size_t latin_input_length, char *utf8_output) { + const char *end = latin_input + latin_input_length; + + const __m128i v_0000 = _mm_setzero_si128(); + // 0b1000_0000 + const __m128i v_80 = _mm_set1_epi8((uint8_t)0x80); + // 0b1111_1111_1000_0000 + const __m128i v_ff80 = _mm_set1_epi16((uint16_t)0xff80); + + const __m128i latin_1_half_into_u16_byte_mask = + _mm_setr_epi8(0, '\x80', 1, '\x80', 2, '\x80', 3, '\x80', 4, '\x80', 5, + '\x80', 6, '\x80', 7, '\x80'); + + const __m128i latin_2_half_into_u16_byte_mask = + _mm_setr_epi8(8, '\x80', 9, '\x80', 10, '\x80', 11, '\x80', 12, '\x80', + 13, '\x80', 14, '\x80', 15, '\x80'); + + // each 
latin1 takes 1-2 utf8 bytes + // slow path writes useful 8-15 bytes twice (eagerly writes 16 bytes and then + // adjust the pointer) so the last write can exceed the utf8_output size by + // 8-1 bytes by reserving 8 extra input bytes, we expect the output to have + // 8-16 bytes free + while (end - latin_input >= 16 + 8) { + // Load 16 Latin1 characters (16 bytes) into a 128-bit register + __m128i v_latin = _mm_loadu_si128((__m128i *)latin_input); + + if (_mm_testz_si128(v_latin, v_80)) { // ASCII fast path!!!! + _mm_storeu_si128((__m128i *)utf8_output, v_latin); + latin_input += 16; + utf8_output += 16; + continue; + } + + // assuming a/b are bytes and A/B are uint16 of the same value + // aaaa_aaaa_bbbb_bbbb -> AAAA_AAAA + __m128i v_u16_latin_1_half = + _mm_shuffle_epi8(v_latin, latin_1_half_into_u16_byte_mask); + // aaaa_aaaa_bbbb_bbbb -> BBBB_BBBB + __m128i v_u16_latin_2_half = + _mm_shuffle_epi8(v_latin, latin_2_half_into_u16_byte_mask); + + internal::westmere::write_v_u16_11bits_to_utf8(v_u16_latin_1_half, + utf8_output, v_0000, v_ff80); + internal::westmere::write_v_u16_11bits_to_utf8(v_u16_latin_2_half, + utf8_output, v_0000, v_ff80); + latin_input += 16; + } + + if (end - latin_input >= 16) { + // Load 16 Latin1 characters (16 bytes) into a 128-bit register + __m128i v_latin = _mm_loadu_si128((__m128i *)latin_input); + + if (_mm_testz_si128(v_latin, v_80)) { // ASCII fast path!!!! 
+ _mm_storeu_si128((__m128i *)utf8_output, v_latin); + latin_input += 16; + utf8_output += 16; + } else { + // assuming a/b are bytes and A/B are uint16 of the same value + // aaaa_aaaa_bbbb_bbbb -> AAAA_AAAA + __m128i v_u16_latin_1_half = + _mm_shuffle_epi8(v_latin, latin_1_half_into_u16_byte_mask); + internal::westmere::write_v_u16_11bits_to_utf8( + v_u16_latin_1_half, utf8_output, v_0000, v_ff80); + latin_input += 8; + } + } + + return std::make_pair(latin_input, utf8_output); +} +/* end file src/westmere/sse_convert_latin1_to_utf8.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/westmere/sse_convert_latin1_to_utf16.cpp */ +template +std::pair +sse_convert_latin1_to_utf16(const char *latin1_input, size_t len, + char16_t *utf16_output) { + size_t rounded_len = len & ~0xF; // Round down to nearest multiple of 16 + for (size_t i = 0; i < rounded_len; i += 16) { + // Load 16 Latin1 characters into a 128-bit register + __m128i in = + _mm_loadu_si128(reinterpret_cast(&latin1_input[i])); + __m128i out1 = big_endian ? _mm_unpacklo_epi8(_mm_setzero_si128(), in) + : _mm_unpacklo_epi8(in, _mm_setzero_si128()); + __m128i out2 = big_endian ? 
_mm_unpackhi_epi8(_mm_setzero_si128(), in) + : _mm_unpackhi_epi8(in, _mm_setzero_si128()); + // Zero extend each Latin1 character to 16-bit integers and store the + // results back to memory + _mm_storeu_si128(reinterpret_cast<__m128i *>(&utf16_output[i]), out1); + _mm_storeu_si128(reinterpret_cast<__m128i *>(&utf16_output[i + 8]), out2); + } + // return pointers pointing to where we left off + return std::make_pair(latin1_input + rounded_len, utf16_output + rounded_len); +} +/* end file src/westmere/sse_convert_latin1_to_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/westmere/sse_convert_latin1_to_utf32.cpp */ +std::pair +sse_convert_latin1_to_utf32(const char *buf, size_t len, + char32_t *utf32_output) { + const char *end = buf + len; + + while (end - buf >= 16) { + // Load 16 Latin1 characters (16 bytes) into a 128-bit register + __m128i in = _mm_loadu_si128((__m128i *)buf); + + // Shift input to process next 4 bytes + __m128i in_shifted1 = _mm_srli_si128(in, 4); + __m128i in_shifted2 = _mm_srli_si128(in, 8); + __m128i in_shifted3 = _mm_srli_si128(in, 12); + + // expand 8-bit to 32-bit unit + __m128i out1 = _mm_cvtepu8_epi32(in); + __m128i out2 = _mm_cvtepu8_epi32(in_shifted1); + __m128i out3 = _mm_cvtepu8_epi32(in_shifted2); + __m128i out4 = _mm_cvtepu8_epi32(in_shifted3); + + _mm_storeu_si128((__m128i *)utf32_output, out1); + _mm_storeu_si128((__m128i *)(utf32_output + 4), out2); + _mm_storeu_si128((__m128i *)(utf32_output + 8), out3); + _mm_storeu_si128((__m128i *)(utf32_output + 12), out4); + + utf32_output += 16; + buf += 16; + } + + return std::make_pair(buf, utf32_output); +} +/* end file src/westmere/sse_convert_latin1_to_utf32.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +/* begin file src/westmere/sse_convert_utf8_to_utf16.cpp */ +// depends on "tables/utf8_to_utf16_tables.h" + +// 
Convert up to 12 bytes from utf8 to utf16 using a mask indicating the +// end of the code points. Only the least significant 12 bits of the mask +// are accessed. +// It returns how many bytes were consumed (up to 12). +template +size_t convert_masked_utf8_to_utf16(const char *input, + uint64_t utf8_end_of_code_point_mask, + char16_t *&utf16_output) { + // we use an approach where we try to process up to 12 input bytes. + // Why 12 input bytes and not 16? Because we are concerned with the size of + // the lookup tables. Also 12 is nicely divisible by two and three. + // + // + // Optimization note: our main path below is load-latency dependent. Thus it + // is maybe beneficial to have fast paths that depend on branch prediction but + // have less latency. This results in more instructions but, potentially, also + // higher speeds. + // + // We first try a few fast paths. + const __m128i swap = + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + const __m128i in = _mm_loadu_si128((__m128i *)input); + const uint16_t input_utf8_end_of_code_point_mask = + utf8_end_of_code_point_mask & 0xfff; + if (utf8_end_of_code_point_mask == 0xfff) { + // We process the data in chunks of 12 bytes. + // Note: using 16 bytes is unsafe, see issue_ossfuzz_71218 + __m128i ascii_first = _mm_cvtepu8_epi16(in); + __m128i ascii_second = _mm_cvtepu8_epi16(_mm_srli_si128(in, 8)); + if (big_endian) { + ascii_first = _mm_shuffle_epi8(ascii_first, swap); + ascii_second = _mm_shuffle_epi8(ascii_second, swap); + } + _mm_storeu_si128(reinterpret_cast<__m128i *>(utf16_output), ascii_first); + _mm_storeu_si128(reinterpret_cast<__m128i *>(utf16_output + 8), + ascii_second); + utf16_output += 12; // We wrote 12 16-bit characters. + return 12; // We consumed 12 bytes. + } + if (((utf8_end_of_code_point_mask & 0xFFFF) == 0xaaaa)) { + // We want to take 8 2-byte UTF-8 code units and turn them into 8 2-byte + // UTF-16 code units. 
There is probably a more efficient sequence, but the + // following might do. + const __m128i sh = + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + const __m128i perm = _mm_shuffle_epi8(in, sh); + const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f)); + const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00)); + __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2)); + if (big_endian) + composed = _mm_shuffle_epi8(composed, swap); + _mm_storeu_si128((__m128i *)utf16_output, composed); + utf16_output += 8; // We wrote 16 bytes, 8 code points. + return 16; + } + if (input_utf8_end_of_code_point_mask == 0x924) { + // We want to take 4 3-byte UTF-8 code units and turn them into 4 2-byte + // UTF-16 code units. There is probably a more efficient sequence, but the + // following might do. + const __m128i sh = + _mm_setr_epi8(2, 1, 0, -1, 5, 4, 3, -1, 8, 7, 6, -1, 11, 10, 9, -1); + const __m128i perm = _mm_shuffle_epi8(in, sh); + const __m128i ascii = + _mm_and_si128(perm, _mm_set1_epi32(0x7f)); // 7 or 6 bits + const __m128i middlebyte = + _mm_and_si128(perm, _mm_set1_epi32(0x3f00)); // 5 or 6 bits + const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2); + const __m128i highbyte = + _mm_and_si128(perm, _mm_set1_epi32(0x0f0000)); // 4 bits + const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 4); + const __m128i composed = + _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), highbyte_shifted); + __m128i composed_repacked = _mm_packus_epi32(composed, composed); + if (big_endian) + composed_repacked = _mm_shuffle_epi8(composed_repacked, swap); + _mm_storeu_si128((__m128i *)utf16_output, composed_repacked); + utf16_output += 4; + return 12; + } + /// We do not have a fast path available, so we fallback. 
+ + const uint8_t idx = + tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][0]; + const uint8_t consumed = + tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1]; + if (idx < 64) { + // SIX (6) input code-code units + // this is a relatively easy scenario + // we process SIX (6) input code-code units. The max length in bytes of six + // code code units spanning between 1 and 2 bytes each is 12 bytes. On + // processors where pdep/pext is fast, we might be able to use a small + // lookup table. + const __m128i sh = + _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]); + const __m128i perm = _mm_shuffle_epi8(in, sh); + const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f)); + const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00)); + __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2)); + if (big_endian) + composed = _mm_shuffle_epi8(composed, swap); + _mm_storeu_si128((__m128i *)utf16_output, composed); + utf16_output += 6; // We wrote 12 bytes, 6 code points. 
+ } else if (idx < 145) { + // FOUR (4) input code-code units + const __m128i sh = + _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]); + const __m128i perm = _mm_shuffle_epi8(in, sh); + const __m128i ascii = + _mm_and_si128(perm, _mm_set1_epi32(0x7f)); // 7 or 6 bits + const __m128i middlebyte = + _mm_and_si128(perm, _mm_set1_epi32(0x3f00)); // 5 or 6 bits + const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2); + const __m128i highbyte = + _mm_and_si128(perm, _mm_set1_epi32(0x0f0000)); // 4 bits + const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 4); + const __m128i composed = + _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), highbyte_shifted); + __m128i composed_repacked = _mm_packus_epi32(composed, composed); + if (big_endian) + composed_repacked = _mm_shuffle_epi8(composed_repacked, swap); + _mm_storeu_si128((__m128i *)utf16_output, composed_repacked); + utf16_output += 4; + } else if (idx < 209) { + // TWO (2) input code-code units + ////////////// + // There might be garbage inputs where a leading byte mascarades as a + // four-byte leading byte (by being followed by 3 continuation byte), but is + // not greater than 0xf0. This could trigger a buffer overflow if we only + // counted leading bytes of the form 0xf0 as generating surrogate pairs, + // without further UTF-8 validation. Thus we must be careful to ensure that + // only leading bytes at least as large as 0xf0 generate surrogate pairs. We + // do as at the cost of an extra mask. 
+ ///////////// + const __m128i sh = + _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]); + const __m128i perm = _mm_shuffle_epi8(in, sh); + const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi32(0x7f)); + const __m128i middlebyte = _mm_and_si128(perm, _mm_set1_epi32(0x3f00)); + const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2); + __m128i middlehighbyte = _mm_and_si128(perm, _mm_set1_epi32(0x3f0000)); + // correct for spurious high bit + const __m128i correct = + _mm_srli_epi32(_mm_and_si128(perm, _mm_set1_epi32(0x400000)), 1); + middlehighbyte = _mm_xor_si128(correct, middlehighbyte); + const __m128i middlehighbyte_shifted = _mm_srli_epi32(middlehighbyte, 4); + // We deliberately carry the leading four bits in highbyte if they are + // present, we remove them later when computing hightenbits. + const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi32(0xff000000)); + const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 6); + // When we need to generate a surrogate pair (leading byte > 0xF0), then + // the corresponding 32-bit value in 'composed' will be greater than + // > (0xff00000>>6) or > 0x3c00000. This can be used later to identify the + // location of the surrogate pairs. 
+ const __m128i composed = + _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), + _mm_or_si128(highbyte_shifted, middlehighbyte_shifted)); + const __m128i composedminus = + _mm_sub_epi32(composed, _mm_set1_epi32(0x10000)); + const __m128i lowtenbits = + _mm_and_si128(composedminus, _mm_set1_epi32(0x3ff)); + // Notice the 0x3ff mask: + const __m128i hightenbits = + _mm_and_si128(_mm_srli_epi32(composedminus, 10), _mm_set1_epi32(0x3ff)); + const __m128i lowtenbitsadd = + _mm_add_epi32(lowtenbits, _mm_set1_epi32(0xDC00)); + const __m128i hightenbitsadd = + _mm_add_epi32(hightenbits, _mm_set1_epi32(0xD800)); + const __m128i lowtenbitsaddshifted = _mm_slli_epi32(lowtenbitsadd, 16); + __m128i surrogates = _mm_or_si128(hightenbitsadd, lowtenbitsaddshifted); + uint32_t basic_buffer[4]; + uint32_t basic_buffer_swap[4]; + if (big_endian) { + _mm_storeu_si128((__m128i *)basic_buffer_swap, + _mm_shuffle_epi8(composed, swap)); + surrogates = _mm_shuffle_epi8(surrogates, swap); + } + _mm_storeu_si128((__m128i *)basic_buffer, composed); + uint32_t surrogate_buffer[4]; + _mm_storeu_si128((__m128i *)surrogate_buffer, surrogates); + for (size_t i = 0; i < 3; i++) { + if (basic_buffer[i] > 0x3c00000) { + utf16_output[0] = uint16_t(surrogate_buffer[i] & 0xffff); + utf16_output[1] = uint16_t(surrogate_buffer[i] >> 16); + utf16_output += 2; + } else { + utf16_output[0] = big_endian ? uint16_t(basic_buffer_swap[i]) + : uint16_t(basic_buffer[i]); + utf16_output++; + } + } + } else { + // here we know that there is an error but we do not handle errors + } + return consumed; +} +/* end file src/westmere/sse_convert_utf8_to_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +/* begin file src/westmere/sse_convert_utf8_to_utf32.cpp */ +// depends on "tables/utf8_to_utf16_tables.h" + +// Convert up to 12 bytes from utf8 to utf32 using a mask indicating the +// end of the code points. 
Only the least significant 12 bits of the mask +// are accessed. +// It returns how many bytes were consumed (up to 12). +size_t convert_masked_utf8_to_utf32(const char *input, + uint64_t utf8_end_of_code_point_mask, + char32_t *&utf32_output) { + // we use an approach where we try to process up to 12 input bytes. + // Why 12 input bytes and not 16? Because we are concerned with the size of + // the lookup tables. Also 12 is nicely divisible by two and three. + // + // + // Optimization note: our main path below is load-latency dependent. Thus it + // is maybe beneficial to have fast paths that depend on branch prediction but + // have less latency. This results in more instructions but, potentially, also + // higher speeds. + // + // We first try a few fast paths. + const __m128i in = _mm_loadu_si128((__m128i *)input); + const uint16_t input_utf8_end_of_code_point_mask = + utf8_end_of_code_point_mask & 0xfff; + if (utf8_end_of_code_point_mask == 0xfff) { + // We process the data in chunks of 12 bytes. + _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output), + _mm_cvtepu8_epi32(in)); + _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output + 4), + _mm_cvtepu8_epi32(_mm_srli_si128(in, 4))); + _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output + 8), + _mm_cvtepu8_epi32(_mm_srli_si128(in, 8))); + _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output + 12), + _mm_cvtepu8_epi32(_mm_srli_si128(in, 12))); + utf32_output += 12; // We wrote 12 32-bit characters. + return 12; // We consumed 12 bytes. + } + if (((utf8_end_of_code_point_mask & 0xffff) == 0xaaaa)) { + // We want to take 8 2-byte UTF-8 code units and turn them into 8 4-byte + // UTF-32 code units. There is probably a more efficient sequence, but the + // following might do. 
+ const __m128i sh = + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + const __m128i perm = _mm_shuffle_epi8(in, sh); + const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f)); + const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00)); + const __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2)); + _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output), + _mm_cvtepu16_epi32(composed)); + _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output + 4), + _mm_cvtepu16_epi32(_mm_srli_si128(composed, 8))); + utf32_output += 8; // We wrote 32 bytes, 8 code points. + return 16; + } + if (input_utf8_end_of_code_point_mask == 0x924) { + // We want to take 4 3-byte UTF-8 code units and turn them into 4 4-byte + // UTF-32 code units. There is probably a more efficient sequence, but the + // following might do. + const __m128i sh = + _mm_setr_epi8(2, 1, 0, -1, 5, 4, 3, -1, 8, 7, 6, -1, 11, 10, 9, -1); + const __m128i perm = _mm_shuffle_epi8(in, sh); + const __m128i ascii = + _mm_and_si128(perm, _mm_set1_epi32(0x7f)); // 7 or 6 bits + const __m128i middlebyte = + _mm_and_si128(perm, _mm_set1_epi32(0x3f00)); // 5 or 6 bits + const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2); + const __m128i highbyte = + _mm_and_si128(perm, _mm_set1_epi32(0x0f0000)); // 4 bits + const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 4); + const __m128i composed = + _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), highbyte_shifted); + _mm_storeu_si128((__m128i *)utf32_output, composed); + utf32_output += 4; + return 12; + } + /// We do not have a fast path available, so we fallback. 
+ + const uint8_t idx = + tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][0]; + const uint8_t consumed = + tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1]; + if (idx < 64) { + // SIX (6) input code-code units + // this is a relatively easy scenario + // we process SIX (6) input code-code units. The max length in bytes of six + // code code units spanning between 1 and 2 bytes each is 12 bytes. On + // processors where pdep/pext is fast, we might be able to use a small + // lookup table. + const __m128i sh = + _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]); + const __m128i perm = _mm_shuffle_epi8(in, sh); + const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f)); + const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00)); + const __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2)); + _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output), + _mm_cvtepu16_epi32(composed)); + _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output + 4), + _mm_cvtepu16_epi32(_mm_srli_si128(composed, 8))); + utf32_output += 6; // We wrote 12 bytes, 6 code points. 
+ } else if (idx < 145) { + // FOUR (4) input code-code units + const __m128i sh = + _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]); + const __m128i perm = _mm_shuffle_epi8(in, sh); + const __m128i ascii = + _mm_and_si128(perm, _mm_set1_epi32(0x7f)); // 7 or 6 bits + const __m128i middlebyte = + _mm_and_si128(perm, _mm_set1_epi32(0x3f00)); // 5 or 6 bits + const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2); + const __m128i highbyte = + _mm_and_si128(perm, _mm_set1_epi32(0x0f0000)); // 4 bits + const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 4); + const __m128i composed = + _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), highbyte_shifted); + _mm_storeu_si128((__m128i *)utf32_output, composed); + utf32_output += 4; + } else if (idx < 209) { + // TWO (2) input code-code units + const __m128i sh = + _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]); + const __m128i perm = _mm_shuffle_epi8(in, sh); + const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi32(0x7f)); + const __m128i middlebyte = _mm_and_si128(perm, _mm_set1_epi32(0x3f00)); + const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2); + __m128i middlehighbyte = _mm_and_si128(perm, _mm_set1_epi32(0x3f0000)); + // correct for spurious high bit + const __m128i correct = + _mm_srli_epi32(_mm_and_si128(perm, _mm_set1_epi32(0x400000)), 1); + middlehighbyte = _mm_xor_si128(correct, middlehighbyte); + const __m128i middlehighbyte_shifted = _mm_srli_epi32(middlehighbyte, 4); + const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi32(0x07000000)); + const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 6); + const __m128i composed = + _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), + _mm_or_si128(highbyte_shifted, middlehighbyte_shifted)); + _mm_storeu_si128((__m128i *)utf32_output, composed); + utf32_output += 3; + } else { + // here we know that there is an error but we do not handle errors + } + return consumed; +} +/* 
end file src/westmere/sse_convert_utf8_to_utf32.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/westmere/sse_convert_utf8_to_latin1.cpp */ +// depends on "tables/utf8_to_utf16_tables.h" + +// Convert up to 12 bytes from utf8 to latin1 using a mask indicating the +// end of the code points. Only the least significant 12 bits of the mask +// are accessed. +// It returns how many bytes were consumed (up to 12). +size_t convert_masked_utf8_to_latin1(const char *input, + uint64_t utf8_end_of_code_point_mask, + char *&latin1_output) { + // we use an approach where we try to process up to 12 input bytes. + // Why 12 input bytes and not 16? Because we are concerned with the size of + // the lookup tables. Also 12 is nicely divisible by two and three. + // + // + // Optimization note: our main path below is load-latency dependent. Thus it + // is maybe beneficial to have fast paths that depend on branch prediction but + // have less latency. This results in more instructions but, potentially, also + // higher speeds. + // + const __m128i in = _mm_loadu_si128((__m128i *)input); + const uint16_t input_utf8_end_of_code_point_mask = + utf8_end_of_code_point_mask & + 0xfff; // we are only processing 12 bytes in case it is not all ASCII + if (utf8_end_of_code_point_mask == 0xfff) { + // We process the data in chunks of 12 bytes. + _mm_storeu_si128(reinterpret_cast<__m128i *>(latin1_output), in); + latin1_output += 12; // We wrote 12 characters. + return 12; // We consumed 12 bytes. + } + /// We do not have a fast path available, so we fallback. 
+ const uint8_t idx = + tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][0]; + const uint8_t consumed = + tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1]; + // this indicates an invalid input: + if (idx >= 64) { + return consumed; + } + // Here we should have (idx < 64), if not, there is a bug in the validation or + // elsewhere. SIX (6) input code-code units this is a relatively easy scenario + // we process SIX (6) input code-code units. The max length in bytes of six + // code code units spanning between 1 and 2 bytes each is 12 bytes. On + // processors where pdep/pext is fast, we might be able to use a small lookup + // table. + const __m128i sh = + _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]); + const __m128i perm = _mm_shuffle_epi8(in, sh); + const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f)); + const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00)); + __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2)); + const __m128i latin1_packed = _mm_packus_epi16(composed, composed); + // writing 8 bytes even though we only care about the first 6 bytes. + // performance note: it would be faster to use _mm_storeu_si128, we should + // investigate. + _mm_storel_epi64((__m128i *)latin1_output, latin1_packed); + latin1_output += 6; // We wrote 6 bytes. 
+ return consumed; +} +/* end file src/westmere/sse_convert_utf8_to_latin1.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/westmere/sse_convert_utf16_to_latin1.cpp */ +template +std::pair +sse_convert_utf16_to_latin1(const char16_t *buf, size_t len, + char *latin1_output) { + const char16_t *end = buf + len; + while (end - buf >= 8) { + // Load 8 UTF-16 characters into 128-bit SSE register + __m128i in = _mm_loadu_si128(reinterpret_cast(buf)); + + if simdutf_constexpr (!match_system(big_endian)) { + const __m128i swap = + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + in = _mm_shuffle_epi8(in, swap); + } + + __m128i high_byte_mask = _mm_set1_epi16((int16_t)0xFF00); + if (_mm_testz_si128(in, high_byte_mask)) { + // Pack 16-bit characters into 8-bit and store in latin1_output + __m128i latin1_packed = _mm_packus_epi16(in, in); + _mm_storel_epi64(reinterpret_cast<__m128i *>(latin1_output), + latin1_packed); + // Adjust pointers for next iteration + buf += 8; + latin1_output += 8; + } else { + return std::make_pair(nullptr, reinterpret_cast(latin1_output)); + } + } // while + return std::make_pair(buf, latin1_output); +} + +template +std::pair +sse_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, + char *latin1_output) { + const char16_t *start = buf; + const char16_t *end = buf + len; + while (end - buf >= 8) { + __m128i in = _mm_loadu_si128(reinterpret_cast(buf)); + + if simdutf_constexpr (!match_system(big_endian)) { + const __m128i swap = + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + in = _mm_shuffle_epi8(in, swap); + } + + __m128i high_byte_mask = _mm_set1_epi16((int16_t)0xFF00); + if (_mm_testz_si128(in, high_byte_mask)) { + __m128i latin1_packed = _mm_packus_epi16(in, in); + _mm_storel_epi64(reinterpret_cast<__m128i *>(latin1_output), + latin1_packed); + buf += 8; + latin1_output += 8; + } else { + // 
Fallback to scalar code for handling errors + for (int k = 0; k < 8; k++) { + uint16_t word = scalar::utf16::swap_if_needed(buf[k]); + if (word <= 0xff) { + *latin1_output++ = char(word); + } else { + return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), + latin1_output); + } + } + buf += 8; + } + } // while + return std::make_pair(result(error_code::SUCCESS, buf - start), + latin1_output); +} +/* end file src/westmere/sse_convert_utf16_to_latin1.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +/* begin file src/westmere/sse_convert_utf16_to_utf8.cpp */ +/* + The vectorized algorithm works on single SSE register i.e., it + loads eight 16-bit code units. + + We consider three cases: + 1. an input register contains no surrogates and each value + is in range 0x0000 .. 0x07ff. + 2. an input register contains no surrogates and values are + is in range 0x0000 .. 0xffff. + 3. an input register contains surrogates --- i.e. codepoints + can have 16 or 32 bits. + + Ad 1. + + When values are less than 0x0800, it means that a 16-bit code unit + can be converted into: 1) single UTF8 byte (when it is an ASCII + char) or 2) two UTF8 bytes. + + For this case we do only some shuffle to obtain these 2-byte + codes and finally compress the whole SSE register with a single + shuffle. + + We need 256-entry lookup table to get a compression pattern + and the number of output bytes in the compressed vector register. + Each entry occupies 17 bytes. + + Ad 2. + + When values fit in 16-bit code units, but are above 0x07ff, then + a single word may produce one, two or three UTF8 bytes. + + We prepare data for all these three cases in two registers. + The first register contains lower two UTF8 bytes (used in all + cases), while the second one contains just the third byte for + the three-UTF8-bytes case. + + Finally these two registers are interleaved forming eight-element + array of 32-bit values. 
The array spans two SSE registers. + The bytes from the registers are compressed using two shuffles. + + We need 256-entry lookup table to get a compression pattern + and the number of output bytes in the compressed vector register. + Each entry occupies 17 bytes. + + + To summarize: + - We need two 256-entry tables that have 8704 bytes in total. +*/ + +/* + Returns a pair: the first unprocessed byte from buf and utf8_output + A scalar routing should carry on the conversion of the tail. +*/ +template +std::pair +sse_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_output) { + + const char16_t *end = buf + len; + + const __m128i v_0000 = _mm_setzero_si128(); + const __m128i v_f800 = _mm_set1_epi16((int16_t)0xf800); + const __m128i v_d800 = _mm_set1_epi16((int16_t)0xd800); + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 + + while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { + __m128i in = _mm_loadu_si128((__m128i *)buf); + if (big_endian) { + const __m128i swap = + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + in = _mm_shuffle_epi8(in, swap); + } + // a single 16-bit UTF-16 word can yield 1, 2 or 3 UTF-8 bytes + const __m128i v_ff80 = _mm_set1_epi16((int16_t)0xff80); + if (_mm_testz_si128(in, v_ff80)) { // ASCII fast path!!!! + __m128i nextin = _mm_loadu_si128((__m128i *)buf + 1); + if (big_endian) { + const __m128i swap = + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + nextin = _mm_shuffle_epi8(nextin, swap); + } + if (!_mm_testz_si128(nextin, v_ff80)) { + // 1. pack the bytes + // obviously suboptimal. + const __m128i utf8_packed = _mm_packus_epi16(in, in); + // 2. store (16 bytes) + _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); + // 3. adjust pointers + buf += 8; + utf8_output += 8; + in = nextin; + } else { + // 1. pack the bytes + // obviously suboptimal. 
+ const __m128i utf8_packed = _mm_packus_epi16(in, nextin); + // 2. store (16 bytes) + _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! + } + } + + // no bits set above 7th bit + const __m128i one_byte_bytemask = + _mm_cmpeq_epi16(_mm_and_si128(in, v_ff80), v_0000); + const uint16_t one_byte_bitmask = + static_cast(_mm_movemask_epi8(one_byte_bytemask)); + + // no bits set above 11th bit + const __m128i one_or_two_bytes_bytemask = + _mm_cmpeq_epi16(_mm_and_si128(in, v_f800), v_0000); + const uint16_t one_or_two_bytes_bitmask = + static_cast(_mm_movemask_epi8(one_or_two_bytes_bytemask)); + + if (one_or_two_bytes_bitmask == 0xffff) { + internal::westmere::write_v_u16_11bits_to_utf8( + in, utf8_output, one_byte_bytemask, one_byte_bitmask); + buf += 8; + continue; + } + + // 1. Check if there are any surrogate word in the input chunk. + // We have also deal with situation when there is a surrogate word + // at the end of a chunk. + const __m128i surrogates_bytemask = + _mm_cmpeq_epi16(_mm_and_si128(in, v_f800), v_d800); + + // bitmask = 0x0000 if there are no surrogates + // = 0xc000 if the last word is a surrogate + const uint16_t surrogates_bitmask = + static_cast(_mm_movemask_epi8(surrogates_bytemask)); + // It might seem like checking for surrogates_bitmask == 0xc000 could help. + // However, it is likely an uncommon occurrence. + if (surrogates_bitmask == 0x0000) { + // case: code units from register produce either 1, 2 or 3 UTF-8 bytes + const __m128i dup_even = _mm_setr_epi16(0x0000, 0x0202, 0x0404, 0x0606, + 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); + + /* In this branch we handle three cases: + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + single UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two + UTF-8 bytes + 3. 
[aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + three UTF-8 bytes + + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. + + We precompute byte 1 for case #1 and the common byte for cases #2 & #3 + in register t2. + + We precompute byte 1 for case #3 and -- **conditionally** -- precompute + either byte 1 for case #2 or byte 2 for case #3. Note that they + differ by exactly one bit. + + Finally from these two code units we build proper UTF-8 sequence, taking + into account the case (i.e, the number of bytes to write). + */ + /** + * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: + * t2 => [0ccc|cccc] [10cc|cccc] + * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) + */ +#define simdutf_vec(x) _mm_set1_epi16(static_cast(x)) + // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] + const __m128i t0 = _mm_shuffle_epi8(in, dup_even); + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] + const __m128i t1 = _mm_and_si128(t0, simdutf_vec(0b0011111101111111)); + // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] + const __m128i t2 = _mm_or_si128(t1, simdutf_vec(0b1000000000000000)); + + // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] + const __m128i s0 = _mm_srli_epi16(in, 4); + // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] + const __m128i s1 = _mm_and_si128(s0, simdutf_vec(0b0000111111111100)); + // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] + const __m128i s2 = _mm_maddubs_epi16(s1, simdutf_vec(0x0140)); + // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] + const __m128i s3 = _mm_or_si128(s2, simdutf_vec(0b1100000011100000)); + const __m128i m0 = _mm_andnot_si128(one_or_two_bytes_bytemask, + simdutf_vec(0b0100000000000000)); + const __m128i s4 = _mm_xor_si128(s3, m0); +#undef simdutf_vec + + // 4. 
expand code units 16-bit => 32-bit + const __m128i out0 = _mm_unpacklo_epi16(t2, s4); + const __m128i out1 = _mm_unpackhi_epi16(t2, s4); + + // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle + const uint16_t mask = + (one_byte_bitmask & 0x5555) | (one_or_two_bytes_bitmask & 0xaaaa); + if (mask == 0) { + // We only have three-byte code units. Use fast path. + const __m128i shuffle = _mm_setr_epi8(2, 3, 1, 6, 7, 5, 10, 11, 9, 14, + 15, 13, -1, -1, -1, -1); + const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle); + const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle); + _mm_storeu_si128((__m128i *)utf8_output, utf8_0); + utf8_output += 12; + _mm_storeu_si128((__m128i *)utf8_output, utf8_1); + utf8_output += 12; + buf += 8; + continue; + } + const uint8_t mask0 = uint8_t(mask); + + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; + const __m128i shuffle0 = _mm_loadu_si128((__m128i *)(row0 + 1)); + const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle0); + + const uint8_t mask1 = static_cast(mask >> 8); + + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; + const __m128i shuffle1 = _mm_loadu_si128((__m128i *)(row1 + 1)); + const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle1); + + _mm_storeu_si128((__m128i *)utf8_output, utf8_0); + utf8_output += row0[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_1); + utf8_output += row1[0]; + + buf += 8; + // surrogate pair(s) in a register + } else { + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. 
+ size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint16_t word = scalar::utf16::swap_if_needed(buf[k]); + if ((word & 0xFF80) == 0) { + *utf8_output++ = char(word); + } else if ((word & 0xF800) == 0) { + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xF800) != 0xD800) { + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = + scalar::utf16::swap_if_needed(buf[k + 1]); + k++; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return std::make_pair(nullptr, utf8_output); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf8_output++ = char((value >> 18) | 0b11110000); + *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((value & 0b111111) | 0b10000000); + } + } + buf += k; + } + } // while + + return std::make_pair(buf, utf8_output); +} + +/* + Returns a pair: a result struct and utf8_output. + If there is an error, the count field of the result is the position of the + error. Otherwise, it is the position of the first unprocessed byte in buf + (even if finished). A scalar routing should carry on the conversion of the + tail if needed. 
+*/ +template +std::pair +sse_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, + char *utf8_output) { + const char16_t *start = buf; + const char16_t *end = buf + len; + + const __m128i v_0000 = _mm_setzero_si128(); + const __m128i v_f800 = _mm_set1_epi16((int16_t)0xf800); + const __m128i v_d800 = _mm_set1_epi16((int16_t)0xd800); + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 + + while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { + __m128i in = _mm_loadu_si128((__m128i *)buf); + if (big_endian) { + const __m128i swap = + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + in = _mm_shuffle_epi8(in, swap); + } + // a single 16-bit UTF-16 word can yield 1, 2 or 3 UTF-8 bytes + const __m128i v_ff80 = _mm_set1_epi16((int16_t)0xff80); + if (_mm_testz_si128(in, v_ff80)) { // ASCII fast path!!!! + __m128i nextin = _mm_loadu_si128((__m128i *)buf + 1); + if (big_endian) { + const __m128i swap = + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + nextin = _mm_shuffle_epi8(nextin, swap); + } + if (!_mm_testz_si128(nextin, v_ff80)) { + // 1. pack the bytes + // obviously suboptimal. + const __m128i utf8_packed = _mm_packus_epi16(in, in); + // 2. store (16 bytes) + _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); + // 3. adjust pointers + buf += 8; + utf8_output += 8; + in = nextin; + } else { + // 1. pack the bytes + // obviously suboptimal. + const __m128i utf8_packed = _mm_packus_epi16(in, nextin); + // 2. store (16 bytes) + _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! 
+ } + } + + // no bits set above 7th bit + const __m128i one_byte_bytemask = + _mm_cmpeq_epi16(_mm_and_si128(in, v_ff80), v_0000); + const uint16_t one_byte_bitmask = + static_cast(_mm_movemask_epi8(one_byte_bytemask)); + + // no bits set above 11th bit + const __m128i one_or_two_bytes_bytemask = + _mm_cmpeq_epi16(_mm_and_si128(in, v_f800), v_0000); + const uint16_t one_or_two_bytes_bitmask = + static_cast(_mm_movemask_epi8(one_or_two_bytes_bytemask)); + + if (one_or_two_bytes_bitmask == 0xffff) { + internal::westmere::write_v_u16_11bits_to_utf8( + in, utf8_output, one_byte_bytemask, one_byte_bitmask); + buf += 8; + continue; + } + + // 1. Check if there are any surrogate word in the input chunk. + // We have also deal with situation when there is a surrogate word + // at the end of a chunk. + const __m128i surrogates_bytemask = + _mm_cmpeq_epi16(_mm_and_si128(in, v_f800), v_d800); + + // bitmask = 0x0000 if there are no surrogates + // = 0xc000 if the last word is a surrogate + const uint16_t surrogates_bitmask = + static_cast(_mm_movemask_epi8(surrogates_bytemask)); + // It might seem like checking for surrogates_bitmask == 0xc000 could help. + // However, it is likely an uncommon occurrence. + if (surrogates_bitmask == 0x0000) { + // case: code units from register produce either 1, 2 or 3 UTF-8 bytes + const __m128i dup_even = _mm_setr_epi16(0x0000, 0x0202, 0x0404, 0x0606, + 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); + + /* In this branch we handle three cases: + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + single UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two + UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + three UTF-8 bytes + + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. 
+ + We precompute byte 1 for case #1 and the common byte for cases #2 & #3 + in register t2. + + We precompute byte 1 for case #3 and -- **conditionally** -- precompute + either byte 1 for case #2 or byte 2 for case #3. Note that they + differ by exactly one bit. + + Finally from these two code units we build proper UTF-8 sequence, taking + into account the case (i.e, the number of bytes to write). + */ + /** + * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: + * t2 => [0ccc|cccc] [10cc|cccc] + * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) + */ +#define simdutf_vec(x) _mm_set1_epi16(static_cast(x)) + // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] + const __m128i t0 = _mm_shuffle_epi8(in, dup_even); + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] + const __m128i t1 = _mm_and_si128(t0, simdutf_vec(0b0011111101111111)); + // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] + const __m128i t2 = _mm_or_si128(t1, simdutf_vec(0b1000000000000000)); + + // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] + const __m128i s0 = _mm_srli_epi16(in, 4); + // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] + const __m128i s1 = _mm_and_si128(s0, simdutf_vec(0b0000111111111100)); + // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] + const __m128i s2 = _mm_maddubs_epi16(s1, simdutf_vec(0x0140)); + // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] + const __m128i s3 = _mm_or_si128(s2, simdutf_vec(0b1100000011100000)); + const __m128i m0 = _mm_andnot_si128(one_or_two_bytes_bytemask, + simdutf_vec(0b0100000000000000)); + const __m128i s4 = _mm_xor_si128(s3, m0); +#undef simdutf_vec + + // 4. expand code units 16-bit => 32-bit + const __m128i out0 = _mm_unpacklo_epi16(t2, s4); + const __m128i out1 = _mm_unpackhi_epi16(t2, s4); + + // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle + const uint16_t mask = + (one_byte_bitmask & 0x5555) | (one_or_two_bytes_bitmask & 0xaaaa); + if (mask == 0) { + // We only have three-byte code units. Use fast path. 
+ const __m128i shuffle = _mm_setr_epi8(2, 3, 1, 6, 7, 5, 10, 11, 9, 14, + 15, 13, -1, -1, -1, -1); + const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle); + const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle); + _mm_storeu_si128((__m128i *)utf8_output, utf8_0); + utf8_output += 12; + _mm_storeu_si128((__m128i *)utf8_output, utf8_1); + utf8_output += 12; + buf += 8; + continue; + } + const uint8_t mask0 = uint8_t(mask); + + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; + const __m128i shuffle0 = _mm_loadu_si128((__m128i *)(row0 + 1)); + const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle0); + + const uint8_t mask1 = static_cast(mask >> 8); + + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; + const __m128i shuffle1 = _mm_loadu_si128((__m128i *)(row1 + 1)); + const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle1); + + _mm_storeu_si128((__m128i *)utf8_output, utf8_0); + utf8_output += row0[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_1); + utf8_output += row1[0]; + + buf += 8; + // surrogate pair(s) in a register + } else { + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. 
+ size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint16_t word = scalar::utf16::swap_if_needed(buf[k]); + if ((word & 0xFF80) == 0) { + *utf8_output++ = char(word); + } else if ((word & 0xF800) == 0) { + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xF800) != 0xD800) { + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = + scalar::utf16::swap_if_needed(buf[k + 1]); + k++; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return std::make_pair( + result(error_code::SURROGATE, buf - start + k - 1), + utf8_output); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf8_output++ = char((value >> 18) | 0b11110000); + *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((value & 0b111111) | 0b10000000); + } + } + buf += k; + } + } // while + + return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output); +} +/* end file src/westmere/sse_convert_utf16_to_utf8.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +/* begin file src/westmere/sse_convert_utf16_to_utf32.cpp */ +/* + The vectorized algorithm works on single SSE register i.e., it + loads eight 16-bit code units. + + We consider three cases: + 1. an input register contains no surrogates and each value + is in range 0x0000 .. 0x07ff. + 2. an input register contains no surrogates and values are + is in range 0x0000 .. 0xffff. + 3. an input register contains surrogates --- i.e. 
codepoints + can have 16 or 32 bits. + + Ad 1. + + When values are less than 0x0800, it means that a 16-bit code unit + can be converted into: 1) single UTF8 byte (when it's an ASCII + char) or 2) two UTF8 bytes. + + For this case we do only some shuffle to obtain these 2-byte + codes and finally compress the whole SSE register with a single + shuffle. + + We need 256-entry lookup table to get a compression pattern + and the number of output bytes in the compressed vector register. + Each entry occupies 17 bytes. + + Ad 2. + + When values fit in 16-bit code units, but are above 0x07ff, then + a single word may produce one, two or three UTF8 bytes. + + We prepare data for all these three cases in two registers. + The first register contains lower two UTF8 bytes (used in all + cases), while the second one contains just the third byte for + the three-UTF8-bytes case. + + Finally these two registers are interleaved forming eight-element + array of 32-bit values. The array spans two SSE registers. + The bytes from the registers are compressed using two shuffles. + + We need 256-entry lookup table to get a compression pattern + and the number of output bytes in the compressed vector register. + Each entry occupies 17 bytes. + + + To summarize: + - We need two 256-entry tables that have 8704 bytes in total. +*/ + +/* + Returns a pair: the first unprocessed byte from buf and utf8_output + A scalar routine should carry on the conversion of the tail. +*/ +template +std::pair +sse_convert_utf16_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_output) { + const char16_t *end = buf + len; + + const __m128i v_f800 = _mm_set1_epi16((int16_t)0xf800); + const __m128i v_d800 = _mm_set1_epi16((int16_t)0xd800); + + while (end - buf >= 8) { + __m128i in = _mm_loadu_si128((__m128i *)buf); + + if (big_endian) { + const __m128i swap = + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + in = _mm_shuffle_epi8(in, swap); + } + + // 1. 
Check if there are any surrogate word in the input chunk. + // We have also deal with situation when there is a surrogate word + // at the end of a chunk. + const __m128i surrogates_bytemask = + _mm_cmpeq_epi16(_mm_and_si128(in, v_f800), v_d800); + + // bitmask = 0x0000 if there are no surrogates + // = 0xc000 if the last word is a surrogate + const uint16_t surrogates_bitmask = + static_cast(_mm_movemask_epi8(surrogates_bytemask)); + // It might seem like checking for surrogates_bitmask == 0xc000 could help. + // However, it is likely an uncommon occurrence. + if (surrogates_bitmask == 0x0000) { + // case: no surrogate pair, extend 16-bit code units to 32-bit code units + _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output), + _mm_cvtepu16_epi32(in)); + _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output + 4), + _mm_cvtepu16_epi32(_mm_srli_si128(in, 8))); + utf32_output += 8; + buf += 8; + // surrogate pair(s) in a register + } else { + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint16_t word = scalar::utf16::swap_if_needed(buf[k]); + if ((word & 0xF800) != 0xD800) { + *utf32_output++ = char32_t(word); + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = + scalar::utf16::swap_if_needed(buf[k + 1]); + k++; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return std::make_pair(nullptr, utf32_output); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf32_output++ = char32_t(value); + } + } + buf += k; + } + } // while + return std::make_pair(buf, utf32_output); +} + +/* + Returns a pair: a result struct and utf8_output. 
+ If there is an error, the count field of the result is the position of the + error. Otherwise, it is the position of the first unprocessed byte in buf + (even if finished). A scalar routing should carry on the conversion of the + tail if needed. +*/ +template +std::pair +sse_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, + char32_t *utf32_output) { + const char16_t *start = buf; + const char16_t *end = buf + len; + + const __m128i v_f800 = _mm_set1_epi16((int16_t)0xf800); + const __m128i v_d800 = _mm_set1_epi16((int16_t)0xd800); + + while (end - buf >= 8) { + __m128i in = _mm_loadu_si128((__m128i *)buf); + + if (big_endian) { + const __m128i swap = + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + in = _mm_shuffle_epi8(in, swap); + } + + // 1. Check if there are any surrogate word in the input chunk. + // We have also deal with situation when there is a surrogate word + // at the end of a chunk. + const __m128i surrogates_bytemask = + _mm_cmpeq_epi16(_mm_and_si128(in, v_f800), v_d800); + + // bitmask = 0x0000 if there are no surrogates + // = 0xc000 if the last word is a surrogate + const uint16_t surrogates_bitmask = + static_cast(_mm_movemask_epi8(surrogates_bytemask)); + // It might seem like checking for surrogates_bitmask == 0xc000 could help. + // However, it is likely an uncommon occurrence. + if (surrogates_bitmask == 0x0000) { + // case: no surrogate pair, extend 16-bit code units to 32-bit code units + _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output), + _mm_cvtepu16_epi32(in)); + _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output + 4), + _mm_cvtepu16_epi32(_mm_srli_si128(in, 8))); + utf32_output += 8; + buf += 8; + // surrogate pair(s) in a register + } else { + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. 
+ size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint16_t word = scalar::utf16::swap_if_needed(buf[k]); + if ((word & 0xF800) != 0xD800) { + *utf32_output++ = char32_t(word); + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = + scalar::utf16::swap_if_needed(buf[k + 1]); + k++; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return std::make_pair( + result(error_code::SURROGATE, buf - start + k - 1), + utf32_output); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf32_output++ = char32_t(value); + } + } + buf += k; + } + } // while + return std::make_pair(result(error_code::SUCCESS, buf - start), utf32_output); +} +/* end file src/westmere/sse_convert_utf16_to_utf32.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/westmere/sse_convert_utf32_to_latin1.cpp */ +std::pair +sse_convert_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) { + const size_t rounded_len = len & ~0xF; // Round down to nearest multiple of 16 + + __m128i high_bytes_mask = _mm_set1_epi32(0xFFFFFF00); + __m128i shufmask = + _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 8, 4, 0); + + for (size_t i = 0; i < rounded_len; i += 16) { + __m128i in1 = _mm_loadu_si128((__m128i *)buf); + __m128i in2 = _mm_loadu_si128((__m128i *)(buf + 4)); + __m128i in3 = _mm_loadu_si128((__m128i *)(buf + 8)); + __m128i in4 = _mm_loadu_si128((__m128i *)(buf + 12)); + + __m128i check_combined = _mm_or_si128(in1, in2); + check_combined = _mm_or_si128(check_combined, in3); + check_combined = _mm_or_si128(check_combined, in4); + + if (!_mm_testz_si128(check_combined, high_bytes_mask)) { + return std::make_pair(nullptr, latin1_output); + } + __m128i pack1 = _mm_unpacklo_epi32(_mm_shuffle_epi8(in1, 
shufmask), + _mm_shuffle_epi8(in2, shufmask)); + __m128i pack2 = _mm_unpacklo_epi32(_mm_shuffle_epi8(in3, shufmask), + _mm_shuffle_epi8(in4, shufmask)); + __m128i pack = _mm_unpacklo_epi64(pack1, pack2); + _mm_storeu_si128((__m128i *)latin1_output, pack); + latin1_output += 16; + buf += 16; + } + + return std::make_pair(buf, latin1_output); +} + +std::pair +sse_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, + char *latin1_output) { + const char32_t *start = buf; + const size_t rounded_len = len & ~0xF; // Round down to nearest multiple of 16 + + __m128i high_bytes_mask = _mm_set1_epi32(0xFFFFFF00); + __m128i shufmask = + _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 8, 4, 0); + + for (size_t i = 0; i < rounded_len; i += 16) { + __m128i in1 = _mm_loadu_si128((__m128i *)buf); + __m128i in2 = _mm_loadu_si128((__m128i *)(buf + 4)); + __m128i in3 = _mm_loadu_si128((__m128i *)(buf + 8)); + __m128i in4 = _mm_loadu_si128((__m128i *)(buf + 12)); + + __m128i check_combined = _mm_or_si128(in1, in2); + check_combined = _mm_or_si128(check_combined, in3); + check_combined = _mm_or_si128(check_combined, in4); + + if (!_mm_testz_si128(check_combined, high_bytes_mask)) { + // Fallback to scalar code for handling errors + for (int k = 0; k < 16; k++) { + char32_t codepoint = buf[k]; + if (codepoint <= 0xff) { + *latin1_output++ = char(codepoint); + } else { + return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), + latin1_output); + } + } + buf += 16; + continue; + } + __m128i pack1 = _mm_unpacklo_epi32(_mm_shuffle_epi8(in1, shufmask), + _mm_shuffle_epi8(in2, shufmask)); + __m128i pack2 = _mm_unpacklo_epi32(_mm_shuffle_epi8(in3, shufmask), + _mm_shuffle_epi8(in4, shufmask)); + __m128i pack = _mm_unpacklo_epi64(pack1, pack2); + _mm_storeu_si128((__m128i *)latin1_output, pack); + latin1_output += 16; + buf += 16; + } + + return std::make_pair(result(error_code::SUCCESS, buf - start), + latin1_output); +} +/* end file 
src/westmere/sse_convert_utf32_to_latin1.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +/* begin file src/westmere/sse_convert_utf32_to_utf8.cpp */ +std::pair +sse_convert_utf32_to_utf8(const char32_t *buf, size_t len, char *utf8_output) { + const char32_t *end = buf + len; + + const __m128i v_0000 = _mm_setzero_si128(); //__m128 = 128 bits + const __m128i v_f800 = _mm_set1_epi16((uint16_t)0xf800); // 1111 1000 0000 + // 0000 + const __m128i v_c080 = _mm_set1_epi16((uint16_t)0xc080); // 1100 0000 1000 + // 0000 + const __m128i v_ff80 = _mm_set1_epi16((uint16_t)0xff80); // 1111 1111 1000 + // 0000 + const __m128i v_ffff0000 = _mm_set1_epi32( + (uint32_t)0xffff0000); // 1111 1111 1111 1111 0000 0000 0000 0000 + const __m128i v_7fffffff = _mm_set1_epi32( + (uint32_t)0x7fffffff); // 0111 1111 1111 1111 1111 1111 1111 1111 + __m128i running_max = _mm_setzero_si128(); + __m128i forbidden_bytemask = _mm_setzero_si128(); + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 + + while (end - buf >= + std::ptrdiff_t( + 16 + safety_margin)) { // buf is a char32_t pointer, each char32_t + // has 4 bytes or 32 bits, thus buf + 16 * + // char_32t = 512 bits = 64 bytes + // We load two 16 bytes registers for a total of 32 bytes or 16 characters. 
+ __m128i in = _mm_loadu_si128((__m128i *)buf); + __m128i nextin = _mm_loadu_si128( + (__m128i *)buf + 1); // These two values can hold only 8 UTF32 chars + running_max = _mm_max_epu32( + _mm_max_epu32(in, running_max), // take element-wise max char32_t from + // in and running_max vector + nextin); // and take element-wise max element from nextin and + // running_max vector + + // Pack 32-bit UTF-32 code units to 16-bit UTF-16 code units with unsigned + // saturation + __m128i in_16 = _mm_packus_epi32( + _mm_and_si128(in, v_7fffffff), + _mm_and_si128( + nextin, + v_7fffffff)); // in this context pack the two __m128 into a single + // By ensuring the highest bit is set to 0(&v_7fffffff), we are making sure + // all values are interpreted as non-negative, or specifically, the values + // are within the range of valid Unicode code points. remember : having + // leading byte 0 means a positive number by the two complements system. + // Unicode is well beneath the range where you'll start getting issues so + // that's OK. + + // Try to apply UTF-16 => UTF-8 from ./sse_convert_utf16_to_utf8.cpp + + // Check for ASCII fast path + + // ASCII fast path!!!! + // We eagerly load another 32 bytes, hoping that they will be ASCII too. + // The intuition is that we try to collect 16 ASCII characters which + // requires a total of 64 bytes of input. If we fail, we just pass thirdin + // and fourthin as our new inputs. 
+ if (_mm_testz_si128(in_16, v_ff80)) { // if the first two blocks are ASCII + __m128i thirdin = _mm_loadu_si128((__m128i *)buf + 2); + __m128i fourthin = _mm_loadu_si128((__m128i *)buf + 3); + running_max = _mm_max_epu32( + _mm_max_epu32(thirdin, running_max), + fourthin); // take the running max of all 4 vectors thus far + __m128i nextin_16 = _mm_packus_epi32( + _mm_and_si128(thirdin, v_7fffffff), + _mm_and_si128(fourthin, + v_7fffffff)); // pack into 1 vector, now you have two + if (!_mm_testz_si128( + nextin_16, + v_ff80)) { // checks if the second packed vector is ASCII, if not: + // 1. pack the bytes + // obviously suboptimal. + const __m128i utf8_packed = _mm_packus_epi16( + in_16, in_16); // creates two copy of in_16 in 1 vector + // 2. store (16 bytes) + _mm_storeu_si128((__m128i *)utf8_output, + utf8_packed); // put them into the output + // 3. adjust pointers + buf += 8; // the char32_t buffer pointer goes up 8 char32_t chars* 32 + // bits = 256 bits + utf8_output += + 8; // same with output, e.g. lift the first two blocks alone. + // Proceed with next input + in_16 = nextin_16; + // We need to update in and nextin because they are used later. + in = thirdin; + nextin = fourthin; + } else { + // 1. pack the bytes + const __m128i utf8_packed = _mm_packus_epi16(in_16, nextin_16); + // 2. store (16 bytes) + _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! + } + } + + // no bits set above 7th bit -- find out all the ASCII characters + const __m128i one_byte_bytemask = + _mm_cmpeq_epi16( // this takes four bytes at a time and compares: + _mm_and_si128(in_16, v_ff80), // the vector that get only the first + // 9 bits of each 16-bit/2-byte units + v_0000 // + ); // they should be all zero if they are ASCII. E.g. 
ASCII in UTF32 is + // of format 0000 0000 0000 0XXX XXXX + // _mm_cmpeq_epi16 should now return a 1111 1111 1111 1111 for equals, and + // 0000 0000 0000 0000 if not for each 16-bit/2-byte units + const uint16_t one_byte_bitmask = static_cast(_mm_movemask_epi8( + one_byte_bytemask)); // collect the MSB from previous vector and put + // them into uint16_t mas + + // no bits set above 11th bit + const __m128i one_or_two_bytes_bytemask = + _mm_cmpeq_epi16(_mm_and_si128(in_16, v_f800), v_0000); + const uint16_t one_or_two_bytes_bitmask = + static_cast(_mm_movemask_epi8(one_or_two_bytes_bytemask)); + + if (one_or_two_bytes_bitmask == 0xffff) { + // case: all code units either produce 1 or 2 UTF-8 bytes (at least one + // produces 2 bytes) + // 1. prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 + // expected output : [110a|aaaa|10bb|bbbb] x 8 + const __m128i v_1f00 = + _mm_set1_epi16((int16_t)0x1f00); // 0001 1111 0000 0000 + const __m128i v_003f = + _mm_set1_epi16((int16_t)0x003f); // 0000 0000 0011 1111 + + // t0 = [000a|aaaa|bbbb|bb00] + const __m128i t0 = _mm_slli_epi16(in_16, 2); // shift packed vector by two + // t1 = [000a|aaaa|0000|0000] + const __m128i t1 = _mm_and_si128(t0, v_1f00); // potential first utf8 byte + // t2 = [0000|0000|00bb|bbbb] + const __m128i t2 = + _mm_and_si128(in_16, v_003f); // potential second utf8 byte + // t3 = [000a|aaaa|00bb|bbbb] + const __m128i t3 = + _mm_or_si128(t1, t2); // first and second potential utf8 byte together + // t4 = [110a|aaaa|10bb|bbbb] + const __m128i t4 = _mm_or_si128( + t3, + v_c080); // t3 | 1100 0000 1000 0000 = full potential 2-byte utf8 unit + + // 2. merge ASCII and 2-byte codewords + const __m128i utf8_unpacked = + _mm_blendv_epi8(t4, in_16, one_byte_bytemask); + + // 3. 
prepare bitmask for 8-bit lookup + // one_byte_bitmask = hhggffeeddccbbaa -- the bits are doubled (h - + // MSB, a - LSB) + const uint16_t m0 = one_byte_bitmask & 0x5555; // m0 = 0h0g0f0e0d0c0b0a + const uint16_t m1 = + static_cast(m0 >> 7); // m1 = 00000000h0g0f0e0 + const uint8_t m2 = + static_cast((m0 | m1) & 0xff); // m2 = hdgcfbea + // 4. pack the bytes + const uint8_t *row = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0]; + const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1)); + const __m128i utf8_packed = _mm_shuffle_epi8(utf8_unpacked, shuffle); + + // 5. store bytes + _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); + + // 6. adjust pointers + buf += 8; + utf8_output += row[0]; + continue; + } + + // Check for overflow in packing + + const __m128i saturation_bytemask = _mm_cmpeq_epi32( + _mm_and_si128(_mm_or_si128(in, nextin), v_ffff0000), v_0000); + const uint32_t saturation_bitmask = + static_cast(_mm_movemask_epi8(saturation_bytemask)); + if (saturation_bitmask == 0xffff) { + // case: code units from register produce either 1, 2 or 3 UTF-8 bytes + const __m128i v_d800 = _mm_set1_epi16((uint16_t)0xd800); + forbidden_bytemask = + _mm_or_si128(forbidden_bytemask, + _mm_cmpeq_epi16(_mm_and_si128(in_16, v_f800), v_d800)); + + const __m128i dup_even = _mm_setr_epi16(0x0000, 0x0202, 0x0404, 0x0606, + 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); + + /* In this branch we handle three cases: + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + single UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - + two UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + three UTF-8 bytes + + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. + + We precompute byte 1 for case #1 and the common byte for cases #2 & #3 + in register t2. 
+ + We precompute byte 1 for case #3 and -- **conditionally** -- precompute + either byte 1 for case #2 or byte 2 for case #3. Note that they + differ by exactly one bit. + + Finally from these two code units we build proper UTF-8 sequence, taking + into account the case (i.e, the number of bytes to write). + */ + /** + * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: + * t2 => [0ccc|cccc] [10cc|cccc] + * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) + */ +#define simdutf_vec(x) _mm_set1_epi16(static_cast(x)) + // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] + const __m128i t0 = _mm_shuffle_epi8(in_16, dup_even); + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] + const __m128i t1 = _mm_and_si128(t0, simdutf_vec(0b0011111101111111)); + // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] + const __m128i t2 = _mm_or_si128(t1, simdutf_vec(0b1000000000000000)); + + // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] + const __m128i s0 = _mm_srli_epi16(in_16, 4); + // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] + const __m128i s1 = _mm_and_si128(s0, simdutf_vec(0b0000111111111100)); + // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] + const __m128i s2 = _mm_maddubs_epi16(s1, simdutf_vec(0x0140)); + // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] + const __m128i s3 = _mm_or_si128(s2, simdutf_vec(0b1100000011100000)); + const __m128i m0 = _mm_andnot_si128(one_or_two_bytes_bytemask, + simdutf_vec(0b0100000000000000)); + const __m128i s4 = _mm_xor_si128(s3, m0); +#undef simdutf_vec + + // 4. expand code units 16-bit => 32-bit + const __m128i out0 = _mm_unpacklo_epi16(t2, s4); + const __m128i out1 = _mm_unpackhi_epi16(t2, s4); + + // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle + const uint16_t mask = + (one_byte_bitmask & 0x5555) | (one_or_two_bytes_bitmask & 0xaaaa); + if (mask == 0) { + // We only have three-byte code units. Use fast path. 
+ const __m128i shuffle = _mm_setr_epi8(2, 3, 1, 6, 7, 5, 10, 11, 9, 14, + 15, 13, -1, -1, -1, -1); + const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle); + const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle); + _mm_storeu_si128((__m128i *)utf8_output, utf8_0); + utf8_output += 12; + _mm_storeu_si128((__m128i *)utf8_output, utf8_1); + utf8_output += 12; + buf += 8; + continue; + } + const uint8_t mask0 = uint8_t(mask); + + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; + const __m128i shuffle0 = _mm_loadu_si128((__m128i *)(row0 + 1)); + const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle0); + + const uint8_t mask1 = static_cast(mask >> 8); + + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; + const __m128i shuffle1 = _mm_loadu_si128((__m128i *)(row1 + 1)); + const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle1); + + _mm_storeu_si128((__m128i *)utf8_output, utf8_0); + utf8_output += row0[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_1); + utf8_output += row1[0]; + + buf += 8; + } else { + // case: at least one 32-bit word produce a surrogate pair in UTF-16 <=> + // will produce four UTF-8 bytes Let us do a scalar fallback. It may seem + // wasteful to use scalar code, but being efficient with SIMD in the + // presence of surrogate pairs may require non-trivial tables. 
+ size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint32_t word = buf[k]; + if ((word & 0xFFFFFF80) == 0) { + *utf8_output++ = char(word); + } else if ((word & 0xFFFFF800) == 0) { + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xFFFF0000) == 0) { + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair(nullptr, utf8_output); + } + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { + if (word > 0x10FFFF) { + return std::make_pair(nullptr, utf8_output); + } + *utf8_output++ = char((word >> 18) | 0b11110000); + *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } + } + buf += k; + } + } // while + + // check for invalid input + const __m128i v_10ffff = _mm_set1_epi32((uint32_t)0x10ffff); + if (static_cast(_mm_movemask_epi8(_mm_cmpeq_epi32( + _mm_max_epu32(running_max, v_10ffff), v_10ffff))) != 0xffff) { + return std::make_pair(nullptr, utf8_output); + } + + if (static_cast(_mm_movemask_epi8(forbidden_bytemask)) != 0) { + return std::make_pair(nullptr, utf8_output); + } + + return std::make_pair(buf, utf8_output); +} + +std::pair +sse_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, + char *utf8_output) { + const char32_t *end = buf + len; + const char32_t *start = buf; + + const __m128i v_0000 = _mm_setzero_si128(); + const __m128i v_f800 = _mm_set1_epi16((uint16_t)0xf800); + const __m128i v_c080 = _mm_set1_epi16((uint16_t)0xc080); + const __m128i v_ff80 = _mm_set1_epi16((uint16_t)0xff80); + const __m128i v_ffff0000 = _mm_set1_epi32((uint32_t)0xffff0000); + const __m128i v_7fffffff = 
_mm_set1_epi32((uint32_t)0x7fffffff); + const __m128i v_10ffff = _mm_set1_epi32((uint32_t)0x10ffff); + + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 + + while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { + // We load two 16 bytes registers for a total of 32 bytes or 8 characters. + __m128i in = _mm_loadu_si128((__m128i *)buf); + __m128i nextin = _mm_loadu_si128((__m128i *)buf + 1); + // Check for too large input + __m128i max_input = _mm_max_epu32(_mm_max_epu32(in, nextin), v_10ffff); + if (static_cast(_mm_movemask_epi8( + _mm_cmpeq_epi32(max_input, v_10ffff))) != 0xffff) { + return std::make_pair(result(error_code::TOO_LARGE, buf - start), + utf8_output); + } + + // Pack 32-bit UTF-32 code units to 16-bit UTF-16 code units with unsigned + // saturation + __m128i in_16 = _mm_packus_epi32(_mm_and_si128(in, v_7fffffff), + _mm_and_si128(nextin, v_7fffffff)); + + // Try to apply UTF-16 => UTF-8 from ./sse_convert_utf16_to_utf8.cpp + + // Check for ASCII fast path + if (_mm_testz_si128(in_16, v_ff80)) { // ASCII fast path!!!! + // 1. pack the bytes + // obviously suboptimal. + const __m128i utf8_packed = _mm_packus_epi16(in_16, in_16); + // 2. store (16 bytes) + _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); + // 3. adjust pointers + buf += 8; + utf8_output += 8; + continue; + } + + // no bits set above 7th bit + const __m128i one_byte_bytemask = + _mm_cmpeq_epi16(_mm_and_si128(in_16, v_ff80), v_0000); + const uint16_t one_byte_bitmask = + static_cast(_mm_movemask_epi8(one_byte_bytemask)); + + // no bits set above 11th bit + const __m128i one_or_two_bytes_bytemask = + _mm_cmpeq_epi16(_mm_and_si128(in_16, v_f800), v_0000); + const uint16_t one_or_two_bytes_bitmask = + static_cast(_mm_movemask_epi8(one_or_two_bytes_bytemask)); + + if (one_or_two_bytes_bitmask == 0xffff) { + // case: all code units either produce 1 or 2 UTF-8 bytes (at least one + // produces 2 bytes) + // 1. 
prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 + // expected output : [110a|aaaa|10bb|bbbb] x 8 + const __m128i v_1f00 = _mm_set1_epi16((int16_t)0x1f00); + const __m128i v_003f = _mm_set1_epi16((int16_t)0x003f); + + // t0 = [000a|aaaa|bbbb|bb00] + const __m128i t0 = _mm_slli_epi16(in_16, 2); + // t1 = [000a|aaaa|0000|0000] + const __m128i t1 = _mm_and_si128(t0, v_1f00); + // t2 = [0000|0000|00bb|bbbb] + const __m128i t2 = _mm_and_si128(in_16, v_003f); + // t3 = [000a|aaaa|00bb|bbbb] + const __m128i t3 = _mm_or_si128(t1, t2); + // t4 = [110a|aaaa|10bb|bbbb] + const __m128i t4 = _mm_or_si128(t3, v_c080); + + // 2. merge ASCII and 2-byte codewords + const __m128i utf8_unpacked = + _mm_blendv_epi8(t4, in_16, one_byte_bytemask); + + // 3. prepare bitmask for 8-bit lookup + // one_byte_bitmask = hhggffeeddccbbaa -- the bits are doubled (h - + // MSB, a - LSB) + const uint16_t m0 = one_byte_bitmask & 0x5555; // m0 = 0h0g0f0e0d0c0b0a + const uint16_t m1 = + static_cast(m0 >> 7); // m1 = 00000000h0g0f0e0 + const uint8_t m2 = + static_cast((m0 | m1) & 0xff); // m2 = hdgcfbea + // 4. pack the bytes + const uint8_t *row = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0]; + const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1)); + const __m128i utf8_packed = _mm_shuffle_epi8(utf8_unpacked, shuffle); + + // 5. store bytes + _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); + + // 6. 
adjust pointers + buf += 8; + utf8_output += row[0]; + continue; + } + + // Check for overflow in packing + const __m128i saturation_bytemask = _mm_cmpeq_epi32( + _mm_and_si128(_mm_or_si128(in, nextin), v_ffff0000), v_0000); + const uint32_t saturation_bitmask = + static_cast(_mm_movemask_epi8(saturation_bytemask)); + + if (saturation_bitmask == 0xffff) { + // case: code units from register produce either 1, 2 or 3 UTF-8 bytes + + // Check for illegal surrogate code units + const __m128i v_d800 = _mm_set1_epi16((uint16_t)0xd800); + const __m128i forbidden_bytemask = + _mm_cmpeq_epi16(_mm_and_si128(in_16, v_f800), v_d800); + if (static_cast(_mm_movemask_epi8(forbidden_bytemask)) != 0) { + return std::make_pair(result(error_code::SURROGATE, buf - start), + utf8_output); + } + + const __m128i dup_even = _mm_setr_epi16(0x0000, 0x0202, 0x0404, 0x0606, + 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); + + /* In this branch we handle three cases: + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + single UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - + two UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + three UTF-8 bytes + + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. + + We precompute byte 1 for case #1 and the common byte for cases #2 & #3 + in register t2. + + We precompute byte 1 for case #3 and -- **conditionally** -- precompute + either byte 1 for case #2 or byte 2 for case #3. Note that they + differ by exactly one bit. + + Finally from these two code units we build proper UTF-8 sequence, taking + into account the case (i.e, the number of bytes to write). 
+ */ + /** + * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: + * t2 => [0ccc|cccc] [10cc|cccc] + * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) + */ +#define simdutf_vec(x) _mm_set1_epi16(static_cast(x)) + // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] + const __m128i t0 = _mm_shuffle_epi8(in_16, dup_even); + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] + const __m128i t1 = _mm_and_si128(t0, simdutf_vec(0b0011111101111111)); + // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] + const __m128i t2 = _mm_or_si128(t1, simdutf_vec(0b1000000000000000)); + + // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] + const __m128i s0 = _mm_srli_epi16(in_16, 4); + // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] + const __m128i s1 = _mm_and_si128(s0, simdutf_vec(0b0000111111111100)); + // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] + const __m128i s2 = _mm_maddubs_epi16(s1, simdutf_vec(0x0140)); + // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] + const __m128i s3 = _mm_or_si128(s2, simdutf_vec(0b1100000011100000)); + const __m128i m0 = _mm_andnot_si128(one_or_two_bytes_bytemask, + simdutf_vec(0b0100000000000000)); + const __m128i s4 = _mm_xor_si128(s3, m0); +#undef simdutf_vec + + // 4. expand code units 16-bit => 32-bit + const __m128i out0 = _mm_unpacklo_epi16(t2, s4); + const __m128i out1 = _mm_unpackhi_epi16(t2, s4); + + // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle + const uint16_t mask = + (one_byte_bitmask & 0x5555) | (one_or_two_bytes_bitmask & 0xaaaa); + if (mask == 0) { + // We only have three-byte code units. Use fast path. 
+ const __m128i shuffle = _mm_setr_epi8(2, 3, 1, 6, 7, 5, 10, 11, 9, 14, + 15, 13, -1, -1, -1, -1); + const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle); + const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle); + _mm_storeu_si128((__m128i *)utf8_output, utf8_0); + utf8_output += 12; + _mm_storeu_si128((__m128i *)utf8_output, utf8_1); + utf8_output += 12; + buf += 8; + continue; + } + const uint8_t mask0 = uint8_t(mask); + + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; + const __m128i shuffle0 = _mm_loadu_si128((__m128i *)(row0 + 1)); + const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle0); + + const uint8_t mask1 = static_cast(mask >> 8); + + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; + const __m128i shuffle1 = _mm_loadu_si128((__m128i *)(row1 + 1)); + const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle1); + + _mm_storeu_si128((__m128i *)utf8_output, utf8_0); + utf8_output += row0[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_1); + utf8_output += row1[0]; + + buf += 8; + } else { + // case: at least one 32-bit word produce a surrogate pair in UTF-16 <=> + // will produce four UTF-8 bytes Let us do a scalar fallback. It may seem + // wasteful to use scalar code, but being efficient with SIMD in the + // presence of surrogate pairs may require non-trivial tables. 
+ size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint32_t word = buf[k]; + if ((word & 0xFFFFFF80) == 0) { + *utf8_output++ = char(word); + } else if ((word & 0xFFFFF800) == 0) { + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xFFFF0000) == 0) { + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair( + result(error_code::SURROGATE, buf - start + k), utf8_output); + } + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { + if (word > 0x10FFFF) { + return std::make_pair( + result(error_code::TOO_LARGE, buf - start + k), utf8_output); + } + *utf8_output++ = char((word >> 18) | 0b11110000); + *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } + } + buf += k; + } + } // while + return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output); +} +/* end file src/westmere/sse_convert_utf32_to_utf8.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +/* begin file src/westmere/sse_convert_utf32_to_utf16.cpp */ +struct expansion_result_t { + size_t u16count; + __m128i compressed; +}; + +// Function sse_expand_surrogate takes four **valid** UTF-32 characters +// having at least one code-point producing a surrogate pair. 
+template +expansion_result_t sse_expand_surrogate(const __m128i x) { + using vector_u32 = simd32; + using vector_u8 = simd8; + + const auto in = vector_u32(x); + + const auto non_surrogate_mask = (in & uint32_t(0xffff0000)) == uint32_t(0); + const auto mask = (~non_surrogate_mask.to_4bit_bitmask()) & 0xf; + + const auto t0 = in - uint32_t(0x00010000); + const auto hi = t0.shr<10>() & uint32_t(0x000003ff); + const auto lo = t0.shl<16>() & uint32_t(0x03ff0000); + const auto surrogates = (lo | hi) | uint32_t(0xdc00d800); + + const auto merged = as_vector_u8(select(non_surrogate_mask, in, surrogates)); + + const auto shuffle = vector_u8::load( + (byte_order == endianness::LITTLE) + ? tables::utf32_to_utf16::pack_utf32_to_utf16le[mask] + : tables::utf32_to_utf16::pack_utf32_to_utf16be[mask]); + + const size_t u16count = (4 + count_ones(mask)); + const auto compressed = shuffle.lookup_16(merged); + + return {u16count, compressed}; +} + +// Function `validate_utf32` checks 2 x 4 UTF-32 characters for their validity. 
+simdutf_really_inline bool validate_utf32(const __m128i a, const __m128i b) { + using vector_u32 = simd32; + + const auto in0 = vector_u32(a); + const auto in1 = vector_u32(b); + + const auto standardmax = vector_u32::splat(0x10ffff); + const auto offset = vector_u32::splat(0xffff2000); + const auto standardoffsetmax = vector_u32::splat(0xfffff7ff); + + const auto too_large = max(in0, in1) > standardmax; + const auto surrogate0 = (in0 + offset) > standardoffsetmax; + const auto surrogate1 = (in1 + offset) > standardoffsetmax; + + const auto combined = too_large | surrogate0 | surrogate1; + return !combined.any(); +} + +template +std::pair +sse_convert_utf32_to_utf16(const char32_t *buf, size_t len, + char16_t *utf16_output) { + + const char32_t *end = buf + len; + + const __m128i v_ffff0000 = _mm_set1_epi32((int32_t)0xffff0000); + __m128i forbidden_bytemask = _mm_setzero_si128(); + + while (end - buf >= 16 + 8) { + const __m128i *ptr = reinterpret_cast(buf); + const __m128i in0 = _mm_loadu_si128(ptr + 0); + const __m128i in1 = _mm_loadu_si128(ptr + 1); + const __m128i in2 = _mm_loadu_si128(ptr + 2); + const __m128i in3 = _mm_loadu_si128(ptr + 3); + + const __m128i combined = + _mm_or_si128(_mm_or_si128(in2, in3), _mm_or_si128(in0, in1)); + if (simdutf_likely(_mm_testz_si128(combined, v_ffff0000))) { + // No bits set above 16th, directly pack UTF-32 to UTF-16 + __m128i utf16_packed0 = _mm_packus_epi32(in0, in1); + __m128i utf16_packed1 = _mm_packus_epi32(in2, in3); + + const __m128i v_f800 = _mm_set1_epi16((uint16_t)0xf800); + const __m128i v_d800 = _mm_set1_epi16((uint16_t)0xd800); + forbidden_bytemask = _mm_or_si128( + forbidden_bytemask, + _mm_or_si128( + _mm_cmpeq_epi16(_mm_and_si128(utf16_packed0, v_f800), v_d800), + _mm_cmpeq_epi16(_mm_and_si128(utf16_packed1, v_f800), v_d800))); + + if (big_endian) { + const __m128i swap = + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + utf16_packed0 = _mm_shuffle_epi8(utf16_packed0, swap); + 
utf16_packed1 = _mm_shuffle_epi8(utf16_packed1, swap); + } + + _mm_storeu_si128((__m128i *)utf16_output + 0, utf16_packed0); + _mm_storeu_si128((__m128i *)utf16_output + 1, utf16_packed1); + utf16_output += 16; + buf += 16; + } else { + if (!validate_utf32(in0, in1) || !validate_utf32(in2, in3)) { + return std::make_pair(nullptr, utf16_output); + } + + const auto ret0 = sse_expand_surrogate(in0); + _mm_storeu_si128((__m128i *)utf16_output, ret0.compressed); + utf16_output += ret0.u16count; + + const auto ret1 = sse_expand_surrogate(in1); + _mm_storeu_si128((__m128i *)utf16_output, ret1.compressed); + utf16_output += ret1.u16count; + + const auto ret2 = sse_expand_surrogate(in2); + _mm_storeu_si128((__m128i *)utf16_output, ret2.compressed); + utf16_output += ret2.u16count; + + const auto ret3 = sse_expand_surrogate(in3); + _mm_storeu_si128((__m128i *)utf16_output, ret3.compressed); + utf16_output += ret3.u16count; + + buf += 16; + } + } + + // check for invalid input + if (static_cast(_mm_movemask_epi8(forbidden_bytemask)) != 0) { + return std::make_pair(nullptr, utf16_output); + } + + return std::make_pair(buf, utf16_output); +} + +template +std::pair +sse_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, + char16_t *utf16_output) { + const char32_t *start = buf; + const char32_t *end = buf + len; + + const __m128i v_ffff0000 = _mm_set1_epi32((int32_t)0xffff0000); + + while (end - buf >= 8) { + const __m128i in = _mm_loadu_si128((__m128i *)buf); + const __m128i nextin = _mm_loadu_si128((__m128i *)buf + 1); + + const __m128i combined = _mm_or_si128(in, nextin); + if (simdutf_likely(_mm_testz_si128(combined, v_ffff0000))) { + // No bits set above 16th, directly pack UTF-32 to UTF-16 + __m128i utf16_packed = _mm_packus_epi32(in, nextin); + + const __m128i v_f800 = _mm_set1_epi16((uint16_t)0xf800); + const __m128i v_d800 = _mm_set1_epi16((uint16_t)0xd800); + const __m128i forbidden_bytemask = + _mm_cmpeq_epi16(_mm_and_si128(utf16_packed, v_f800), 
v_d800); + if (static_cast(_mm_movemask_epi8(forbidden_bytemask)) != 0) { + return std::make_pair(result(error_code::SURROGATE, buf - start), + utf16_output); + } + + if (big_endian) { + const __m128i swap = + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + utf16_packed = _mm_shuffle_epi8(utf16_packed, swap); + } + + _mm_storeu_si128((__m128i *)utf16_output, utf16_packed); + utf16_output += 8; + buf += 8; + } else { + size_t forward = 7; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint32_t word = buf[k]; + if ((word & 0xFFFF0000) == 0) { + // will not generate a surrogate pair + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair( + result(error_code::SURROGATE, buf - start + k), utf16_output); + } + *utf16_output++ = + big_endian + ? char16_t((uint16_t(word) >> 8) | (uint16_t(word) << 8)) + : char16_t(word); + } else { + // will generate a surrogate pair + if (word > 0x10FFFF) { + return std::make_pair( + result(error_code::TOO_LARGE, buf - start + k), utf16_output); + } + word -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); + if (big_endian) { + high_surrogate = + uint16_t((high_surrogate >> 8) | (high_surrogate << 8)); + low_surrogate = + uint16_t((low_surrogate >> 8) | (low_surrogate << 8)); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + } + } + buf += k; + } + } + + return std::make_pair(result(error_code::SUCCESS, buf - start), utf16_output); +} +/* end file src/westmere/sse_convert_utf32_to_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_BASE64 +/* begin file src/westmere/sse_base64.cpp */ +/** + * References and further reading: + * + * Wojciech Muła, Daniel Lemire, Base64 encoding and decoding at almost the + * speed of a memory copy, Software: Practice and 
Experience 50 (2), 2020. + * https://arxiv.org/abs/1910.05109 + * + * Wojciech Muła, Daniel Lemire, Faster Base64 Encoding and Decoding using AVX2 + * Instructions, ACM Transactions on the Web 12 (3), 2018. + * https://arxiv.org/abs/1704.00605 + * + * Simon Josefsson. 2006. The Base16, Base32, and Base64 Data Encodings. + * https://tools.ietf.org/html/rfc4648. (2006). Internet Engineering Task Force, + * Request for Comments: 4648. + * + * Alfred Klomp. 2014a. Fast Base64 encoding/decoding with SSE vectorization. + * http://www.alfredklomp.com/programming/sse-base64/. (2014). + * + * Alfred Klomp. 2014b. Fast Base64 stream encoder/decoder in C99, with SIMD + * acceleration. https://github.com/aklomp/base64. (2014). + * + * Hanson Char. 2014. A Fast and Correct Base 64 Codec. (2014). + * https://aws.amazon.com/blogs/developer/a-fast-and-correct-base-64-codec/ + * + * Nick Kopp. 2013. Base64 Encoding on a GPU. + * https://www.codeproject.com/Articles/276993/Base-Encoding-on-a-GPU. (2013). + */ + +// --- encoding ---------------------------------------------------- +template __m128i lookup_pshufb_improved(const __m128i input) { + // credit: Wojciech Muła + // reduce 0..51 -> 0 + // 52..61 -> 1 .. 10 + // 62 -> 11 + // 63 -> 12 + __m128i result = _mm_subs_epu8(input, _mm_set1_epi8(51)); + + // distinguish between ranges 0..25 and 26..51: + // 0 .. 25 -> remains 0 + // 26 .. 
51 -> becomes 13 + const __m128i less = _mm_cmpgt_epi8(_mm_set1_epi8(26), input); + result = _mm_or_si128(result, _mm_and_si128(less, _mm_set1_epi8(13))); + + __m128i shift_LUT; + if (base64_url) { + shift_LUT = _mm_setr_epi8('a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, + '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, + '0' - 52, '-' - 62, '_' - 63, 'A', 0, 0); + } else { + shift_LUT = _mm_setr_epi8('a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, + '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, + '0' - 52, '+' - 62, '/' - 63, 'A', 0, 0); + } + + // read shift + result = _mm_shuffle_epi8(shift_LUT, result); + + return _mm_add_epi8(result, input); +} + +inline __m128i insert_line_feed16(__m128i input, size_t K) { + static const uint8_t shuffle_masks[16][16] = { + {0x80, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}, + {0, 0x80, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}, + {0, 1, 0x80, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}, + {0, 1, 2, 0x80, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}, + {0, 1, 2, 3, 0x80, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}, + {0, 1, 2, 3, 4, 0x80, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}, + {0, 1, 2, 3, 4, 5, 0x80, 6, 7, 8, 9, 10, 11, 12, 13, 14}, + {0, 1, 2, 3, 4, 5, 6, 0x80, 7, 8, 9, 10, 11, 12, 13, 14}, + {0, 1, 2, 3, 4, 5, 6, 7, 0x80, 8, 9, 10, 11, 12, 13, 14}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 0x80, 9, 10, 11, 12, 13, 14}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0x80, 10, 11, 12, 13, 14}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0x80, 11, 12, 13, 14}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0x80, 12, 13, 14}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 0x80, 13, 14}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 0x80, 14}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0x80}}; + // Prepare a vector with '\n' (0x0A) + __m128i line_feed_vector = _mm_set1_epi8('\n'); + + // Load the precomputed shuffle mask for K (index K-1) + __m128i mask = _mm_loadu_si128((__m128i *)shuffle_masks[K]); + __m128i lf_pos = _mm_cmpeq_epi8(mask, 
_mm_set1_epi8(static_cast(0x80))); + + // Perform the shuffle to reposition the K bytes + __m128i shuffled = _mm_shuffle_epi8(input, mask); + + // Blend with line_feed_vector to insert '\n' at the appropriate positions + __m128i result = _mm_blendv_epi8(shuffled, line_feed_vector, lf_pos); + + return result; +} +template +size_t encode_base64_impl(char *dst, const char *src, size_t srclen, + base64_options options, + size_t line_length = simdutf::default_line_length) { + size_t offset = 0; + if (line_length < 4) { + line_length = 4; // We do not support line_length less than 4 + } + // credit: Wojciech Muła + // SSE (lookup: pshufb improved unrolled) + const uint8_t *input = (const uint8_t *)src; + + uint8_t *out = (uint8_t *)dst; + const __m128i shuf = + _mm_set_epi8(10, 11, 9, 10, 7, 8, 6, 7, 4, 5, 3, 4, 1, 2, 0, 1); + + size_t i = 0; + for (; i + 52 <= srclen; i += 48) { + __m128i in0 = _mm_loadu_si128( + reinterpret_cast(input + i + 4 * 3 * 0)); + __m128i in1 = _mm_loadu_si128( + reinterpret_cast(input + i + 4 * 3 * 1)); + __m128i in2 = _mm_loadu_si128( + reinterpret_cast(input + i + 4 * 3 * 2)); + __m128i in3 = _mm_loadu_si128( + reinterpret_cast(input + i + 4 * 3 * 3)); + + in0 = _mm_shuffle_epi8(in0, shuf); + in1 = _mm_shuffle_epi8(in1, shuf); + in2 = _mm_shuffle_epi8(in2, shuf); + in3 = _mm_shuffle_epi8(in3, shuf); + + const __m128i t0_0 = _mm_and_si128(in0, _mm_set1_epi32(0x0fc0fc00)); + const __m128i t0_1 = _mm_and_si128(in1, _mm_set1_epi32(0x0fc0fc00)); + const __m128i t0_2 = _mm_and_si128(in2, _mm_set1_epi32(0x0fc0fc00)); + const __m128i t0_3 = _mm_and_si128(in3, _mm_set1_epi32(0x0fc0fc00)); + + const __m128i t1_0 = _mm_mulhi_epu16(t0_0, _mm_set1_epi32(0x04000040)); + const __m128i t1_1 = _mm_mulhi_epu16(t0_1, _mm_set1_epi32(0x04000040)); + const __m128i t1_2 = _mm_mulhi_epu16(t0_2, _mm_set1_epi32(0x04000040)); + const __m128i t1_3 = _mm_mulhi_epu16(t0_3, _mm_set1_epi32(0x04000040)); + + const __m128i t2_0 = _mm_and_si128(in0, 
_mm_set1_epi32(0x003f03f0)); + const __m128i t2_1 = _mm_and_si128(in1, _mm_set1_epi32(0x003f03f0)); + const __m128i t2_2 = _mm_and_si128(in2, _mm_set1_epi32(0x003f03f0)); + const __m128i t2_3 = _mm_and_si128(in3, _mm_set1_epi32(0x003f03f0)); + + const __m128i t3_0 = _mm_mullo_epi16(t2_0, _mm_set1_epi32(0x01000010)); + const __m128i t3_1 = _mm_mullo_epi16(t2_1, _mm_set1_epi32(0x01000010)); + const __m128i t3_2 = _mm_mullo_epi16(t2_2, _mm_set1_epi32(0x01000010)); + const __m128i t3_3 = _mm_mullo_epi16(t2_3, _mm_set1_epi32(0x01000010)); + + const __m128i input0 = _mm_or_si128(t1_0, t3_0); + const __m128i input1 = _mm_or_si128(t1_1, t3_1); + const __m128i input2 = _mm_or_si128(t1_2, t3_2); + const __m128i input3 = _mm_or_si128(t1_3, t3_3); + + const __m128i t0 = lookup_pshufb_improved(input0); + const __m128i t1 = lookup_pshufb_improved(input1); + const __m128i t2 = lookup_pshufb_improved(input2); + const __m128i t3 = lookup_pshufb_improved(input3); + + if (use_lines) { + if (line_length >= 64) { // fast path + if (offset + 64 > line_length) { + size_t location_end = line_length - offset; + size_t to_move = 64 - location_end; + if (location_end < 16) { + // We can store or extract store. See below. + //_mm_storeu_si128(reinterpret_cast<__m128i *>(out+1), t0); + _mm_storeu_si128(reinterpret_cast<__m128i *>(out), + insert_line_feed16(t0, location_end)); + out[16] = static_cast(_mm_extract_epi8(t0, 15)); + out += 17; + } else { + _mm_storeu_si128(reinterpret_cast<__m128i *>(out), t0); + out += 16; + } + if (location_end >= 16 && location_end < 32) { + // We can store or extract store. See below. 
+ //_mm_storeu_si128(reinterpret_cast<__m128i *>(out+1), t1); + _mm_storeu_si128(reinterpret_cast<__m128i *>(out), + insert_line_feed16(t1, location_end - 16)); + out[16] = static_cast(_mm_extract_epi8(t1, 15)); + out += 17; + } else { + _mm_storeu_si128(reinterpret_cast<__m128i *>(out), t1); + out += 16; + } + if (location_end >= 32 && location_end < 48) { + // We can store or extract store. See below. + //_mm_storeu_si128(reinterpret_cast<__m128i *>(out+1), t2); + _mm_storeu_si128(reinterpret_cast<__m128i *>(out), + insert_line_feed16(t2, location_end - 32)); + out[16] = static_cast(_mm_extract_epi8(t2, 15)); + out += 17; + } else { + _mm_storeu_si128(reinterpret_cast<__m128i *>(out), t2); + out += 16; + } + if (location_end >= 48) { + // We can store or extract store. See below. + //_mm_storeu_si128(reinterpret_cast<__m128i *>(out+1), t3); + _mm_storeu_si128(reinterpret_cast<__m128i *>(out), + insert_line_feed16(t3, location_end - 48)); + out[16] = static_cast(_mm_extract_epi8(t3, 15)); + out += 17; + } else { + _mm_storeu_si128(reinterpret_cast<__m128i *>(out), t3); + out += 16; + } + offset = to_move; + } else { + + _mm_storeu_si128(reinterpret_cast<__m128i *>(out), t0); + _mm_storeu_si128(reinterpret_cast<__m128i *>(out + 16), t1); + _mm_storeu_si128(reinterpret_cast<__m128i *>(out + 32), t2); + _mm_storeu_si128(reinterpret_cast<__m128i *>(out + 48), t3); + offset += 64; + out += 64; + } + } else { // slow path + // could be optimized + alignas(64) uint8_t buffer[64]; + _mm_storeu_si128(reinterpret_cast<__m128i *>(buffer), t0); + _mm_storeu_si128(reinterpret_cast<__m128i *>(buffer + 16), t1); + _mm_storeu_si128(reinterpret_cast<__m128i *>(buffer + 32), t2); + _mm_storeu_si128(reinterpret_cast<__m128i *>(buffer + 48), t3); + std::memcpy(out, buffer, 64); + size_t out_pos = 0; + size_t local_offset = offset; + for (size_t j = 0; j < 64;) { + if (local_offset == line_length) { + out[out_pos++] = '\n'; + local_offset = 0; + } + out[out_pos++] = buffer[j++]; + 
local_offset++; + } + offset = local_offset; + out += out_pos; + } + } else { + _mm_storeu_si128(reinterpret_cast<__m128i *>(out), t0); + _mm_storeu_si128(reinterpret_cast<__m128i *>(out + 16), t1); + _mm_storeu_si128(reinterpret_cast<__m128i *>(out + 32), t2); + _mm_storeu_si128(reinterpret_cast<__m128i *>(out + 48), t3); + out += 64; + } + } + for (; i + 16 <= srclen; i += 12) { + + __m128i in = _mm_loadu_si128(reinterpret_cast(input + i)); + + // bytes from groups A, B and C are needed in separate 32-bit lanes + // in = [DDDD|CCCC|BBBB|AAAA] + // + // an input triplet has layout + // [????????|ccdddddd|bbbbcccc|aaaaaabb] + // byte 3 byte 2 byte 1 byte 0 -- byte 3 comes from the next + // triplet + // + // shuffling changes the order of bytes: 1, 0, 2, 1 + // [bbbbcccc|ccdddddd|aaaaaabb|bbbbcccc] + // ^^^^ ^^^^^^^^ ^^^^^^^^ ^^^^ + // processed bits + in = _mm_shuffle_epi8(in, shuf); + + // unpacking + + // t0 = [0000cccc|cc000000|aaaaaa00|00000000] + const __m128i t0 = _mm_and_si128(in, _mm_set1_epi32(0x0fc0fc00)); + // t1 = [00000000|00cccccc|00000000|00aaaaaa] + // (c * (1 << 10), a * (1 << 6)) >> 16 (note: an unsigned + // multiplication) + const __m128i t1 = _mm_mulhi_epu16(t0, _mm_set1_epi32(0x04000040)); + + // t2 = [00000000|00dddddd|000000bb|bbbb0000] + const __m128i t2 = _mm_and_si128(in, _mm_set1_epi32(0x003f03f0)); + // t3 = [00dddddd|00000000|00bbbbbb|00000000]( + // (d * (1 << 8), b * (1 << 4)) + const __m128i t3 = _mm_mullo_epi16(t2, _mm_set1_epi32(0x01000010)); + + // res = [00dddddd|00cccccc|00bbbbbb|00aaaaaa] = t1 | t3 + const __m128i indices = _mm_or_si128(t1, t3); + + const __m128i T0 = lookup_pshufb_improved(indices); + + _mm_storeu_si128(reinterpret_cast<__m128i *>(out), T0); + + if (use_lines) { + if (line_length >= 16) { // fast path + _mm_storeu_si128(reinterpret_cast<__m128i *>(out), T0); + if (offset + 16 > line_length) { + size_t location_end = line_length - offset; + size_t to_move = 16 - location_end; + std::memmove(out + location_end 
+ 1, out + location_end, to_move); + out[location_end] = '\n'; + offset = to_move; + out += 16 + 1; + } else { + offset += 16; + out += 16; + } + } else { // slow path + // could be optimized + uint8_t buffer[16]; + _mm_storeu_si128(reinterpret_cast<__m128i *>(buffer), T0); + size_t out_pos = 0; + size_t local_offset = offset; + for (size_t j = 0; j < 16;) { + if (local_offset == line_length) { + out[out_pos++] = '\n'; + local_offset = 0; + } + out[out_pos++] = buffer[j++]; + local_offset++; + } + offset = local_offset; + out += out_pos; + } + } else { + _mm_storeu_si128(reinterpret_cast<__m128i *>(out), T0); + out += 16; + } + } + return ((char *)out - (char *)dst) + + scalar::base64::tail_encode_base64_impl( + (char *)out, src + i, srclen - i, options, line_length, offset); +} + +template +size_t encode_base64(char *dst, const char *src, size_t srclen, + base64_options options) { + return encode_base64_impl(dst, src, srclen, options); +} + +// --- decoding ----------------------------------------------- + +static simdutf_really_inline void compress(__m128i data, uint16_t mask, + char *output) { + if (mask == 0) { + _mm_storeu_si128(reinterpret_cast<__m128i *>(output), data); + return; + } + + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. 
+ + __m128i shufmask = _mm_set_epi64x(tables::base64::thintable_epi8[mask2], + tables::base64::thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask + shufmask = + _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m128i pruned = _mm_shuffle_epi8(data, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = tables::base64::BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. + __m128i compactmask = _mm_loadu_si128(reinterpret_cast( + tables::base64::pshufb_combine_table + pop1 * 8)); + __m128i answer = _mm_shuffle_epi8(pruned, compactmask); + _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); +} + +static simdutf_really_inline void base64_decode(char *out, __m128i str) { + // credit: aqrit + + const __m128i pack_shuffle = + _mm_setr_epi8(2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1); + + const __m128i t0 = _mm_maddubs_epi16(str, _mm_set1_epi32(0x01400140)); + const __m128i t1 = _mm_madd_epi16(t0, _mm_set1_epi32(0x00011000)); + const __m128i t2 = _mm_shuffle_epi8(t1, pack_shuffle); + // Store the output: + // this writes 16 bytes, but we only need 12. 
+ _mm_storeu_si128((__m128i *)out, t2); +} + +// decode 64 bytes and output 48 bytes +static inline void base64_decode_block(char *out, const char *src) { + base64_decode(out, _mm_loadu_si128(reinterpret_cast(src))); + base64_decode(out + 12, + _mm_loadu_si128(reinterpret_cast(src + 16))); + base64_decode(out + 24, + _mm_loadu_si128(reinterpret_cast(src + 32))); + base64_decode(out + 36, + _mm_loadu_si128(reinterpret_cast(src + 48))); +} + +static inline void base64_decode_block_safe(char *out, const char *src) { + base64_decode(out, _mm_loadu_si128(reinterpret_cast(src))); + base64_decode(out + 12, + _mm_loadu_si128(reinterpret_cast(src + 16))); + base64_decode(out + 24, + _mm_loadu_si128(reinterpret_cast(src + 32))); + char buffer[16]; + base64_decode(buffer, + _mm_loadu_si128(reinterpret_cast(src + 48))); + std::memcpy(out + 36, buffer, 12); +} + +// --- decoding - base64 class -------------------------------- + +class block64 { + __m128i chunks[4]; + +public: + // The caller of this function is responsible to ensure that there are 64 + // bytes available from reading at src. + simdutf_really_inline block64(const char *src) { + chunks[0] = _mm_loadu_si128(reinterpret_cast(src)); + chunks[1] = _mm_loadu_si128(reinterpret_cast(src + 16)); + chunks[2] = _mm_loadu_si128(reinterpret_cast(src + 32)); + chunks[3] = _mm_loadu_si128(reinterpret_cast(src + 48)); + } + +public: + // The caller of this function is responsible to ensure that there are 128 + // bytes available from reading at src. The data is read into a block64 + // structure. 
+ simdutf_really_inline block64(const char16_t *src) { + const auto m1 = _mm_loadu_si128(reinterpret_cast(src)); + const auto m2 = _mm_loadu_si128(reinterpret_cast(src + 8)); + const auto m3 = + _mm_loadu_si128(reinterpret_cast(src + 16)); + const auto m4 = + _mm_loadu_si128(reinterpret_cast(src + 24)); + const auto m5 = + _mm_loadu_si128(reinterpret_cast(src + 32)); + const auto m6 = + _mm_loadu_si128(reinterpret_cast(src + 40)); + const auto m7 = + _mm_loadu_si128(reinterpret_cast(src + 48)); + const auto m8 = + _mm_loadu_si128(reinterpret_cast(src + 56)); + chunks[0] = _mm_packus_epi16(m1, m2); + chunks[1] = _mm_packus_epi16(m3, m4); + chunks[2] = _mm_packus_epi16(m5, m6); + chunks[3] = _mm_packus_epi16(m7, m8); + } + +public: + simdutf_really_inline void copy_block(char *output) { + _mm_storeu_si128(reinterpret_cast<__m128i *>(output), chunks[0]); + _mm_storeu_si128(reinterpret_cast<__m128i *>(output + 16), chunks[1]); + _mm_storeu_si128(reinterpret_cast<__m128i *>(output + 32), chunks[2]); + _mm_storeu_si128(reinterpret_cast<__m128i *>(output + 48), chunks[3]); + } + +public: + simdutf_really_inline uint64_t compress_block(uint64_t mask, char *output) { + if (is_power_of_two(mask)) { + return compress_block_single(mask, output); + } + + uint64_t nmask = ~mask; + compress(chunks[0], uint16_t(mask), output); + compress(chunks[1], uint16_t(mask >> 16), + output + count_ones(nmask & 0xFFFF)); + compress(chunks[2], uint16_t(mask >> 32), + output + count_ones(nmask & 0xFFFFFFFF)); + compress(chunks[3], uint16_t(mask >> 48), + output + count_ones(nmask & 0xFFFFFFFFFFFFULL)); + return count_ones(nmask); + } + +private: + simdutf_really_inline size_t compress_block_single(uint64_t mask, + char *output) { + const size_t pos64 = trailing_zeroes(mask); + const int8_t pos = pos64 & 0xf; + switch (pos64 >> 4) { + case 0b00: { + const __m128i v0 = _mm_set1_epi8(char(pos - 1)); + const __m128i v1 = + _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const 
__m128i v2 = _mm_cmpgt_epi8(v1, v0); + const __m128i sh = _mm_sub_epi8(v1, v2); + const __m128i compressed = _mm_shuffle_epi8(chunks[0], sh); + + _mm_storeu_si128((__m128i *)(output + 0 * 16), compressed); + _mm_storeu_si128((__m128i *)(output + 1 * 16 - 1), chunks[1]); + _mm_storeu_si128((__m128i *)(output + 2 * 16 - 1), chunks[2]); + _mm_storeu_si128((__m128i *)(output + 3 * 16 - 1), chunks[3]); + } break; + case 0b01: { + _mm_storeu_si128((__m128i *)(output + 0 * 16), chunks[0]); + + const __m128i v0 = _mm_set1_epi8(char(pos - 1)); + const __m128i v1 = + _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const __m128i v2 = _mm_cmpgt_epi8(v1, v0); + const __m128i sh = _mm_sub_epi8(v1, v2); + const __m128i compressed = _mm_shuffle_epi8(chunks[1], sh); + + _mm_storeu_si128((__m128i *)(output + 1 * 16), compressed); + _mm_storeu_si128((__m128i *)(output + 2 * 16 - 1), chunks[2]); + _mm_storeu_si128((__m128i *)(output + 3 * 16 - 1), chunks[3]); + } break; + case 0b10: { + _mm_storeu_si128((__m128i *)(output + 0 * 16), chunks[0]); + _mm_storeu_si128((__m128i *)(output + 1 * 16), chunks[1]); + + const __m128i v0 = _mm_set1_epi8(char(pos - 1)); + const __m128i v1 = + _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const __m128i v2 = _mm_cmpgt_epi8(v1, v0); + const __m128i sh = _mm_sub_epi8(v1, v2); + const __m128i compressed = _mm_shuffle_epi8(chunks[2], sh); + + _mm_storeu_si128((__m128i *)(output + 2 * 16), compressed); + _mm_storeu_si128((__m128i *)(output + 3 * 16 - 1), chunks[3]); + } break; + case 0b11: { + _mm_storeu_si128((__m128i *)(output + 0 * 16), chunks[0]); + _mm_storeu_si128((__m128i *)(output + 1 * 16), chunks[1]); + _mm_storeu_si128((__m128i *)(output + 2 * 16), chunks[2]); + + const __m128i v0 = _mm_set1_epi8(char(pos - 1)); + const __m128i v1 = + _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const __m128i v2 = _mm_cmpgt_epi8(v1, v0); + const __m128i sh = _mm_sub_epi8(v1, v2); + const 
__m128i compressed = _mm_shuffle_epi8(chunks[3], sh); + + _mm_storeu_si128((__m128i *)(output + 3 * 16), compressed); + } break; + } + + return 63; + } + +public: + template + simdutf_really_inline uint64_t to_base64_mask(uint64_t *error) { + uint32_t err0 = 0; + uint32_t err1 = 0; + uint32_t err2 = 0; + uint32_t err3 = 0; + uint64_t m0 = to_base64_mask( + &chunks[0], &err0); + uint64_t m1 = to_base64_mask( + &chunks[1], &err1); + uint64_t m2 = to_base64_mask( + &chunks[2], &err2); + uint64_t m3 = to_base64_mask( + &chunks[3], &err3); + if (!ignore_garbage) { + *error = (err0) | ((uint64_t)err1 << 16) | ((uint64_t)err2 << 32) | + ((uint64_t)err3 << 48); + } + return m0 | (m1 << 16) | (m2 << 32) | (m3 << 48); + } + +private: + template + simdutf_really_inline uint16_t to_base64_mask(__m128i *src, uint32_t *error) { + const __m128i ascii_space_tbl = + _mm_setr_epi8(0x20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x9, 0xa, + 0x0, 0xc, 0xd, 0x0, 0x0); + // credit: aqrit + __m128i delta_asso; + if (default_or_url) { + delta_asso = + _mm_setr_epi8(0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x16); + } else if (base64_url) { + delta_asso = _mm_setr_epi8(0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x0, + 0x0, 0x0, 0x0, 0x0, 0xF, 0x0, 0xF); + } else { + delta_asso = + _mm_setr_epi8(0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x0F); + } + __m128i delta_values; + if (default_or_url) { + delta_values = _mm_setr_epi8( + uint8_t(0xBF), uint8_t(0xE0), uint8_t(0xB9), uint8_t(0x13), + uint8_t(0x04), uint8_t(0xBF), uint8_t(0xBF), uint8_t(0xB9), + uint8_t(0xB9), uint8_t(0x00), uint8_t(0xFF), uint8_t(0x11), + uint8_t(0xFF), uint8_t(0xBF), uint8_t(0x10), uint8_t(0xB9)); + + } else if (base64_url) { + delta_values = _mm_setr_epi8(0x0, 0x0, 0x0, 0x13, 0x4, uint8_t(0xBF), + uint8_t(0xBF), uint8_t(0xB9), uint8_t(0xB9), + 0x0, 0x11, uint8_t(0xC3), uint8_t(0xBF), + uint8_t(0xE0), uint8_t(0xB9), 
uint8_t(0xB9)); + } else { + delta_values = + _mm_setr_epi8(int8_t(0x00), int8_t(0x00), int8_t(0x00), int8_t(0x13), + int8_t(0x04), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), + int8_t(0xB9), int8_t(0x00), int8_t(0x10), int8_t(0xC3), + int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), int8_t(0xB9)); + } + __m128i check_asso; + if (default_or_url) { + check_asso = + _mm_setr_epi8(0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x03, 0x07, 0x0B, 0x0E, 0x0B, 0x06); + } else if (base64_url) { + check_asso = _mm_setr_epi8(0xD, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, + 0x1, 0x3, 0x7, 0xB, 0xE, 0xB, 0x6); + } else { + check_asso = + _mm_setr_epi8(0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x03, 0x07, 0x0B, 0x0B, 0x0B, 0x0F); + } + __m128i check_values; + if (default_or_url) { + check_values = _mm_setr_epi8( + uint8_t(0x80), uint8_t(0x80), uint8_t(0x80), uint8_t(0x80), + uint8_t(0xCF), uint8_t(0xBF), uint8_t(0xD5), uint8_t(0xA6), + uint8_t(0xB5), uint8_t(0xA1), uint8_t(0x00), uint8_t(0x80), + uint8_t(0x00), uint8_t(0x80), uint8_t(0x00), uint8_t(0x80)); + } else if (base64_url) { + check_values = _mm_setr_epi8(uint8_t(0x80), uint8_t(0x80), uint8_t(0x80), + uint8_t(0x80), uint8_t(0xCF), uint8_t(0xBF), + uint8_t(0xB6), uint8_t(0xA6), uint8_t(0xB5), + uint8_t(0xA1), 0x0, uint8_t(0x80), 0x0, + uint8_t(0x80), 0x0, uint8_t(0x80)); + } else { + check_values = + _mm_setr_epi8(int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), + int8_t(0xCF), int8_t(0xBF), int8_t(0xD5), int8_t(0xA6), + int8_t(0xB5), int8_t(0x86), int8_t(0xD1), int8_t(0x80), + int8_t(0xB1), int8_t(0x80), int8_t(0x91), int8_t(0x80)); + } + const __m128i shifted = _mm_srli_epi32(*src, 3); + + __m128i delta_hash = + _mm_avg_epu8(_mm_shuffle_epi8(delta_asso, *src), shifted); + if (default_or_url) { + delta_hash = _mm_and_si128(delta_hash, _mm_set1_epi8(0xf)); + } + const __m128i check_hash = + _mm_avg_epu8(_mm_shuffle_epi8(check_asso, *src), shifted); + + const __m128i out = + 
_mm_adds_epi8(_mm_shuffle_epi8(delta_values, delta_hash), *src); + const __m128i chk = + _mm_adds_epi8(_mm_shuffle_epi8(check_values, check_hash), *src); + const int mask = _mm_movemask_epi8(chk); + if (!ignore_garbage && mask) { + __m128i ascii_space = + _mm_cmpeq_epi8(_mm_shuffle_epi8(ascii_space_tbl, *src), *src); + *error = (mask ^ _mm_movemask_epi8(ascii_space)); + } + *src = out; + return (uint16_t)mask; + } + +public: + simdutf_really_inline void base64_decode_block(char *out) { + base64_decode(out, chunks[0]); + base64_decode(out + 12, chunks[1]); + base64_decode(out + 24, chunks[2]); + base64_decode(out + 36, chunks[3]); + } + +public: + simdutf_really_inline void base64_decode_block_safe(char *out) { + base64_decode(out, chunks[0]); + base64_decode(out + 12, chunks[1]); + base64_decode(out + 24, chunks[2]); + char buffer[16]; + base64_decode(buffer, chunks[3]); + std::memcpy(out + 36, buffer, 12); + } +}; +/* end file src/westmere/sse_base64.cpp */ +#endif // SIMDUTF_FEATURE_BASE64 + +} // unnamed namespace +} // namespace westmere +} // namespace simdutf + +/* begin file src/generic/buf_block_reader.h */ +namespace simdutf { +namespace westmere { +namespace { + +// Walks through a buffer in block-sized increments, loading the last part with +// spaces +template struct buf_block_reader { +public: + simdutf_really_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdutf_really_inline size_t block_index(); + simdutf_really_inline bool has_full_block() const; + simdutf_really_inline const uint8_t *full_block() const; + /** + * Get the last block, padded with spaces. + * + * There will always be a last block, with at least 1 byte, unless len == 0 + * (in which case this function fills the buffer with spaces and returns 0. In + * particular, if len == STEP_SIZE there will be 0 full_blocks and 1 remainder + * block with STEP_SIZE bytes and no spaces for padding. + * + * @return the number of effective characters in the last block. 
+ */ + simdutf_really_inline size_t get_remainder(uint8_t *dst) const; + simdutf_really_inline void advance(); + +private: + const uint8_t *buf; + const size_t len; + const size_t lenminusstep; + size_t idx; +}; + +// Routines to print masks and text for debugging bitmask operations +simdutf_unused static char *format_input_text_64(const uint8_t *text) { + static char *buf = + reinterpret_cast(malloc(sizeof(simd8x64) + 1)); + for (size_t i = 0; i < sizeof(simd8x64); i++) { + buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +// Routines to print masks and text for debugging bitmask operations +simdutf_unused static char *format_input_text(const simd8x64 &in) { + static char *buf = + reinterpret_cast(malloc(sizeof(simd8x64) + 1)); + in.store(reinterpret_cast(buf)); + for (size_t i = 0; i < sizeof(simd8x64); i++) { + if (buf[i] < ' ') { + buf[i] = '_'; + } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdutf_unused static char *format_mask(uint64_t mask) { + static char *buf = reinterpret_cast(malloc(64 + 1)); + for (size_t i = 0; i < 64; i++) { + buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; + } + buf[64] = '\0'; + return buf; +} + +template +simdutf_really_inline +buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) + : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 
0 : len - STEP_SIZE}, + idx{0} {} + +template +simdutf_really_inline size_t buf_block_reader::block_index() { + return idx; +} + +template +simdutf_really_inline bool buf_block_reader::has_full_block() const { + return idx < lenminusstep; +} + +template +simdutf_really_inline const uint8_t * +buf_block_reader::full_block() const { + return &buf[idx]; +} + +template +simdutf_really_inline size_t +buf_block_reader::get_remainder(uint8_t *dst) const { + if (len == idx) { + return 0; + } // memcpy(dst, null, 0) will trigger an error with some sanitizers + std::memset(dst, 0x20, + STEP_SIZE); // std::memset STEP_SIZE because it is more efficient + // to write out 8 or 16 bytes at once. + std::memcpy(dst, buf + idx, len - idx); + return len - idx; +} + +template +simdutf_really_inline void buf_block_reader::advance() { + idx += STEP_SIZE; +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdutf +/* end file src/generic/buf_block_reader.h */ +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +/* begin file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ +namespace simdutf { +namespace westmere { +namespace { +namespace utf8_validation { + +using namespace simd; + +simdutf_really_inline simd8 +check_special_cases(const simd8 input, const simd8 prev1) { + // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) + // Bit 1 = Too Long (ASCII followed by continuation) + // Bit 2 = Overlong 3-byte + // Bit 4 = Surrogate + // Bit 5 = Overlong 2-byte + // Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ + constexpr const uint8_t 
TOO_LARGE = 1 << 3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); + constexpr const uint8_t CARRY = + TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . + const simd8 byte_1_low = + (prev1 & 0x0F) + .lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | + 
OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); + return (byte_1_high & byte_1_low & byte_2_high); +} +simdutf_really_inline simd8 +check_multibyte_lengths(const simd8 input, + const simd8 prev_input, + const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = + simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; +} + +// +// Return nonzero if there are incomplete multibyte characters at the end of the +// block: e.g. if there is a 4-byte character, but it is 3 bytes from the end. +// +simdutf_really_inline simd8 is_incomplete(const simd8 input) { + // If the previous input's last 3 bytes match this, they're too short (they + // ended at EOF): + // ... 1111____ 111_____ 11______ + static const uint8_t max_array[32] = {255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 0b11110000u - 1, + 0b11100000u - 1, + 0b11000000u - 1}; + const simd8 max_value( + &max_array[sizeof(max_array) - sizeof(simd8)]); + return input.gt_bits(max_value); +} + +struct utf8_checker { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + // The last input we received + simd8 prev_input_block; + // Whether the last input we received was incomplete (used for ASCII fast + // path) + simd8 prev_incomplete; + + // + // Check whether the current bytes are valid UTF-8. 
+ // + simdutf_really_inline void check_utf8_bytes(const simd8 input, + const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ + // lead bytes (2, 3, 4-byte leads become large positive numbers instead of + // small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + // The only problem that can happen at EOF is that a multibyte character is + // too short or a byte value too large in the last bytes: check_special_cases + // only checks for bytes too large in the first of two bytes. + simdutf_really_inline void check_eof() { + // If the previous block had incomplete UTF-8 characters at the end, an + // ASCII block can't possibly finish them. + this->error |= this->prev_incomplete; + } + + simdutf_really_inline void check_next_input(const simd8x64 &input) { + if (simdutf_likely(is_ascii(input))) { + this->error |= this->prev_incomplete; + } else { + // you might think that a for-loop would work, but under Visual Studio, it + // is not good enough. + static_assert((simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + this->prev_incomplete = + is_incomplete(input.chunks[simd8x64::NUM_CHUNKS - 1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS - 1]; + } + } + + // do not forget to call check_eof! 
+ simdutf_really_inline bool errors() const { + return this->error.any_bits_set_anywhere(); + } + +}; // struct utf8_checker +} // namespace utf8_validation + +using utf8_validation::utf8_checker; + +} // unnamed namespace +} // namespace westmere +} // namespace simdutf +/* end file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ +/* begin file src/generic/utf8_validation/utf8_validator.h */ +namespace simdutf { +namespace westmere { +namespace { +namespace utf8_validation { + +/** + * Validates that the string is actual UTF-8. + */ +template +bool generic_validate_utf8(const uint8_t *input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + return !c.errors(); +} + +bool generic_validate_utf8(const char *input, size_t length) { + return generic_validate_utf8( + reinterpret_cast(input), length); +} + +/** + * Validates that the string is actual UTF-8 and stops on errors. 
+ */ +template +result generic_validate_utf8_with_errors(const uint8_t *input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + size_t count{0}; + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + if (c.errors()) { + if (count != 0) { + count--; + } // Sometimes the error is only detected in the next chunk + result res = scalar::utf8::rewind_and_validate_with_errors( + reinterpret_cast(input), + reinterpret_cast(input + count), length - count); + res.count += count; + return res; + } + reader.advance(); + count += 64; + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + if (c.errors()) { + if (count != 0) { + count--; + } // Sometimes the error is only detected in the next chunk + result res = scalar::utf8::rewind_and_validate_with_errors( + reinterpret_cast(input), + reinterpret_cast(input) + count, length - count); + res.count += count; + return res; + } else { + return result(error_code::SUCCESS, length); + } +} + +result generic_validate_utf8_with_errors(const char *input, size_t length) { + return generic_validate_utf8_with_errors( + reinterpret_cast(input), length); +} + +} // namespace utf8_validation +} // unnamed namespace +} // namespace westmere +} // namespace simdutf +/* end file src/generic/utf8_validation/utf8_validator.h */ +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_ASCII +/* begin file src/generic/ascii_validation.h */ +namespace simdutf { +namespace westmere { +namespace { +namespace ascii_validation { + +result generic_validate_ascii_with_errors(const char *input, size_t length) { + buf_block_reader<64> reader(reinterpret_cast(input), length); + size_t count{0}; + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + if (!in.is_ascii()) { + result res = scalar::ascii::validate_with_errors( + 
reinterpret_cast(input + count), length - count); + return result(res.error, count + res.count); + } + reader.advance(); + + count += 64; + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + if (!in.is_ascii()) { + result res = scalar::ascii::validate_with_errors( + reinterpret_cast(input + count), length - count); + return result(res.error, count + res.count); + } else { + return result(error_code::SUCCESS, length); + } +} + +bool generic_validate_ascii(const char *input, size_t length) { + buf_block_reader<64> reader(reinterpret_cast(input), length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + if (!in.is_ascii()) { + return false; + } + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + return in.is_ascii(); +} + +} // namespace ascii_validation +} // unnamed namespace +} // namespace westmere +} // namespace simdutf +/* end file src/generic/ascii_validation.h */ +#endif // SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + // transcoding from UTF-8 to UTF-16 +/* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ +namespace simdutf { +namespace westmere { +namespace { +namespace utf8_to_utf16 { + +using namespace simd; + +template +simdutf_warn_unused size_t convert_valid(const char *input, size_t size, + char16_t *utf16_output) noexcept { + // The implementation is not specific to haswell and should be moved to the + // generic directory. + size_t pos = 0; + char16_t *start{utf16_output}; + const size_t safety_margin = 16; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + // this loop could be unrolled further. For example, we could process the + // mask far more than 64 bytes. + simd8x64 in(reinterpret_cast(input + pos)); + if (in.is_ascii()) { + in.store_ascii_as_utf16(utf16_output); + utf16_output += 64; + pos += 64; + } else { + // Slow path. 
We hope that the compiler will recognize that this is a slow + // path. Anything that is not a continuation mask is a 'leading byte', + // that is, the start of a new code point. + uint64_t utf8_continuation_mask = in.lt(-65 + 1); + // -65 is 0b10111111 in two-complement's, so largest possible continuation + // byte + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + // The *start* of code points is not so useful, rather, we want the *end* + // of code points. + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times when using solely + // the slow/regular path, and at least four times if there are fast paths. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + // + // Thus we may allow convert_masked_utf8_to_utf16 to process + // more bytes at a time under a fast-path mode where 16 bytes + // are consumed at once (e.g., when encountering ASCII). + size_t consumed = convert_masked_utf8_to_utf16( + input + pos, utf8_end_of_code_point_mask, utf16_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. 
So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + utf16_output += scalar::utf8_to_utf16::convert_valid( + input + pos, size - pos, utf16_output); + return utf16_output - start; +} + +} // namespace utf8_to_utf16 +} // unnamed namespace +} // namespace westmere +} // namespace simdutf +/* end file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ +/* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */ +namespace simdutf { +namespace westmere { +namespace { +namespace utf8_to_utf16 { +using namespace simd; + +simdutf_really_inline simd8 +check_special_cases(const simd8 input, const simd8 prev1) { + // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) + // Bit 1 = Too Long (ASCII followed by continuation) + // Bit 2 = Overlong 3-byte + // Bit 4 = Surrogate + // Bit 5 = Overlong 2-byte + // Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ 
________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); + constexpr const uint8_t CARRY = + TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . + const simd8 byte_1_low = + (prev1 & 0x0F) + .lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | + OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); + return (byte_1_high & byte_1_low & byte_2_high); +} +simdutf_really_inline simd8 +check_multibyte_lengths(const simd8 input, + const simd8 prev_input, + const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = + simd8(must_be_2_3_continuation(prev2, 
prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; +} + +struct validating_transcoder { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + + validating_transcoder() : error(uint8_t(0)) {} + // + // Check whether the current bytes are valid UTF-8. + // + simdutf_really_inline void check_utf8_bytes(const simd8 input, + const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ + // lead bytes (2, 3, 4-byte leads become large positive numbers instead of + // small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + template + simdutf_really_inline size_t convert(const char *in, size_t size, + char16_t *utf16_output) { + size_t pos = 0; + char16_t *start{utf16_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store_ascii_as_utf16(utf16_output); + utf16_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. 
+ static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (utf8_continuation_mask & 1) { + return 0; // error + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_utf16( + in + pos, utf8_end_of_code_point_mask, utf16_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. 
So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + return 0; + } + if (pos < size) { + size_t howmany = scalar::utf8_to_utf16::convert( + in + pos, size - pos, utf16_output); + if (howmany == 0) { + return 0; + } + utf16_output += howmany; + } + return utf16_output - start; + } + + template + simdutf_really_inline result convert_with_errors(const char *in, size_t size, + char16_t *utf16_output) { + size_t pos = 0; + char16_t *start{utf16_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store_ascii_as_utf16(utf16_output); + utf16_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. 
+ static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (errors() || (utf8_continuation_mask & 1)) { + // rewind_and_convert_with_errors will seek a potential error from + // in+pos onward, with the ability to go back up to pos bytes, and + // read size-pos bytes forward. + result res = + scalar::utf8_to_utf16::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf16_output); + res.count += pos; + return res; + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. 
+ size_t consumed = convert_masked_utf8_to_utf16( + in + pos, utf8_end_of_code_point_mask, utf16_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. + result res = + scalar::utf8_to_utf16::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf16_output); + res.count += pos; + return res; + } + if (pos < size) { + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. + result res = + scalar::utf8_to_utf16::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf16_output); + if (res.error) { // In case of error, we want the error position + res.count += pos; + return res; + } else { // In case of success, we want the number of word written + utf16_output += res.count; + } + } + return result(error_code::SUCCESS, utf16_output - start); + } + + simdutf_really_inline bool errors() const { + return this->error.any_bits_set_anywhere(); + } + +}; // struct utf8_checker +} // namespace utf8_to_utf16 +} // unnamed namespace +} // namespace westmere +} // namespace simdutf +/* end file src/generic/utf8_to_utf16/utf8_to_utf16.h */ +/* begin file src/generic/utf8/utf16_length_from_utf8_bytemask.h */ +namespace simdutf { +namespace westmere { +namespace { +namespace utf8 { + +using namespace simd; + +simdutf_really_inline size_t utf16_length_from_utf8_bytemask(const char *in, + size_t size) { + using vector_i8 = simd8; + using vector_u8 = simd8; + using vector_u64 = simd64; + + constexpr size_t N = 
vector_i8::SIZE; + constexpr size_t max_iterations = 255 / 2; + + auto counters = vector_u64::zero(); + auto local = vector_u8::zero(); + + size_t iterations = 0; + size_t pos = 0; + size_t count = 0; + for (; pos + N <= size; pos += N) { + const auto input = + vector_i8::load(reinterpret_cast(in + pos)); + + const auto continuation = input > int8_t(-65); + const auto utf_4bytes = vector_u8(input.value) >= uint8_t(240); + + local -= vector_u8(continuation); + local -= vector_u8(utf_4bytes); + + iterations += 1; + if (iterations == max_iterations) { + counters += sum_8bytes(local); + local = vector_u8::zero(); + iterations = 0; + } + } + + if (iterations > 0) { + count += local.sum_bytes(); + } + + count += counters.sum(); + + return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos); +} + +} // namespace utf8 +} // unnamed namespace +} // namespace westmere +} // namespace simdutf +/* end file src/generic/utf8/utf16_length_from_utf8_bytemask.h */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +/* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ +namespace simdutf { +namespace westmere { +namespace { +namespace utf8_to_utf32 { + +using namespace simd; + +simdutf_warn_unused size_t convert_valid(const char *input, size_t size, + char32_t *utf32_output) noexcept { + size_t pos = 0; + char32_t *start{utf32_output}; + const size_t safety_margin = 16; // to avoid overruns! 
+ while (pos + 64 + safety_margin <= size) { + simd8x64 in(reinterpret_cast(input + pos)); + if (in.is_ascii()) { + in.store_ascii_as_utf32(utf32_output); + utf32_output += 64; + pos += 64; + } else { + // -65 is 0b10111111 in two-complement's, so largest possible continuation + // byte + uint64_t utf8_continuation_mask = in.lt(-65 + 1); + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + size_t max_starting_point = (pos + 64) - 12; + while (pos < max_starting_point) { + size_t consumed = convert_masked_utf8_to_utf32( + input + pos, utf8_end_of_code_point_mask, utf32_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + } + } + utf32_output += scalar::utf8_to_utf32::convert_valid(input + pos, size - pos, + utf32_output); + return utf32_output - start; +} + +} // namespace utf8_to_utf32 +} // unnamed namespace +} // namespace westmere +} // namespace simdutf +/* end file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ +/* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */ +namespace simdutf { +namespace westmere { +namespace { +namespace utf8_to_utf32 { +using namespace simd; + +simdutf_really_inline simd8 +check_special_cases(const simd8 input, const simd8 prev1) { + // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) + // Bit 1 = Too Long (ASCII followed by continuation) + // Bit 2 = Overlong 3-byte + // Bit 4 = Surrogate + // Bit 5 = Overlong 2-byte + // Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1 << 3; // 
11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); + constexpr const uint8_t CARRY = + TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . + const simd8 byte_1_low = + (prev1 & 0x0F) + .lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | + OVERLONG_4, + // 
________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); + return (byte_1_high & byte_1_low & byte_2_high); +} +simdutf_really_inline simd8 +check_multibyte_lengths(const simd8 input, + const simd8 prev_input, + const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = + simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; +} + +struct validating_transcoder { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + + validating_transcoder() : error(uint8_t(0)) {} + // + // Check whether the current bytes are valid UTF-8. + // + simdutf_really_inline void check_utf8_bytes(const simd8 input, + const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ + // lead bytes (2, 3, 4-byte leads become large positive numbers instead of + // small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + simdutf_really_inline size_t convert(const char *in, size_t size, + char32_t *utf32_output) { + size_t pos = 0; + char32_t *start{utf32_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 words when calling convert_masked_utf8_to_utf32. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 16 leading bytes, to give us a good margin. 
+ size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the fourth + // last leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store_ascii_as_utf32(utf32_output); + utf32_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (utf8_continuation_mask & 1) { + return 0; // we have an error + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. 
If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_utf32( + in + pos, utf8_end_of_code_point_mask, utf32_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + return 0; + } + if (pos < size) { + size_t howmany = + scalar::utf8_to_utf32::convert(in + pos, size - pos, utf32_output); + if (howmany == 0) { + return 0; + } + utf32_output += howmany; + } + return utf32_output - start; + } + + simdutf_really_inline result convert_with_errors(const char *in, size_t size, + char32_t *utf32_output) { + size_t pos = 0; + char32_t *start{utf32_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the fourth + // last leading byte. 
+ const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store_ascii_as_utf32(utf32_output); + utf32_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (errors() || (utf8_continuation_mask & 1)) { + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf32_output); + res.count += pos; + return res; + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. 
+ // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_utf32( + in + pos, utf8_end_of_code_point_mask, utf32_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf32_output); + res.count += pos; + return res; + } + if (pos < size) { + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf32_output); + if (res.error) { // In case of error, we want the error position + res.count += pos; + return res; + } else { // In case of success, we want the number of word written + utf32_output += res.count; + } + } + return result(error_code::SUCCESS, utf32_output - start); + } + + simdutf_really_inline bool errors() const { + return this->error.any_bits_set_anywhere(); + } + +}; // struct utf8_checker +} // namespace utf8_to_utf32 +} // unnamed namespace +} // namespace westmere +} // namespace simdutf +/* end file src/generic/utf8_to_utf32/utf8_to_utf32.h */ +/* begin file src/generic/utf32.h */ +#include + +namespace simdutf { +namespace westmere { +namespace { +namespace utf32 { + +template T min(T a, T b) { return a <= b ? 
a : b; } + +simdutf_really_inline size_t utf8_length_from_utf32(const char32_t *input, + size_t length) { + using vector_u32 = simd32; + + const char32_t *start = input; + + // we add up to three ones in a single iteration (see the vectorized loop in + // section #2 below) + const size_t max_increment = 3; + + const size_t N = vector_u32::ELEMENTS; + +#if SIMDUTF_SIMD_HAS_UNSIGNED_CMP + const auto v_0000007f = vector_u32::splat(0x0000007f); + const auto v_000007ff = vector_u32::splat(0x000007ff); + const auto v_0000ffff = vector_u32::splat(0x0000ffff); +#else + const auto v_ffffff80 = vector_u32::splat(0xffffff80); + const auto v_fffff800 = vector_u32::splat(0xfffff800); + const auto v_ffff0000 = vector_u32::splat(0xffff0000); + const auto one = vector_u32::splat(1); +#endif // SIMDUTF_SIMD_HAS_UNSIGNED_CMP + + size_t counter = 0; + + // 1. vectorized loop unrolled 4 times + { + // we use vector of uint32 counters, this is why this limit is used + const size_t max_iterations = + std::numeric_limits::max() / (max_increment * 4); + size_t blocks = length / (N * 4); + length -= blocks * (N * 4); + while (blocks != 0) { + const size_t iterations = min(blocks, max_iterations); + blocks -= iterations; + + simd32 acc = vector_u32::zero(); + for (size_t i = 0; i < iterations; i++) { + const auto in0 = vector_u32(input + 0 * N); + const auto in1 = vector_u32(input + 1 * N); + const auto in2 = vector_u32(input + 2 * N); + const auto in3 = vector_u32(input + 3 * N); + +#if SIMDUTF_SIMD_HAS_UNSIGNED_CMP + acc -= as_vector_u32(in0 > v_0000007f); + acc -= as_vector_u32(in1 > v_0000007f); + acc -= as_vector_u32(in2 > v_0000007f); + acc -= as_vector_u32(in3 > v_0000007f); + + acc -= as_vector_u32(in0 > v_000007ff); + acc -= as_vector_u32(in1 > v_000007ff); + acc -= as_vector_u32(in2 > v_000007ff); + acc -= as_vector_u32(in3 > v_000007ff); + + acc -= as_vector_u32(in0 > v_0000ffff); + acc -= as_vector_u32(in1 > v_0000ffff); + acc -= as_vector_u32(in2 > v_0000ffff); + acc -= 
as_vector_u32(in3 > v_0000ffff); +#else + acc += min(one, in0 & v_ffffff80); + acc += min(one, in1 & v_ffffff80); + acc += min(one, in2 & v_ffffff80); + acc += min(one, in3 & v_ffffff80); + + acc += min(one, in0 & v_fffff800); + acc += min(one, in1 & v_fffff800); + acc += min(one, in2 & v_fffff800); + acc += min(one, in3 & v_fffff800); + + acc += min(one, in0 & v_ffff0000); + acc += min(one, in1 & v_ffff0000); + acc += min(one, in2 & v_ffff0000); + acc += min(one, in3 & v_ffff0000); +#endif // SIMDUTF_SIMD_HAS_UNSIGNED_CMP + + input += 4 * N; + } + + counter += acc.sum(); + } + } + + // 2. vectorized loop for tail + { + const size_t max_iterations = + std::numeric_limits::max() / max_increment; + size_t blocks = length / N; + length -= blocks * N; + while (blocks != 0) { + const size_t iterations = min(blocks, max_iterations); + blocks -= iterations; + + auto acc = vector_u32::zero(); + for (size_t i = 0; i < iterations; i++) { + const auto in = vector_u32(input); + +#if SIMDUTF_SIMD_HAS_UNSIGNED_CMP + acc -= as_vector_u32(in > v_0000007f); + acc -= as_vector_u32(in > v_000007ff); + acc -= as_vector_u32(in > v_0000ffff); +#else + acc += min(one, in & v_ffffff80); + acc += min(one, in & v_fffff800); + acc += min(one, in & v_ffff0000); +#endif // SIMDUTF_SIMD_HAS_UNSIGNED_CMP + + input += N; + } + + counter += acc.sum(); + } + } + + const size_t consumed = input - start; + if (consumed != 0) { + // We don't count 0th bytes in the vectorized loops above, this + // is why we need to count them in the end. 
+ counter += consumed; + } + + return counter + scalar::utf32::utf8_length_from_utf32(input, length); +} + +} // namespace utf32 +} // unnamed namespace +} // namespace westmere +} // namespace simdutf +/* end file src/generic/utf32.h */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 +/* begin file src/generic/utf8.h */ +namespace simdutf { +namespace westmere { +namespace { +namespace utf8 { + +using namespace simd; + +simdutf_really_inline size_t count_code_points(const char *in, size_t size) { + size_t pos = 0; + size_t count = 0; + for (; pos + 64 <= size; pos += 64) { + simd8x64 input(reinterpret_cast(in + pos)); + uint64_t utf8_continuation_mask = input.gt(-65); + count += count_ones(utf8_continuation_mask); + } + return count + scalar::utf8::count_code_points(in + pos, size - pos); +} + +#ifdef SIMDUTF_SIMD_HAS_BYTEMASK +simdutf_really_inline size_t count_code_points_bytemask(const char *in, + size_t size) { + using vector_i8 = simd8; + using vector_u8 = simd8; + using vector_u64 = simd64; + + constexpr size_t N = vector_i8::SIZE; + constexpr size_t max_iterations = 255 / 4; + + size_t pos = 0; + size_t count = 0; + + auto counters = vector_u64::zero(); + auto local = vector_u8::zero(); + size_t iterations = 0; + for (; pos + 4 * N <= size; pos += 4 * N) { + const auto input0 = + simd8::load(reinterpret_cast(in + pos + 0 * N)); + const auto input1 = + simd8::load(reinterpret_cast(in + pos + 1 * N)); + const auto input2 = + simd8::load(reinterpret_cast(in + pos + 2 * N)); + const auto input3 = + simd8::load(reinterpret_cast(in + pos + 3 * N)); + const auto mask0 = input0 > int8_t(-65); + const auto mask1 = input1 > int8_t(-65); + const auto mask2 = input2 > int8_t(-65); + const auto mask3 = input3 > int8_t(-65); + + local -= vector_u8(mask0); + local -= vector_u8(mask1); + local -= vector_u8(mask2); + local -= vector_u8(mask3); + + iterations += 1; + if (iterations == max_iterations) { + counters += sum_8bytes(local); + 
local = vector_u8::zero(); + iterations = 0; + } + } + + if (iterations > 0) { + count += local.sum_bytes(); + } + + count += counters.sum(); + + return count + scalar::utf8::count_code_points(in + pos, size - pos); +} +#endif // SIMDUTF_SIMD_HAS_BYTEMASK + +simdutf_really_inline size_t utf16_length_from_utf8(const char *in, + size_t size) { + size_t pos = 0; + size_t count = 0; + // This algorithm could no doubt be improved! + for (; pos + 64 <= size; pos += 64) { + simd8x64 input(reinterpret_cast(in + pos)); + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + // We count one word for anything that is not a continuation (so + // leading bytes). + count += 64 - count_ones(utf8_continuation_mask); + int64_t utf8_4byte = input.gteq_unsigned(240); + count += count_ones(utf8_4byte); + } + return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos); +} + +} // namespace utf8 +} // unnamed namespace +} // namespace westmere +} // namespace simdutf +/* end file src/generic/utf8.h */ +#endif // SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_UTF16 +/* begin file src/generic/utf16.h */ +namespace simdutf { +namespace westmere { +namespace { +namespace utf16 { + +template +simdutf_really_inline size_t count_code_points(const char16_t *in, + size_t size) { + size_t pos = 0; + size_t count = 0; + for (; pos < size / 32 * 32; pos += 32) { + simd16x32 input(reinterpret_cast(in + pos)); + if simdutf_constexpr (!match_system(big_endian)) { + input.swap_bytes(); + } + uint64_t not_pair = input.not_in_range(0xDC00, 0xDFFF); + count += count_ones(not_pair) / 2; + } + return count + + scalar::utf16::count_code_points(in + pos, size - pos); +} + +template +simdutf_really_inline size_t utf8_length_from_utf16(const char16_t *in, + size_t size) { + size_t pos = 0; + size_t count = 0; + // This algorithm could no doubt be improved! 
+ for (; pos < size / 32 * 32; pos += 32) { + simd16x32 input(reinterpret_cast(in + pos)); + if simdutf_constexpr (!match_system(big_endian)) { + input.swap_bytes(); + } + uint64_t ascii_mask = input.lteq(0x7F); + uint64_t twobyte_mask = input.lteq(0x7FF); + uint64_t not_pair_mask = input.not_in_range(0xD800, 0xDFFF); + + size_t ascii_count = count_ones(ascii_mask) / 2; + size_t twobyte_count = count_ones(twobyte_mask & ~ascii_mask) / 2; + size_t threebyte_count = count_ones(not_pair_mask & ~twobyte_mask) / 2; + size_t fourbyte_count = 32 - count_ones(not_pair_mask) / 2; + count += 2 * fourbyte_count + 3 * threebyte_count + 2 * twobyte_count + + ascii_count; + } + return count + scalar::utf16::utf8_length_from_utf16(in + pos, + size - pos); +} + +template +simdutf_really_inline size_t utf32_length_from_utf16(const char16_t *in, + size_t size) { + return count_code_points(in, size); +} + +simdutf_really_inline void +change_endianness_utf16(const char16_t *in, size_t size, char16_t *output) { + size_t pos = 0; + + while (pos < size / 32 * 32) { + simd16x32 input(reinterpret_cast(in + pos)); + input.swap_bytes(); + input.store(reinterpret_cast(output)); + pos += 32; + output += 32; + } + + scalar::utf16::change_endianness_utf16(in + pos, size - pos, output); +} + +} // namespace utf16 +} // unnamed namespace +} // namespace westmere +} // namespace simdutf +/* end file src/generic/utf16.h */ +/* begin file src/generic/utf16/utf8_length_from_utf16_bytemask.h */ +namespace simdutf { +namespace westmere { +namespace { +namespace utf16 { + +using namespace simd; + +template +simdutf_really_inline size_t utf8_length_from_utf16_bytemask(const char16_t *in, + size_t size) { + size_t pos = 0; + + using vector_u16 = simd16; + constexpr size_t N = vector_u16::ELEMENTS; + + const auto one = vector_u16::splat(1); + + auto v_count = vector_u16::zero(); + + // each char16 yields at least one byte + size_t count = size / N * N; + + // in a single iteration the increment is 0, 1 or 
2, despite we have + // three additions + constexpr size_t max_iterations = 65535 / 2; + size_t iteration = max_iterations; + + for (; pos < size / N * N; pos += N) { + auto input = vector_u16::load(reinterpret_cast(in + pos)); + if simdutf_constexpr (!match_system(big_endian)) { + input = input.swap_bytes(); + } + // 0xd800 .. 0xdbff - low surrogate + // 0xdc00 .. 0xdfff - high surrogate + const auto is_surrogate = ((input & uint16_t(0xf800)) == uint16_t(0xd800)); + + // c0 - chars that yield 2- or 3-byte UTF-8 codes + const auto c0 = min(input & uint16_t(0xff80), one); + + // c1 - chars that yield 3-byte UTF-8 codes (including surrogates) + const auto c1 = min(input & uint16_t(0xf800), one); + + /* + Explanation how the counting works. + + In the case of a non-surrogate character we count: + * always 1 -- see how `count` is initialized above; + * c0 = 1 if the current char yields 2 or 3 bytes; + * c1 = 1 if the current char yields 3 bytes. + + Thus, we always have correct count for the current char: + from 1, 2 or 3 bytes. + + A trickier part is how we count surrogate pairs. Whether + we encounter a surrogate (low or high), we count it as + 3 chars and then minus 1 (`is_surrogate` is -1 or 0). + Each surrogate char yields 2. A surrogate pair, that + is a low surrogate followed by a high one, yields + the expected 4 bytes. + + It also correctly handles cases when low surrogate is + processed by the this loop, but high surrogate is counted + by the scalar procedure. The scalar procedure uses exactly + the described approach, thanks to that for valid UTF-16 + strings it always count correctly. 
+ */ + v_count += c0; + v_count += c1; + v_count += vector_u16(is_surrogate); + + iteration -= 1; + if (iteration == 0) { + count += v_count.sum(); + v_count = vector_u16::zero(); + iteration = max_iterations; + } + } + + if (iteration > 0) { + count += v_count.sum(); + } + + return count + scalar::utf16::utf8_length_from_utf16(in + pos, + size - pos); +} + +template +simdutf_really_inline result +utf8_length_from_utf16_with_replacement(const char16_t *in, size_t size) { + using vector_u16 = simd16; + constexpr size_t N = vector_u16::ELEMENTS; + if (N + 1 > size) { + return scalar::utf16::utf8_length_from_utf16_with_replacement( + in, size); + } // special case for short inputs + size_t pos = 0; + bool any_surrogates = false; + + const auto one = vector_u16::splat(1); + + auto v_count = vector_u16::zero(); + auto v_mismatched_count = vector_u16::zero(); + + size_t count = 0; + size_t mismatched_count = 0; + + // in a single iteration the increment is 0, 1 or 2, despite we have + // three additions + constexpr size_t max_iterations = 65535 / 2; + size_t iteration = max_iterations; + + if (scalar::utf16::is_low_surrogate(in[0])) { + any_surrogates = true; + mismatched_count += 1; + } + + for (; pos < (size - 1) / N * N; pos += N) { + auto input = vector_u16::load(reinterpret_cast(in + pos)); + if simdutf_constexpr (!match_system(big_endian)) { + input = input.swap_bytes(); + } + // 0xd800 .. 0xdbff - low surrogate + // 0xdc00 .. 
0xdfff - high surrogate + const auto is_surrogate = ((input & uint16_t(0xf800)) == uint16_t(0xd800)); + + // c0 - chars that yield 2- or 3-byte UTF-8 codes + const auto c0 = min(input & uint16_t(0xff80), one); + + // c1 - chars that yield 3-byte UTF-8 codes (including surrogates) + const auto c1 = min(input & uint16_t(0xf800), one); + + v_count += c0; + v_count += c1; + v_count += vector_u16(is_surrogate); + if (is_surrogate.to_bitmask() != 0 || + scalar::utf16::is_low_surrogate(in[pos + N])) { + any_surrogates = true; + auto input_next = + vector_u16::load(reinterpret_cast(in + pos + 1)); + if simdutf_constexpr (!match_system(big_endian)) { + input_next = input_next.swap_bytes(); + } + + const auto lb_masked = input & (0xfc00); + const auto block_masked = input_next & (0xfc00); + + const auto lb_is_high = lb_masked == (0xd800); + const auto block_is_low = block_masked == (0xdc00); + + const auto illseq = min(vector_u16(lb_is_high ^ block_is_low), one); + + v_mismatched_count += illseq; + } + + iteration -= 1; + if (iteration == 0) { + count += v_count.sum(); + v_count = vector_u16::zero(); + mismatched_count += v_mismatched_count.sum(); + v_mismatched_count = vector_u16::zero(); + iteration = max_iterations; + } + } + + if (iteration > 0) { + count += v_count.sum(); + mismatched_count += v_mismatched_count.sum(); + } + + if (scalar::utf16::is_low_surrogate(in[pos])) { + any_surrogates = true; + if (!scalar::utf16::is_high_surrogate(in[pos - 1])) { + mismatched_count -= 1; + count += 2; + pos += 1; + } + } + count += pos; + count += mismatched_count; + if (scalar::utf16::is_high_surrogate(in[pos - 1])) { + any_surrogates = true; + if (pos == size) { + count += 2; + } else if (scalar::utf16::is_low_surrogate(in[pos])) { + pos += 1; + count += 2; + } + } + result scalar_result = + scalar::utf16::utf8_length_from_utf16_with_replacement( + in + pos, size - pos); + return {any_surrogates ? 
SURROGATE : scalar_result.error, + count + scalar_result.count}; +} + +} // namespace utf16 +} // unnamed namespace +} // namespace westmere +} // namespace simdutf +/* end file src/generic/utf16/utf8_length_from_utf16_bytemask.h */ +#endif // SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +/* begin file src/generic/validate_utf16.h */ +namespace simdutf { +namespace westmere { +namespace { +namespace utf16 { +/* + UTF-16 validation + -------------------------------------------------- + + In UTF-16 code units in range 0xD800 to 0xDFFF have special meaning. + + In a vectorized algorithm we want to examine the most significant + nibble in order to select a fast path. If none of highest nibbles + are 0xD (13), than we are sure that UTF-16 chunk in a vector + register is valid. + + Let us analyze what we need to check if the nibble is 0xD. The + value of the preceding nibble determines what we have: + + 0xd000 .. 0xd7ff - a valid word + 0xd800 .. 0xdbff - low surrogate + 0xdc00 .. 0xdfff - high surrogate + + Other constraints we have to consider: + - there must not be two consecutive low surrogates (0xd800 .. 0xdbff) + - there must not be two consecutive high surrogates (0xdc00 .. 0xdfff) + - there must not be sole low surrogate nor high surrogate + + We are going to build three bitmasks based on the 3rd nibble: + - V = valid word, + - L = low surrogate (0xd800 .. 0xdbff) + - H = high surrogate (0xdc00 .. 
0xdfff) + + 0 1 2 3 4 5 6 7 <--- word index + [ V | L | H | L | H | V | V | L ] + 1 0 0 0 0 1 1 0 - V = valid masks + 0 1 0 1 0 0 0 1 - L = low surrogate + 0 0 1 0 1 0 0 0 - H high surrogate + + + 1 0 0 0 0 1 1 0 V = valid masks + 0 1 0 1 0 0 0 0 a = L & (H >> 1) + 0 0 1 0 1 0 0 0 b = a << 1 + 1 1 1 1 1 1 1 0 c = V | a | b + ^ + the last bit can be zero, we just consume 7 + code units and recheck this word in the next iteration +*/ +template +const result validate_utf16_with_errors(const char16_t *input, size_t size) { + if (simdutf_unlikely(size == 0)) { + return result(error_code::SUCCESS, 0); + } + + const char16_t *start = input; + const char16_t *end = input + size; + + const auto v_d8 = simd8::splat(0xd8); + const auto v_f8 = simd8::splat(0xf8); + const auto v_fc = simd8::splat(0xfc); + const auto v_dc = simd8::splat(0xdc); + + while (input + simd16::SIZE * 2 < end) { + // 0. Load data: since the validation takes into account only higher + // byte of each word, we compress the two vectors into one which + // consists only the higher bytes. + auto in0 = simd16(input); + auto in1 = + simd16(input + simd16::SIZE / sizeof(char16_t)); + + // Function `utf16_gather_high_bytes` consumes two vectors of UTF-16 + // and yields a single vector having only higher bytes of characters. + const auto in = utf16_gather_high_bytes(in0, in1); + + // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy). + const auto surrogates_wordmask = (in & v_f8) == v_d8; + const uint16_t surrogates_bitmask = + static_cast(surrogates_wordmask.to_bitmask()); + if (surrogates_bitmask == 0x0000) { + input += 16; + } else { + // 2. 
We have some surrogates that have to be distinguished: + // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF) + // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF) + // + // Fact: high surrogate has 11th bit set (3rd bit in the higher byte) + // NOTE: "low"/"high" in this function are swapped relative to Unicode + // terminology, where 0xD800..0xDBFF are the high (leading) surrogates. + + // V - non-surrogate code units + // V = not surrogates_wordmask + const uint16_t V = static_cast(~surrogates_bitmask); + + // H - word-mask for high surrogates: the six highest bits are 0b1101'11 + const auto vH = (in & v_fc) == v_dc; + const uint16_t H = static_cast(vH.to_bitmask()); + + // L - word mask for low surrogates + // L = not H and surrogates_wordmask + const uint16_t L = static_cast(~H & surrogates_bitmask); + + const uint16_t a = static_cast( + L & (H >> 1)); // A low surrogate must be followed by a high one. + // (A low surrogate placed in the register's last word + // is an exception we handle.) + const uint16_t b = static_cast( + a << 1); // Just mark that the opposite fact holds; + // thanks to that we have only two masks for the valid case. + const uint16_t c = static_cast( + V | a | b); // Combine all the masks into the final one. + + if (c == 0xffff) { + // The whole input register contains valid UTF-16, i.e., + // either single code units or proper surrogate pairs. + input += 16; + } else if (c == 0x7fff) { + // The 15 lower code units of the input register contain valid UTF-16. + // The last code unit may be either a low or high surrogate. In the next + // iteration we 1) check if the low surrogate is followed by a high + // one, 2) reject a sole high surrogate. 
+ input += 15; + } else { + return result(error_code::SURROGATE, input - start); + } + } + } + + return result(error_code::SUCCESS, input - start); +} + +template +const result validate_utf16_as_ascii_with_errors(const char16_t *input, + size_t size) { + if (simdutf_unlikely(size == 0)) { + return result(error_code::SUCCESS, 0); + } + size_t pos = 0; + for (; pos < size / 32 * 32; pos += 32) { + simd16x32 input_vec( + reinterpret_cast(input + pos)); + if simdutf_constexpr (!match_system(big_endian)) { + input_vec.swap_bytes(); + } + uint64_t matches = input_vec.lteq(uint16_t(0x7f)); + if (~matches) { + // Found a match, return the first one + int index = trailing_zeroes(~matches) / 2; + return result(error_code::TOO_LARGE, pos + index); + } + } + + // Scalar tail + while (pos < size) { + + char16_t v = scalar::utf16::swap_if_needed(input[pos]); + if (v > 0x7F) { + return result(error_code::TOO_LARGE, pos); + } + pos++; + } + return result(error_code::SUCCESS, size); +} + +} // namespace utf16 +} // unnamed namespace +} // namespace westmere +} // namespace simdutf +/* end file src/generic/validate_utf16.h */ +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/generic/utf8_to_latin1/utf8_to_latin1.h */ +namespace simdutf { +namespace westmere { +namespace { +namespace utf8_to_latin1 { +using namespace simd; + +simdutf_really_inline simd8 +check_special_cases(const simd8 input, const simd8 prev1) { + // For UTF-8 to Latin 1, we can allow any ASCII character, and any + // continuation byte, but the non-ASCII leading bytes must be 0b11000011 or + // 0b11000010 and nothing else. 
+ // + // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) + // Bit 1 = Too Long (ASCII followed by continuation) + // Bit 2 = Overlong 3-byte + // Bit 4 = Surrogate + // Bit 5 = Overlong 2-byte + // Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ + constexpr const uint8_t FORBIDDEN = 0xff; + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + FORBIDDEN, + // 1110____ ________ + FORBIDDEN, + // 1111____ ________ + FORBIDDEN); + constexpr const uint8_t CARRY = + TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . 
+ const simd8 byte_1_low = + (prev1 & 0x0F) + .lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, CARRY, + + // ____0100 ________ + FORBIDDEN, + // ____0101 ________ + FORBIDDEN, + // ____011_ ________ + FORBIDDEN, FORBIDDEN, + + // ____1___ ________ + FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN, + // ____1101 ________ + FORBIDDEN, FORBIDDEN, FORBIDDEN); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | + OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); + return (byte_1_high & byte_1_low & byte_2_high); +} + +struct validating_transcoder { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + + validating_transcoder() : error(uint8_t(0)) {} + // + // Check whether the current bytes are valid UTF-8. + // + simdutf_really_inline void check_utf8_bytes(const simd8 input, + const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ + // lead bytes (2, 3, 4-byte leads become large positive numbers instead of + // small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + this->error |= check_special_cases(input, prev1); + } + + simdutf_really_inline size_t convert(const char *in, size_t size, + char *latin1_output) { + size_t pos = 0; + char *start{latin1_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_latin1. 
If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 16 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 16; margin--) { + leading_byte += (int8_t(in[margin - 1]) > + -65); // twos complement of -65 is 1011 1111 ... + } + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store((int8_t *)latin1_output); + latin1_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = + input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in + // this case, we also have ASCII to account for. 
+ if (utf8_continuation_mask & 1) { + return 0; // error + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_latin1( + in + pos, utf8_end_of_code_point_mask, latin1_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + return 0; + } + if (pos < size) { + size_t howmany = + scalar::utf8_to_latin1::convert(in + pos, size - pos, latin1_output); + if (howmany == 0) { + return 0; + } + latin1_output += howmany; + } + return latin1_output - start; + } + + simdutf_really_inline result convert_with_errors(const char *in, size_t size, + char *latin1_output) { + size_t pos = 0; + char *start{latin1_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_latin1. 
If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store((int8_t *)latin1_output); + latin1_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + if (errors()) { + // rewind_and_convert_with_errors will seek a potential error from + // in+pos onward, with the ability to go back up to pos bytes, and + // read size-pos bytes forward. 
+ result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( + pos, in + pos, size - pos, latin1_output); + res.count += pos; + return res; + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_latin1( + in + pos, utf8_end_of_code_point_mask, latin1_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. 
+ result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( + pos, in + pos, size - pos, latin1_output); + res.count += pos; + return res; + } + if (pos < size) { + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. + result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( + pos, in + pos, size - pos, latin1_output); + if (res.error) { // In case of error, we want the error position + res.count += pos; + return res; + } else { // In case of success, we want the number of word written + latin1_output += res.count; + } + } + return result(error_code::SUCCESS, latin1_output - start); + } + + simdutf_really_inline bool errors() const { + return this->error.any_bits_set_anywhere(); + } + +}; // struct utf8_checker +} // namespace utf8_to_latin1 +} // unnamed namespace +} // namespace westmere +} // namespace simdutf +/* end file src/generic/utf8_to_latin1/utf8_to_latin1.h */ +/* begin file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ +namespace simdutf { +namespace westmere { +namespace { +namespace utf8_to_latin1 { +using namespace simd; + +simdutf_really_inline size_t convert_valid(const char *in, size_t size, + char *latin1_output) { + size_t pos = 0; + char *start{latin1_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the last + // 16 bytes, and if the data is valid, then it is entirely safe because 16 + // UTF-8 bytes generate much more than 8 bytes. However, you cannot generally + // assume that you have valid UTF-8 input, so we are going to go back from the + // end counting 8 leading bytes, to give us a good margin. 
+ size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > + -65); // twos complement of -65 is 1011 1111 ... + } + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store((int8_t *)latin1_output); + latin1_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, it + // is not good enough. + uint64_t utf8_continuation_mask = + input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in + // this case, we also have ASCII to account for. + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. 
+ size_t consumed = convert_masked_utf8_to_latin1( + in + pos, utf8_end_of_code_point_mask, latin1_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (pos < size) { + size_t howmany = scalar::utf8_to_latin1::convert_valid(in + pos, size - pos, + latin1_output); + latin1_output += howmany; + } + return latin1_output - start; +} + +} // namespace utf8_to_latin1 +} // namespace +} // namespace westmere +} // namespace simdutf + // namespace simdutf +/* end file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +/* begin file src/generic/validate_utf32.h */ +namespace simdutf { +namespace westmere { +namespace { +namespace utf32 { + +simdutf_really_inline bool validate(const char32_t *input, size_t size) { + if (simdutf_unlikely(size == 0)) { + // empty input is valid UTF-32. 
protect the implementation from + // handling nullptr + return true; + } + + const char32_t *end = input + size; + + using vector_u32 = simd32; + + const auto standardmax = vector_u32::splat(0x10ffff); + const auto offset = vector_u32::splat(0xffff2000); + const auto standardoffsetmax = vector_u32::splat(0xfffff7ff); + auto currentmax = vector_u32::zero(); + auto currentoffsetmax = vector_u32::zero(); + + constexpr size_t N = vector_u32::ELEMENTS; + + while (input + N < end) { + auto in = vector_u32(input); + if simdutf_constexpr (!match_system(endianness::BIG)) { + in.swap_bytes(); + } + + currentmax = max(currentmax, in); + currentoffsetmax = max(currentoffsetmax, in + offset); + input += N; + } + + const auto too_large = currentmax > standardmax; + if (too_large.any()) { + return false; + } + + const auto surrogate = currentoffsetmax > standardoffsetmax; + if (surrogate.any()) { + return false; + } + + return scalar::utf32::validate(input, end - input); +} + +simdutf_really_inline result validate_with_errors(const char32_t *input, + size_t size) { + if (simdutf_unlikely(size == 0)) { + // empty input is valid UTF-32. 
protect the implementation from + // handling nullptr + return result(error_code::SUCCESS, 0); + } + + const char32_t *start = input; + const char32_t *end = input + size; + + using vector_u32 = simd32; + + const auto standardmax = vector_u32::splat(0x10ffff + 1); + const auto surrogate_mask = vector_u32::splat(0xfffff800); + const auto surrogate_byte = vector_u32::splat(0x0000d800); + + constexpr size_t N = vector_u32::ELEMENTS; + + while (input + N < end) { + auto in = vector_u32(input); + if simdutf_constexpr (!match_system(endianness::BIG)) { + in.swap_bytes(); + } + + const auto too_large = in >= standardmax; + const auto surrogate = (in & surrogate_mask) == surrogate_byte; + + const auto combined = too_large | surrogate; + if (simdutf_unlikely(combined.any())) { + const size_t consumed = input - start; + auto sr = scalar::utf32::validate_with_errors(input, end - input); + sr.count += consumed; + + return sr; + } + + input += N; + } + + const size_t consumed = input - start; + auto sr = scalar::utf32::validate_with_errors(input, end - input); + sr.count += consumed; + + return sr; +} + +} // namespace utf32 +} // unnamed namespace +} // namespace westmere +} // namespace simdutf +/* end file src/generic/validate_utf32.h */ +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_BASE64 +/* begin file src/generic/base64.h */ +/** + * References and further reading: + * + * Wojciech Muła, Daniel Lemire, Base64 encoding and decoding at almost the + * speed of a memory copy, Software: Practice and Experience 50 (2), 2020. + * https://arxiv.org/abs/1910.05109 + * + * Wojciech Muła, Daniel Lemire, Faster Base64 Encoding and Decoding using AVX2 + * Instructions, ACM Transactions on the Web 12 (3), 2018. + * https://arxiv.org/abs/1704.00605 + * + * Simon Josefsson. 2006. The Base16, Base32, and Base64 Data Encodings. + * https://tools.ietf.org/html/rfc4648. (2006). Internet Engineering Task Force, + * Request for Comments: 4648. 
+ * + * Alfred Klomp. 2014a. Fast Base64 encoding/decoding with SSE vectorization. + * http://www.alfredklomp.com/programming/sse-base64/. (2014). + * + * Alfred Klomp. 2014b. Fast Base64 stream encoder/decoder in C99, with SIMD + * acceleration. https://github.com/aklomp/base64. (2014). + * + * Hanson Char. 2014. A Fast and Correct Base 64 Codec. (2014). + * https://aws.amazon.com/blogs/developer/a-fast-and-correct-base-64-codec/ + * + * Nick Kopp. 2013. Base64 Encoding on a GPU. + * https://www.codeproject.com/Articles/276993/Base-Encoding-on-a-GPU. (2013). + */ +namespace simdutf { +namespace westmere { +namespace { +namespace base64 { + +/* + The following template function implements API for Base64 decoding. + + An implementation is responsible for providing the `block64` type and + associated methods that perform actual conversion. Please refer + to any vectorized implementation to learn the API of these procedures. +*/ +template +full_result +compress_decode_base64(char *dst, const chartype *src, size_t srclen, + base64_options options, + last_chunk_handling_options last_chunk_options) { + const uint8_t *to_base64 = + default_or_url ? tables::base64::to_base64_default_or_url_value + : (base64_url ? tables::base64::to_base64_url_value + : tables::base64::to_base64_value); + auto ri = simdutf::scalar::base64::find_end(src, srclen, options); + size_t equallocation = ri.equallocation; + size_t equalsigns = ri.equalsigns; + srclen = ri.srclen; + size_t full_input_length = ri.full_input_length; + if (srclen == 0) { + if (!ignore_garbage && equalsigns > 0) { + return {INVALID_BASE64_CHARACTER, equallocation, 0}; + } + return {SUCCESS, full_input_length, 0}; + } + char *end_of_safe_64byte_zone = + dst == nullptr + ? nullptr + : ((srclen + 3) / 4 * 3 >= 63 ? 
dst + (srclen + 3) / 4 * 3 - 63 + : dst); + + const chartype *const srcinit = src; + const char *const dstinit = dst; + const chartype *const srcend = src + srclen; + + constexpr size_t block_size = 6; + static_assert(block_size >= 2, "block_size must be at least two"); + char buffer[block_size * 64]; + char *bufferptr = buffer; + if (srclen >= 64) { + const chartype *const srcend64 = src + srclen - 64; + while (src <= srcend64) { + block64 b(src); + src += 64; + uint64_t error = 0; + const uint64_t badcharmask = + b.to_base64_mask(&error); + if (!ignore_garbage && error) { + src -= 64; + const size_t error_offset = trailing_zeroes(error); + return {error_code::INVALID_BASE64_CHARACTER, + size_t(src - srcinit + error_offset), size_t(dst - dstinit)}; + } + if (badcharmask != 0) { + bufferptr += b.compress_block(badcharmask, bufferptr); + } else if (bufferptr != buffer) { + b.copy_block(bufferptr); + bufferptr += 64; + } else { + if (dst >= end_of_safe_64byte_zone) { + b.base64_decode_block_safe(dst); + } else { + b.base64_decode_block(dst); + } + dst += 48; + } + if (bufferptr >= (block_size - 1) * 64 + buffer) { + for (size_t i = 0; i < (block_size - 2); i++) { + base64_decode_block(dst, buffer + i * 64); + dst += 48; + } + if (dst >= end_of_safe_64byte_zone) { + base64_decode_block_safe(dst, buffer + (block_size - 2) * 64); + } else { + base64_decode_block(dst, buffer + (block_size - 2) * 64); + } + dst += 48; + std::memcpy(buffer, buffer + (block_size - 1) * 64, + 64); // 64 might be too much + bufferptr -= (block_size - 1) * 64; + } + } + } + + char *buffer_start = buffer; + // Optimization note: if this is almost full, then it is worth our + // time, otherwise, we should just decode directly. 
+ int last_block = (int)((bufferptr - buffer_start) % 64); + if (last_block != 0 && srcend - src + last_block >= 64) { + + while ((bufferptr - buffer_start) % 64 != 0 && src < srcend) { + uint8_t val = to_base64[uint8_t(*src)]; + *bufferptr = char(val); + if (!ignore_garbage && + (!scalar::base64::is_eight_byte(*src) || val > 64)) { + return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), + size_t(dst - dstinit)}; + } + bufferptr += (val <= 63); + src++; + } + } + + for (; buffer_start + 64 <= bufferptr; buffer_start += 64) { + if (dst >= end_of_safe_64byte_zone) { + base64_decode_block_safe(dst, buffer_start); + } else { + base64_decode_block(dst, buffer_start); + } + dst += 48; + } + if ((bufferptr - buffer_start) % 64 != 0) { + while (buffer_start + 4 < bufferptr) { + uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + + (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + + (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + + (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) + << 8; +#if !SIMDUTF_IS_BIG_ENDIAN + triple = scalar::u32_swap_bytes(triple); +#endif + std::memcpy(dst, &triple, 3); + + dst += 3; + buffer_start += 4; + } + if (buffer_start + 4 <= bufferptr) { + uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + + (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + + (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + + (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) + << 8; +#if !SIMDUTF_IS_BIG_ENDIAN + triple = scalar::u32_swap_bytes(triple); +#endif + std::memcpy(dst, &triple, 3); + + dst += 3; + buffer_start += 4; + } + // we may have 1, 2 or 3 bytes left and we need to decode them so let us + // backtrack + int leftover = int(bufferptr - buffer_start); + while (leftover > 0) { + if (!ignore_garbage) { + while (to_base64[uint8_t(*(src - 1))] == 64) { + src--; + } + } else { + while (to_base64[uint8_t(*(src - 1))] >= 64) { + src--; + } + } + src--; + leftover--; + } + } + if (src < srcend + equalsigns) { + full_result r = 
scalar::base64::base64_tail_decode( + dst, src, srcend - src, equalsigns, options, last_chunk_options); + r = scalar::base64::patch_tail_result( + r, size_t(src - srcinit), size_t(dst - dstinit), equallocation, + full_input_length, last_chunk_options); + // When is_partial(last_chunk_options) is true, we must either end with + // the end of the stream (beyond whitespace) or right after a non-ignorable + // character or at the very beginning of the stream. + // See https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64 + if (is_partial(last_chunk_options) && r.error == error_code::SUCCESS && + r.input_count < full_input_length) { + // First check if we can extend the input to the end of the stream + while (r.input_count < full_input_length && + base64_ignorable(*(srcinit + r.input_count), options)) { + r.input_count++; + } + // If we are still not at the end of the stream, then we must backtrack + // to the last non-ignorable character. + if (r.input_count < full_input_length) { + while (r.input_count > 0 && + base64_ignorable(*(srcinit + r.input_count - 1), options)) { + r.input_count--; + } + } + } + return r; + } + if (!ignore_garbage && equalsigns > 0) { + if ((size_t(dst - dstinit) % 3 == 0) || + ((size_t(dst - dstinit) % 3) + 1 + equalsigns != 4)) { + return {INVALID_BASE64_CHARACTER, equallocation, size_t(dst - dstinit)}; + } + } + return {SUCCESS, srclen, size_t(dst - dstinit)}; +} + +} // namespace base64 +} // unnamed namespace +} // namespace westmere +} // namespace simdutf +/* end file src/generic/base64.h */ +/* begin file src/generic/find.h */ +namespace simdutf { +namespace westmere { +namespace { +namespace util { + +simdutf_really_inline const char *find(const char *start, const char *end, + char character) noexcept { + // Handle empty or invalid range + if (start >= end) + return end; + // Align the start pointer to 64 bytes + uintptr_t misalignment = reinterpret_cast(start) % 64; + if (misalignment != 0) { + size_t adjustment = 64 - 
misalignment; + if (size_t(std::distance(start, end)) < adjustment) { + adjustment = std::distance(start, end); + } + for (size_t i = 0; i < adjustment; i++) { + if (start[i] == character) { + return start + i; + } + } + start += adjustment; + } + + // Main loop for 64-byte aligned data + for (; std::distance(start, end) >= 64; start += 64) { + simd8x64 input(reinterpret_cast(start)); + uint64_t matches = input.eq(uint8_t(character)); + if (matches != 0) { + // Found a match, return the first one + int index = trailing_zeroes(matches); + return start + index; + } + } + return std::find(start, end, character); +} + +simdutf_really_inline const char16_t * +find(const char16_t *start, const char16_t *end, char16_t character) noexcept { + // Handle empty or invalid range + if (start >= end) + return end; + // Align the start pointer to 64 bytes if misalignment is even + uintptr_t misalignment = reinterpret_cast(start) % 64; + if (misalignment != 0 && misalignment % 2 == 0) { + size_t adjustment = (64 - misalignment) / sizeof(char16_t); + if (size_t(std::distance(start, end)) < adjustment) { + adjustment = std::distance(start, end); + } + for (size_t i = 0; i < adjustment; i++) { + if (start[i] == character) { + return start + i; + } + } + start += adjustment; + } + + // Main loop for 64-byte aligned data + for (; std::distance(start, end) >= 32; start += 32) { + simd16x32 input(reinterpret_cast(start)); + uint64_t matches = input.eq(uint16_t(character)); + if (matches != 0) { + // Found a match, return the first one + int index = trailing_zeroes(matches) / 2; + return start + index; + } + } + return std::find(start, end, character); +} + +} // namespace util +} // namespace +} // namespace westmere +} // namespace simdutf +/* end file src/generic/find.h */ +#endif // SIMDUTF_FEATURE_BASE64 + +// +// Implementation-specific overrides +// + +namespace simdutf { +namespace westmere { + +#if SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused int 
+implementation::detect_encodings(const char *input, + size_t length) const noexcept { + // If there is a BOM, then we trust it. + auto bom_encoding = simdutf::BOM::check_bom(input, length); + if (bom_encoding != encoding_type::unspecified) { + return bom_encoding; + } + + int out = 0; + uint32_t utf16_err = (length % 2); + uint32_t utf32_err = (length % 4); + uint32_t ends_with_high = 0; + const auto v_d8 = simd8::splat(0xd8); + const auto v_f8 = simd8::splat(0xf8); + const auto v_fc = simd8::splat(0xfc); + const auto v_dc = simd8::splat(0xdc); + const __m128i standardmax = _mm_set1_epi32(0x10ffff); + const __m128i offset = _mm_set1_epi32(0xffff2000); + const __m128i standardoffsetmax = _mm_set1_epi32(0xfffff7ff); + __m128i currentmax = _mm_setzero_si128(); + __m128i currentoffsetmax = _mm_setzero_si128(); + + utf8_checker c{}; + buf_block_reader<64> reader(reinterpret_cast(input), length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + // utf8 checks + c.check_next_input(in); + + // utf16le checks + auto in0 = simd16(in.chunks[0]); + auto in1 = simd16(in.chunks[1]); + const auto t0 = in0.shr<8>(); + const auto t1 = in1.shr<8>(); + const auto packed1 = simd16::pack(t0, t1); + auto in2 = simd16(in.chunks[2]); + auto in3 = simd16(in.chunks[3]); + const auto t2 = in2.shr<8>(); + const auto t3 = in3.shr<8>(); + const auto packed2 = simd16::pack(t2, t3); + + const auto surrogates_wordmask_lo = (packed1 & v_f8) == v_d8; + const auto surrogates_wordmask_hi = (packed2 & v_f8) == v_d8; + const uint32_t surrogates_bitmask = + (surrogates_wordmask_hi.to_bitmask() << 16) | + surrogates_wordmask_lo.to_bitmask(); + const auto vL_lo = (packed1 & v_fc) == v_dc; + const auto vL_hi = (packed2 & v_fc) == v_dc; + const uint32_t L = (vL_hi.to_bitmask() << 16) | vL_lo.to_bitmask(); + const uint32_t H = L ^ surrogates_bitmask; + utf16_err |= (((H << 1) | ends_with_high) != L); + ends_with_high = (H & 0x80000000) != 0; + + // utf32le checks + currentmax = 
_mm_max_epu32(in.chunks[0], currentmax); + currentoffsetmax = + _mm_max_epu32(_mm_add_epi32(in.chunks[0], offset), currentoffsetmax); + currentmax = _mm_max_epu32(in.chunks[1], currentmax); + currentoffsetmax = + _mm_max_epu32(_mm_add_epi32(in.chunks[1], offset), currentoffsetmax); + currentmax = _mm_max_epu32(in.chunks[2], currentmax); + currentoffsetmax = + _mm_max_epu32(_mm_add_epi32(in.chunks[2], offset), currentoffsetmax); + currentmax = _mm_max_epu32(in.chunks[3], currentmax); + currentoffsetmax = + _mm_max_epu32(_mm_add_epi32(in.chunks[3], offset), currentoffsetmax); + + reader.advance(); + } + + uint8_t block[64]{}; + size_t idx = reader.block_index(); + std::memcpy(block, &input[idx], length - idx); + simd::simd8x64 in(block); + c.check_next_input(in); + + // utf16le last block check + auto in0 = simd16(in.chunks[0]); + auto in1 = simd16(in.chunks[1]); + const auto t0 = in0.shr<8>(); + const auto t1 = in1.shr<8>(); + const auto packed1 = simd16::pack(t0, t1); + auto in2 = simd16(in.chunks[2]); + auto in3 = simd16(in.chunks[3]); + const auto t2 = in2.shr<8>(); + const auto t3 = in3.shr<8>(); + const auto packed2 = simd16::pack(t2, t3); + + const auto surrogates_wordmask_lo = (packed1 & v_f8) == v_d8; + const auto surrogates_wordmask_hi = (packed2 & v_f8) == v_d8; + const uint32_t surrogates_bitmask = + (surrogates_wordmask_hi.to_bitmask() << 16) | + surrogates_wordmask_lo.to_bitmask(); + const auto vL_lo = (packed1 & v_fc) == v_dc; + const auto vL_hi = (packed2 & v_fc) == v_dc; + const uint32_t L = (vL_hi.to_bitmask() << 16) | vL_lo.to_bitmask(); + const uint32_t H = L ^ surrogates_bitmask; + utf16_err |= (((H << 1) | ends_with_high) != L); + // this is required to check for last byte ending in high and end of input + // is reached + ends_with_high = (H & 0x80000000) != 0; + utf16_err |= ends_with_high; + + // utf32le last block check + currentmax = _mm_max_epu32(in.chunks[0], currentmax); + currentoffsetmax = + _mm_max_epu32(_mm_add_epi32(in.chunks[0], 
offset), currentoffsetmax); + currentmax = _mm_max_epu32(in.chunks[1], currentmax); + currentoffsetmax = + _mm_max_epu32(_mm_add_epi32(in.chunks[1], offset), currentoffsetmax); + currentmax = _mm_max_epu32(in.chunks[2], currentmax); + currentoffsetmax = + _mm_max_epu32(_mm_add_epi32(in.chunks[2], offset), currentoffsetmax); + currentmax = _mm_max_epu32(in.chunks[3], currentmax); + currentoffsetmax = + _mm_max_epu32(_mm_add_epi32(in.chunks[3], offset), currentoffsetmax); + + reader.advance(); + + c.check_eof(); + bool is_valid_utf8 = !c.errors(); + __m128i is_zero = + _mm_xor_si128(_mm_max_epu32(currentmax, standardmax), standardmax); + utf32_err |= (_mm_test_all_zeros(is_zero, is_zero) == 0); + + is_zero = _mm_xor_si128(_mm_max_epu32(currentoffsetmax, standardoffsetmax), + standardoffsetmax); + utf32_err |= (_mm_test_all_zeros(is_zero, is_zero) == 0); + if (is_valid_utf8) { + out |= encoding_type::UTF8; + } + if (utf16_err == 0) { + out |= encoding_type::UTF16_LE; + } + if (utf32_err == 0) { + out |= encoding_type::UTF32_LE; + } + return out; +} +#endif // SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused bool +implementation::validate_utf8(const char *buf, size_t len) const noexcept { + return westmere::utf8_validation::generic_validate_utf8(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 +simdutf_warn_unused result implementation::validate_utf8_with_errors( + const char *buf, size_t len) const noexcept { + return westmere::utf8_validation::generic_validate_utf8_with_errors(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_ASCII +simdutf_warn_unused bool +implementation::validate_ascii(const char *buf, size_t len) const noexcept { + return westmere::ascii_validation::generic_validate_ascii(buf, len); +} + +simdutf_warn_unused result implementation::validate_ascii_with_errors( + const char *buf, size_t len) const noexcept 
{ + return westmere::ascii_validation::generic_validate_ascii_with_errors(buf, + len); +} +#endif // SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII +simdutf_warn_unused bool +implementation::validate_utf16le_as_ascii(const char16_t *buf, + size_t len) const noexcept { + return westmere::utf16::validate_utf16_as_ascii_with_errors< + endianness::LITTLE>(buf, len) + .error == SUCCESS; +} + +simdutf_warn_unused bool +implementation::validate_utf16be_as_ascii(const char16_t *buf, + size_t len) const noexcept { + return westmere::utf16::validate_utf16_as_ascii_with_errors( + buf, len) + .error == SUCCESS; +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused bool +implementation::validate_utf16le(const char16_t *buf, + size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + // empty input is valid UTF-16. protect the implementation from + // handling nullptr + return true; + } + const auto res = + westmere::utf16::validate_utf16_with_errors(buf, len); + if (res.is_err()) { + return false; + } + + if (res.count == len) + return true; + + return scalar::utf16::validate(buf + res.count, + len - res.count); +} +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused bool +implementation::validate_utf16be(const char16_t *buf, + size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + // empty input is valid UTF-16. 
protect the implementation from + // handling nullptr + return true; + } + const auto res = + westmere::utf16::validate_utf16_with_errors(buf, len); + if (res.is_err()) { + return false; + } + + if (res.count == len) + return true; + + return scalar::utf16::validate(buf + res.count, + len - res.count); +} + +simdutf_warn_unused result implementation::validate_utf16le_with_errors( + const char16_t *buf, size_t len) const noexcept { + const result res = + westmere::utf16::validate_utf16_with_errors(buf, len); + if (res.count != len) { + const result scalar_res = + scalar::utf16::validate_with_errors( + buf + res.count, len - res.count); + return result(scalar_res.error, res.count + scalar_res.count); + } else { + return res; + } +} + +simdutf_warn_unused result implementation::validate_utf16be_with_errors( + const char16_t *buf, size_t len) const noexcept { + const result res = + westmere::utf16::validate_utf16_with_errors(buf, len); + if (res.count != len) { + result scalar_res = scalar::utf16::validate_with_errors( + buf + res.count, len - res.count); + return result(scalar_res.error, res.count + scalar_res.count); + } else { + return res; + } +} + +void implementation::to_well_formed_utf16le(const char16_t *input, size_t len, + char16_t *output) const noexcept { + return utf16fix_sse(input, len, output); +} + +void implementation::to_well_formed_utf16be(const char16_t *input, size_t len, + char16_t *output) const noexcept { + return utf16fix_sse(input, len, output); +} +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused bool +implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { + return utf32::validate(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused result implementation::validate_utf32_with_errors( + const char32_t *buf, size_t len) const noexcept { + return 
utf32::validate_with_errors(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( + const char *buf, size_t len, char *utf8_output) const noexcept { + + std::pair ret = + sse_convert_latin1_to_utf8(buf, len, utf8_output); + size_t converted_chars = ret.second - utf8_output; + + if (ret.first != buf + len) { + const size_t scalar_converted_chars = scalar::latin1_to_utf8::convert( + ret.first, len - (ret.first - buf), ret.second); + converted_chars += scalar_converted_chars; + } + + return converted_chars; +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + std::pair ret = + sse_convert_latin1_to_utf16(buf, len, utf16_output); + if (ret.first == nullptr) { + return 0; + } + size_t converted_chars = ret.second - utf16_output; + if (ret.first != buf + len) { + const size_t scalar_converted_chars = + scalar::latin1_to_utf16::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_converted_chars == 0) { + return 0; + } + converted_chars += scalar_converted_chars; + } + return converted_chars; +} + +simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + std::pair ret = + sse_convert_latin1_to_utf16(buf, len, utf16_output); + if (ret.first == nullptr) { + return 0; + } + size_t converted_chars = ret.second - utf16_output; + if (ret.first != buf + len) { + const size_t scalar_converted_chars = + scalar::latin1_to_utf16::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_converted_chars == 0) { + return 0; + } + converted_chars += scalar_converted_chars; + } + return converted_chars; +} +#endif // SIMDUTF_FEATURE_UTF16 && 
SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept { + std::pair ret = + sse_convert_latin1_to_utf32(buf, len, utf32_output); + if (ret.first == nullptr) { + return 0; + } + size_t converted_chars = ret.second - utf32_output; + if (ret.first != buf + len) { + const size_t scalar_converted_chars = scalar::latin1_to_utf32::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_converted_chars == 0) { + return 0; + } + converted_chars += scalar_converted_chars; + } + return converted_chars; +} +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept { + utf8_to_latin1::validating_transcoder converter; + return converter.convert(buf, len, latin1_output); +} + +simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors( + const char *buf, size_t len, char *latin1_output) const noexcept { + utf8_to_latin1::validating_transcoder converter; + return converter.convert_with_errors(buf, len, latin1_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept { + return westmere::utf8_to_latin1::convert_valid(buf, len, latin1_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16::validating_transcoder converter; + return converter.convert(buf, len, utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be( + const char *buf, size_t len, char16_t 
*utf16_output) const noexcept { + utf8_to_utf16::validating_transcoder converter; + return converter.convert(buf, len, utf16_output); +} + +simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16::validating_transcoder converter; + return converter.convert_with_errors(buf, len, + utf16_output); +} + +simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16::validating_transcoder converter; + return converter.convert_with_errors(buf, len, utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le( + const char *input, size_t size, char16_t *utf16_output) const noexcept { + return utf8_to_utf16::convert_valid(input, size, + utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be( + const char *input, size_t size, char16_t *utf16_output) const noexcept { + return utf8_to_utf16::convert_valid(input, size, + utf16_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::convert_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept { + utf8_to_utf32::validating_transcoder converter; + return converter.convert(buf, len, utf32_output); +} + +simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors( + const char *buf, size_t len, char32_t *utf32_output) const noexcept { + utf8_to_utf32::validating_transcoder converter; + return converter.convert_with_errors(buf, len, utf32_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( + const char *input, size_t size, char32_t *utf32_output) const noexcept { + return utf8_to_utf32::convert_valid(input, size, utf32_output); +} +#endif // 
SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + sse_convert_utf16_to_latin1(buf, len, latin1_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - latin1_output; + + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_latin1::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + sse_convert_utf16_to_latin1(buf, len, latin1_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - latin1_output; + + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_latin1::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused result +implementation::convert_utf16le_to_latin1_with_errors( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + sse_convert_utf16_to_latin1_with_errors( + buf, len, latin1_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_latin1::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + 
} + ret.first.count = + ret.second - + latin1_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused result +implementation::convert_utf16be_to_latin1_with_errors( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + sse_convert_utf16_to_latin1_with_errors(buf, len, + latin1_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_latin1::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + latin1_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + // optimization opportunity: we could provide an optimized function. + return convert_utf16be_to_latin1(buf, len, latin1_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + // optimization opportunity: we could provide an optimized function. 
+ return convert_utf16le_to_latin1(buf, len, latin1_output); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + std::pair ret = + sse_convert_utf16_to_utf8(buf, len, utf8_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf8_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_utf8::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + std::pair ret = + sse_convert_utf16_to_utf8(buf, len, utf8_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf8_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_utf8::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + westmere::sse_convert_utf16_to_utf8_with_errors( + buf, len, utf8_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_utf8::convert_with_errors( + buf + ret.first.count, len - 
ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf8_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + westmere::sse_convert_utf16_to_utf8_with_errors( + buf, len, utf8_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_utf8::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf8_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + return convert_utf16le_to_utf8(buf, len, utf8_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + return convert_utf16be_to_utf8(buf, len, utf8_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + sse_convert_utf32_to_latin1(buf, len, 
latin1_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - latin1_output; + // if (ret.first != buf + len) { + if (ret.first < buf + len) { + const size_t scalar_saved_bytes = scalar::utf32_to_latin1::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + westmere::sse_convert_utf32_to_latin1_with_errors(buf, len, + latin1_output); + if (ret.first.count != len) { + result scalar_res = scalar::utf32_to_latin1::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + latin1_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + // optimization opportunity: we could provide an optimized function. 
+ return convert_utf32_to_latin1(buf, len, latin1_output); +} +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + std::pair ret = + sse_convert_utf32_to_utf8(buf, len, utf8_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf8_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = scalar::utf32_to_utf8::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + westmere::sse_convert_utf32_to_utf8_with_errors(buf, len, utf8_output); + if (ret.first.count != len) { + result scalar_res = scalar::utf32_to_utf8::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf8_output; // Set count to the number of 8-bit code units written + return ret.first; +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + std::pair ret = + sse_convert_utf16_to_utf32(buf, len, utf32_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf32_output; + if (ret.first != buf + len) { + const 
size_t scalar_saved_bytes = + scalar::utf16_to_utf32::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + std::pair ret = + sse_convert_utf16_to_utf32(buf, len, utf32_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf32_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_utf32::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + westmere::sse_convert_utf16_to_utf32_with_errors( + buf, len, utf32_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_utf32::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf32_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + // ret.first.count is always the position in 
the buffer, not the number of + // code units written even if finished + std::pair ret = + westmere::sse_convert_utf16_to_utf32_with_errors( + buf, len, utf32_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_utf32::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf32_output; // Set count to the number of 8-bit code units written + return ret.first; +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + return convert_utf32_to_utf8(buf, len, utf8_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + std::pair ret = + sse_convert_utf32_to_utf16(buf, len, utf16_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf16_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf32_to_utf16::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + std::pair ret = + sse_convert_utf32_to_utf16(buf, len, utf16_output); 
+ if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf16_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf32_to_utf16::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + westmere::sse_convert_utf32_to_utf16_with_errors( + buf, len, utf16_output); + if (ret.first.count != len) { + result scalar_res = + scalar::utf32_to_utf16::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf16_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + westmere::sse_convert_utf32_to_utf16_with_errors( + buf, len, utf16_output); + if (ret.first.count != len) { + result scalar_res = + scalar::utf32_to_utf16::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf16_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused 
size_t implementation::convert_valid_utf32_to_utf16le( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + return convert_utf32_to_utf16le(buf, len, utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + return convert_utf32_to_utf16be(buf, len, utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + return convert_utf16le_to_utf32(buf, len, utf32_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + return convert_utf16be_to_utf32(buf, len, utf32_output); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 +void implementation::change_endianness_utf16(const char16_t *input, + size_t length, + char16_t *output) const noexcept { + utf16::change_endianness_utf16(input, length, output); +} + +simdutf_warn_unused size_t implementation::count_utf16le( + const char16_t *input, size_t length) const noexcept { + return utf16::count_code_points(input, length); +} + +simdutf_warn_unused size_t implementation::count_utf16be( + const char16_t *input, size_t length) const noexcept { + return utf16::count_code_points(input, length); +} +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 +simdutf_warn_unused size_t +implementation::count_utf8(const char *input, size_t length) const noexcept { + return utf8::count_code_points_bytemask(input, length); +} +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::latin1_length_from_utf8( + const char *buf, size_t len) const noexcept { + return count_utf8(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if 
SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t implementation::utf8_length_from_utf16le( + const char16_t *input, size_t length) const noexcept { + return utf16::utf8_length_from_utf16_bytemask(input, + length); +} + +simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( + const char16_t *input, size_t length) const noexcept { + return utf16::utf8_length_from_utf16_bytemask(input, length); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::utf8_length_from_latin1( + const char *input, size_t len) const noexcept { + const uint8_t *str = reinterpret_cast(input); + size_t answer = len / sizeof(__m128i) * sizeof(__m128i); + size_t i = 0; + if (answer >= 2048) { // long strings optimization + __m128i two_64bits = _mm_setzero_si128(); + while (i + sizeof(__m128i) <= len) { + __m128i runner = _mm_setzero_si128(); + size_t iterations = (len - i) / sizeof(__m128i); + if (iterations > 255) { + iterations = 255; + } + size_t max_i = i + iterations * sizeof(__m128i) - sizeof(__m128i); + for (; i + 4 * sizeof(__m128i) <= max_i; i += 4 * sizeof(__m128i)) { + __m128i input1 = _mm_loadu_si128((const __m128i *)(str + i)); + __m128i input2 = + _mm_loadu_si128((const __m128i *)(str + i + sizeof(__m128i))); + __m128i input3 = + _mm_loadu_si128((const __m128i *)(str + i + 2 * sizeof(__m128i))); + __m128i input4 = + _mm_loadu_si128((const __m128i *)(str + i + 3 * sizeof(__m128i))); + __m128i input12 = + _mm_add_epi8(_mm_cmpgt_epi8(_mm_setzero_si128(), input1), + _mm_cmpgt_epi8(_mm_setzero_si128(), input2)); + __m128i input34 = + _mm_add_epi8(_mm_cmpgt_epi8(_mm_setzero_si128(), input3), + _mm_cmpgt_epi8(_mm_setzero_si128(), input4)); + __m128i input1234 = _mm_add_epi8(input12, input34); + runner = _mm_sub_epi8(runner, input1234); + } + for (; i <= max_i; i += sizeof(__m128i)) { + __m128i more_input = _mm_loadu_si128((const __m128i *)(str + 
i)); + runner = _mm_sub_epi8(runner, + _mm_cmpgt_epi8(_mm_setzero_si128(), more_input)); + } + two_64bits = + _mm_add_epi64(two_64bits, _mm_sad_epu8(runner, _mm_setzero_si128())); + } + answer += + _mm_extract_epi64(two_64bits, 0) + _mm_extract_epi64(two_64bits, 1); + } else if (answer > 0) { // short string optimization + for (; i + 2 * sizeof(__m128i) <= len; i += 2 * sizeof(__m128i)) { + __m128i latin = _mm_loadu_si128((const __m128i *)(input + i)); + uint16_t non_ascii = (uint16_t)_mm_movemask_epi8(latin); + answer += count_ones(non_ascii); + latin = _mm_loadu_si128((const __m128i *)(input + i) + 1); + non_ascii = (uint16_t)_mm_movemask_epi8(latin); + answer += count_ones(non_ascii); + } + for (; i + sizeof(__m128i) <= len; i += sizeof(__m128i)) { + __m128i latin = _mm_loadu_si128((const __m128i *)(input + i)); + uint16_t non_ascii = (uint16_t)_mm_movemask_epi8(latin); + answer += count_ones(non_ascii); + } + } + return answer + scalar::latin1::utf8_length_from_latin1( + reinterpret_cast(str + i), len - i); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::utf32_length_from_utf16le( + const char16_t *input, size_t length) const noexcept { + return utf16::utf32_length_from_utf16(input, length); +} + +simdutf_warn_unused size_t implementation::utf32_length_from_utf16be( + const char16_t *input, size_t length) const noexcept { + return utf16::utf32_length_from_utf16(input, length); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t implementation::utf16_length_from_utf8( + const char *input, size_t length) const noexcept { + return utf8::utf16_length_from_utf8_bytemask(input, length); +} +simdutf_warn_unused result +implementation::utf8_length_from_utf16le_with_replacement( + const char16_t *input, size_t length) const noexcept { + return 
utf16::utf8_length_from_utf16_with_replacement( + input, length); +} + +simdutf_warn_unused result +implementation::utf8_length_from_utf16be_with_replacement( + const char16_t *input, size_t length) const noexcept { + return utf16::utf8_length_from_utf16_with_replacement( + input, length); +} + +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::utf8_length_from_utf32( + const char32_t *input, size_t length) const noexcept { + return utf32::utf8_length_from_utf32(input, length); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::utf16_length_from_utf32( + const char32_t *input, size_t length) const noexcept { + const __m128i v_00000000 = _mm_setzero_si128(); + const __m128i v_ffff0000 = _mm_set1_epi32((uint32_t)0xffff0000); + size_t pos = 0; + size_t count = 0; + for (; pos + 4 <= length; pos += 4) { + __m128i in = _mm_loadu_si128((__m128i *)(input + pos)); + const __m128i surrogate_bytemask = + _mm_cmpeq_epi32(_mm_and_si128(in, v_ffff0000), v_00000000); + const uint16_t surrogate_bitmask = + static_cast(_mm_movemask_epi8(surrogate_bytemask)); + size_t surrogate_count = (16 - count_ones(surrogate_bitmask)) / 4; + count += 4 + surrogate_count; + } + return count + + scalar::utf32::utf16_length_from_utf32(input + pos, length - pos); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::utf32_length_from_utf8( + const char *input, size_t length) const noexcept { + return utf8::count_code_points(input, length); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_BASE64 +simdutf_warn_unused result implementation::base64_to_binary( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options 
last_chunk_options) const noexcept { + if (options & base64_default_or_url) { + if (options == base64_options::base64_default_or_url_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else if (options & base64_url) { + if (options == base64_options::base64_url_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else { + if (options == base64_options::base64_default_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } +} + +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + if (options & base64_default_or_url) { + if (options == base64_options::base64_default_or_url_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else if (options & base64_url) { + if (options == base64_options::base64_url_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else { + if (options == base64_options::base64_default_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return 
base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } +} + +simdutf_warn_unused result implementation::base64_to_binary( + const char16_t *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + if (options & base64_default_or_url) { + if (options == base64_options::base64_default_or_url_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else if (options & base64_url) { + if (options == base64_options::base64_url_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else { + if (options == base64_options::base64_default_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } +} + +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char16_t *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + if (options & base64_default_or_url) { + if (options == base64_options::base64_default_or_url_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else if (options & base64_url) { + if (options == base64_options::base64_url_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return 
base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else { + if (options == base64_options::base64_default_accept_garbage) { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return base64::compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } +} + +size_t implementation::binary_to_base64(const char *input, size_t length, + char *output, + base64_options options) const noexcept { + if (options & base64_url) { + return encode_base64(output, input, length, options); + } else { + return encode_base64(output, input, length, options); + } +} + +size_t implementation::binary_to_base64_with_lines( + const char *input, size_t length, char *output, size_t line_length, + base64_options options) const noexcept { + if (options & base64_url) { + return encode_base64_impl(output, input, length, options, + line_length); + + } else { + return encode_base64_impl(output, input, length, options, + line_length); + } +} + +const char *implementation::find(const char *start, const char *end, + char character) const noexcept { + return util::find(start, end, character); +} + +const char16_t *implementation::find(const char16_t *start, const char16_t *end, + char16_t character) const noexcept { + return util::find(start, end, character); +} +#endif // SIMDUTF_FEATURE_BASE64 + +} // namespace westmere +} // namespace simdutf + +/* begin file src/simdutf/westmere/end.h */ +#if SIMDUTF_CAN_ALWAYS_RUN_WESTMERE +// nothing needed. 
+#else +SIMDUTF_UNTARGET_REGION +#endif + +#undef SIMDUTF_SIMD_HAS_BYTEMASK +/* end file src/simdutf/westmere/end.h */ +/* end file src/westmere/implementation.cpp */ +#endif +#if SIMDUTF_IMPLEMENTATION_LASX +/* begin file src/lasx/implementation.cpp */ +/* begin file src/simdutf/lasx/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "lasx" +// #define SIMDUTF_IMPLEMENTATION lasx +#define SIMDUTF_SIMD_HAS_UNSIGNED_CMP 1 + +#if SIMDUTF_CAN_ALWAYS_RUN_LASX +// nothing needed. +#else +SIMDUTF_TARGET_LASX +#endif +/* end file src/simdutf/lasx/begin.h */ +namespace simdutf { +namespace lasx { +namespace { +#ifndef SIMDUTF_LASX_H + #error "lasx.h must be included" +#endif +using namespace simd; + +#if SIMDUTF_FEATURE_UTF8 +// convert vmskltz/vmskgez/vmsknz to +// simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes index +const uint8_t lasx_1_2_utf8_bytes_mask[] = { + 0, 1, 4, 5, 16, 17, 20, 21, 64, 65, 68, 69, 80, 81, 84, + 85, 2, 3, 6, 7, 18, 19, 22, 23, 66, 67, 70, 71, 82, 83, + 86, 87, 8, 9, 12, 13, 24, 25, 28, 29, 72, 73, 76, 77, 88, + 89, 92, 93, 10, 11, 14, 15, 26, 27, 30, 31, 74, 75, 78, 79, + 90, 91, 94, 95, 32, 33, 36, 37, 48, 49, 52, 53, 96, 97, 100, + 101, 112, 113, 116, 117, 34, 35, 38, 39, 50, 51, 54, 55, 98, 99, + 102, 103, 114, 115, 118, 119, 40, 41, 44, 45, 56, 57, 60, 61, 104, + 105, 108, 109, 120, 121, 124, 125, 42, 43, 46, 47, 58, 59, 62, 63, + 106, 107, 110, 111, 122, 123, 126, 127, 128, 129, 132, 133, 144, 145, 148, + 149, 192, 193, 196, 197, 208, 209, 212, 213, 130, 131, 134, 135, 146, 147, + 150, 151, 194, 195, 198, 199, 210, 211, 214, 215, 136, 137, 140, 141, 152, + 153, 156, 157, 200, 201, 204, 205, 216, 217, 220, 221, 138, 139, 142, 143, + 154, 155, 158, 159, 202, 203, 206, 207, 218, 219, 222, 223, 160, 161, 164, + 165, 176, 177, 180, 181, 224, 225, 228, 229, 240, 241, 244, 245, 162, 163, + 166, 167, 178, 179, 182, 183, 226, 227, 230, 231, 242, 243, 246, 247, 168, + 169, 172, 173, 184, 185, 188, 189, 232, 233, 236, 237, 248, 249, 252, 253, + 
170, 171, 174, 175, 186, 187, 190, 191, 234, 235, 238, 239, 250, 251, 254, + 255}; +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32 +simdutf_really_inline __m128i lsx_swap_bytes(__m128i vec) { + return __lsx_vshuf4i_b(vec, 0b10110001); +} +simdutf_really_inline __m256i lasx_swap_bytes(__m256i vec) { + return __lasx_xvshuf4i_b(vec, 0b10110001); +} +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_ASCII || SIMDUTF_FEATURE_DETECT_ENCODING || \ + SIMDUTF_FEATURE_UTF8 +simdutf_really_inline bool is_ascii(const simd8x64 &input) { + return input.is_ascii(); +} +#endif // SIMDUTF_FEATURE_ASCII || SIMDUTF_FEATURE_DETECT_ENCODING || + // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_really_inline simd8 +must_be_2_3_continuation(const simd8 prev2, + const simd8 prev3) { + simd8 is_third_byte = prev2 >= uint8_t(0b11100000u); + simd8 is_fourth_byte = prev3 >= uint8_t(0b11110000u); + return is_third_byte ^ is_fourth_byte; +} +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 && (SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32) +// common functions for utf8 conversions +simdutf_really_inline __m128i convert_utf8_3_byte_to_utf16(__m128i in) { + // Low half contains 10bbbbbb|10cccccc + // High half contains 1110aaaa|1110aaaa + const v16u8 sh = {2, 1, 5, 4, 8, 7, 11, 10, 0, 0, 3, 3, 6, 6, 9, 9}; + const v8u16 v0fff = {0xfff, 0xfff, 0xfff, 0xfff, 0xfff, 0xfff, 0xfff, 0xfff}; + + __m128i perm = __lsx_vshuf_b(__lsx_vldi(0), in, (__m128i)sh); + // 1110aaaa => aaaa0000 + __m128i perm_high = __lsx_vslli_b(__lsx_vbsrl_v(perm, 8), 4); + // 10bbbbbb 10cccccc => 0010bbbb bbcccccc + __m128i composed = __lsx_vbitsel_v(__lsx_vsrli_h(perm, 2), /* perm >> 2*/ + perm, __lsx_vrepli_h(0x3f) /* 0x003f */); + // 0010bbbb bbcccccc => aaaabbbb bbcccccc + composed = __lsx_vbitsel_v(perm_high, composed, (__m128i)v0fff); + + return composed; +} + 
+simdutf_really_inline __m128i convert_utf8_2_byte_to_utf16(__m128i in) { + // 10bbbbb 110aaaaa => 00bbbbb 000aaaaa + __m128i composed = __lsx_vand_v(in, __lsx_vldi(0x3f)); + // 00bbbbbb 000aaaaa => 00000aaa aabbbbbb + composed = __lsx_vbitsel_v( + __lsx_vsrli_h(__lsx_vslli_h(composed, 8), 2), /* (aaaaa << 8) >> 2 */ + __lsx_vsrli_h(composed, 8), /* bbbbbb >> 8 */ + __lsx_vrepli_h(0x3f)); /* 0x003f */ + return composed; +} + +simdutf_really_inline __m128i +convert_utf8_1_to_2_byte_to_utf16(__m128i in, size_t shufutf8_idx) { + // Converts 6 1-2 byte UTF-8 characters to 6 UTF-16 characters. + // This is a relatively easy scenario + // we process SIX (6) input code-code units. The max length in bytes of six + // code code units spanning between 1 and 2 bytes each is 12 bytes. + __m128i sh = + __lsx_vld(reinterpret_cast( + simdutf::tables::utf8_to_utf16::shufutf8[shufutf8_idx]), + 0); + // Shuffle + // 1 byte: 00000000 0bbbbbbb + // 2 byte: 110aaaaa 10bbbbbb + __m128i perm = __lsx_vshuf_b(__lsx_vldi(0), in, sh); + // 1 byte: 00000000 0bbbbbbb + // 2 byte: 00000000 00bbbbbb + __m128i ascii = __lsx_vand_v(perm, __lsx_vrepli_h(0x7f)); // 6 or 7 bits + // 1 byte: 00000000 00000000 + // 2 byte: 00000aaa aa000000 + __m128i v1f00 = lsx_splat_u16(0x1f00); + __m128i composed = __lsx_vsrli_h(__lsx_vand_v(perm, v1f00), 2); // 5 bits + // Combine with a shift right accumulate + // 1 byte: 00000000 0bbbbbbb + // 2 byte: 00000aaa aabbbbbb + composed = __lsx_vadd_h(ascii, composed); + return composed; +} +#endif // SIMDUTF_FEATURE_UTF8 && (SIMDUTF_FEATURE_UTF16 || + // SIMDUTF_FEATURE_UTF32) + +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +/* begin file src/lasx/lasx_validate_utf16.cpp */ +template +simd8 utf16_gather_high_bytes(const simd16 in0, + const simd16 in1) { + if (big_endian) { + const auto mask = simd16(0x00ff); + const auto t0 = in0 & mask; + const auto t1 = in1 & mask; + + return simd16::pack(t0, t1); + } else { + return simd16::pack_shifted_right<8>(in0, 
in1); + } +} +/* end file src/lasx/lasx_validate_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +/* begin file src/lasx/lasx_validate_utf32le.cpp */ +const char32_t *lasx_validate_utf32le(const char32_t *input, size_t size) { + const char32_t *end = input + size; + + // Performance degradation when memory address is not 32-byte aligned + while (((uint64_t)input & 0x1F) && input < end) { + uint32_t word = *input++; + if (word > 0x10FFFF || (word >= 0xD800 && word <= 0xDFFF)) { + return nullptr; + } + } + + __m256i offset = lasx_splat_u32(0xffff2000); + __m256i standardoffsetmax = lasx_splat_u32(0xfffff7ff); + __m256i standardmax = lasx_splat_u32(0x10ffff); + __m256i currentmax = __lasx_xvldi(0x0); + __m256i currentoffsetmax = __lasx_xvldi(0x0); + + while (input + 8 < end) { + __m256i in = __lasx_xvld(reinterpret_cast(input), 0); + currentmax = __lasx_xvmax_wu(in, currentmax); + // 0xD8__ + 0x2000 = 0xF8__ => 0xF8__ > 0xF7FF + currentoffsetmax = + __lasx_xvmax_wu(__lasx_xvadd_w(in, offset), currentoffsetmax); + input += 8; + } + __m256i is_zero = + __lasx_xvxor_v(__lasx_xvmax_wu(currentmax, standardmax), standardmax); + if (__lasx_xbnz_v(is_zero)) { + return nullptr; + } + + is_zero = __lasx_xvxor_v(__lasx_xvmax_wu(currentoffsetmax, standardoffsetmax), + standardoffsetmax); + if (__lasx_xbnz_v(is_zero)) { + return nullptr; + } + return input; +} + +const result lasx_validate_utf32le_with_errors(const char32_t *input, + size_t size) { + const char32_t *start = input; + const char32_t *end = input + size; + + // Performance degradation when memory address is not 32-byte aligned + while (((uint64_t)input & 0x1F) && input < end) { + uint32_t word = *input; + if (word > 0x10FFFF) { + return result(error_code::TOO_LARGE, input - start); + } + if (word >= 0xD800 && word <= 0xDFFF) { + return result(error_code::SURROGATE, input - start); + } + input++; + } + + __m256i offset = 
lasx_splat_u32(0xffff2000); + __m256i standardoffsetmax = lasx_splat_u32(0xfffff7ff); + __m256i standardmax = lasx_splat_u32(0x10ffff); + __m256i currentmax = __lasx_xvldi(0x0); + __m256i currentoffsetmax = __lasx_xvldi(0x0); + + while (input + 8 < end) { + __m256i in = __lasx_xvld(reinterpret_cast(input), 0); + currentmax = __lasx_xvmax_wu(in, currentmax); + currentoffsetmax = + __lasx_xvmax_wu(__lasx_xvadd_w(in, offset), currentoffsetmax); + + __m256i is_zero = + __lasx_xvxor_v(__lasx_xvmax_wu(currentmax, standardmax), standardmax); + if (__lasx_xbnz_v(is_zero)) { + return result(error_code::TOO_LARGE, input - start); + } + is_zero = + __lasx_xvxor_v(__lasx_xvmax_wu(currentoffsetmax, standardoffsetmax), + standardoffsetmax); + if (__lasx_xbnz_v(is_zero)) { + return result(error_code::SURROGATE, input - start); + } + input += 8; + } + + return result(error_code::SUCCESS, input - start); +} +/* end file src/lasx/lasx_validate_utf32le.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/lasx/lasx_convert_latin1_to_utf8.cpp */ +/* + Returns a pair: the first unprocessed byte from buf and utf8_output + A scalar routing should carry on the conversion of the tail. +*/ + +std::pair +lasx_convert_latin1_to_utf8(const char *latin1_input, size_t len, + char *utf8_out) { + uint8_t *utf8_output = reinterpret_cast(utf8_out); + const size_t safety_margin = 12; + const char *end = latin1_input + len; + + // We always write 16 bytes, of which more than the first 8 bytes + // are valid. A safety margin of 8 is more than sufficient. + while (end - latin1_input >= std::ptrdiff_t(16 + safety_margin)) { + __m128i in8 = __lsx_vld(reinterpret_cast(latin1_input), 0); + uint32_t ascii_mask = __lsx_vpickve2gr_wu(__lsx_vmskgez_b(in8), 0); + if (ascii_mask == 0xFFFF) { + __lsx_vst(in8, utf8_output, 0); + utf8_output += 16; + latin1_input += 16; + continue; + } + // We just fallback on UTF-16 code. 
This could be optimized/simplified + // further. + __m256i in16 = __lasx_vext2xv_hu_bu(____m256i(in8)); + // 1. prepare 2-byte values + // input 8-bit word : [aabb|bbbb] x 16 + // expected output : [1100|00aa|10bb|bbbb] x 16 + // t0 = [0000|00aa|bbbb|bb00] + __m256i t0 = __lasx_xvslli_h(in16, 2); + // t1 = [0000|00aa|0000|0000] + __m256i t1 = __lasx_xvand_v(t0, lasx_splat_u16(0x300)); + // t3 = [0000|00aa|00bb|bbbb] + __m256i t2 = __lasx_xvbitsel_v(t1, in16, __lasx_xvrepli_h(0x3f)); + // t4 = [1100|00aa|10bb|bbbb] + __m256i t3 = __lasx_xvor_v(t2, __lasx_xvreplgr2vr_h(uint16_t(0xc080))); + // merge ASCII and 2-byte codewords + __m256i one_byte_bytemask = __lasx_xvsle_hu(in16, __lasx_xvrepli_h(0x7F)); + __m256i utf8_unpacked = __lasx_xvbitsel_v(t3, in16, one_byte_bytemask); + + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lasx_1_2_utf8_bytes_mask[(ascii_mask & 0xFF)]][0]; + __m128i shuffle0 = __lsx_vld(row0 + 1, 0); + __m128i utf8_unpacked_lo = lasx_extracti128_lo(utf8_unpacked); + __m128i utf8_packed0 = + __lsx_vshuf_b(utf8_unpacked_lo, utf8_unpacked_lo, shuffle0); + __lsx_vst(utf8_packed0, utf8_output, 0); + utf8_output += row0[0]; + + const uint8_t *row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lasx_1_2_utf8_bytes_mask[(ascii_mask >> 8)]][0]; + __m128i shuffle1 = __lsx_vld(row1 + 1, 0); + __m128i utf8_unpacked_hi = lasx_extracti128_hi(utf8_unpacked); + __m128i utf8_packed1 = + __lsx_vshuf_b(utf8_unpacked_hi, utf8_unpacked_hi, shuffle1); + __lsx_vst(utf8_packed1, utf8_output, 0); + utf8_output += row1[0]; + + latin1_input += 16; + } // while + + return std::make_pair(latin1_input, reinterpret_cast(utf8_output)); +} +/* end file src/lasx/lasx_convert_latin1_to_utf8.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/lasx/lasx_convert_latin1_to_utf16.cpp */ +std::pair +lasx_convert_latin1_to_utf16le(const char *buf, size_t len, + 
char16_t *utf16_output) { + const char *end = buf + len; + + // Performance degradation when memory address is not 32-byte aligned + while (((uint64_t)utf16_output & 0x1F) && buf < end) { + *utf16_output++ = uint8_t(*buf) & 0xFF; + buf++; + } + + while (end - buf >= 32) { + __m256i in8 = __lasx_xvld(reinterpret_cast(buf), 0); + + __m256i inlow = __lasx_vext2xv_hu_bu(in8); + __m256i in8_high = __lasx_xvpermi_q(in8, in8, 0b00000001); + __m256i inhigh = __lasx_vext2xv_hu_bu(in8_high); + __lasx_xvst(inlow, reinterpret_cast(utf16_output), 0); + __lasx_xvst(inhigh, reinterpret_cast(utf16_output), 32); + + utf16_output += 32; + buf += 32; + } + + if (end - buf >= 16) { + __m128i zero = __lsx_vldi(0); + __m128i in8 = __lsx_vld(reinterpret_cast(buf), 0); + + __m128i inlow = __lsx_vilvl_b(zero, in8); + __m128i inhigh = __lsx_vilvh_b(zero, in8); + __lsx_vst(inlow, reinterpret_cast(utf16_output), 0); + __lsx_vst(inhigh, reinterpret_cast(utf16_output), 16); + + utf16_output += 16; + buf += 16; + } + return std::make_pair(buf, utf16_output); +} + +std::pair +lasx_convert_latin1_to_utf16be(const char *buf, size_t len, + char16_t *utf16_output) { + const char *end = buf + len; + + while (((uint64_t)utf16_output & 0x1F) && buf < end) { + *utf16_output++ = char16_t((uint16_t(*buf++) << 8)); + } + + __m256i zero = __lasx_xvldi(0); + while (end - buf >= 32) { + __m256i in8 = __lasx_xvld(reinterpret_cast(buf), 0); + + __m256i in8_shuf = __lasx_xvpermi_d(in8, 0b11011000); + + __m256i inlow = __lasx_xvilvl_b(in8_shuf, zero); + __m256i inhigh = __lasx_xvilvh_b(in8_shuf, zero); + __lasx_xvst(inlow, reinterpret_cast(utf16_output), 0); + __lasx_xvst(inhigh, reinterpret_cast(utf16_output), 32); + utf16_output += 32; + buf += 32; + } + + if (end - buf >= 16) { + __m128i zero_128 = __lsx_vldi(0); + __m128i in8 = __lsx_vld(reinterpret_cast(buf), 0); + + __m128i inlow = __lsx_vilvl_b(in8, zero_128); + __m128i inhigh = __lsx_vilvh_b(in8, zero_128); + __lsx_vst(inlow, 
reinterpret_cast(utf16_output), 0); + __lsx_vst(inhigh, reinterpret_cast(utf16_output), 16); + utf16_output += 16; + buf += 16; + } + + return std::make_pair(buf, utf16_output); +} +/* end file src/lasx/lasx_convert_latin1_to_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/lasx/lasx_convert_latin1_to_utf32.cpp */ +std::pair +lasx_convert_latin1_to_utf32(const char *buf, size_t len, + char32_t *utf32_output) { + const char *end = buf + len; + + // LASX requires 32-byte alignment, otherwise performance will be degraded + while (((uint64_t)utf32_output & 0x1F) && buf < end) { + *utf32_output++ = ((uint32_t)*buf) & 0xFF; + buf++; + } + + while (end - buf >= 32) { + __m256i in8 = __lasx_xvld(reinterpret_cast(buf), 0); + + __m256i in32_0 = __lasx_vext2xv_wu_bu(in8); + __lasx_xvst(in32_0, reinterpret_cast(utf32_output), 0); + + __m256i in8_1 = __lasx_xvpermi_d(in8, 0b00000001); + __m256i in32_1 = __lasx_vext2xv_wu_bu(in8_1); + __lasx_xvst(in32_1, reinterpret_cast(utf32_output), 32); + + __m256i in8_2 = __lasx_xvpermi_d(in8, 0b00000010); + __m256i in32_2 = __lasx_vext2xv_wu_bu(in8_2); + __lasx_xvst(in32_2, reinterpret_cast(utf32_output), 64); + + __m256i in8_3 = __lasx_xvpermi_d(in8, 0b00000011); + __m256i in32_3 = __lasx_vext2xv_wu_bu(in8_3); + __lasx_xvst(in32_3, reinterpret_cast(utf32_output), 96); + + utf32_output += 32; + buf += 32; + } + + if (end - buf >= 16) { + __m128i in8 = __lsx_vld(reinterpret_cast(buf), 0); + + __m128i zero = __lsx_vldi(0); + __m128i in16low = __lsx_vilvl_b(zero, in8); + __m128i in16high = __lsx_vilvh_b(zero, in8); + __m128i in32_0 = __lsx_vilvl_h(zero, in16low); + __m128i in32_1 = __lsx_vilvh_h(zero, in16low); + __m128i in32_2 = __lsx_vilvl_h(zero, in16high); + __m128i in32_3 = __lsx_vilvh_h(zero, in16high); + + __lsx_vst(in32_0, reinterpret_cast(utf32_output), 0); + __lsx_vst(in32_1, reinterpret_cast(utf32_output), 16); + __lsx_vst(in32_2, 
reinterpret_cast(utf32_output), 32); + __lsx_vst(in32_3, reinterpret_cast(utf32_output), 48); + + utf32_output += 16; + buf += 16; + } + + return std::make_pair(buf, utf32_output); +} +/* end file src/lasx/lasx_convert_latin1_to_utf32.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +/* begin file src/lasx/lasx_convert_utf8_to_utf16.cpp */ +// Convert up to 16 bytes from utf8 to utf16 using a mask indicating the +// end of the code points. Only the least significant 12 bits of the mask +// are accessed. +// It returns how many bytes were consumed (up to 16, usually 12). +template +size_t convert_masked_utf8_to_utf16(const char *input, + uint64_t utf8_end_of_code_point_mask, + char16_t *&utf16_output) { + // we use an approach where we try to process up to 12 input bytes. + // Why 12 input bytes and not 16? Because we are concerned with the size of + // the lookup tables. Also 12 is nicely divisible by two and three. + // + __m128i in = __lsx_vld(reinterpret_cast(input), 0); + const uint16_t input_utf8_end_of_code_point_mask = + utf8_end_of_code_point_mask & 0xfff; + // + // Optimization note: our main path below is load-latency dependent. Thus it + // is maybe beneficial to have fast paths that depend on branch prediction but + // have less latency. This results in more instructions but, potentially, also + // higher speeds. + + // We first try a few fast paths. + // The obvious first test is ASCII, which actually consumes the full 16. 
+ if ((utf8_end_of_code_point_mask & 0xFFFF) == 0xFFFF) { + __m128i zero = __lsx_vldi(0); + if simdutf_constexpr (match_system(big_endian)) { + __lsx_vst(__lsx_vilvl_b(zero, in), + reinterpret_cast(utf16_output), 0); + __lsx_vst(__lsx_vilvh_b(zero, in), + reinterpret_cast(utf16_output), 16); + } else { + __lsx_vst(__lsx_vilvl_b(in, zero), + reinterpret_cast(utf16_output), 0); + __lsx_vst(__lsx_vilvh_b(in, zero), + reinterpret_cast(utf16_output), 16); + } + utf16_output += 16; // We wrote 16 16-bit characters. + return 16; // We consumed 16 bytes. + } + + // 3 byte sequences are the next most common, as seen in CJK, which has long + // sequences of these. + if (input_utf8_end_of_code_point_mask == 0x924) { + // We want to take 4 3-byte UTF-8 code units and turn them into 4 2-byte + // UTF-16 code units. + __m128i composed = convert_utf8_3_byte_to_utf16(in); + // Byte swap if necessary + if simdutf_constexpr (!match_system(big_endian)) { + composed = lsx_swap_bytes(composed); + } + + __lsx_vst(composed, reinterpret_cast(utf16_output), 0); + utf16_output += 4; // We wrote 4 16-bit characters. + return 12; // We consumed 12 bytes. + } + + // 2 byte sequences occur in short bursts in languages like Greek and Russian. + if ((utf8_end_of_code_point_mask & 0xFFFF) == 0xAAAA) { + // We want to take 6 2-byte UTF-8 code units and turn them into 6 2-byte + // UTF-16 code units. + __m128i composed = convert_utf8_2_byte_to_utf16(in); + // Byte swap if necessary + if simdutf_constexpr (!match_system(big_endian)) { + composed = lsx_swap_bytes(composed); + } + + __lsx_vst(composed, reinterpret_cast(utf16_output), 0); + utf16_output += 8; // We wrote 6 16-bit characters. + return 16; // We consumed 12 bytes. + } + + /// We do not have a fast path available, or the fast path is unimportant, so + /// we fallback. 
+ const uint8_t idx = simdutf::tables::utf8_to_utf16::utf8bigindex + [input_utf8_end_of_code_point_mask][0]; + + const uint8_t consumed = simdutf::tables::utf8_to_utf16::utf8bigindex + [input_utf8_end_of_code_point_mask][1]; + const __m128i zero = __lsx_vldi(0); + if (idx < 64) { + // SIX (6) input code-code units + // Convert to UTF-16 + __m128i composed = convert_utf8_1_to_2_byte_to_utf16(in, idx); + // Byte swap if necessary + if simdutf_constexpr (!match_system(big_endian)) { + composed = lsx_swap_bytes(composed); + } + // Store + __lsx_vst(composed, reinterpret_cast(utf16_output), 0); + utf16_output += 6; // We wrote 6 16-bit characters. + return consumed; + } else if (idx < 145) { + // FOUR (4) input code-code units + // UTF-16 and UTF-32 use similar algorithms, but UTF-32 skips the narrowing. + __m128i sh = __lsx_vld(reinterpret_cast( + simdutf::tables::utf8_to_utf16::shufutf8[idx]), + 0); + // XXX: depending on the system scalar instructions might be faster. + // 1 byte: 00000000 00000000 0ccccccc + // 2 byte: 00000000 110bbbbb 10cccccc + // 3 byte: 1110aaaa 10bbbbbb 10cccccc + sh = __lsx_vand_v(sh, __lsx_vldi(0x1f)); + __m128i perm = __lsx_vshuf_b(zero, in, sh); + // 1 byte: 00000000 0ccccccc + // 2 byte: xx0bbbbb x0cccccc + // 3 byte: xxbbbbbb x0cccccc + __m128i lowperm = __lsx_vpickev_h(perm, perm); + // 1 byte: 00000000 00000000 + // 2 byte: 00000000 00000000 + // 3 byte: 00000000 1110aaaa + __m128i highperm = __lsx_vpickod_h(perm, perm); + // 3 byte: aaaa0000 00000000 + highperm = __lsx_vslli_h(highperm, 12); + // ASCII + // 1 byte: 00000000 0ccccccc + // 2+byte: 00000000 00cccccc + __m128i ascii = __lsx_vand_v(lowperm, __lsx_vrepli_h(0x7f)); + // 1 byte: 00000000 00000000 + // 2 byte: xx0bbbbb 00000000 + // 3 byte: xxbbbbbb 00000000 + __m128i middlebyte = __lsx_vand_v(lowperm, lsx_splat_u16(0xFF00)); + // 1 byte: 00000000 0ccccccc + // 2 byte: 0010bbbb bbcccccc + // 3 byte: 0010bbbb bbcccccc + __m128i composed = __lsx_vor_v(__lsx_vsrli_h(middlebyte, 
2), ascii); + + __m128i v0fff = __lsx_vreplgr2vr_h(uint16_t(0xfff)); + // aaaabbbb bbcccccc + composed = __lsx_vbitsel_v(highperm, composed, v0fff); + + if simdutf_constexpr (!match_system(big_endian)) { + composed = lsx_swap_bytes(composed); + } + + __lsx_vst(composed, reinterpret_cast(utf16_output), 0); + utf16_output += 4; // We wrote 4 16-bit codepoints + return consumed; + } else if (idx < 209) { + // THREE (3) input code-code units + if (input_utf8_end_of_code_point_mask == 0x888) { + __m128i expected_mask = + (__m128i)v16u8{0xf8, 0xc0, 0xc0, 0xc0, 0xf8, 0xc0, 0xc0, 0xc0, + 0xf8, 0xc0, 0xc0, 0xc0, 0x0, 0x0, 0x0, 0x0}; + __m128i expected = + (__m128i)v16u8{0xf0, 0x80, 0x80, 0x80, 0xf0, 0x80, 0x80, 0x80, + 0xf0, 0x80, 0x80, 0x80, 0x0, 0x0, 0x0, 0x0}; + __m128i check = __lsx_vseq_b(__lsx_vand_v(in, expected_mask), expected); + if (__lsx_bz_b(check)) + return 12; + // We want to take 3 4-byte UTF-8 code units and turn them into 3 4-byte + // UTF-16 pairs. Generating surrogate pairs is a little tricky though, but + // it is easier when we can assume they are all pairs. This version does + // not use the LUT, but 4 byte sequences are less common and the overhead + // of the extra memory access is less important than the early branch + // overhead in shorter sequences. + + // Swap byte pairs + // 10dddddd 10cccccc|10bbbbbb 11110aaa + // 10cccccc 10dddddd|11110aaa 10bbbbbb + __m128i swap = lsx_swap_bytes(in); + // Shift left 2 bits + // cccccc00 dddddd00 xxxxxxxx bbbbbb00 + __m128i shift = __lsx_vslli_b(swap, 2); + // Create a magic number containing the low 2 bits of the trail surrogate + // and all the corrections needed to create the pair. 
UTF-8 4b prefix = + // -0x0000|0xF000 surrogate offset = -0x0000|0x0040 (0x10000 << 6) + // surrogate high = +0x0000|0xD800 + // surrogate low = +0xDC00|0x0000 + // ------------------------------- + // = +0xDC00|0xE7C0 + __m128i magic = __lsx_vreplgr2vr_w(uint32_t(0xDC00E7C0)); + // Generate unadjusted trail surrogate minus lowest 2 bits + // xxxxxxxx xxxxxxxx|11110aaa bbbbbb00 + __m128i trail = __lsx_vbitsel_v(shift, swap, lsx_splat_u32(0x0000FF00)); + // Insert low 2 bits of trail surrogate to magic number for later + // 11011100 00000000 11100111 110000cc + __m128i magic_with_low_2 = __lsx_vor_v(__lsx_vsrli_w(shift, 30), magic); + + // Generate lead surrogate + // xxxxcccc ccdddddd|xxxxxxxx xxxxxxxx + // 000000cc ccdddddd|xxxxxxxx xxxxxxxx + __m128i lead = __lsx_vbitsel_v( + __lsx_vsrli_h(__lsx_vand_v(shift, __lsx_vldi(0x3F)), 4), swap, + __lsx_vrepli_h(0x3f /* 0x003f*/)); + + // Blend pairs + // 000000cc ccdddddd|11110aaa bbbbbb00 + __m128i blend = __lsx_vbitsel_v(lead, trail, lsx_splat_u32(0x0000FFFF)); + + // Add magic number to finish the result + // 110111CC CCDDDDDD|110110AA BBBBBBCC + __m128i composed = __lsx_vadd_h(blend, magic_with_low_2); + // Byte swap if necessary + if simdutf_constexpr (!match_system(big_endian)) { + composed = lsx_swap_bytes(composed); + } + __lsx_vst(composed, reinterpret_cast(utf16_output), 0); + utf16_output += 6; // We 3 32-bit surrogate pairs. + return 12; // We consumed 12 bytes. 
+ } + // 3 1-4 byte sequences + __m128i sh = __lsx_vld(reinterpret_cast( + simdutf::tables::utf8_to_utf16::shufutf8[idx]), + 0); + // 1 byte: 00000000 00000000 00000000 0ddddddd + // 3 byte: 00000000 00000000 110ccccc 10dddddd + // 3 byte: 00000000 1110bbbb 10cccccc 10dddddd + // 4 byte: 11110aaa 10bbbbbb 10cccccc 10dddddd + sh = __lsx_vand_v(sh, __lsx_vldi(0x1f)); + __m128i perm = __lsx_vshuf_b(zero, in, sh); + // added to fix issue https://github.com/simdutf/simdutf/issues/514 + // We only want to write 2 * 16-bit code units when that is actually what we + // have. Unfortunately, we cannot trust the input. So it is possible to get + // 0xff as an input byte and it should not result in a surrogate pair. We + // need to check for that. + uint32_t permbuffer[4]; + __lsx_vst(perm, permbuffer, 0); + // Mask the low and middle bytes + // 00000000 00000000 00000000 0ddddddd + __m128i ascii = __lsx_vand_v(perm, __lsx_vrepli_w(0x7f)); + // Because the surrogates need more work, the high surrogate is computed + // first. + __m128i middlehigh = __lsx_vslli_w(perm, 2); + // 00000000 00000000 00cccccc 00000000 + __m128i middlebyte = __lsx_vand_v(perm, lsx_splat_u32(0x00003F00)); + // Start assembling the sequence. Since the 4th byte is in the same position + // as it would be in a surrogate and there is no dependency, shift left + // instead of right. 
3 byte: 00000000 10bbbbxx xxxxxxxx xxxxxxxx 4 byte: + // 11110aaa bbbbbbxx xxxxxxxx xxxxxxxx + __m128i ab = __lsx_vbitsel_v(middlehigh, perm, lsx_splat_u32(0xFF000000)); + // Top 16 bits contains the high ten bits of the surrogate pair before + // correction 3 byte: 00000000 10bbbbcc|cccc0000 00000000 4 byte: 11110aaa + // bbbbbbcc|cccc0000 00000000 - high 10 bits correct w/o correction + __m128i v_fffc0000 = __lsx_vreplgr2vr_w(uint32_t(0xFFFC0000)); + __m128i abc = __lsx_vbitsel_v(__lsx_vslli_w(middlebyte, 4), ab, v_fffc0000); + // Combine the low 6 or 7 bits by a shift right accumulate + // 3 byte: 00000000 00000010|bbbbcccc ccdddddd - low 16 bits correct + // 4 byte: 00000011 110aaabb|bbbbcccc ccdddddd - low 10 bits correct w/o + // correction + __m128i composed = __lsx_vor_v(ascii, __lsx_vsrli_w(abc, 6)); + // After this is for surrogates + // Blend the low and high surrogates + // 4 byte: 11110aaa bbbbbbcc|bbbbcccc ccdddddd + __m128i mixed = __lsx_vbitsel_v(abc, composed, lsx_splat_u32(0x0000FFFF)); + // Clear the upper 6 bits of the low surrogate. Don't clear the upper bits + // yet as 0x10000 was not subtracted from the codepoint yet. 4 byte: + // 11110aaa bbbbbbcc|000000cc ccdddddd + __m128i v_ffff03ff = __lsx_vreplgr2vr_w(uint32_t(0xFFFF03FF)); + __m128i masked_pair = __lsx_vand_v(mixed, v_ffff03ff); + // Correct the remaining UTF-8 prefix, surrogate offset, and add the + // surrogate prefixes in one magic 16-bit addition. 
similar magic number but + // without the continue byte adjust and halfword swapped UTF-8 4b prefix = + // -0xF000|0x0000 surrogate offset = -0x0040|0x0000 (0x10000 << 6) + // surrogate high = +0xD800|0x0000 + // surrogate low = +0x0000|0xDC00 + // ----------------------------------- + // = +0xE7C0|0xDC00 + __m128i magic = __lsx_vreplgr2vr_w(uint32_t(0xE7C0DC00)); + // 4 byte: 110110AA BBBBBBCC|110111CC CCDDDDDD - surrogate pair complete + __m128i surrogates = __lsx_vadd_w(masked_pair, magic); + // If the high bit is 1 (s32 less than zero), this needs a surrogate pair + __m128i is_pair = __lsx_vslt_w(perm, zero); + // Select either the 4 byte surrogate pair or the 2 byte solo codepoint + // 3 byte: 0xxxxxxx xxxxxxxx|bbbbcccc ccdddddd + // 4 byte: 110110AA BBBBBBCC|110111CC CCDDDDDD + __m128i selected = __lsx_vbitsel_v(composed, surrogates, is_pair); + // Byte swap if necessary + if simdutf_constexpr (!match_system(big_endian)) { + selected = lsx_swap_bytes(selected); + } + // Attempting to shuffle and store would be complex, just scalarize. + uint32_t buffer_tmp[4]; + __lsx_vst(selected, buffer_tmp, 0); + // Test for the top bit of the surrogate mask. Remove due to issue 514 + // const uint32_t SURROGATE_MASK = match_system(big_endian) ? 0x80000000 : + // 0x00800000; + for (size_t i = 0; i < 3; i++) { + // Surrogate + // Used to be if (buffer[i] & SURROGATE_MASK) { + // See discussion above. 
+ // patch for issue https://github.com/simdutf/simdutf/issues/514 + if ((permbuffer[i] & 0xf8000000) == 0xf0000000) { + utf16_output[0] = uint16_t(buffer_tmp[i] >> 16); + utf16_output[1] = uint16_t(buffer_tmp[i] & 0xFFFF); + utf16_output += 2; + } else { + utf16_output[0] = uint16_t(buffer_tmp[i] & 0xFFFF); + utf16_output++; + } + } + return consumed; + } else { + // here we know that there is an error but we do not handle errors + return 12; + } +} +/* end file src/lasx/lasx_convert_utf8_to_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +/* begin file src/lasx/lasx_convert_utf8_to_utf32.cpp */ +// Convert up to 12 bytes from utf8 to utf32 using a mask indicating the +// end of the code points. Only the least significant 12 bits of the mask +// are accessed. +// It returns how many bytes were consumed (up to 12). +size_t convert_masked_utf8_to_utf32(const char *input, + uint64_t utf8_end_of_code_point_mask, + char32_t *&utf32_out) { + // we use an approach where we try to process up to 12 input bytes. + // Why 12 input bytes and not 16? Because we are concerned with the size of + // the lookup tables. Also 12 is nicely divisible by two and three. + // + uint32_t *&utf32_output = reinterpret_cast(utf32_out); + __m128i in = __lsx_vld(reinterpret_cast(input), 0); + const uint16_t input_utf8_end_of_code_point_mask = + utf8_end_of_code_point_mask & 0xFFF; + // + // Optimization note: our main path below is load-latency dependent. Thus it + // is maybe beneficial to have fast paths that depend on branch prediction but + // have less latency. This results in more instructions but, potentially, also + // higher speeds. + // + // We first try a few fast paths. + if ((utf8_end_of_code_point_mask & 0xffff) == 0xffff) { + // We process in chunks of 16 bytes. + // use fast implementation in src/simdutf/arm64/simd.h + // Ideally the compiler can keep the tables in registers. 
+ __m128i zero = __lsx_vldi(0); + __m128i in16low = __lsx_vilvl_b(zero, in); + __m128i in16high = __lsx_vilvh_b(zero, in); + __m128i in32_0 = __lsx_vilvl_h(zero, in16low); + __m128i in32_1 = __lsx_vilvh_h(zero, in16low); + __m128i in32_2 = __lsx_vilvl_h(zero, in16high); + __m128i in32_3 = __lsx_vilvh_h(zero, in16high); + + __lsx_vst(in32_0, reinterpret_cast(utf32_output), 0); + __lsx_vst(in32_1, reinterpret_cast(utf32_output), 16); + __lsx_vst(in32_2, reinterpret_cast(utf32_output), 32); + __lsx_vst(in32_3, reinterpret_cast(utf32_output), 48); + + utf32_output += 16; // We wrote 16 32-bit characters. + return 16; // We consumed 16 bytes. + } + __m128i zero = __lsx_vldi(0); + if (input_utf8_end_of_code_point_mask == 0x924) { + // We want to take 4 3-byte UTF-8 code units and turn them into 4 4-byte + // UTF-32 code units. Convert to UTF-16 + __m128i composed_utf16 = convert_utf8_3_byte_to_utf16(in); + __m128i utf32_low = __lsx_vilvl_h(zero, composed_utf16); + + __lsx_vst(utf32_low, reinterpret_cast(utf32_output), 0); + utf32_output += 4; // We wrote 4 32-bit characters. + return 12; // We consumed 12 bytes. + } + // 2 byte sequences occur in short bursts in languages like Greek and Russian. + if (input_utf8_end_of_code_point_mask == 0xaaa) { + // We want to take 6 2-byte UTF-8 code units and turn them into 6 4-byte + // UTF-32 code units. Convert to UTF-16 + __m128i composed_utf16 = convert_utf8_2_byte_to_utf16(in); + + __m128i utf32_low = __lsx_vilvl_h(zero, composed_utf16); + __m128i utf32_high = __lsx_vilvh_h(zero, composed_utf16); + + __lsx_vst(utf32_low, reinterpret_cast(utf32_output), 0); + __lsx_vst(utf32_high, reinterpret_cast(utf32_output), 16); + utf32_output += 6; + return 12; // We consumed 12 bytes. + } + // Either no fast path or an unimportant fast path. 
+ + const uint8_t idx = simdutf::tables::utf8_to_utf16::utf8bigindex + [input_utf8_end_of_code_point_mask][0]; + const uint8_t consumed = simdutf::tables::utf8_to_utf16::utf8bigindex + [input_utf8_end_of_code_point_mask][1]; + + if (idx < 64) { + // SIX (6) input code-code units + // Convert to UTF-16 + __m128i composed_utf16 = convert_utf8_1_to_2_byte_to_utf16(in, idx); + __m128i utf32_low = __lsx_vilvl_h(zero, composed_utf16); + __m128i utf32_high = __lsx_vilvh_h(zero, composed_utf16); + + __lsx_vst(utf32_low, reinterpret_cast(utf32_output), 0); + __lsx_vst(utf32_high, reinterpret_cast(utf32_output), 16); + utf32_output += 6; + return consumed; + } else if (idx < 145) { + // FOUR (4) input code-code units + // UTF-16 and UTF-32 use similar algorithms, but UTF-32 skips the narrowing. + __m128i sh = __lsx_vld(reinterpret_cast( + simdutf::tables::utf8_to_utf16::shufutf8[idx]), + 0); + // Shuffle + // 1 byte: 00000000 00000000 0ccccccc + // 2 byte: 00000000 110bbbbb 10cccccc + // 3 byte: 1110aaaa 10bbbbbb 10cccccc + sh = __lsx_vand_v(sh, __lsx_vldi(0x1f)); + __m128i perm = __lsx_vshuf_b(zero, in, sh); + // Split + // 00000000 00000000 0ccccccc + __m128i ascii = __lsx_vand_v(perm, __lsx_vrepli_w(0x7F)); // 6 or 7 bits + // Note: unmasked + // xxxxxxxx aaaaxxxx xxxxxxxx + __m128i high = + __lsx_vsrli_w(__lsx_vand_v(perm, __lsx_vldi(0xf)), 4); // 4 bits + // Use 16 bit bic instead of and. + // The top bits will be corrected later in the bsl + // 00000000 10bbbbbb 00000000 + __m128i middle = + __lsx_vand_v(perm, lsx_splat_u32(0x0000FF00)); // 5 or 6 bits + // Combine low and middle with shift right accumulate + // 00000000 00xxbbbb bbcccccc + __m128i lowmid = __lsx_vor_v(ascii, __lsx_vsrli_w(middle, 2)); + // Insert top 4 bits from high byte with bitwise select + // 00000000 aaaabbbb bbcccccc + __m128i composed = __lsx_vbitsel_v(lowmid, high, lsx_splat_u32(0x0000F000)); + __lsx_vst(composed, utf32_output, 0); + utf32_output += 4; // We wrote 4 32-bit characters. 
+ return consumed; + } else if (idx < 209) { + // THREE (3) input code-code units + if (input_utf8_end_of_code_point_mask == 0x888) { + // We want to take 3 4-byte UTF-8 code units and turn them into 3 4-byte + // UTF-32 code units. This uses the same method as the fixed 3 byte + // version, reversing and shift left insert. However, there is no need for + // a shuffle mask now, just rev16 and rev32. + // + // This version does not use the LUT, but 4 byte sequences are less common + // and the overhead of the extra memory access is less important than the + // early branch overhead in shorter sequences, so it comes last. + + // Swap pairs of bytes + // 10dddddd|10cccccc|10bbbbbb|11110aaa + // 10cccccc 10dddddd|11110aaa 10bbbbbb + __m128i swap = lsx_swap_bytes(in); + // Shift left and insert + // xxxxcccc ccdddddd|xxxxxxxa aabbbbbb + __m128i merge1 = __lsx_vbitsel_v(__lsx_vsrli_h(swap, 2), swap, + __lsx_vrepli_h(0x3f /*0x003F*/)); + // Shift insert again + // xxxxxxxx xxxaaabb bbbbcccc ccdddddd + __m128i merge2 = + __lsx_vbitsel_v(__lsx_vslli_w(merge1, 12), /* merge1 << 12 */ + __lsx_vsrli_w(merge1, 16), /* merge1 >> 16 */ + lsx_splat_u32(0x00000FFF)); + // Clear the garbage + // 00000000 000aaabb bbbbcccc ccdddddd + __m128i composed = __lsx_vand_v(merge2, lsx_splat_u32(0x1FFFFF)); + // Store + __lsx_vst(composed, utf32_output, 0); + utf32_output += 3; // We wrote 3 32-bit characters. + return 12; // We consumed 12 bytes. + } + // Unlike UTF-16, doing a fast codepath doesn't have nearly as much benefit + // due to surrogates no longer being involved. 
+ __m128i sh = __lsx_vld(reinterpret_cast( + simdutf::tables::utf8_to_utf16::shufutf8[idx]), + 0); + // 1 byte: 00000000 00000000 00000000 0ddddddd + // 2 byte: 00000000 00000000 110ccccc 10dddddd + // 3 byte: 00000000 1110bbbb 10cccccc 10dddddd + // 4 byte: 11110aaa 10bbbbbb 10cccccc 10dddddd + sh = __lsx_vand_v(sh, __lsx_vldi(0x1f)); + __m128i perm = __lsx_vshuf_b(zero, in, sh); + + // Ascii + __m128i ascii = __lsx_vand_v(perm, __lsx_vrepli_w(0x7F)); + __m128i middle = __lsx_vand_v(perm, lsx_splat_u32(0x00003f00)); + // 00000000 00000000 0000cccc ccdddddd + __m128i cd = __lsx_vor_v(__lsx_vsrli_w(middle, 2), ascii); + + __m128i correction = __lsx_vand_v(perm, lsx_splat_u32(0x00400000)); + __m128i corrected = __lsx_vadd_b(perm, __lsx_vsrli_w(correction, 1)); + // Insert twice + // 00000000 000aaabb bbbbxxxx xxxxxxxx + __m128i corrected_srli2 = + __lsx_vsrli_w(__lsx_vand_v(corrected, __lsx_vrepli_b(0x7)), 2); + __m128i ab = + __lsx_vbitsel_v(corrected_srli2, corrected, __lsx_vrepli_h(0x3f)); + ab = __lsx_vsrli_w(ab, 4); + // 00000000 000aaabb bbbbcccc ccdddddd + __m128i composed = __lsx_vbitsel_v(ab, cd, lsx_splat_u32(0x00000FFF)); + // Store + __lsx_vst(composed, utf32_output, 0); + utf32_output += 3; // We wrote 3 32-bit characters. + return consumed; + } else { + // here we know that there is an error but we do not handle errors + return 12; + } +} +/* end file src/lasx/lasx_convert_utf8_to_utf32.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/lasx/lasx_convert_utf8_to_latin1.cpp */ +size_t convert_masked_utf8_to_latin1(const char *input, + uint64_t utf8_end_of_code_point_mask, + char *&latin1_output) { + // we use an approach where we try to process up to 12 input bytes. + // Why 12 input bytes and not 16? Because we are concerned with the size of + // the lookup tables. Also 12 is nicely divisible by two and three. 
+ // + __m128i in = __lsx_vld(reinterpret_cast(input), 0); + + const uint16_t input_utf8_end_of_code_point_mask = + utf8_end_of_code_point_mask & 0xfff; + // Optimization note: our main path below is load-latency dependent. Thus it + // is maybe beneficial to have fast paths that depend on branch prediction but + // have less latency. This results in more instructions but, potentially, also + // higher speeds. + + // We first try a few fast paths. + // The obvious first test is ASCII, which actually consumes the full 16. + if ((utf8_end_of_code_point_mask & 0xFFFF) == 0xFFFF) { + // We process in chunks of 16 bytes + __lsx_vst(in, reinterpret_cast(latin1_output), 0); + latin1_output += 16; // We wrote 16 18-bit characters. + return 16; // We consumed 16 bytes. + } + /// We do not have a fast path available, or the fast path is unimportant, so + /// we fallback. + const uint8_t idx = simdutf::tables::utf8_to_utf16::utf8bigindex + [input_utf8_end_of_code_point_mask][0]; + + const uint8_t consumed = simdutf::tables::utf8_to_utf16::utf8bigindex + [input_utf8_end_of_code_point_mask][1]; + // this indicates an invalid input: + if (idx >= 64) { + return consumed; + } + // Here we should have (idx < 64), if not, there is a bug in the validation or + // elsewhere. SIX (6) input code-code units this is a relatively easy scenario + // we process SIX (6) input code-code units. The max length in bytes of six + // code code units spanning between 1 and 2 bytes each is 12 bytes. Converts 6 + // 1-2 byte UTF-8 characters to 6 UTF-16 characters. This is a relatively easy + // scenario we process SIX (6) input code-code units. The max length in bytes + // of six code code units spanning between 1 and 2 bytes each is 12 bytes. 
+ __m128i sh = __lsx_vld(reinterpret_cast( + simdutf::tables::utf8_to_utf16::shufutf8[idx]), + 0); + // Shuffle + // 1 byte: 00000000 0bbbbbbb + // 2 byte: 110aaaaa 10bbbbbb + sh = __lsx_vand_v(sh, __lsx_vldi(0x1f)); + __m128i perm = __lsx_vshuf_b(__lsx_vldi(0), in, sh); + // ascii mask + // 1 byte: 11111111 11111111 + // 2 byte: 00000000 00000000 + __m128i ascii_mask = __lsx_vslt_bu(perm, __lsx_vldi(0x80)); + // utf8 mask + // 1 byte: 00000000 00000000 + // 2 byte: 00111111 00111111 + __m128i utf8_mask = __lsx_vand_v(__lsx_vsle_bu(__lsx_vldi(0x80), perm), + __lsx_vldi(0b00111111)); + // mask + // 1 byte: 11111111 11111111 + // 2 byte: 00111111 00111111 + __m128i mask = __lsx_vor_v(utf8_mask, ascii_mask); + + __m128i composed = __lsx_vbitsel_v(__lsx_vsrli_h(perm, 2), perm, mask); + // writing 8 bytes even though we only care about the first 6 bytes. + __m128i latin1_packed = __lsx_vpickev_b(__lsx_vldi(0), composed); + + __lsx_vst(latin1_packed, reinterpret_cast(latin1_output), 0); + latin1_output += 6; // We wrote 6 bytes. + return consumed; +} +/* end file src/lasx/lasx_convert_utf8_to_latin1.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/lasx/lasx_convert_utf16_to_latin1.cpp */ +template +std::pair +lasx_convert_utf16_to_latin1(const char16_t *buf, size_t len, + char *latin1_output) { + const char16_t *end = buf + len; + while (end - buf >= 16) { + __m128i in = __lsx_vld(reinterpret_cast(buf), 0); + __m128i in1 = __lsx_vld(reinterpret_cast(buf), 16); + if simdutf_constexpr (!match_system(big_endian)) { + in = lsx_swap_bytes(in); + in1 = lsx_swap_bytes(in1); + } + if (__lsx_bz_v(__lsx_vpickod_b(in1, in))) { + // 1. pack the bytes + __m128i latin1_packed = __lsx_vpickev_b(in1, in); + // 2. store (8 bytes) + __lsx_vst(latin1_packed, reinterpret_cast(latin1_output), 0); + // 3. 
adjust pointers + buf += 16; + latin1_output += 16; + } else { + return std::make_pair(nullptr, reinterpret_cast(latin1_output)); + } + } // while + return std::make_pair(buf, latin1_output); +} + +template +std::pair +lasx_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, + char *latin1_output) { + const char16_t *start = buf; + const char16_t *end = buf + len; + while (end - buf >= 16) { + __m128i in = __lsx_vld(reinterpret_cast(buf), 0); + __m128i in1 = __lsx_vld(reinterpret_cast(buf), 16); + if simdutf_constexpr (!match_system(big_endian)) { + in = lsx_swap_bytes(in); + in1 = lsx_swap_bytes(in1); + } + if (__lsx_bz_v(__lsx_vpickod_b(in1, in))) { + // 1. pack the bytes + __m128i latin1_packed = __lsx_vpickev_b(in1, in); + // 2. store (8 bytes) + __lsx_vst(latin1_packed, reinterpret_cast(latin1_output), 0); + // 3. adjust pointers + buf += 16; + latin1_output += 16; + } else { + // Let us do a scalar fallback. + for (int k = 0; k < 16; k++) { + uint16_t word = scalar::utf16::swap_if_needed(buf[k]); + if (word <= 0xff) { + *latin1_output++ = char(word); + } else { + return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), + latin1_output); + } + } + } + } // while + return std::make_pair(result(error_code::SUCCESS, buf - start), + latin1_output); +} +/* end file src/lasx/lasx_convert_utf16_to_latin1.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +/* begin file src/lasx/lasx_convert_utf16_to_utf8.cpp */ +/* + The vectorized algorithm works on single LASX register i.e., it + loads eight 16-bit code units. + + We consider three cases: + 1. an input register contains no surrogates and each value + is in range 0x0000 .. 0x07ff. + 2. an input register contains no surrogates and values are + is in range 0x0000 .. 0xffff. + 3. an input register contains surrogates --- i.e. codepoints + can have 16 or 32 bits. + + Ad 1. 
+ + When values are less than 0x0800, it means that a 16-bit code unit + can be converted into: 1) single UTF8 byte (when it's an ASCII + char) or 2) two UTF8 bytes. + + For this case we do only some shuffle to obtain these 2-byte + codes and finally compress the whole LASX register with a single + shuffle. + + We need 256-entry lookup table to get a compression pattern + and the number of output bytes in the compressed vector register. + Each entry occupies 17 bytes. + + Ad 2. + + When values fit in 16-bit code units, but are above 0x07ff, then + a single word may produce one, two or three UTF8 bytes. + + We prepare data for all these three cases in two registers. + The first register contains lower two UTF8 bytes (used in all + cases), while the second one contains just the third byte for + the three-UTF8-bytes case. + + Finally these two registers are interleaved forming eight-element + array of 32-bit values. The array spans two LASX registers. + The bytes from the registers are compressed using two shuffles. + + We need 256-entry lookup table to get a compression pattern + and the number of output bytes in the compressed vector register. + Each entry occupies 17 bytes. + + + To summarize: + - We need two 256-entry tables that have 8704 bytes in total. +*/ +/* + Returns a pair: the first unprocessed byte from buf and utf8_output + A scalar routine should carry on the conversion of the tail.
+*/ + +template +std::pair +lasx_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_out) { + uint8_t *utf8_output = reinterpret_cast(utf8_out); + const char16_t *end = buf + len; + + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 + + __m256i v_07ff = __lasx_xvreplgr2vr_h(uint16_t(0x7ff)); + __m256i zero = __lasx_xvldi(0); + __m128i zero_128 = __lsx_vldi(0); + while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { + __m256i in = __lasx_xvld(reinterpret_cast(buf), 0); + if simdutf_constexpr (!match_system(big_endian)) { + in = lasx_swap_bytes(in); + } + if (__lasx_xbnz_h(__lasx_xvslt_hu( + in, __lasx_xvrepli_h(0x7F)))) { // ASCII fast path!!!! + // 1. pack the bytes + __m256i utf8_packed = + __lasx_xvpermi_d(__lasx_xvpickev_b(in, in), 0b00001000); + // 2. store (16 bytes) + __lsx_vst(lasx_extracti128_lo(utf8_packed), utf8_output, 0); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! + } + + if (__lasx_xbz_v(__lasx_xvslt_hu(v_07ff, in))) { + // 1. prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 16 + // expected output : [110a|aaaa|10bb|bbbb] x 16 + // t0 = [000a|aaaa|bbbb|bb00] + __m256i t0 = __lasx_xvslli_h(in, 2); + // t1 = [000a|aaaa|0000|0000] + __m256i t1 = __lasx_xvand_v(t0, lasx_splat_u16(0x1f00)); + // t2 = [0000|0000|00bb|bbbb] + __m256i t2 = __lasx_xvand_v(in, __lasx_xvrepli_h(0x3f)); + // t3 = [000a|aaaa|00bb|bbbb] + __m256i t3 = __lasx_xvor_v(t1, t2); + // t4 = [110a|aaaa|10bb|bbbb] + __m256i v_c080 = __lasx_xvreplgr2vr_h(uint16_t(0xc080)); + __m256i t4 = __lasx_xvor_v(t3, v_c080); + // 2. merge ASCII and 2-byte codewords + __m256i one_byte_bytemask = + __lasx_xvsle_hu(in, __lasx_xvrepli_h(0x7F /*0x007F*/)); + __m256i utf8_unpacked = __lasx_xvbitsel_v(t4, in, one_byte_bytemask); + // 3.
prepare bitmask for 8-bit lookup + __m256i mask = __lasx_xvmskltz_h(one_byte_bytemask); + uint32_t m1 = __lasx_xvpickve2gr_wu(mask, 0); + uint32_t m2 = __lasx_xvpickve2gr_wu(mask, 4); + // 4. pack the bytes + const uint8_t *row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lasx_1_2_utf8_bytes_mask[m1]][0]; + __m128i shuffle1 = __lsx_vld(row1, 1); + __m128i utf8_packed1 = + __lsx_vshuf_b(zero_128, lasx_extracti128_lo(utf8_unpacked), shuffle1); + + const uint8_t *row2 = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lasx_1_2_utf8_bytes_mask[m2]][0]; + __m128i shuffle2 = __lsx_vld(row2, 1); + __m128i utf8_packed2 = + __lsx_vshuf_b(zero_128, lasx_extracti128_hi(utf8_unpacked), shuffle2); + // 5. store bytes + __lsx_vst(utf8_packed1, utf8_output, 0); + utf8_output += row1[0]; + + __lsx_vst(utf8_packed2, utf8_output, 0); + utf8_output += row2[0]; + + buf += 16; + continue; + } + __m256i surrogates_bytemask = __lasx_xvseq_h( + __lasx_xvand_v(in, lasx_splat_u16(0xf800)), lasx_splat_u16(0xd800)); + // It might seem like checking for surrogates_bitmask == 0xc000 could help. + // However, it is likely an uncommon occurrence. + if (__lasx_xbz_v(surrogates_bytemask)) { + // case: code units from register produce either 1, 2 or 3 UTF-8 bytes + /* In this branch we handle three cases: + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + single UTF-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - + two UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + three UTF-8 bytes + + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. + + We precompute byte 1 for case #1 and the common byte for cases #2 & #3 + in register t2. + + We precompute byte 1 for case #3 and -- **conditionally** -- + precompute either byte 1 for case #2 or byte 2 for case #3.
Note that + they differ by exactly one bit. + + Finally from these two code units we build proper UTF-8 sequence, + taking into account the case (i.e, the number of bytes to write). + */ + /** + * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: + * t2 => [0ccc|cccc] [10cc|cccc] + * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) + */ + // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] + __m256i t0 = __lasx_xvpickev_b(in, in); + t0 = __lasx_xvilvl_b(t0, t0); + + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] + __m256i v_3f7f = __lasx_xvreplgr2vr_h(uint16_t(0x3F7F)); + __m256i t1 = __lasx_xvand_v(t0, v_3f7f); + // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] + __m256i t2 = __lasx_xvor_v(t1, lasx_splat_u16(0x8000)); + + // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] + __m256i s0 = __lasx_xvsrli_h(in, 12); + // s1: [aaaa|bbbb|bbcc|cccc] => [aabb|bbbb|cccc|cc00] + __m256i s1 = __lasx_xvslli_h(in, 2); + // s1: [aabb|bbbb|cccc|cc00] => [00bb|bbbb|0000|0000] + s1 = __lasx_xvand_v(s1, lasx_splat_u16(0x3f00)); + + // [00bb|bbbb|0000|aaaa] + __m256i s2 = __lasx_xvor_v(s0, s1); + // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] + __m256i v_c0e0 = __lasx_xvreplgr2vr_h(uint16_t(0xC0E0)); + __m256i s3 = __lasx_xvor_v(s2, v_c0e0); + __m256i one_or_two_bytes_bytemask = __lasx_xvsle_hu(in, v_07ff); + __m256i m0 = + __lasx_xvandn_v(one_or_two_bytes_bytemask, lasx_splat_u16(0x4000)); + __m256i s4 = __lasx_xvxor_v(s3, m0); + + // 4. expand code units 16-bit => 32-bit + __m256i out0 = __lasx_xvilvl_h(s4, t2); + __m256i out1 = __lasx_xvilvh_h(s4, t2); + + // 5.
compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle + __m256i one_byte_bytemask = __lasx_xvsle_hu(in, __lasx_xvrepli_h(0x7F)); + __m256i one_byte_bytemask_low = + __lasx_xvilvl_h(one_byte_bytemask, one_byte_bytemask); + __m256i one_byte_bytemask_high = + __lasx_xvilvh_h(one_byte_bytemask, one_byte_bytemask); + + __m256i one_or_two_bytes_bytemask_low = + __lasx_xvilvl_h(one_or_two_bytes_bytemask, zero); + __m256i one_or_two_bytes_bytemask_high = + __lasx_xvilvh_h(one_or_two_bytes_bytemask, zero); + + __m256i mask0 = __lasx_xvmskltz_h( + __lasx_xvor_v(one_or_two_bytes_bytemask_low, one_byte_bytemask_low)); + __m256i mask1 = __lasx_xvmskltz_h(__lasx_xvor_v( + one_or_two_bytes_bytemask_high, one_byte_bytemask_high)); + + uint32_t mask = __lasx_xvpickve2gr_wu(mask0, 0); + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; + __m128i shuffle0 = __lsx_vld(row0, 1); + __m128i utf8_0 = + __lsx_vshuf_b(zero_128, lasx_extracti128_lo(out0), shuffle0); + __lsx_vst(utf8_0, utf8_output, 0); + utf8_output += row0[0]; + + mask = __lasx_xvpickve2gr_wu(mask1, 0); + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; + __m128i shuffle1 = __lsx_vld(row1, 1); + __m128i utf8_1 = + __lsx_vshuf_b(zero_128, lasx_extracti128_lo(out1), shuffle1); + __lsx_vst(utf8_1, utf8_output, 0); + utf8_output += row1[0]; + + mask = __lasx_xvpickve2gr_wu(mask0, 4); + const uint8_t *row2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; + __m128i shuffle2 = __lsx_vld(row2, 1); + __m128i utf8_2 = + __lsx_vshuf_b(zero_128, lasx_extracti128_hi(out0), shuffle2); + __lsx_vst(utf8_2, utf8_output, 0); + utf8_output += row2[0]; + + mask = __lasx_xvpickve2gr_wu(mask1, 4); + const uint8_t *row3 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; + __m128i shuffle3 = __lsx_vld(row3, 1); + __m128i utf8_3 = + __lsx_vshuf_b(zero_128, lasx_extracti128_hi(out1),
shuffle3); + __lsx_vst(utf8_3, utf8_output, 0); + utf8_output += row3[0]; + + buf += 16; + // surrogate pair(s) in a register + } else { + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint16_t word = scalar::utf16::swap_if_needed(buf[k]); + if ((word & 0xFF80) == 0) { + *utf8_output++ = char(word); + } else if ((word & 0xF800) == 0) { + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xF800) != 0xD800) { + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = + scalar::utf16::swap_if_needed(buf[k + 1]); + k++; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return std::make_pair(nullptr, + reinterpret_cast(utf8_output)); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf8_output++ = char((value >> 18) | 0b11110000); + *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((value & 0b111111) | 0b10000000); + } + } + buf += k; + } + } // while + return std::make_pair(buf, reinterpret_cast(utf8_output)); +} + +/* + Returns a pair: a result struct and utf8_output. + If there is an error, the count field of the result is the position of the + error. Otherwise, it is the position of the first unprocessed code unit in buf + (even if finished). A scalar routine should carry on the conversion of the + tail if needed.
+*/ +template +std::pair +lasx_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, + char *utf8_out) { + uint8_t *utf8_output = reinterpret_cast(utf8_out); + const char16_t *start = buf; + const char16_t *end = buf + len; + + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 + + __m256i v_07ff = __lasx_xvreplgr2vr_h(uint16_t(0x7ff)); + __m256i zero = __lasx_xvldi(0); + __m128i zero_128 = __lsx_vldi(0); + while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { + __m256i in = __lasx_xvld(reinterpret_cast(buf), 0); + if simdutf_constexpr (!match_system(big_endian)) { + in = lasx_swap_bytes(in); + } + if (__lasx_xbnz_h(__lasx_xvslt_hu( + in, __lasx_xvrepli_h(0x7F)))) { // ASCII fast path!!!! + // 1. pack the bytes + __m256i utf8_packed = + __lasx_xvpermi_d(__lasx_xvpickev_b(in, in), 0b00001000); + // 2. store (16 bytes) + __lsx_vst(lasx_extracti128_lo(utf8_packed), utf8_output, 0); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! + } + + if (__lasx_xbz_v(__lasx_xvslt_hu(v_07ff, in))) { + // 1. prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 16 + // expected output : [110a|aaaa|10bb|bbbb] x 16 + // t0 = [000a|aaaa|bbbb|bb00] + __m256i t0 = __lasx_xvslli_h(in, 2); + // t1 = [000a|aaaa|0000|0000] + __m256i t1 = __lasx_xvand_v(t0, lasx_splat_u16(0x1f00)); + // t2 = [0000|0000|00bb|bbbb] + __m256i t2 = __lasx_xvand_v(in, __lasx_xvrepli_h(0x3f)); + // t3 = [000a|aaaa|00bb|bbbb] + __m256i t3 = __lasx_xvor_v(t1, t2); + // t4 = [110a|aaaa|10bb|bbbb] + __m256i v_c080 = __lasx_xvreplgr2vr_h(uint16_t(0xc080)); + __m256i t4 = __lasx_xvor_v(t3, v_c080); + // 2. merge ASCII and 2-byte codewords + __m256i one_byte_bytemask = + __lasx_xvsle_hu(in, __lasx_xvrepli_h(0x7F /*0x007F*/)); + __m256i utf8_unpacked = __lasx_xvbitsel_v(t4, in, one_byte_bytemask); + // 3.
prepare bitmask for 8-bit lookup + __m256i mask = __lasx_xvmskltz_h(one_byte_bytemask); + uint32_t m1 = __lasx_xvpickve2gr_wu(mask, 0); + uint32_t m2 = __lasx_xvpickve2gr_wu(mask, 4); + // 4. pack the bytes + const uint8_t *row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lasx_1_2_utf8_bytes_mask[m1]][0]; + __m128i shuffle1 = __lsx_vld(row1, 1); + __m128i utf8_packed1 = + __lsx_vshuf_b(zero_128, lasx_extracti128_lo(utf8_unpacked), shuffle1); + + const uint8_t *row2 = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lasx_1_2_utf8_bytes_mask[m2]][0]; + __m128i shuffle2 = __lsx_vld(row2, 1); + __m128i utf8_packed2 = + __lsx_vshuf_b(zero_128, lasx_extracti128_hi(utf8_unpacked), shuffle2); + // 5. store bytes + __lsx_vst(utf8_packed1, utf8_output, 0); + utf8_output += row1[0]; + + __lsx_vst(utf8_packed2, utf8_output, 0); + utf8_output += row2[0]; + + buf += 16; + continue; + } + __m256i surrogates_bytemask = __lasx_xvseq_h( + __lasx_xvand_v(in, lasx_splat_u16(0xf800)), lasx_splat_u16(0xd800)); + // It might seem like checking for surrogates_bitmask == 0xc000 could help. + // However, it is likely an uncommon occurrence. + if (__lasx_xbz_v(surrogates_bytemask)) { + // case: code units from register produce either 1, 2 or 3 UTF-8 bytes + /* In this branch we handle three cases: + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + single UTF-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - + two UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + three UTF-8 bytes + + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. + + We precompute byte 1 for case #1 and the common byte for cases #2 & #3 + in register t2. + + We precompute byte 1 for case #3 and -- **conditionally** -- + precompute either byte 1 for case #2 or byte 2 for case #3.
Note that + they differ by exactly one bit. + + Finally from these two code units we build proper UTF-8 sequence, + taking into account the case (i.e, the number of bytes to write). + */ + /** + * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: + * t2 => [0ccc|cccc] [10cc|cccc] + * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) + */ + // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] + __m256i t0 = __lasx_xvpickev_b(in, in); + t0 = __lasx_xvilvl_b(t0, t0); + + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] + __m256i v_3f7f = __lasx_xvreplgr2vr_h(uint16_t(0x3F7F)); + __m256i t1 = __lasx_xvand_v(t0, v_3f7f); + // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] + __m256i t2 = __lasx_xvor_v(t1, lasx_splat_u16(0x8000)); + + // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] + __m256i s0 = __lasx_xvsrli_h(in, 12); + // s1: [aaaa|bbbb|bbcc|cccc] => [aabb|bbbb|cccc|cc00] + __m256i s1 = __lasx_xvslli_h(in, 2); + // s1: [aabb|bbbb|cccc|cc00] => [00bb|bbbb|0000|0000] + s1 = __lasx_xvand_v(s1, lasx_splat_u16(0x3f00)); + + // [00bb|bbbb|0000|aaaa] + __m256i s2 = __lasx_xvor_v(s0, s1); + // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] + __m256i v_c0e0 = __lasx_xvreplgr2vr_h(uint16_t(0xC0E0)); + __m256i s3 = __lasx_xvor_v(s2, v_c0e0); + __m256i one_or_two_bytes_bytemask = __lasx_xvsle_hu(in, v_07ff); + __m256i m0 = + __lasx_xvandn_v(one_or_two_bytes_bytemask, lasx_splat_u16(0x4000)); + __m256i s4 = __lasx_xvxor_v(s3, m0); + + // 4. expand code units 16-bit => 32-bit + __m256i out0 = __lasx_xvilvl_h(s4, t2); + __m256i out1 = __lasx_xvilvh_h(s4, t2); + + // 5.
compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle + __m256i one_byte_bytemask = __lasx_xvsle_hu(in, __lasx_xvrepli_h(0x7F)); + __m256i one_byte_bytemask_low = + __lasx_xvilvl_h(one_byte_bytemask, one_byte_bytemask); + __m256i one_byte_bytemask_high = + __lasx_xvilvh_h(one_byte_bytemask, one_byte_bytemask); + + __m256i one_or_two_bytes_bytemask_low = + __lasx_xvilvl_h(one_or_two_bytes_bytemask, zero); + __m256i one_or_two_bytes_bytemask_high = + __lasx_xvilvh_h(one_or_two_bytes_bytemask, zero); + + __m256i mask0 = __lasx_xvmskltz_h( + __lasx_xvor_v(one_or_two_bytes_bytemask_low, one_byte_bytemask_low)); + __m256i mask1 = __lasx_xvmskltz_h(__lasx_xvor_v( + one_or_two_bytes_bytemask_high, one_byte_bytemask_high)); + + uint32_t mask = __lasx_xvpickve2gr_wu(mask0, 0); + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; + __m128i shuffle0 = __lsx_vld(row0, 1); + __m128i utf8_0 = + __lsx_vshuf_b(zero_128, lasx_extracti128_lo(out0), shuffle0); + __lsx_vst(utf8_0, utf8_output, 0); + utf8_output += row0[0]; + + mask = __lasx_xvpickve2gr_wu(mask1, 0); + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; + __m128i shuffle1 = __lsx_vld(row1, 1); + __m128i utf8_1 = + __lsx_vshuf_b(zero_128, lasx_extracti128_lo(out1), shuffle1); + __lsx_vst(utf8_1, utf8_output, 0); + utf8_output += row1[0]; + + mask = __lasx_xvpickve2gr_wu(mask0, 4); + const uint8_t *row2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; + __m128i shuffle2 = __lsx_vld(row2, 1); + __m128i utf8_2 = + __lsx_vshuf_b(zero_128, lasx_extracti128_hi(out0), shuffle2); + __lsx_vst(utf8_2, utf8_output, 0); + utf8_output += row2[0]; + + mask = __lasx_xvpickve2gr_wu(mask1, 4); + const uint8_t *row3 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; + __m128i shuffle3 = __lsx_vld(row3, 1); + __m128i utf8_3 = + __lsx_vshuf_b(zero_128, lasx_extracti128_hi(out1),
shuffle3); + __lsx_vst(utf8_3, utf8_output, 0); + utf8_output += row3[0]; + + buf += 16; + // surrogate pair(s) in a register + } else { + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint16_t word = scalar::utf16::swap_if_needed(buf[k]); + if ((word & 0xFF80) == 0) { + *utf8_output++ = char(word); + } else if ((word & 0xF800) == 0) { + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xF800) != 0xD800) { + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = + scalar::utf16::swap_if_needed(buf[k + 1]); + k++; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return std::make_pair( + result(error_code::SURROGATE, buf - start + k - 1), + reinterpret_cast(utf8_output)); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf8_output++ = char((value >> 18) | 0b11110000); + *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((value & 0b111111) | 0b10000000); + } + } + buf += k; + } + } // while + + return std::make_pair(result(error_code::SUCCESS, buf - start), + reinterpret_cast(utf8_output)); +} +/* end file src/lasx/lasx_convert_utf16_to_utf8.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +/* begin file src/lasx/lasx_convert_utf16_to_utf32.cpp */ +template +std::pair
+lasx_convert_utf16_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_out) { + uint32_t *utf32_output = reinterpret_cast(utf32_out); + const char16_t *end = buf + len; + + // Scalar prologue until utf32_output is 32-byte aligned (performance degrades otherwise) + while (((uint64_t)utf32_output & 0x1f) && buf < end) { + uint16_t word = scalar::utf16::swap_if_needed(buf[0]); + if ((word & 0xF800) != 0xD800) { + *utf32_output++ = char32_t(word); + buf++; + } else { + if (buf + 1 >= end) { + return std::make_pair(nullptr, + reinterpret_cast(utf32_output)); + } + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = scalar::utf16::swap_if_needed(buf[1]); + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return std::make_pair(nullptr, + reinterpret_cast(utf32_output)); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf32_output++ = char32_t(value); + buf += 2; + } + } + + __m256i v_f800 = lasx_splat_u16(0xf800); + __m256i v_d800 = lasx_splat_u16(0xd800); + + while (end - buf >= 16) { + __m256i in = __lasx_xvld(reinterpret_cast(buf), 0); + if simdutf_constexpr (!match_system(big_endian)) { + in = lasx_swap_bytes(in); + } + + __m256i surrogates_bytemask = + __lasx_xvseq_h(__lasx_xvand_v(in, v_f800), v_d800); + // It might seem like checking for surrogates_bitmask == 0xc000 could help. + // However, it is likely an uncommon occurrence. + if (__lasx_xbz_v(surrogates_bytemask)) { + // case: no surrogate pairs, extend all 16-bit code units to 32-bit code + // units + __m256i in_hi = __lasx_xvpermi_q(in, in, 0b00000001); + __lasx_xvst(__lasx_vext2xv_wu_hu(in), utf32_output, 0); + __lasx_xvst(__lasx_vext2xv_wu_hu(in_hi), utf32_output, 32); + utf32_output += 16; + buf += 16; + // surrogate pair(s) in a register + } else { + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables.
+ size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint16_t word = scalar::utf16::swap_if_needed(buf[k]); + if ((word & 0xF800) != 0xD800) { + *utf32_output++ = char32_t(word); + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = + scalar::utf16::swap_if_needed(buf[k + 1]); + k++; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return std::make_pair(nullptr, + reinterpret_cast(utf32_output)); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf32_output++ = char32_t(value); + } + } + buf += k; + } + } // while + return std::make_pair(buf, reinterpret_cast(utf32_output)); +} + +/* + Returns a pair: a result struct and utf32_output. + If there is an error, the count field of the result is the position of the + error. Otherwise, it is the position of the first unprocessed code unit in buf + (even if finished). A scalar routine should carry on the conversion of the + tail if needed.
+*/ +template +std::pair +lasx_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, + char32_t *utf32_out) { + uint32_t *utf32_output = reinterpret_cast(utf32_out); + const char16_t *start = buf; + const char16_t *end = buf + len; + + // Scalar prologue until utf32_output is 32-byte aligned (performance degrades otherwise) + while (((uint64_t)utf32_output & 0x1f) && buf < end) { + uint16_t word = scalar::utf16::swap_if_needed(buf[0]); + if ((word & 0xF800) != 0xD800) { + *utf32_output++ = char32_t(word); + buf++; + } else if (buf + 1 < end) { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = scalar::utf16::swap_if_needed(buf[1]); + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return std::make_pair(result(error_code::SURROGATE, buf - start), + reinterpret_cast(utf32_output)); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf32_output++ = char32_t(value); + buf += 2; + } else { + return std::make_pair(result(error_code::SURROGATE, buf - start), + reinterpret_cast(utf32_output)); + } + } + + __m256i v_f800 = lasx_splat_u16(0xf800); + __m256i v_d800 = lasx_splat_u16(0xd800); + while (end - buf >= 16) { + __m256i in = __lasx_xvld(reinterpret_cast(buf), 0); + if simdutf_constexpr (!match_system(big_endian)) { + in = lasx_swap_bytes(in); + } + + __m256i surrogates_bytemask = + __lasx_xvseq_h(__lasx_xvand_v(in, v_f800), v_d800); + // It might seem like checking for surrogates_bitmask == 0xc000 could help. + // However, it is likely an uncommon occurrence. + if (__lasx_xbz_v(surrogates_bytemask)) { + // case: no surrogate pairs, extend all 16-bit code units to 32-bit code + // units + __m256i in_hi = __lasx_xvpermi_q(in, in, 0b00000001); + __lasx_xvst(__lasx_vext2xv_wu_hu(in), utf32_output, 0); + __lasx_xvst(__lasx_vext2xv_wu_hu(in_hi), utf32_output, 32); + utf32_output += 16; + buf += 16; + // surrogate pair(s) in a register + } else { + // Let us do a scalar fallback.
+ // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint16_t word = scalar::utf16::swap_if_needed(buf[k]); + if ((word & 0xF800) != 0xD800) { + *utf32_output++ = char32_t(word); + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = + scalar::utf16::swap_if_needed(buf[k + 1]); + k++; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return std::make_pair( + result(error_code::SURROGATE, buf - start + k - 1), + reinterpret_cast(utf32_output)); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf32_output++ = char32_t(value); + } + } + buf += k; + } + } // while + return std::make_pair(result(error_code::SUCCESS, buf - start), + reinterpret_cast(utf32_output)); +} +/* end file src/lasx/lasx_convert_utf16_to_utf32.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/lasx/lasx_convert_utf32_to_latin1.cpp */ +std::pair +lasx_convert_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) { + const char32_t *end = buf + len; + const __m256i shuf_mask = ____m256i( + (__m128i)v16u8{0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0}); + __m256i v_ff = __lasx_xvrepli_w(0xFF); + + while (end - buf >= 16) { + __m256i in1 = __lasx_xvld(reinterpret_cast(buf), 0); + __m256i in2 = __lasx_xvld(reinterpret_cast(buf), 32); + + __m256i in12 = __lasx_xvor_v(in1, in2); + if (__lasx_xbz_v(__lasx_xvslt_wu(v_ff, in12))) { + // 1.
pack the bytes + __m256i latin1_packed_tmp = __lasx_xvshuf_b(in2, in1, shuf_mask); + latin1_packed_tmp = __lasx_xvpermi_d(latin1_packed_tmp, 0b00001000); + __m128i latin1_packed = lasx_extracti128_lo(latin1_packed_tmp); + latin1_packed = __lsx_vpermi_w(latin1_packed, latin1_packed, 0b11011000); + // 2. store (16 bytes) + __lsx_vst(latin1_packed, reinterpret_cast(latin1_output), 0); + // 3. adjust pointers + buf += 16; + latin1_output += 16; + } else { + return std::make_pair(nullptr, reinterpret_cast(latin1_output)); + } + } // while + return std::make_pair(buf, latin1_output); +} + +std::pair +lasx_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, + char *latin1_output) { + const char32_t *start = buf; + const char32_t *end = buf + len; + + const __m256i shuf_mask = ____m256i( + (__m128i)v16u8{0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0}); + __m256i v_ff = __lasx_xvrepli_w(0xFF); + + while (end - buf >= 16) { + __m256i in1 = __lasx_xvld(reinterpret_cast(buf), 0); + __m256i in2 = __lasx_xvld(reinterpret_cast(buf), 32); + + __m256i in12 = __lasx_xvor_v(in1, in2); + if (__lasx_xbz_v(__lasx_xvslt_wu(v_ff, in12))) { + // 1. pack the bytes + __m256i latin1_packed_tmp = __lasx_xvshuf_b(in2, in1, shuf_mask); + latin1_packed_tmp = __lasx_xvpermi_d(latin1_packed_tmp, 0b00001000); + __m128i latin1_packed = lasx_extracti128_lo(latin1_packed_tmp); + latin1_packed = __lsx_vpermi_w(latin1_packed, latin1_packed, 0b11011000); + // 2. store (16 bytes) + __lsx_vst(latin1_packed, reinterpret_cast(latin1_output), 0); + // 3. adjust pointers + buf += 16; + latin1_output += 16; + } else { + // Let us do a scalar fallback.
+ for (int k = 0; k < 16; k++) { + uint32_t word = buf[k]; + if (word <= 0xff) { + *latin1_output++ = char(word); + } else { + return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), + latin1_output); + } + } + } + } // while + return std::make_pair(result(error_code::SUCCESS, buf - start), + latin1_output); +} +/* end file src/lasx/lasx_convert_utf32_to_latin1.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +/* begin file src/lasx/lasx_convert_utf32_to_utf8.cpp */ +std::pair +lasx_convert_utf32_to_utf8(const char32_t *buf, size_t len, char *utf8_out) { + uint8_t *utf8_output = reinterpret_cast(utf8_out); + const char32_t *end = buf + len; + + // load addr align 32 + while (((uint64_t)buf & 0x1F) && buf < end) { + uint32_t word = *buf; + if ((word & 0xFFFFFF80) == 0) { + *utf8_output++ = char(word); + } else if ((word & 0xFFFFF800) == 0) { + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xFFFF0000) == 0) { + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair(nullptr, reinterpret_cast(utf8_output)); + } + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { + if (word > 0x10FFFF) { + return std::make_pair(nullptr, reinterpret_cast(utf8_output)); + } + *utf8_output++ = char((word >> 18) | 0b11110000); + *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } + buf++; + } + + __m256i v_c080 = lasx_splat_u16(0xc080); + __m256i v_07ff = lasx_splat_u16(0x07ff); + __m256i v_dfff = lasx_splat_u16(0xdfff); + __m256i v_d800 = lasx_splat_u16(0xd800); + __m256i zero = __lasx_xvldi(0); + __m128i zero_128 = __lsx_vldi(0); + __m256i
forbidden_bytemask = __lasx_xvldi(0x0); + + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 + + while (end - buf > std::ptrdiff_t(16 + safety_margin)) { + __m256i in = __lasx_xvld(reinterpret_cast(buf), 0); + __m256i nextin = __lasx_xvld(reinterpret_cast(buf), 32); + + // Check if no bits set above 16th + if (__lasx_xbz_v(__lasx_xvpickod_h(in, nextin))) { + // Pack UTF-32 to UTF-16 safely (without surrogate pairs) + // Apply UTF-16 => UTF-8 routine (lasx_convert_utf16_to_utf8.cpp) + __m256i utf16_packed = + __lasx_xvpermi_d(__lasx_xvpickev_h(nextin, in), 0b11011000); + + if (__lasx_xbz_v(__lasx_xvslt_hu(__lasx_xvrepli_h(0x7F), + utf16_packed))) { // ASCII fast path!!!! + // 1. pack the bytes + // obviously suboptimal. + __m256i utf8_packed = __lasx_xvpermi_d( + __lasx_xvpickev_b(utf16_packed, utf16_packed), 0b00001000); + // 2. store (8 bytes) + __lsx_vst(lasx_extracti128_lo(utf8_packed), utf8_output, 0); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! + } + + if (__lasx_xbz_v(__lasx_xvslt_hu(v_07ff, utf16_packed))) { + // 1. prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 + // expected output : [110a|aaaa|10bb|bbbb] x 8 + + // t0 = [000a|aaaa|bbbb|bb00] + const __m256i t0 = __lasx_xvslli_h(utf16_packed, 2); + // t1 = [000a|aaaa|0000|0000] + const __m256i t1 = __lasx_xvand_v(t0, lasx_splat_u16(0x1f00)); + // t2 = [0000|0000|00bb|bbbb] + const __m256i t2 = __lasx_xvand_v(utf16_packed, __lasx_xvrepli_h(0x3f)); + // t3 = [000a|aaaa|00bb|bbbb] + const __m256i t3 = __lasx_xvor_v(t1, t2); + // t4 = [110a|aaaa|10bb|bbbb] + const __m256i t4 = __lasx_xvor_v(t3, v_c080); + // 2. merge ASCII and 2-byte codewords + __m256i one_byte_bytemask = + __lasx_xvsle_hu(utf16_packed, __lasx_xvrepli_h(0x7F /*0x007F*/)); + __m256i utf8_unpacked = + __lasx_xvbitsel_v(t4, utf16_packed, one_byte_bytemask); + // 3. 
prepare bitmask for 8-bit lookup + __m256i mask = __lasx_xvmskltz_h(one_byte_bytemask); + uint32_t m1 = __lasx_xvpickve2gr_wu(mask, 0); + uint32_t m2 = __lasx_xvpickve2gr_wu(mask, 4); + // 4. pack the bytes + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lasx_1_2_utf8_bytes_mask[m1]][0]; + __m128i shuffle1 = __lsx_vld(row1, 1); + __m128i utf8_packed1 = __lsx_vshuf_b( + zero_128, lasx_extracti128_lo(utf8_unpacked), shuffle1); + + const uint8_t *row2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lasx_1_2_utf8_bytes_mask[m2]][0]; + __m128i shuffle2 = __lsx_vld(row2, 1); + __m128i utf8_packed2 = __lsx_vshuf_b( + zero_128, lasx_extracti128_hi(utf8_unpacked), shuffle2); + // 5. store bytes + __lsx_vst(utf8_packed1, utf8_output, 0); + utf8_output += row1[0]; + + __lsx_vst(utf8_packed2, utf8_output, 0); + utf8_output += row2[0]; + + buf += 16; + continue; + } else { + // case: code units from register produce either 1, 2 or 3 UTF-8 bytes + forbidden_bytemask = __lasx_xvor_v( + __lasx_xvand_v( + __lasx_xvsle_h(utf16_packed, v_dfff), // utf16_packed <= 0xdfff + __lasx_xvsle_h(v_d800, utf16_packed)), // utf16_packed >= 0xd800 + forbidden_bytemask); + /* In this branch we handle three cases: + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + single UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - + two UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + three UTF-8 bytes + + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. + + We precompute byte 1 for case #1 and the common byte for cases #2 & + #3 in register t2. + + We precompute byte 1 for case #3 and -- **conditionally** -- + precompute either byte 1 for case #2 or byte 2 for case #3. Note that + they differ by exactly one bit. 
+ + Finally from these two code units we build proper UTF-8 sequence, + taking into account the case (i.e, the number of bytes to write). + */ + /** + * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: + * t2 => [0ccc|cccc] [10cc|cccc] + * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) + */ + // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] + __m256i t0 = __lasx_xvpickev_b(utf16_packed, utf16_packed); + t0 = __lasx_xvilvl_b(t0, t0); + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] + __m256i v_3f7f = __lasx_xvreplgr2vr_h(uint16_t(0x3F7F)); + __m256i t1 = __lasx_xvand_v(t0, v_3f7f); + // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] + __m256i t2 = __lasx_xvor_v(t1, lasx_splat_u16(0x8000)); + + // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] + __m256i s0 = __lasx_xvsrli_h(utf16_packed, 12); + // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] + __m256i s1 = __lasx_xvslli_h(utf16_packed, 2); + // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000] + s1 = __lasx_xvand_v(s1, lasx_splat_u16(0x3f00)); + // [00bb|bbbb|0000|aaaa] + __m256i s2 = __lasx_xvor_v(s0, s1); + // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] + __m256i v_c0e0 = __lasx_xvreplgr2vr_h(uint16_t(0xC0E0)); + __m256i s3 = __lasx_xvor_v(s2, v_c0e0); + // __m256i v_07ff = vmovq_n_u16((uint16_t)0x07FF); + __m256i one_or_two_bytes_bytemask = + __lasx_xvsle_hu(utf16_packed, v_07ff); + __m256i m0 = + __lasx_xvandn_v(one_or_two_bytes_bytemask, lasx_splat_u16(0x4000)); + __m256i s4 = __lasx_xvxor_v(s3, m0); + + // 4. expand code units 16-bit => 32-bit + __m256i out0 = __lasx_xvilvl_h(s4, t2); + __m256i out1 = __lasx_xvilvh_h(s4, t2); + + // 5. 
compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle + __m256i one_byte_bytemask = + __lasx_xvsle_hu(utf16_packed, __lasx_xvrepli_h(0x7F)); + + __m256i one_or_two_bytes_bytemask_u16_to_u32_low = + __lasx_xvilvl_h(one_or_two_bytes_bytemask, zero); + __m256i one_or_two_bytes_bytemask_u16_to_u32_high = + __lasx_xvilvh_h(one_or_two_bytes_bytemask, zero); + + __m256i one_byte_bytemask_u16_to_u32_low = + __lasx_xvilvl_h(one_byte_bytemask, one_byte_bytemask); + __m256i one_byte_bytemask_u16_to_u32_high = + __lasx_xvilvh_h(one_byte_bytemask, one_byte_bytemask); + + __m256i mask0 = __lasx_xvmskltz_h( + __lasx_xvor_v(one_or_two_bytes_bytemask_u16_to_u32_low, + one_byte_bytemask_u16_to_u32_low)); + __m256i mask1 = __lasx_xvmskltz_h( + __lasx_xvor_v(one_or_two_bytes_bytemask_u16_to_u32_high, + one_byte_bytemask_u16_to_u32_high)); + + uint32_t mask = __lasx_xvpickve2gr_wu(mask0, 0); + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; + __m128i shuffle0 = __lsx_vld(row0, 1); + __m128i utf8_0 = + __lsx_vshuf_b(zero_128, lasx_extracti128_lo(out0), shuffle0); + __lsx_vst(utf8_0, utf8_output, 0); + utf8_output += row0[0]; + + mask = __lasx_xvpickve2gr_wu(mask1, 0); + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; + __m128i shuffle1 = __lsx_vld(row1, 1); + __m128i utf8_1 = + __lsx_vshuf_b(zero_128, lasx_extracti128_lo(out1), shuffle1); + __lsx_vst(utf8_1, utf8_output, 0); + utf8_output += row1[0]; + + mask = __lasx_xvpickve2gr_wu(mask0, 4); + const uint8_t *row2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; + __m128i shuffle2 = __lsx_vld(row2, 1); + __m128i utf8_2 = + __lsx_vshuf_b(zero_128, lasx_extracti128_hi(out0), shuffle2); + __lsx_vst(utf8_2, utf8_output, 0); + utf8_output += row2[0]; + + mask = __lasx_xvpickve2gr_wu(mask1, 4); + const uint8_t *row3 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; + __m128i 
shuffle3 = __lsx_vld(row3, 1); + __m128i utf8_3 = + __lsx_vshuf_b(zero_128, lasx_extracti128_hi(out1), shuffle3); + __lsx_vst(utf8_3, utf8_output, 0); + utf8_output += row3[0]; + + buf += 16; + } + // At least one 32-bit word will produce a surrogate pair in UTF-16 <=> + // will produce four UTF-8 bytes. + } else { + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint32_t word = buf[k]; + if ((word & 0xFFFFFF80) == 0) { + *utf8_output++ = char(word); + } else if ((word & 0xFFFFF800) == 0) { + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xFFFF0000) == 0) { + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair(nullptr, + reinterpret_cast(utf8_output)); + } + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { + if (word > 0x10FFFF) { + return std::make_pair(nullptr, + reinterpret_cast(utf8_output)); + } + *utf8_output++ = char((word >> 18) | 0b11110000); + *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } + } + buf += k; + } + } // while + + // check for invalid input + if (__lasx_xbnz_v(forbidden_bytemask)) { + return std::make_pair(nullptr, reinterpret_cast(utf8_output)); + } + return std::make_pair(buf, reinterpret_cast(utf8_output)); +} + +std::pair +lasx_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, + char *utf8_out) { + uint8_t *utf8_output = reinterpret_cast(utf8_out); + const char32_t *start = buf; + 
const char32_t *end = buf + len; + + // load addr align 32 + while (((uint64_t)buf & 0x1F) && buf < end) { + uint32_t word = *buf; + if ((word & 0xFFFFFF80) == 0) { + *utf8_output++ = char(word); + } else if ((word & 0xFFFFF800) == 0) { + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xFFFF0000) == 0) { + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair(result(error_code::SURROGATE, buf - start), + reinterpret_cast(utf8_output)); + } + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { + if (word > 0x10FFFF) { + return std::make_pair(result(error_code::TOO_LARGE, buf - start), + reinterpret_cast(utf8_output)); + } + *utf8_output++ = char((word >> 18) | 0b11110000); + *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } + buf++; + } + + __m256i v_c080 = lasx_splat_u16(0xc080); + __m256i v_07ff = lasx_splat_u16(0x07ff); + __m256i v_dfff = lasx_splat_u16(0xdfff); + __m256i v_d800 = lasx_splat_u16(0xd800); + __m256i zero = __lasx_xvldi(0); + __m128i zero_128 = __lsx_vldi(0); + __m256i forbidden_bytemask = __lasx_xvldi(0x0); + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 + + while (end - buf > std::ptrdiff_t(16 + safety_margin)) { + __m256i in = __lasx_xvld(reinterpret_cast(buf), 0); + __m256i nextin = __lasx_xvld(reinterpret_cast(buf), 32); + + // Check if no bits set above 16th + if (__lasx_xbz_v(__lasx_xvpickod_h(in, nextin))) { + // Pack UTF-32 to UTF-16 safely (without surrogate pairs) + // Apply UTF-16 => UTF-8 routine (lasx_convert_utf16_to_utf8.cpp) + __m256i utf16_packed = + __lasx_xvpermi_d(__lasx_xvpickev_h(nextin, in), 0b11011000); 
+ + if (__lasx_xbz_v(__lasx_xvslt_hu(__lasx_xvrepli_h(0x7F), + utf16_packed))) { // ASCII fast path!!!! + // 1. pack the bytes + // obviously suboptimal. + __m256i utf8_packed = __lasx_xvpermi_d( + __lasx_xvpickev_b(utf16_packed, utf16_packed), 0b00001000); + // 2. store (8 bytes) + __lsx_vst(lasx_extracti128_lo(utf8_packed), utf8_output, 0); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! + } + + if (__lasx_xbz_v(__lasx_xvslt_hu(v_07ff, utf16_packed))) { + // 1. prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 + // expected output : [110a|aaaa|10bb|bbbb] x 8 + + // t0 = [000a|aaaa|bbbb|bb00] + const __m256i t0 = __lasx_xvslli_h(utf16_packed, 2); + // t1 = [000a|aaaa|0000|0000] + const __m256i t1 = __lasx_xvand_v(t0, lasx_splat_u16(0x1f00)); + // t2 = [0000|0000|00bb|bbbb] + const __m256i t2 = __lasx_xvand_v(utf16_packed, __lasx_xvrepli_h(0x3f)); + // t3 = [000a|aaaa|00bb|bbbb] + const __m256i t3 = __lasx_xvor_v(t1, t2); + // t4 = [110a|aaaa|10bb|bbbb] + const __m256i t4 = __lasx_xvor_v(t3, v_c080); + // 2. merge ASCII and 2-byte codewords + __m256i one_byte_bytemask = + __lasx_xvsle_hu(utf16_packed, __lasx_xvrepli_h(0x7F /*0x007F*/)); + __m256i utf8_unpacked = + __lasx_xvbitsel_v(t4, utf16_packed, one_byte_bytemask); + // 3. prepare bitmask for 8-bit lookup + __m256i mask = __lasx_xvmskltz_h(one_byte_bytemask); + uint32_t m1 = __lasx_xvpickve2gr_wu(mask, 0); + uint32_t m2 = __lasx_xvpickve2gr_wu(mask, 4); + // 4. 
pack the bytes + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lasx_1_2_utf8_bytes_mask[m1]][0]; + __m128i shuffle1 = __lsx_vld(row1, 1); + __m128i utf8_packed1 = __lsx_vshuf_b( + zero_128, lasx_extracti128_lo(utf8_unpacked), shuffle1); + + const uint8_t *row2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lasx_1_2_utf8_bytes_mask[m2]][0]; + __m128i shuffle2 = __lsx_vld(row2, 1); + __m128i utf8_packed2 = __lsx_vshuf_b( + zero_128, lasx_extracti128_hi(utf8_unpacked), shuffle2); + // 5. store bytes + __lsx_vst(utf8_packed1, utf8_output, 0); + utf8_output += row1[0]; + + __lsx_vst(utf8_packed2, utf8_output, 0); + utf8_output += row2[0]; + + buf += 16; + continue; + } else { + // case: code units from register produce either 1, 2 or 3 UTF-8 bytes + forbidden_bytemask = __lasx_xvor_v( + __lasx_xvand_v( + __lasx_xvsle_h(utf16_packed, v_dfff), // utf16_packed <= 0xdfff + __lasx_xvsle_h(v_d800, utf16_packed)), // utf16_packed >= 0xd800 + forbidden_bytemask); + if (__lasx_xbnz_v(forbidden_bytemask)) { + return std::make_pair(result(error_code::SURROGATE, buf - start), + reinterpret_cast(utf8_output)); + } + /* In this branch we handle three cases: + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + single UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - + two UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + three UTF-8 bytes + + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. + + We precompute byte 1 for case #1 and the common byte for cases #2 & + #3 in register t2. + + We precompute byte 1 for case #3 and -- **conditionally** -- + precompute either byte 1 for case #2 or byte 2 for case #3. Note that + they differ by exactly one bit. 
+ + Finally from these two code units we build proper UTF-8 sequence, + taking into account the case (i.e, the number of bytes to write). + */ + /** + * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: + * t2 => [0ccc|cccc] [10cc|cccc] + * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) + */ + // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] + __m256i t0 = __lasx_xvpickev_b(utf16_packed, utf16_packed); + t0 = __lasx_xvilvl_b(t0, t0); + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] + __m256i v_3f7f = __lasx_xvreplgr2vr_h(uint16_t(0x3F7F)); + __m256i t1 = __lasx_xvand_v(t0, v_3f7f); + // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] + __m256i t2 = __lasx_xvor_v(t1, lasx_splat_u16(0x8000)); + + // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] + __m256i s0 = __lasx_xvsrli_h(utf16_packed, 12); + // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] + __m256i s1 = __lasx_xvslli_h(utf16_packed, 2); + // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000] + s1 = __lasx_xvand_v(s1, lasx_splat_u16(0x3F00)); + // [00bb|bbbb|0000|aaaa] + __m256i s2 = __lasx_xvor_v(s0, s1); + // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] + __m256i v_c0e0 = __lasx_xvreplgr2vr_h(uint16_t(0xC0E0)); + __m256i s3 = __lasx_xvor_v(s2, v_c0e0); + // __m256i v_07ff = vmovq_n_u16((uint16_t)0x07FF); + __m256i one_or_two_bytes_bytemask = + __lasx_xvsle_hu(utf16_packed, v_07ff); + __m256i m0 = + __lasx_xvandn_v(one_or_two_bytes_bytemask, lasx_splat_u16(0x4000)); + __m256i s4 = __lasx_xvxor_v(s3, m0); + + // 4. expand code units 16-bit => 32-bit + __m256i out0 = __lasx_xvilvl_h(s4, t2); + __m256i out1 = __lasx_xvilvh_h(s4, t2); + + // 5. 
compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle + __m256i one_byte_bytemask = + __lasx_xvsle_hu(utf16_packed, __lasx_xvrepli_h(0x7F)); + + __m256i one_or_two_bytes_bytemask_u16_to_u32_low = + __lasx_xvilvl_h(one_or_two_bytes_bytemask, zero); + __m256i one_or_two_bytes_bytemask_u16_to_u32_high = + __lasx_xvilvh_h(one_or_two_bytes_bytemask, zero); + + __m256i one_byte_bytemask_u16_to_u32_low = + __lasx_xvilvl_h(one_byte_bytemask, one_byte_bytemask); + __m256i one_byte_bytemask_u16_to_u32_high = + __lasx_xvilvh_h(one_byte_bytemask, one_byte_bytemask); + + __m256i mask0 = __lasx_xvmskltz_h( + __lasx_xvor_v(one_or_two_bytes_bytemask_u16_to_u32_low, + one_byte_bytemask_u16_to_u32_low)); + __m256i mask1 = __lasx_xvmskltz_h( + __lasx_xvor_v(one_or_two_bytes_bytemask_u16_to_u32_high, + one_byte_bytemask_u16_to_u32_high)); + + uint32_t mask = __lasx_xvpickve2gr_wu(mask0, 0); + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; + __m128i shuffle0 = __lsx_vld(row0, 1); + __m128i utf8_0 = + __lsx_vshuf_b(zero_128, lasx_extracti128_lo(out0), shuffle0); + __lsx_vst(utf8_0, utf8_output, 0); + utf8_output += row0[0]; + + mask = __lasx_xvpickve2gr_wu(mask1, 0); + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; + __m128i shuffle1 = __lsx_vld(row1, 1); + __m128i utf8_1 = + __lsx_vshuf_b(zero_128, lasx_extracti128_lo(out1), shuffle1); + __lsx_vst(utf8_1, utf8_output, 0); + utf8_output += row1[0]; + + mask = __lasx_xvpickve2gr_wu(mask0, 4); + const uint8_t *row2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; + __m128i shuffle2 = __lsx_vld(row2, 1); + __m128i utf8_2 = + __lsx_vshuf_b(zero_128, lasx_extracti128_hi(out0), shuffle2); + __lsx_vst(utf8_2, utf8_output, 0); + utf8_output += row2[0]; + + mask = __lasx_xvpickve2gr_wu(mask1, 4); + const uint8_t *row3 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; + __m128i 
shuffle3 = __lsx_vld(row3, 1); + __m128i utf8_3 = + __lsx_vshuf_b(zero_128, lasx_extracti128_hi(out1), shuffle3); + __lsx_vst(utf8_3, utf8_output, 0); + utf8_output += row3[0]; + + buf += 16; + } + // At least one 32-bit word will produce a surrogate pair in UTF-16 <=> + // will produce four UTF-8 bytes. + } else { + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint32_t word = buf[k]; + if ((word & 0xFFFFFF80) == 0) { + *utf8_output++ = char(word); + } else if ((word & 0xFFFFF800) == 0) { + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xFFFF0000) == 0) { + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair( + result(error_code::SURROGATE, buf - start + k), + reinterpret_cast(utf8_output)); + } + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { + if (word > 0x10FFFF) { + return std::make_pair( + result(error_code::TOO_LARGE, buf - start + k), + reinterpret_cast(utf8_output)); + } + *utf8_output++ = char((word >> 18) | 0b11110000); + *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } + } + buf += k; + } + } // while + + return std::make_pair(result(error_code::SUCCESS, buf - start), + reinterpret_cast(utf8_output)); +} +/* end file src/lasx/lasx_convert_utf32_to_utf8.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +/* begin file 
src/lasx/lasx_convert_utf32_to_utf16.cpp */ +template +std::pair +lasx_convert_utf32_to_utf16(const char32_t *buf, size_t len, + char16_t *utf16_out) { + uint16_t *utf16_output = reinterpret_cast(utf16_out); + const char32_t *end = buf + len; + + // Performance degradation when memory address is not 32-byte aligned + while (((uint64_t)utf16_output & 0x1F) && buf < end) { + uint32_t word = *buf++; + if ((word & 0xFFFF0000) == 0) { + // will not generate a surrogate pair + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair(nullptr, + reinterpret_cast(utf16_output)); + } + *utf16_output++ = !match_system(big_endian) + ? char16_t(word >> 8 | word << 8) + : char16_t(word); + // buf++; + } else { + // will generate a surrogate pair + if (word > 0x10FFFF) { + return std::make_pair(nullptr, + reinterpret_cast(utf16_output)); + } + word -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); + if simdutf_constexpr (!match_system(big_endian)) { + high_surrogate = uint16_t(high_surrogate >> 8 | high_surrogate << 8); + low_surrogate = uint16_t(low_surrogate << 8 | low_surrogate >> 8); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + // buf++; + } + } + + __m256i forbidden_bytemask = __lasx_xvrepli_h(0); + __m256i v_d800 = lasx_splat_u16(0xd800); + __m256i v_dfff = lasx_splat_u16(0xdfff); + while (end - buf >= 16) { + __m256i in0 = __lasx_xvld(reinterpret_cast(buf), 0); + __m256i in1 = __lasx_xvld(reinterpret_cast(buf), 32); + + // Check if no bits set above 16th + if (__lasx_xbz_v(__lasx_xvpickod_h(in1, in0))) { + __m256i utf16_packed = + __lasx_xvpermi_d(__lasx_xvpickev_h(in1, in0), 0b11011000); + forbidden_bytemask = __lasx_xvor_v( + __lasx_xvand_v( + __lasx_xvsle_h(utf16_packed, v_dfff), // utf16_packed <= 0xdfff + __lasx_xvsle_h(v_d800, utf16_packed)), // utf16_packed >= 0xd800 + forbidden_bytemask); + + if simdutf_constexpr 
(!match_system(big_endian)) { + utf16_packed = lasx_swap_bytes(utf16_packed); + } + __lasx_xvst(utf16_packed, utf16_output, 0); + utf16_output += 16; + buf += 16; + } else { + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint32_t word = buf[k]; + if ((word & 0xFFFF0000) == 0) { + // will not generate a surrogate pair + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair(nullptr, + reinterpret_cast(utf16_output)); + } + *utf16_output++ = !match_system(big_endian) + ? char16_t(word >> 8 | word << 8) + : char16_t(word); + } else { + // will generate a surrogate pair + if (word > 0x10FFFF) { + return std::make_pair(nullptr, + reinterpret_cast(utf16_output)); + } + word -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); + if simdutf_constexpr (!match_system(big_endian)) { + high_surrogate = + uint16_t(high_surrogate >> 8 | high_surrogate << 8); + low_surrogate = uint16_t(low_surrogate << 8 | low_surrogate >> 8); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + } + } + buf += k; + } + } + + // check for invalid input + if (__lasx_xbnz_v(forbidden_bytemask)) { + return std::make_pair(nullptr, reinterpret_cast(utf16_output)); + } + return std::make_pair(buf, reinterpret_cast(utf16_output)); +} + +template +std::pair +lasx_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, + char16_t *utf16_out) { + uint16_t *utf16_output = reinterpret_cast(utf16_out); + const char32_t *start = buf; + const char32_t *end = buf + len; + + // Performance degradation when memory address is not 32-byte aligned + while (((uint64_t)utf16_output & 0x1F) && buf < end) { + uint32_t word = *buf++; + if ((word & 0xFFFF0000) == 0) { + // will not generate a surrogate pair + if (word >= 0xD800 && word <= 0xDFFF) { + return 
std::make_pair(result(error_code::SURROGATE, buf - start - 1), + reinterpret_cast(utf16_output)); + } + *utf16_output++ = !match_system(big_endian) + ? char16_t(word >> 8 | word << 8) + : char16_t(word); + } else { + // will generate a surrogate pair + if (word > 0x10FFFF) { + return std::make_pair(result(error_code::TOO_LARGE, buf - start - 1), + reinterpret_cast(utf16_output)); + } + word -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); + if simdutf_constexpr (!match_system(big_endian)) { + high_surrogate = uint16_t(high_surrogate >> 8 | high_surrogate << 8); + low_surrogate = uint16_t(low_surrogate << 8 | low_surrogate >> 8); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + } + } + + __m256i forbidden_bytemask = __lasx_xvrepli_h(0); + __m256i v_d800 = lasx_splat_u16(0xd800); + __m256i v_dfff = lasx_splat_u16(0xdfff); + while (end - buf >= 16) { + __m256i in0 = __lasx_xvld(reinterpret_cast(buf), 0); + __m256i in1 = __lasx_xvld(reinterpret_cast(buf), 32); + + // Check if no bits set above 16th + if (__lasx_xbz_v(__lasx_xvpickod_h(in1, in0))) { + __m256i utf16_packed = + __lasx_xvpermi_d(__lasx_xvpickev_h(in1, in0), 0b11011000); + forbidden_bytemask = __lasx_xvor_v( + __lasx_xvand_v( + __lasx_xvsle_h(utf16_packed, v_dfff), // utf16_packed <= 0xdfff + __lasx_xvsle_h(v_d800, utf16_packed)), // utf16_packed >= 0xd800 + forbidden_bytemask); + if (__lasx_xbnz_v(forbidden_bytemask)) { + return std::make_pair(result(error_code::SURROGATE, buf - start), + reinterpret_cast(utf16_output)); + } + + if simdutf_constexpr (!match_system(big_endian)) { + utf16_packed = lasx_swap_bytes(utf16_packed); + } + + __lasx_xvst(utf16_packed, utf16_output, 0); + utf16_output += 16; + buf += 16; + } else { + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + 
uint32_t word = buf[k]; + if ((word & 0xFFFF0000) == 0) { + // will not generate a surrogate pair + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair( + result(error_code::SURROGATE, buf - start + k), + reinterpret_cast(utf16_output)); + } + *utf16_output++ = !match_system(big_endian) + ? char16_t(word >> 8 | word << 8) + : char16_t(word); + } else { + // will generate a surrogate pair + if (word > 0x10FFFF) { + return std::make_pair( + result(error_code::TOO_LARGE, buf - start + k), + reinterpret_cast(utf16_output)); + } + word -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); + if simdutf_constexpr (!match_system(big_endian)) { + high_surrogate = + uint16_t(high_surrogate >> 8 | high_surrogate << 8); + low_surrogate = uint16_t(low_surrogate << 8 | low_surrogate >> 8); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + } + } + buf += k; + } + } + + return std::make_pair(result(error_code::SUCCESS, buf - start), + reinterpret_cast(utf16_output)); +} +/* end file src/lasx/lasx_convert_utf32_to_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_BASE64 +/* begin file src/lasx/lasx_base64.cpp */ +/** + * References and further reading: + * + * Wojciech Muła, Daniel Lemire, Base64 encoding and decoding at almost the + * speed of a memory copy, Software: Practice and Experience 50 (2), 2020. + * https://arxiv.org/abs/1910.05109 + * + * Wojciech Muła, Daniel Lemire, Faster Base64 Encoding and Decoding using AVX2 + * Instructions, ACM Transactions on the Web 12 (3), 2018. + * https://arxiv.org/abs/1704.00605 + * + * Simon Josefsson. 2006. The Base16, Base32, and Base64 Data Encodings. + * https://tools.ietf.org/html/rfc4648. (2006). Internet Engineering Task Force, + * Request for Comments: 4648. + * + * Alfred Klomp. 2014a. Fast Base64 encoding/decoding with SSE vectorization. 
+ * http://www.alfredklomp.com/programming/sse-base64/. (2014). + * + * Alfred Klomp. 2014b. Fast Base64 stream encoder/decoder in C99, with SIMD + * acceleration. https://github.com/aklomp/base64. (2014). + * + * Hanson Char. 2014. A Fast and Correct Base 64 Codec. (2014). + * https://aws.amazon.com/blogs/developer/a-fast-and-correct-base-64-codec/ + * + * Nick Kopp. 2013. Base64 Encoding on a GPU. + * https://www.codeproject.com/Articles/276993/Base-Encoding-on-a-GPU. (2013). + */ + +template +size_t encode_base64(char *dst, const char *src, size_t srclen, + base64_options options) { + // credit: Wojciech Muła + // SSE (lookup: pshufb improved unrolled) + const uint8_t *input = (const uint8_t *)src; + static const char *lookup_tbl = + isbase64url + ? "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" + : "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + uint8_t *out = (uint8_t *)dst; + + v32u8 shuf; + __m256i v_fc0fc00, v_3f03f0, shift_r, shift_l, base64_tbl0, base64_tbl1, + base64_tbl2, base64_tbl3; + if (srclen >= 28) { + shuf = v32u8{1, 0, 2, 1, 4, 3, 5, 4, 7, 6, 8, 7, 10, 9, 11, 10, + 1, 0, 2, 1, 4, 3, 5, 4, 7, 6, 8, 7, 10, 9, 11, 10}; + + v_fc0fc00 = __lasx_xvreplgr2vr_w(uint32_t(0x0fc0fc00)); + v_3f03f0 = __lasx_xvreplgr2vr_w(uint32_t(0x003f03f0)); + shift_r = __lasx_xvreplgr2vr_w(uint32_t(0x0006000a)); + shift_l = __lasx_xvreplgr2vr_w(uint32_t(0x00080004)); + base64_tbl0 = ____m256i(__lsx_vld(lookup_tbl, 0)); + base64_tbl1 = ____m256i(__lsx_vld(lookup_tbl, 16)); + base64_tbl2 = ____m256i(__lsx_vld(lookup_tbl, 32)); + base64_tbl3 = ____m256i(__lsx_vld(lookup_tbl, 48)); + } + size_t i = 0; + for (; i + 100 <= srclen; i += 96) { + __m128i in0_lo = + __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 0); + __m128i in0_hi = + __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 1); + __m128i in1_lo = + __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 2); + __m128i in1_hi = + __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 3); + __m128i 
in2_lo = + __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 4); + __m128i in2_hi = + __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 5); + __m128i in3_lo = + __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 6); + __m128i in3_hi = + __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 7); + + __m256i in0 = lasx_set_q(in0_hi, in0_lo); + __m256i in1 = lasx_set_q(in1_hi, in1_lo); + __m256i in2 = lasx_set_q(in2_hi, in2_lo); + __m256i in3 = lasx_set_q(in3_hi, in3_lo); + + in0 = __lasx_xvshuf_b(in0, in0, (__m256i)shuf); + in1 = __lasx_xvshuf_b(in1, in1, (__m256i)shuf); + in2 = __lasx_xvshuf_b(in2, in2, (__m256i)shuf); + in3 = __lasx_xvshuf_b(in3, in3, (__m256i)shuf); + + __m256i t0_0 = __lasx_xvand_v(in0, v_fc0fc00); + __m256i t0_1 = __lasx_xvand_v(in1, v_fc0fc00); + __m256i t0_2 = __lasx_xvand_v(in2, v_fc0fc00); + __m256i t0_3 = __lasx_xvand_v(in3, v_fc0fc00); + + __m256i t1_0 = __lasx_xvsrl_h(t0_0, shift_r); + __m256i t1_1 = __lasx_xvsrl_h(t0_1, shift_r); + __m256i t1_2 = __lasx_xvsrl_h(t0_2, shift_r); + __m256i t1_3 = __lasx_xvsrl_h(t0_3, shift_r); + + __m256i t2_0 = __lasx_xvand_v(in0, v_3f03f0); + __m256i t2_1 = __lasx_xvand_v(in1, v_3f03f0); + __m256i t2_2 = __lasx_xvand_v(in2, v_3f03f0); + __m256i t2_3 = __lasx_xvand_v(in3, v_3f03f0); + + __m256i t3_0 = __lasx_xvsll_h(t2_0, shift_l); + __m256i t3_1 = __lasx_xvsll_h(t2_1, shift_l); + __m256i t3_2 = __lasx_xvsll_h(t2_2, shift_l); + __m256i t3_3 = __lasx_xvsll_h(t2_3, shift_l); + + __m256i input0 = __lasx_xvor_v(t1_0, t3_0); + __m256i input0_shuf0 = __lasx_xvshuf_b(base64_tbl1, base64_tbl0, input0); + __m256i input0_shuf1 = __lasx_xvshuf_b( + base64_tbl3, base64_tbl2, __lasx_xvsub_b(input0, __lasx_xvldi(32))); + __m256i input0_mask = __lasx_xvslei_bu(input0, 31); + __m256i input0_result = + __lasx_xvbitsel_v(input0_shuf1, input0_shuf0, input0_mask); + __lasx_xvst(input0_result, reinterpret_cast<__m256i *>(out), 0); + out += 32; + + __m256i input1 = __lasx_xvor_v(t1_1, t3_1); + __m256i input1_shuf0 = __lasx_xvshuf_b(base64_tbl1, 
base64_tbl0, input1); + __m256i input1_shuf1 = __lasx_xvshuf_b( + base64_tbl3, base64_tbl2, __lasx_xvsub_b(input1, __lasx_xvldi(32))); + __m256i input1_mask = __lasx_xvslei_bu(input1, 31); + __m256i input1_result = + __lasx_xvbitsel_v(input1_shuf1, input1_shuf0, input1_mask); + __lasx_xvst(input1_result, reinterpret_cast<__m256i *>(out), 0); + out += 32; + + __m256i input2 = __lasx_xvor_v(t1_2, t3_2); + __m256i input2_shuf0 = __lasx_xvshuf_b(base64_tbl1, base64_tbl0, input2); + __m256i input2_shuf1 = __lasx_xvshuf_b( + base64_tbl3, base64_tbl2, __lasx_xvsub_b(input2, __lasx_xvldi(32))); + __m256i input2_mask = __lasx_xvslei_bu(input2, 31); + __m256i input2_result = + __lasx_xvbitsel_v(input2_shuf1, input2_shuf0, input2_mask); + __lasx_xvst(input2_result, reinterpret_cast<__m256i *>(out), 0); + out += 32; + + __m256i input3 = __lasx_xvor_v(t1_3, t3_3); + __m256i input3_shuf0 = __lasx_xvshuf_b(base64_tbl1, base64_tbl0, input3); + __m256i input3_shuf1 = __lasx_xvshuf_b( + base64_tbl3, base64_tbl2, __lasx_xvsub_b(input3, __lasx_xvldi(32))); + __m256i input3_mask = __lasx_xvslei_bu(input3, 31); + __m256i input3_result = + __lasx_xvbitsel_v(input3_shuf1, input3_shuf0, input3_mask); + __lasx_xvst(input3_result, reinterpret_cast<__m256i *>(out), 0); + out += 32; + } + for (; i + 28 <= srclen; i += 24) { + + __m128i in_lo = __lsx_vld(reinterpret_cast(input + i), 0); + __m128i in_hi = + __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 1); + + __m256i in = lasx_set_q(in_hi, in_lo); + + // bytes from groups A, B and C are needed in separate 32-bit lanes + // in = [DDDD|CCCC|BBBB|AAAA] + // + // an input triplet has layout + // [????????|ccdddddd|bbbbcccc|aaaaaabb] + // byte 3 byte 2 byte 1 byte 0 -- byte 3 comes from the next + // triplet + // + // shuffling changes the order of bytes: 1, 0, 2, 1 + // [bbbbcccc|ccdddddd|aaaaaabb|bbbbcccc] + // ^^^^ ^^^^^^^^ ^^^^^^^^ ^^^^ + // processed bits + in = __lasx_xvshuf_b(in, in, (__m256i)shuf); + + // unpacking + // t0 = 
[0000cccc|cc000000|aaaaaa00|00000000] + __m256i t0 = __lasx_xvand_v(in, v_fc0fc00); + // t1 = [00000000|00cccccc|00000000|00aaaaaa] + // ((c >> 6), (a >> 10)) + __m256i t1 = __lasx_xvsrl_h(t0, shift_r); + + // t2 = [00000000|00dddddd|000000bb|bbbb0000] + __m256i t2 = __lasx_xvand_v(in, v_3f03f0); + // t3 = [00dddddd|00000000|00bbbbbb|00000000] + // ((d << 8), (b << 4)) + __m256i t3 = __lasx_xvsll_h(t2, shift_l); + + // res = [00dddddd|00cccccc|00bbbbbb|00aaaaaa] = t1 | t3 + __m256i indices = __lasx_xvor_v(t1, t3); + + __m256i indices_shuf0 = __lasx_xvshuf_b(base64_tbl1, base64_tbl0, indices); + __m256i indices_shuf1 = __lasx_xvshuf_b( + base64_tbl3, base64_tbl2, __lasx_xvsub_b(indices, __lasx_xvldi(32))); + __m256i indices_mask = __lasx_xvslei_bu(indices, 31); + __m256i indices_result = + __lasx_xvbitsel_v(indices_shuf1, indices_shuf0, indices_mask); + __lasx_xvst(indices_result, reinterpret_cast<__m256i *>(out), 0); + out += 32; + } + + return i / 3 * 4 + scalar::base64::tail_encode_base64((char *)out, src + i, + srclen - i, options); +} + +static inline void compress(__m128i data, uint16_t mask, char *output) { + if (mask == 0) { + __lsx_vst(data, reinterpret_cast<__m128i *>(output), 0); + return; + } + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. 
+ + v2u64 shufmask = {tables::base64::thintable_epi8[mask1], + tables::base64::thintable_epi8[mask2]}; + + // we increment by 0x08 the second half of the mask + const v4u32 hi = {0, 0, 0x08080808, 0x08080808}; + __m128i shufmask1 = __lsx_vadd_b((__m128i)shufmask, (__m128i)hi); + + // this is the version "nearly pruned" + __m128i pruned = __lsx_vshuf_b(data, data, shufmask1); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = tables::base64::BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. + __m128i compactmask = + __lsx_vld(reinterpret_cast( + tables::base64::pshufb_combine_table + pop1 * 8), + 0); + __m128i answer = __lsx_vshuf_b(pruned, pruned, compactmask); + + __lsx_vst(answer, reinterpret_cast<__m128i *>(output), 0); +} + +struct block64 { + __m256i chunks[2]; +}; + +template +static inline uint32_t to_base64_mask(__m256i *src, bool *error) { + __m256i ascii_space_tbl = + ____m256i((__m128i)v16u8{0x20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x9, 0xa, 0x0, 0xc, 0xd, 0x0, 0x0}); + // credit: aqrit + __m256i delta_asso; + if (default_or_url) { + delta_asso = + ____m256i((__m128i)v16u8{0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x11, 0x0, 0x16}); + } else { + delta_asso = + ____m256i((__m128i)v16u8{0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x0, + 0x0, 0x0, 0x0, 0x0, 0xF, 0x0, 0xF}); + } + __m256i delta_values; + if (default_or_url) { + delta_values = ____m256i( + (__m128i)v16i8{int8_t(0xBF), int8_t(0xE0), int8_t(0xB9), int8_t(0x13), + int8_t(0x04), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), + int8_t(0xB9), int8_t(0x00), int8_t(0xFF), int8_t(0x11), + int8_t(0xFF), int8_t(0xBF), int8_t(0x10), int8_t(0xB9)}); + } else if (base64_url) { + delta_values = ____m256i( + 
(__m128i)v16i8{int8_t(0x00), int8_t(0x00), int8_t(0x00), int8_t(0x13), + int8_t(0x04), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), + int8_t(0xB9), int8_t(0x00), int8_t(0x11), int8_t(0xC3), + int8_t(0xBF), int8_t(0xE0), int8_t(0xB9), int8_t(0xB9)}); + } else { + delta_values = ____m256i( + (__m128i)v16i8{int8_t(0x00), int8_t(0x00), int8_t(0x00), int8_t(0x13), + int8_t(0x04), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), + int8_t(0xB9), int8_t(0x00), int8_t(0x10), int8_t(0xC3), + int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), int8_t(0xB9)}); + } + + __m256i check_asso; + if (default_or_url) { + check_asso = ____m256i((__m128i)v16u8{0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x03, 0x07, + 0x0B, 0x0E, 0x0B, 0x06}); + + } else if (base64_url) { + check_asso = ____m256i((__m128i)v16u8{0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x03, 0x07, + 0x0B, 0x06, 0x0B, 0x12}); + } else { + check_asso = ____m256i((__m128i)v16u8{0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x03, 0x07, + 0x0B, 0x0B, 0x0B, 0x0F}); + } + + __m256i check_values; + if (default_or_url) { + + check_values = ____m256i( + (__m128i)v16i8{int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), + int8_t(0xCF), int8_t(0xBF), int8_t(0xD5), int8_t(0xA6), + int8_t(0xB5), int8_t(0xA1), int8_t(0x00), int8_t(0x80), + int8_t(0x00), int8_t(0x80), int8_t(0x00), int8_t(0x80)}); + } else if (base64_url) { + check_values = ____m256i( + (__m128i)v16i8{int8_t(0x0), int8_t(0x80), int8_t(0x80), int8_t(0x80), + int8_t(0xCF), int8_t(0xBF), int8_t(0xD3), int8_t(0xA6), + int8_t(0xB5), int8_t(0x86), int8_t(0xD0), int8_t(0x80), + int8_t(0xB0), int8_t(0x80), int8_t(0x0), int8_t(0x0)}); + } else { + check_values = ____m256i( + (__m128i)v16i8{int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), + int8_t(0xCF), int8_t(0xBF), int8_t(0xD5), int8_t(0xA6), + int8_t(0xB5), int8_t(0x86), int8_t(0xD1), int8_t(0x80), + int8_t(0xB1), int8_t(0x80), int8_t(0x91), int8_t(0x80)}); + } + + __m256i shifted 
= __lasx_xvsrli_b(*src, 3); + __m256i asso_index = __lasx_xvand_v(*src, __lasx_xvldi(0xF)); + __m256i delta_hash = __lasx_xvavgr_bu( + __lasx_xvshuf_b(delta_asso, delta_asso, asso_index), shifted); + __m256i check_hash = __lasx_xvavgr_bu( + __lasx_xvshuf_b(check_asso, check_asso, asso_index), shifted); + + __m256i out = __lasx_xvsadd_b( + __lasx_xvshuf_b(delta_values, delta_values, delta_hash), *src); + __m256i chk = __lasx_xvsadd_b( + __lasx_xvshuf_b(check_values, check_values, check_hash), *src); + __m256i chk_ltz = __lasx_xvmskltz_b(chk); + unsigned int mask = __lasx_xvpickve2gr_wu(chk_ltz, 0); + mask = mask | (__lsx_vpickve2gr_hu(lasx_extracti128_hi(chk_ltz), 0) << 16); + if (mask) { + __m256i ascii_space = __lasx_xvseq_b( + __lasx_xvshuf_b(ascii_space_tbl, ascii_space_tbl, asso_index), *src); + __m256i ascii_space_ltz = __lasx_xvmskltz_b(ascii_space); + unsigned int ascii_space_mask = __lasx_xvpickve2gr_wu(ascii_space_ltz, 0); + ascii_space_mask = + ascii_space_mask | + (__lsx_vpickve2gr_hu(lasx_extracti128_hi(ascii_space_ltz), 0) << 16); + *error |= (mask != ascii_space_mask); + } + + *src = out; + return (uint32_t)mask; +} + +template +static inline uint64_t to_base64_mask(block64 *b, bool *error) { + *error = 0; + uint64_t m0 = + to_base64_mask(&b->chunks[0], error); + uint64_t m1 = + to_base64_mask(&b->chunks[1], error); + return m0 | (m1 << 32); +} + +static inline void copy_block(block64 *b, char *output) { + __lasx_xvst(b->chunks[0], reinterpret_cast<__m256i *>(output), 0); + __lasx_xvst(b->chunks[1], reinterpret_cast<__m256i *>(output), 32); +} + +static inline uint64_t compress_block(block64 *b, uint64_t mask, char *output) { + uint64_t nmask = ~mask; + uint64_t count = + __lsx_vpickve2gr_d(__lsx_vpcnt_h(__lsx_vreplgr2vr_d(nmask)), 0); + uint16_t *count_ptr = (uint16_t *)&count; + compress(lasx_extracti128_lo(b->chunks[0]), uint16_t(mask), output); + compress(lasx_extracti128_hi(b->chunks[0]), uint16_t(mask >> 16), + output + count_ptr[0]); + 
compress(lasx_extracti128_lo(b->chunks[1]), uint16_t(mask >> 32), + output + count_ptr[0] + count_ptr[1]); + compress(lasx_extracti128_hi(b->chunks[1]), uint16_t(mask >> 48), + output + count_ptr[0] + count_ptr[1] + count_ptr[2]); + return count_ones(nmask); +} + +template bool is_power_of_two(T x) { return (x & (x - 1)) == 0; } + +inline size_t compress_block_single(block64 *b, uint64_t mask, char *output) { + const size_t pos64 = trailing_zeroes(mask); + const int8_t pos = pos64 & 0xf; + + // Predefine the index vector + const v16u8 v1 = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + + switch (pos64 >> 4) { + case 0b00: { + const __m128i lane0 = lasx_extracti128_lo(b->chunks[0]); + const __m128i lane1 = lasx_extracti128_hi(b->chunks[0]); + + const __m128i v0 = __lsx_vreplgr2vr_b((uint8_t)(pos - 1)); + const __m128i v2 = __lsx_vslt_b(v0, (__m128i)v1); // v1 > v0 + const __m128i sh = __lsx_vsub_b((__m128i)v1, v2); + const __m128i compressed = __lsx_vshuf_b(lane0, lane0, sh); + + __lsx_vst(compressed, reinterpret_cast<__m128i *>(output + 0 * 16), 0); + __lsx_vst(lane1, reinterpret_cast<__m128i *>(output + 1 * 16 - 1), 0); + __lasx_xvst(b->chunks[1], reinterpret_cast<__m256i *>(output + 2 * 16 - 1), + 0); + } break; + case 0b01: { + const __m128i lane0 = lasx_extracti128_lo(b->chunks[0]); + const __m128i lane1 = lasx_extracti128_hi(b->chunks[0]); + __lsx_vst(lane0, reinterpret_cast<__m128i *>(output + 0 * 16), 0); + + const __m128i v0 = __lsx_vreplgr2vr_b((uint8_t)(pos - 1)); + const __m128i v2 = __lsx_vslt_b(v0, (__m128i)v1); + const __m128i sh = __lsx_vsub_b((__m128i)v1, v2); + const __m128i compressed = __lsx_vshuf_b(lane1, lane1, sh); + + __lsx_vst(compressed, reinterpret_cast<__m128i *>(output + 1 * 16), 0); + __lasx_xvst(b->chunks[1], reinterpret_cast<__m256i *>(output + 2 * 16 - 1), + 0); + } break; + case 0b10: { + __lasx_xvst(b->chunks[0], reinterpret_cast<__m256i *>(output + 0 * 16), 0); + + const __m128i lane2 = 
lasx_extracti128_lo(b->chunks[1]); + const __m128i lane3 = lasx_extracti128_hi(b->chunks[1]); + + const __m128i v0 = __lsx_vreplgr2vr_b((uint8_t)(pos - 1)); + const __m128i v2 = __lsx_vslt_b(v0, (__m128i)v1); + const __m128i sh = __lsx_vsub_b((__m128i)v1, v2); + const __m128i compressed = __lsx_vshuf_b(lane2, lane2, sh); + + __lsx_vst(compressed, reinterpret_cast<__m128i *>(output + 2 * 16), 0); + __lsx_vst(lane3, reinterpret_cast<__m128i *>(output + 3 * 16 - 1), 0); + } break; + case 0b11: { + __lasx_xvst(b->chunks[0], reinterpret_cast<__m256i *>(output + 0 * 16), 0); + __lsx_vst(lasx_extracti128_lo(b->chunks[1]), + reinterpret_cast<__m128i *>(output + 2 * 16), 0); + + const __m128i lane3 = lasx_extracti128_hi(b->chunks[1]); + + const __m128i v0 = __lsx_vreplgr2vr_b((uint8_t)(pos - 1)); + const __m128i v2 = __lsx_vslt_b(v0, (__m128i)v1); + const __m128i sh = __lsx_vsub_b((__m128i)v1, v2); + const __m128i compressed = __lsx_vshuf_b(lane3, lane3, sh); + + __lsx_vst(compressed, reinterpret_cast<__m128i *>(output + 3 * 16), 0); + } break; + } + return 63; +} + +// The caller of this function is responsible to ensure that there are 64 bytes +// available from reading at src. The data is read into a block64 structure. +static inline void load_block(block64 *b, const char *src) { + b->chunks[0] = __lasx_xvld(reinterpret_cast(src), 0); + b->chunks[1] = __lasx_xvld(reinterpret_cast(src), 32); +} + +// The caller of this function is responsible to ensure that there are 128 bytes +// available from reading at src. The data is read into a block64 structure. 
+static inline void load_block(block64 *b, const char16_t *src) { + __m256i m1 = __lasx_xvld(reinterpret_cast(src), 0); + __m256i m2 = __lasx_xvld(reinterpret_cast(src), 32); + __m256i m3 = __lasx_xvld(reinterpret_cast(src), 64); + __m256i m4 = __lasx_xvld(reinterpret_cast(src), 96); + b->chunks[0] = __lasx_xvpermi_d(__lasx_xvssrlni_bu_h(m2, m1, 0), 0b11011000); + b->chunks[1] = __lasx_xvpermi_d(__lasx_xvssrlni_bu_h(m4, m3, 0), 0b11011000); +} + +static inline void base64_decode(char *out, __m256i str) { + __m256i t0 = __lasx_xvor_v( + __lasx_xvslli_w(str, 26), + __lasx_xvslli_w(__lasx_xvand_v(str, lasx_splat_u32(0x0000ff00)), 12)); + __m256i t1 = + __lasx_xvsrli_w(__lasx_xvand_v(str, lasx_splat_u32(0x003f0000)), 2); + __m256i t2 = __lasx_xvor_v(t0, t1); + __m256i t3 = __lasx_xvor_v(t2, __lasx_xvsrli_w(str, 16)); + __m256i pack_shuffle = ____m256i( + (__m128i)v16u8{3, 2, 1, 7, 6, 5, 11, 10, 9, 15, 14, 13, 0, 0, 0, 0}); + t3 = __lasx_xvshuf_b(t3, t3, (__m256i)pack_shuffle); + + // Store the output: + __lsx_vst(lasx_extracti128_lo(t3), out, 0); + __lsx_vst(lasx_extracti128_hi(t3), out, 12); +} +// decode 64 bytes and output 48 bytes +static inline void base64_decode_block(char *out, const char *src) { + base64_decode(out, __lasx_xvld(reinterpret_cast(src), 0)); + base64_decode(out + 24, + __lasx_xvld(reinterpret_cast(src), 32)); +} + +static inline void base64_decode_block_safe(char *out, const char *src) { + base64_decode(out, __lasx_xvld(reinterpret_cast(src), 0)); + alignas(32) char buffer[32]; + base64_decode(buffer, + __lasx_xvld(reinterpret_cast(src), 32)); + std::memcpy(out + 24, buffer, 24); +} + +static inline void base64_decode_block(char *out, block64 *b) { + base64_decode(out, b->chunks[0]); + base64_decode(out + 24, b->chunks[1]); +} +static inline void base64_decode_block_safe(char *out, block64 *b) { + base64_decode(out, b->chunks[0]); + alignas(32) char buffer[32]; + base64_decode(buffer, b->chunks[1]); + std::memcpy(out + 24, buffer, 24); +} + 
+template +full_result +compress_decode_base64(char *dst, const chartype *src, size_t srclen, + base64_options options, + last_chunk_handling_options last_chunk_options) { + const uint8_t *to_base64 = + default_or_url ? tables::base64::to_base64_default_or_url_value + : (base64_url ? tables::base64::to_base64_url_value + : tables::base64::to_base64_value); + auto ri = simdutf::scalar::base64::find_end(src, srclen, options); + size_t equallocation = ri.equallocation; + size_t equalsigns = ri.equalsigns; + srclen = ri.srclen; + size_t full_input_length = ri.full_input_length; + if (srclen == 0) { + if (!ignore_garbage && equalsigns > 0) { + return {INVALID_BASE64_CHARACTER, equallocation, 0}; + } + return {SUCCESS, full_input_length, 0}; + } + char *end_of_safe_64byte_zone = + (srclen + 3) / 4 * 3 >= 63 ? dst + (srclen + 3) / 4 * 3 - 63 : dst; + + const chartype *const srcinit = src; + const char *const dstinit = dst; + const chartype *const srcend = src + srclen; + + constexpr size_t block_size = 6; + static_assert(block_size >= 2, "block_size must be at least two"); + char buffer[block_size * 64]; + char *bufferptr = buffer; + if (srclen >= 64) { + const chartype *const srcend64 = src + srclen - 64; + while (src <= srcend64) { + block64 b; + load_block(&b, src); + src += 64; + bool error = false; + uint64_t badcharmask = + to_base64_mask(&b, &error); + if (error && !ignore_garbage) { + src -= 64; + while (src < srcend && scalar::base64::is_eight_byte(*src) && + to_base64[uint8_t(*src)] <= 64) { + src++; + } + return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), + size_t(dst - dstinit)}; + } + if (badcharmask != 0) { + if (is_power_of_two(badcharmask)) { + bufferptr += compress_block_single(&b, badcharmask, bufferptr); + } else { + bufferptr += compress_block(&b, badcharmask, bufferptr); + } + } else if (bufferptr != buffer) { + copy_block(&b, bufferptr); + bufferptr += 64; + } else { + if (dst >= end_of_safe_64byte_zone) { + 
base64_decode_block_safe(dst, &b); + } else { + base64_decode_block(dst, &b); + } + dst += 48; + } + if (bufferptr >= (block_size - 1) * 64 + buffer) { + for (size_t i = 0; i < (block_size - 2); i++) { + base64_decode_block(dst, buffer + i * 64); + dst += 48; + } + if (dst >= end_of_safe_64byte_zone) { + base64_decode_block_safe(dst, buffer + (block_size - 2) * 64); + } else { + base64_decode_block(dst, buffer + (block_size - 2) * 64); + } + dst += 48; + std::memcpy(buffer, buffer + (block_size - 1) * 64, + 64); // 64 might be too much + bufferptr -= (block_size - 1) * 64; + } + } + } + + char *buffer_start = buffer; + // Optimization note: if this is almost full, then it is worth our + // time, otherwise, we should just decode directly. + int last_block = (int)((bufferptr - buffer_start) % 64); + if (last_block != 0 && srcend - src + last_block >= 64) { + + while ((bufferptr - buffer_start) % 64 != 0 && src < srcend) { + uint8_t val = to_base64[uint8_t(*src)]; + *bufferptr = char(val); + if ((!scalar::base64::is_eight_byte(*src) || val > 64) && + !ignore_garbage) { + return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), + size_t(dst - dstinit)}; + } + bufferptr += (val <= 63); + src++; + } + } + + for (; buffer_start + 64 <= bufferptr; buffer_start += 64) { + if (dst >= end_of_safe_64byte_zone) { + base64_decode_block_safe(dst, buffer_start); + } else { + base64_decode_block(dst, buffer_start); + } + dst += 48; + } + if ((bufferptr - buffer_start) % 64 != 0) { + while (buffer_start + 4 < bufferptr) { + uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + + (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + + (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + + (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) + << 8; + // lasx is little-endian + triple = scalar::u32_swap_bytes(triple); + std::memcpy(dst, &triple, 4); + + dst += 3; + buffer_start += 4; + } + if (buffer_start + 4 <= bufferptr) { + uint32_t triple = 
((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + + (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + + (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + + (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) + << 8; + // lasx is little-endian + triple = scalar::u32_swap_bytes(triple); + std::memcpy(dst, &triple, 3); + + dst += 3; + buffer_start += 4; + } + // we may have 1, 2 or 3 bytes left and we need to decode them so let us + // backtrack + int leftover = int(bufferptr - buffer_start); + while (leftover > 0) { + if (!ignore_garbage) { + while (to_base64[uint8_t(*(src - 1))] == 64) { + src--; + } + } else { + while (to_base64[uint8_t(*(src - 1))] >= 64) { + src--; + } + } + src--; + leftover--; + } + } + if (src < srcend + equalsigns) { + full_result r = scalar::base64::base64_tail_decode( + dst, src, srcend - src, equalsigns, options, last_chunk_options); + r = scalar::base64::patch_tail_result( + r, size_t(src - srcinit), size_t(dst - dstinit), equallocation, + full_input_length, last_chunk_options); + // When is_partial(last_chunk_options) is true, we must either end with + // the end of the stream (beyond whitespace) or right after a non-ignorable + // character or at the very beginning of the stream. + // See https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64 + if (is_partial(last_chunk_options) && r.error == error_code::SUCCESS && + r.input_count < full_input_length) { + // First check if we can extend the input to the end of the stream + while (r.input_count < full_input_length && + base64_ignorable(*(srcinit + r.input_count), options)) { + r.input_count++; + } + // If we are still not at the end of the stream, then we must backtrack + // to the last non-ignorable character. 
+ if (r.input_count < full_input_length) { + while (r.input_count > 0 && + base64_ignorable(*(srcinit + r.input_count - 1), options)) { + r.input_count--; + } + } + } + return r; + } + if (equalsigns > 0 && !ignore_garbage) { + if ((size_t(dst - dstinit) % 3 == 0) || + ((size_t(dst - dstinit) % 3) + 1 + equalsigns != 4)) { + return {INVALID_BASE64_CHARACTER, equallocation, size_t(dst - dstinit)}; + } + } + return {SUCCESS, srclen, size_t(dst - dstinit)}; +} +/* end file src/lasx/lasx_base64.cpp */ +/* begin file src/lasx/lasx_find.cpp */ +simdutf_really_inline const char *util_find(const char *start, const char *end, + char character) noexcept { + if (start >= end) + return end; + + const int step = 32; + __m256i char_vec = __lasx_xvreplgr2vr_b(static_cast(character)); + + while (end - start >= step) { + __m256i data = __lasx_xvld(reinterpret_cast(start), 0); + __m256i cmp = __lasx_xvseq_b(data, char_vec); + if (__lasx_xbnz_v(cmp)) { + __m256i res = __lasx_xvmsknz_b(cmp); + uint32_t mask0 = __lasx_xvpickve2gr_wu(res, 0); + uint32_t mask1 = __lasx_xvpickve2gr_wu(res, 4); + uint32_t mask = (mask0 | (mask1 << 16)); + return start + trailing_zeroes(mask); + } + + start += step; + } + + // Handle remaining bytes with scalar loop + for (; start < end; ++start) { + if (*start == character) { + return start; + } + } + + return end; +} + +simdutf_really_inline const char16_t *util_find(const char16_t *start, + const char16_t *end, + char16_t character) noexcept { + if (start >= end) + return end; + + const int step = 16; + __m256i char_vec = __lasx_xvreplgr2vr_h(static_cast(character)); + + while (end - start >= step) { + __m256i data = __lasx_xvld(reinterpret_cast(start), 0); + __m256i cmp = __lasx_xvseq_h(data, char_vec); + if (__lasx_xbnz_v(cmp)) { + __m256i res = __lasx_xvmsknz_b(cmp); + uint32_t mask0 = __lasx_xvpickve2gr_wu(res, 0); + uint32_t mask1 = __lasx_xvpickve2gr_wu(res, 4); + uint32_t mask = (mask0 | (mask1 << 16)); + return start + trailing_zeroes(mask) / 2; 
+ } + + start += step; + } + + // Handle remaining elements with scalar loop + for (; start < end; ++start) { + if (*start == character) { + return start; + } + } + + return end; +} +/* end file src/lasx/lasx_find.cpp */ +#endif // SIMDUTF_FEATURE_BASE64 + +} // namespace +} // namespace lasx +} // namespace simdutf + +/* begin file src/generic/buf_block_reader.h */ +namespace simdutf { +namespace lasx { +namespace { + +// Walks through a buffer in block-sized increments, loading the last part with +// spaces +template struct buf_block_reader { +public: + simdutf_really_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdutf_really_inline size_t block_index(); + simdutf_really_inline bool has_full_block() const; + simdutf_really_inline const uint8_t *full_block() const; + /** + * Get the last block, padded with spaces. + * + * There will always be a last block, with at least 1 byte, unless len == 0 + * (in which case this function fills the buffer with spaces and returns 0. In + * particular, if len == STEP_SIZE there will be 0 full_blocks and 1 remainder + * block with STEP_SIZE bytes and no spaces for padding. + * + * @return the number of effective characters in the last block. + */ + simdutf_really_inline size_t get_remainder(uint8_t *dst) const; + simdutf_really_inline void advance(); + +private: + const uint8_t *buf; + const size_t len; + const size_t lenminusstep; + size_t idx; +}; + +// Routines to print masks and text for debugging bitmask operations +simdutf_unused static char *format_input_text_64(const uint8_t *text) { + static char *buf = + reinterpret_cast(malloc(sizeof(simd8x64) + 1)); + for (size_t i = 0; i < sizeof(simd8x64); i++) { + buf[i] = int8_t(text[i]) < ' ' ? 
'_' : int8_t(text[i]); + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +// Routines to print masks and text for debugging bitmask operations +simdutf_unused static char *format_input_text(const simd8x64 &in) { + static char *buf = + reinterpret_cast(malloc(sizeof(simd8x64) + 1)); + in.store(reinterpret_cast(buf)); + for (size_t i = 0; i < sizeof(simd8x64); i++) { + if (buf[i] < ' ') { + buf[i] = '_'; + } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdutf_unused static char *format_mask(uint64_t mask) { + static char *buf = reinterpret_cast(malloc(64 + 1)); + for (size_t i = 0; i < 64; i++) { + buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; + } + buf[64] = '\0'; + return buf; +} + +template +simdutf_really_inline +buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) + : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, + idx{0} {} + +template +simdutf_really_inline size_t buf_block_reader::block_index() { + return idx; +} + +template +simdutf_really_inline bool buf_block_reader::has_full_block() const { + return idx < lenminusstep; +} + +template +simdutf_really_inline const uint8_t * +buf_block_reader::full_block() const { + return &buf[idx]; +} + +template +simdutf_really_inline size_t +buf_block_reader::get_remainder(uint8_t *dst) const { + if (len == idx) { + return 0; + } // memcpy(dst, null, 0) will trigger an error with some sanitizers + std::memset(dst, 0x20, + STEP_SIZE); // std::memset STEP_SIZE because it is more efficient + // to write out 8 or 16 bytes at once. 
+ std::memcpy(dst, buf + idx, len - idx); + return len - idx; +} + +template +simdutf_really_inline void buf_block_reader::advance() { + idx += STEP_SIZE; +} + +} // unnamed namespace +} // namespace lasx +} // namespace simdutf +/* end file src/generic/buf_block_reader.h */ +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +/* begin file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ +namespace simdutf { +namespace lasx { +namespace { +namespace utf8_validation { + +using namespace simd; + +simdutf_really_inline simd8 +check_special_cases(const simd8 input, const simd8 prev1) { + // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) + // Bit 1 = Too Long (ASCII followed by continuation) + // Bit 2 = Overlong 3-byte + // Bit 4 = Surrogate + // Bit 5 = Overlong 2-byte + // Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + 
TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); + constexpr const uint8_t CARRY = + TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . + const simd8 byte_1_low = + (prev1 & 0x0F) + .lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | + OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); + return (byte_1_high & byte_1_low & byte_2_high); +} +simdutf_really_inline simd8 +check_multibyte_lengths(const simd8 input, + const simd8 prev_input, + const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = + simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return 
must23_80 ^ sc; +} + +// +// Return nonzero if there are incomplete multibyte characters at the end of the +// block: e.g. if there is a 4-byte character, but it is 3 bytes from the end. +// +simdutf_really_inline simd8 is_incomplete(const simd8 input) { + // If the previous input's last 3 bytes match this, they're too short (they + // ended at EOF): + // ... 1111____ 111_____ 11______ + static const uint8_t max_array[32] = {255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 0b11110000u - 1, + 0b11100000u - 1, + 0b11000000u - 1}; + const simd8 max_value( + &max_array[sizeof(max_array) - sizeof(simd8)]); + return input.gt_bits(max_value); +} + +struct utf8_checker { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + // The last input we received + simd8 prev_input_block; + // Whether the last input we received was incomplete (used for ASCII fast + // path) + simd8 prev_incomplete; + + // + // Check whether the current bytes are valid UTF-8. + // + simdutf_really_inline void check_utf8_bytes(const simd8 input, + const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ + // lead bytes (2, 3, 4-byte leads become large positive numbers instead of + // small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + // The only problem that can happen at EOF is that a multibyte character is + // too short or a byte value too large in the last bytes: check_special_cases + // only checks for bytes too large in the first of two bytes. + simdutf_really_inline void check_eof() { + // If the previous block had incomplete UTF-8 characters at the end, an + // ASCII block can't possibly finish them. 
+ this->error |= this->prev_incomplete; + } + + simdutf_really_inline void check_next_input(const simd8x64 &input) { + if (simdutf_likely(is_ascii(input))) { + this->error |= this->prev_incomplete; + } else { + // you might think that a for-loop would work, but under Visual Studio, it + // is not good enough. + static_assert((simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + this->prev_incomplete = + is_incomplete(input.chunks[simd8x64::NUM_CHUNKS - 1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS - 1]; + } + } + + // do not forget to call check_eof! + simdutf_really_inline bool errors() const { + return this->error.any_bits_set_anywhere(); + } + +}; // struct utf8_checker +} // namespace utf8_validation + +using utf8_validation::utf8_checker; + +} // unnamed namespace +} // namespace lasx +} // namespace simdutf +/* end file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ +/* begin file src/generic/utf8_validation/utf8_validator.h */ +namespace simdutf { +namespace lasx { +namespace { +namespace utf8_validation { + +/** + * Validates that the string is actual UTF-8. 
+ */ +template +bool generic_validate_utf8(const uint8_t *input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + return !c.errors(); +} + +bool generic_validate_utf8(const char *input, size_t length) { + return generic_validate_utf8( + reinterpret_cast(input), length); +} + +/** + * Validates that the string is actual UTF-8 and stops on errors. + */ +template +result generic_validate_utf8_with_errors(const uint8_t *input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + size_t count{0}; + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + if (c.errors()) { + if (count != 0) { + count--; + } // Sometimes the error is only detected in the next chunk + result res = scalar::utf8::rewind_and_validate_with_errors( + reinterpret_cast(input), + reinterpret_cast(input + count), length - count); + res.count += count; + return res; + } + reader.advance(); + count += 64; + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + if (c.errors()) { + if (count != 0) { + count--; + } // Sometimes the error is only detected in the next chunk + result res = scalar::utf8::rewind_and_validate_with_errors( + reinterpret_cast(input), + reinterpret_cast(input) + count, length - count); + res.count += count; + return res; + } else { + return result(error_code::SUCCESS, length); + } +} + +result generic_validate_utf8_with_errors(const char *input, size_t length) { + return generic_validate_utf8_with_errors( + reinterpret_cast(input), length); +} + +} // namespace utf8_validation +} // unnamed namespace +} // 
namespace lasx +} // namespace simdutf +/* end file src/generic/utf8_validation/utf8_validator.h */ +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_ASCII +/* begin file src/generic/ascii_validation.h */ +namespace simdutf { +namespace lasx { +namespace { +namespace ascii_validation { + +result generic_validate_ascii_with_errors(const char *input, size_t length) { + buf_block_reader<64> reader(reinterpret_cast(input), length); + size_t count{0}; + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + if (!in.is_ascii()) { + result res = scalar::ascii::validate_with_errors( + reinterpret_cast(input + count), length - count); + return result(res.error, count + res.count); + } + reader.advance(); + + count += 64; + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + if (!in.is_ascii()) { + result res = scalar::ascii::validate_with_errors( + reinterpret_cast(input + count), length - count); + return result(res.error, count + res.count); + } else { + return result(error_code::SUCCESS, length); + } +} + +bool generic_validate_ascii(const char *input, size_t length) { + buf_block_reader<64> reader(reinterpret_cast(input), length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + if (!in.is_ascii()) { + return false; + } + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + return in.is_ascii(); +} + +} // namespace ascii_validation +} // unnamed namespace +} // namespace lasx +} // namespace simdutf +/* end file src/generic/ascii_validation.h */ +#endif // SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + // transcoding from UTF-8 to Latin 1 +/* begin file src/generic/utf8_to_latin1/utf8_to_latin1.h */ +namespace simdutf { +namespace lasx { +namespace { +namespace utf8_to_latin1 { +using namespace simd; + +simdutf_really_inline simd8 +check_special_cases(const simd8 
input, const simd8 prev1) { + // For UTF-8 to Latin 1, we can allow any ASCII character, and any + // continuation byte, but the non-ASCII leading bytes must be 0b11000011 or + // 0b11000010 and nothing else. + // + // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) + // Bit 1 = Too Long (ASCII followed by continuation) + // Bit 2 = Overlong 3-byte + // Bit 4 = Surrogate + // Bit 5 = Overlong 2-byte + // Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ + constexpr const uint8_t FORBIDDEN = 0xff; + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + FORBIDDEN, + // 1110____ ________ + FORBIDDEN, + // 1111____ ________ + FORBIDDEN); + constexpr const uint8_t CARRY = + TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . 
+ const simd8 byte_1_low = + (prev1 & 0x0F) + .lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, CARRY, + + // ____0100 ________ + FORBIDDEN, + // ____0101 ________ + FORBIDDEN, + // ____011_ ________ + FORBIDDEN, FORBIDDEN, + + // ____1___ ________ + FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN, + // ____1101 ________ + FORBIDDEN, FORBIDDEN, FORBIDDEN); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | + OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); + return (byte_1_high & byte_1_low & byte_2_high); +} + +struct validating_transcoder { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + + validating_transcoder() : error(uint8_t(0)) {} + // + // Check whether the current bytes are valid UTF-8. + // + simdutf_really_inline void check_utf8_bytes(const simd8 input, + const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ + // lead bytes (2, 3, 4-byte leads become large positive numbers instead of + // small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + this->error |= check_special_cases(input, prev1); + } + + simdutf_really_inline size_t convert(const char *in, size_t size, + char *latin1_output) { + size_t pos = 0; + char *start{latin1_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_latin1. 
If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 16 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 16; margin--) { + leading_byte += (int8_t(in[margin - 1]) > + -65); // twos complement of -65 is 1011 1111 ... + } + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store((int8_t *)latin1_output); + latin1_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = + input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in + // this case, we also have ASCII to account for. 
+ if (utf8_continuation_mask & 1) { + return 0; // error + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_latin1( + in + pos, utf8_end_of_code_point_mask, latin1_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + return 0; + } + if (pos < size) { + size_t howmany = + scalar::utf8_to_latin1::convert(in + pos, size - pos, latin1_output); + if (howmany == 0) { + return 0; + } + latin1_output += howmany; + } + return latin1_output - start; + } + + simdutf_really_inline result convert_with_errors(const char *in, size_t size, + char *latin1_output) { + size_t pos = 0; + char *start{latin1_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_latin1. 
If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store((int8_t *)latin1_output); + latin1_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + if (errors()) { + // rewind_and_convert_with_errors will seek a potential error from + // in+pos onward, with the ability to go back up to pos bytes, and + // read size-pos bytes forward. 
+ result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( + pos, in + pos, size - pos, latin1_output); + res.count += pos; + return res; + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_latin1( + in + pos, utf8_end_of_code_point_mask, latin1_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. 
+ result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( + pos, in + pos, size - pos, latin1_output); + res.count += pos; + return res; + } + if (pos < size) { + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. + result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( + pos, in + pos, size - pos, latin1_output); + if (res.error) { // In case of error, we want the error position + res.count += pos; + return res; + } else { // In case of success, we want the number of word written + latin1_output += res.count; + } + } + return result(error_code::SUCCESS, latin1_output - start); + } + + simdutf_really_inline bool errors() const { + return this->error.any_bits_set_anywhere(); + } + +}; // struct utf8_checker +} // namespace utf8_to_latin1 +} // unnamed namespace +} // namespace lasx +} // namespace simdutf +/* end file src/generic/utf8_to_latin1/utf8_to_latin1.h */ +/* begin file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ +namespace simdutf { +namespace lasx { +namespace { +namespace utf8_to_latin1 { +using namespace simd; + +simdutf_really_inline size_t convert_valid(const char *in, size_t size, + char *latin1_output) { + size_t pos = 0; + char *start{latin1_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the last + // 16 bytes, and if the data is valid, then it is entirely safe because 16 + // UTF-8 bytes generate much more than 8 bytes. However, you cannot generally + // assume that you have valid UTF-8 input, so we are going to go back from the + // end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > + -65); // twos complement of -65 is 1011 1111 ... 
+ } + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store((int8_t *)latin1_output); + latin1_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, it + // is not good enough. + uint64_t utf8_continuation_mask = + input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in + // this case, we also have ASCII to account for. + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_latin1( + in + pos, utf8_end_of_code_point_mask, latin1_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). 
In practice we expect an + // 85% to 90% efficiency. + } + } + if (pos < size) { + size_t howmany = scalar::utf8_to_latin1::convert_valid(in + pos, size - pos, + latin1_output); + latin1_output += howmany; + } + return latin1_output - start; +} + +} // namespace utf8_to_latin1 +} // namespace +} // namespace lasx +} // namespace simdutf + // namespace simdutf +/* end file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + // transcoding from UTF-8 to UTF-16 +/* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ +namespace simdutf { +namespace lasx { +namespace { +namespace utf8_to_utf16 { + +using namespace simd; + +template +simdutf_warn_unused size_t convert_valid(const char *input, size_t size, + char16_t *utf16_output) noexcept { + // The implementation is not specific to haswell and should be moved to the + // generic directory. + size_t pos = 0; + char16_t *start{utf16_output}; + const size_t safety_margin = 16; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + // this loop could be unrolled further. For example, we could process the + // mask far more than 64 bytes. + simd8x64 in(reinterpret_cast(input + pos)); + if (in.is_ascii()) { + in.store_ascii_as_utf16(utf16_output); + utf16_output += 64; + pos += 64; + } else { + // Slow path. We hope that the compiler will recognize that this is a slow + // path. Anything that is not a continuation mask is a 'leading byte', + // that is, the start of a new code point. + uint64_t utf8_continuation_mask = in.lt(-65 + 1); + // -65 is 0b10111111 in two-complement's, so largest possible continuation + // byte + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + // The *start* of code points is not so useful, rather, we want the *end* + // of code points. 
+ uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times when using solely + // the slow/regular path, and at least four times if there are fast paths. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + // + // Thus we may allow convert_masked_utf8_to_utf16 to process + // more bytes at a time under a fast-path mode where 16 bytes + // are consumed at once (e.g., when encountering ASCII). + size_t consumed = convert_masked_utf8_to_utf16( + input + pos, utf8_end_of_code_point_mask, utf16_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. 
+ } + } + utf16_output += scalar::utf8_to_utf16::convert_valid( + input + pos, size - pos, utf16_output); + return utf16_output - start; +} + +} // namespace utf8_to_utf16 +} // unnamed namespace +} // namespace lasx +} // namespace simdutf +/* end file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ +/* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */ +namespace simdutf { +namespace lasx { +namespace { +namespace utf8_to_utf16 { +using namespace simd; + +simdutf_really_inline simd8 +check_special_cases(const simd8 input, const simd8 prev1) { + // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) + // Bit 1 = Too Long (ASCII followed by continuation) + // Bit 2 = Overlong 3-byte + // Bit 4 = Surrogate + // Bit 5 = Overlong 2-byte + // Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | 
OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); + constexpr const uint8_t CARRY = + TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . + const simd8 byte_1_low = + (prev1 & 0x0F) + .lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | + OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); + return (byte_1_high & byte_1_low & byte_2_high); +} +simdutf_really_inline simd8 +check_multibyte_lengths(const simd8 input, + const simd8 prev_input, + const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = + simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; +} + +struct validating_transcoder 
{ + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + + validating_transcoder() : error(uint8_t(0)) {} + // + // Check whether the current bytes are valid UTF-8. + // + simdutf_really_inline void check_utf8_bytes(const simd8 input, + const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ + // lead bytes (2, 3, 4-byte leads become large positive numbers instead of + // small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + template + simdutf_really_inline size_t convert(const char *in, size_t size, + char16_t *utf16_output) { + size_t pos = 0; + char16_t *start{utf16_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store_ascii_as_utf16(utf16_output); + utf16_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. 
+ static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (utf8_continuation_mask & 1) { + return 0; // error + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_utf16( + in + pos, utf8_end_of_code_point_mask, utf16_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. 
So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + return 0; + } + if (pos < size) { + size_t howmany = scalar::utf8_to_utf16::convert( + in + pos, size - pos, utf16_output); + if (howmany == 0) { + return 0; + } + utf16_output += howmany; + } + return utf16_output - start; + } + + template + simdutf_really_inline result convert_with_errors(const char *in, size_t size, + char16_t *utf16_output) { + size_t pos = 0; + char16_t *start{utf16_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store_ascii_as_utf16(utf16_output); + utf16_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. 
+ static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (errors() || (utf8_continuation_mask & 1)) { + // rewind_and_convert_with_errors will seek a potential error from + // in+pos onward, with the ability to go back up to pos bytes, and + // read size-pos bytes forward. + result res = + scalar::utf8_to_utf16::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf16_output); + res.count += pos; + return res; + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. 
+ size_t consumed = convert_masked_utf8_to_utf16( + in + pos, utf8_end_of_code_point_mask, utf16_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. + result res = + scalar::utf8_to_utf16::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf16_output); + res.count += pos; + return res; + } + if (pos < size) { + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. + result res = + scalar::utf8_to_utf16::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf16_output); + if (res.error) { // In case of error, we want the error position + res.count += pos; + return res; + } else { // In case of success, we want the number of word written + utf16_output += res.count; + } + } + return result(error_code::SUCCESS, utf16_output - start); + } + + simdutf_really_inline bool errors() const { + return this->error.any_bits_set_anywhere(); + } + +}; // struct utf8_checker +} // namespace utf8_to_utf16 +} // unnamed namespace +} // namespace lasx +} // namespace simdutf +/* end file src/generic/utf8_to_utf16/utf8_to_utf16.h */ +/* begin file src/generic/utf8/utf16_length_from_utf8_bytemask.h */ +namespace simdutf { +namespace lasx { +namespace { +namespace utf8 { + +using namespace simd; + +simdutf_really_inline size_t utf16_length_from_utf8_bytemask(const char *in, + size_t size) { + using vector_i8 = simd8; + using vector_u8 = simd8; + using vector_u64 = simd64; + + constexpr size_t N = 
vector_i8::SIZE; + constexpr size_t max_iterations = 255 / 2; + + auto counters = vector_u64::zero(); + auto local = vector_u8::zero(); + + size_t iterations = 0; + size_t pos = 0; + size_t count = 0; + for (; pos + N <= size; pos += N) { + const auto input = + vector_i8::load(reinterpret_cast(in + pos)); + + const auto continuation = input > int8_t(-65); + const auto utf_4bytes = vector_u8(input.value) >= uint8_t(240); + + local -= vector_u8(continuation); + local -= vector_u8(utf_4bytes); + + iterations += 1; + if (iterations == max_iterations) { + counters += sum_8bytes(local); + local = vector_u8::zero(); + iterations = 0; + } + } + + if (iterations > 0) { + count += local.sum_bytes(); + } + + count += counters.sum(); + + return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos); +} + +} // namespace utf8 +} // unnamed namespace +} // namespace lasx +} // namespace simdutf +/* end file src/generic/utf8/utf16_length_from_utf8_bytemask.h */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + // transcoding from UTF-8 to UTF-32 +/* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ +namespace simdutf { +namespace lasx { +namespace { +namespace utf8_to_utf32 { + +using namespace simd; + +simdutf_warn_unused size_t convert_valid(const char *input, size_t size, + char32_t *utf32_output) noexcept { + size_t pos = 0; + char32_t *start{utf32_output}; + const size_t safety_margin = 16; // to avoid overruns! 
+ while (pos + 64 + safety_margin <= size) { + simd8x64 in(reinterpret_cast(input + pos)); + if (in.is_ascii()) { + in.store_ascii_as_utf32(utf32_output); + utf32_output += 64; + pos += 64; + } else { + // -65 is 0b10111111 in two-complement's, so largest possible continuation + // byte + uint64_t utf8_continuation_mask = in.lt(-65 + 1); + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + size_t max_starting_point = (pos + 64) - 12; + while (pos < max_starting_point) { + size_t consumed = convert_masked_utf8_to_utf32( + input + pos, utf8_end_of_code_point_mask, utf32_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + } + } + utf32_output += scalar::utf8_to_utf32::convert_valid(input + pos, size - pos, + utf32_output); + return utf32_output - start; +} + +} // namespace utf8_to_utf32 +} // unnamed namespace +} // namespace lasx +} // namespace simdutf +/* end file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ +/* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */ +namespace simdutf { +namespace lasx { +namespace { +namespace utf8_to_utf32 { +using namespace simd; + +simdutf_really_inline simd8 +check_special_cases(const simd8 input, const simd8 prev1) { + // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) + // Bit 1 = Too Long (ASCII followed by continuation) + // Bit 2 = Overlong 3-byte + // Bit 4 = Surrogate + // Bit 5 = Overlong 2-byte + // Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 
1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); + constexpr const uint8_t CARRY = + TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . + const simd8 byte_1_low = + (prev1 & 0x0F) + .lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | + OVERLONG_4, + // ________ 
1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); + return (byte_1_high & byte_1_low & byte_2_high); +} +simdutf_really_inline simd8 +check_multibyte_lengths(const simd8 input, + const simd8 prev_input, + const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = + simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; +} + +struct validating_transcoder { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + + validating_transcoder() : error(uint8_t(0)) {} + // + // Check whether the current bytes are valid UTF-8. + // + simdutf_really_inline void check_utf8_bytes(const simd8 input, + const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ + // lead bytes (2, 3, 4-byte leads become large positive numbers instead of + // small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + simdutf_really_inline size_t convert(const char *in, size_t size, + char32_t *utf32_output) { + size_t pos = 0; + char32_t *start{utf32_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 words when calling convert_masked_utf8_to_utf32. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 16 leading bytes, to give us a good margin. 
+ size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the fourth + // last leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store_ascii_as_utf32(utf32_output); + utf32_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (utf8_continuation_mask & 1) { + return 0; // we have an error + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. 
If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_utf32( + in + pos, utf8_end_of_code_point_mask, utf32_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + return 0; + } + if (pos < size) { + size_t howmany = + scalar::utf8_to_utf32::convert(in + pos, size - pos, utf32_output); + if (howmany == 0) { + return 0; + } + utf32_output += howmany; + } + return utf32_output - start; + } + + simdutf_really_inline result convert_with_errors(const char *in, size_t size, + char32_t *utf32_output) { + size_t pos = 0; + char32_t *start{utf32_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the fourth + // last leading byte. 
+ const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store_ascii_as_utf32(utf32_output); + utf32_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (errors() || (utf8_continuation_mask & 1)) { + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf32_output); + res.count += pos; + return res; + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. 
+ // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_utf32( + in + pos, utf8_end_of_code_point_mask, utf32_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf32_output); + res.count += pos; + return res; + } + if (pos < size) { + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf32_output); + if (res.error) { // In case of error, we want the error position + res.count += pos; + return res; + } else { // In case of success, we want the number of word written + utf32_output += res.count; + } + } + return result(error_code::SUCCESS, utf32_output - start); + } + + simdutf_really_inline bool errors() const { + return this->error.any_bits_set_anywhere(); + } + +}; // struct utf8_checker +} // namespace utf8_to_utf32 +} // unnamed namespace +} // namespace lasx +} // namespace simdutf +/* end file src/generic/utf8_to_utf32/utf8_to_utf32.h */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 +/* begin file src/generic/utf8.h */ +namespace simdutf { +namespace lasx { +namespace { +namespace utf8 { + +using namespace simd; + +simdutf_really_inline size_t count_code_points(const char *in, size_t size) { + size_t pos = 0; + size_t count = 0; + for (; pos + 64 <= size; pos += 64) { + simd8x64 input(reinterpret_cast(in + pos)); + uint64_t 
utf8_continuation_mask = input.gt(-65); + count += count_ones(utf8_continuation_mask); + } + return count + scalar::utf8::count_code_points(in + pos, size - pos); +} + +#ifdef SIMDUTF_SIMD_HAS_BYTEMASK +simdutf_really_inline size_t count_code_points_bytemask(const char *in, + size_t size) { + using vector_i8 = simd8; + using vector_u8 = simd8; + using vector_u64 = simd64; + + constexpr size_t N = vector_i8::SIZE; + constexpr size_t max_iterations = 255 / 4; + + size_t pos = 0; + size_t count = 0; + + auto counters = vector_u64::zero(); + auto local = vector_u8::zero(); + size_t iterations = 0; + for (; pos + 4 * N <= size; pos += 4 * N) { + const auto input0 = + simd8::load(reinterpret_cast(in + pos + 0 * N)); + const auto input1 = + simd8::load(reinterpret_cast(in + pos + 1 * N)); + const auto input2 = + simd8::load(reinterpret_cast(in + pos + 2 * N)); + const auto input3 = + simd8::load(reinterpret_cast(in + pos + 3 * N)); + const auto mask0 = input0 > int8_t(-65); + const auto mask1 = input1 > int8_t(-65); + const auto mask2 = input2 > int8_t(-65); + const auto mask3 = input3 > int8_t(-65); + + local -= vector_u8(mask0); + local -= vector_u8(mask1); + local -= vector_u8(mask2); + local -= vector_u8(mask3); + + iterations += 1; + if (iterations == max_iterations) { + counters += sum_8bytes(local); + local = vector_u8::zero(); + iterations = 0; + } + } + + if (iterations > 0) { + count += local.sum_bytes(); + } + + count += counters.sum(); + + return count + scalar::utf8::count_code_points(in + pos, size - pos); +} +#endif // SIMDUTF_SIMD_HAS_BYTEMASK + +simdutf_really_inline size_t utf16_length_from_utf8(const char *in, + size_t size) { + size_t pos = 0; + size_t count = 0; + // This algorithm could no doubt be improved! + for (; pos + 64 <= size; pos += 64) { + simd8x64 input(reinterpret_cast(in + pos)); + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + // We count one word for anything that is not a continuation (so + // leading bytes). 
+ count += 64 - count_ones(utf8_continuation_mask); + int64_t utf8_4byte = input.gteq_unsigned(240); + count += count_ones(utf8_4byte); + } + return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos); +} + +} // namespace utf8 +} // unnamed namespace +} // namespace lasx +} // namespace simdutf +/* end file src/generic/utf8.h */ +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF16 +/* begin file src/generic/utf16/count_code_points_bytemask.h */ +namespace simdutf { +namespace lasx { +namespace { +namespace utf16 { + +using namespace simd; + +template +simdutf_really_inline size_t count_code_points(const char16_t *in, + size_t size) { + using vector_u16 = simd16; + constexpr size_t N = vector_u16::ELEMENTS; + + size_t pos = 0; + size_t count = 0; + + constexpr size_t max_iterations = 65535; + const auto one = vector_u16::splat(1); + const auto zero = vector_u16::zero(); + + size_t iteration = 0; + + auto counters = zero; + for (; pos < size / N * N; pos += N) { + auto input = vector_u16::load(in + pos); + if simdutf_constexpr (!match_system(big_endian)) { + input = input.swap_bytes(); + } + + const auto t0 = input & uint16_t(0xfc00); + const auto t1 = t0 ^ uint16_t(0xdc00); + + // t2[0] == 1 iff input[0] outside range 0xdc00..dfff (the word is not a + // high surrogate) + const auto t2 = min(t1, one); + + counters += t2; + + iteration += 1; + if (iteration == max_iterations) { + count += counters.sum(); + counters = zero; + iteration = 0; + } + } + + if (iteration > 0) { + count += counters.sum(); + } + + return count + + scalar::utf16::count_code_points(in + pos, size - pos); +} + +} // namespace utf16 +} // unnamed namespace +} // namespace lasx +} // namespace simdutf +/* end file src/generic/utf16/count_code_points_bytemask.h */ +/* begin file src/generic/utf16/change_endianness.h */ +namespace simdutf { +namespace lasx { +namespace { +namespace utf16 { + +simdutf_really_inline void +change_endianness_utf16(const char16_t *in, size_t 
size, char16_t *output) { + size_t pos = 0; + + while (pos < size / 32 * 32) { + simd16x32 input(reinterpret_cast(in + pos)); + input.swap_bytes(); + input.store(reinterpret_cast(output)); + pos += 32; + output += 32; + } + + scalar::utf16::change_endianness_utf16(in + pos, size - pos, output); +} + +} // namespace utf16 +} // unnamed namespace +} // namespace lasx +} // namespace simdutf +/* end file src/generic/utf16/change_endianness.h */ +/* begin file src/generic/utf16/utf8_length_from_utf16_bytemask.h */ +namespace simdutf { +namespace lasx { +namespace { +namespace utf16 { + +using namespace simd; + +template +simdutf_really_inline size_t utf8_length_from_utf16_bytemask(const char16_t *in, + size_t size) { + size_t pos = 0; + + using vector_u16 = simd16; + constexpr size_t N = vector_u16::ELEMENTS; + + const auto one = vector_u16::splat(1); + + auto v_count = vector_u16::zero(); + + // each char16 yields at least one byte + size_t count = size / N * N; + + // in a single iteration the increment is 0, 1 or 2, despite we have + // three additions + constexpr size_t max_iterations = 65535 / 2; + size_t iteration = max_iterations; + + for (; pos < size / N * N; pos += N) { + auto input = vector_u16::load(reinterpret_cast(in + pos)); + if simdutf_constexpr (!match_system(big_endian)) { + input = input.swap_bytes(); + } + // 0xd800 .. 0xdbff - low surrogate + // 0xdc00 .. 0xdfff - high surrogate + const auto is_surrogate = ((input & uint16_t(0xf800)) == uint16_t(0xd800)); + + // c0 - chars that yield 2- or 3-byte UTF-8 codes + const auto c0 = min(input & uint16_t(0xff80), one); + + // c1 - chars that yield 3-byte UTF-8 codes (including surrogates) + const auto c1 = min(input & uint16_t(0xf800), one); + + /* + Explanation how the counting works. + + In the case of a non-surrogate character we count: + * always 1 -- see how `count` is initialized above; + * c0 = 1 if the current char yields 2 or 3 bytes; + * c1 = 1 if the current char yields 3 bytes. 
+ + Thus, we always have correct count for the current char: + from 1, 2 or 3 bytes. + + A trickier part is how we count surrogate pairs. Whether + we encounter a surrogate (low or high), we count it as + 3 chars and then minus 1 (`is_surrogate` is -1 or 0). + Each surrogate char yields 2. A surrogate pair, that + is a low surrogate followed by a high one, yields + the expected 4 bytes. + + It also correctly handles cases when low surrogate is + processed by the this loop, but high surrogate is counted + by the scalar procedure. The scalar procedure uses exactly + the described approach, thanks to that for valid UTF-16 + strings it always count correctly. + */ + v_count += c0; + v_count += c1; + v_count += vector_u16(is_surrogate); + + iteration -= 1; + if (iteration == 0) { + count += v_count.sum(); + v_count = vector_u16::zero(); + iteration = max_iterations; + } + } + + if (iteration > 0) { + count += v_count.sum(); + } + + return count + scalar::utf16::utf8_length_from_utf16(in + pos, + size - pos); +} + +template +simdutf_really_inline result +utf8_length_from_utf16_with_replacement(const char16_t *in, size_t size) { + using vector_u16 = simd16; + constexpr size_t N = vector_u16::ELEMENTS; + if (N + 1 > size) { + return scalar::utf16::utf8_length_from_utf16_with_replacement( + in, size); + } // special case for short inputs + size_t pos = 0; + bool any_surrogates = false; + + const auto one = vector_u16::splat(1); + + auto v_count = vector_u16::zero(); + auto v_mismatched_count = vector_u16::zero(); + + size_t count = 0; + size_t mismatched_count = 0; + + // in a single iteration the increment is 0, 1 or 2, despite we have + // three additions + constexpr size_t max_iterations = 65535 / 2; + size_t iteration = max_iterations; + + if (scalar::utf16::is_low_surrogate(in[0])) { + any_surrogates = true; + mismatched_count += 1; + } + + for (; pos < (size - 1) / N * N; pos += N) { + auto input = vector_u16::load(reinterpret_cast(in + pos)); + if simdutf_constexpr 
(!match_system(big_endian)) { + input = input.swap_bytes(); + } + // 0xd800 .. 0xdbff - low surrogate + // 0xdc00 .. 0xdfff - high surrogate + const auto is_surrogate = ((input & uint16_t(0xf800)) == uint16_t(0xd800)); + + // c0 - chars that yield 2- or 3-byte UTF-8 codes + const auto c0 = min(input & uint16_t(0xff80), one); + + // c1 - chars that yield 3-byte UTF-8 codes (including surrogates) + const auto c1 = min(input & uint16_t(0xf800), one); + + v_count += c0; + v_count += c1; + v_count += vector_u16(is_surrogate); + if (is_surrogate.to_bitmask() != 0 || + scalar::utf16::is_low_surrogate(in[pos + N])) { + any_surrogates = true; + auto input_next = + vector_u16::load(reinterpret_cast(in + pos + 1)); + if simdutf_constexpr (!match_system(big_endian)) { + input_next = input_next.swap_bytes(); + } + + const auto lb_masked = input & (0xfc00); + const auto block_masked = input_next & (0xfc00); + + const auto lb_is_high = lb_masked == (0xd800); + const auto block_is_low = block_masked == (0xdc00); + + const auto illseq = min(vector_u16(lb_is_high ^ block_is_low), one); + + v_mismatched_count += illseq; + } + + iteration -= 1; + if (iteration == 0) { + count += v_count.sum(); + v_count = vector_u16::zero(); + mismatched_count += v_mismatched_count.sum(); + v_mismatched_count = vector_u16::zero(); + iteration = max_iterations; + } + } + + if (iteration > 0) { + count += v_count.sum(); + mismatched_count += v_mismatched_count.sum(); + } + + if (scalar::utf16::is_low_surrogate(in[pos])) { + any_surrogates = true; + if (!scalar::utf16::is_high_surrogate(in[pos - 1])) { + mismatched_count -= 1; + count += 2; + pos += 1; + } + } + count += pos; + count += mismatched_count; + if (scalar::utf16::is_high_surrogate(in[pos - 1])) { + any_surrogates = true; + if (pos == size) { + count += 2; + } else if (scalar::utf16::is_low_surrogate(in[pos])) { + pos += 1; + count += 2; + } + } + result scalar_result = + scalar::utf16::utf8_length_from_utf16_with_replacement( + in + pos, size 
- pos); + return {any_surrogates ? SURROGATE : scalar_result.error, + count + scalar_result.count}; +} + +} // namespace utf16 +} // unnamed namespace +} // namespace lasx +} // namespace simdutf +/* end file src/generic/utf16/utf8_length_from_utf16_bytemask.h */ +/* begin file src/generic/utf16/utf32_length_from_utf16.h */ +namespace simdutf { +namespace lasx { +namespace { +namespace utf16 { + +template +simdutf_really_inline size_t utf32_length_from_utf16(const char16_t *in, + size_t size) { + return count_code_points(in, size); +} + +} // namespace utf16 +} // unnamed namespace +} // namespace lasx +} // namespace simdutf +/* end file src/generic/utf16/utf32_length_from_utf16.h */ +/* begin file src/generic/utf16/to_well_formed.h */ +namespace simdutf { +namespace lasx { +namespace { +namespace utf16 { + +// Note: this is direct translation of westmere implementation. + +/* + * Process one block of 8 characters. If in_place is false, + * copy the block from in to out. If there is a sequencing + * error in the block, overwrite the illsequenced characters + * with the replacement character. This function reads one + * character before the beginning of the buffer as a lookback. + * If that character is illsequenced, it too is overwritten. 
+ */ +template +simdutf_really_inline void utf16fix_block(char16_t *out, const char16_t *in) { + const char16_t replacement = scalar::utf16::replacement(); + + using vector_u16 = simd16; + auto swap_if_needed = [](uint16_t x) simdutf_constexpr -> uint16_t { + return scalar::utf16::swap_if_needed(x); + }; + + const auto lookback = vector_u16::load(in - 1); + const auto block = vector_u16::load(in); + + const auto lb_masked = lookback & swap_if_needed(0xfc00); + const auto block_masked = block & swap_if_needed(0xfc00); + + const auto lb_is_high = lb_masked == swap_if_needed(0xd800); + const auto block_is_low = block_masked == swap_if_needed(0xdc00); + const auto illseq = lb_is_high ^ block_is_low; + if (!illseq.is_zero()) { + /* compute the cause of the illegal sequencing */ + const auto lb_illseq = ~block_is_low & lb_is_high; + const auto block_illseq = + (~lb_is_high & block_is_low) | lb_illseq.template byte_right_shift<2>(); + + /* fix illegal sequencing in the lookback */ + const auto lb = lb_illseq.first(); + out[-1] = char16_t((lb & replacement) | (~lb & out[-1])); + /* fix illegal sequencing in the main block */ + const auto mask = as_vector_u16(block_illseq); + const auto fixed = (~mask & block) | (mask & replacement); + + fixed.store(reinterpret_cast(out)); + } else if (!in_place) { + block.store(reinterpret_cast(out)); + } +} + +template +void to_well_formed(const char16_t *in, size_t n, char16_t *out) { + using vector_u16 = simd16; + constexpr size_t N = vector_u16::ELEMENTS; + + if (n < N + 1) { + scalar::utf16::to_well_formed_utf16(in, n, out); + return; + } + + const char16_t replacement = scalar::utf16::replacement(); + + out[0] = + scalar::utf16::is_low_surrogate(in[0]) ? 
replacement : in[0]; + + /* duplicate code to have the compiler specialise utf16fix_block() */ + if (in == out) { + constexpr bool inplace = true; + for (size_t i = 1; i + N < n; i += N) { + utf16fix_block(out + i, in + i); + } + + utf16fix_block(out + n - N, in + n - N); + } else { + constexpr bool copy_data = false; + for (size_t i = 1; i + N < n; i += N) { + utf16fix_block(out + i, in + i); + } + + utf16fix_block(out + n - N, in + n - N); + } + + out[n - 1] = scalar::utf16::is_high_surrogate(out[n - 1]) + ? replacement + : out[n - 1]; +} + +} // namespace utf16 +} // unnamed namespace +} // namespace lasx +} // namespace simdutf +/* end file src/generic/utf16/to_well_formed.h */ +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +/* begin file src/generic/validate_utf16.h */ +namespace simdutf { +namespace lasx { +namespace { +namespace utf16 { +/* + UTF-16 validation + -------------------------------------------------- + + In UTF-16 code units in range 0xD800 to 0xDFFF have special meaning. + + In a vectorized algorithm we want to examine the most significant + nibble in order to select a fast path. If none of highest nibbles + are 0xD (13), than we are sure that UTF-16 chunk in a vector + register is valid. + + Let us analyze what we need to check if the nibble is 0xD. The + value of the preceding nibble determines what we have: + + 0xd000 .. 0xd7ff - a valid word + 0xd800 .. 0xdbff - low surrogate + 0xdc00 .. 0xdfff - high surrogate + + Other constraints we have to consider: + - there must not be two consecutive low surrogates (0xd800 .. 0xdbff) + - there must not be two consecutive high surrogates (0xdc00 .. 0xdfff) + - there must not be sole low surrogate nor high surrogate + + We are going to build three bitmasks based on the 3rd nibble: + - V = valid word, + - L = low surrogate (0xd800 .. 0xdbff) + - H = high surrogate (0xdc00 .. 
0xdfff) + + 0 1 2 3 4 5 6 7 <--- word index + [ V | L | H | L | H | V | V | L ] + 1 0 0 0 0 1 1 0 - V = valid masks + 0 1 0 1 0 0 0 1 - L = low surrogate + 0 0 1 0 1 0 0 0 - H high surrogate + + + 1 0 0 0 0 1 1 0 V = valid masks + 0 1 0 1 0 0 0 0 a = L & (H >> 1) + 0 0 1 0 1 0 0 0 b = a << 1 + 1 1 1 1 1 1 1 0 c = V | a | b + ^ + the last bit can be zero, we just consume 7 + code units and recheck this word in the next iteration +*/ +template +const result validate_utf16_with_errors(const char16_t *input, size_t size) { + if (simdutf_unlikely(size == 0)) { + return result(error_code::SUCCESS, 0); + } + + const char16_t *start = input; + const char16_t *end = input + size; + + const auto v_d8 = simd8::splat(0xd8); + const auto v_f8 = simd8::splat(0xf8); + const auto v_fc = simd8::splat(0xfc); + const auto v_dc = simd8::splat(0xdc); + + while (input + simd16::SIZE * 2 < end) { + // 0. Load data: since the validation takes into account only higher + // byte of each word, we compress the two vectors into one which + // consists only the higher bytes. + auto in0 = simd16(input); + auto in1 = + simd16(input + simd16::SIZE / sizeof(char16_t)); + + // Function `utf16_gather_high_bytes` consumes two vectors of UTF-16 + // and yields a single vector having only higher bytes of characters. + const auto in = utf16_gather_high_bytes(in0, in1); + + // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy). + const auto surrogates_wordmask = (in & v_f8) == v_d8; + const uint16_t surrogates_bitmask = + static_cast(surrogates_wordmask.to_bitmask()); + if (surrogates_bitmask == 0x0000) { + input += 16; + } else { + // 2. 
We have some surrogates that have to be distinguished: + // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF) + // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF) + // + // Fact: high surrogate has 11th bit set (3rd bit in the higher byte) + + // V - non-surrogate code units + // V = not surrogates_wordmask + const uint16_t V = static_cast(~surrogates_bitmask); + + // H - word-mask for high surrogates: the six highest bits are 0b1101'11 + const auto vH = (in & v_fc) == v_dc; + const uint16_t H = static_cast(vH.to_bitmask()); + + // L - word mask for low surrogates + // L = not H and surrogates_wordmask + const uint16_t L = static_cast(~H & surrogates_bitmask); + + const uint16_t a = static_cast( + L & (H >> 1)); // A low surrogate must be followed by high one. + // (A low surrogate placed in the 7th register's word + // is an exception we handle.) + const uint16_t b = static_cast( + a << 1); // Just mark that the opinput - startite fact is hold, + // thanks to that we have only two masks for valid case. + const uint16_t c = static_cast( + V | a | b); // Combine all the masks into the final one. + + if (c == 0xffff) { + // The whole input register contains valid UTF-16, i.e., + // either single code units or proper surrogate pairs. + input += 16; + } else if (c == 0x7fff) { + // The 15 lower code units of the input register contains valid UTF-16. + // The 15th word may be either a low or high surrogate. It the next + // iteration we 1) check if the low surrogate is followed by a high + // one, 2) reject sole high surrogate. 
+ input += 15; + } else { + return result(error_code::SURROGATE, input - start); + } + } + } + + return result(error_code::SUCCESS, input - start); +} + +template +const result validate_utf16_as_ascii_with_errors(const char16_t *input, + size_t size) { + if (simdutf_unlikely(size == 0)) { + return result(error_code::SUCCESS, 0); + } + size_t pos = 0; + for (; pos < size / 32 * 32; pos += 32) { + simd16x32 input_vec( + reinterpret_cast(input + pos)); + if simdutf_constexpr (!match_system(big_endian)) { + input_vec.swap_bytes(); + } + uint64_t matches = input_vec.lteq(uint16_t(0x7f)); + if (~matches) { + // Found a match, return the first one + int index = trailing_zeroes(~matches) / 2; + return result(error_code::TOO_LARGE, pos + index); + } + } + + // Scalar tail + while (pos < size) { + + char16_t v = scalar::utf16::swap_if_needed(input[pos]); + if (v > 0x7F) { + return result(error_code::TOO_LARGE, pos); + } + pos++; + } + return result(error_code::SUCCESS, size); +} + +} // namespace utf16 +} // unnamed namespace +} // namespace lasx +} // namespace simdutf +/* end file src/generic/validate_utf16.h */ +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF32 +/* begin file src/generic/utf32.h */ +#include + +namespace simdutf { +namespace lasx { +namespace { +namespace utf32 { + +template T min(T a, T b) { return a <= b ? 
a : b; } + +simdutf_really_inline size_t utf8_length_from_utf32(const char32_t *input, + size_t length) { + using vector_u32 = simd32; + + const char32_t *start = input; + + // we add up to three ones in a single iteration (see the vectorized loop in + // section #2 below) + const size_t max_increment = 3; + + const size_t N = vector_u32::ELEMENTS; + +#if SIMDUTF_SIMD_HAS_UNSIGNED_CMP + const auto v_0000007f = vector_u32::splat(0x0000007f); + const auto v_000007ff = vector_u32::splat(0x000007ff); + const auto v_0000ffff = vector_u32::splat(0x0000ffff); +#else + const auto v_ffffff80 = vector_u32::splat(0xffffff80); + const auto v_fffff800 = vector_u32::splat(0xfffff800); + const auto v_ffff0000 = vector_u32::splat(0xffff0000); + const auto one = vector_u32::splat(1); +#endif // SIMDUTF_SIMD_HAS_UNSIGNED_CMP + + size_t counter = 0; + + // 1. vectorized loop unrolled 4 times + { + // we use vector of uint32 counters, this is why this limit is used + const size_t max_iterations = + std::numeric_limits::max() / (max_increment * 4); + size_t blocks = length / (N * 4); + length -= blocks * (N * 4); + while (blocks != 0) { + const size_t iterations = min(blocks, max_iterations); + blocks -= iterations; + + simd32 acc = vector_u32::zero(); + for (size_t i = 0; i < iterations; i++) { + const auto in0 = vector_u32(input + 0 * N); + const auto in1 = vector_u32(input + 1 * N); + const auto in2 = vector_u32(input + 2 * N); + const auto in3 = vector_u32(input + 3 * N); + +#if SIMDUTF_SIMD_HAS_UNSIGNED_CMP + acc -= as_vector_u32(in0 > v_0000007f); + acc -= as_vector_u32(in1 > v_0000007f); + acc -= as_vector_u32(in2 > v_0000007f); + acc -= as_vector_u32(in3 > v_0000007f); + + acc -= as_vector_u32(in0 > v_000007ff); + acc -= as_vector_u32(in1 > v_000007ff); + acc -= as_vector_u32(in2 > v_000007ff); + acc -= as_vector_u32(in3 > v_000007ff); + + acc -= as_vector_u32(in0 > v_0000ffff); + acc -= as_vector_u32(in1 > v_0000ffff); + acc -= as_vector_u32(in2 > v_0000ffff); + acc -= 
as_vector_u32(in3 > v_0000ffff); +#else + acc += min(one, in0 & v_ffffff80); + acc += min(one, in1 & v_ffffff80); + acc += min(one, in2 & v_ffffff80); + acc += min(one, in3 & v_ffffff80); + + acc += min(one, in0 & v_fffff800); + acc += min(one, in1 & v_fffff800); + acc += min(one, in2 & v_fffff800); + acc += min(one, in3 & v_fffff800); + + acc += min(one, in0 & v_ffff0000); + acc += min(one, in1 & v_ffff0000); + acc += min(one, in2 & v_ffff0000); + acc += min(one, in3 & v_ffff0000); +#endif // SIMDUTF_SIMD_HAS_UNSIGNED_CMP + + input += 4 * N; + } + + counter += acc.sum(); + } + } + + // 2. vectorized loop for tail + { + const size_t max_iterations = + std::numeric_limits::max() / max_increment; + size_t blocks = length / N; + length -= blocks * N; + while (blocks != 0) { + const size_t iterations = min(blocks, max_iterations); + blocks -= iterations; + + auto acc = vector_u32::zero(); + for (size_t i = 0; i < iterations; i++) { + const auto in = vector_u32(input); + +#if SIMDUTF_SIMD_HAS_UNSIGNED_CMP + acc -= as_vector_u32(in > v_0000007f); + acc -= as_vector_u32(in > v_000007ff); + acc -= as_vector_u32(in > v_0000ffff); +#else + acc += min(one, in & v_ffffff80); + acc += min(one, in & v_fffff800); + acc += min(one, in & v_ffff0000); +#endif // SIMDUTF_SIMD_HAS_UNSIGNED_CMP + + input += N; + } + + counter += acc.sum(); + } + } + + const size_t consumed = input - start; + if (consumed != 0) { + // We don't count 0th bytes in the vectorized loops above, this + // is why we need to count them in the end. 
+ counter += consumed; + } + + return counter + scalar::utf32::utf8_length_from_utf32(input, length); +} + +} // namespace utf32 +} // unnamed namespace +} // namespace lasx +} // namespace simdutf +/* end file src/generic/utf32.h */ +#endif // SIMDUTF_FEATURE_UTF32 + +// +// Implementation-specific overrides +// +namespace simdutf { +namespace lasx { + +#if SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused int +implementation::detect_encodings(const char *input, + size_t length) const noexcept { + // If there is a BOM, then we trust it. + auto bom_encoding = simdutf::BOM::check_bom(input, length); + // todo: reimplement as a one-pass algorithm. + if (bom_encoding != encoding_type::unspecified) { + return bom_encoding; + } + int out = 0; + if (validate_utf8(input, length)) { + out |= encoding_type::UTF8; + } + if ((length % 2) == 0) { + if (validate_utf16le(reinterpret_cast(input), + length / 2)) { + out |= encoding_type::UTF16_LE; + } + } + if ((length % 4) == 0) { + if (validate_utf32(reinterpret_cast(input), length / 4)) { + out |= encoding_type::UTF32_LE; + } + } + return out; +} +#endif // SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused bool +implementation::validate_utf8(const char *buf, size_t len) const noexcept { + return lasx::utf8_validation::generic_validate_utf8(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 +simdutf_warn_unused result implementation::validate_utf8_with_errors( + const char *buf, size_t len) const noexcept { + return lasx::utf8_validation::generic_validate_utf8_with_errors(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_ASCII +simdutf_warn_unused bool +implementation::validate_ascii(const char *buf, size_t len) const noexcept { + return lasx::ascii_validation::generic_validate_ascii(buf, len); +} + +simdutf_warn_unused result implementation::validate_ascii_with_errors( + const char 
*buf, size_t len) const noexcept { + return lasx::ascii_validation::generic_validate_ascii_with_errors(buf, len); +} +#endif // SIMDUTF_FEATURE_ASCII +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII +simdutf_warn_unused bool +implementation::validate_utf16le_as_ascii(const char16_t *buf, + size_t len) const noexcept { + return lasx::utf16::validate_utf16_as_ascii_with_errors( + buf, len) + .error == SUCCESS; +} + +simdutf_warn_unused bool +implementation::validate_utf16be_as_ascii(const char16_t *buf, + size_t len) const noexcept { + return lasx::utf16::validate_utf16_as_ascii_with_errors(buf, + len) + .error == SUCCESS; +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused bool +implementation::validate_utf16le(const char16_t *buf, + size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + // empty input is valid. protected the implementation from nullptr. + return true; + } + const auto res = + lasx::utf16::validate_utf16_with_errors(buf, len); + if (res.is_err()) { + return false; + } + + if (res.count != len) { + return scalar::utf16::validate(buf + res.count, + len - res.count); + } + + return true; +} +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused bool +implementation::validate_utf16be(const char16_t *buf, + size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + // empty input is valid. protected the implementation from nullptr. 
+ return true; + } + + const auto res = + lasx::utf16::validate_utf16_with_errors(buf, len); + if (res.is_err()) { + return false; + } + + if (res.count != len) { + return scalar::utf16::validate(buf + res.count, + len - res.count); + } + + return true; +} + +simdutf_warn_unused result implementation::validate_utf16le_with_errors( + const char16_t *buf, size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + return result(error_code::SUCCESS, 0); + } + const result res = + lasx::utf16::validate_utf16_with_errors(buf, len); + if (res.count != len) { + const result scalar_res = + scalar::utf16::validate_with_errors( + buf + res.count, len - res.count); + return result(scalar_res.error, res.count + scalar_res.count); + } else { + return res; + } +} + +simdutf_warn_unused result implementation::validate_utf16be_with_errors( + const char16_t *buf, size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + return result(error_code::SUCCESS, 0); + } + const result res = + lasx::utf16::validate_utf16_with_errors(buf, len); + if (res.count != len) { + const result scalar_res = + scalar::utf16::validate_with_errors(buf + res.count, + len - res.count); + return result(scalar_res.error, res.count + scalar_res.count); + } else { + return res; + } +} + +void implementation::to_well_formed_utf16le(const char16_t *input, size_t len, + char16_t *output) const noexcept { + return utf16::to_well_formed(input, len, output); +} + +void implementation::to_well_formed_utf16be(const char16_t *input, size_t len, + char16_t *output) const noexcept { + return utf16::to_well_formed(input, len, output); +} +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused bool +implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + // empty input is valid. protected the implementation from nullptr. 
+ return true; + } + const char32_t *tail = lasx_validate_utf32le(buf, len); + if (tail) { + return scalar::utf32::validate(tail, len - (tail - buf)); + } else { + return false; + } +} +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused result implementation::validate_utf32_with_errors( + const char32_t *buf, size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + return result(error_code::SUCCESS, 0); + } + result res = lasx_validate_utf32le_with_errors(buf, len); + if (res.count != len) { + result scalar_res = + scalar::utf32::validate_with_errors(buf + res.count, len - res.count); + return result(scalar_res.error, res.count + scalar_res.count); + } else { + return res; + } +} +#endif // SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( + const char *buf, size_t len, char *utf8_output) const noexcept { + std::pair ret = + lasx_convert_latin1_to_utf8(buf, len, utf8_output); + size_t converted_chars = ret.second - utf8_output; + + if (ret.first != buf + len) { + const size_t scalar_converted_chars = scalar::latin1_to_utf8::convert( + ret.first, len - (ret.first - buf), ret.second); + converted_chars += scalar_converted_chars; + } + return converted_chars; +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + std::pair ret = + lasx_convert_latin1_to_utf16le(buf, len, utf16_output); + size_t converted_chars = ret.second - utf16_output; + if (ret.first != buf + len) { + const size_t scalar_converted_chars = + scalar::latin1_to_utf16::convert( + ret.first, len - (ret.first - buf), ret.second); + converted_chars += scalar_converted_chars; + } + return converted_chars; +} + +simdutf_warn_unused 
size_t implementation::convert_latin1_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + std::pair ret = + lasx_convert_latin1_to_utf16be(buf, len, utf16_output); + size_t converted_chars = ret.second - utf16_output; + if (ret.first != buf + len) { + const size_t scalar_converted_chars = + scalar::latin1_to_utf16::convert( + ret.first, len - (ret.first - buf), ret.second); + converted_chars += scalar_converted_chars; + } + return converted_chars; +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept { + std::pair ret = + lasx_convert_latin1_to_utf32(buf, len, utf32_output); + size_t converted_chars = ret.second - utf32_output; + if (ret.first != buf + len) { + const size_t scalar_converted_chars = scalar::latin1_to_utf32::convert( + ret.first, len - (ret.first - buf), ret.second); + converted_chars += scalar_converted_chars; + } + return converted_chars; +} +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept { + size_t pos = 0; + char *output_start{latin1_output}; + // Performance degradation when memory address is not 32-byte aligned + while (((uint64_t)latin1_output & 0x1F) && pos < len) { + if (buf[pos] & 0x80) { + if (pos + 1 >= len) + return 0; + if ((buf[pos] & 0b11100000) == 0b11000000) { + if ((buf[pos + 1] & 0b11000000) != 0b10000000) + return 0; + uint32_t code_point = + (buf[pos] & 0b00011111) << 6 | (buf[pos + 1] & 0b00111111); + if (code_point < 0x80 || 0xFF < code_point) { + return 0; + } + *latin1_output++ = char(code_point); + pos += 2; + } else { + return 0; + } + } else { + *latin1_output++ = char(buf[pos]); + pos++; + } + 
} + size_t convert_size = latin1_output - output_start; + if (pos == len) + return convert_size; + utf8_to_latin1::validating_transcoder converter; + size_t convert_result = + converter.convert(buf + pos, len - pos, latin1_output); + return convert_result ? convert_size + convert_result : 0; +} + +simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors( + const char *buf, size_t len, char *latin1_output) const noexcept { + size_t pos = 0; + char *output_start{latin1_output}; + // Performance degradation when memory address is not 32-byte aligned + while (((uint64_t)latin1_output & 0x1F) && pos < len) { + if (buf[pos] & 0x80) { + if ((buf[pos] & 0b11100000) == 0b11000000) { + if (pos + 1 >= len) + return result(error_code::TOO_SHORT, pos); + if ((buf[pos + 1] & 0b11000000) != 0b10000000) + return result(error_code::TOO_SHORT, pos); + uint32_t code_point = + (buf[pos] & 0b00011111) << 6 | (buf[pos + 1] & 0b00111111); + if (code_point < 0x80) + return result(error_code::OVERLONG, pos); + if (0xFF < code_point) + return result(error_code::TOO_LARGE, pos); + *latin1_output++ = char(code_point); + pos += 2; + } else if ((buf[pos] & 0b11110000) == 0b11100000) { + return result(error_code::TOO_LARGE, pos); + } else if ((buf[pos] & 0b11111000) == 0b11110000) { + return result(error_code::TOO_LARGE, pos); + } else { + if ((buf[pos] & 0b11000000) == 0b10000000) { + return result(error_code::TOO_LONG, pos); + } + return result(error_code::HEADER_BITS, pos); + } + } else { + *latin1_output++ = char(buf[pos]); + pos++; + } + } + size_t convert_size = latin1_output - output_start; + if (pos == len) + return result(error_code::SUCCESS, convert_size); + + utf8_to_latin1::validating_transcoder converter; + result res = + converter.convert_with_errors(buf + pos, len - pos, latin1_output); + return res.error ? 
result(res.error, res.count + pos) + : result(res.error, res.count + convert_size); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept { + size_t pos = 0; + char *output_start{latin1_output}; + // Performance degradation when memory address is not 32-byte aligned + while (((uint64_t)latin1_output & 0x1F) && pos < len) { + if (buf[pos] & 0x80) { + if (pos + 1 >= len) + break; + if ((buf[pos] & 0b11100000) == 0b11000000) { + if ((buf[pos + 1] & 0b11000000) != 0b10000000) + return 0; + uint32_t code_point = + (buf[pos] & 0b00011111) << 6 | (buf[pos + 1] & 0b00111111); + *latin1_output++ = char(code_point); + pos += 2; + } else { + return 0; + } + } else { + *latin1_output++ = char(buf[pos]); + pos++; + } + } + size_t convert_size = latin1_output - output_start; + if (pos == len) + return convert_size; + + size_t convert_result = + lasx::utf8_to_latin1::convert_valid(buf + pos, len - pos, latin1_output); + return convert_result ? 
convert_size + convert_result : 0; +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16::validating_transcoder converter; + return converter.convert(buf, len, utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16::validating_transcoder converter; + return converter.convert(buf, len, utf16_output); +} + +simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16::validating_transcoder converter; + return converter.convert_with_errors(buf, len, + utf16_output); +} + +simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16::validating_transcoder converter; + return converter.convert_with_errors(buf, len, utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le( + const char *input, size_t size, char16_t *utf16_output) const noexcept { + return utf8_to_utf16::convert_valid(input, size, + utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be( + const char *input, size_t size, char16_t *utf16_output) const noexcept { + return utf8_to_utf16::convert_valid(input, size, + utf16_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::convert_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept { + utf8_to_utf32::validating_transcoder converter; + return converter.convert(buf, len, utf32_output); +} + 
+simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors( + const char *buf, size_t len, char32_t *utf32_output) const noexcept { + utf8_to_utf32::validating_transcoder converter; + return converter.convert_with_errors(buf, len, utf32_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( + const char *input, size_t size, char32_t *utf32_output) const noexcept { + return utf8_to_utf32::convert_valid(input, size, utf32_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + lasx_convert_utf16_to_latin1(buf, len, latin1_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - latin1_output; + + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_latin1::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + lasx_convert_utf16_to_latin1(buf, len, latin1_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - latin1_output; + + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_latin1::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused result +implementation::convert_utf16le_to_latin1_with_errors( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + lasx_convert_utf16_to_latin1_with_errors( + 
buf, len, latin1_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_latin1::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + latin1_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused result +implementation::convert_utf16be_to_latin1_with_errors( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + lasx_convert_utf16_to_latin1_with_errors(buf, len, + latin1_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_latin1::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + latin1_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + // optimization opportunity: implement a custom function. + return convert_utf16be_to_latin1(buf, len, latin1_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + // optimization opportunity: implement a custom function. 
+ return convert_utf16le_to_latin1(buf, len, latin1_output); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + std::pair ret = + lasx_convert_utf16_to_utf8(buf, len, utf8_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf8_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_utf8::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + std::pair ret = + lasx_convert_utf16_to_utf8(buf, len, utf8_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf8_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_utf8::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + lasx_convert_utf16_to_utf8_with_errors(buf, len, + utf8_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_utf8::convert_with_errors( + buf + ret.first.count, len - 
ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf8_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + lasx_convert_utf16_to_utf8_with_errors(buf, len, + utf8_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_utf8::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf8_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + return convert_utf16le_to_utf8(buf, len, utf8_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + return convert_utf16be_to_utf8(buf, len, utf8_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + if (simdutf_unlikely(len == 0)) { + return 0; + } + std::pair ret = + 
lasx_convert_utf32_to_utf8(buf, len, utf8_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf8_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = scalar::utf32_to_utf8::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + if (simdutf_unlikely(len == 0)) { + return result(error_code::SUCCESS, 0); + } + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + lasx_convert_utf32_to_utf8_with_errors(buf, len, utf8_output); + if (ret.first.count != len) { + result scalar_res = scalar::utf32_to_utf8::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf8_output; // Set count to the number of 8-bit code units written + return ret.first; +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + std::pair ret = + lasx_convert_utf16_to_utf32(buf, len, utf32_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf32_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_utf32::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused size_t 
implementation::convert_utf16be_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + std::pair ret = + lasx_convert_utf16_to_utf32(buf, len, utf32_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf32_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_utf32::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + lasx_convert_utf16_to_utf32_with_errors(buf, len, + utf32_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_utf32::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf32_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + lasx_convert_utf16_to_utf32_with_errors(buf, len, + utf32_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found 
correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_utf32::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf32_output; // Set count to the number of 8-bit code units written + return ret.first; +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + lasx_convert_utf32_to_latin1(buf, len, latin1_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - latin1_output; + + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = scalar::utf32_to_latin1::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + lasx_convert_utf32_to_latin1_with_errors(buf, len, latin1_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = scalar::utf32_to_latin1::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + latin1_output; // Set count to the number of 8-bit 
code units written + return ret.first; +} + +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + lasx_convert_utf32_to_latin1(buf, len, latin1_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - latin1_output; + + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = scalar::utf32_to_latin1::convert_valid( + ret.first, len - (ret.first - buf), ret.second); + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + // optimization opportunity: implement a custom function. + return convert_utf32_to_utf8(buf, len, utf8_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + std::pair ret = + lasx_convert_utf32_to_utf16(buf, len, utf16_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf16_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf32_to_utf16::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + + return saved_bytes; +} + +simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + std::pair ret = + lasx_convert_utf32_to_utf16(buf, len, utf16_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf16_output; + if (ret.first != 
buf + len) { + const size_t scalar_saved_bytes = + scalar::utf32_to_utf16::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + lasx_convert_utf32_to_utf16_with_errors(buf, len, + utf16_output); + if (ret.first.count != len) { + result scalar_res = + scalar::utf32_to_utf16::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf16_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + lasx_convert_utf32_to_utf16_with_errors(buf, len, + utf16_output); + if (ret.first.count != len) { + result scalar_res = + scalar::utf32_to_utf16::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf16_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + 
return convert_utf32_to_utf16le(buf, len, utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + return convert_utf32_to_utf16be(buf, len, utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + return convert_utf16le_to_utf32(buf, len, utf32_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + return convert_utf16be_to_utf32(buf, len, utf32_output); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 +void implementation::change_endianness_utf16(const char16_t *input, + size_t length, + char16_t *output) const noexcept { + utf16::change_endianness_utf16(input, length, output); +} + +simdutf_warn_unused size_t implementation::count_utf16le( + const char16_t *input, size_t length) const noexcept { + return utf16::count_code_points(input, length); +} + +simdutf_warn_unused size_t implementation::count_utf16be( + const char16_t *input, size_t length) const noexcept { + return utf16::count_code_points(input, length); +} +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 +simdutf_warn_unused size_t +implementation::count_utf8(const char *input, size_t length) const noexcept { + size_t pos = 0; + size_t count = 0; + // Performance degradation when memory address is not 32-byte aligned + while ((((uint64_t)input + pos) & 0x1F && pos < length)) { + if (input[pos++] > -65) { + count++; + } + } + __m256i v_bf = __lasx_xvldi(0xBF); // 0b10111111 + for (; pos + 32 <= length; pos += 32) { + __m256i in = __lasx_xvld(reinterpret_cast(input + pos), 0); + __m256i utf8_count = + __lasx_xvpcnt_h(__lasx_xvmskltz_b(__lasx_xvslt_b(v_bf, in))); + count = count + __lasx_xvpickve2gr_wu(utf8_count, 
0) + + __lasx_xvpickve2gr_wu(utf8_count, 4); + } + return count + scalar::utf8::count_code_points(input + pos, length - pos); +} +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::latin1_length_from_utf8( + const char *buf, size_t len) const noexcept { + return count_utf8(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::utf8_length_from_latin1( + const char *input, size_t length) const noexcept { + const uint8_t *data = reinterpret_cast(input); + const uint8_t *data_end = data + length; + uint64_t result = 0; + while (data_end - data > 16) { + uint64_t two_bytes = 0; + __m128i input_vec = __lsx_vld(data, 0); + two_bytes = + __lsx_vpickve2gr_hu(__lsx_vpcnt_h(__lsx_vmskltz_b(input_vec)), 0); + result += 16 + two_bytes; + data += 16; + } + return result + scalar::latin1::utf8_length_from_latin1((const char *)data, + data_end - data); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t implementation::utf8_length_from_utf16le( + const char16_t *input, size_t length) const noexcept { + return utf16::utf8_length_from_utf16_bytemask(input, + length); +} + +simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( + const char16_t *input, size_t length) const noexcept { + return utf16::utf8_length_from_utf16_bytemask(input, length); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::utf32_length_from_utf16le( + const char16_t *input, size_t length) const noexcept { + return utf16::utf32_length_from_utf16(input, length); +} + +simdutf_warn_unused size_t implementation::utf32_length_from_utf16be( + const char16_t *input, size_t length) const noexcept { + return 
utf16::utf32_length_from_utf16(input, length); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t implementation::utf16_length_from_utf8( + const char *input, size_t length) const noexcept { + return utf8::utf16_length_from_utf8_bytemask(input, length); +} +simdutf_warn_unused result +implementation::utf8_length_from_utf16le_with_replacement( + const char16_t *input, size_t length) const noexcept { + return scalar::utf16::utf8_length_from_utf16_with_replacement< + endianness::LITTLE>(input, length); +} + +simdutf_warn_unused result +implementation::utf8_length_from_utf16be_with_replacement( + const char16_t *input, size_t length) const noexcept { + return scalar::utf16::utf8_length_from_utf16_with_replacement< + endianness::BIG>(input, length); +} + +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::utf8_length_from_utf32( + const char32_t *input, size_t length) const noexcept { + return utf32::utf8_length_from_utf32(input, length); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::utf16_length_from_utf32( + const char32_t *input, size_t length) const noexcept { + __m128i v_ffff = lsx_splat_u32(0x0000ffff); + size_t pos = 0; + size_t count = 0; + for (; pos + 4 <= length; pos += 4) { + __m128i in = __lsx_vld(reinterpret_cast(input + pos), 0); + __m128i surrogate_bytemask = __lsx_vslt_wu(v_ffff, in); + size_t surrogate_count = __lsx_vpickve2gr_bu( + __lsx_vpcnt_b(__lsx_vmskltz_w(surrogate_bytemask)), 0); + count += 4 + surrogate_count; + } + return count + + scalar::utf32::utf16_length_from_utf32(input + pos, length - pos); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t 
implementation::utf32_length_from_utf8( + const char *input, size_t length) const noexcept { + return utf8::count_code_points(input, length); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_BASE64 +simdutf_warn_unused result implementation::base64_to_binary( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + if (options & base64_default_or_url) { + if (options == base64_options::base64_default_or_url_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else if (options & base64_url) { + if (options == base64_options::base64_url_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else { + if (options == base64_options::base64_default_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } +} + +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + if (options & base64_default_or_url) { + if (options == base64_options::base64_default_or_url_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else if (options & base64_url) { + if (options == base64_options::base64_url_accept_garbage) { + return compress_decode_base64( + output, input, length, options, 
last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else { + if (options == base64_options::base64_default_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } +} + +simdutf_warn_unused result implementation::base64_to_binary( + const char16_t *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + if (options & base64_default_or_url) { + if (options == base64_options::base64_default_or_url_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else if (options & base64_url) { + if (options == base64_options::base64_url_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else { + if (options == base64_options::base64_default_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } +} + +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char16_t *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + if (options & base64_default_or_url) { + if (options == base64_options::base64_default_or_url_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, 
last_chunk_options); + } + } else if (options & base64_url) { + if (options == base64_options::base64_url_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else { + if (options == base64_options::base64_default_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } +} + +size_t implementation::binary_to_base64(const char *input, size_t length, + char *output, + base64_options options) const noexcept { + if (options & base64_url) { + return encode_base64(output, input, length, options); + } else { + return encode_base64(output, input, length, options); + } +} + +size_t implementation::binary_to_base64_with_lines( + const char *input, size_t length, char *output, size_t line_length, + base64_options options) const noexcept { + return scalar::base64::tail_encode_base64_impl(output, input, length, + options, line_length); +} + +const char *implementation::find(const char *start, const char *end, + char character) const noexcept { + return util_find(start, end, character); +} + +const char16_t *implementation::find(const char16_t *start, const char16_t *end, + char16_t character) const noexcept { + return util_find(start, end, character); +} +#endif // SIMDUTF_FEATURE_BASE64 + +} // namespace lasx +} // namespace simdutf + +/* begin file src/simdutf/lasx/end.h */ +#undef SIMDUTF_SIMD_HAS_UNSIGNED_CMP + +#if SIMDUTF_CAN_ALWAYS_RUN_LASX +// nothing needed. 
+#else +SIMDUTF_UNTARGET_REGION +#endif +/* end file src/simdutf/lasx/end.h */ +/* end file src/lasx/implementation.cpp */ +#endif +#if SIMDUTF_IMPLEMENTATION_LSX +/* begin file src/lsx/implementation.cpp */ +/* begin file src/simdutf/lsx/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "lsx" +// #define SIMDUTF_IMPLEMENTATION lsx +#define SIMDUTF_SIMD_HAS_UNSIGNED_CMP 1 +/* end file src/simdutf/lsx/begin.h */ +namespace simdutf { +namespace lsx { +namespace { +#ifndef SIMDUTF_LSX_H + #error "lsx.h must be included" +#endif +using namespace simd; + +#if SIMDUTF_FEATURE_UTF8 +// convert vmskltz/vmskgez/vmsknz to +// simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes index +const uint8_t lsx_1_2_utf8_bytes_mask[] = { + 0, 1, 4, 5, 16, 17, 20, 21, 64, 65, 68, 69, 80, 81, 84, + 85, 2, 3, 6, 7, 18, 19, 22, 23, 66, 67, 70, 71, 82, 83, + 86, 87, 8, 9, 12, 13, 24, 25, 28, 29, 72, 73, 76, 77, 88, + 89, 92, 93, 10, 11, 14, 15, 26, 27, 30, 31, 74, 75, 78, 79, + 90, 91, 94, 95, 32, 33, 36, 37, 48, 49, 52, 53, 96, 97, 100, + 101, 112, 113, 116, 117, 34, 35, 38, 39, 50, 51, 54, 55, 98, 99, + 102, 103, 114, 115, 118, 119, 40, 41, 44, 45, 56, 57, 60, 61, 104, + 105, 108, 109, 120, 121, 124, 125, 42, 43, 46, 47, 58, 59, 62, 63, + 106, 107, 110, 111, 122, 123, 126, 127, 128, 129, 132, 133, 144, 145, 148, + 149, 192, 193, 196, 197, 208, 209, 212, 213, 130, 131, 134, 135, 146, 147, + 150, 151, 194, 195, 198, 199, 210, 211, 214, 215, 136, 137, 140, 141, 152, + 153, 156, 157, 200, 201, 204, 205, 216, 217, 220, 221, 138, 139, 142, 143, + 154, 155, 158, 159, 202, 203, 206, 207, 218, 219, 222, 223, 160, 161, 164, + 165, 176, 177, 180, 181, 224, 225, 228, 229, 240, 241, 244, 245, 162, 163, + 166, 167, 178, 179, 182, 183, 226, 227, 230, 231, 242, 243, 246, 247, 168, + 169, 172, 173, 184, 185, 188, 189, 232, 233, 236, 237, 248, 249, 252, 253, + 170, 171, 174, 175, 186, 187, 190, 191, 234, 235, 238, 239, 250, 251, 254, + 255}; +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF16 || 
SIMDUTF_FEATURE_UTF32 +simdutf_really_inline __m128i lsx_swap_bytes(__m128i vec) { + return __lsx_vshuf4i_b(vec, 0b10110001); +} +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_ASCII || SIMDUTF_FEATURE_DETECT_ENCODING || \ + SIMDUTF_FEATURE_UTF8 +simdutf_really_inline bool is_ascii(const simd8x64 &input) { + return input.is_ascii(); +} +#endif // SIMDUTF_FEATURE_ASCII || SIMDUTF_FEATURE_DETECT_ENCODING || + // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_really_inline simd8 +must_be_2_3_continuation(const simd8 prev2, + const simd8 prev3) { + simd8 is_third_byte = prev2 >= uint8_t(0b11100000u); + simd8 is_fourth_byte = prev3 >= uint8_t(0b11110000u); + return is_third_byte ^ is_fourth_byte; +} +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 && (SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32) +// common functions for utf8 conversions +simdutf_really_inline __m128i convert_utf8_3_byte_to_utf16(__m128i in) { + // Low half contains 10bbbbbb|10cccccc + // High half contains 1110aaaa|1110aaaa + const v16u8 sh = {2, 1, 5, 4, 8, 7, 11, 10, 0, 0, 3, 3, 6, 6, 9, 9}; + const v8u16 v0fff = {0xfff, 0xfff, 0xfff, 0xfff, 0xfff, 0xfff, 0xfff, 0xfff}; + + __m128i perm = __lsx_vshuf_b(__lsx_vldi(0), in, (__m128i)sh); + // 1110aaaa => aaaa0000 + __m128i perm_high = __lsx_vslli_b(__lsx_vbsrl_v(perm, 8), 4); + // 10bbbbbb 10cccccc => 0010bbbb bbcccccc + __m128i composed = __lsx_vbitsel_v(__lsx_vsrli_h(perm, 2), /* perm >> 2*/ + perm, __lsx_vrepli_h(0x3f) /* 0x003f */); + // 0010bbbb bbcccccc => aaaabbbb bbcccccc + composed = __lsx_vbitsel_v(perm_high, composed, (__m128i)v0fff); + + return composed; +} + +simdutf_really_inline __m128i convert_utf8_2_byte_to_utf16(__m128i in) { + // 10bbbbb 110aaaaa => 00bbbbb 000aaaaa + __m128i composed = __lsx_vand_v(in, __lsx_vldi(0x3f)); + // 00bbbbbb 000aaaaa => 00000aaa aabbbbbb + composed = __lsx_vbitsel_v( + 
__lsx_vsrli_h(__lsx_vslli_h(composed, 8), 2), /* (aaaaa << 8) >> 2 */ + __lsx_vsrli_h(composed, 8), /* bbbbbb >> 8 */ + __lsx_vrepli_h(0x3f)); /* 0x003f */ + return composed; +} + +simdutf_really_inline __m128i +convert_utf8_1_to_2_byte_to_utf16(__m128i in, size_t shufutf8_idx) { + // Converts 6 1-2 byte UTF-8 characters to 6 UTF-16 characters. + // This is a relatively easy scenario + // we process SIX (6) input code-code units. The max length in bytes of six + // code code units spanning between 1 and 2 bytes each is 12 bytes. + __m128i sh = + __lsx_vld(reinterpret_cast( + simdutf::tables::utf8_to_utf16::shufutf8[shufutf8_idx]), + 0); + // Shuffle + // 1 byte: 00000000 0bbbbbbb + // 2 byte: 110aaaaa 10bbbbbb + __m128i perm = __lsx_vshuf_b(__lsx_vldi(0), in, sh); + // 1 byte: 00000000 0bbbbbbb + // 2 byte: 00000000 00bbbbbb + __m128i ascii = __lsx_vand_v(perm, __lsx_vrepli_h(0x7f)); // 6 or 7 bits + // 1 byte: 00000000 00000000 + // 2 byte: 00000aaa aa000000 + const __m128i v1f00 = lsx_splat_u16(0x1f00); + __m128i composed = __lsx_vsrli_h(__lsx_vand_v(perm, v1f00), 2); // 5 bits + // Combine with a shift right accumulate + // 1 byte: 00000000 0bbbbbbb + // 2 byte: 00000aaa aabbbbbb + composed = __lsx_vadd_h(ascii, composed); + return composed; +} +#endif // SIMDUTF_FEATURE_UTF8 && (SIMDUTF_FEATURE_UTF16 || + // SIMDUTF_FEATURE_UTF32) + +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +/* begin file src/lsx/lsx_validate_utf16.cpp */ +template +simd8 utf16_gather_high_bytes(const simd16 in0, + const simd16 in1) { + if (big_endian) { + const auto mask = simd16(0x00ff); + const auto t0 = in0 & mask; + const auto t1 = in1 & mask; + + return simd16::pack(t0, t1); + } else { + return simd16::pack_shifted_right<8>(in0, in1); + } +} +/* end file src/lsx/lsx_validate_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +/* begin file src/lsx/lsx_validate_utf32le.cpp */ 
+const char32_t *lsx_validate_utf32le(const char32_t *input, size_t size) { + const char32_t *end = input + size; + + __m128i offset = lsx_splat_u32(0xffff2000); + __m128i standardoffsetmax = lsx_splat_u32(0xfffff7ff); + __m128i standardmax = lsx_splat_u32(0x10ffff); + __m128i currentmax = lsx_splat_u32(0); + __m128i currentoffsetmax = lsx_splat_u32(0); + + while (input + 4 < end) { + __m128i in = __lsx_vld(reinterpret_cast(input), 0); + currentmax = __lsx_vmax_wu(in, currentmax); + // 0xD8__ + 0x2000 = 0xF8__ => 0xF8__ > 0xF7FF + currentoffsetmax = + __lsx_vmax_wu(__lsx_vadd_w(in, offset), currentoffsetmax); + + input += 4; + } + + __m128i is_zero = + __lsx_vxor_v(__lsx_vmax_wu(currentmax, standardmax), standardmax); + if (__lsx_bnz_v(is_zero)) { + return nullptr; + } + + is_zero = __lsx_vxor_v(__lsx_vmax_wu(currentoffsetmax, standardoffsetmax), + standardoffsetmax); + if (__lsx_bnz_v(is_zero)) { + return nullptr; + } + + return input; +} + +const result lsx_validate_utf32le_with_errors(const char32_t *input, + size_t size) { + const char32_t *start = input; + const char32_t *end = input + size; + + __m128i offset = lsx_splat_u32(0xffff2000); + __m128i standardoffsetmax = lsx_splat_u32(0xfffff7ff); + __m128i standardmax = lsx_splat_u32(0x10ffff); + __m128i currentmax = lsx_splat_u32(0); + __m128i currentoffsetmax = lsx_splat_u32(0); + + while (input + 4 < end) { + __m128i in = __lsx_vld(reinterpret_cast(input), 0); + currentmax = __lsx_vmax_wu(in, currentmax); + currentoffsetmax = + __lsx_vmax_wu(__lsx_vadd_w(in, offset), currentoffsetmax); + + __m128i is_zero = + __lsx_vxor_v(__lsx_vmax_wu(currentmax, standardmax), standardmax); + if (__lsx_bnz_v(is_zero)) { + return result(error_code::TOO_LARGE, input - start); + } + + is_zero = __lsx_vxor_v(__lsx_vmax_wu(currentoffsetmax, standardoffsetmax), + standardoffsetmax); + if (__lsx_bnz_v(is_zero)) { + return result(error_code::SURROGATE, input - start); + } + + input += 4; + } + + return result(error_code::SUCCESS, 
input - start); +} +/* end file src/lsx/lsx_validate_utf32le.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/lsx/lsx_convert_latin1_to_utf8.cpp */ +/* + Returns a pair: the first unprocessed byte from buf and utf8_output + A scalar routing should carry on the conversion of the tail. +*/ + +std::pair +lsx_convert_latin1_to_utf8(const char *latin1_input, size_t len, + char *utf8_out) { + uint8_t *utf8_output = reinterpret_cast(utf8_out); + const char *end = latin1_input + len; + + __m128i zero = __lsx_vldi(0); + // We always write 16 bytes, of which more than the first 8 bytes + // are valid. A safety margin of 8 is more than sufficient. + while (end - latin1_input >= 16) { + __m128i in8 = __lsx_vld(reinterpret_cast(latin1_input), 0); + uint32_t ascii = __lsx_vpickve2gr_hu(__lsx_vmskgez_b(in8), 0); + if (ascii == 0xffff) { // ASCII fast path!!!! + __lsx_vst(in8, utf8_output, 0); + utf8_output += 16; + latin1_input += 16; + continue; + } + // We just fallback on UTF-16 code. This could be optimized/simplified + // further. + __m128i in16 = __lsx_vilvl_b(zero, in8); + // 1. 
prepare 2-byte values + // input 8-bit word : [aabb|bbbb] x 8 + // expected output : [1100|00aa|10bb|bbbb] x 8 + // t0 = [0000|00aa|bbbb|bb00] + __m128i t0 = __lsx_vslli_h(in16, 2); + // t1 = [0000|00aa|0000|0000] + __m128i t1 = __lsx_vand_v(t0, lsx_splat_u16(0x300)); + // t3 = [0000|00aa|00bb|bbbb] + __m128i t2 = __lsx_vbitsel_v(t1, in16, __lsx_vrepli_h(0x3f)); + // t4 = [1100|00aa|10bb|bbbb] + __m128i t3 = __lsx_vor_v(t2, __lsx_vreplgr2vr_h(uint16_t(0xc080))); + // merge ASCII and 2-byte codewords + __m128i one_byte_bytemask = __lsx_vsle_hu(in16, __lsx_vrepli_h(0x7F)); + __m128i utf8_unpacked = __lsx_vbitsel_v(t3, in16, one_byte_bytemask); + + const uint8_t *row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lsx_1_2_utf8_bytes_mask[(ascii & 0xff)]][0]; + __m128i shuffle = __lsx_vld(row + 1, 0); + __m128i utf8_packed = __lsx_vshuf_b(zero, utf8_unpacked, shuffle); + + // store bytes + __lsx_vst(utf8_packed, utf8_output, 0); + // adjust pointers + latin1_input += 8; + utf8_output += row[0]; + + } // while + + return std::make_pair(latin1_input, reinterpret_cast(utf8_output)); +} +/* end file src/lsx/lsx_convert_latin1_to_utf8.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/lsx/lsx_convert_latin1_to_utf16.cpp */ +std::pair +lsx_convert_latin1_to_utf16le(const char *buf, size_t len, + char16_t *utf16_output) { + const char *end = buf + len; + + __m128i zero = __lsx_vldi(0); + while (end - buf >= 16) { + __m128i in8 = __lsx_vld(reinterpret_cast(buf), 0); + + __m128i inlow = __lsx_vilvl_b(zero, in8); + __m128i inhigh = __lsx_vilvh_b(zero, in8); + __lsx_vst(inlow, reinterpret_cast(utf16_output), 0); + __lsx_vst(inhigh, reinterpret_cast(utf16_output), 16); + + utf16_output += 16; + buf += 16; + } + + return std::make_pair(buf, utf16_output); +} + +std::pair +lsx_convert_latin1_to_utf16be(const char *buf, size_t len, + char16_t *utf16_output) { + const char *end = buf + len; 
+ __m128i zero = __lsx_vldi(0); + while (end - buf >= 16) { + __m128i in8 = __lsx_vld(reinterpret_cast(buf), 0); + + __m128i inlow = __lsx_vilvl_b(in8, zero); + __m128i inhigh = __lsx_vilvh_b(in8, zero); + __lsx_vst(inlow, reinterpret_cast(utf16_output), 0); + __lsx_vst(inhigh, reinterpret_cast(utf16_output), 16); + utf16_output += 16; + buf += 16; + } + + return std::make_pair(buf, utf16_output); +} +/* end file src/lsx/lsx_convert_latin1_to_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/lsx/lsx_convert_latin1_to_utf32.cpp */ +std::pair +lsx_convert_latin1_to_utf32(const char *buf, size_t len, + char32_t *utf32_output) { + const char *end = buf + len; + + while (end - buf >= 16) { + __m128i in8 = __lsx_vld(reinterpret_cast(buf), 0); + + __m128i zero = __lsx_vldi(0); + __m128i in16low = __lsx_vilvl_b(zero, in8); + __m128i in16high = __lsx_vilvh_b(zero, in8); + __m128i in32_0 = __lsx_vilvl_h(zero, in16low); + __m128i in32_1 = __lsx_vilvh_h(zero, in16low); + __m128i in32_2 = __lsx_vilvl_h(zero, in16high); + __m128i in32_3 = __lsx_vilvh_h(zero, in16high); + + __lsx_vst(in32_0, reinterpret_cast(utf32_output), 0); + __lsx_vst(in32_1, reinterpret_cast(utf32_output + 4), 0); + __lsx_vst(in32_2, reinterpret_cast(utf32_output + 8), 0); + __lsx_vst(in32_3, reinterpret_cast(utf32_output + 12), 0); + + utf32_output += 16; + buf += 16; + } + + return std::make_pair(buf, utf32_output); +} +/* end file src/lsx/lsx_convert_latin1_to_utf32.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +/* begin file src/lsx/lsx_convert_utf8_to_utf16.cpp */ +// Convert up to 16 bytes from utf8 to utf16 using a mask indicating the +// end of the code points. Only the least significant 12 bits of the mask +// are accessed. +// It returns how many bytes were consumed (up to 16, usually 12). 
+template +size_t convert_masked_utf8_to_utf16(const char *input, + uint64_t utf8_end_of_code_point_mask, + char16_t *&utf16_output) { + // we use an approach where we try to process up to 12 input bytes. + // Why 12 input bytes and not 16? Because we are concerned with the size of + // the lookup tables. Also 12 is nicely divisible by two and three. + // + __m128i in = __lsx_vld(reinterpret_cast(input), 0); + const uint16_t input_utf8_end_of_code_point_mask = + utf8_end_of_code_point_mask & 0xfff; + // + // Optimization note: our main path below is load-latency dependent. Thus it + // is maybe beneficial to have fast paths that depend on branch prediction but + // have less latency. This results in more instructions but, potentially, also + // higher speeds. + + // We first try a few fast paths. + // The obvious first test is ASCII, which actually consumes the full 16. + if ((utf8_end_of_code_point_mask & 0xFFFF) == 0xFFFF) { + // We process in chunks of 16 bytes + // The routine in simd.h is reused. + simd8 temp{in}; + temp.store_ascii_as_utf16(utf16_output); + utf16_output += 16; // We wrote 16 16-bit characters. + return 16; // We consumed 16 bytes. + } + + uint64_t buffer[2]; + // 3 byte sequences are the next most common, as seen in CJK, which has long + // sequences of these. + if (input_utf8_end_of_code_point_mask == 0x924) { + // We want to take 4 3-byte UTF-8 code units and turn them into 4 2-byte + // UTF-16 code units. + __m128i composed = convert_utf8_3_byte_to_utf16(in); + // Byte swap if necessary + if simdutf_constexpr (!match_system(big_endian)) { + composed = lsx_swap_bytes(composed); + } + + __lsx_vst(composed, reinterpret_cast(utf16_output), 0); + utf16_output += 4; // We wrote 4 16-bit characters. + return 12; // We consumed 12 bytes. + } + + // 2 byte sequences occur in short bursts in languages like Greek and Russian. 
+ if ((utf8_end_of_code_point_mask & 0xFFFF) == 0xAAAA) { + // We want to take 6 2-byte UTF-8 code units and turn them into 6 2-byte + // UTF-16 code units. + __m128i composed = convert_utf8_2_byte_to_utf16(in); + // Byte swap if necessary + if simdutf_constexpr (!match_system(big_endian)) { + composed = lsx_swap_bytes(composed); + } + + __lsx_vst(composed, reinterpret_cast(utf16_output), 0); + utf16_output += 6; // We wrote 6 16-bit characters. + return 12; // We consumed 12 bytes. + } + + /// We do not have a fast path available, or the fast path is unimportant, so + /// we fallback. + const uint8_t idx = simdutf::tables::utf8_to_utf16::utf8bigindex + [input_utf8_end_of_code_point_mask][0]; + + const uint8_t consumed = simdutf::tables::utf8_to_utf16::utf8bigindex + [input_utf8_end_of_code_point_mask][1]; + const __m128i zero = __lsx_vldi(0); + if (idx < 64) { + // SIX (6) input code-code units + // Convert to UTF-16 + __m128i composed = convert_utf8_1_to_2_byte_to_utf16(in, idx); + // Byte swap if necessary + if simdutf_constexpr (!match_system(big_endian)) { + composed = lsx_swap_bytes(composed); + } + // Store + __lsx_vst(composed, reinterpret_cast(utf16_output), 0); + utf16_output += 6; // We wrote 6 16-bit characters. + return consumed; + } else if (idx < 145) { + // FOUR (4) input code-code units + // UTF-16 and UTF-32 use similar algorithms, but UTF-32 skips the narrowing. + __m128i sh = __lsx_vld(reinterpret_cast( + simdutf::tables::utf8_to_utf16::shufutf8[idx]), + 0); + // XXX: depending on the system scalar instructions might be faster. 
+ // 1 byte: 00000000 00000000 0ccccccc + // 2 byte: 00000000 110bbbbb 10cccccc + // 3 byte: 1110aaaa 10bbbbbb 10cccccc + sh = __lsx_vand_v(sh, __lsx_vldi(0x1f)); + __m128i perm = __lsx_vshuf_b(zero, in, sh); + // 1 byte: 00000000 0ccccccc + // 2 byte: xx0bbbbb x0cccccc + // 3 byte: xxbbbbbb x0cccccc + __m128i lowperm = __lsx_vpickev_h(perm, perm); + // 1 byte: 00000000 00000000 + // 2 byte: 00000000 00000000 + // 3 byte: 00000000 1110aaaa + __m128i highperm = __lsx_vpickod_h(perm, perm); + // 3 byte: aaaa0000 00000000 + highperm = __lsx_vslli_h(highperm, 12); + // ASCII + // 1 byte: 00000000 0ccccccc + // 2+byte: 00000000 00cccccc + __m128i ascii = __lsx_vand_v(lowperm, __lsx_vrepli_h(0x7f)); + // 1 byte: 00000000 00000000 + // 2 byte: xx0bbbbb 00000000 + // 3 byte: xxbbbbbb 00000000 + __m128i middlebyte = __lsx_vand_v(lowperm, lsx_splat_u16(0xFF00)); + // 1 byte: 00000000 0ccccccc + // 2 byte: 0010bbbb bbcccccc + // 3 byte: 0010bbbb bbcccccc + __m128i composed = __lsx_vor_v(__lsx_vsrli_h(middlebyte, 2), ascii); + + __m128i v0fff = __lsx_vreplgr2vr_h(uint16_t(0xfff)); + // aaaabbbb bbcccccc + composed = __lsx_vbitsel_v(highperm, composed, v0fff); + + if simdutf_constexpr (!match_system(big_endian)) { + composed = lsx_swap_bytes(composed); + } + + __lsx_vst(composed, reinterpret_cast(utf16_output), 0); + utf16_output += 4; // We wrote 4 16-bit codepoints + return consumed; + } else if (idx < 209) { + // THREE (3) input code-code units + if (input_utf8_end_of_code_point_mask == 0x888) { + // We want to take 3 4-byte UTF-8 code units and turn them into 3 4-byte + // UTF-16 pairs. Generating surrogate pairs is a little tricky though, but + // it is easier when we can assume they are all pairs. This version does + // not use the LUT, but 4 byte sequences are less common and the overhead + // of the extra memory access is less important than the early branch + // overhead in shorter sequences. 
+ + __m128i expected_mask = + (__m128i)v16u8{0xf8, 0xc0, 0xc0, 0xc0, 0xf8, 0xc0, 0xc0, 0xc0, + 0xf8, 0xc0, 0xc0, 0xc0, 0x0, 0x0, 0x0, 0x0}; + __m128i expected = + (__m128i)v16u8{0xf0, 0x80, 0x80, 0x80, 0xf0, 0x80, 0x80, 0x80, + 0xf0, 0x80, 0x80, 0x80, 0x0, 0x0, 0x0, 0x0}; + __m128i check = __lsx_vseq_b(__lsx_vand_v(in, expected_mask), expected); + if (__lsx_bz_b(check)) + return 12; + // Swap byte pairs + // 10dddddd 10cccccc|10bbbbbb 11110aaa + // 10cccccc 10dddddd|11110aaa 10bbbbbb + __m128i swap = lsx_swap_bytes(in); + // Shift left 2 bits + // cccccc00 dddddd00 xxxxxxxx bbbbbb00 + __m128i shift = __lsx_vslli_b(swap, 2); + // Create a magic number containing the low 2 bits of the trail surrogate + // and all the corrections needed to create the pair. UTF-8 4b prefix = + // -0x0000|0xF000 surrogate offset = -0x0000|0x0040 (0x10000 << 6) + // surrogate high = +0x0000|0xD800 + // surrogate low = +0xDC00|0x0000 + // ------------------------------- + // = +0xDC00|0xE7C0 + __m128i magic = __lsx_vreplgr2vr_w(uint32_t(0xDC00E7C0)); + // Generate unadjusted trail surrogate minus lowest 2 bits + // xxxxxxxx xxxxxxxx|11110aaa bbbbbb00 + __m128i trail = __lsx_vbitsel_v(shift, swap, lsx_splat_u32(0x0000ff00)); + // Insert low 2 bits of trail surrogate to magic number for later + // 11011100 00000000 11100111 110000cc + __m128i magic_with_low_2 = __lsx_vor_v(__lsx_vsrli_w(shift, 30), magic); + + // Generate lead surrogate + // xxxxcccc ccdddddd|xxxxxxxx xxxxxxxx + // 000000cc ccdddddd|xxxxxxxx xxxxxxxx + __m128i lead = __lsx_vbitsel_v( + __lsx_vsrli_h(__lsx_vand_v(shift, __lsx_vldi(0x3F)), 4), swap, + __lsx_vrepli_h(0x3f /* 0x003f*/)); + + // Blend pairs + // 000000cc ccdddddd|11110aaa bbbbbb00 + __m128i blend = __lsx_vbitsel_v(lead, trail, lsx_splat_u32(0x0000FFFF)); + + // Add magic number to finish the result + // 110111CC CCDDDDDD|110110AA BBBBBBCC + __m128i composed = __lsx_vadd_h(blend, magic_with_low_2); + // Byte swap if necessary + if simdutf_constexpr 
(!match_system(big_endian)) { + composed = lsx_swap_bytes(composed); + } + // __lsx_vst(composed, reinterpret_cast(utf16_output), 0); + __lsx_vst(composed, reinterpret_cast(buffer), 0); + std::memcpy(utf16_output, buffer, 12); + utf16_output += 6; // We 3 32-bit surrogate pairs. + return 12; // We consumed 12 bytes. + } + // 3 1-4 byte sequences + __m128i sh = __lsx_vld(reinterpret_cast( + simdutf::tables::utf8_to_utf16::shufutf8[idx]), + 0); + // 1 byte: 00000000 00000000 00000000 0ddddddd + // 3 byte: 00000000 00000000 110ccccc 10dddddd + // 3 byte: 00000000 1110bbbb 10cccccc 10dddddd + // 4 byte: 11110aaa 10bbbbbb 10cccccc 10dddddd + sh = __lsx_vand_v(sh, __lsx_vldi(0x1f)); + __m128i perm = __lsx_vshuf_b(zero, in, sh); + // added to fix issue https://github.com/simdutf/simdutf/issues/514 + // We only want to write 2 * 16-bit code units when that is actually what we + // have. Unfortunately, we cannot trust the input. So it is possible to get + // 0xff as an input byte and it should not result in a surrogate pair. We + // need to check for that. + uint32_t permbuffer[4]; + __lsx_vst(perm, permbuffer, 0); + // Mask the low and middle bytes + // 00000000 00000000 00000000 0ddddddd + __m128i ascii = __lsx_vand_v(perm, __lsx_vrepli_w(0x7f)); + // Because the surrogates need more work, the high surrogate is computed + // first. + __m128i middlehigh = __lsx_vslli_w(perm, 2); + // 00000000 00000000 00cccccc 00000000 + __m128i middlebyte = __lsx_vand_v(perm, lsx_splat_u32(0x00003F00)); + // Start assembling the sequence. Since the 4th byte is in the same position + // as it would be in a surrogate and there is no dependency, shift left + // instead of right. 
3 byte: 00000000 10bbbbxx xxxxxxxx xxxxxxxx 4 byte: + // 11110aaa bbbbbbxx xxxxxxxx xxxxxxxx + __m128i ab = __lsx_vbitsel_v(middlehigh, perm, lsx_splat_u32(0xFF000000)); + // Top 16 bits contains the high ten bits of the surrogate pair before + // correction 3 byte: 00000000 10bbbbcc|cccc0000 00000000 4 byte: 11110aaa + // bbbbbbcc|cccc0000 00000000 - high 10 bits correct w/o correction + __m128i v_fffc0000 = __lsx_vreplgr2vr_w(uint32_t(0xFFFC0000)); + __m128i abc = __lsx_vbitsel_v(__lsx_vslli_w(middlebyte, 4), ab, v_fffc0000); + // Combine the low 6 or 7 bits by a shift right accumulate + // 3 byte: 00000000 00000010|bbbbcccc ccdddddd - low 16 bits correct + // 4 byte: 00000011 110aaabb|bbbbcccc ccdddddd - low 10 bits correct w/o + // correction + __m128i composed = __lsx_vor_v(ascii, __lsx_vsrli_w(abc, 6)); + // After this is for surrogates + // Blend the low and high surrogates + // 4 byte: 11110aaa bbbbbbcc|bbbbcccc ccdddddd + __m128i mixed = __lsx_vbitsel_v(abc, composed, lsx_splat_u32(0x0000FFFF)); + // Clear the upper 6 bits of the low surrogate. Don't clear the upper bits + // yet as 0x10000 was not subtracted from the codepoint yet. 4 byte: + // 11110aaa bbbbbbcc|000000cc ccdddddd + __m128i v_ffff03ff = __lsx_vreplgr2vr_w(uint32_t(0xFFFF03FF)); + __m128i masked_pair = __lsx_vand_v(mixed, v_ffff03ff); + // Correct the remaining UTF-8 prefix, surrogate offset, and add the + // surrogate prefixes in one magic 16-bit addition. 
similar magic number but + // without the continue byte adjust and halfword swapped UTF-8 4b prefix = + // -0xF000|0x0000 surrogate offset = -0x0040|0x0000 (0x10000 << 6) + // surrogate high = +0xD800|0x0000 + // surrogate low = +0x0000|0xDC00 + // ----------------------------------- + // = +0xE7C0|0xDC00 + __m128i magic = __lsx_vreplgr2vr_w(uint32_t(0xE7C0DC00)); + // 4 byte: 110110AA BBBBBBCC|110111CC CCDDDDDD - surrogate pair complete + __m128i surrogates = __lsx_vadd_w(masked_pair, magic); + // If the high bit is 1 (s32 less than zero), this needs a surrogate pair + __m128i is_pair = __lsx_vslt_w(perm, zero); + // Select either the 4 byte surrogate pair or the 2 byte solo codepoint + // 3 byte: 0xxxxxxx xxxxxxxx|bbbbcccc ccdddddd + // 4 byte: 110110AA BBBBBBCC|110111CC CCDDDDDD + __m128i selected = __lsx_vbitsel_v(composed, surrogates, is_pair); + // Byte swap if necessary + if simdutf_constexpr (!match_system(big_endian)) { + selected = lsx_swap_bytes(selected); + } + // Attempting to shuffle and store would be complex, just scalarize. + uint32_t buffer_tmp[4]; + __lsx_vst(selected, buffer_tmp, 0); + // Test for the top bit of the surrogate mask. Remove due to issue 514 + // const uint32_t SURROGATE_MASK = match_system(big_endian) ? 0x80000000 : + // 0x00800000; + for (size_t i = 0; i < 3; i++) { + // Surrogate + // Used to be if (buffer[i] & SURROGATE_MASK) { + // See discussion above. 
+ // patch for issue https://github.com/simdutf/simdutf/issues/514 + if ((permbuffer[i] & 0xf8000000) == 0xf0000000) { + utf16_output[0] = uint16_t(buffer_tmp[i] >> 16); + utf16_output[1] = uint16_t(buffer_tmp[i] & 0xFFFF); + utf16_output += 2; + } else { + utf16_output[0] = uint16_t(buffer_tmp[i] & 0xFFFF); + utf16_output++; + } + } + return consumed; + } else { + // here we know that there is an error but we do not handle errors + return 12; + } +} +/* end file src/lsx/lsx_convert_utf8_to_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +/* begin file src/lsx/lsx_convert_utf8_to_utf32.cpp */ +// Convert up to 12 bytes from utf8 to utf32 using a mask indicating the +// end of the code points. Only the least significant 12 bits of the mask +// are accessed. +// It returns how many bytes were consumed (up to 12). +size_t convert_masked_utf8_to_utf32(const char *input, + uint64_t utf8_end_of_code_point_mask, + char32_t *&utf32_out) { + // we use an approach where we try to process up to 12 input bytes. + // Why 12 input bytes and not 16? Because we are concerned with the size of + // the lookup tables. Also 12 is nicely divisible by two and three. + // + uint32_t *&utf32_output = reinterpret_cast(utf32_out); + __m128i in = __lsx_vld(reinterpret_cast(input), 0); + const uint16_t input_utf8_end_of_code_point_mask = + utf8_end_of_code_point_mask & 0xFFF; + // + // Optimization note: our main path below is load-latency dependent. Thus it + // is maybe beneficial to have fast paths that depend on branch prediction but + // have less latency. This results in more instructions but, potentially, also + // higher speeds. + // + // We first try a few fast paths. + if ((utf8_end_of_code_point_mask & 0xffff) == 0xffff) { + // We process in chunks of 16 bytes. + // use fast implementation in src/simdutf/arm64/simd.h + // Ideally the compiler can keep the tables in registers. 
+ simd8 temp{in}; + temp.store_ascii_as_utf32_tbl(utf32_out); + utf32_output += 16; // We wrote 16 32-bit characters. + return 16; // We consumed 16 bytes. + } + __m128i zero = __lsx_vldi(0); + if (input_utf8_end_of_code_point_mask == 0x924) { + // We want to take 4 3-byte UTF-8 code units and turn them into 4 4-byte + // UTF-32 code units. Convert to UTF-16 + __m128i composed_utf16 = convert_utf8_3_byte_to_utf16(in); + __m128i utf32_low = __lsx_vilvl_h(zero, composed_utf16); + + __lsx_vst(utf32_low, reinterpret_cast(utf32_output), 0); + utf32_output += 4; // We wrote 4 32-bit characters. + return 12; // We consumed 12 bytes. + } + // 2 byte sequences occur in short bursts in languages like Greek and Russian. + if (input_utf8_end_of_code_point_mask == 0xaaa) { + // We want to take 6 2-byte UTF-8 code units and turn them into 6 4-byte + // UTF-32 code units. Convert to UTF-16 + __m128i composed_utf16 = convert_utf8_2_byte_to_utf16(in); + + __m128i utf32_low = __lsx_vilvl_h(zero, composed_utf16); + __m128i utf32_high = __lsx_vilvh_h(zero, composed_utf16); + + __lsx_vst(utf32_low, reinterpret_cast(utf32_output), 0); + __lsx_vst(utf32_high, reinterpret_cast(utf32_output), 16); + utf32_output += 6; + return 12; // We consumed 12 bytes. + } + /// Either no fast path or an unimportant fast path. 
+ + const uint8_t idx = simdutf::tables::utf8_to_utf16::utf8bigindex + [input_utf8_end_of_code_point_mask][0]; + const uint8_t consumed = simdutf::tables::utf8_to_utf16::utf8bigindex + [input_utf8_end_of_code_point_mask][1]; + + if (idx < 64) { + // SIX (6) input code-code units + // Convert to UTF-16 + __m128i composed_utf16 = convert_utf8_1_to_2_byte_to_utf16(in, idx); + __m128i utf32_low = __lsx_vilvl_h(zero, composed_utf16); + __m128i utf32_high = __lsx_vilvh_h(zero, composed_utf16); + + __lsx_vst(utf32_low, reinterpret_cast(utf32_output), 0); + __lsx_vst(utf32_high, reinterpret_cast(utf32_output), 16); + utf32_output += 6; + return consumed; + } else if (idx < 145) { + // FOUR (4) input code-code units + // UTF-16 and UTF-32 use similar algorithms, but UTF-32 skips the narrowing. + __m128i sh = __lsx_vld(reinterpret_cast( + simdutf::tables::utf8_to_utf16::shufutf8[idx]), + 0); + // Shuffle + // 1 byte: 00000000 00000000 0ccccccc + // 2 byte: 00000000 110bbbbb 10cccccc + // 3 byte: 1110aaaa 10bbbbbb 10cccccc + sh = __lsx_vand_v(sh, __lsx_vldi(0x1f)); + __m128i perm = __lsx_vshuf_b(zero, in, sh); + // Split + // 00000000 00000000 0ccccccc + __m128i ascii = __lsx_vand_v(perm, __lsx_vrepli_w(0x7F)); // 6 or 7 bits + // Note: unmasked + // xxxxxxxx aaaaxxxx xxxxxxxx + __m128i high = + __lsx_vsrli_w(__lsx_vand_v(perm, __lsx_vldi(0xf)), 4); // 4 bits + // Use 16 bit bic instead of and. + // The top bits will be corrected later in the bsl + // 00000000 10bbbbbb 00000000 + __m128i middle = + __lsx_vand_v(perm, lsx_splat_u32(0x0000FF00)); // 5 or 6 bits + // Combine low and middle with shift right accumulate + // 00000000 00xxbbbb bbcccccc + __m128i lowmid = __lsx_vor_v(ascii, __lsx_vsrli_w(middle, 2)); + // Insert top 4 bits from high byte with bitwise select + // 00000000 aaaabbbb bbcccccc + __m128i composed = __lsx_vbitsel_v(lowmid, high, lsx_splat_u32(0x0000F000)); + __lsx_vst(composed, utf32_output, 0); + utf32_output += 4; // We wrote 4 32-bit characters. 
+ return consumed; + } else if (idx < 209) { + // THREE (3) input code-code units + if (input_utf8_end_of_code_point_mask == 0x888) { + // We want to take 3 4-byte UTF-8 code units and turn them into 3 4-byte + // UTF-32 code units. This uses the same method as the fixed 3 byte + // version, reversing and shift left insert. However, there is no need for + // a shuffle mask now, just rev16 and rev32. + // + // This version does not use the LUT, but 4 byte sequences are less common + // and the overhead of the extra memory access is less important than the + // early branch overhead in shorter sequences, so it comes last. + + // Swap pairs of bytes + // 10dddddd|10cccccc|10bbbbbb|11110aaa + // 10cccccc 10dddddd|11110aaa 10bbbbbb + __m128i swap = lsx_swap_bytes(in); + // Shift left and insert + // xxxxcccc ccdddddd|xxxxxxxa aabbbbbb + __m128i merge1 = __lsx_vbitsel_v(__lsx_vsrli_h(swap, 2), swap, + __lsx_vrepli_h(0x3f /*0x003F*/)); + // Shift insert again + // xxxxxxxx xxxaaabb bbbbcccc ccdddddd + __m128i merge2 = + __lsx_vbitsel_v(__lsx_vslli_w(merge1, 12), /* merge1 << 12 */ + __lsx_vsrli_w(merge1, 16), /* merge1 >> 16 */ + lsx_splat_u32(0x00000FFF)); + // Clear the garbage + // 00000000 000aaabb bbbbcccc ccdddddd + __m128i composed = __lsx_vand_v(merge2, lsx_splat_u32(0x1FFFFF)); + // Store + __lsx_vst(composed, utf32_output, 0); + utf32_output += 3; // We wrote 3 32-bit characters. + return 12; // We consumed 12 bytes. + } + // Unlike UTF-16, doing a fast codepath doesn't have nearly as much benefit + // due to surrogates no longer being involved. 
+ __m128i sh = __lsx_vld(reinterpret_cast( + simdutf::tables::utf8_to_utf16::shufutf8[idx]), + 0); + // 1 byte: 00000000 00000000 00000000 0ddddddd + // 2 byte: 00000000 00000000 110ccccc 10dddddd + // 3 byte: 00000000 1110bbbb 10cccccc 10dddddd + // 4 byte: 11110aaa 10bbbbbb 10cccccc 10dddddd + sh = __lsx_vand_v(sh, __lsx_vldi(0x1f)); + __m128i perm = __lsx_vshuf_b(zero, in, sh); + + // Ascii + __m128i ascii = __lsx_vand_v(perm, __lsx_vrepli_w(0x7F)); + __m128i middle = __lsx_vand_v(perm, lsx_splat_u32(0x00003f00)); + // 00000000 00000000 0000cccc ccdddddd + __m128i cd = __lsx_vor_v(__lsx_vsrli_w(middle, 2), ascii); + + __m128i correction = __lsx_vand_v(perm, lsx_splat_u32(0x00400000)); + __m128i corrected = __lsx_vadd_b(perm, __lsx_vsrli_w(correction, 1)); + // Insert twice + // 00000000 000aaabb bbbbxxxx xxxxxxxx + __m128i corrected_srli2 = + __lsx_vsrli_w(__lsx_vand_v(corrected, __lsx_vrepli_b(0x7)), 2); + __m128i ab = + __lsx_vbitsel_v(corrected_srli2, corrected, __lsx_vrepli_h(0x3f)); + ab = __lsx_vsrli_w(ab, 4); + // 00000000 000aaabb bbbbcccc ccdddddd + __m128i composed = __lsx_vbitsel_v(ab, cd, lsx_splat_u32(0x00000FFF)); + // Store + __lsx_vst(composed, utf32_output, 0); + utf32_output += 3; // We wrote 3 32-bit characters. + return consumed; + } else { + // here we know that there is an error but we do not handle errors + return 12; + } +} +/* end file src/lsx/lsx_convert_utf8_to_utf32.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/lsx/lsx_convert_utf8_to_latin1.cpp */ +size_t convert_masked_utf8_to_latin1(const char *input, + uint64_t utf8_end_of_code_point_mask, + char *&latin1_output) { + // we use an approach where we try to process up to 12 input bytes. + // Why 12 input bytes and not 16? Because we are concerned with the size of + // the lookup tables. Also 12 is nicely divisible by two and three. 
+ // + __m128i in = __lsx_vld(reinterpret_cast(input), 0); + + const uint16_t input_utf8_end_of_code_point_mask = + utf8_end_of_code_point_mask & 0xfff; + // Optimization note: our main path below is load-latency dependent. Thus it + // is maybe beneficial to have fast paths that depend on branch prediction but + // have less latency. This results in more instructions but, potentially, also + // higher speeds. + + // We first try a few fast paths. + // The obvious first test is ASCII, which actually consumes the full 16. + if ((utf8_end_of_code_point_mask & 0xFFFF) == 0xFFFF) { + // We process in chunks of 16 bytes + __lsx_vst(in, reinterpret_cast(latin1_output), 0); + latin1_output += 16; // We wrote 16 18-bit characters. + return 16; // We consumed 16 bytes. + } + /// We do not have a fast path available, or the fast path is unimportant, so + /// we fallback. + const uint8_t idx = simdutf::tables::utf8_to_utf16::utf8bigindex + [input_utf8_end_of_code_point_mask][0]; + + const uint8_t consumed = simdutf::tables::utf8_to_utf16::utf8bigindex + [input_utf8_end_of_code_point_mask][1]; + // this indicates an invalid input: + if (idx >= 64) { + return consumed; + } + // Here we should have (idx < 64), if not, there is a bug in the validation or + // elsewhere. SIX (6) input code-code units this is a relatively easy scenario + // we process SIX (6) input code-code units. The max length in bytes of six + // code code units spanning between 1 and 2 bytes each is 12 bytes. Converts 6 + // 1-2 byte UTF-8 characters to 6 UTF-16 characters. This is a relatively easy + // scenario we process SIX (6) input code-code units. The max length in bytes + // of six code code units spanning between 1 and 2 bytes each is 12 bytes. 
+ __m128i sh = __lsx_vld(reinterpret_cast( + simdutf::tables::utf8_to_utf16::shufutf8[idx]), + 0); + // Shuffle + // 1 byte: 00000000 0bbbbbbb + // 2 byte: 110aaaaa 10bbbbbb + sh = __lsx_vand_v(sh, __lsx_vldi(0x1f)); + __m128i perm = __lsx_vshuf_b(__lsx_vldi(0), in, sh); + // ascii mask + // 1 byte: 11111111 11111111 + // 2 byte: 00000000 00000000 + __m128i ascii_mask = __lsx_vslt_bu(perm, __lsx_vldi(0x80)); + // utf8 mask + // 1 byte: 00000000 00000000 + // 2 byte: 00111111 00111111 + __m128i utf8_mask = __lsx_vand_v(__lsx_vsle_bu(__lsx_vldi(0x80), perm), + __lsx_vldi(0b00111111)); + // mask + // 1 byte: 11111111 11111111 + // 2 byte: 00111111 00111111 + __m128i mask = __lsx_vor_v(utf8_mask, ascii_mask); + + __m128i composed = __lsx_vbitsel_v(__lsx_vsrli_h(perm, 2), perm, mask); + // writing 8 bytes even though we only care about the first 6 bytes. + __m128i latin1_packed = __lsx_vpickev_b(__lsx_vldi(0), composed); + + uint64_t buffer[2]; + // __lsx_vst(latin1_packed, reinterpret_cast(latin1_output), 0); + __lsx_vst(latin1_packed, reinterpret_cast(buffer), 0); + std::memcpy(latin1_output, buffer, 6); + latin1_output += 6; // We wrote 6 bytes. + return consumed; +} +/* end file src/lsx/lsx_convert_utf8_to_latin1.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/lsx/lsx_convert_utf16_to_latin1.cpp */ +template +std::pair +lsx_convert_utf16_to_latin1(const char16_t *buf, size_t len, + char *latin1_output) { + const char16_t *end = buf + len; + while (end - buf >= 16) { + __m128i in = __lsx_vld(reinterpret_cast(buf), 0); + __m128i in1 = __lsx_vld(reinterpret_cast(buf), 16); + if simdutf_constexpr (!match_system(big_endian)) { + in = lsx_swap_bytes(in); + in1 = lsx_swap_bytes(in1); + } + if (__lsx_bz_v(__lsx_vpickod_b(in1, in))) { + // 1. pack the bytes + __m128i latin1_packed = __lsx_vpickev_b(in1, in); + // 2. 
store (8 bytes) + __lsx_vst(latin1_packed, reinterpret_cast(latin1_output), 0); + // 3. adjust pointers + buf += 16; + latin1_output += 16; + } else { + return std::make_pair(nullptr, reinterpret_cast(latin1_output)); + } + } // while + return std::make_pair(buf, latin1_output); +} + +template +std::pair +lsx_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, + char *latin1_output) { + const char16_t *start = buf; + const char16_t *end = buf + len; + while (end - buf >= 16) { + __m128i in = __lsx_vld(reinterpret_cast(buf), 0); + __m128i in1 = __lsx_vld(reinterpret_cast(buf), 16); + if simdutf_constexpr (!match_system(big_endian)) { + in = lsx_swap_bytes(in); + in1 = lsx_swap_bytes(in1); + } + if (__lsx_bz_v(__lsx_vpickod_b(in1, in))) { + // 1. pack the bytes + __m128i latin1_packed = __lsx_vpickev_b(in1, in); + // 2. store (8 bytes) + __lsx_vst(latin1_packed, reinterpret_cast(latin1_output), 0); + // 3. adjust pointers + buf += 16; + latin1_output += 16; + } else { + // Let us do a scalar fallback. + for (int k = 0; k < 16; k++) { + uint16_t word = scalar::utf16::swap_if_needed(buf[k]); + if (word <= 0xff) { + *latin1_output++ = char(word); + } else { + return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), + latin1_output); + } + } + } + } // while + return std::make_pair(result(error_code::SUCCESS, buf - start), + latin1_output); +} +/* end file src/lsx/lsx_convert_utf16_to_latin1.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF8 +/* begin file src/lsx/lsx_convert_utf16_to_utf8.cpp */ +/* + The vectorized algorithm works on single SSE register i.e., it + loads eight 16-bit code units. + + We consider three cases: + 1. an input register contains no surrogates and each value + is in range 0x0000 .. 0x07ff. + 2. an input register contains no surrogates and values are + is in range 0x0000 .. 0xffff. + 3. an input register contains surrogates --- i.e. 
codepoints + can have 16 or 32 bits. + + Ad 1. + + When values are less than 0x0800, it means that a 16-bit code unit + can be converted into: 1) single UTF8 byte (when it's an ASCII + char) or 2) two UTF8 bytes. + + For this case we do only some shuffle to obtain these 2-byte + codes and finally compress the whole SSE register with a single + shuffle. + + We need 256-entry lookup table to get a compression pattern + and the number of output bytes in the compressed vector register. + Each entry occupies 17 bytes. + + Ad 2. + + When values fit in 16-bit code units, but are above 0x07ff, then + a single word may produce one, two or three UTF8 bytes. + + We prepare data for all these three cases in two registers. + The first register contains lower two UTF8 bytes (used in all + cases), while the second one contains just the third byte for + the three-UTF8-bytes case. + + Finally these two registers are interleaved forming eight-element + array of 32-bit values. The array spans two SSE registers. + The bytes from the registers are compressed using two shuffles. + + We need 256-entry lookup table to get a compression pattern + and the number of output bytes in the compressed vector register. + Each entry occupies 17 bytes. + + + To summarize: + - We need two 256-entry tables that have 8704 bytes in total. +*/ +/* + Returns a pair: the first unprocessed byte from buf and utf8_output + A scalar routing should carry on the conversion of the tail. 
+*/ +template +std::pair +lsx_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_out) { + uint8_t *utf8_output = reinterpret_cast(utf8_out); + const char16_t *end = buf + len; + + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 + + __m128i v_07ff = __lsx_vreplgr2vr_h(uint16_t(0x7ff)); + while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { + __m128i in = __lsx_vld(reinterpret_cast(buf), 0); + if simdutf_constexpr (!match_system(big_endian)) { + in = lsx_swap_bytes(in); + } + if (__lsx_bz_v( + __lsx_vslt_hu(__lsx_vrepli_h(0x7F), in))) { // ASCII fast path!!!! + // It is common enough that we have sequences of 16 consecutive ASCII + // characters. + __m128i nextin = __lsx_vld(reinterpret_cast(buf), 16); + if simdutf_constexpr (!match_system(big_endian)) { + nextin = lsx_swap_bytes(nextin); + } + if (__lsx_bz_v(__lsx_vslt_hu(__lsx_vrepli_h(0x7F), nextin))) { + // 1. pack the bytes + // obviously suboptimal. + __m128i utf8_packed = __lsx_vpickev_b(nextin, in); + // 2. store (16 bytes) + __lsx_vst(utf8_packed, utf8_output, 0); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! + } else { + // 1. pack the bytes + // obviously suboptimal. + __m128i utf8_packed = __lsx_vpickev_b(in, in); + // 2. store (8 bytes) + __lsx_vst(utf8_packed, utf8_output, 0); + // 3. adjust pointers + buf += 8; + utf8_output += 8; + in = nextin; + } + } + + __m128i zero = __lsx_vldi(0); + if (__lsx_bz_v(__lsx_vslt_hu(v_07ff, in))) { + // 1. 
prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 + // expected output : [110a|aaaa|10bb|bbbb] x 8 + // t0 = [000a|aaaa|bbbb|bb00] + __m128i t0 = __lsx_vslli_h(in, 2); + // t1 = [000a|aaaa|0000|0000] + __m128i t1 = __lsx_vand_v(t0, lsx_splat_u16(0x1f00)); + // t2 = [0000|0000|00bb|bbbb] + __m128i t2 = __lsx_vand_v(in, __lsx_vrepli_h(0x3f)); + // t3 = [000a|aaaa|00bb|bbbb] + __m128i t3 = __lsx_vor_v(t1, t2); + // t4 = [110a|aaaa|10bb|bbbb] + __m128i v_c080 = __lsx_vreplgr2vr_h(uint16_t(0xc080)); + __m128i t4 = __lsx_vor_v(t3, v_c080); + // 2. merge ASCII and 2-byte codewords + __m128i one_byte_bytemask = + __lsx_vsle_hu(in, __lsx_vrepli_h(0x7F /*0x007F*/)); + __m128i utf8_unpacked = __lsx_vbitsel_v(t4, in, one_byte_bytemask); + // 3. prepare bitmask for 8-bit lookup + uint32_t m2 = __lsx_vpickve2gr_bu(__lsx_vmskltz_h(one_byte_bytemask), 0); + // 4. pack the bytes + const uint8_t *row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lsx_1_2_utf8_bytes_mask[m2]][0]; + __m128i shuffle = __lsx_vld(row, 1); + __m128i utf8_packed = __lsx_vshuf_b(zero, utf8_unpacked, shuffle); + // 5. store bytes + __lsx_vst(utf8_packed, utf8_output, 0); + // 6. adjust pointers + buf += 8; + utf8_output += row[0]; + continue; + } + __m128i surrogates_bytemask = __lsx_vseq_h( + __lsx_vand_v(in, lsx_splat_u16(0xf800)), lsx_splat_u16(0xd800)); + // It might seem like checking for surrogates_bitmask == 0xc000 could help. + // However, it is likely an uncommon occurrence. + if (__lsx_bz_v(surrogates_bytemask)) { + // case: code units from register produce either 1, 2 or 3 UTF-8 bytes + /* In this branch we handle three cases: + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + single UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - + two UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + three UTF-8 bytes + + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. 
However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. + + We precompute byte 1 for case #1 and the common byte for cases #2 & #3 + in register t2. + + We precompute byte 1 for case #3 and -- **conditionally** -- + precompute either byte 1 for case #2 or byte 2 for case #3. Note that + they differ by exactly one bit. + + Finally from these two code units we build proper UTF-8 sequence, + taking into account the case (i.e, the number of bytes to write). + */ + /** + * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: + * t2 => [0ccc|cccc] [10cc|cccc] + * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) + */ + // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] + __m128i t0 = __lsx_vpickev_b(in, in); + t0 = __lsx_vilvl_b(t0, t0); + + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|00cc|cccc] + __m128i v_3f7f = __lsx_vreplgr2vr_h(uint16_t(0x3F7F)); + __m128i t1 = __lsx_vand_v(t0, v_3f7f); + // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] + __m128i t2 = __lsx_vor_v(t1, lsx_splat_u16(0x8000)); + + // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] + __m128i s0 = __lsx_vsrli_h(in, 12); + // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] + __m128i s1 = __lsx_vslli_h(in, 2); + // s1: [aabb|bbbb|cccc|cc00] => [00bb|bbbb|0000|0000] + s1 = __lsx_vand_v(s1, lsx_splat_u16(0x3f00)); + + // [00bb|bbbb|0000|aaaa] + __m128i s2 = __lsx_vor_v(s0, s1); + // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] + __m128i v_c0e0 = __lsx_vreplgr2vr_h(uint16_t(0xC0E0)); + __m128i s3 = __lsx_vor_v(s2, v_c0e0); + __m128i one_or_two_bytes_bytemask = __lsx_vsle_hu(in, v_07ff); + __m128i m0 = + __lsx_vandn_v(one_or_two_bytes_bytemask, lsx_splat_u16(0x4000)); + __m128i s4 = __lsx_vxor_v(s3, m0); + + // 4. expand code units 16-bit => 32-bit + __m128i out0 = __lsx_vilvl_h(s4, t2); + __m128i out1 = __lsx_vilvh_h(s4, t2); + + // 5. 
compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle + __m128i one_byte_bytemask = __lsx_vsle_hu(in, __lsx_vrepli_h(0x7F)); + + __m128i one_or_two_bytes_bytemask_low = + __lsx_vilvl_h(one_or_two_bytes_bytemask, zero); + __m128i one_or_two_bytes_bytemask_high = + __lsx_vilvh_h(one_or_two_bytes_bytemask, zero); + + __m128i one_byte_bytemask_low = + __lsx_vilvl_h(one_byte_bytemask, one_byte_bytemask); + __m128i one_byte_bytemask_high = + __lsx_vilvh_h(one_byte_bytemask, one_byte_bytemask); + + const uint32_t mask0 = __lsx_vpickve2gr_bu( + __lsx_vmskltz_h(__lsx_vor_v(one_or_two_bytes_bytemask_low, + one_byte_bytemask_low)), + 0); + const uint32_t mask1 = __lsx_vpickve2gr_bu( + __lsx_vmskltz_h(__lsx_vor_v(one_or_two_bytes_bytemask_high, + one_byte_bytemask_high)), + 0); + + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; + __m128i shuffle0 = __lsx_vld(row0, 1); + __m128i utf8_0 = __lsx_vshuf_b(zero, out0, shuffle0); + + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; + __m128i shuffle1 = __lsx_vld(row1, 1); + __m128i utf8_1 = __lsx_vshuf_b(zero, out1, shuffle1); + + __lsx_vst(utf8_0, utf8_output, 0); + utf8_output += row0[0]; + __lsx_vst(utf8_1, utf8_output, 0); + utf8_output += row1[0]; + + buf += 8; + // surrogate pair(s) in a register + } else { + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. 
+ size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint16_t word = scalar::utf16::swap_if_needed(buf[k]); + if ((word & 0xFF80) == 0) { + *utf8_output++ = char(word); + } else if ((word & 0xF800) == 0) { + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xF800) != 0xD800) { + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = + scalar::utf16::swap_if_needed(buf[k + 1]); + k++; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return std::make_pair(nullptr, + reinterpret_cast(utf8_output)); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf8_output++ = char((value >> 18) | 0b11110000); + *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((value & 0b111111) | 0b10000000); + } + } + buf += k; + } + } // while + return std::make_pair(buf, reinterpret_cast(utf8_output)); +} + +/* + Returns a pair: a result struct and utf8_output. + If there is an error, the count field of the result is the position of the + error. Otherwise, it is the position of the first unprocessed byte in buf + (even if finished). A scalar routing should carry on the conversion of the + tail if needed. 
+*/ +template +std::pair +lsx_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, + char *utf8_out) { + uint8_t *utf8_output = reinterpret_cast(utf8_out); + const char16_t *start = buf; + const char16_t *end = buf + len; + + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 + while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { + __m128i in = __lsx_vld(reinterpret_cast(buf), 0); + if simdutf_constexpr (!match_system(big_endian)) { + in = lsx_swap_bytes(in); + } + if (__lsx_bz_v( + __lsx_vslt_hu(__lsx_vrepli_h(0x7F), in))) { // ASCII fast path!!!! + // It is common enough that we have sequences of 16 consecutive ASCII + // characters. + __m128i nextin = __lsx_vld(reinterpret_cast(buf), 16); + if simdutf_constexpr (!match_system(big_endian)) { + nextin = lsx_swap_bytes(nextin); + } + if (__lsx_bz_v(__lsx_vslt_hu(__lsx_vrepli_h(0x7F), nextin))) { + // 1. pack the bytes + // obviously suboptimal. + __m128i utf8_packed = __lsx_vpickev_b(nextin, in); + // 2. store (16 bytes) + __lsx_vst(utf8_packed, utf8_output, 0); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! + } else { + // 1. pack the bytes + // obviously suboptimal. + __m128i utf8_packed = __lsx_vpickev_b(in, in); + // 2. store (8 bytes) + __lsx_vst(utf8_packed, utf8_output, 0); + // 3. adjust pointers + buf += 8; + utf8_output += 8; + in = nextin; + } + } + + __m128i v_07ff = __lsx_vreplgr2vr_h(uint16_t(0x7ff)); + __m128i zero = __lsx_vldi(0); + if (__lsx_bz_v(__lsx_vslt_hu(v_07ff, in))) { + // 1. 
prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 + // expected output : [110a|aaaa|10bb|bbbb] x 8 + // t0 = [000a|aaaa|bbbb|bb00] + __m128i t0 = __lsx_vslli_h(in, 2); + // t1 = [000a|aaaa|0000|0000] + __m128i t1 = __lsx_vand_v(t0, lsx_splat_u16(0x1f00)); + // t2 = [0000|0000|00bb|bbbb] + __m128i t2 = __lsx_vand_v(in, __lsx_vrepli_h(0x3f)); + // t3 = [000a|aaaa|00bb|bbbb] + __m128i t3 = __lsx_vor_v(t1, t2); + // t4 = [110a|aaaa|10bb|bbbb] + __m128i v_c080 = __lsx_vreplgr2vr_h(uint16_t(0xc080)); + __m128i t4 = __lsx_vor_v(t3, v_c080); + // 2. merge ASCII and 2-byte codewords + __m128i one_byte_bytemask = + __lsx_vsle_hu(in, __lsx_vrepli_h(0x7F /*0x007F*/)); + __m128i utf8_unpacked = __lsx_vbitsel_v(t4, in, one_byte_bytemask); + // 3. prepare bitmask for 8-bit lookup + uint32_t m2 = __lsx_vpickve2gr_bu(__lsx_vmskltz_h(one_byte_bytemask), 0); + // 4. pack the bytes + const uint8_t *row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lsx_1_2_utf8_bytes_mask[m2]][0]; + __m128i shuffle = __lsx_vld(row, 1); + __m128i utf8_packed = __lsx_vshuf_b(zero, utf8_unpacked, shuffle); + // 5. store bytes + __lsx_vst(utf8_packed, utf8_output, 0); + // 6. adjust pointers + buf += 8; + utf8_output += row[0]; + continue; + } + __m128i surrogates_bytemask = __lsx_vseq_h( + __lsx_vand_v(in, lsx_splat_u16(0xf800)), lsx_splat_u16(0xd800)); + // It might seem like checking for surrogates_bitmask == 0xc000 could help. + // However, it is likely an uncommon occurrence. + if (__lsx_bz_v(surrogates_bytemask)) { + // case: code units from register produce either 1, 2 or 3 UTF-8 bytes + /* In this branch we handle three cases: + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + single UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - + two UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + three UTF-8 bytes + + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. 
However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. + + We precompute byte 1 for case #1 and the common byte for cases #2 & #3 + in register t2. + + We precompute byte 1 for case #3 and -- **conditionally** -- + precompute either byte 1 for case #2 or byte 2 for case #3. Note that + they differ by exactly one bit. + + Finally from these two code units we build proper UTF-8 sequence, + taking into account the case (i.e, the number of bytes to write). + */ + /** + * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: + * t2 => [0ccc|cccc] [10cc|cccc] + * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) + */ + // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] + __m128i t0 = __lsx_vpickev_b(in, in); + t0 = __lsx_vilvl_b(t0, t0); + + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|00cc|cccc] + __m128i v_3f7f = __lsx_vreplgr2vr_h(uint16_t(0x3F7F)); + __m128i t1 = __lsx_vand_v(t0, v_3f7f); + // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] + __m128i t2 = __lsx_vor_v(t1, lsx_splat_u16(0x8000)); + + // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] + __m128i s0 = __lsx_vsrli_h(in, 12); + // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] + __m128i s1 = __lsx_vslli_h(in, 2); + // s1: [aabb|bbbb|cccc|cc00] => [00bb|bbbb|0000|0000] + s1 = __lsx_vand_v(s1, lsx_splat_u16(0x3f00)); + + // [00bb|bbbb|0000|aaaa] + __m128i s2 = __lsx_vor_v(s0, s1); + // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] + __m128i v_c0e0 = __lsx_vreplgr2vr_h(uint16_t(0xC0E0)); + __m128i s3 = __lsx_vor_v(s2, v_c0e0); + __m128i one_or_two_bytes_bytemask = __lsx_vsle_hu(in, v_07ff); + __m128i m0 = + __lsx_vandn_v(one_or_two_bytes_bytemask, lsx_splat_u16(0x4000)); + __m128i s4 = __lsx_vxor_v(s3, m0); + + // 4. expand code units 16-bit => 32-bit + __m128i out0 = __lsx_vilvl_h(s4, t2); + __m128i out1 = __lsx_vilvh_h(s4, t2); + + // 5. 
compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle + __m128i one_byte_bytemask = __lsx_vsle_hu(in, __lsx_vrepli_h(0x7F)); + + __m128i one_or_two_bytes_bytemask_low = + __lsx_vilvl_h(one_or_two_bytes_bytemask, zero); + __m128i one_or_two_bytes_bytemask_high = + __lsx_vilvh_h(one_or_two_bytes_bytemask, zero); + + __m128i one_byte_bytemask_low = + __lsx_vilvl_h(one_byte_bytemask, one_byte_bytemask); + __m128i one_byte_bytemask_high = + __lsx_vilvh_h(one_byte_bytemask, one_byte_bytemask); + + const uint32_t mask0 = __lsx_vpickve2gr_bu( + __lsx_vmskltz_h(__lsx_vor_v(one_or_two_bytes_bytemask_low, + one_byte_bytemask_low)), + 0); + const uint32_t mask1 = __lsx_vpickve2gr_bu( + __lsx_vmskltz_h(__lsx_vor_v(one_or_two_bytes_bytemask_high, + one_byte_bytemask_high)), + 0); + + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; + __m128i shuffle0 = __lsx_vld(row0, 1); + __m128i utf8_0 = __lsx_vshuf_b(zero, out0, shuffle0); + + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; + __m128i shuffle1 = __lsx_vld(row1, 1); + __m128i utf8_1 = __lsx_vshuf_b(zero, out1, shuffle1); + + __lsx_vst(utf8_0, utf8_output, 0); + utf8_output += row0[0]; + __lsx_vst(utf8_1, utf8_output, 0); + utf8_output += row1[0]; + + buf += 8; + // surrogate pair(s) in a register + } else { + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. 
+ size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint16_t word = scalar::utf16::swap_if_needed(buf[k]); + if ((word & 0xFF80) == 0) { + *utf8_output++ = char(word); + } else if ((word & 0xF800) == 0) { + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xF800) != 0xD800) { + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = + scalar::utf16::swap_if_needed(buf[k + 1]); + k++; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return std::make_pair( + result(error_code::SURROGATE, buf - start + k - 1), + reinterpret_cast(utf8_output)); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf8_output++ = char((value >> 18) | 0b11110000); + *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((value & 0b111111) | 0b10000000); + } + } + buf += k; + } + } // while + + return std::make_pair(result(error_code::SUCCESS, buf - start), + reinterpret_cast(utf8_output)); +} +/* end file src/lsx/lsx_convert_utf16_to_utf8.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF8 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +/* begin file src/lsx/lsx_convert_utf16_to_utf32.cpp */ +template +std::pair +lsx_convert_utf16_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_out) { + uint32_t *utf32_output = reinterpret_cast(utf32_out); + const char16_t *end = buf + len; + + __m128i zero = __lsx_vldi(0); + __m128i v_f800 = lsx_splat_u16(0xf800); + __m128i v_d800 = lsx_splat_u16(0xd800); + + while (end - buf >= 8) { + __m128i in = 
__lsx_vld(reinterpret_cast(buf), 0); + if simdutf_constexpr (!match_system(big_endian)) { + in = lsx_swap_bytes(in); + } + + __m128i surrogates_bytemask = + __lsx_vseq_h(__lsx_vand_v(in, v_f800), v_d800); + // It might seem like checking for surrogates_bitmask == 0xc000 could help. + // However, it is likely an uncommon occurrence. + if (__lsx_bz_v(surrogates_bytemask)) { + // case: no surrogate pairs, extend all 16-bit code units to 32-bit code + // units + __lsx_vst(__lsx_vilvl_h(zero, in), utf32_output, 0); + __lsx_vst(__lsx_vilvh_h(zero, in), utf32_output, 16); + utf32_output += 8; + buf += 8; + // surrogate pair(s) in a register + } else { + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint16_t word = scalar::utf16::swap_if_needed(buf[k]); + if ((word & 0xF800) != 0xD800) { + *utf32_output++ = char32_t(word); + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = + scalar::utf16::swap_if_needed(buf[k + 1]); + k++; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return std::make_pair(nullptr, + reinterpret_cast(utf32_output)); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf32_output++ = char32_t(value); + } + } + buf += k; + } + } // while + return std::make_pair(buf, reinterpret_cast(utf32_output)); +} + +/* + Returns a pair: a result struct and utf8_output. + If there is an error, the count field of the result is the position of the + error. Otherwise, it is the position of the first unprocessed byte in buf + (even if finished). A scalar routing should carry on the conversion of the + tail if needed. 
+*/ +template +std::pair +lsx_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, + char32_t *utf32_out) { + uint32_t *utf32_output = reinterpret_cast(utf32_out); + const char16_t *start = buf; + const char16_t *end = buf + len; + + __m128i zero = __lsx_vldi(0); + __m128i v_f800 = lsx_splat_u16(0xf800); + __m128i v_d800 = lsx_splat_u16(0xd800); + + while (end - buf >= 8) { + __m128i in = __lsx_vld(reinterpret_cast(buf), 0); + if simdutf_constexpr (!match_system(big_endian)) { + in = lsx_swap_bytes(in); + } + + __m128i surrogates_bytemask = + __lsx_vseq_h(__lsx_vand_v(in, v_f800), v_d800); + if (__lsx_bz_v(surrogates_bytemask)) { + // case: no surrogate pairs, extend all 16-bit code units to 32-bit code + // units + __lsx_vst(__lsx_vilvl_h(zero, in), utf32_output, 0); + __lsx_vst(__lsx_vilvh_h(zero, in), utf32_output, 16); + utf32_output += 8; + buf += 8; + // surrogate pair(s) in a register + } else { + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. 
+ size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint16_t word = scalar::utf16::swap_if_needed(buf[k]); + if ((word & 0xF800) != 0xD800) { + *utf32_output++ = char32_t(word); + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = + scalar::utf16::swap_if_needed(buf[k + 1]); + k++; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return std::make_pair( + result(error_code::SURROGATE, buf - start + k - 1), + reinterpret_cast(utf32_output)); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf32_output++ = char32_t(value); + } + } + buf += k; + } + } // while + return std::make_pair(result(error_code::SUCCESS, buf - start), + reinterpret_cast(utf32_output)); +} +/* end file src/lsx/lsx_convert_utf16_to_utf32.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +/* begin file src/lsx/lsx_convert_utf32_to_latin1.cpp */ +std::pair +lsx_convert_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) { + const char32_t *end = buf + len; + const v16u8 shuf_mask = {0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0}; + __m128i v_ff = __lsx_vrepli_w(0xFF); + + while (end - buf >= 16) { + __m128i in1 = __lsx_vld(reinterpret_cast(buf), 0); + __m128i in2 = __lsx_vld(reinterpret_cast(buf), 16); + + __m128i in12 = __lsx_vor_v(in1, in2); + if (__lsx_bz_v(__lsx_vslt_wu(v_ff, in12))) { + // 1. pack the bytes + __m128i latin1_packed = __lsx_vshuf_b(in2, in1, (__m128i)shuf_mask); + // 2. store (8 bytes) + __lsx_vst(latin1_packed, reinterpret_cast(latin1_output), 0); + // 3. 
adjust pointers + buf += 8; + latin1_output += 8; + } else { + return std::make_pair(nullptr, reinterpret_cast(latin1_output)); + } + } // while + return std::make_pair(buf, latin1_output); +} + +std::pair +lsx_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, + char *latin1_output) { + const char32_t *start = buf; + const char32_t *end = buf + len; + + const v16u8 shuf_mask = {0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0}; + __m128i v_ff = __lsx_vrepli_w(0xFF); + + while (end - buf >= 16) { + __m128i in1 = __lsx_vld(reinterpret_cast(buf), 0); + __m128i in2 = __lsx_vld(reinterpret_cast(buf), 16); + + __m128i in12 = __lsx_vor_v(in1, in2); + + if (__lsx_bz_v(__lsx_vslt_wu(v_ff, in12))) { + // 1. pack the bytes + __m128i latin1_packed = __lsx_vshuf_b(in2, in1, (__m128i)shuf_mask); + // 2. store (8 bytes) + __lsx_vst(latin1_packed, reinterpret_cast(latin1_output), 0); + // 3. adjust pointers + buf += 8; + latin1_output += 8; + } else { + // Let us do a scalar fallback. 
+ for (int k = 0; k < 8; k++) { + uint32_t word = buf[k]; + if (word <= 0xff) { + *latin1_output++ = char(word); + } else { + return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), + latin1_output); + } + } + } + } // while + return std::make_pair(result(error_code::SUCCESS, buf - start), + latin1_output); +} +/* end file src/lsx/lsx_convert_utf32_to_latin1.cpp */ +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +/* begin file src/lsx/lsx_convert_utf32_to_utf8.cpp */ +std::pair +lsx_convert_utf32_to_utf8(const char32_t *buf, size_t len, char *utf8_out) { + uint8_t *utf8_output = reinterpret_cast(utf8_out); + const char32_t *end = buf + len; + + __m128i v_c080 = lsx_splat_u16(0xc080); + __m128i v_07ff = lsx_splat_u16(0x07ff); + __m128i v_dfff = lsx_splat_u16(0xdfff); + __m128i v_d800 = lsx_splat_u16(0xd800); + __m128i forbidden_bytemask = __lsx_vldi(0x0); + + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 + + while (end - buf > std::ptrdiff_t(16 + safety_margin)) { + __m128i in = __lsx_vld(reinterpret_cast(buf), 0); + __m128i nextin = __lsx_vld(reinterpret_cast(buf), 16); + + // Check if no bits set above 16th + if (__lsx_bz_v(__lsx_vpickod_h(in, nextin))) { + // Pack UTF-32 to UTF-16 safely (without surrogate pairs) + // Apply UTF-16 => UTF-8 routine (lsx_convert_utf16_to_utf8.cpp) + __m128i utf16_packed = __lsx_vpickev_h(nextin, in); + + if (__lsx_bz_v(__lsx_vslt_hu(__lsx_vrepli_h(0x7F), + utf16_packed))) { // ASCII fast path!!!! + // 1. pack the bytes + // obviously suboptimal. + __m128i utf8_packed = __lsx_vpickev_b(utf16_packed, utf16_packed); + // 2. store (8 bytes) + __lsx_vst(utf8_packed, utf8_output, 0); + // 3. adjust pointers + buf += 8; + utf8_output += 8; + continue; // we are done for this round! + } + __m128i zero = __lsx_vldi(0); + if (__lsx_bz_v(__lsx_vslt_hu(v_07ff, utf16_packed))) { + // 1. 
prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 + // expected output : [110a|aaaa|10bb|bbbb] x 8 + + // t0 = [000a|aaaa|bbbb|bb00] + const __m128i t0 = __lsx_vslli_h(utf16_packed, 2); + // t1 = [000a|aaaa|0000|0000] + const __m128i t1 = __lsx_vand_v(t0, lsx_splat_u16(0x1f00)); + // t2 = [0000|0000|00bb|bbbb] + const __m128i t2 = __lsx_vand_v(utf16_packed, __lsx_vrepli_h(0x3f)); + // t3 = [000a|aaaa|00bb|bbbb] + const __m128i t3 = __lsx_vor_v(t1, t2); + // t4 = [110a|aaaa|10bb|bbbb] + const __m128i t4 = __lsx_vor_v(t3, v_c080); + // 2. merge ASCII and 2-byte codewords + __m128i one_byte_bytemask = + __lsx_vsle_hu(utf16_packed, __lsx_vrepli_h(0x7F /*0x007F*/)); + __m128i utf8_unpacked = + __lsx_vbitsel_v(t4, utf16_packed, one_byte_bytemask); + // 3. prepare bitmask for 8-bit lookup + uint32_t m2 = + __lsx_vpickve2gr_bu(__lsx_vmskltz_h(one_byte_bytemask), 0); + // 4. pack the bytes + const uint8_t *row = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lsx_1_2_utf8_bytes_mask[m2]][0]; + __m128i shuffle = __lsx_vld(row, 1); + __m128i utf8_packed = __lsx_vshuf_b(zero, utf8_unpacked, shuffle); + // 5. store bytes + __lsx_vst(utf8_packed, utf8_output, 0); + + // 6. adjust pointers + buf += 8; + utf8_output += row[0]; + continue; + } else { + // case: code units from register produce either 1, 2 or 3 UTF-8 bytes + forbidden_bytemask = __lsx_vor_v( + __lsx_vand_v( + __lsx_vsle_h(utf16_packed, v_dfff), // utf16_packed <= 0xdfff + __lsx_vsle_h(v_d800, utf16_packed)), // utf16_packed >= 0xd800 + forbidden_bytemask); + /* In this branch we handle three cases: + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - single + UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two + UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - three + UTF-8 bytes + + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. 
Note that the last byte in cases #2 and #3 is the same. + + We precompute byte 1 for case #1 and the common byte for cases #2 & #3 + in register t2. + + We precompute byte 1 for case #3 and -- **conditionally** -- precompute + either byte 1 for case #2 or byte 2 for case #3. Note that they + differ by exactly one bit. + + Finally from these two code units we build proper UTF-8 sequence, taking + into account the case (i.e, the number of bytes to write). + */ + /** + * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: + * t2 => [0ccc|cccc] [10cc|cccc] + * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) + */ + // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] + __m128i t0 = __lsx_vpickev_b(utf16_packed, utf16_packed); + t0 = __lsx_vilvl_b(t0, t0); + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] + __m128i v_3f7f = __lsx_vreplgr2vr_h(uint16_t(0x3F7F)); + __m128i t1 = __lsx_vand_v(t0, v_3f7f); + // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] + __m128i t2 = __lsx_vor_v(t1, lsx_splat_u16(0x8000)); + + // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] + __m128i s0 = __lsx_vsrli_h(utf16_packed, 12); + // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] + __m128i s1 = __lsx_vslli_h(utf16_packed, 2); + // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000] + s1 = __lsx_vand_v(s1, lsx_splat_u16(0x3F00)); + // [00bb|bbbb|0000|aaaa] + __m128i s2 = __lsx_vor_v(s0, s1); + // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] + __m128i v_c0e0 = __lsx_vreplgr2vr_h(uint16_t(0xC0E0)); + __m128i s3 = __lsx_vor_v(s2, v_c0e0); + __m128i one_or_two_bytes_bytemask = __lsx_vsle_hu(utf16_packed, v_07ff); + __m128i m0 = + __lsx_vandn_v(one_or_two_bytes_bytemask, lsx_splat_u16(0x4000)); + __m128i s4 = __lsx_vxor_v(s3, m0); + + // 4. expand code units 16-bit => 32-bit + __m128i out0 = __lsx_vilvl_h(s4, t2); + __m128i out1 = __lsx_vilvh_h(s4, t2); + + // 5. 
compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle + __m128i one_byte_bytemask = + __lsx_vsle_hu(utf16_packed, __lsx_vrepli_h(0x7F)); + + __m128i one_or_two_bytes_bytemask_u16_to_u32_low = + __lsx_vilvl_h(one_or_two_bytes_bytemask, zero); + __m128i one_or_two_bytes_bytemask_u16_to_u32_high = + __lsx_vilvh_h(one_or_two_bytes_bytemask, zero); + + __m128i one_byte_bytemask_u16_to_u32_low = + __lsx_vilvl_h(one_byte_bytemask, one_byte_bytemask); + __m128i one_byte_bytemask_u16_to_u32_high = + __lsx_vilvh_h(one_byte_bytemask, one_byte_bytemask); + + const uint32_t mask0 = + __lsx_vpickve2gr_bu(__lsx_vmskltz_h(__lsx_vor_v( + one_or_two_bytes_bytemask_u16_to_u32_low, + one_byte_bytemask_u16_to_u32_low)), + 0); + const uint32_t mask1 = + __lsx_vpickve2gr_bu(__lsx_vmskltz_h(__lsx_vor_v( + one_or_two_bytes_bytemask_u16_to_u32_high, + one_byte_bytemask_u16_to_u32_high)), + 0); + + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; + __m128i shuffle0 = __lsx_vld(row0, 1); + __m128i utf8_0 = __lsx_vshuf_b(zero, out0, shuffle0); + + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; + __m128i shuffle1 = __lsx_vld(row1, 1); + __m128i utf8_1 = __lsx_vshuf_b(zero, out1, shuffle1); + + __lsx_vst(utf8_0, utf8_output, 0); + utf8_output += row0[0]; + __lsx_vst(utf8_1, utf8_output, 0); + utf8_output += row1[0]; + + buf += 8; + } + // At least one 32-bit word will produce a surrogate pair in UTF-16 <=> + // will produce four UTF-8 bytes. + } else { + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. 
+ size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint32_t word = buf[k]; + if ((word & 0xFFFFFF80) == 0) { + *utf8_output++ = char(word); + } else if ((word & 0xFFFFF800) == 0) { + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xFFFF0000) == 0) { + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair(nullptr, + reinterpret_cast(utf8_output)); + } + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { + if (word > 0x10FFFF) { + return std::make_pair(nullptr, + reinterpret_cast(utf8_output)); + } + *utf8_output++ = char((word >> 18) | 0b11110000); + *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } + } + buf += k; + } + } // while + + // check for invalid input + if (__lsx_bnz_v(forbidden_bytemask)) { + return std::make_pair(nullptr, reinterpret_cast(utf8_output)); + } + + return std::make_pair(buf, reinterpret_cast(utf8_output)); +} + +std::pair +lsx_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, + char *utf8_out) { + uint8_t *utf8_output = reinterpret_cast(utf8_out); + const char32_t *start = buf; + const char32_t *end = buf + len; + + __m128i v_c080 = lsx_splat_u16(0xc080); + __m128i v_07ff = lsx_splat_u16(0x07ff); + __m128i v_dfff = lsx_splat_u16(0xdfff); + __m128i v_d800 = lsx_splat_u16(0xd800); + __m128i forbidden_bytemask = __lsx_vldi(0x0); + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 + + while (end - buf > std::ptrdiff_t(16 + safety_margin)) { + __m128i in = __lsx_vld(reinterpret_cast(buf), 0); + __m128i nextin = 
__lsx_vld(reinterpret_cast(buf), 16); + + // Check if no bits set above 16th + if (__lsx_bz_v(__lsx_vpickod_h(in, nextin))) { + // Pack UTF-32 to UTF-16 safely (without surrogate pairs) + // Apply UTF-16 => UTF-8 routine (lsx_convert_utf16_to_utf8.cpp) + __m128i utf16_packed = __lsx_vpickev_h(nextin, in); + + if (__lsx_bz_v(__lsx_vslt_hu(__lsx_vrepli_h(0x7F), + utf16_packed))) { // ASCII fast path!!!! + // 1. pack the bytes + // obviously suboptimal. + __m128i utf8_packed = __lsx_vpickev_b(utf16_packed, utf16_packed); + // 2. store (8 bytes) + __lsx_vst(utf8_packed, utf8_output, 0); + // 3. adjust pointers + buf += 8; + utf8_output += 8; + continue; // we are done for this round! + } + __m128i zero = __lsx_vldi(0); + if (__lsx_bz_v(__lsx_vslt_hu(v_07ff, utf16_packed))) { + // 1. prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 + // expected output : [110a|aaaa|10bb|bbbb] x 8 + + // t0 = [000a|aaaa|bbbb|bb00] + const __m128i t0 = __lsx_vslli_h(utf16_packed, 2); + // t1 = [000a|aaaa|0000|0000] + const __m128i t1 = __lsx_vand_v(t0, lsx_splat_u16(0x1f00)); + // t2 = [0000|0000|00bb|bbbb] + const __m128i t2 = __lsx_vand_v(utf16_packed, __lsx_vrepli_h(0x3f)); + // t3 = [000a|aaaa|00bb|bbbb] + const __m128i t3 = __lsx_vor_v(t1, t2); + // t4 = [110a|aaaa|10bb|bbbb] + const __m128i t4 = __lsx_vor_v(t3, v_c080); + // 2. merge ASCII and 2-byte codewords + __m128i one_byte_bytemask = + __lsx_vsle_hu(utf16_packed, __lsx_vrepli_h(0x7F /*0x007F*/)); + __m128i utf8_unpacked = + __lsx_vbitsel_v(t4, utf16_packed, one_byte_bytemask); + // 3. prepare bitmask for 8-bit lookup + uint32_t m2 = + __lsx_vpickve2gr_bu(__lsx_vmskltz_h(one_byte_bytemask), 0); + // 4. pack the bytes + const uint8_t *row = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lsx_1_2_utf8_bytes_mask[m2]][0]; + __m128i shuffle = __lsx_vld(row, 1); + __m128i utf8_packed = __lsx_vshuf_b(zero, utf8_unpacked, shuffle); + // 5. 
store bytes + __lsx_vst(utf8_packed, utf8_output, 0); + + // 6. adjust pointers + buf += 8; + utf8_output += row[0]; + continue; + } else { + // case: code units from register produce either 1, 2 or 3 UTF-8 bytes + forbidden_bytemask = __lsx_vor_v( + __lsx_vand_v( + __lsx_vsle_h(utf16_packed, v_dfff), // utf16_packed <= 0xdfff + __lsx_vsle_h(v_d800, utf16_packed)), // utf16_packed >= 0xd800 + forbidden_bytemask); + if (__lsx_bnz_v(forbidden_bytemask)) { + return std::make_pair(result(error_code::SURROGATE, buf - start), + reinterpret_cast(utf8_output)); + } + /* In this branch we handle three cases: + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - single + UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two + UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - three + UTF-8 bytes + + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. + + We precompute byte 1 for case #1 and the common byte for cases #2 & #3 + in register t2. + + We precompute byte 1 for case #3 and -- **conditionally** -- precompute + either byte 1 for case #2 or byte 2 for case #3. Note that they + differ by exactly one bit. + + Finally from these two code units we build proper UTF-8 sequence, taking + into account the case (i.e, the number of bytes to write). 
+ */ + /** + * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: + * t2 => [0ccc|cccc] [10cc|cccc] + * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) + */ + // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] + __m128i t0 = __lsx_vpickev_b(utf16_packed, utf16_packed); + t0 = __lsx_vilvl_b(t0, t0); + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] + __m128i v_3f7f = __lsx_vreplgr2vr_h(uint16_t(0x3F7F)); + __m128i t1 = __lsx_vand_v(t0, v_3f7f); + // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] + __m128i t2 = __lsx_vor_v(t1, lsx_splat_u16(0x8000)); + + // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] + __m128i s0 = __lsx_vsrli_h(utf16_packed, 12); + // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] + __m128i s1 = __lsx_vslli_h(utf16_packed, 2); + // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000] + s1 = __lsx_vand_v(s1, lsx_splat_u16(0x3F00)); + // [00bb|bbbb|0000|aaaa] + __m128i s2 = __lsx_vor_v(s0, s1); + // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] + __m128i v_c0e0 = __lsx_vreplgr2vr_h(uint16_t(0xC0E0)); + __m128i s3 = __lsx_vor_v(s2, v_c0e0); + // __m128i v_07ff = vmovq_n_u16((uint16_t)0x07FF); + __m128i one_or_two_bytes_bytemask = __lsx_vsle_hu(utf16_packed, v_07ff); + __m128i m0 = + __lsx_vandn_v(one_or_two_bytes_bytemask, lsx_splat_u16(0x4000)); + __m128i s4 = __lsx_vxor_v(s3, m0); + + // 4. expand code units 16-bit => 32-bit + __m128i out0 = __lsx_vilvl_h(s4, t2); + __m128i out1 = __lsx_vilvh_h(s4, t2); + + // 5. 
compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle + __m128i one_byte_bytemask = + __lsx_vsle_hu(utf16_packed, __lsx_vrepli_h(0x7F)); + + __m128i one_or_two_bytes_bytemask_u16_to_u32_low = + __lsx_vilvl_h(one_or_two_bytes_bytemask, zero); + __m128i one_or_two_bytes_bytemask_u16_to_u32_high = + __lsx_vilvh_h(one_or_two_bytes_bytemask, zero); + + __m128i one_byte_bytemask_u16_to_u32_low = + __lsx_vilvl_h(one_byte_bytemask, one_byte_bytemask); + __m128i one_byte_bytemask_u16_to_u32_high = + __lsx_vilvh_h(one_byte_bytemask, one_byte_bytemask); + + const uint32_t mask0 = + __lsx_vpickve2gr_bu(__lsx_vmskltz_h(__lsx_vor_v( + one_or_two_bytes_bytemask_u16_to_u32_low, + one_byte_bytemask_u16_to_u32_low)), + 0); + const uint32_t mask1 = + __lsx_vpickve2gr_bu(__lsx_vmskltz_h(__lsx_vor_v( + one_or_two_bytes_bytemask_u16_to_u32_high, + one_byte_bytemask_u16_to_u32_high)), + 0); + + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; + __m128i shuffle0 = __lsx_vld(row0, 1); + __m128i utf8_0 = __lsx_vshuf_b(zero, out0, shuffle0); + + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; + __m128i shuffle1 = __lsx_vld(row1, 1); + __m128i utf8_1 = __lsx_vshuf_b(zero, out1, shuffle1); + + __lsx_vst(utf8_0, utf8_output, 0); + utf8_output += row0[0]; + __lsx_vst(utf8_1, utf8_output, 0); + utf8_output += row1[0]; + + buf += 8; + } + // At least one 32-bit word will produce a surrogate pair in UTF-16 <=> + // will produce four UTF-8 bytes. + } else { + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. 
+ size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint32_t word = buf[k]; + if ((word & 0xFFFFFF80) == 0) { + *utf8_output++ = char(word); + } else if ((word & 0xFFFFF800) == 0) { + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xFFFF0000) == 0) { + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair( + result(error_code::SURROGATE, buf - start + k), + reinterpret_cast(utf8_output)); + } + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { + if (word > 0x10FFFF) { + return std::make_pair( + result(error_code::TOO_LARGE, buf - start + k), + reinterpret_cast(utf8_output)); + } + *utf8_output++ = char((word >> 18) | 0b11110000); + *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } + } + buf += k; + } + } // while + + return std::make_pair(result(error_code::SUCCESS, buf - start), + reinterpret_cast(utf8_output)); +} +/* end file src/lsx/lsx_convert_utf32_to_utf8.cpp */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +/* begin file src/lsx/lsx_convert_utf32_to_utf16.cpp */ +template +std::pair +lsx_convert_utf32_to_utf16(const char32_t *buf, size_t len, + char16_t *utf16_out) { + uint16_t *utf16_output = reinterpret_cast(utf16_out); + const char32_t *end = buf + len; + + __m128i forbidden_bytemask = __lsx_vrepli_h(0); + __m128i v_d800 = lsx_splat_u16(0xd800); + __m128i v_dfff = lsx_splat_u16(0xdfff); + while (end - buf >= 8) { + __m128i in0 = __lsx_vld(reinterpret_cast(buf), 0); + __m128i in1 = __lsx_vld(reinterpret_cast(buf), 16); + + // Check if no bits 
set above 16th + if (__lsx_bz_v(__lsx_vpickod_h(in1, in0))) { + __m128i utf16_packed = __lsx_vpickev_h(in1, in0); + forbidden_bytemask = __lsx_vor_v( + __lsx_vand_v( + __lsx_vsle_h(utf16_packed, v_dfff), // utf16_packed <= 0xdfff + __lsx_vsle_h(v_d800, utf16_packed)), // utf16_packed >= 0xd800 + forbidden_bytemask); + + if simdutf_constexpr (!match_system(big_endian)) { + utf16_packed = lsx_swap_bytes(utf16_packed); + } + __lsx_vst(utf16_packed, utf16_output, 0); + utf16_output += 8; + buf += 8; + } else { + size_t forward = 3; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint32_t word = buf[k]; + if ((word & 0xFFFF0000) == 0) { + // will not generate a surrogate pair + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair(nullptr, + reinterpret_cast(utf16_output)); + } + *utf16_output++ = !match_system(big_endian) + ? char16_t(word >> 8 | word << 8) + : char16_t(word); + } else { + // will generate a surrogate pair + if (word > 0x10FFFF) { + return std::make_pair(nullptr, + reinterpret_cast(utf16_output)); + } + word -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); + if simdutf_constexpr (!match_system(big_endian)) { + high_surrogate = + uint16_t(high_surrogate >> 8 | high_surrogate << 8); + low_surrogate = uint16_t(low_surrogate << 8 | low_surrogate >> 8); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + } + } + buf += k; + } + } + + // check for invalid input + if (__lsx_bnz_v(forbidden_bytemask)) { + return std::make_pair(nullptr, reinterpret_cast(utf16_output)); + } + return std::make_pair(buf, reinterpret_cast(utf16_output)); +} + +template +std::pair +lsx_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, + char16_t *utf16_out) { + uint16_t *utf16_output = reinterpret_cast(utf16_out); + const char32_t *start = buf; + 
const char32_t *end = buf + len; + + __m128i forbidden_bytemask = __lsx_vrepli_h(0); + __m128i v_d800 = lsx_splat_u16(0xd800); + __m128i v_dfff = lsx_splat_u16(0xdfff); + + while (end - buf >= 8) { + __m128i in0 = __lsx_vld(reinterpret_cast(buf), 0); + __m128i in1 = __lsx_vld(reinterpret_cast(buf), 16); + // Check if no bits set above 16th + if (__lsx_bz_v(__lsx_vpickod_h(in1, in0))) { + __m128i utf16_packed = __lsx_vpickev_h(in1, in0); + + forbidden_bytemask = __lsx_vor_v( + __lsx_vand_v( + __lsx_vsle_h(utf16_packed, v_dfff), // utf16_packed <= 0xdfff + __lsx_vsle_h(v_d800, utf16_packed)), // utf16_packed >= 0xd800 + forbidden_bytemask); + if (__lsx_bnz_v(forbidden_bytemask)) { + return std::make_pair(result(error_code::SURROGATE, buf - start), + reinterpret_cast(utf16_output)); + } + + if simdutf_constexpr (!match_system(big_endian)) { + utf16_packed = lsx_swap_bytes(utf16_packed); + } + + __lsx_vst(utf16_packed, utf16_output, 0); + utf16_output += 8; + buf += 8; + } else { + size_t forward = 3; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint32_t word = buf[k]; + if ((word & 0xFFFF0000) == 0) { + // will not generate a surrogate pair + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair( + result(error_code::SURROGATE, buf - start + k), + reinterpret_cast(utf16_output)); + } + *utf16_output++ = !match_system(big_endian) + ? 
char16_t(word >> 8 | word << 8) + : char16_t(word); + } else { + // will generate a surrogate pair + if (word > 0x10FFFF) { + return std::make_pair( + result(error_code::TOO_LARGE, buf - start + k), + reinterpret_cast(utf16_output)); + } + word -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); + if simdutf_constexpr (!match_system(big_endian)) { + high_surrogate = + uint16_t(high_surrogate >> 8 | high_surrogate << 8); + low_surrogate = uint16_t(low_surrogate << 8 | low_surrogate >> 8); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + } + } + buf += k; + } + } + + return std::make_pair(result(error_code::SUCCESS, buf - start), + reinterpret_cast(utf16_output)); +} +/* end file src/lsx/lsx_convert_utf32_to_utf16.cpp */ +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +#if SIMDUTF_FEATURE_BASE64 +/* begin file src/lsx/lsx_base64.cpp */ +/** + * References and further reading: + * + * Wojciech Muła, Daniel Lemire, Base64 encoding and decoding at almost the + * speed of a memory copy, Software: Practice and Experience 50 (2), 2020. + * https://arxiv.org/abs/1910.05109 + * + * Wojciech Muła, Daniel Lemire, Faster Base64 Encoding and Decoding using AVX2 + * Instructions, ACM Transactions on the Web 12 (3), 2018. + * https://arxiv.org/abs/1704.00605 + * + * Simon Josefsson. 2006. The Base16, Base32, and Base64 Data Encodings. + * https://tools.ietf.org/html/rfc4648. (2006). Internet Engineering Task Force, + * Request for Comments: 4648. + * + * Alfred Klomp. 2014a. Fast Base64 encoding/decoding with SSE vectorization. + * http://www.alfredklomp.com/programming/sse-base64/. (2014). + * + * Alfred Klomp. 2014b. Fast Base64 stream encoder/decoder in C99, with SIMD + * acceleration. https://github.com/aklomp/base64. (2014). + * + * Hanson Char. 2014. A Fast and Correct Base 64 Codec. (2014). 
+ * https://aws.amazon.com/blogs/developer/a-fast-and-correct-base-64-codec/ + * + * Nick Kopp. 2013. Base64 Encoding on a GPU. + * https://www.codeproject.com/Articles/276993/Base-Encoding-on-a-GPU. (2013). + */ + +template +size_t encode_base64(char *dst, const char *src, size_t srclen, + base64_options options) { + // credit: Wojciech Muła + // SSE (lookup: pshufb improved unrolled) + const uint8_t *input = (const uint8_t *)src; + static const char *lookup_tbl = + isbase64url + ? "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" + : "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + uint8_t *out = (uint8_t *)dst; + + v16u8 shuf; + __m128i v_fc0fc00, v_3f03f0, shift_r, shift_l, base64_tbl0, base64_tbl1, + base64_tbl2, base64_tbl3; + if (srclen >= 16) { + shuf = v16u8{1, 0, 2, 1, 4, 3, 5, 4, 7, 6, 8, 7, 10, 9, 11, 10}; + v_fc0fc00 = __lsx_vreplgr2vr_w(uint32_t(0x0fc0fc00)); + v_3f03f0 = __lsx_vreplgr2vr_w(uint32_t(0x003f03f0)); + shift_r = __lsx_vreplgr2vr_w(uint32_t(0x0006000a)); + shift_l = __lsx_vreplgr2vr_w(uint32_t(0x00080004)); + base64_tbl0 = __lsx_vld(lookup_tbl, 0); + base64_tbl1 = __lsx_vld(lookup_tbl, 16); + base64_tbl2 = __lsx_vld(lookup_tbl, 32); + base64_tbl3 = __lsx_vld(lookup_tbl, 48); + } + + size_t i = 0; + for (; i + 52 <= srclen; i += 48) { + __m128i in0 = + __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 0); + __m128i in1 = + __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 1); + __m128i in2 = + __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 2); + __m128i in3 = + __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 3); + + in0 = __lsx_vshuf_b(in0, in0, (__m128i)shuf); + in1 = __lsx_vshuf_b(in1, in1, (__m128i)shuf); + in2 = __lsx_vshuf_b(in2, in2, (__m128i)shuf); + in3 = __lsx_vshuf_b(in3, in3, (__m128i)shuf); + + __m128i t0_0 = __lsx_vand_v(in0, v_fc0fc00); + __m128i t0_1 = __lsx_vand_v(in1, v_fc0fc00); + __m128i t0_2 = __lsx_vand_v(in2, v_fc0fc00); + __m128i t0_3 = __lsx_vand_v(in3, v_fc0fc00); + + __m128i t1_0 = 
__lsx_vsrl_h(t0_0, shift_r); + __m128i t1_1 = __lsx_vsrl_h(t0_1, shift_r); + __m128i t1_2 = __lsx_vsrl_h(t0_2, shift_r); + __m128i t1_3 = __lsx_vsrl_h(t0_3, shift_r); + + __m128i t2_0 = __lsx_vand_v(in0, v_3f03f0); + __m128i t2_1 = __lsx_vand_v(in1, v_3f03f0); + __m128i t2_2 = __lsx_vand_v(in2, v_3f03f0); + __m128i t2_3 = __lsx_vand_v(in3, v_3f03f0); + + __m128i t3_0 = __lsx_vsll_h(t2_0, shift_l); + __m128i t3_1 = __lsx_vsll_h(t2_1, shift_l); + __m128i t3_2 = __lsx_vsll_h(t2_2, shift_l); + __m128i t3_3 = __lsx_vsll_h(t2_3, shift_l); + + __m128i input0 = __lsx_vor_v(t1_0, t3_0); + __m128i input0_shuf0 = __lsx_vshuf_b(base64_tbl1, base64_tbl0, input0); + __m128i input0_shuf1 = __lsx_vshuf_b(base64_tbl3, base64_tbl2, + __lsx_vsub_b(input0, __lsx_vldi(32))); + __m128i input0_mask = __lsx_vslei_bu(input0, 31); + __m128i input0_result = + __lsx_vbitsel_v(input0_shuf1, input0_shuf0, input0_mask); + __lsx_vst(input0_result, reinterpret_cast<__m128i *>(out), 0); + out += 16; + + __m128i input1 = __lsx_vor_v(t1_1, t3_1); + __m128i input1_shuf0 = __lsx_vshuf_b(base64_tbl1, base64_tbl0, input1); + __m128i input1_shuf1 = __lsx_vshuf_b(base64_tbl3, base64_tbl2, + __lsx_vsub_b(input1, __lsx_vldi(32))); + __m128i input1_mask = __lsx_vslei_bu(input1, 31); + __m128i input1_result = + __lsx_vbitsel_v(input1_shuf1, input1_shuf0, input1_mask); + __lsx_vst(input1_result, reinterpret_cast<__m128i *>(out), 0); + out += 16; + + __m128i input2 = __lsx_vor_v(t1_2, t3_2); + __m128i input2_shuf0 = __lsx_vshuf_b(base64_tbl1, base64_tbl0, input2); + __m128i input2_shuf1 = __lsx_vshuf_b(base64_tbl3, base64_tbl2, + __lsx_vsub_b(input2, __lsx_vldi(32))); + __m128i input2_mask = __lsx_vslei_bu(input2, 31); + __m128i input2_result = + __lsx_vbitsel_v(input2_shuf1, input2_shuf0, input2_mask); + __lsx_vst(input2_result, reinterpret_cast<__m128i *>(out), 0); + out += 16; + + __m128i input3 = __lsx_vor_v(t1_3, t3_3); + __m128i input3_shuf0 = __lsx_vshuf_b(base64_tbl1, base64_tbl0, input3); + __m128i 
input3_shuf1 = __lsx_vshuf_b(base64_tbl3, base64_tbl2, + __lsx_vsub_b(input3, __lsx_vldi(32))); + __m128i input3_mask = __lsx_vslei_bu(input3, 31); + __m128i input3_result = + __lsx_vbitsel_v(input3_shuf1, input3_shuf0, input3_mask); + __lsx_vst(input3_result, reinterpret_cast<__m128i *>(out), 0); + out += 16; + } + for (; i + 16 <= srclen; i += 12) { + + __m128i in = __lsx_vld(reinterpret_cast(input + i), 0); + + // bytes from groups A, B and C are needed in separate 32-bit lanes + // in = [DDDD|CCCC|BBBB|AAAA] + // + // an input triplet has layout + // [????????|ccdddddd|bbbbcccc|aaaaaabb] + // byte 3 byte 2 byte 1 byte 0 -- byte 3 comes from the next + // triplet + // + // shuffling changes the order of bytes: 1, 0, 2, 1 + // [bbbbcccc|ccdddddd|aaaaaabb|bbbbcccc] + // ^^^^ ^^^^^^^^ ^^^^^^^^ ^^^^ + // processed bits + in = __lsx_vshuf_b(in, in, (__m128i)shuf); + + // unpacking + // t0 = [0000cccc|cc000000|aaaaaa00|00000000] + __m128i t0 = __lsx_vand_v(in, v_fc0fc00); + // t1 = [00000000|00cccccc|00000000|00aaaaaa] + // ((c >> 6), (a >> 10)) + __m128i t1 = __lsx_vsrl_h(t0, shift_r); + + // t2 = [00000000|00dddddd|000000bb|bbbb0000] + __m128i t2 = __lsx_vand_v(in, v_3f03f0); + // t3 = [00dddddd|00000000|00bbbbbb|00000000] + // ((d << 8), (b << 4)) + __m128i t3 = __lsx_vsll_h(t2, shift_l); + + // res = [00dddddd|00cccccc|00bbbbbb|00aaaaaa] = t1 | t3 + __m128i indices = __lsx_vor_v(t1, t3); + + __m128i indices_shuf0 = __lsx_vshuf_b(base64_tbl1, base64_tbl0, indices); + __m128i indices_shuf1 = __lsx_vshuf_b( + base64_tbl3, base64_tbl2, __lsx_vsub_b(indices, __lsx_vldi(32))); + __m128i indices_mask = __lsx_vslei_bu(indices, 31); + __m128i indices_result = + __lsx_vbitsel_v(indices_shuf1, indices_shuf0, indices_mask); + + __lsx_vst(indices_result, reinterpret_cast<__m128i *>(out), 0); + out += 16; + } + + return i / 3 * 4 + scalar::base64::tail_encode_base64((char *)out, src + i, + srclen - i, options); +} + +static inline void compress(__m128i data, uint16_t mask, char 
*output) { + if (mask == 0) { + __lsx_vst(data, reinterpret_cast<__m128i *>(output), 0); + return; + } + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + + v2u64 shufmask = {tables::base64::thintable_epi8[mask1], + tables::base64::thintable_epi8[mask2]}; + + // we increment by 0x08 the second half of the mask + v4u32 hi = {0, 0, 0x08080808, 0x08080808}; + __m128i shufmask1 = __lsx_vadd_b((__m128i)shufmask, (__m128i)hi); + + // this is the version "nearly pruned" + __m128i pruned = __lsx_vshuf_b(data, data, shufmask1); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = tables::base64::BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. 
+ __m128i compactmask = + __lsx_vld(reinterpret_cast( + tables::base64::pshufb_combine_table + pop1 * 8), + 0); + __m128i answer = __lsx_vshuf_b(pruned, pruned, compactmask); + + __lsx_vst(answer, reinterpret_cast<__m128i *>(output), 0); +} + +struct block64 { + __m128i chunks[4]; +}; + +template +static inline uint16_t to_base64_mask(__m128i *src, bool *error) { + const v16u8 ascii_space_tbl = {0x20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x9, 0xa, 0x0, 0xc, 0xd, 0x0, 0x0}; + // credit: aqrit + /* + '0'(0x30)-'9'(0x39) => delta_values_index = 4 + 'A'(0x41)-'Z'(0x5a) => delta_values_index = 4/5/12(4+8) + 'a'(0x61)-'z'(0x7a) => delta_values_index = 6/7/14(6+8) + '+'(0x2b) => delta_values_index = 3 + '/'(0x2f) => delta_values_index = 2+8 = 10 + '-'(0x2d) => delta_values_index = 2+8 = 10 + '_'(0x5f) => delta_values_index = 5+8 = 13 + */ + v16u8 delta_asso; + if (default_or_url) { + delta_asso = v16u8{0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x16}; + } else { + delta_asso = v16u8{0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, + 0x0, 0x0, 0x0, 0x0, 0x0, 0xF, 0x0, 0xF}; + } + v16i8 delta_values; + if (default_or_url) { + delta_values = + v16i8{int8_t(0xBF), int8_t(0xE0), int8_t(0xB9), int8_t(0x13), + int8_t(0x04), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), + int8_t(0xB9), int8_t(0x00), int8_t(0xFF), int8_t(0x11), + int8_t(0xFF), int8_t(0xBF), int8_t(0x10), int8_t(0xB9)}; + } else if (base64_url) { + delta_values = + v16i8{int8_t(0x00), int8_t(0x00), int8_t(0x00), int8_t(0x13), + int8_t(0x04), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), + int8_t(0xB9), int8_t(0x00), int8_t(0x11), int8_t(0xC3), + int8_t(0xBF), int8_t(0xE0), int8_t(0xB9), int8_t(0xB9)}; + } else { + delta_values = + v16i8{int8_t(0x00), int8_t(0x00), int8_t(0x00), int8_t(0x13), + int8_t(0x04), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), + int8_t(0xB9), int8_t(0x00), int8_t(0x10), int8_t(0xC3), + int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), int8_t(0xB9)}; + } + + v16u8 
check_asso; + if (default_or_url) { + check_asso = v16u8{0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x03, 0x07, 0x0B, 0x0E, 0x0B, 0x06}; + } else if (base64_url) { + check_asso = v16u8{0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x03, 0x07, 0x0B, 0x06, 0x0B, 0x12}; + } else { + check_asso = v16u8{0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x03, 0x07, 0x0B, 0x0B, 0x0B, 0x0F}; + } + + v16i8 check_values; + if (default_or_url) { + check_values = + v16i8{int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), + int8_t(0xCF), int8_t(0xBF), int8_t(0xD5), int8_t(0xA6), + int8_t(0xB5), int8_t(0xA1), int8_t(0x00), int8_t(0x80), + int8_t(0x00), int8_t(0x80), int8_t(0x00), int8_t(0x80)}; + } else if (base64_url) { + check_values = v16i8{int8_t(0x0), int8_t(0x80), int8_t(0x80), int8_t(0x80), + int8_t(0xCF), int8_t(0xBF), int8_t(0xD3), int8_t(0xA6), + int8_t(0xB5), int8_t(0x86), int8_t(0xD0), int8_t(0x80), + int8_t(0xB0), int8_t(0x80), int8_t(0x0), int8_t(0x0)}; + } else { + check_values = + v16i8{int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), + int8_t(0xCF), int8_t(0xBF), int8_t(0xD5), int8_t(0xA6), + int8_t(0xB5), int8_t(0x86), int8_t(0xD1), int8_t(0x80), + int8_t(0xB1), int8_t(0x80), int8_t(0x91), int8_t(0x80)}; + } + + const __m128i shifted = __lsx_vsrli_b(*src, 3); + __m128i asso_index = __lsx_vand_v(*src, __lsx_vldi(0xF)); + const __m128i delta_hash = + __lsx_vavgr_bu(__lsx_vshuf_b((__m128i)delta_asso, (__m128i)delta_asso, + (__m128i)asso_index), + shifted); + const __m128i check_hash = + __lsx_vavgr_bu(__lsx_vshuf_b((__m128i)check_asso, (__m128i)check_asso, + (__m128i)asso_index), + shifted); + + const __m128i out = + __lsx_vsadd_b(__lsx_vshuf_b((__m128i)delta_values, (__m128i)delta_values, + (__m128i)delta_hash), + *src); + const __m128i chk = + __lsx_vsadd_b(__lsx_vshuf_b((__m128i)check_values, (__m128i)check_values, + (__m128i)check_hash), + *src); + unsigned int mask = 
__lsx_vpickve2gr_hu(__lsx_vmskltz_b(chk), 0); + if (mask) { + __m128i ascii_space = __lsx_vseq_b(__lsx_vshuf_b((__m128i)ascii_space_tbl, + (__m128i)ascii_space_tbl, + (__m128i)asso_index), + *src); + *error |= + (mask != __lsx_vpickve2gr_hu(__lsx_vmskltz_b((__m128i)ascii_space), 0)); + } + + *src = out; + return (uint16_t)mask; +} + +template +static inline uint64_t to_base64_mask(block64 *b, bool *error) { + *error = 0; + uint64_t m0 = + to_base64_mask(&b->chunks[0], error); + uint64_t m1 = + to_base64_mask(&b->chunks[1], error); + uint64_t m2 = + to_base64_mask(&b->chunks[2], error); + uint64_t m3 = + to_base64_mask(&b->chunks[3], error); + return m0 | (m1 << 16) | (m2 << 32) | (m3 << 48); +} + +static inline void copy_block(block64 *b, char *output) { + __lsx_vst(b->chunks[0], reinterpret_cast<__m128i *>(output), 0); + __lsx_vst(b->chunks[1], reinterpret_cast<__m128i *>(output), 16); + __lsx_vst(b->chunks[2], reinterpret_cast<__m128i *>(output), 32); + __lsx_vst(b->chunks[3], reinterpret_cast<__m128i *>(output), 48); +} + +static inline uint64_t compress_block(block64 *b, uint64_t mask, char *output) { + uint64_t nmask = ~mask; + uint64_t count = + __lsx_vpickve2gr_d(__lsx_vpcnt_h(__lsx_vreplgr2vr_d(nmask)), 0); + uint16_t *count_ptr = (uint16_t *)&count; + compress(b->chunks[0], uint16_t(mask), output); + compress(b->chunks[1], uint16_t(mask >> 16), output + count_ptr[0]); + compress(b->chunks[2], uint16_t(mask >> 32), + output + count_ptr[0] + count_ptr[1]); + compress(b->chunks[3], uint16_t(mask >> 48), + output + count_ptr[0] + count_ptr[1] + count_ptr[2]); + return count_ones(nmask); +} + +template bool is_power_of_two(T x) { return (x & (x - 1)) == 0; } + +inline size_t compress_block_single(block64 *b, uint64_t mask, char *output) { + const size_t pos64 = trailing_zeroes(mask); + const int8_t pos = pos64 & 0xf; + // Predefine the index vector + const v16u8 v1 = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + + switch (pos64 >> 4) { + case 0b00: 
{ + const __m128i v0 = __lsx_vreplgr2vr_b((uint8_t)(pos - 1)); + const __m128i v2 = __lsx_vslt_b(v0, (__m128i)v1); // v1 > v0 + const __m128i sh = __lsx_vsub_b((__m128i)v1, v2); + const __m128i compressed = __lsx_vshuf_b(b->chunks[0], b->chunks[0], sh); + __lsx_vst(compressed, reinterpret_cast<__m128i *>(output + 0 * 16), 0); + __lsx_vst(b->chunks[1], reinterpret_cast<__m128i *>(output + 1 * 16 - 1), + 0); + __lsx_vst(b->chunks[2], reinterpret_cast<__m128i *>(output + 2 * 16 - 1), + 0); + __lsx_vst(b->chunks[3], reinterpret_cast<__m128i *>(output + 3 * 16 - 1), + 0); + } break; + + case 0b01: { + __lsx_vst(b->chunks[0], reinterpret_cast<__m128i *>(output + 0 * 16), 0); + + const __m128i v0 = __lsx_vreplgr2vr_b((uint8_t)(pos - 1)); + const __m128i v2 = __lsx_vslt_b(v0, (__m128i)v1); + const __m128i sh = __lsx_vsub_b((__m128i)v1, v2); + const __m128i compressed = __lsx_vshuf_b(b->chunks[1], b->chunks[1], sh); + + __lsx_vst(compressed, reinterpret_cast<__m128i *>(output + 1 * 16), 0); + __lsx_vst(b->chunks[2], reinterpret_cast<__m128i *>(output + 2 * 16 - 1), + 0); + __lsx_vst(b->chunks[3], reinterpret_cast<__m128i *>(output + 3 * 16 - 1), + 0); + } break; + + case 0b10: { + __lsx_vst(b->chunks[0], reinterpret_cast<__m128i *>(output + 0 * 16), 0); + __lsx_vst(b->chunks[1], reinterpret_cast<__m128i *>(output + 1 * 16), 0); + + const __m128i v0 = __lsx_vreplgr2vr_b((uint8_t)(pos - 1)); + const __m128i v2 = __lsx_vslt_b(v0, (__m128i)v1); + const __m128i sh = __lsx_vsub_b((__m128i)v1, v2); + const __m128i compressed = __lsx_vshuf_b(b->chunks[2], b->chunks[2], sh); + + __lsx_vst(compressed, reinterpret_cast<__m128i *>(output + 2 * 16), 0); + __lsx_vst(b->chunks[3], reinterpret_cast<__m128i *>(output + 3 * 16 - 1), + 0); + } break; + + case 0b11: { + __lsx_vst(b->chunks[0], reinterpret_cast<__m128i *>(output + 0 * 16), 0); + __lsx_vst(b->chunks[1], reinterpret_cast<__m128i *>(output + 1 * 16), 0); + __lsx_vst(b->chunks[2], reinterpret_cast<__m128i *>(output + 2 * 16), 0); + 
+ const __m128i v0 = __lsx_vreplgr2vr_b((uint8_t)(pos - 1)); + const __m128i v2 = __lsx_vslt_b(v0, (__m128i)v1); + const __m128i sh = __lsx_vsub_b((__m128i)v1, v2); + const __m128i compressed = __lsx_vshuf_b(b->chunks[3], b->chunks[3], sh); + + __lsx_vst(compressed, reinterpret_cast<__m128i *>(output + 3 * 16), 0); + } break; + } + return 63; +} + +// The caller of this function is responsible to ensure that there are 64 bytes +// available from reading at src. The data is read into a block64 structure. +static inline void load_block(block64 *b, const char *src) { + b->chunks[0] = __lsx_vld(reinterpret_cast(src), 0); + b->chunks[1] = __lsx_vld(reinterpret_cast(src), 16); + b->chunks[2] = __lsx_vld(reinterpret_cast(src), 32); + b->chunks[3] = __lsx_vld(reinterpret_cast(src), 48); +} + +// The caller of this function is responsible to ensure that there are 128 bytes +// available from reading at src. The data is read into a block64 structure. +static inline void load_block(block64 *b, const char16_t *src) { + __m128i m1 = __lsx_vld(reinterpret_cast(src), 0); + __m128i m2 = __lsx_vld(reinterpret_cast(src), 16); + __m128i m3 = __lsx_vld(reinterpret_cast(src), 32); + __m128i m4 = __lsx_vld(reinterpret_cast(src), 48); + __m128i m5 = __lsx_vld(reinterpret_cast(src), 64); + __m128i m6 = __lsx_vld(reinterpret_cast(src), 80); + __m128i m7 = __lsx_vld(reinterpret_cast(src), 96); + __m128i m8 = __lsx_vld(reinterpret_cast(src), 112); + b->chunks[0] = __lsx_vssrlni_bu_h(m2, m1, 0); + b->chunks[1] = __lsx_vssrlni_bu_h(m4, m3, 0); + b->chunks[2] = __lsx_vssrlni_bu_h(m6, m5, 0); + b->chunks[3] = __lsx_vssrlni_bu_h(m8, m7, 0); +} + +static inline void base64_decode(char *out, __m128i str) { + __m128i t0 = __lsx_vor_v( + __lsx_vslli_w(str, 26), + __lsx_vslli_w(__lsx_vand_v(str, lsx_splat_u32(0x0000FF00)), 12)); + __m128i t1 = __lsx_vsrli_w(__lsx_vand_v(str, lsx_splat_u32(0x003F0000)), 2); + __m128i t2 = __lsx_vor_v(t0, t1); + __m128i t3 = __lsx_vor_v(t2, __lsx_vsrli_w(str, 16)); + 
const v16u8 pack_shuffle = {3, 2, 1, 7, 6, 5, 11, 10, + 9, 15, 14, 13, 0, 0, 0, 0}; + t3 = __lsx_vshuf_b(t3, t3, (__m128i)pack_shuffle); + + // Store the output: + // we only need 12. + __lsx_vstelm_d(t3, out, 0, 0); + __lsx_vstelm_w(t3, out + 8, 0, 2); +} +// decode 64 bytes and output 48 bytes +static inline void base64_decode_block(char *out, const char *src) { + base64_decode(out, __lsx_vld(reinterpret_cast(src), 0)); + base64_decode(out + 12, + __lsx_vld(reinterpret_cast(src), 16)); + base64_decode(out + 24, + __lsx_vld(reinterpret_cast(src), 32)); + base64_decode(out + 36, + __lsx_vld(reinterpret_cast(src), 48)); +} +static inline void base64_decode_block_safe(char *out, const char *src) { + base64_decode_block(out, src); +} +static inline void base64_decode_block(char *out, block64 *b) { + base64_decode(out, b->chunks[0]); + base64_decode(out + 12, b->chunks[1]); + base64_decode(out + 24, b->chunks[2]); + base64_decode(out + 36, b->chunks[3]); +} +static inline void base64_decode_block_safe(char *out, block64 *b) { + base64_decode_block(out, b); +} + +template +full_result +compress_decode_base64(char *dst, const char_type *src, size_t srclen, + base64_options options, + last_chunk_handling_options last_chunk_options) { + const uint8_t *to_base64 = + default_or_url ? tables::base64::to_base64_default_or_url_value + : (base64_url ? 
tables::base64::to_base64_url_value + : tables::base64::to_base64_value); + auto ri = simdutf::scalar::base64::find_end(src, srclen, options); + size_t equallocation = ri.equallocation; + size_t equalsigns = ri.equalsigns; + srclen = ri.srclen; + size_t full_input_length = ri.full_input_length; + if (srclen == 0) { + if (!ignore_garbage && equalsigns > 0) { + return {INVALID_BASE64_CHARACTER, equallocation, 0}; + } + return {SUCCESS, full_input_length, 0}; + } + const char_type *const srcinit = src; + const char *const dstinit = dst; + const char_type *const srcend = src + srclen; + + constexpr size_t block_size = 10; + char buffer[block_size * 64]; + char *bufferptr = buffer; + if (srclen >= 64) { + const char_type *const srcend64 = src + srclen - 64; + while (src <= srcend64) { + block64 b; + load_block(&b, src); + src += 64; + bool error = false; + uint64_t badcharmask = + to_base64_mask(&b, &error); + if (badcharmask) { + if (error && !ignore_garbage) { + src -= 64; + while (src < srcend && scalar::base64::is_eight_byte(*src) && + to_base64[uint8_t(*src)] <= 64) { + src++; + } + if (src < srcend) { + // should never happen + } + return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), + size_t(dst - dstinit)}; + } + } + + if (badcharmask != 0) { + if (is_power_of_two(badcharmask)) { + bufferptr += compress_block_single(&b, badcharmask, bufferptr); + } else { + bufferptr += compress_block(&b, badcharmask, bufferptr); + } + } else { + // optimization opportunity: if bufferptr == buffer and mask == 0, we + // can avoid the call to compress_block and decode directly. 
+ copy_block(&b, bufferptr); + bufferptr += 64; + } + if (bufferptr >= (block_size - 1) * 64 + buffer) { + for (size_t i = 0; i < (block_size - 1); i++) { + base64_decode_block(dst, buffer + i * 64); + dst += 48; + } + std::memcpy(buffer, buffer + (block_size - 1) * 64, + 64); // 64 might be too much + bufferptr -= (block_size - 1) * 64; + } + } + } + char *buffer_start = buffer; + // Optimization note: if this is almost full, then it is worth our + // time, otherwise, we should just decode directly. + int last_block = (int)((bufferptr - buffer_start) % 64); + if (last_block != 0 && srcend - src + last_block >= 64) { + while ((bufferptr - buffer_start) % 64 != 0 && src < srcend) { + uint8_t val = to_base64[uint8_t(*src)]; + *bufferptr = char(val); + if ((!scalar::base64::is_eight_byte(*src) || val > 64) && + !ignore_garbage) { + return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), + size_t(dst - dstinit)}; + } + bufferptr += (val <= 63); + src++; + } + } + + for (; buffer_start + 64 <= bufferptr; buffer_start += 64) { + base64_decode_block(dst, buffer_start); + dst += 48; + } + if ((bufferptr - buffer_start) % 64 != 0) { + while (buffer_start + 4 < bufferptr) { + uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + + (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + + (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + + (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) + << 8; + // lsx is little-endian + triple = scalar::u32_swap_bytes(triple); + std::memcpy(dst, &triple, 4); + + dst += 3; + buffer_start += 4; + } + if (buffer_start + 4 <= bufferptr) { + uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + + (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + + (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + + (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) + << 8; + // lsx is little-endian + triple = scalar::u32_swap_bytes(triple); + std::memcpy(dst, &triple, 3); + + dst += 3; + buffer_start += 4; + } + // we may have 1, 2 or 3 bytes 
left and we need to decode them so let us + // backtrack + int leftover = int(bufferptr - buffer_start); + while (leftover > 0) { + if (!ignore_garbage) { + while (to_base64[uint8_t(*(src - 1))] == 64) { + src--; + } + } else { + while (to_base64[uint8_t(*(src - 1))] >= 64) { + src--; + } + } + src--; + leftover--; + } + } + if (src < srcend + equalsigns) { + full_result r = scalar::base64::base64_tail_decode( + dst, src, srcend - src, equalsigns, options, last_chunk_options); + r = scalar::base64::patch_tail_result( + r, size_t(src - srcinit), size_t(dst - dstinit), equallocation, + full_input_length, last_chunk_options); + // When is_partial(last_chunk_options) is true, we must either end with + // the end of the stream (beyond whitespace) or right after a non-ignorable + // character or at the very beginning of the stream. + // See https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64 + if (is_partial(last_chunk_options) && r.error == error_code::SUCCESS && + r.input_count < full_input_length) { + // First check if we can extend the input to the end of the stream + while (r.input_count < full_input_length && + base64_ignorable(*(srcinit + r.input_count), options)) { + r.input_count++; + } + // If we are still not at the end of the stream, then we must backtrack + // to the last non-ignorable character. 
+ if (r.input_count < full_input_length) { + while (r.input_count > 0 && + base64_ignorable(*(srcinit + r.input_count - 1), options)) { + r.input_count--; + } + } + } + return r; + } + if (equalsigns > 0 && !ignore_garbage) { + if ((size_t(dst - dstinit) % 3 == 0) || + ((size_t(dst - dstinit) % 3) + 1 + equalsigns != 4)) { + return {INVALID_BASE64_CHARACTER, equallocation, size_t(dst - dstinit)}; + } + } + return {SUCCESS, srclen, size_t(dst - dstinit)}; +} +/* end file src/lsx/lsx_base64.cpp */ +/* begin file src/lsx/lsx_find.cpp */ +simdutf_really_inline const char *util_find(const char *start, const char *end, + char character) noexcept { + if (start >= end) + return end; + + const int step = 16; + __m128i char_vec = __lsx_vreplgr2vr_b(static_cast(character)); + + while (end - start >= step) { + __m128i data = __lsx_vld(reinterpret_cast(start), 0); + __m128i cmp = __lsx_vseq_b(data, char_vec); + if (__lsx_bnz_v(cmp)) { + uint16_t mask = + static_cast(__lsx_vpickve2gr_hu(__lsx_vmsknz_b(cmp), 0)); + return start + trailing_zeroes(mask); + } + + start += step; + } + + // Handle remaining bytes with scalar loop + for (; start < end; ++start) { + if (*start == character) { + return start; + } + } + + return end; +} + +simdutf_really_inline const char16_t *util_find(const char16_t *start, + const char16_t *end, + char16_t character) noexcept { + if (start >= end) + return end; + + const int step = 8; + __m128i char_vec = __lsx_vreplgr2vr_h(static_cast(character)); + + while (end - start >= step) { + __m128i data = __lsx_vld(reinterpret_cast(start), 0); + __m128i cmp = __lsx_vseq_h(data, char_vec); + if (__lsx_bnz_v(cmp)) { + uint16_t mask = + static_cast(__lsx_vpickve2gr_hu(__lsx_vmsknz_b(cmp), 0)); + return start + trailing_zeroes(mask) / 2; + } + + start += step; + } + + // Handle remaining elements with scalar loop + for (; start < end; ++start) { + if (*start == character) { + return start; + } + } + + return end; +} +/* end file src/lsx/lsx_find.cpp */ +#endif // 
SIMDUTF_FEATURE_BASE64
+
+} // namespace
+} // namespace lsx
+} // namespace simdutf
+
+/* begin file src/generic/buf_block_reader.h */
+namespace simdutf {
+namespace lsx {
+namespace {
+
+// Walks through a buffer in block-sized increments, loading the last part with
+// spaces
+//
+// NOTE(review): the parameter list after `template` (and other angle-bracket
+// argument lists in this section) appears to be missing from this text. The
+// member definitions below use an otherwise undeclared STEP_SIZE, so it is
+// presumably the template parameter -- confirm against upstream simdutf.
+template struct buf_block_reader {
+public:
+  simdutf_really_inline buf_block_reader(const uint8_t *_buf, size_t _len);
+  simdutf_really_inline size_t block_index();
+  simdutf_really_inline bool has_full_block() const;
+  simdutf_really_inline const uint8_t *full_block() const;
+  /**
+   * Get the last block, padded with spaces.
+   *
+   * When bytes remain (len != idx), dst is first filled with STEP_SIZE space
+   * characters (0x20) and the remaining len - idx bytes are then copied over
+   * the start of it.
+   *
+   * When no bytes remain (len == idx, which is the case for len == 0), the
+   * function returns 0 immediately and does NOT write to dst; callers that
+   * still read dst must have initialized it themselves.
+   *
+   * In particular, if len == STEP_SIZE there will be 0 full_blocks and 1
+   * remainder block with STEP_SIZE bytes and no spaces for padding.
+   *
+   * @param dst destination buffer; STEP_SIZE bytes are written when bytes
+   *            remain.
+   * @return the number of effective characters in the last block.
+   */
+  simdutf_really_inline size_t get_remainder(uint8_t *dst) const;
+  simdutf_really_inline void advance();
+
+private:
+  const uint8_t *buf;
+  const size_t len;
+  // len - STEP_SIZE, clamped to 0 when len < STEP_SIZE (see constructor).
+  const size_t lenminusstep;
+  // Offset of the current block within buf; advance() adds STEP_SIZE.
+  size_t idx;
+};
+
+// Routines to print masks and text for debugging bitmask operations
+//
+// Copies sizeof(simd8x64) bytes of `text`, replacing every byte that compares
+// below ' ' as an int8_t with '_', and NUL-terminates the result.
+// The output buffer is allocated once with malloc, kept in a function-local
+// static, reused (overwritten) by every call, and never freed.
+simdutf_unused static char *format_input_text_64(const uint8_t *text) {
+  static char *buf =
+      reinterpret_cast(malloc(sizeof(simd8x64) + 1));
+  for (size_t i = 0; i < sizeof(simd8x64); i++) {
+    buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]);
+  }
+  buf[sizeof(simd8x64)] = '\0';
+  return buf;
+}
+
+// Routines to print masks and text for debugging bitmask operations
+//
+// Same as format_input_text_64 but takes an already-loaded simd8x64: the
+// register contents are stored into the buffer, then characters that compare
+// below ' ' are replaced with '_'. The buffer is a malloc'ed function-local
+// static that is reused by every call and never freed.
+simdutf_unused static char *format_input_text(const simd8x64 &in) {
+  static char *buf =
+      reinterpret_cast(malloc(sizeof(simd8x64) + 1));
+  in.store(reinterpret_cast(buf));
+  for (size_t i = 0; i < sizeof(simd8x64); i++) {
+    if (buf[i] < ' ') {
+      buf[i] = '_';
+    }
+  }
+  buf[sizeof(simd8x64)] = '\0';
+  return buf;
+}
+
+// Renders a 64-bit mask as 64 characters, bit 0 first: 'X' for a set bit and
+// ' ' for a clear bit. Uses the same reuse-a-static-malloc-buffer scheme as
+// the helpers above.
+// NOTE(review): the probe bit is built as size_t(1) << i with i up to 63;
+// this assumes size_t is 64 bits wide -- confirm for 32-bit targets.
+simdutf_unused static char *format_mask(uint64_t mask) {
+  static char *buf = reinterpret_cast(malloc(64 + 1));
+  for (size_t i = 0; i < 64; i++) {
+    buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' ';
+  }
+  buf[64] = '\0';
+  return buf;
+}
+
+// Stores the buffer pointer and length, starts at offset 0, and precomputes
+// lenminusstep = max(len - STEP_SIZE, 0) without unsigned underflow.
+template
+simdutf_really_inline
+buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len)
+    : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE},
+      idx{0} {}
+
+// Returns the current byte offset into the buffer.
+template
+simdutf_really_inline size_t buf_block_reader::block_index() {
+  return idx;
+}
+
+// True while idx < lenminusstep. The comparison is strict, so a trailing block
+// of exactly STEP_SIZE bytes is not reported as a full block; it is left for
+// get_remainder().
+template
+simdutf_really_inline bool buf_block_reader::has_full_block() const {
+  return idx < lenminusstep;
+}
+
+// Returns a pointer to the current block inside the original buffer (no copy).
+template
+simdutf_really_inline const uint8_t *
+buf_block_reader::full_block() const {
+  return &buf[idx];
+}
+
+// See the declaration for the contract. Returns 0 without touching dst when
+// nothing remains; otherwise space-fills dst and copies the len - idx tail.
+template
+simdutf_really_inline size_t
+buf_block_reader::get_remainder(uint8_t *dst) const {
+  if (len == idx) {
+    return 0;
+  } // memcpy(dst, null, 0) will trigger an error with some sanitizers
+  std::memset(dst, 0x20,
+              STEP_SIZE); // std::memset STEP_SIZE because it is more efficient
+                          // to write out 8 or 16 bytes at once.
+  std::memcpy(dst, buf + idx, len - idx);
+  return len - idx;
+}
+
+// Moves to the next block by adding STEP_SIZE to idx (no bounds check here).
+template
+simdutf_really_inline void buf_block_reader::advance() {
+  idx += STEP_SIZE;
+}
+
+} // unnamed namespace
+} // namespace lsx
+} // namespace simdutf
+/* end file src/generic/buf_block_reader.h */
+#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
+/* begin file src/generic/utf8_validation/utf8_lookup4_algorithm.h */
+namespace simdutf {
+namespace lsx {
+namespace {
+namespace utf8_validation {
+
+using namespace simd;
+
+// Classifies each (previous byte, current byte) pair with three 16-entry table
+// lookups -- high nibble of prev1, low nibble of prev1, high nibble of input --
+// and returns their bitwise AND. A bit survives only when all three tables
+// agree, i.e. when the pair matches one of the error patterns listed below.
+simdutf_really_inline simd8
+check_special_cases(const simd8 input, const simd8 prev1) {
+  // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII)
+  // Bit 1 = Too Long (ASCII followed by continuation)
+  // Bit 2 = Overlong 3-byte
+  // Bit 3 = Too Large
+  // Bit 4 = Surrogate
+  // Bit 5 = Overlong 2-byte
+  // Bit 6 = Too Large with a 1000____ second byte, and Overlong 4-byte
+  //         (TOO_LARGE_1000 and OVERLONG_4 deliberately share this bit)
+  // Bit 7 = Two Continuations
+  constexpr const uint8_t TOO_SHORT = 1 << 0;  // 11______ 0_______
+                                               // 11______ 11______
+  constexpr const uint8_t TOO_LONG = 1 << 1;   // 0_______ 10______
+  constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____
+  constexpr const uint8_t SURROGATE = 1 << 4;  // 11101101 101_____
+  constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______
+  constexpr const uint8_t TWO_CONTS = 1 << 7;  // 10______ 10______
+  constexpr const uint8_t TOO_LARGE = 1 << 3;  // 11110100 1001____
+                                               // 11110100 101_____
+                                               // 11110101 1001____
+                                               // 11110101 101_____
+                                               // 1111011_ 1001____
+                                               // 1111011_ 101_____
+                                               // 11111___ 1001____
+                                               // 11111___ 101_____
+  constexpr const uint8_t TOO_LARGE_1000 = 1 << 6;
+  // 11110101 1000____
+  // 1111011_ 1000____
+  // 11111___ 1000____
+  constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____
+
+  const simd8 byte_1_high = prev1.shr<4>().lookup_16(
+      // 0_______ ________
+      TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG,
+      TOO_LONG,
+      // 10______ ________
+      TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS,
+      // 1100____ ________
+      TOO_SHORT | OVERLONG_2,
+      // 1101____ ________
+      TOO_SHORT,
+      // 1110____ ________
+      TOO_SHORT | OVERLONG_3 | SURROGATE,
+      // 1111____ ________
+      TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4);
+  constexpr const uint8_t CARRY =
+      TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
+  const simd8 byte_1_low =
+      (prev1 & 0x0F)
+          .lookup_16(
+              // ____0000 ________
+              CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4,
+              // ____0001 ________
+              CARRY | OVERLONG_2,
+              // ____001_ ________
+              CARRY, CARRY,
+
+              // ____0100 ________
+              CARRY | TOO_LARGE,
+              // ____0101 ________
+              CARRY | TOO_LARGE | TOO_LARGE_1000,
+              // ____011_ ________
+              CARRY | TOO_LARGE | TOO_LARGE_1000,
+              CARRY | TOO_LARGE | TOO_LARGE_1000,
+
+              // ____1___ ________
+              CARRY | TOO_LARGE | TOO_LARGE_1000,
+              CARRY | TOO_LARGE | TOO_LARGE_1000,
+              CARRY | TOO_LARGE | TOO_LARGE_1000,
+              CARRY | TOO_LARGE | TOO_LARGE_1000,
+              CARRY | TOO_LARGE | TOO_LARGE_1000,
+              // ____1101 ________
+              CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE,
+              CARRY | TOO_LARGE | TOO_LARGE_1000,
+              CARRY | TOO_LARGE | TOO_LARGE_1000);
+  const simd8 byte_2_high = input.shr<4>().lookup_16(
+      // ________ 0_______
+      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
+      TOO_SHORT, TOO_SHORT,
+
+      // ________ 1000____
+      TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 |
+          OVERLONG_4,
+      // ________ 1001____
+      TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE,
+      // ________ 101_____
+      TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE,
+      TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE,
+
+      // ________ 11______
+      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT);
+  return (byte_1_high & byte_1_low & byte_2_high);
+}
+// Combines the special-case result `sc` with the multi-byte length check:
+// keeps only bit 7 (0x80) of must_be_2_3_continuation(prev2, prev3) and XORs
+// it into sc. Bit 7 is the TWO_CONTS bit of check_special_cases.
+// NOTE(review): presumably the XOR cancels TWO_CONTS where a second/third
+// continuation byte is required and raises it where one is required but
+// absent -- must_be_2_3_continuation is defined elsewhere; confirm.
+simdutf_really_inline simd8
+check_multibyte_lengths(const simd8 input,
+                        const simd8 prev_input,
+                        const simd8 sc) {
+  simd8 prev2 = input.prev<2>(prev_input);
+  simd8 prev3 = input.prev<3>(prev_input);
+  simd8 must23 =
+      simd8(must_be_2_3_continuation(prev2, prev3));
+  simd8 must23_80 = must23 & uint8_t(0x80);
+  return must23_80 ^ sc;
+}
+
+//
+// Return nonzero if there are incomplete multibyte characters at the end of the
+// block: e.g. if there is a 4-byte character, but it is 3 bytes from the end.
+//
+simdutf_really_inline simd8 is_incomplete(const simd8 input) {
+  // If the previous input's last 3 bytes match this, they're too short (they
+  // ended at EOF):
+  // ... 1111____ 111_____ 11______
+  // The table is 32 bytes long and max_value is loaded from its last
+  // sizeof(simd8) bytes, so the three thresholds always land on the final
+  // three lanes; every other lane is 255 and can never be exceeded.
+  static const uint8_t max_array[32] = {255,
+                                        255,
+                                        255,
+                                        255,
+                                        255,
+                                        255,
+                                        255,
+                                        255,
+                                        255,
+                                        255,
+                                        255,
+                                        255,
+                                        255,
+                                        255,
+                                        255,
+                                        255,
+                                        255,
+                                        255,
+                                        255,
+                                        255,
+                                        255,
+                                        255,
+                                        255,
+                                        255,
+                                        255,
+                                        255,
+                                        255,
+                                        255,
+                                        255,
+                                        0b11110000u - 1,
+                                        0b11100000u - 1,
+                                        0b11000000u - 1};
+  const simd8 max_value(
+      &max_array[sizeof(max_array) - sizeof(simd8)]);
+  return input.gt_bits(max_value);
+}
+
+// Streaming UTF-8 validator state. Feed 64-byte blocks to check_next_input(),
+// call check_eof() once after the last block, then query errors().
+struct utf8_checker {
+  // If this is nonzero, there has been a UTF-8 error.
+  simd8 error;
+  // The last input we received
+  simd8 prev_input_block;
+  // Whether the last input we received was incomplete (used for ASCII fast
+  // path)
+  simd8 prev_incomplete;
+
+  //
+  // Check whether the current bytes are valid UTF-8.
+  //
+  simdutf_really_inline void check_utf8_bytes(const simd8 input,
+                                              const simd8 prev_input) {
+    // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+
+    // lead bytes (2, 3, 4-byte leads become large positive numbers instead of
+    // small negative numbers)
+    simd8 prev1 = input.prev<1>(prev_input);
+    simd8 sc = check_special_cases(input, prev1);
+    this->error |= check_multibyte_lengths(input, prev_input, sc);
+  }
+
+  // The only problem that can happen at EOF is that a multibyte character is
+  // too short or a byte value too large in the last bytes: check_special_cases
+  // only checks for bytes too large in the first of two bytes.
+  simdutf_really_inline void check_eof() {
+    // If the previous block had incomplete UTF-8 characters at the end, an
+    // ASCII block can't possibly finish them.
+    this->error |= this->prev_incomplete;
+  }
+
+  // Validates one 64-byte block.
+  // ASCII fast path: an all-ASCII block only ORs prev_incomplete into error;
+  // prev_input_block and prev_incomplete are left unchanged on that path.
+  // Otherwise every chunk is checked against its predecessor (the first chunk
+  // against prev_input_block) and both carry-over fields are refreshed from
+  // the last chunk.
+  simdutf_really_inline void check_next_input(const simd8x64 &input) {
+    if (simdutf_likely(is_ascii(input))) {
+      this->error |= this->prev_incomplete;
+    } else {
+      // you might think that a for-loop would work, but under Visual Studio, it
+      // is not good enough.
+      static_assert((simd8x64::NUM_CHUNKS == 2) ||
+                        (simd8x64::NUM_CHUNKS == 4),
+                    "We support either two or four chunks per 64-byte block.");
+      if (simd8x64::NUM_CHUNKS == 2) {
+        this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
+        this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
+      } else if (simd8x64::NUM_CHUNKS == 4) {
+        this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
+        this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
+        this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
+        this->check_utf8_bytes(input.chunks[3], input.chunks[2]);
+      }
+      this->prev_incomplete =
+          is_incomplete(input.chunks[simd8x64::NUM_CHUNKS - 1]);
+      this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS - 1];
+    }
+  }
+
+  // do not forget to call check_eof!
+  simdutf_really_inline bool errors() const {
+    return this->error.any_bits_set_anywhere();
+  }
+
+}; // struct utf8_checker
+} // namespace utf8_validation
+
+using utf8_validation::utf8_checker;
+
+} // unnamed namespace
+} // namespace lsx
+} // namespace simdutf
+/* end file src/generic/utf8_validation/utf8_lookup4_algorithm.h */
+/* begin file src/generic/utf8_validation/utf8_validator.h */
+namespace simdutf {
+namespace lsx {
+namespace {
+namespace utf8_validation {
+
+/**
+ * Validates that the string is actual UTF-8.
+ *
+ * The input is fed to a value-initialized `checker` in 64-byte blocks. Full
+ * blocks are read in place; the final (possibly partial) block is copied into
+ * a zero-initialized local buffer through get_remainder() before being
+ * checked, and check_eof() is called once at the end.
+ *
+ * NOTE(review): the parameter list after `template` and the arguments of
+ * simd8x64 / reinterpret_cast appear to be missing from this text; `checker`
+ * is presumably the template type parameter -- confirm against upstream.
+ *
+ * @param input  bytes to validate
+ * @param length number of bytes in input
+ * @return true when the checker reports no error
+ */
+template
+bool generic_validate_utf8(const uint8_t *input, size_t length) {
+  checker c{};
+  buf_block_reader<64> reader(input, length);
+  while (reader.has_full_block()) {
+    simd::simd8x64 in(reader.full_block());
+    c.check_next_input(in);
+    reader.advance();
+  }
+  // Tail: at most one more block, staged through a local 64-byte buffer.
+  uint8_t block[64]{};
+  reader.get_remainder(block);
+  simd::simd8x64 in(block);
+  c.check_next_input(in);
+  reader.advance();
+  c.check_eof();
+  return !c.errors();
+}
+
+// Convenience overload for char input: reinterprets the pointer and forwards
+// to the templated implementation above.
+// NOTE(review): the explicit template argument of the forwarded call appears
+// to be missing from this text (presumably the checker type) -- confirm.
+bool generic_validate_utf8(const char *input, size_t length) {
+  return generic_validate_utf8(
+      reinterpret_cast(input), length);
+}
+
+/**
+ * Validates that the string is actual UTF-8 and stops on errors.
+ *
+ * Works like generic_validate_utf8 but checks c.errors() after every block.
+ * On the first error, `count` (the number of bytes already accepted, a
+ * multiple of 64) is stepped back by one when nonzero, and
+ * scalar::utf8::rewind_and_validate_with_errors is run from input + count
+ * over the remaining length - count bytes. `count` is then added to the
+ * count reported by that call.
+ *
+ * @param input  bytes to validate
+ * @param length number of bytes in input
+ * @return result(error_code::SUCCESS, length) when no error is found,
+ *         otherwise the result of the scalar re-validation with its count
+ *         offset by `count`.
+ */
+template
+result generic_validate_utf8_with_errors(const uint8_t *input, size_t length) {
+  checker c{};
+  buf_block_reader<64> reader(input, length);
+  size_t count{0};
+  while (reader.has_full_block()) {
+    simd::simd8x64 in(reader.full_block());
+    c.check_next_input(in);
+    if (c.errors()) {
+      if (count != 0) {
+        count--;
+      } // Sometimes the error is only detected in the next chunk
+      result res = scalar::utf8::rewind_and_validate_with_errors(
+          reinterpret_cast(input),
+          reinterpret_cast(input + count), length - count);
+      res.count += count;
+      return res;
+    }
+    reader.advance();
+
+    count += 64;
+  }
+  // Tail: same staging through a zero-initialized local buffer as above.
+  uint8_t block[64]{};
+  reader.get_remainder(block);
+  simd::simd8x64 in(block);
+  c.check_next_input(in);
+  reader.advance();
+  c.check_eof();
+  if (c.errors()) {
+    if (count != 0) {
+      count--;
+    } // Sometimes the error is only detected in the next chunk
+    result res = scalar::utf8::rewind_and_validate_with_errors(
+        reinterpret_cast(input),
+        reinterpret_cast(input) + count, length - count);
+    res.count += count;
+    return res;
+  } else {
+    return result(error_code::SUCCESS, length);
+  }
+}
+
+// Convenience overload for char input: reinterprets the pointer and forwards
+// to the templated implementation above.
+// NOTE(review): the explicit template argument of the forwarded call appears
+// to be missing from this text (presumably the checker type) -- confirm.
+result generic_validate_utf8_with_errors(const char *input, size_t length) {
+  return generic_validate_utf8_with_errors(
+      reinterpret_cast(input), length);
+}
+
+} // namespace utf8_validation
+} // unnamed namespace
+} //
namespace lsx +} // namespace simdutf +/* end file src/generic/utf8_validation/utf8_validator.h */ +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +#if SIMDUTF_FEATURE_ASCII +/* begin file src/generic/ascii_validation.h */ +namespace simdutf { +namespace lsx { +namespace { +namespace ascii_validation { + +result generic_validate_ascii_with_errors(const char *input, size_t length) { + buf_block_reader<64> reader(reinterpret_cast(input), length); + size_t count{0}; + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + if (!in.is_ascii()) { + result res = scalar::ascii::validate_with_errors( + reinterpret_cast(input + count), length - count); + return result(res.error, count + res.count); + } + reader.advance(); + + count += 64; + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + if (!in.is_ascii()) { + result res = scalar::ascii::validate_with_errors( + reinterpret_cast(input + count), length - count); + return result(res.error, count + res.count); + } else { + return result(error_code::SUCCESS, length); + } +} + +bool generic_validate_ascii(const char *input, size_t length) { + buf_block_reader<64> reader(reinterpret_cast(input), length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + if (!in.is_ascii()) { + return false; + } + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + return in.is_ascii(); +} + +} // namespace ascii_validation +} // unnamed namespace +} // namespace lsx +} // namespace simdutf +/* end file src/generic/ascii_validation.h */ +#endif // SIMDUTF_FEATURE_ASCII + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + // transcoding from UTF-8 to Latin 1 +/* begin file src/generic/utf8_to_latin1/utf8_to_latin1.h */ +namespace simdutf { +namespace lsx { +namespace { +namespace utf8_to_latin1 { +using namespace simd; + +simdutf_really_inline simd8 +check_special_cases(const simd8 
input, const simd8 prev1) { + // For UTF-8 to Latin 1, we can allow any ASCII character, and any + // continuation byte, but the non-ASCII leading bytes must be 0b11000011 or + // 0b11000010 and nothing else. + // + // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) + // Bit 1 = Too Long (ASCII followed by continuation) + // Bit 2 = Overlong 3-byte + // Bit 4 = Surrogate + // Bit 5 = Overlong 2-byte + // Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ + constexpr const uint8_t FORBIDDEN = 0xff; + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + FORBIDDEN, + // 1110____ ________ + FORBIDDEN, + // 1111____ ________ + FORBIDDEN); + constexpr const uint8_t CARRY = + TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . 
+ const simd8 byte_1_low = + (prev1 & 0x0F) + .lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, CARRY, + + // ____0100 ________ + FORBIDDEN, + // ____0101 ________ + FORBIDDEN, + // ____011_ ________ + FORBIDDEN, FORBIDDEN, + + // ____1___ ________ + FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN, + // ____1101 ________ + FORBIDDEN, FORBIDDEN, FORBIDDEN); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | + OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); + return (byte_1_high & byte_1_low & byte_2_high); +} + +struct validating_transcoder { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + + validating_transcoder() : error(uint8_t(0)) {} + // + // Check whether the current bytes are valid UTF-8. + // + simdutf_really_inline void check_utf8_bytes(const simd8 input, + const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ + // lead bytes (2, 3, 4-byte leads become large positive numbers instead of + // small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + this->error |= check_special_cases(input, prev1); + } + + simdutf_really_inline size_t convert(const char *in, size_t size, + char *latin1_output) { + size_t pos = 0; + char *start{latin1_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_latin1. 
If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 16 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 16; margin--) { + leading_byte += (int8_t(in[margin - 1]) > + -65); // twos complement of -65 is 1011 1111 ... + } + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store((int8_t *)latin1_output); + latin1_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = + input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in + // this case, we also have ASCII to account for. 
+ if (utf8_continuation_mask & 1) { + return 0; // error + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_latin1( + in + pos, utf8_end_of_code_point_mask, latin1_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + return 0; + } + if (pos < size) { + size_t howmany = + scalar::utf8_to_latin1::convert(in + pos, size - pos, latin1_output); + if (howmany == 0) { + return 0; + } + latin1_output += howmany; + } + return latin1_output - start; + } + + simdutf_really_inline result convert_with_errors(const char *in, size_t size, + char *latin1_output) { + size_t pos = 0; + char *start{latin1_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_latin1. 
If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store((int8_t *)latin1_output); + latin1_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + if (errors()) { + // rewind_and_convert_with_errors will seek a potential error from + // in+pos onward, with the ability to go back up to pos bytes, and + // read size-pos bytes forward. 
+ result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( + pos, in + pos, size - pos, latin1_output); + res.count += pos; + return res; + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_latin1( + in + pos, utf8_end_of_code_point_mask, latin1_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. 
+ result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( + pos, in + pos, size - pos, latin1_output); + res.count += pos; + return res; + } + if (pos < size) { + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. + result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( + pos, in + pos, size - pos, latin1_output); + if (res.error) { // In case of error, we want the error position + res.count += pos; + return res; + } else { // In case of success, we want the number of word written + latin1_output += res.count; + } + } + return result(error_code::SUCCESS, latin1_output - start); + } + + simdutf_really_inline bool errors() const { + return this->error.any_bits_set_anywhere(); + } + +}; // struct utf8_checker +} // namespace utf8_to_latin1 +} // unnamed namespace +} // namespace lsx +} // namespace simdutf +/* end file src/generic/utf8_to_latin1/utf8_to_latin1.h */ +/* begin file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ +namespace simdutf { +namespace lsx { +namespace { +namespace utf8_to_latin1 { +using namespace simd; + +simdutf_really_inline size_t convert_valid(const char *in, size_t size, + char *latin1_output) { + size_t pos = 0; + char *start{latin1_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the last + // 16 bytes, and if the data is valid, then it is entirely safe because 16 + // UTF-8 bytes generate much more than 8 bytes. However, you cannot generally + // assume that you have valid UTF-8 input, so we are going to go back from the + // end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > + -65); // twos complement of -65 is 1011 1111 ... 
+ } + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store((int8_t *)latin1_output); + latin1_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, it + // is not good enough. + uint64_t utf8_continuation_mask = + input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in + // this case, we also have ASCII to account for. + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_latin1( + in + pos, utf8_end_of_code_point_mask, latin1_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). 
In practice we expect an + // 85% to 90% efficiency. + } + } + if (pos < size) { + size_t howmany = scalar::utf8_to_latin1::convert_valid(in + pos, size - pos, + latin1_output); + latin1_output += howmany; + } + return latin1_output - start; +} + +} // namespace utf8_to_latin1 +} // namespace +} // namespace lsx +} // namespace simdutf + // namespace simdutf +/* end file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + // transcoding from UTF-8 to UTF-16 +/* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ +namespace simdutf { +namespace lsx { +namespace { +namespace utf8_to_utf16 { + +using namespace simd; + +template +simdutf_warn_unused size_t convert_valid(const char *input, size_t size, + char16_t *utf16_output) noexcept { + // The implementation is not specific to haswell and should be moved to the + // generic directory. + size_t pos = 0; + char16_t *start{utf16_output}; + const size_t safety_margin = 16; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + // this loop could be unrolled further. For example, we could process the + // mask far more than 64 bytes. + simd8x64 in(reinterpret_cast(input + pos)); + if (in.is_ascii()) { + in.store_ascii_as_utf16(utf16_output); + utf16_output += 64; + pos += 64; + } else { + // Slow path. We hope that the compiler will recognize that this is a slow + // path. Anything that is not a continuation mask is a 'leading byte', + // that is, the start of a new code point. + uint64_t utf8_continuation_mask = in.lt(-65 + 1); + // -65 is 0b10111111 in two-complement's, so largest possible continuation + // byte + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + // The *start* of code points is not so useful, rather, we want the *end* + // of code points. 
+ uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times when using solely + // the slow/regular path, and at least four times if there are fast paths. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + // + // Thus we may allow convert_masked_utf8_to_utf16 to process + // more bytes at a time under a fast-path mode where 16 bytes + // are consumed at once (e.g., when encountering ASCII). + size_t consumed = convert_masked_utf8_to_utf16( + input + pos, utf8_end_of_code_point_mask, utf16_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. 
+ } + } + utf16_output += scalar::utf8_to_utf16::convert_valid( + input + pos, size - pos, utf16_output); + return utf16_output - start; +} + +} // namespace utf8_to_utf16 +} // unnamed namespace +} // namespace lsx +} // namespace simdutf +/* end file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ +/* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */ +namespace simdutf { +namespace lsx { +namespace { +namespace utf8_to_utf16 { +using namespace simd; + +simdutf_really_inline simd8 +check_special_cases(const simd8 input, const simd8 prev1) { + // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) + // Bit 1 = Too Long (ASCII followed by continuation) + // Bit 2 = Overlong 3-byte + // Bit 4 = Surrogate + // Bit 5 = Overlong 2-byte + // Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 
| SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); + constexpr const uint8_t CARRY = + TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . + const simd8 byte_1_low = + (prev1 & 0x0F) + .lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | + OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); + return (byte_1_high & byte_1_low & byte_2_high); +} +simdutf_really_inline simd8 +check_multibyte_lengths(const simd8 input, + const simd8 prev_input, + const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = + simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; +} + +struct validating_transcoder { + // If 
this is nonzero, there has been a UTF-8 error. + simd8 error; + + validating_transcoder() : error(uint8_t(0)) {} + // + // Check whether the current bytes are valid UTF-8. + // + simdutf_really_inline void check_utf8_bytes(const simd8 input, + const simd8 prev_input) { + // prev1 supplies the first byte and input the second byte of every + // adjacent byte pair that check_special_cases classifies; the result of + // check_multibyte_lengths is then OR-ed into the running error mask. + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + template + simdutf_really_inline size_t convert(const char *in, size_t size, + char16_t *utf16_output) { + size_t pos = 0; + char16_t *start{utf16_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, then in[margin] is the eighth-to-last + // leading byte (the loop's final margin-- lands on it). + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store_ascii_as_utf16(utf16_output); + utf16_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. 
+ static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (utf8_continuation_mask & 1) { + return 0; // error + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_utf16( + in + pos, utf8_end_of_code_point_mask, utf16_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. 
So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + return 0; + } + if (pos < size) { + size_t howmany = scalar::utf8_to_utf16::convert( + in + pos, size - pos, utf16_output); + if (howmany == 0) { + return 0; + } + utf16_output += howmany; + } + return utf16_output - start; + } + + template + simdutf_really_inline result convert_with_errors(const char *in, size_t size, + char16_t *utf16_output) { + size_t pos = 0; + char16_t *start{utf16_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, then in[margin] is the eighth-to-last + // leading byte (the loop's final margin-- lands on it). + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store_ascii_as_utf16(utf16_output); + utf16_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. 
+ static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (errors() || (utf8_continuation_mask & 1)) { + // rewind_and_convert_with_errors will seek a potential error from + // in+pos onward, with the ability to go back up to pos bytes, and + // read size-pos bytes forward. + result res = + scalar::utf8_to_utf16::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf16_output); + res.count += pos; + return res; + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. 
+ size_t consumed = convert_masked_utf8_to_utf16( + in + pos, utf8_end_of_code_point_mask, utf16_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. + result res = + scalar::utf8_to_utf16::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf16_output); + res.count += pos; + return res; + } + if (pos < size) { + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. + result res = + scalar::utf8_to_utf16::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf16_output); + if (res.error) { // In case of error, we want the error position + res.count += pos; + return res; + } else { // In case of success, we want the number of words written + utf16_output += res.count; + } + } + return result(error_code::SUCCESS, utf16_output - start); + } + + simdutf_really_inline bool errors() const { + return this->error.any_bits_set_anywhere(); + } + +}; // struct validating_transcoder +} // namespace utf8_to_utf16 +} // unnamed namespace +} // namespace lsx +} // namespace simdutf +/* end file src/generic/utf8_to_utf16/utf8_to_utf16.h */ +/* begin file src/generic/utf8/utf16_length_from_utf8_bytemask.h */ +namespace simdutf { +namespace lsx { +namespace { +namespace utf8 { + +using namespace simd; + +simdutf_really_inline size_t utf16_length_from_utf8_bytemask(const char *in, + size_t size) { + using vector_i8 = simd8; + using vector_u8 = simd8; + using vector_u64 = simd64; + + constexpr size_t N = 
vector_i8::SIZE; + constexpr size_t max_iterations = 255 / 2; + + auto counters = vector_u64::zero(); + auto local = vector_u8::zero(); + + size_t iterations = 0; + size_t pos = 0; + size_t count = 0; + for (; pos + N <= size; pos += N) { + const auto input = + vector_i8::load(reinterpret_cast(in + pos)); + + const auto continuation = input > int8_t(-65); + const auto utf_4bytes = vector_u8(input.value) >= uint8_t(240); + + local -= vector_u8(continuation); + local -= vector_u8(utf_4bytes); + + iterations += 1; + if (iterations == max_iterations) { + counters += sum_8bytes(local); + local = vector_u8::zero(); + iterations = 0; + } + } + + if (iterations > 0) { + count += local.sum_bytes(); + } + + count += counters.sum(); + + return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos); +} + +} // namespace utf8 +} // unnamed namespace +} // namespace lsx +} // namespace simdutf +/* end file src/generic/utf8/utf16_length_from_utf8_bytemask.h */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + // transcoding from UTF-8 to UTF-32 +/* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ +namespace simdutf { +namespace lsx { +namespace { +namespace utf8_to_utf32 { + +using namespace simd; + +simdutf_warn_unused size_t convert_valid(const char *input, size_t size, + char32_t *utf32_output) noexcept { + size_t pos = 0; + char32_t *start{utf32_output}; + const size_t safety_margin = 16; // to avoid overruns! 
+ while (pos + 64 + safety_margin <= size) { + simd8x64 in(reinterpret_cast(input + pos)); + if (in.is_ascii()) { + in.store_ascii_as_utf32(utf32_output); + utf32_output += 64; + pos += 64; + } else { + // -65 is 0b10111111 in two-complement's, so largest possible continuation + // byte + uint64_t utf8_continuation_mask = in.lt(-65 + 1); + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + size_t max_starting_point = (pos + 64) - 12; + while (pos < max_starting_point) { + size_t consumed = convert_masked_utf8_to_utf32( + input + pos, utf8_end_of_code_point_mask, utf32_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + } + } + utf32_output += scalar::utf8_to_utf32::convert_valid(input + pos, size - pos, + utf32_output); + return utf32_output - start; +} + +} // namespace utf8_to_utf32 +} // unnamed namespace +} // namespace lsx +} // namespace simdutf +/* end file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ +/* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */ +namespace simdutf { +namespace lsx { +namespace { +namespace utf8_to_utf32 { +using namespace simd; + +simdutf_really_inline simd8 +check_special_cases(const simd8 input, const simd8 prev1) { + // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) + // Bit 1 = Too Long (ASCII followed by continuation) + // Bit 2 = Overlong 3-byte + // Bit 4 = Surrogate + // Bit 5 = Overlong 2-byte + // Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 
1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); + constexpr const uint8_t CARRY = + TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . + const simd8 byte_1_low = + (prev1 & 0x0F) + .lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | + OVERLONG_4, + // ________ 
1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); + return (byte_1_high & byte_1_low & byte_2_high); +} +simdutf_really_inline simd8 +check_multibyte_lengths(const simd8 input, + const simd8 prev_input, + const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = + simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; +} + +struct validating_transcoder { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + + validating_transcoder() : error(uint8_t(0)) {} + // + // Check whether the current bytes are valid UTF-8. + // + simdutf_really_inline void check_utf8_bytes(const simd8 input, + const simd8 prev_input) { + // prev1 supplies the first byte and input the second byte of every + // adjacent byte pair that check_special_cases classifies; the result of + // check_multibyte_lengths is then OR-ed into the running error mask. + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + simdutf_really_inline size_t convert(const char *in, size_t size, + char32_t *utf32_output) { + size_t pos = 0; + char32_t *start{utf32_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 words when calling convert_masked_utf8_to_utf32. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. 
+ size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, then in[margin] is the eighth-to-last + // leading byte (the loop's final margin-- lands on it). + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store_ascii_as_utf32(utf32_output); + utf32_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (utf8_continuation_mask & 1) { + return 0; // we have an error + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. 
If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 input bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_utf32( + in + pos, utf8_end_of_code_point_mask, utf32_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + return 0; + } + if (pos < size) { + size_t howmany = + scalar::utf8_to_utf32::convert(in + pos, size - pos, utf32_output); + if (howmany == 0) { + return 0; + } + utf32_output += howmany; + } + return utf32_output - start; + } + + simdutf_really_inline result convert_with_errors(const char *in, size_t size, + char32_t *utf32_output) { + size_t pos = 0; + char32_t *start{utf32_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, then in[margin] is the eighth-to-last + // leading byte (the loop's final margin-- lands on it). 
+ const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store_ascii_as_utf32(utf32_output); + utf32_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (errors() || (utf8_continuation_mask & 1)) { + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf32_output); + res.count += pos; + return res; + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. 
+ // Yet we process between 6 to 12 input bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_utf32( + in + pos, utf8_end_of_code_point_mask, utf32_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf32_output); + res.count += pos; + return res; + } + if (pos < size) { + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf32_output); + if (res.error) { // In case of error, we want the error position + res.count += pos; + return res; + } else { // In case of success, we want the number of words written + utf32_output += res.count; + } + } + return result(error_code::SUCCESS, utf32_output - start); + } + + simdutf_really_inline bool errors() const { + return this->error.any_bits_set_anywhere(); + } + +}; // struct validating_transcoder +} // namespace utf8_to_utf32 +} // unnamed namespace +} // namespace lsx +} // namespace simdutf +/* end file src/generic/utf8_to_utf32/utf8_to_utf32.h */ +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 +/* begin file src/generic/utf8.h */ +namespace simdutf { +namespace lsx { +namespace { +namespace utf8 { + +using namespace simd; + +simdutf_really_inline size_t count_code_points(const char *in, size_t size) { + size_t pos = 0; + size_t count = 0; + for (; pos + 64 <= size; pos += 64) { + simd8x64 input(reinterpret_cast(in + pos)); + uint64_t 
utf8_continuation_mask = input.gt(-65); + count += count_ones(utf8_continuation_mask); + } + return count + scalar::utf8::count_code_points(in + pos, size - pos); +} + +#ifdef SIMDUTF_SIMD_HAS_BYTEMASK +simdutf_really_inline size_t count_code_points_bytemask(const char *in, + size_t size) { + using vector_i8 = simd8; + using vector_u8 = simd8; + using vector_u64 = simd64; + + constexpr size_t N = vector_i8::SIZE; + constexpr size_t max_iterations = 255 / 4; + + size_t pos = 0; + size_t count = 0; + + auto counters = vector_u64::zero(); + auto local = vector_u8::zero(); + size_t iterations = 0; + for (; pos + 4 * N <= size; pos += 4 * N) { + const auto input0 = + simd8::load(reinterpret_cast(in + pos + 0 * N)); + const auto input1 = + simd8::load(reinterpret_cast(in + pos + 1 * N)); + const auto input2 = + simd8::load(reinterpret_cast(in + pos + 2 * N)); + const auto input3 = + simd8::load(reinterpret_cast(in + pos + 3 * N)); + const auto mask0 = input0 > int8_t(-65); + const auto mask1 = input1 > int8_t(-65); + const auto mask2 = input2 > int8_t(-65); + const auto mask3 = input3 > int8_t(-65); + + local -= vector_u8(mask0); + local -= vector_u8(mask1); + local -= vector_u8(mask2); + local -= vector_u8(mask3); + + iterations += 1; + if (iterations == max_iterations) { + counters += sum_8bytes(local); + local = vector_u8::zero(); + iterations = 0; + } + } + + if (iterations > 0) { + count += local.sum_bytes(); + } + + count += counters.sum(); + + return count + scalar::utf8::count_code_points(in + pos, size - pos); +} +#endif // SIMDUTF_SIMD_HAS_BYTEMASK + +simdutf_really_inline size_t utf16_length_from_utf8(const char *in, + size_t size) { + size_t pos = 0; + size_t count = 0; + // This algorithm could no doubt be improved! + for (; pos + 64 <= size; pos += 64) { + simd8x64 input(reinterpret_cast(in + pos)); + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + // We count one word for anything that is not a continuation (so + // leading bytes). 
+ count += 64 - count_ones(utf8_continuation_mask); + int64_t utf8_4byte = input.gteq_unsigned(240); + count += count_ones(utf8_4byte); + } + return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos); +} + +} // namespace utf8 +} // unnamed namespace +} // namespace lsx +} // namespace simdutf +/* end file src/generic/utf8.h */ +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF16 +/* begin file src/generic/utf16/count_code_points_bytemask.h */ +namespace simdutf { +namespace lsx { +namespace { +namespace utf16 { + +using namespace simd; + +template +simdutf_really_inline size_t count_code_points(const char16_t *in, + size_t size) { + using vector_u16 = simd16; + constexpr size_t N = vector_u16::ELEMENTS; + + size_t pos = 0; + size_t count = 0; + + constexpr size_t max_iterations = 65535; + const auto one = vector_u16::splat(1); + const auto zero = vector_u16::zero(); + + size_t iteration = 0; + + auto counters = zero; + for (; pos < size / N * N; pos += N) { + auto input = vector_u16::load(in + pos); + if simdutf_constexpr (!match_system(big_endian)) { + input = input.swap_bytes(); + } + + const auto t0 = input & uint16_t(0xfc00); + const auto t1 = t0 ^ uint16_t(0xdc00); + + // t2[0] == 1 iff input[0] outside range 0xdc00..dfff (the word is not a + // high surrogate) + const auto t2 = min(t1, one); + + counters += t2; + + iteration += 1; + if (iteration == max_iterations) { + count += counters.sum(); + counters = zero; + iteration = 0; + } + } + + if (iteration > 0) { + count += counters.sum(); + } + + return count + + scalar::utf16::count_code_points(in + pos, size - pos); +} + +} // namespace utf16 +} // unnamed namespace +} // namespace lsx +} // namespace simdutf +/* end file src/generic/utf16/count_code_points_bytemask.h */ +/* begin file src/generic/utf16/change_endianness.h */ +namespace simdutf { +namespace lsx { +namespace { +namespace utf16 { + +simdutf_really_inline void +change_endianness_utf16(const char16_t *in, size_t size, 
char16_t *output) { + size_t pos = 0; + + while (pos < size / 32 * 32) { + simd16x32 input(reinterpret_cast(in + pos)); + input.swap_bytes(); + input.store(reinterpret_cast(output)); + pos += 32; + output += 32; + } + + scalar::utf16::change_endianness_utf16(in + pos, size - pos, output); +} + +} // namespace utf16 +} // unnamed namespace +} // namespace lsx +} // namespace simdutf +/* end file src/generic/utf16/change_endianness.h */ +/* begin file src/generic/utf16/utf8_length_from_utf16_bytemask.h */ +namespace simdutf { +namespace lsx { +namespace { +namespace utf16 { + +using namespace simd; + +template +simdutf_really_inline size_t utf8_length_from_utf16_bytemask(const char16_t *in, + size_t size) { + size_t pos = 0; + + using vector_u16 = simd16; + constexpr size_t N = vector_u16::ELEMENTS; + + const auto one = vector_u16::splat(1); + + auto v_count = vector_u16::zero(); + + // each char16 yields at least one byte + size_t count = size / N * N; + + // in a single iteration the increment is 0, 1 or 2, despite we have + // three additions + constexpr size_t max_iterations = 65535 / 2; + size_t iteration = max_iterations; + + for (; pos < size / N * N; pos += N) { + auto input = vector_u16::load(reinterpret_cast(in + pos)); + if simdutf_constexpr (!match_system(big_endian)) { + input = input.swap_bytes(); + } + // 0xd800 .. 0xdbff - low surrogate + // 0xdc00 .. 0xdfff - high surrogate + const auto is_surrogate = ((input & uint16_t(0xf800)) == uint16_t(0xd800)); + + // c0 - chars that yield 2- or 3-byte UTF-8 codes + const auto c0 = min(input & uint16_t(0xff80), one); + + // c1 - chars that yield 3-byte UTF-8 codes (including surrogates) + const auto c1 = min(input & uint16_t(0xf800), one); + + /* + Explanation how the counting works. + + In the case of a non-surrogate character we count: + * always 1 -- see how `count` is initialized above; + * c0 = 1 if the current char yields 2 or 3 bytes; + * c1 = 1 if the current char yields 3 bytes. 
+ + Thus, we always have correct count for the current char: + from 1, 2 or 3 bytes. + + A trickier part is how we count surrogate pairs. Whether + we encounter a surrogate (low or high), we count it as + 3 chars and then minus 1 (`is_surrogate` is -1 or 0). + Each surrogate char yields 2. A surrogate pair, that + is a low surrogate followed by a high one, yields + the expected 4 bytes. + + It also correctly handles cases when low surrogate is + processed by the this loop, but high surrogate is counted + by the scalar procedure. The scalar procedure uses exactly + the described approach, thanks to that for valid UTF-16 + strings it always count correctly. + */ + v_count += c0; + v_count += c1; + v_count += vector_u16(is_surrogate); + + iteration -= 1; + if (iteration == 0) { + count += v_count.sum(); + v_count = vector_u16::zero(); + iteration = max_iterations; + } + } + + if (iteration > 0) { + count += v_count.sum(); + } + + return count + scalar::utf16::utf8_length_from_utf16(in + pos, + size - pos); +} + +template +simdutf_really_inline result +utf8_length_from_utf16_with_replacement(const char16_t *in, size_t size) { + using vector_u16 = simd16; + constexpr size_t N = vector_u16::ELEMENTS; + if (N + 1 > size) { + return scalar::utf16::utf8_length_from_utf16_with_replacement( + in, size); + } // special case for short inputs + size_t pos = 0; + bool any_surrogates = false; + + const auto one = vector_u16::splat(1); + + auto v_count = vector_u16::zero(); + auto v_mismatched_count = vector_u16::zero(); + + size_t count = 0; + size_t mismatched_count = 0; + + // in a single iteration the increment is 0, 1 or 2, despite we have + // three additions + constexpr size_t max_iterations = 65535 / 2; + size_t iteration = max_iterations; + + if (scalar::utf16::is_low_surrogate(in[0])) { + any_surrogates = true; + mismatched_count += 1; + } + + for (; pos < (size - 1) / N * N; pos += N) { + auto input = vector_u16::load(reinterpret_cast(in + pos)); + if simdutf_constexpr 
(!match_system(big_endian)) { + input = input.swap_bytes(); + } + // 0xd800 .. 0xdbff - low surrogate + // 0xdc00 .. 0xdfff - high surrogate + const auto is_surrogate = ((input & uint16_t(0xf800)) == uint16_t(0xd800)); + + // c0 - chars that yield 2- or 3-byte UTF-8 codes + const auto c0 = min(input & uint16_t(0xff80), one); + + // c1 - chars that yield 3-byte UTF-8 codes (including surrogates) + const auto c1 = min(input & uint16_t(0xf800), one); + + v_count += c0; + v_count += c1; + v_count += vector_u16(is_surrogate); + if (is_surrogate.to_bitmask() != 0 || + scalar::utf16::is_low_surrogate(in[pos + N])) { + any_surrogates = true; + auto input_next = + vector_u16::load(reinterpret_cast(in + pos + 1)); + if simdutf_constexpr (!match_system(big_endian)) { + input_next = input_next.swap_bytes(); + } + + const auto lb_masked = input & (0xfc00); + const auto block_masked = input_next & (0xfc00); + + const auto lb_is_high = lb_masked == (0xd800); + const auto block_is_low = block_masked == (0xdc00); + + const auto illseq = min(vector_u16(lb_is_high ^ block_is_low), one); + + v_mismatched_count += illseq; + } + + iteration -= 1; + if (iteration == 0) { + count += v_count.sum(); + v_count = vector_u16::zero(); + mismatched_count += v_mismatched_count.sum(); + v_mismatched_count = vector_u16::zero(); + iteration = max_iterations; + } + } + + if (iteration > 0) { + count += v_count.sum(); + mismatched_count += v_mismatched_count.sum(); + } + + if (scalar::utf16::is_low_surrogate(in[pos])) { + any_surrogates = true; + if (!scalar::utf16::is_high_surrogate(in[pos - 1])) { + mismatched_count -= 1; + count += 2; + pos += 1; + } + } + count += pos; + count += mismatched_count; + if (scalar::utf16::is_high_surrogate(in[pos - 1])) { + any_surrogates = true; + if (pos == size) { + count += 2; + } else if (scalar::utf16::is_low_surrogate(in[pos])) { + pos += 1; + count += 2; + } + } + result scalar_result = + scalar::utf16::utf8_length_from_utf16_with_replacement( + in + pos, size 
- pos); + return {any_surrogates ? SURROGATE : scalar_result.error, + count + scalar_result.count}; +} + +} // namespace utf16 +} // unnamed namespace +} // namespace lsx +} // namespace simdutf +/* end file src/generic/utf16/utf8_length_from_utf16_bytemask.h */ +/* begin file src/generic/utf16/utf32_length_from_utf16.h */ +namespace simdutf { +namespace lsx { +namespace { +namespace utf16 { + +template +simdutf_really_inline size_t utf32_length_from_utf16(const char16_t *in, + size_t size) { + return count_code_points(in, size); +} + +} // namespace utf16 +} // unnamed namespace +} // namespace lsx +} // namespace simdutf +/* end file src/generic/utf16/utf32_length_from_utf16.h */ +/* begin file src/generic/utf16/to_well_formed.h */ +namespace simdutf { +namespace lsx { +namespace { +namespace utf16 { + +// Note: this is direct translation of westmere implementation. + +/* + * Process one block of 8 characters. If in_place is false, + * copy the block from in to out. If there is a sequencing + * error in the block, overwrite the illsequenced characters + * with the replacement character. This function reads one + * character before the beginning of the buffer as a lookback. + * If that character is illsequenced, it too is overwritten. 
+ */ +template +simdutf_really_inline void utf16fix_block(char16_t *out, const char16_t *in) { + const char16_t replacement = scalar::utf16::replacement(); + + using vector_u16 = simd16; + auto swap_if_needed = [](uint16_t x) simdutf_constexpr -> uint16_t { + return scalar::utf16::swap_if_needed(x); + }; + + const auto lookback = vector_u16::load(in - 1); + const auto block = vector_u16::load(in); + + const auto lb_masked = lookback & swap_if_needed(0xfc00); + const auto block_masked = block & swap_if_needed(0xfc00); + + const auto lb_is_high = lb_masked == swap_if_needed(0xd800); + const auto block_is_low = block_masked == swap_if_needed(0xdc00); + const auto illseq = lb_is_high ^ block_is_low; + if (!illseq.is_zero()) { + /* compute the cause of the illegal sequencing */ + const auto lb_illseq = ~block_is_low & lb_is_high; + const auto block_illseq = + (~lb_is_high & block_is_low) | lb_illseq.template byte_right_shift<2>(); + + /* fix illegal sequencing in the lookback */ + const auto lb = lb_illseq.first(); + out[-1] = char16_t((lb & replacement) | (~lb & out[-1])); + /* fix illegal sequencing in the main block */ + const auto mask = as_vector_u16(block_illseq); + const auto fixed = (~mask & block) | (mask & replacement); + + fixed.store(reinterpret_cast(out)); + } else if (!in_place) { + block.store(reinterpret_cast(out)); + } +} + +template +void to_well_formed(const char16_t *in, size_t n, char16_t *out) { + using vector_u16 = simd16; + constexpr size_t N = vector_u16::ELEMENTS; + + if (n < N + 1) { + scalar::utf16::to_well_formed_utf16(in, n, out); + return; + } + + const char16_t replacement = scalar::utf16::replacement(); + + out[0] = + scalar::utf16::is_low_surrogate(in[0]) ? 
replacement : in[0]; + + /* duplicate code to have the compiler specialise utf16fix_block() */ + if (in == out) { + constexpr bool inplace = true; + for (size_t i = 1; i + N < n; i += N) { + utf16fix_block(out + i, in + i); + } + + utf16fix_block(out + n - N, in + n - N); + } else { + constexpr bool copy_data = false; + for (size_t i = 1; i + N < n; i += N) { + utf16fix_block(out + i, in + i); + } + + utf16fix_block(out + n - N, in + n - N); + } + + out[n - 1] = scalar::utf16::is_high_surrogate(out[n - 1]) + ? replacement + : out[n - 1]; +} + +} // namespace utf16 +} // unnamed namespace +} // namespace lsx +} // namespace simdutf +/* end file src/generic/utf16/to_well_formed.h */ +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +/* begin file src/generic/validate_utf16.h */ +namespace simdutf { +namespace lsx { +namespace { +namespace utf16 { +/* + UTF-16 validation + -------------------------------------------------- + + In UTF-16 code units in range 0xD800 to 0xDFFF have special meaning. + + In a vectorized algorithm we want to examine the most significant + nibble in order to select a fast path. If none of highest nibbles + are 0xD (13), than we are sure that UTF-16 chunk in a vector + register is valid. + + Let us analyze what we need to check if the nibble is 0xD. The + value of the preceding nibble determines what we have: + + 0xd000 .. 0xd7ff - a valid word + 0xd800 .. 0xdbff - low surrogate + 0xdc00 .. 0xdfff - high surrogate + + Other constraints we have to consider: + - there must not be two consecutive low surrogates (0xd800 .. 0xdbff) + - there must not be two consecutive high surrogates (0xdc00 .. 0xdfff) + - there must not be sole low surrogate nor high surrogate + + We are going to build three bitmasks based on the 3rd nibble: + - V = valid word, + - L = low surrogate (0xd800 .. 0xdbff) + - H = high surrogate (0xdc00 .. 
0xdfff) + + 0 1 2 3 4 5 6 7 <--- word index + [ V | L | H | L | H | V | V | L ] + 1 0 0 0 0 1 1 0 - V = valid masks + 0 1 0 1 0 0 0 1 - L = low surrogate + 0 0 1 0 1 0 0 0 - H high surrogate + + + 1 0 0 0 0 1 1 0 V = valid masks + 0 1 0 1 0 0 0 0 a = L & (H >> 1) + 0 0 1 0 1 0 0 0 b = a << 1 + 1 1 1 1 1 1 1 0 c = V | a | b + ^ + the last bit can be zero, we just consume 7 + code units and recheck this word in the next iteration +*/ +template +const result validate_utf16_with_errors(const char16_t *input, size_t size) { + if (simdutf_unlikely(size == 0)) { + return result(error_code::SUCCESS, 0); + } + + const char16_t *start = input; + const char16_t *end = input + size; + + const auto v_d8 = simd8::splat(0xd8); + const auto v_f8 = simd8::splat(0xf8); + const auto v_fc = simd8::splat(0xfc); + const auto v_dc = simd8::splat(0xdc); + + while (input + simd16::SIZE * 2 < end) { + // 0. Load data: since the validation takes into account only higher + // byte of each word, we compress the two vectors into one which + // consists only the higher bytes. + auto in0 = simd16(input); + auto in1 = + simd16(input + simd16::SIZE / sizeof(char16_t)); + + // Function `utf16_gather_high_bytes` consumes two vectors of UTF-16 + // and yields a single vector having only higher bytes of characters. + const auto in = utf16_gather_high_bytes(in0, in1); + + // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy). + const auto surrogates_wordmask = (in & v_f8) == v_d8; + const uint16_t surrogates_bitmask = + static_cast(surrogates_wordmask.to_bitmask()); + if (surrogates_bitmask == 0x0000) { + input += 16; + } else { + // 2. 
We have some surrogates that have to be distinguished: + // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF) + // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF) + // + // Fact: high surrogate has 11th bit set (3rd bit in the higher byte) + + // V - non-surrogate code units + // V = not surrogates_wordmask + const uint16_t V = static_cast(~surrogates_bitmask); + + // H - word-mask for high surrogates: the six highest bits are 0b1101'11 + const auto vH = (in & v_fc) == v_dc; + const uint16_t H = static_cast(vH.to_bitmask()); + + // L - word mask for low surrogates + // L = not H and surrogates_wordmask + const uint16_t L = static_cast(~H & surrogates_bitmask); + + const uint16_t a = static_cast( + L & (H >> 1)); // A low surrogate must be followed by high one. + // (A low surrogate placed in the 7th register's word + // is an exception we handle.) + const uint16_t b = static_cast( + a << 1); // Just mark that the opinput - startite fact is hold, + // thanks to that we have only two masks for valid case. + const uint16_t c = static_cast( + V | a | b); // Combine all the masks into the final one. + + if (c == 0xffff) { + // The whole input register contains valid UTF-16, i.e., + // either single code units or proper surrogate pairs. + input += 16; + } else if (c == 0x7fff) { + // The 15 lower code units of the input register contains valid UTF-16. + // The 15th word may be either a low or high surrogate. It the next + // iteration we 1) check if the low surrogate is followed by a high + // one, 2) reject sole high surrogate. 
+ input += 15; + } else { + return result(error_code::SURROGATE, input - start); + } + } + } + + return result(error_code::SUCCESS, input - start); +} + +template +const result validate_utf16_as_ascii_with_errors(const char16_t *input, + size_t size) { + if (simdutf_unlikely(size == 0)) { + return result(error_code::SUCCESS, 0); + } + size_t pos = 0; + for (; pos < size / 32 * 32; pos += 32) { + simd16x32 input_vec( + reinterpret_cast(input + pos)); + if simdutf_constexpr (!match_system(big_endian)) { + input_vec.swap_bytes(); + } + uint64_t matches = input_vec.lteq(uint16_t(0x7f)); + if (~matches) { + // Found a match, return the first one + int index = trailing_zeroes(~matches) / 2; + return result(error_code::TOO_LARGE, pos + index); + } + } + + // Scalar tail + while (pos < size) { + + char16_t v = scalar::utf16::swap_if_needed(input[pos]); + if (v > 0x7F) { + return result(error_code::TOO_LARGE, pos); + } + pos++; + } + return result(error_code::SUCCESS, size); +} + +} // namespace utf16 +} // unnamed namespace +} // namespace lsx +} // namespace simdutf +/* end file src/generic/validate_utf16.h */ +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF32 +/* begin file src/generic/utf32.h */ +#include + +namespace simdutf { +namespace lsx { +namespace { +namespace utf32 { + +template T min(T a, T b) { return a <= b ? 
a : b; } + +simdutf_really_inline size_t utf8_length_from_utf32(const char32_t *input, + size_t length) { + using vector_u32 = simd32; + + const char32_t *start = input; + + // we add up to three ones in a single iteration (see the vectorized loop in + // section #2 below) + const size_t max_increment = 3; + + const size_t N = vector_u32::ELEMENTS; + +#if SIMDUTF_SIMD_HAS_UNSIGNED_CMP + const auto v_0000007f = vector_u32::splat(0x0000007f); + const auto v_000007ff = vector_u32::splat(0x000007ff); + const auto v_0000ffff = vector_u32::splat(0x0000ffff); +#else + const auto v_ffffff80 = vector_u32::splat(0xffffff80); + const auto v_fffff800 = vector_u32::splat(0xfffff800); + const auto v_ffff0000 = vector_u32::splat(0xffff0000); + const auto one = vector_u32::splat(1); +#endif // SIMDUTF_SIMD_HAS_UNSIGNED_CMP + + size_t counter = 0; + + // 1. vectorized loop unrolled 4 times + { + // we use vector of uint32 counters, this is why this limit is used + const size_t max_iterations = + std::numeric_limits::max() / (max_increment * 4); + size_t blocks = length / (N * 4); + length -= blocks * (N * 4); + while (blocks != 0) { + const size_t iterations = min(blocks, max_iterations); + blocks -= iterations; + + simd32 acc = vector_u32::zero(); + for (size_t i = 0; i < iterations; i++) { + const auto in0 = vector_u32(input + 0 * N); + const auto in1 = vector_u32(input + 1 * N); + const auto in2 = vector_u32(input + 2 * N); + const auto in3 = vector_u32(input + 3 * N); + +#if SIMDUTF_SIMD_HAS_UNSIGNED_CMP + acc -= as_vector_u32(in0 > v_0000007f); + acc -= as_vector_u32(in1 > v_0000007f); + acc -= as_vector_u32(in2 > v_0000007f); + acc -= as_vector_u32(in3 > v_0000007f); + + acc -= as_vector_u32(in0 > v_000007ff); + acc -= as_vector_u32(in1 > v_000007ff); + acc -= as_vector_u32(in2 > v_000007ff); + acc -= as_vector_u32(in3 > v_000007ff); + + acc -= as_vector_u32(in0 > v_0000ffff); + acc -= as_vector_u32(in1 > v_0000ffff); + acc -= as_vector_u32(in2 > v_0000ffff); + acc -= 
as_vector_u32(in3 > v_0000ffff); +#else + acc += min(one, in0 & v_ffffff80); + acc += min(one, in1 & v_ffffff80); + acc += min(one, in2 & v_ffffff80); + acc += min(one, in3 & v_ffffff80); + + acc += min(one, in0 & v_fffff800); + acc += min(one, in1 & v_fffff800); + acc += min(one, in2 & v_fffff800); + acc += min(one, in3 & v_fffff800); + + acc += min(one, in0 & v_ffff0000); + acc += min(one, in1 & v_ffff0000); + acc += min(one, in2 & v_ffff0000); + acc += min(one, in3 & v_ffff0000); +#endif // SIMDUTF_SIMD_HAS_UNSIGNED_CMP + + input += 4 * N; + } + + counter += acc.sum(); + } + } + + // 2. vectorized loop for tail + { + const size_t max_iterations = + std::numeric_limits::max() / max_increment; + size_t blocks = length / N; + length -= blocks * N; + while (blocks != 0) { + const size_t iterations = min(blocks, max_iterations); + blocks -= iterations; + + auto acc = vector_u32::zero(); + for (size_t i = 0; i < iterations; i++) { + const auto in = vector_u32(input); + +#if SIMDUTF_SIMD_HAS_UNSIGNED_CMP + acc -= as_vector_u32(in > v_0000007f); + acc -= as_vector_u32(in > v_000007ff); + acc -= as_vector_u32(in > v_0000ffff); +#else + acc += min(one, in & v_ffffff80); + acc += min(one, in & v_fffff800); + acc += min(one, in & v_ffff0000); +#endif // SIMDUTF_SIMD_HAS_UNSIGNED_CMP + + input += N; + } + + counter += acc.sum(); + } + } + + const size_t consumed = input - start; + if (consumed != 0) { + // We don't count 0th bytes in the vectorized loops above, this + // is why we need to count them in the end. 
+ counter += consumed; + } + + return counter + scalar::utf32::utf8_length_from_utf32(input, length); +} + +} // namespace utf32 +} // unnamed namespace +} // namespace lsx +} // namespace simdutf +/* end file src/generic/utf32.h */ +#endif // SIMDUTF_FEATURE_UTF32 + +// +// Implementation-specific overrides +// +namespace simdutf { +namespace lsx { + +#if SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused int +implementation::detect_encodings(const char *input, + size_t length) const noexcept { + // If there is a BOM, then we trust it. + auto bom_encoding = simdutf::BOM::check_bom(input, length); + // todo: reimplement as a one-pass algorithm. + if (bom_encoding != encoding_type::unspecified) { + return bom_encoding; + } + int out = 0; + if (validate_utf8(input, length)) { + out |= encoding_type::UTF8; + } + if ((length % 2) == 0) { + if (validate_utf16le(reinterpret_cast(input), + length / 2)) { + out |= encoding_type::UTF16_LE; + } + } + if ((length % 4) == 0) { + if (validate_utf32(reinterpret_cast(input), length / 4)) { + out |= encoding_type::UTF32_LE; + } + } + return out; +} +#endif // SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused bool +implementation::validate_utf8(const char *buf, size_t len) const noexcept { + return lsx::utf8_validation::generic_validate_utf8(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF8 +simdutf_warn_unused result implementation::validate_utf8_with_errors( + const char *buf, size_t len) const noexcept { + return lsx::utf8_validation::generic_validate_utf8_with_errors(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_ASCII +simdutf_warn_unused bool +implementation::validate_ascii(const char *buf, size_t len) const noexcept { + return lsx::ascii_validation::generic_validate_ascii(buf, len); +} + +simdutf_warn_unused result implementation::validate_ascii_with_errors( + const char *buf, 
size_t len) const noexcept { + return lsx::ascii_validation::generic_validate_ascii_with_errors(buf, len); +} +#endif // SIMDUTF_FEATURE_ASCII +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII +simdutf_warn_unused bool +implementation::validate_utf16le_as_ascii(const char16_t *buf, + size_t len) const noexcept { + return lsx::utf16::validate_utf16_as_ascii_with_errors( + buf, len) + .error == SUCCESS; +} + +simdutf_warn_unused bool +implementation::validate_utf16be_as_ascii(const char16_t *buf, + size_t len) const noexcept { + return lsx::utf16::validate_utf16_as_ascii_with_errors(buf, + len) + .error == SUCCESS; +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII +#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused bool +implementation::validate_utf16le(const char16_t *buf, + size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + // empty input is valid. protected the implementation from nullptr. + return true; + } + const auto res = + lsx::utf16::validate_utf16_with_errors(buf, len); + + if (res.is_err()) { + return false; + } + + if (res.count != len) { + return scalar::utf16::validate(buf + res.count, + len - res.count); + } + + return true; +} +#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused bool +implementation::validate_utf16be(const char16_t *buf, + size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + // empty input is valid. protected the implementation from nullptr. 
+ return true; + } + const auto res = + lsx::utf16::validate_utf16_with_errors(buf, len); + + if (res.is_err()) { + return false; + } + + if (res.count != len) { + return scalar::utf16::validate(buf + res.count, + len - res.count); + } + + return true; +} + +simdutf_warn_unused result implementation::validate_utf16le_with_errors( + const char16_t *buf, size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + return result(error_code::SUCCESS, 0); + } + const result res = + lsx::utf16::validate_utf16_with_errors(buf, len); + if (res.count != len) { + const result scalar_res = + scalar::utf16::validate_with_errors( + buf + res.count, len - res.count); + return result(scalar_res.error, res.count + scalar_res.count); + } else { + return res; + } +} + +simdutf_warn_unused result implementation::validate_utf16be_with_errors( + const char16_t *buf, size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + return result(error_code::SUCCESS, 0); + } + const result res = + lsx::utf16::validate_utf16_with_errors(buf, len); + if (res.count != len) { + const result scalar_res = + scalar::utf16::validate_with_errors(buf + res.count, + len - res.count); + return result(scalar_res.error, res.count + scalar_res.count); + } else { + return res; + } +} + +void implementation::to_well_formed_utf16le(const char16_t *input, size_t len, + char16_t *output) const noexcept { + utf16::to_well_formed(input, len, output); +} + +void implementation::to_well_formed_utf16be(const char16_t *input, size_t len, + char16_t *output) const noexcept { + utf16::to_well_formed(input, len, output); +} +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING +simdutf_warn_unused bool +implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + // empty input is valid. protected the implementation from nullptr. 
+ return true; + } + const char32_t *tail = lsx_validate_utf32le(buf, len); + if (tail) { + return scalar::utf32::validate(tail, len - (tail - buf)); + } else { + return false; + } +} +#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING + +#if SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused result implementation::validate_utf32_with_errors( + const char32_t *buf, size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + return result(error_code::SUCCESS, 0); + } + result res = lsx_validate_utf32le_with_errors(buf, len); + if (res.count != len) { + result scalar_res = + scalar::utf32::validate_with_errors(buf + res.count, len - res.count); + return result(scalar_res.error, res.count + scalar_res.count); + } else { + return res; + } +} +#endif // SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( + const char *buf, size_t len, char *utf8_output) const noexcept { + std::pair ret = + lsx_convert_latin1_to_utf8(buf, len, utf8_output); + size_t converted_chars = ret.second - utf8_output; + + if (ret.first != buf + len) { + const size_t scalar_converted_chars = scalar::latin1_to_utf8::convert( + ret.first, len - (ret.first - buf), ret.second); + converted_chars += scalar_converted_chars; + } + return converted_chars; +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + std::pair ret = + lsx_convert_latin1_to_utf16le(buf, len, utf16_output); + size_t converted_chars = ret.second - utf16_output; + if (ret.first != buf + len) { + const size_t scalar_converted_chars = + scalar::latin1_to_utf16::convert( + ret.first, len - (ret.first - buf), ret.second); + converted_chars += scalar_converted_chars; + } + return converted_chars; +} + +simdutf_warn_unused size_t 
implementation::convert_latin1_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + std::pair ret = + lsx_convert_latin1_to_utf16be(buf, len, utf16_output); + size_t converted_chars = ret.second - utf16_output; + if (ret.first != buf + len) { + const size_t scalar_converted_chars = + scalar::latin1_to_utf16::convert( + ret.first, len - (ret.first - buf), ret.second); + converted_chars += scalar_converted_chars; + } + return converted_chars; +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept { + std::pair ret = + lsx_convert_latin1_to_utf32(buf, len, utf32_output); + size_t converted_chars = ret.second - utf32_output; + if (ret.first != buf + len) { + const size_t scalar_converted_chars = scalar::latin1_to_utf32::convert( + ret.first, len - (ret.first - buf), ret.second); + converted_chars += scalar_converted_chars; + } + return converted_chars; +} +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept { + utf8_to_latin1::validating_transcoder converter; + return converter.convert(buf, len, latin1_output); +} + +simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors( + const char *buf, size_t len, char *latin1_output) const noexcept { + utf8_to_latin1::validating_transcoder converter; + return converter.convert_with_errors(buf, len, latin1_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept { + return lsx::utf8_to_latin1::convert_valid(buf, len, latin1_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && 
SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16::validating_transcoder converter; + return converter.convert(buf, len, utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16::validating_transcoder converter; + return converter.convert(buf, len, utf16_output); +} + +simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16::validating_transcoder converter; + return converter.convert_with_errors(buf, len, + utf16_output); +} + +simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16::validating_transcoder converter; + return converter.convert_with_errors(buf, len, utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le( + const char *input, size_t size, char16_t *utf16_output) const noexcept { + return utf8_to_utf16::convert_valid(input, size, + utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be( + const char *input, size_t size, char16_t *utf16_output) const noexcept { + return utf8_to_utf16::convert_valid(input, size, + utf16_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::convert_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept { + utf8_to_utf32::validating_transcoder converter; + return converter.convert(buf, len, utf32_output); +} + +simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors( + 
const char *buf, size_t len, char32_t *utf32_output) const noexcept { + utf8_to_utf32::validating_transcoder converter; + return converter.convert_with_errors(buf, len, utf32_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( + const char *input, size_t size, char32_t *utf32_output) const noexcept { + return utf8_to_utf32::convert_valid(input, size, utf32_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + lsx_convert_utf16_to_latin1(buf, len, latin1_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - latin1_output; + + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_latin1::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + lsx_convert_utf16_to_latin1(buf, len, latin1_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - latin1_output; + + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_latin1::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused result +implementation::convert_utf16le_to_latin1_with_errors( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + lsx_convert_utf16_to_latin1_with_errors( + buf, len, latin1_output); + if (ret.first.error) { + return ret.first; + } // Can 
return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_latin1::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + latin1_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused result +implementation::convert_utf16be_to_latin1_with_errors( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + lsx_convert_utf16_to_latin1_with_errors(buf, len, + latin1_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_latin1::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + latin1_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + // optimization opportunity: implement a custom function. + return convert_utf16be_to_latin1(buf, len, latin1_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + // optimization opportunity: implement a custom function. 
+ return convert_utf16le_to_latin1(buf, len, latin1_output); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + std::pair ret = + lsx_convert_utf16_to_utf8(buf, len, utf8_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf8_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_utf8::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + std::pair ret = + lsx_convert_utf16_to_utf8(buf, len, utf8_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf8_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_utf8::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + lsx_convert_utf16_to_utf8_with_errors(buf, len, + utf8_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_utf8::convert_with_errors( + buf + ret.first.count, len - 
ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf8_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + lsx_convert_utf16_to_utf8_with_errors(buf, len, + utf8_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_utf8::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf8_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + return convert_utf16le_to_utf8(buf, len, utf8_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + return convert_utf16be_to_utf8(buf, len, utf8_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + if (simdutf_unlikely(len == 0)) { + return 0; + } + std::pair ret = + 
lsx_convert_utf32_to_utf8(buf, len, utf8_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf8_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = scalar::utf32_to_utf8::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + if (simdutf_unlikely(len == 0)) { + return result(error_code::SUCCESS, 0); + } + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + lsx_convert_utf32_to_utf8_with_errors(buf, len, utf8_output); + if (ret.first.count != len) { + result scalar_res = scalar::utf32_to_utf8::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf8_output; // Set count to the number of 8-bit code units written + return ret.first; +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + std::pair ret = + lsx_convert_utf16_to_utf32(buf, len, utf32_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf32_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_utf32::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused size_t 
implementation::convert_utf16be_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + std::pair ret = + lsx_convert_utf16_to_utf32(buf, len, utf32_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf32_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_utf32::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + lsx_convert_utf16_to_utf32_with_errors(buf, len, + utf32_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_utf32::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf32_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + lsx_convert_utf16_to_utf32_with_errors(buf, len, + utf32_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found 
correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_utf32::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf32_output; // Set count to the number of 8-bit code units written + return ret.first; +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + lsx_convert_utf32_to_latin1(buf, len, latin1_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - latin1_output; + + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = scalar::utf32_to_latin1::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + lsx_convert_utf32_to_latin1_with_errors(buf, len, latin1_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = scalar::utf32_to_latin1::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + latin1_output; // Set count to the number of 8-bit 
code units written + return ret.first; +} + +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + lsx_convert_utf32_to_latin1(buf, len, latin1_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - latin1_output; + + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = scalar::utf32_to_latin1::convert_valid( + ret.first, len - (ret.first - buf), ret.second); + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} +#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + // optimization opportunity: implement a custom function. + return convert_utf32_to_utf8(buf, len, utf8_output); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + std::pair ret = + lsx_convert_utf32_to_utf16(buf, len, utf16_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf16_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf32_to_utf16::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + + return saved_bytes; +} + +simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + std::pair ret = + lsx_convert_utf32_to_utf16(buf, len, utf16_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf16_output; + if (ret.first != buf 
+ len) { + const size_t scalar_saved_bytes = + scalar::utf32_to_utf16::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + lsx_convert_utf32_to_utf16_with_errors(buf, len, + utf16_output); + if (ret.first.count != len) { + result scalar_res = + scalar::utf32_to_utf16::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf16_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + lsx_convert_utf32_to_utf16_with_errors(buf, len, + utf16_output); + if (ret.first.count != len) { + result scalar_res = + scalar::utf32_to_utf16::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf16_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + return 
convert_utf32_to_utf16le(buf, len, utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + return convert_utf32_to_utf16be(buf, len, utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + return convert_utf16le_to_utf32(buf, len, utf32_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + return convert_utf16be_to_utf32(buf, len, utf32_output); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 +void implementation::change_endianness_utf16(const char16_t *input, + size_t length, + char16_t *output) const noexcept { + utf16::change_endianness_utf16(input, length, output); +} + +simdutf_warn_unused size_t implementation::count_utf16le( + const char16_t *input, size_t length) const noexcept { + return utf16::count_code_points(input, length); +} + +simdutf_warn_unused size_t implementation::count_utf16be( + const char16_t *input, size_t length) const noexcept { + return utf16::count_code_points(input, length); +} +#endif // SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 +simdutf_warn_unused size_t +implementation::count_utf8(const char *input, size_t length) const noexcept { + return utf8::count_code_points(input, length); +} +#endif // SIMDUTF_FEATURE_UTF8 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::latin1_length_from_utf8( + const char *buf, size_t len) const noexcept { + return count_utf8(buf, len); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 +simdutf_warn_unused size_t implementation::utf8_length_from_latin1( + const char *input, size_t length) const 
noexcept { + const uint8_t *data = reinterpret_cast(input); + const uint8_t *data_end = data + length; + uint64_t result = 0; + while (data_end - data > 16) { + uint64_t two_bytes = 0; + __m128i input_vec = __lsx_vld(data, 0); + two_bytes = + __lsx_vpickve2gr_hu(__lsx_vpcnt_h(__lsx_vmskltz_b(input_vec)), 0); + result += 16 + two_bytes; + data += 16; + } + return result + scalar::latin1::utf8_length_from_latin1((const char *)data, + data_end - data); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t implementation::utf8_length_from_utf16le( + const char16_t *input, size_t length) const noexcept { + return utf16::utf8_length_from_utf16_bytemask(input, + length); +} + +simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( + const char16_t *input, size_t length) const noexcept { + return utf16::utf8_length_from_utf16_bytemask(input, length); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::utf32_length_from_utf16le( + const char16_t *input, size_t length) const noexcept { + return utf16::utf32_length_from_utf16(input, length); +} + +simdutf_warn_unused size_t implementation::utf32_length_from_utf16be( + const char16_t *input, size_t length) const noexcept { + return utf16::utf32_length_from_utf16(input, length); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 +simdutf_warn_unused size_t implementation::utf16_length_from_utf8( + const char *input, size_t length) const noexcept { + return utf8::utf16_length_from_utf8_bytemask(input, length); +} +simdutf_warn_unused result +implementation::utf8_length_from_utf16le_with_replacement( + const char16_t *input, size_t length) const noexcept { + return scalar::utf16::utf8_length_from_utf16_with_replacement< + endianness::LITTLE>(input, length); +} + 
+simdutf_warn_unused result +implementation::utf8_length_from_utf16be_with_replacement( + const char16_t *input, size_t length) const noexcept { + return scalar::utf16::utf8_length_from_utf16_with_replacement< + endianness::BIG>(input, length); +} + +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::utf8_length_from_utf32( + const char32_t *input, size_t length) const noexcept { + return utf32::utf8_length_from_utf32(input, length); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::utf16_length_from_utf32( + const char32_t *input, size_t length) const noexcept { + const __m128i v_ffff = lsx_splat_u32(0x0000ffff); + size_t pos = 0; + size_t count = 0; + for (; pos + 4 <= length; pos += 4) { + __m128i in = __lsx_vld(reinterpret_cast(input + pos), 0); + const __m128i surrogate_bytemask = __lsx_vslt_wu(v_ffff, in); + size_t surrogate_count = __lsx_vpickve2gr_bu( + __lsx_vpcnt_b(__lsx_vmskltz_w(surrogate_bytemask)), 0); + count += 4 + surrogate_count; + } + return count + + scalar::utf32::utf16_length_from_utf32(input + pos, length - pos); +} +#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 +simdutf_warn_unused size_t implementation::utf32_length_from_utf8( + const char *input, size_t length) const noexcept { + return utf8::count_code_points(input, length); +} +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 + +#if SIMDUTF_FEATURE_BASE64 +simdutf_warn_unused result implementation::base64_to_binary( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + if (options & base64_default_or_url) { + if (options == base64_options::base64_default_or_url_accept_garbage) { + return compress_decode_base64( + output, 
input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else if (options & base64_url) { + if (options == base64_options::base64_url_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else { + if (options == base64_options::base64_default_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } +} + +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + if (options & base64_default_or_url) { + if (options == base64_options::base64_default_or_url_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else if (options & base64_url) { + if (options == base64_options::base64_url_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else { + if (options == base64_options::base64_default_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } +} + +simdutf_warn_unused result implementation::base64_to_binary( + const char16_t *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const 
noexcept { + if (options & base64_default_or_url) { + if (options == base64_options::base64_default_or_url_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else if (options & base64_url) { + if (options == base64_options::base64_url_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else { + if (options == base64_options::base64_default_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } +} + +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char16_t *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + if (options & base64_default_or_url) { + if (options == base64_options::base64_default_or_url_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else if (options & base64_url) { + if (options == base64_options::base64_url_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } else { + if (options == base64_options::base64_default_accept_garbage) { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } else { + return compress_decode_base64( + output, input, length, options, last_chunk_options); + } + } +} + +size_t 
implementation::binary_to_base64(const char *input, size_t length, + char *output, + base64_options options) const noexcept { + if (options & base64_url) { + return encode_base64(output, input, length, options); + } else { + return encode_base64(output, input, length, options); + } +} + +size_t implementation::binary_to_base64_with_lines( + const char *input, size_t length, char *output, size_t line_length, + base64_options options) const noexcept { + return scalar::base64::tail_encode_base64_impl(output, input, length, + options, line_length); +} + +const char *implementation::find(const char *start, const char *end, + char character) const noexcept { + return util_find(start, end, character); +} + +const char16_t *implementation::find(const char16_t *start, const char16_t *end, + char16_t character) const noexcept { + return util_find(start, end, character); +} +#endif // SIMDUTF_FEATURE_BASE64 + +} // namespace lsx +} // namespace simdutf + +/* begin file src/simdutf/lsx/end.h */ +#undef SIMDUTF_SIMD_HAS_UNSIGNED_CMP +/* end file src/simdutf/lsx/end.h */ +/* end file src/lsx/implementation.cpp */ +#endif + +/* begin file src/simdutf_c.cpp */ +/* begin file include/simdutf_c.h */ +/*** + * simdutf_c.h.h - C API for simdutf + * This is currently experimental. + * We are committed to keeping the C API, but there might be mistakes in our + * implementation. Please report any issues you find. 
+ */ + +#ifndef SIMDUTF_C_H +#define SIMDUTF_C_H + +#include +#include +#include + +#ifdef __has_include + #if __has_include() + #include + #else // __has_include() + #define char16_t uint16_t + #define char32_t uint32_t + #endif // __has_include() +#else // __has_include() + #define char16_t uint16_t + #define char32_t uint32_t +#endif // __has_include + +#ifdef __cplusplus +extern "C" { +#endif + +/* C-friendly subset of simdutf errors */ +typedef enum simdutf_error_code { + SIMDUTF_ERROR_SUCCESS = 0, + SIMDUTF_ERROR_HEADER_BITS, + SIMDUTF_ERROR_TOO_SHORT, + SIMDUTF_ERROR_TOO_LONG, + SIMDUTF_ERROR_OVERLONG, + SIMDUTF_ERROR_TOO_LARGE, + SIMDUTF_ERROR_SURROGATE, + SIMDUTF_ERROR_INVALID_BASE64_CHARACTER, + SIMDUTF_ERROR_BASE64_INPUT_REMAINDER, + SIMDUTF_ERROR_BASE64_EXTRA_BITS, + SIMDUTF_ERROR_OUTPUT_BUFFER_TOO_SMALL, + SIMDUTF_ERROR_OTHER +} simdutf_error_code; + +typedef struct simdutf_result { + simdutf_error_code error; + size_t count; /* position of error or number of code units validated */ +} simdutf_result; + +typedef enum simdutf_encoding_type { + SIMDUTF_ENCODING_UNSPECIFIED = 0, + SIMDUTF_ENCODING_UTF8 = 1, + SIMDUTF_ENCODING_UTF16_LE = 2, + SIMDUTF_ENCODING_UTF16_BE = 4, + SIMDUTF_ENCODING_UTF32_LE = 8, + SIMDUTF_ENCODING_UTF32_BE = 16 +} simdutf_encoding_type; + +/* Validate UTF-8: returns true iff input is valid UTF-8 */ +bool simdutf_validate_utf8(const char *buf, size_t len); + +/* Validate UTF-8 with detailed result */ +simdutf_result simdutf_validate_utf8_with_errors(const char *buf, size_t len); + +/* Encoding detection */ +simdutf_encoding_type simdutf_autodetect_encoding(const char *input, + size_t length); +int simdutf_detect_encodings(const char *input, size_t length); + +/* ASCII validation */ +bool simdutf_validate_ascii(const char *buf, size_t len); +simdutf_result simdutf_validate_ascii_with_errors(const char *buf, size_t len); + +/* UTF-16 ASCII checks */ +bool simdutf_validate_utf16_as_ascii(const char16_t *buf, size_t len); +bool 
simdutf_validate_utf16be_as_ascii(const char16_t *buf, size_t len); +bool simdutf_validate_utf16le_as_ascii(const char16_t *buf, size_t len); + +/* UTF-16/UTF-8/UTF-32 validation (native/endian-specific) */ +bool simdutf_validate_utf16(const char16_t *buf, size_t len); +bool simdutf_validate_utf16le(const char16_t *buf, size_t len); +bool simdutf_validate_utf16be(const char16_t *buf, size_t len); +simdutf_result simdutf_validate_utf16_with_errors(const char16_t *buf, + size_t len); +simdutf_result simdutf_validate_utf16le_with_errors(const char16_t *buf, + size_t len); +simdutf_result simdutf_validate_utf16be_with_errors(const char16_t *buf, + size_t len); + +bool simdutf_validate_utf32(const char32_t *buf, size_t len); +simdutf_result simdutf_validate_utf32_with_errors(const char32_t *buf, + size_t len); + +/* to_well_formed UTF-16 helpers */ +void simdutf_to_well_formed_utf16le(const char16_t *input, size_t len, + char16_t *output); +void simdutf_to_well_formed_utf16be(const char16_t *input, size_t len, + char16_t *output); +void simdutf_to_well_formed_utf16(const char16_t *input, size_t len, + char16_t *output); + +/* Counting */ +size_t simdutf_count_utf16(const char16_t *input, size_t length); +size_t simdutf_count_utf16le(const char16_t *input, size_t length); +size_t simdutf_count_utf16be(const char16_t *input, size_t length); +size_t simdutf_count_utf8(const char *input, size_t length); + +/* Length estimators */ +size_t simdutf_utf8_length_from_latin1(const char *input, size_t length); +size_t simdutf_latin1_length_from_utf8(const char *input, size_t length); +size_t simdutf_latin1_length_from_utf16(size_t length); +size_t simdutf_latin1_length_from_utf32(size_t length); +size_t simdutf_utf16_length_from_utf8(const char *input, size_t length); +size_t simdutf_utf32_length_from_utf8(const char *input, size_t length); +size_t simdutf_utf8_length_from_utf16(const char16_t *input, size_t length); +simdutf_result 
+simdutf_utf8_length_from_utf16_with_replacement(const char16_t *input, + size_t length); +size_t simdutf_utf8_length_from_utf16le(const char16_t *input, size_t length); +size_t simdutf_utf8_length_from_utf16be(const char16_t *input, size_t length); +simdutf_result +simdutf_utf8_length_from_utf16le_with_replacement(const char16_t *input, + size_t length); +simdutf_result +simdutf_utf8_length_from_utf16be_with_replacement(const char16_t *input, + size_t length); + +/* Conversions: latin1 <-> utf8, utf8 <-> utf16/utf32, utf16 <-> utf8, etc. */ +size_t simdutf_convert_latin1_to_utf8(const char *input, size_t length, + char *output); +size_t simdutf_convert_latin1_to_utf8_safe(const char *input, size_t length, + char *output, size_t utf8_len); +size_t simdutf_convert_latin1_to_utf16le(const char *input, size_t length, + char16_t *output); +size_t simdutf_convert_latin1_to_utf16be(const char *input, size_t length, + char16_t *output); +size_t simdutf_convert_latin1_to_utf32(const char *input, size_t length, + char32_t *output); + +size_t simdutf_convert_utf8_to_latin1(const char *input, size_t length, + char *output); +size_t simdutf_convert_utf8_to_utf16le(const char *input, size_t length, + char16_t *output); +size_t simdutf_convert_utf8_to_utf16be(const char *input, size_t length, + char16_t *output); +size_t simdutf_convert_utf8_to_utf16(const char *input, size_t length, + char16_t *output); + +size_t simdutf_convert_utf8_to_utf32(const char *input, size_t length, + char32_t *output); +simdutf_result simdutf_convert_utf8_to_latin1_with_errors(const char *input, + size_t length, + char *output); +simdutf_result simdutf_convert_utf8_to_utf16_with_errors(const char *input, + size_t length, + char16_t *output); +simdutf_result simdutf_convert_utf8_to_utf16le_with_errors(const char *input, + size_t length, + char16_t *output); +simdutf_result simdutf_convert_utf8_to_utf16be_with_errors(const char *input, + size_t length, + char16_t *output); +simdutf_result 
simdutf_convert_utf8_to_utf32_with_errors(const char *input, + size_t length, + char32_t *output); + +/* Conversions assuming valid input */ +size_t simdutf_convert_valid_utf8_to_latin1(const char *input, size_t length, + char *output); +size_t simdutf_convert_valid_utf8_to_utf16le(const char *input, size_t length, + char16_t *output); +size_t simdutf_convert_valid_utf8_to_utf16be(const char *input, size_t length, + char16_t *output); +size_t simdutf_convert_valid_utf8_to_utf32(const char *input, size_t length, + char32_t *output); + +/* UTF-16 -> UTF-8 and related conversions */ +size_t simdutf_convert_utf16_to_utf8(const char16_t *input, size_t length, + char *output); +size_t simdutf_convert_utf16le_to_utf8(const char16_t *input, size_t length, + char *output); +size_t simdutf_convert_utf16be_to_utf8(const char16_t *input, size_t length, + char *output); +size_t simdutf_convert_utf16_to_utf8_safe(const char16_t *input, size_t length, + char *output, size_t utf8_len); +size_t simdutf_convert_utf16_to_latin1(const char16_t *input, size_t length, + char *output); +size_t simdutf_convert_utf16le_to_latin1(const char16_t *input, size_t length, + char *output); +size_t simdutf_convert_utf16be_to_latin1(const char16_t *input, size_t length, + char *output); +simdutf_result +simdutf_convert_utf16_to_latin1_with_errors(const char16_t *input, + size_t length, char *output); +simdutf_result +simdutf_convert_utf16le_to_latin1_with_errors(const char16_t *input, + size_t length, char *output); +simdutf_result +simdutf_convert_utf16be_to_latin1_with_errors(const char16_t *input, + size_t length, char *output); + +simdutf_result simdutf_convert_utf16_to_utf8_with_errors(const char16_t *input, + size_t length, + char *output); +simdutf_result +simdutf_convert_utf16le_to_utf8_with_errors(const char16_t *input, + size_t length, char *output); +simdutf_result +simdutf_convert_utf16be_to_utf8_with_errors(const char16_t *input, + size_t length, char *output); + +size_t 
simdutf_convert_valid_utf16_to_utf8(const char16_t *input, size_t length, + char *output); +size_t simdutf_convert_valid_utf16_to_latin1(const char16_t *input, + size_t length, char *output); +size_t simdutf_convert_valid_utf16le_to_latin1(const char16_t *input, + size_t length, char *output); +size_t simdutf_convert_valid_utf16be_to_latin1(const char16_t *input, + size_t length, char *output); + +size_t simdutf_convert_valid_utf16le_to_utf8(const char16_t *input, + size_t length, char *output); +size_t simdutf_convert_valid_utf16be_to_utf8(const char16_t *input, + size_t length, char *output); + +/* UTF-16 <-> UTF-32 conversions */ +size_t simdutf_convert_utf16_to_utf32(const char16_t *input, size_t length, + char32_t *output); +size_t simdutf_convert_utf16le_to_utf32(const char16_t *input, size_t length, + char32_t *output); +size_t simdutf_convert_utf16be_to_utf32(const char16_t *input, size_t length, + char32_t *output); +simdutf_result simdutf_convert_utf16_to_utf32_with_errors(const char16_t *input, + size_t length, + char32_t *output); +simdutf_result +simdutf_convert_utf16le_to_utf32_with_errors(const char16_t *input, + size_t length, char32_t *output); +simdutf_result +simdutf_convert_utf16be_to_utf32_with_errors(const char16_t *input, + size_t length, char32_t *output); + +/* Valid UTF-16 conversions */ +size_t simdutf_convert_valid_utf16_to_utf32(const char16_t *input, + size_t length, char32_t *output); +size_t simdutf_convert_valid_utf16le_to_utf32(const char16_t *input, + size_t length, char32_t *output); +size_t simdutf_convert_valid_utf16be_to_utf32(const char16_t *input, + size_t length, char32_t *output); + +/* UTF-32 -> ... 
conversions */ +size_t simdutf_convert_utf32_to_utf8(const char32_t *input, size_t length, + char *output); +simdutf_result simdutf_convert_utf32_to_utf8_with_errors(const char32_t *input, + size_t length, + char *output); +size_t simdutf_convert_valid_utf32_to_utf8(const char32_t *input, size_t length, + char *output); + +size_t simdutf_convert_utf32_to_utf16(const char32_t *input, size_t length, + char16_t *output); +size_t simdutf_convert_utf32_to_utf16le(const char32_t *input, size_t length, + char16_t *output); +size_t simdutf_convert_utf32_to_utf16be(const char32_t *input, size_t length, + char16_t *output); +simdutf_result +simdutf_convert_utf32_to_latin1_with_errors(const char32_t *input, + size_t length, char *output); + +/* --- Find helpers --- */ +const char *simdutf_find(const char *start, const char *end, char character); +const char16_t *simdutf_find_utf16(const char16_t *start, const char16_t *end, + char16_t character); + +/* --- Base64 enums and helpers --- */ +typedef enum simdutf_base64_options { + SIMDUTF_BASE64_DEFAULT = 0, + SIMDUTF_BASE64_URL = 1, + SIMDUTF_BASE64_DEFAULT_NO_PADDING = 2, + SIMDUTF_BASE64_URL_WITH_PADDING = 3, + SIMDUTF_BASE64_DEFAULT_ACCEPT_GARBAGE = 4, + SIMDUTF_BASE64_URL_ACCEPT_GARBAGE = 5, + SIMDUTF_BASE64_DEFAULT_OR_URL = 8, + SIMDUTF_BASE64_DEFAULT_OR_URL_ACCEPT_GARBAGE = 12 +} simdutf_base64_options; + +typedef enum simdutf_last_chunk_handling_options { + SIMDUTF_LAST_CHUNK_LOOSE = 0, + SIMDUTF_LAST_CHUNK_STRICT = 1, + SIMDUTF_LAST_CHUNK_STOP_BEFORE_PARTIAL = 2, + SIMDUTF_LAST_CHUNK_ONLY_FULL_CHUNKS = 3 +} simdutf_last_chunk_handling_options; + +/* maximal binary length estimators */ +size_t simdutf_maximal_binary_length_from_base64(const char *input, + size_t length); +size_t simdutf_maximal_binary_length_from_base64_utf16(const char16_t *input, + size_t length); + +/* base64 decoding/encoding */ +simdutf_result simdutf_base64_to_binary( + const char *input, size_t length, char *output, + simdutf_base64_options 
options, + simdutf_last_chunk_handling_options last_chunk_options); +simdutf_result simdutf_base64_to_binary_utf16( + const char16_t *input, size_t length, char *output, + simdutf_base64_options options, + simdutf_last_chunk_handling_options last_chunk_options); + +size_t simdutf_base64_length_from_binary(size_t length, + simdutf_base64_options options); +size_t simdutf_base64_length_from_binary_with_lines( + size_t length, simdutf_base64_options options, size_t line_length); + +size_t simdutf_binary_to_base64(const char *input, size_t length, char *output, + simdutf_base64_options options); +size_t simdutf_binary_to_base64_with_lines(const char *input, size_t length, + char *output, size_t line_length, + simdutf_base64_options options); + +/* safe decoding that provides an in/out outlen parameter */ +simdutf_result simdutf_base64_to_binary_safe( + const char *input, size_t length, char *output, size_t *outlen, + simdutf_base64_options options, + simdutf_last_chunk_handling_options last_chunk_options, + bool decode_up_to_bad_char); +simdutf_result simdutf_base64_to_binary_safe_utf16( + const char16_t *input, size_t length, char *output, size_t *outlen, + simdutf_base64_options options, + simdutf_last_chunk_handling_options last_chunk_options, + bool decode_up_to_bad_char); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* SIMDUTF_C_H */ +/* end file include/simdutf_c.h */ + +static simdutf_result to_c_result(const simdutf::result &r) { + simdutf_result out; + out.error = static_cast(r.error); + out.count = r.count; + return out; +} + +/* The C wrapper depends on the library features. Only expose the C API + when all relevant feature is enabled. This helps the + single-header generator to omit the C wrapper when features are + disabled. 
*/ +// clang-format off +#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 && SIMDUTF_FEATURE_ASCII && SIMDUTF_FEATURE_BASE64 && SIMDUTF_FEATURE_DETECT_ENCODING +// clang-format on +extern "C" { + +bool simdutf_validate_utf8(const char *buf, size_t len) { + return simdutf::validate_utf8(buf, len); +} + +simdutf_result simdutf_validate_utf8_with_errors(const char *buf, size_t len) { + return to_c_result(simdutf::validate_utf8_with_errors(buf, len)); +} + +simdutf_encoding_type simdutf_autodetect_encoding(const char *input, + size_t length) { + return static_cast( + simdutf::autodetect_encoding(input, length)); +} + +int simdutf_detect_encodings(const char *input, size_t length) { + return simdutf::detect_encodings(input, length); +} + +bool simdutf_validate_ascii(const char *buf, size_t len) { + return simdutf::validate_ascii(buf, len); +} +simdutf_result simdutf_validate_ascii_with_errors(const char *buf, size_t len) { + return to_c_result(simdutf::validate_ascii_with_errors(buf, len)); +} + +bool simdutf_validate_utf16_as_ascii(const char16_t *buf, size_t len) { + return simdutf::validate_utf16_as_ascii( + reinterpret_cast(buf), len); +} +bool simdutf_validate_utf16be_as_ascii(const char16_t *buf, size_t len) { + return simdutf::validate_utf16be_as_ascii( + reinterpret_cast(buf), len); +} +bool simdutf_validate_utf16le_as_ascii(const char16_t *buf, size_t len) { + return simdutf::validate_utf16le_as_ascii( + reinterpret_cast(buf), len); +} + +bool simdutf_validate_utf16(const char16_t *buf, size_t len) { + return simdutf::validate_utf16(reinterpret_cast(buf), len); +} +bool simdutf_validate_utf16le(const char16_t *buf, size_t len) { + return simdutf::validate_utf16le(reinterpret_cast(buf), + len); +} +bool simdutf_validate_utf16be(const char16_t *buf, size_t len) { + return simdutf::validate_utf16be(reinterpret_cast(buf), + len); +} +simdutf_result simdutf_validate_utf16_with_errors(const char16_t *buf, + size_t 
len) { + return to_c_result(simdutf::validate_utf16_with_errors( + reinterpret_cast(buf), len)); +} +simdutf_result simdutf_validate_utf16le_with_errors(const char16_t *buf, + size_t len) { + return to_c_result(simdutf::validate_utf16le_with_errors( + reinterpret_cast(buf), len)); +} +simdutf_result simdutf_validate_utf16be_with_errors(const char16_t *buf, + size_t len) { + return to_c_result(simdutf::validate_utf16be_with_errors( + reinterpret_cast(buf), len)); +} + +bool simdutf_validate_utf32(const char32_t *buf, size_t len) { + return simdutf::validate_utf32(reinterpret_cast(buf), len); +} +simdutf_result simdutf_validate_utf32_with_errors(const char32_t *buf, + size_t len) { + return to_c_result(simdutf::validate_utf32_with_errors( + reinterpret_cast(buf), len)); +} + +void simdutf_to_well_formed_utf16le(const char16_t *input, size_t len, + char16_t *output) { + simdutf::to_well_formed_utf16le(reinterpret_cast(input), + len, reinterpret_cast(output)); +} +void simdutf_to_well_formed_utf16be(const char16_t *input, size_t len, + char16_t *output) { + simdutf::to_well_formed_utf16be(reinterpret_cast(input), + len, reinterpret_cast(output)); +} +void simdutf_to_well_formed_utf16(const char16_t *input, size_t len, + char16_t *output) { + simdutf::to_well_formed_utf16(reinterpret_cast(input), len, + reinterpret_cast(output)); +} + +size_t simdutf_count_utf16(const char16_t *input, size_t length) { + return simdutf::count_utf16(reinterpret_cast(input), + length); +} +size_t simdutf_count_utf16le(const char16_t *input, size_t length) { + return simdutf::count_utf16le(reinterpret_cast(input), + length); +} +size_t simdutf_count_utf16be(const char16_t *input, size_t length) { + return simdutf::count_utf16be(reinterpret_cast(input), + length); +} +size_t simdutf_count_utf8(const char *input, size_t length) { + return simdutf::count_utf8(input, length); +} + +size_t simdutf_utf8_length_from_latin1(const char *input, size_t length) { + return 
simdutf::utf8_length_from_latin1(input, length); +} +size_t simdutf_latin1_length_from_utf8(const char *input, size_t length) { + return simdutf::latin1_length_from_utf8(input, length); +} +size_t simdutf_latin1_length_from_utf16(size_t length) { + return simdutf::latin1_length_from_utf16(length); +} +size_t simdutf_latin1_length_from_utf32(size_t length) { + return simdutf::latin1_length_from_utf32(length); +} +size_t simdutf_utf16_length_from_utf8(const char *input, size_t length) { + return simdutf::utf16_length_from_utf8(input, length); +} +size_t simdutf_utf32_length_from_utf8(const char *input, size_t length) { + return simdutf::utf32_length_from_utf8(input, length); +} +size_t simdutf_utf8_length_from_utf16(const char16_t *input, size_t length) { + return simdutf::utf8_length_from_utf16( + reinterpret_cast(input), length); +} +simdutf_result +simdutf_utf8_length_from_utf16_with_replacement(const char16_t *input, + size_t length) { + return to_c_result(simdutf::utf8_length_from_utf16_with_replacement( + reinterpret_cast(input), length)); +} +size_t simdutf_utf8_length_from_utf16le(const char16_t *input, size_t length) { + return simdutf::utf8_length_from_utf16le( + reinterpret_cast(input), length); +} +size_t simdutf_utf8_length_from_utf16be(const char16_t *input, size_t length) { + return simdutf::utf8_length_from_utf16be( + reinterpret_cast(input), length); +} +simdutf_result +simdutf_utf8_length_from_utf16le_with_replacement(const char16_t *input, + size_t length) { + return to_c_result(simdutf::utf8_length_from_utf16le_with_replacement( + reinterpret_cast(input), length)); +} +simdutf_result +simdutf_utf8_length_from_utf16be_with_replacement(const char16_t *input, + size_t length) { + return to_c_result(simdutf::utf8_length_from_utf16be_with_replacement( + reinterpret_cast(input), length)); +} + +/* Conversions: latin1 <-> utf8, utf8 <-> utf16/utf32, utf16 <-> utf8, etc. 
*/ +size_t simdutf_convert_latin1_to_utf8(const char *input, size_t length, + char *output) { + return simdutf::convert_latin1_to_utf8(input, length, output); +} + +size_t simdutf_convert_latin1_to_utf8_safe(const char *input, size_t length, + char *output, size_t utf8_len) { + return simdutf::convert_latin1_to_utf8_safe(input, length, output, utf8_len); +} +size_t simdutf_convert_latin1_to_utf16le(const char *input, size_t length, + char16_t *output) { + return simdutf::convert_latin1_to_utf16le( + input, length, reinterpret_cast(output)); +} +size_t simdutf_convert_latin1_to_utf16be(const char *input, size_t length, + char16_t *output) { + return simdutf::convert_latin1_to_utf16be( + input, length, reinterpret_cast(output)); +} +size_t simdutf_convert_latin1_to_utf32(const char *input, size_t length, + char32_t *output) { + return simdutf::convert_latin1_to_utf32(input, length, + reinterpret_cast(output)); +} + +size_t simdutf_convert_utf8_to_latin1(const char *input, size_t length, + char *output) { + return simdutf::convert_utf8_to_latin1(input, length, output); +} +size_t simdutf_convert_utf8_to_utf16le(const char *input, size_t length, + char16_t *output) { + return simdutf::convert_utf8_to_utf16le(input, length, + reinterpret_cast(output)); +} +size_t simdutf_convert_utf8_to_utf16(const char *input, size_t length, + char16_t *output) { + return simdutf::convert_utf8_to_utf16(input, length, + reinterpret_cast(output)); +} +size_t simdutf_convert_utf8_to_utf16be(const char *input, size_t length, + char16_t *output) { + return simdutf::convert_utf8_to_utf16be(input, length, + reinterpret_cast(output)); +} +size_t simdutf_convert_utf8_to_utf32(const char *input, size_t length, + char32_t *output) { + return simdutf::convert_utf8_to_utf32(input, length, + reinterpret_cast(output)); +} +simdutf_result simdutf_convert_utf8_to_latin1_with_errors(const char *input, + size_t length, + char *output) { + return to_c_result( + 
simdutf::convert_utf8_to_latin1_with_errors(input, length, output)); +} +simdutf_result simdutf_convert_utf8_to_utf16_with_errors(const char *input, + size_t length, + char16_t *output) { + return to_c_result(simdutf::convert_utf8_to_utf16_with_errors( + input, length, reinterpret_cast(output))); +} +simdutf_result simdutf_convert_utf8_to_utf16le_with_errors(const char *input, + size_t length, + char16_t *output) { + return to_c_result(simdutf::convert_utf8_to_utf16le_with_errors( + input, length, reinterpret_cast(output))); +} +simdutf_result simdutf_convert_utf8_to_utf16be_with_errors(const char *input, + size_t length, + char16_t *output) { + return to_c_result(simdutf::convert_utf8_to_utf16be_with_errors( + input, length, reinterpret_cast(output))); +} +simdutf_result simdutf_convert_utf8_to_utf32_with_errors(const char *input, + size_t length, + char32_t *output) { + return to_c_result(simdutf::convert_utf8_to_utf32_with_errors( + input, length, reinterpret_cast(output))); +} + +/* Conversions assuming valid input */ +size_t simdutf_convert_valid_utf8_to_latin1(const char *input, size_t length, + char *output) { + return simdutf::convert_valid_utf8_to_latin1(input, length, output); +} +size_t simdutf_convert_valid_utf8_to_utf16le(const char *input, size_t length, + char16_t *output) { + return simdutf::convert_valid_utf8_to_utf16le( + input, length, reinterpret_cast(output)); +} +size_t simdutf_convert_valid_utf8_to_utf16be(const char *input, size_t length, + char16_t *output) { + return simdutf::convert_valid_utf8_to_utf16be( + input, length, reinterpret_cast(output)); +} +size_t simdutf_convert_valid_utf8_to_utf32(const char *input, size_t length, + char32_t *output) { + return simdutf::convert_valid_utf8_to_utf32( + input, length, reinterpret_cast(output)); +} + +/* UTF-16 -> UTF-8 and related conversions */ +size_t simdutf_convert_utf16_to_utf8(const char16_t *input, size_t length, + char *output) { + return simdutf::convert_utf16_to_utf8( + 
reinterpret_cast(input), length, output); +} +size_t simdutf_convert_utf16_to_utf8_safe(const char16_t *input, size_t length, + char *output, size_t utf8_len) { + return simdutf::convert_utf16_to_utf8_safe( + reinterpret_cast(input), length, output, utf8_len); +} +size_t simdutf_convert_utf16_to_latin1(const char16_t *input, size_t length, + char *output) { + return simdutf::convert_utf16_to_latin1( + reinterpret_cast(input), length, output); +} +size_t simdutf_convert_utf16le_to_latin1(const char16_t *input, size_t length, + char *output) { + return simdutf::convert_utf16le_to_latin1( + reinterpret_cast(input), length, output); +} +size_t simdutf_convert_utf16be_to_latin1(const char16_t *input, size_t length, + char *output) { + return simdutf::convert_utf16be_to_latin1( + reinterpret_cast(input), length, output); +} +simdutf_result +simdutf_convert_utf16_to_latin1_with_errors(const char16_t *input, + size_t length, char *output) { + return to_c_result(simdutf::convert_utf16_to_latin1_with_errors( + reinterpret_cast(input), length, output)); +} +simdutf_result +simdutf_convert_utf16le_to_latin1_with_errors(const char16_t *input, + size_t length, char *output) { + return to_c_result(simdutf::convert_utf16le_to_latin1_with_errors( + reinterpret_cast(input), length, output)); +} +simdutf_result +simdutf_convert_utf16be_to_latin1_with_errors(const char16_t *input, + size_t length, char *output) { + return to_c_result(simdutf::convert_utf16be_to_latin1_with_errors( + reinterpret_cast(input), length, output)); +} + +simdutf_result simdutf_convert_utf16_to_utf8_with_errors(const char16_t *input, + size_t length, + char *output) { + return to_c_result(simdutf::convert_utf16_to_utf8_with_errors( + reinterpret_cast(input), length, output)); +} +simdutf_result +simdutf_convert_utf16le_to_utf8_with_errors(const char16_t *input, + size_t length, char *output) { + return to_c_result(simdutf::convert_utf16le_to_utf8_with_errors( + reinterpret_cast(input), length, output)); +} 
+simdutf_result +simdutf_convert_utf16be_to_utf8_with_errors(const char16_t *input, + size_t length, char *output) { + return to_c_result(simdutf::convert_utf16be_to_utf8_with_errors( + reinterpret_cast(input), length, output)); +} + +size_t simdutf_convert_utf16le_to_utf8(const char16_t *input, size_t length, + char *output) { + return simdutf::convert_utf16le_to_utf8( + reinterpret_cast(input), length, output); +} +size_t simdutf_convert_utf16be_to_utf8(const char16_t *input, size_t length, + char *output) { + return simdutf::convert_utf16be_to_utf8( + reinterpret_cast(input), length, output); +} + +size_t simdutf_convert_valid_utf16_to_utf8(const char16_t *input, size_t length, + char *output) { + return simdutf::convert_valid_utf16_to_utf8( + reinterpret_cast(input), length, output); +} +size_t simdutf_convert_valid_utf16_to_latin1(const char16_t *input, + size_t length, char *output) { + return simdutf::convert_valid_utf16_to_latin1( + reinterpret_cast(input), length, output); +} +size_t simdutf_convert_valid_utf16le_to_latin1(const char16_t *input, + size_t length, char *output) { + return simdutf::convert_valid_utf16le_to_latin1( + reinterpret_cast(input), length, output); +} +size_t simdutf_convert_valid_utf16be_to_latin1(const char16_t *input, + size_t length, char *output) { + return simdutf::convert_valid_utf16be_to_latin1( + reinterpret_cast(input), length, output); +} + +size_t simdutf_convert_valid_utf16le_to_utf8(const char16_t *input, + size_t length, char *output) { + return simdutf::convert_valid_utf16le_to_utf8( + reinterpret_cast(input), length, output); +} +size_t simdutf_convert_valid_utf16be_to_utf8(const char16_t *input, + size_t length, char *output) { + return simdutf::convert_valid_utf16be_to_utf8( + reinterpret_cast(input), length, output); +} + +/* UTF-16 <-> UTF-32 conversions */ +size_t simdutf_convert_utf16_to_utf32(const char16_t *input, size_t length, + char32_t *output) { + return simdutf::convert_utf16_to_utf32( + 
reinterpret_cast(input), length, + reinterpret_cast(output)); +} +size_t simdutf_convert_utf16le_to_utf32(const char16_t *input, size_t length, + char32_t *output) { + return simdutf::convert_utf16le_to_utf32( + reinterpret_cast(input), length, + reinterpret_cast(output)); +} +size_t simdutf_convert_utf16be_to_utf32(const char16_t *input, size_t length, + char32_t *output) { + return simdutf::convert_utf16be_to_utf32( + reinterpret_cast(input), length, + reinterpret_cast(output)); +} +simdutf_result simdutf_convert_utf16_to_utf32_with_errors(const char16_t *input, + size_t length, + char32_t *output) { + return to_c_result(simdutf::convert_utf16_to_utf32_with_errors( + reinterpret_cast(input), length, + reinterpret_cast(output))); +} +simdutf_result +simdutf_convert_utf16le_to_utf32_with_errors(const char16_t *input, + size_t length, char32_t *output) { + return to_c_result(simdutf::convert_utf16le_to_utf32_with_errors( + reinterpret_cast(input), length, + reinterpret_cast(output))); +} +simdutf_result +simdutf_convert_utf16be_to_utf32_with_errors(const char16_t *input, + size_t length, char32_t *output) { + return to_c_result(simdutf::convert_utf16be_to_utf32_with_errors( + reinterpret_cast(input), length, + reinterpret_cast(output))); +} + +/* Valid UTF-16 conversions */ +size_t simdutf_convert_valid_utf16_to_utf32(const char16_t *input, + size_t length, char32_t *output) { + return simdutf::convert_valid_utf16_to_utf32( + reinterpret_cast(input), length, + reinterpret_cast(output)); +} +size_t simdutf_convert_valid_utf16le_to_utf32(const char16_t *input, + size_t length, char32_t *output) { + return simdutf::convert_valid_utf16le_to_utf32( + reinterpret_cast(input), length, + reinterpret_cast(output)); +} +size_t simdutf_convert_valid_utf16be_to_utf32(const char16_t *input, + size_t length, char32_t *output) { + return simdutf::convert_valid_utf16be_to_utf32( + reinterpret_cast(input), length, + reinterpret_cast(output)); +} + +/* UTF-32 -> ... 
conversions */ +size_t simdutf_convert_utf32_to_utf8(const char32_t *input, size_t length, + char *output) { + return simdutf::convert_utf32_to_utf8( + reinterpret_cast(input), length, output); +} +simdutf_result simdutf_convert_utf32_to_utf8_with_errors(const char32_t *input, + size_t length, + char *output) { + return to_c_result(simdutf::convert_utf32_to_utf8_with_errors( + reinterpret_cast(input), length, output)); +} +size_t simdutf_convert_valid_utf32_to_utf8(const char32_t *input, size_t length, + char *output) { + return simdutf::convert_valid_utf32_to_utf8( + reinterpret_cast(input), length, output); +} + +size_t simdutf_convert_utf32_to_utf16(const char32_t *input, size_t length, + char16_t *output) { + return simdutf::convert_utf32_to_utf16( + reinterpret_cast(input), length, + reinterpret_cast(output)); +} +size_t simdutf_convert_utf32_to_utf16le(const char32_t *input, size_t length, + char16_t *output) { + return simdutf::convert_utf32_to_utf16le( + reinterpret_cast(input), length, + reinterpret_cast(output)); +} +size_t simdutf_convert_utf32_to_utf16be(const char32_t *input, size_t length, + char16_t *output) { + return simdutf::convert_utf32_to_utf16be( + reinterpret_cast(input), length, + reinterpret_cast(output)); +} +simdutf_result +simdutf_convert_utf32_to_latin1_with_errors(const char32_t *input, + size_t length, char *output) { + return to_c_result(simdutf::convert_utf32_to_latin1_with_errors( + reinterpret_cast(input), length, output)); +} + +/* --- find helpers --- */ +const char *simdutf_find(const char *start, const char *end, char character) { + return simdutf::find(start, end, character); +} +const char16_t *simdutf_find_utf16(const char16_t *start, const char16_t *end, + char16_t character) { + return simdutf::find(start, end, character); +} + +/* --- base64 helpers --- */ +size_t simdutf_maximal_binary_length_from_base64(const char *input, + size_t length) { + return simdutf::maximal_binary_length_from_base64(input, length); +} +size_t 
simdutf_maximal_binary_length_from_base64_utf16(const char16_t *input, + size_t length) { + return simdutf::maximal_binary_length_from_base64(input, length); +} + +simdutf_result simdutf_base64_to_binary( + const char *input, size_t length, char *output, + simdutf_base64_options options, + simdutf_last_chunk_handling_options last_chunk_options) { + return to_c_result(simdutf::base64_to_binary( + input, length, output, static_cast(options), + static_cast(last_chunk_options))); +} +simdutf_result simdutf_base64_to_binary_utf16( + const char16_t *input, size_t length, char *output, + simdutf_base64_options options, + simdutf_last_chunk_handling_options last_chunk_options) { + return to_c_result(simdutf::base64_to_binary( + input, length, output, static_cast(options), + static_cast(last_chunk_options))); +} + +size_t simdutf_base64_length_from_binary(size_t length, + simdutf_base64_options options) { + return simdutf::base64_length_from_binary( + length, static_cast(options)); +} +size_t simdutf_base64_length_from_binary_with_lines( + size_t length, simdutf_base64_options options, size_t line_length) { + return simdutf::base64_length_from_binary_with_lines( + length, static_cast(options), line_length); +} + +size_t simdutf_binary_to_base64(const char *input, size_t length, char *output, + simdutf_base64_options options) { + return simdutf::binary_to_base64( + input, length, output, static_cast(options)); +} +size_t simdutf_binary_to_base64_with_lines(const char *input, size_t length, + char *output, size_t line_length, + simdutf_base64_options options) { + return simdutf::binary_to_base64_with_lines( + input, length, output, line_length, + static_cast(options)); +} + +simdutf_result simdutf_base64_to_binary_safe( + const char *input, size_t length, char *output, size_t *outlen, + simdutf_base64_options options, + simdutf_last_chunk_handling_options last_chunk_options, + bool decode_up_to_bad_char) { + size_t local_out = outlen ? 
*outlen : 0; + simdutf::result r = simdutf::base64_to_binary_safe( + input, length, output, local_out, + static_cast(options), + static_cast(last_chunk_options), + decode_up_to_bad_char); + if (outlen) + *outlen = local_out; + return to_c_result(r); +} +simdutf_result simdutf_base64_to_binary_safe_utf16( + const char16_t *input, size_t length, char *output, size_t *outlen, + simdutf_base64_options options, + simdutf_last_chunk_handling_options last_chunk_options, + bool decode_up_to_bad_char) { + size_t local_out = outlen ? *outlen : 0; + simdutf::result r = simdutf::base64_to_binary_safe( + input, length, output, local_out, + static_cast(options), + static_cast(last_chunk_options), + decode_up_to_bad_char); + if (outlen) + *outlen = local_out; + return to_c_result(r); +} + +} // extern "C" +// clang-format off +#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 && SIMDUTF_FEATURE_ASCII && SIMDUTF_FEATURE_BASE64 && SIMDUTF_FEATURE_DETECT_ENCODING +// clang-format on +/* end file src/simdutf_c.cpp */ +SIMDUTF_POP_DISABLE_WARNINGS +/* end file src/simdutf.cpp */ diff --git a/Extra2D/src/utils/random.cpp b/Extra2D/src/utils/random.cpp index cc2a528..ba78965 100644 --- a/Extra2D/src/utils/random.cpp +++ b/Extra2D/src/utils/random.cpp @@ -3,52 +3,100 @@ namespace extra2d { +/** + * @brief 构造函数,初始化随机数生成器 + * + * 使用当前时间作为默认种子初始化随机数生成器 + */ Random::Random() : floatDist_(0.0f, 1.0f) { // 使用当前时间作为默认种子 randomize(); } -Random &Random::getInstance() { +/** + * @brief 获取Random单例实例 + * @return Random单例的引用 + */ +Random &Random::get() { static Random instance; return instance; } -void Random::setSeed(uint32 seed) { generator_.seed(seed); } +/** + * @brief 设置随机数种子 + * @param s 随机数种子值 + */ +void Random::seed(u32 s) { generator_.seed(s); } +/** + * @brief 使用当前时间随机化种子 + * + * 使用高精度时钟的当前时间作为随机数生成器的种子 + */ void Random::randomize() { auto now = std::chrono::high_resolution_clock::now(); auto time = now.time_since_epoch().count(); - 
generator_.seed(static_cast(time)); + generator_.seed(static_cast(time)); } -float Random::getFloat() { return floatDist_(generator_); } +/** + * @brief 获取[0, 1)范围内的随机浮点数 + * @return 随机浮点数,范围[0, 1) + */ +f32 Random::randomF32() { return floatDist_(generator_); } -float Random::getFloat(float min, float max) { +/** + * @brief 获取指定范围内的随机浮点数 + * @param min 最小值 + * @param max 最大值 + * @return 随机浮点数,范围[min, max] + */ +f32 Random::randomF32(f32 min, f32 max) { if (min >= max) { return min; } return min + floatDist_(generator_) * (max - min); } -int Random::getInt(int max) { +/** + * @brief 获取[0, max]范围内的随机整数 + * @param max 最大值(包含) + * @return 随机整数,范围[0, max] + */ +i32 Random::randomI32(i32 max) { if (max <= 0) { return 0; } - std::uniform_int_distribution dist(0, max); + std::uniform_int_distribution dist(0, max); return dist(generator_); } -int Random::getInt(int min, int max) { +/** + * @brief 获取指定范围内的随机整数 + * @param min 最小值(包含) + * @param max 最大值(包含) + * @return 随机整数,范围[min, max] + */ +i32 Random::randomI32(i32 min, i32 max) { if (min >= max) { return min; } - std::uniform_int_distribution dist(min, max); + std::uniform_int_distribution dist(min, max); return dist(generator_); } -bool Random::getBool() { return floatDist_(generator_) >= 0.5f; } +/** + * @brief 获取随机布尔值(50%概率) + * @return 随机布尔值 + */ +bool Random::boolean() { return floatDist_(generator_) >= 0.5f; } -bool Random::getBool(float probability) { +/** + * @brief 以指定概率获取随机布尔值 + * @param probability 返回true的概率,范围[0.0, 1.0] + * @return 随机布尔值 + */ +bool Random::boolean(f32 probability) { if (probability <= 0.0f) { return false; } @@ -58,11 +106,19 @@ bool Random::getBool(float probability) { return floatDist_(generator_) < probability; } -float Random::getAngle() { - static const float TWO_PI = 6.28318530718f; +/** + * @brief 获取随机角度值 + * @return 随机角度,范围[0, 2π] + */ +f32 Random::angle() { + static const f32 TWO_PI = 6.28318530718f; return floatDist_(generator_) * TWO_PI; } -float Random::getSigned() { return 
floatDist_(generator_) * 2.0f - 1.0f; } +/** + * @brief 获取有符号随机数 + * @return 随机浮点数,范围[-1.0, 1.0] + */ +f32 Random::signedF32() { return floatDist_(generator_) * 2.0f - 1.0f; } } // namespace extra2d diff --git a/Extra2D/src/utils/timer.cpp b/Extra2D/src/utils/timer.cpp index ec1ae7d..c3d5fac 100644 --- a/Extra2D/src/utils/timer.cpp +++ b/Extra2D/src/utils/timer.cpp @@ -3,24 +3,24 @@ namespace extra2d { -uint32 Timer::nextId_ = 1; +u32 Timer::nextId_ = 1; -Timer::Timer(float interval, bool repeat, Callback callback) +Timer::Timer(f32 interval, bool repeat, Fn fn) : interval_(interval), elapsed_(0.0f), repeat_(repeat), paused_(false), - valid_(true), callback_(std::move(callback)) { + valid_(true), fn_(std::move(fn)) { id_ = nextId_++; } -bool Timer::update(float deltaTime) { +bool Timer::update(f32 dt) { if (!valid_ || paused_) { return false; } - elapsed_ += deltaTime; + elapsed_ += dt; if (elapsed_ >= interval_) { - if (callback_) { - callback_(); + if (fn_) { + fn_(); } if (repeat_) { @@ -47,33 +47,28 @@ void Timer::resume() { paused_ = false; } void Timer::cancel() { valid_ = false; } -float Timer::getRemaining() const { +f32 Timer::remaining() const { if (!valid_ || paused_) { return 0.0f; } return std::max(0.0f, interval_ - elapsed_); } -// ============================================================================ -// TimerManager 实现 -// ============================================================================ - -uint32 TimerManager::addTimer(float delay, Timer::Callback callback) { - auto timer = std::make_unique(delay, false, std::move(callback)); - uint32 id = timer->getId(); +u32 TimerManager::add(f32 delay, Timer::Fn fn) { + auto timer = std::make_unique(delay, false, std::move(fn)); + u32 id = timer->id(); timers_.emplace(id, std::move(timer)); return id; } -uint32 TimerManager::addRepeatingTimer(float interval, - Timer::Callback callback) { - auto timer = std::make_unique(interval, true, std::move(callback)); - uint32 id = timer->getId(); +u32 
TimerManager::addRepeat(f32 interval, Timer::Fn fn) { + auto timer = std::make_unique(interval, true, std::move(fn)); + u32 id = timer->id(); timers_.emplace(id, std::move(timer)); return id; } -void TimerManager::cancelTimer(uint32 timerId) { +void TimerManager::cancel(u32 timerId) { auto it = timers_.find(timerId); if (it != timers_.end()) { it->second->cancel(); @@ -81,31 +76,31 @@ void TimerManager::cancelTimer(uint32 timerId) { } } -void TimerManager::pauseTimer(uint32 timerId) { +void TimerManager::pause(u32 timerId) { auto it = timers_.find(timerId); if (it != timers_.end()) { it->second->pause(); } } -void TimerManager::resumeTimer(uint32 timerId) { +void TimerManager::resume(u32 timerId) { auto it = timers_.find(timerId); if (it != timers_.end()) { it->second->resume(); } } -void TimerManager::update(float deltaTime) { +void TimerManager::update(f32 dt) { timersToRemove_.clear(); for (auto &[id, timer] : timers_) { - timer->update(deltaTime); - if (!timer->isValid()) { + timer->update(dt); + if (!timer->valid()) { timersToRemove_.push_back(id); } } - for (uint32 id : timersToRemove_) { + for (u32 id : timersToRemove_) { timers_.erase(id); } } diff --git a/xmake.lua b/xmake.lua index 33fd5ff..f13e49c 100644 --- a/xmake.lua +++ b/xmake.lua @@ -88,10 +88,6 @@ define_extra2d_engine() -- 示例程序目标(作为子项目) if is_config("examples","true") then includes("examples/hello_world", {rootdir = "examples/hello_world"}) - includes("examples/spatial_index_demo", {rootdir = "examples/spatial_index_demo"}) - includes("examples/collision_demo", {rootdir = "examples/collision_demo"}) - includes("examples/push_box", {rootdir = "examples/push_box"}) - includes("examples/flappy_bird", {rootdir = "examples/flappy_bird"}) end -- ==============================================