From 717112c43703a035d53ae607349327eb4236d9f7 Mon Sep 17 00:00:00 2001 From: ChestnutYueyue <952134128@qq.com> Date: Tue, 3 Mar 2026 11:25:43 +0800 Subject: [PATCH] =?UTF-8?q?perf(=E6=B8=B2=E6=9F=93):=20=E4=BC=98=E5=8C=96?= =?UTF-8?q?=E6=B8=B2=E6=9F=93=E6=80=A7=E8=83=BD=E5=B9=B6=E6=B7=BB=E5=8A=A0?= =?UTF-8?q?=E7=BB=9F=E8=AE=A1=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 实现全局UBO双缓冲机制减少CPU-GPU等待 - 添加渲染统计功能,每60帧输出一次性能数据 - 优化命令列表状态缓存减少冗余OpenGL调用 - 改进材质UBO管理,支持批量更新和CPU缓冲区 - 重构命令队列执行逻辑,支持双缓冲和统计收集 --- include/renderer/command_queue.h | 77 ++++++- include/renderer/rhi/opengl/gl_command_list.h | 86 +++++++- include/renderer/uniform_buffer.h | 46 +++- src/renderer/command_queue.cpp | 201 +++++++++++++----- src/renderer/render_graph.cpp | 10 +- src/renderer/renderer_module.cpp | 11 +- src/renderer/rhi/opengl/gl_command_list.cpp | 181 +++++++++++----- src/renderer/rhi/opengl/gl_pipeline.cpp | 9 +- src/renderer/uniform_buffer.cpp | 84 ++++++-- 9 files changed, 551 insertions(+), 154 deletions(-) diff --git a/include/renderer/command_queue.h b/include/renderer/command_queue.h index 8737139..440a118 100644 --- a/include/renderer/command_queue.h +++ b/include/renderer/command_queue.h @@ -123,9 +123,28 @@ struct CommandBatch { * * 对绘制命令进行排序以优化渲染性能 * 排序优先级:材质(管线)> 深度 > 层 + * 使用对象池复用命令内存,减少每帧分配 */ class CommandSorter { public: + /** + * @brief 默认构造函数 + */ + CommandSorter(); + + /** + * @brief 析构函数 + */ + ~CommandSorter(); + + // 禁止拷贝 + CommandSorter(const CommandSorter&) = delete; + CommandSorter& operator=(const CommandSorter&) = delete; + + // 允许移动 + CommandSorter(CommandSorter&&) noexcept; + CommandSorter& operator=(CommandSorter&&) noexcept; + /** * @brief 添加绘制命令 * @param cmd 绘制命令 @@ -151,16 +170,24 @@ public: * @brief 获取命令数量 * @return 命令数量 */ - uint32_t getCount() const { return static_cast(commands_.size()); } + uint32_t getCount() const { return commandCount_; } /** - * @brief 清空所有命令 + * @brief 清空所有命令(保留内存) */ void clear(); + /** + * @brief 预分配命令缓冲区 + * @param capacity 预分配的容量 + */ + void reserve(uint32_t capacity); + private: - std::vector commands_; - std::vector sortedIndices_; + std::vector commands_; // 命令缓冲区(复用) + std::vector sortedIndices_; // 排序索引缓冲区(复用) + uint32_t commandCount_ = 0; // 当前命令数量 + static constexpr uint32_t INITIAL_CAPACITY = 1024; // 初始容量 }; /** @@ -297,8 +324,9 @@ public: * @brief 执行所有命令 * * 排序、批处理并执行所有提交的命令 + * @param frameIndex 当前帧索引(用于双缓冲) */ - void execute(); + void execute(uint32_t frameIndex); /** * @brief 更新全局 UBO 数据 @@ -306,9 +334,10 @@ public: * @param deltaTime 帧时间 * @param screenWidth 屏幕宽度 * @param screenHeight 屏幕高度 + * @param frameIndex 当前帧索引(用于双缓冲) */ - void updateGlobalUBO(const Mat4& viewProjection, float deltaTime, - uint32_t screenWidth, uint32_t screenHeight); + void updateGlobalUBO(const Mat4& viewProjection, float deltaTime, + uint32_t screenWidth, uint32_t screenHeight, uint32_t frameIndex); /** * @brief 获取当前命令数量 @@ -322,6 +351,17 @@ public: */ RHICommandList* getCommandList() const { return commandList_.get(); } + /** + * @brief 获取渲染统计信息 + * @return 渲染统计 + */ + const RenderStats& getStats() const { return stats_; } + + /** + * @brief 重置统计信息 + */ + void resetStats() { stats_ = {}; } + private: CommandSorter sorter_; CommandBatcher batcher_; @@ -331,6 +371,9 @@ private: // UBO 管理器 std::unique_ptr uboManager_; + // 渲染统计 + RenderStats stats_; + // 全局 UBO 数据 - 必须与着色器中的 std140 布局完全匹配 // layout(std140, binding = 0) uniform GlobalUBO { // mat4 uViewProjection; // 64 bytes, offset 0 @@ -358,7 +401,7 @@ private: // 当前材质 UBO 缓冲区 UniformBuffer* currentMaterialUBO_ = nullptr; - uint32_t currentMaterialUBOOffset_ = 0; + uint32_t materialUBOBufferOffset_ = 0; /** * @brief 获取或创建材质 ID @@ -371,15 +414,27 @@ private: * @brief 执行单个批次 * @param batchIndex 批次索引 * @param batch 命令批次 + * @param frameIndex 当前帧索引(用于双缓冲) */ - void executeBatch(uint32_t batchIndex, const CommandBatch &batch); + void executeBatch(uint32_t batchIndex, const CommandBatch &batch, uint32_t frameIndex); /** - * @brief 分配材质 UBO 空间 + * @brief 分配材质 UBO 空间(使用 CPU 缓冲区) * @param size 需要的空间大小 - * @return 分配的 UBO 指针和偏移量 + * @return 分配的 UBO 指针(nullptr 表示使用 CPU 缓冲区)和偏移量 */ std::pair allocateMaterialUBO(uint32_t size); + + /** + * @brief 将材质 UBO CPU 缓冲区数据刷新到 GPU + */ + void flushMaterialUBOToGPU(); + + /** + * @brief 获取当前材质 UBO + * @return 当前材质 UBO 指针 + */ + UniformBuffer* getCurrentMaterialUBO() const; }; } // namespace extra2d diff --git a/include/renderer/rhi/opengl/gl_command_list.h b/include/renderer/rhi/opengl/gl_command_list.h index f35226b..6b4b174 100644 --- a/include/renderer/rhi/opengl/gl_command_list.h +++ b/include/renderer/rhi/opengl/gl_command_list.h @@ -89,12 +89,88 @@ public: void setUniform(const char* name, const Mat4& value) override; private: + // 状态缓存机制 - 避免冗余的 OpenGL 状态切换 + struct StateCache { + // 管线状态 + GLPipeline* pipeline = nullptr; + GLuint shaderProgram = 0; + + // 视口状态 + Viewport viewport; + bool viewportValid = false; + + // 裁剪状态 + ScissorRect scissor{0, 0, 0, 0}; + bool scissorValid = false; + bool scissorEnabled = false; + + // 缓冲区状态 + GLBuffer* vertexBuffers[4] = {nullptr, nullptr, nullptr, nullptr}; // 支持最多4个顶点缓冲区槽 + uint32_t vertexBufferOffsets[4] = {0, 0, 0, 0}; + GLBuffer* indexBuffer = nullptr; + + // Uniform 缓冲区状态 + struct UBOBinding { + GLBuffer* buffer = nullptr; + uint32_t offset = 0; + uint32_t size = 0; + bool valid = false; + }; + UBOBinding uniformBuffers[4]; // 支持最多4个 UBO 槽 + + // 纹理状态 + struct TextureBinding { + GLTexture* texture = nullptr; + bool valid = false; + }; + TextureBinding textures[8]; // 支持最多8个纹理槽 + + // 帧缓冲状态 + GLFramebuffer* framebuffer = nullptr; + + // 重置所有状态缓存 + void reset() { + pipeline = nullptr; + shaderProgram = 0; + viewportValid = false; + scissorValid = false; + scissorEnabled = false; + for (int i = 0; i < 4; ++i) { + vertexBuffers[i] = nullptr; + vertexBufferOffsets[i] = 0; + uniformBuffers[i] = {}; + } + indexBuffer = nullptr; + for (int i = 0; i < 8; ++i) { + textures[i] = {}; + } + framebuffer = nullptr; + } + }; + bool recording_ = false; - GLPipeline *currentPipeline_ = nullptr; - GLBuffer *currentVertexBuffer_ = nullptr; - GLBuffer *currentIndexBuffer_ = nullptr; - GLFramebuffer *currentFramebuffer_ = nullptr; - GLuint currentShaderProgram_ = 0; + StateCache stateCache_; + + // 统计信息(调试用) + struct Stats { + uint32_t pipelineChanges = 0; + uint32_t textureChanges = 0; + uint32_t bufferChanges = 0; + uint32_t viewportChanges = 0; + uint32_t redundantPipelineChanges = 0; + uint32_t redundantTextureChanges = 0; + uint32_t redundantBufferChanges = 0; + + void reset() { + pipelineChanges = 0; + textureChanges = 0; + bufferChanges = 0; + viewportChanges = 0; + redundantPipelineChanges = 0; + redundantTextureChanges = 0; + redundantBufferChanges = 0; + } + } stats_; }; } // namespace extra2d diff --git a/include/renderer/uniform_buffer.h b/include/renderer/uniform_buffer.h index fdfd359..7159573 100644 --- a/include/renderer/uniform_buffer.h +++ b/include/renderer/uniform_buffer.h @@ -86,6 +86,7 @@ private: * * 管理多个 UBO 的分配和回收 * 支持每帧重置和对象池复用 + * 支持双缓冲机制减少 CPU-GPU 等待 */ class UniformBufferManager { public: @@ -119,10 +120,11 @@ public: void shutdown(); /** - * @brief 获取全局 UBO + * @brief 获取当前帧的全局 UBO(双缓冲) + * @param frameIndex 当前帧索引 * @return 全局 UBO 指针 */ - UniformBuffer* getGlobalUBO(); + UniformBuffer* getGlobalUBO(uint32_t frameIndex); /** * @brief 获取或创建材质 UBO @@ -139,25 +141,57 @@ public: void resetMaterialUBOs(); /** - * @brief 更新全局 UBO 数据 + * @brief 更新全局 UBO 数据(双缓冲) * @param data 数据指针 * @param size 数据大小 + * @param frameIndex 当前帧索引 */ - void updateGlobalUBO(const void* data, uint32_t size); + void updateGlobalUBO(const void* data, uint32_t size, uint32_t frameIndex); + + /** + * @brief 批量更新材质 UBO 数据 + * @param data 材质数据指针 + * @param size 数据大小 + * @param offset 偏移量 + */ + void batchUpdateMaterialUBO(const void* data, uint32_t size, uint32_t offset); + + /** + * @brief 获取材质 UBO 的 CPU 缓冲区指针(用于批量更新) + * @return CPU 缓冲区指针 + */ + uint8_t* getMaterialUBOBuffer() { return materialUBOBuffer_.data(); } + + /** + * @brief 获取材质 UBO 缓冲区大小 + * @return 缓冲区大小 + */ + uint32_t getMaterialUBOBufferSize() const { return static_cast(materialUBOBuffer_.size()); } + + /** + * @brief 刷新材质 UBO 数据到 GPU + */ + void flushMaterialUBO(); private: - // 全局 UBO(用于 viewProjection、time 等全局数据) - std::unique_ptr globalUBO_; + // 全局 UBO 双缓冲(用于 viewProjection、time 等全局数据) + std::array, 2> globalUBOs_; // 材质 UBO 池 std::vector> materialUBOPool_; uint32_t currentUBOIndex_ = 0; + // 材质 UBO CPU 缓冲区(用于批量更新) + std::vector materialUBOBuffer_; + uint32_t materialUBOBufferOffset_ = 0; + UniformBuffer* currentMaterialUBO_ = nullptr; + // 常量 static constexpr uint32_t INITIAL_UBO_POOL_SIZE = 16; static constexpr uint32_t GLOBAL_UBO_SIZE = 256; // 足够存储 viewProjection + time + screenSize static constexpr uint32_t GLOBAL_UBO_BINDING = 0; // 全局 UBO 绑定槽位 static constexpr uint32_t MATERIAL_UBO_BINDING = 1; // 材质 UBO 绑定槽位 + static constexpr uint32_t MATERIAL_UBO_BUFFER_SIZE = 1024 * 1024; // 1MB 材质 UBO 缓冲区 }; } // namespace extra2d diff --git a/src/renderer/command_queue.cpp b/src/renderer/command_queue.cpp index 8fcac87..4d3d7e7 100644 --- a/src/renderer/command_queue.cpp +++ b/src/renderer/command_queue.cpp @@ -21,24 +21,69 @@ namespace extra2d { // CommandSorter 实现 // ======================================== +CommandSorter::CommandSorter() { + // 预分配初始容量 + commands_.reserve(INITIAL_CAPACITY); + sortedIndices_.reserve(INITIAL_CAPACITY); +} + +CommandSorter::~CommandSorter() = default; + +CommandSorter::CommandSorter(CommandSorter&& other) noexcept + : commands_(std::move(other.commands_)), + sortedIndices_(std::move(other.sortedIndices_)), + commandCount_(other.commandCount_) { + other.commandCount_ = 0; +} + +CommandSorter& CommandSorter::operator=(CommandSorter&& other) noexcept { + if (this != &other) { + commands_ = std::move(other.commands_); + sortedIndices_ = std::move(other.sortedIndices_); + commandCount_ = other.commandCount_; + other.commandCount_ = 0; + } + return *this; +} + uint32_t CommandSorter::addCommand(const DrawCommand &cmd) { - uint32_t index = static_cast(commands_.size()); - commands_.push_back(cmd); - sortedIndices_.push_back(index); + uint32_t index = commandCount_; + + // 如果缓冲区不够大,扩展它 + if (index >= commands_.size()) { + commands_.push_back(cmd); + sortedIndices_.push_back(index); + } else { + commands_[index] = cmd; + sortedIndices_[index] = index; + } + + commandCount_++; return index; } void CommandSorter::sort() { + if (commandCount_ == 0) return; + + // 只排序有效范围的索引 + auto indicesBegin = sortedIndices_.begin(); + auto indicesEnd = sortedIndices_.begin() + commandCount_; + // 使用稳定排序保持相同键的命令顺序 - std::stable_sort(sortedIndices_.begin(), sortedIndices_.end(), + std::stable_sort(indicesBegin, indicesEnd, [this](uint32_t a, uint32_t b) { return commands_[a].key < commands_[b].key; }); } void CommandSorter::clear() { - commands_.clear(); - sortedIndices_.clear(); + // 重置计数器,保留内存 + commandCount_ = 0; +} + +void CommandSorter::reserve(uint32_t capacity) { + commands_.reserve(capacity); + sortedIndices_.reserve(capacity); } // ======================================== @@ -117,12 +162,12 @@ CommandQueue::CommandQueue(CommandQueue &&other) noexcept nextMaterialId_(other.nextMaterialId_), materialIds_(std::move(other.materialIds_)), currentMaterialUBO_(other.currentMaterialUBO_), - currentMaterialUBOOffset_(other.currentMaterialUBOOffset_) { + materialUBOBufferOffset_(other.materialUBOBufferOffset_) { other.context_ = nullptr; other.globalUBOData_ = {}; other.nextMaterialId_ = 1; other.currentMaterialUBO_ = nullptr; - other.currentMaterialUBOOffset_ = 0; + other.materialUBOBufferOffset_ = 0; } CommandQueue &CommandQueue::operator=(CommandQueue &&other) noexcept { @@ -139,13 +184,13 @@ CommandQueue &CommandQueue::operator=(CommandQueue &&other) noexcept { nextMaterialId_ = other.nextMaterialId_; materialIds_ = std::move(other.materialIds_); currentMaterialUBO_ = other.currentMaterialUBO_; - currentMaterialUBOOffset_ = other.currentMaterialUBOOffset_; + materialUBOBufferOffset_ = other.materialUBOBufferOffset_; other.context_ = nullptr; other.globalUBOData_ = {}; other.nextMaterialId_ = 1; other.currentMaterialUBO_ = nullptr; - other.currentMaterialUBOOffset_ = 0; + other.materialUBOBufferOffset_ = 0; } return *this; } @@ -204,17 +249,19 @@ void CommandQueue::beginFrame() { batcher_.clear(); materialIds_.clear(); nextMaterialId_ = 1; - materialUBOData_.clear(); - // 重置材质 UBO 分配状态 + // 重置材质 UBO 缓冲区(保留容量,避免重新分配) + materialUBOBufferOffset_ = 0; currentMaterialUBO_ = nullptr; - currentMaterialUBOOffset_ = 0; // 重置 UBO 管理器的材质 UBO 池 if (uboManager_) { uboManager_->resetMaterialUBOs(); } + // 重置统计 + stats_ = {}; + // 开始录制命令 if (commandList_) { commandList_->begin(); @@ -244,24 +291,54 @@ std::pair CommandQueue::allocateMaterialUBO(uint32_t s return {nullptr, 0}; } - // 如果当前 UBO 没有足够的空间,获取一个新的 - if (currentMaterialUBO_ == nullptr || - currentMaterialUBOOffset_ + size > currentMaterialUBO_->getSize()) { - currentMaterialUBO_ = uboManager_->acquireMaterialUBO(size); - currentMaterialUBOOffset_ = 0; + // OpenGL 要求 UBO 偏移对齐到 GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT (通常是 256) + static constexpr uint32_t UBO_ALIGNMENT = 256; + + // 使用批量更新机制:先分配到 CPU 缓冲区 + uint32_t offset = materialUBOBufferOffset_; + // 对齐到 UBO_ALIGNMENT 字节 + offset = (offset + UBO_ALIGNMENT - 1) & ~(UBO_ALIGNMENT - 1); + + uint32_t alignedSize = (size + UBO_ALIGNMENT - 1) & ~(UBO_ALIGNMENT - 1); + + // 检查 CPU 缓冲区是否有足够空间 + if (offset + alignedSize > materialUBOData_.capacity()) { + // 如果缓冲区已满,先刷新到 GPU + flushMaterialUBOToGPU(); + offset = 0; // 重置偏移量 } + // 确保缓冲区足够大 + if (offset + alignedSize > materialUBOData_.size()) { + materialUBOData_.resize(offset + alignedSize); + } + + materialUBOBufferOffset_ = offset + alignedSize; + + return {nullptr, offset}; // 返回 nullptr 表示使用 CPU 缓冲区,offset 是 CPU 缓冲区偏移 +} + +void CommandQueue::flushMaterialUBOToGPU() { + if (materialUBOBufferOffset_ == 0) { + return; + } + + // 获取一个足够大的 UBO + currentMaterialUBO_ = uboManager_->acquireMaterialUBO(materialUBOBufferOffset_); if (!currentMaterialUBO_) { - return {nullptr, 0}; + E2D_LOG_ERROR("Failed to acquire material UBO for flush"); + return; } - uint32_t offset = currentMaterialUBOOffset_; - currentMaterialUBOOffset_ += size; + // 批量更新到 GPU + currentMaterialUBO_->update(materialUBOData_.data(), materialUBOBufferOffset_, 0); - // 对齐到 16 字节(std140 要求) - currentMaterialUBOOffset_ = (currentMaterialUBOOffset_ + 15) & ~15; + // 重置 CPU 缓冲区偏移 + materialUBOBufferOffset_ = 0; +} - return {currentMaterialUBO_, offset}; +UniformBuffer* CommandQueue::getCurrentMaterialUBO() const { + return currentMaterialUBO_; } void CommandQueue::submitDraw(Ptr material, Ptr mesh, @@ -302,28 +379,27 @@ void CommandQueue::submitDraw(Ptr material, Ptr mesh, } } - // 分配材质 UBO 空间并更新数据 + // 分配材质 UBO 空间(使用 CPU 缓冲区) uint32_t materialDataSize = material->getDataSize(); if (materialDataSize > 0) { auto [ubo, offset] = allocateMaterialUBO(materialDataSize); - if (ubo) { - // 复制材质数据到临时缓冲区,以便修改颜色 - std::vector uboData(materialDataSize); - std::memcpy(uboData.data(), material->getData(), materialDataSize); + (void)ubo; // ubo 为 nullptr,使用 CPU 缓冲区 + + // 复制材质数据到 CPU 缓冲区,并修改颜色 + if (offset + materialDataSize <= materialUBOData_.size()) { + std::memcpy(materialUBOData_.data() + offset, material->getData(), materialDataSize); // 将实例颜色应用到 UBO 数据中的 uColor 参数 auto layout = material->getLayout(); if (layout) { const auto* param = layout->getParam("uColor"); if (param && param->type == MaterialParamType::Color) { - std::memcpy(uboData.data() + param->offset, &color.r, sizeof(float) * 4); + std::memcpy(materialUBOData_.data() + offset + param->offset, &color.r, sizeof(float) * 4); } } - ubo->update(uboData.data(), materialDataSize, offset); - cmd.materialUBO = BufferHandle(ubo->getRHIBuffer()); - cmd.materialUBOSize = materialDataSize; cmd.materialUBOOffset = offset; + cmd.materialUBOSize = materialDataSize; } } @@ -366,15 +442,17 @@ void CommandQueue::submitDrawInstanced(Ptr material, Ptr mesh, } } - // 分配材质 UBO 空间并更新数据 + // 分配材质 UBO 空间(使用 CPU 缓冲区) uint32_t materialDataSize = material->getDataSize(); if (materialDataSize > 0) { auto [ubo, offset] = allocateMaterialUBO(materialDataSize); - if (ubo) { - ubo->update(material->getData(), materialDataSize, offset); - cmd.materialUBO = BufferHandle(ubo->getRHIBuffer()); - cmd.materialUBOSize = materialDataSize; + (void)ubo; // ubo 为 nullptr,使用 CPU 缓冲区 + + // 复制材质数据到 CPU 缓冲区 + if (offset + materialDataSize <= materialUBOData_.size()) { + std::memcpy(materialUBOData_.data() + offset, material->getData(), materialDataSize); cmd.materialUBOOffset = offset; + cmd.materialUBOSize = materialDataSize; } } @@ -408,7 +486,7 @@ void CommandQueue::setViewport(int32_t x, int32_t y, int32_t width, } void CommandQueue::updateGlobalUBO(const Mat4& viewProjection, float deltaTime, - uint32_t screenWidth, uint32_t screenHeight) { + uint32_t screenWidth, uint32_t screenHeight, uint32_t frameIndex) { if (!uboManager_) { E2D_LOG_WARN("CommandQueue::updateGlobalUBO: uboManager is null"); return; @@ -425,16 +503,19 @@ void CommandQueue::updateGlobalUBO(const Mat4& viewProjection, float deltaTime, globalUBOData_.screenSize[0] = static_cast(screenWidth); globalUBOData_.screenSize[1] = static_cast(screenHeight); - // 更新 UBO - uboManager_->updateGlobalUBO(&globalUBOData_, sizeof(globalUBOData_)); + // 使用双缓冲更新 UBO + uboManager_->updateGlobalUBO(&globalUBOData_, sizeof(globalUBOData_), frameIndex); } -void CommandQueue::execute() { +void CommandQueue::execute(uint32_t frameIndex) { if (!commandList_) { E2D_LOG_ERROR("CommandQueue::execute: commandList is null"); return; } + // 在排序前刷新材质 UBO 数据到 GPU + flushMaterialUBOToGPU(); + // 排序命令 sorter_.sort(); @@ -443,14 +524,14 @@ void CommandQueue::execute() { // 执行批次 for (uint32_t i = 0; i < batcher_.getBatchCount(); ++i) { - executeBatch(i, batcher_.getBatch(i)); + executeBatch(i, batcher_.getBatch(i), frameIndex); } // 提交命令到 GPU commandList_->submit(); } -void CommandQueue::executeBatch(uint32_t batchIndex, const CommandBatch &batch) { +void CommandQueue::executeBatch(uint32_t batchIndex, const CommandBatch &batch, uint32_t frameIndex) { if (!commandList_) { return; } @@ -458,15 +539,17 @@ void CommandQueue::executeBatch(uint32_t batchIndex, const CommandBatch &batch) // 绑定管线 if (batch.pipeline.isValid()) { commandList_->setPipeline(batch.pipeline.get()); + stats_.pipelineBinds++; } else { E2D_LOG_WARN("Batch has no valid pipeline!"); } - // 绑定全局 UBO (binding = 0) + // 绑定全局 UBO (binding = 0) - 使用双缓冲 if (uboManager_) { - UniformBuffer* globalUBO = uboManager_->getGlobalUBO(); + UniformBuffer* globalUBO = uboManager_->getGlobalUBO(frameIndex); if (globalUBO) { commandList_->setUniformBuffer(0, globalUBO->getRHIBuffer()); + stats_.bufferBinds++; } } @@ -475,16 +558,9 @@ void CommandQueue::executeBatch(uint32_t batchIndex, const CommandBatch &batch) for (uint32_t i = 0; i < batch.textureCount; ++i) { if (batch.textures[i].isValid()) { commandList_->setTexture(i, batch.textures[i].get()); + stats_.textureBinds++; } } - } else { - // 如果没有纹理,绑定默认的白色纹理 - auto* rhiModule = RHIModule::get(); - if (rhiModule && rhiModule->getDevice()) { - // 使用默认纹理(白色像素) - // 注意:这里应该使用 AssetsModule 的默认纹理 - // 暂时跳过,因为我们需要访问 AssetsModule - } } // 执行批次中的所有命令 @@ -494,6 +570,7 @@ void CommandQueue::executeBatch(uint32_t batchIndex, const CommandBatch &batch) // 绑定顶点缓冲区 if (cmd.vertexBuffer.isValid()) { commandList_->setVertexBuffer(0, cmd.vertexBuffer.get(), 0); + stats_.bufferBinds++; } else { E2D_LOG_WARN("Draw command has no valid vertex buffer!"); } @@ -501,16 +578,19 @@ void CommandQueue::executeBatch(uint32_t batchIndex, const CommandBatch &batch) // 绑定实例缓冲区(实例化渲染) if (cmd.isInstanced() && cmd.instanceBuffer.isValid()) { commandList_->setVertexBuffer(1, cmd.instanceBuffer.get(), 0, cmd.instanceBufferStride); + stats_.bufferBinds++; } // 绑定索引缓冲区(如果有) if (cmd.isIndexed() && cmd.indexBuffer.isValid()) { commandList_->setIndexBuffer(cmd.indexBuffer.get(), IndexType::UInt16, 0); + stats_.bufferBinds++; } // 绑定材质 UBO (binding = 1) - if (cmd.materialUBO.isValid()) { - commandList_->setUniformBuffer(1, cmd.materialUBO.get(), cmd.materialUBOOffset, cmd.materialUBOSize); + if (cmd.materialUBOSize > 0 && currentMaterialUBO_) { + commandList_->setUniformBuffer(1, currentMaterialUBO_->getRHIBuffer(), cmd.materialUBOOffset, cmd.materialUBOSize); + stats_.bufferBinds++; } // 设置模型矩阵(仅在非实例化渲染时使用) @@ -522,16 +602,25 @@ void CommandQueue::executeBatch(uint32_t batchIndex, const CommandBatch &batch) if (cmd.isInstanced()) { if (cmd.isIndexed()) { commandList_->drawIndexed(cmd.indexCount, 0, 0, cmd.instanceCount, 0); + stats_.drawCalls++; + stats_.triangles += (cmd.indexCount / 3) * cmd.instanceCount; } else { commandList_->draw(cmd.vertexCount, 0, cmd.instanceCount, 0); + stats_.drawCalls++; + stats_.triangles += (cmd.vertexCount / 3) * cmd.instanceCount; } } else { if (cmd.isIndexed()) { commandList_->drawIndexed(cmd.indexCount, 0, 0, 1, 0); + stats_.drawCalls++; + stats_.triangles += cmd.indexCount / 3; } else { commandList_->draw(cmd.vertexCount, 0, 1, 0); + stats_.drawCalls++; + stats_.triangles += cmd.vertexCount / 3; } } + stats_.vertices += cmd.isInstanced() ? (cmd.vertexCount * cmd.instanceCount) : cmd.vertexCount; } } diff --git a/src/renderer/render_graph.cpp b/src/renderer/render_graph.cpp index 1eb2aa3..b732231 100644 --- a/src/renderer/render_graph.cpp +++ b/src/renderer/render_graph.cpp @@ -179,7 +179,7 @@ bool RenderGraph::compile() { return true; } -void RenderGraph::execute(float deltaTime, const Mat4 &viewProjection) { +void RenderGraph::execute(float deltaTime, const Mat4& viewProjection) { if (!compiled_) { if (!compile()) { E2D_LOG_ERROR("Failed to compile RenderGraph"); @@ -187,9 +187,9 @@ void RenderGraph::execute(float deltaTime, const Mat4 &viewProjection) { } } - // 更新全局 UBO + // 更新全局 UBO(使用双缓冲) commandQueue_.updateGlobalUBO(viewProjection, deltaTime, outputWidth_, - outputHeight_); + outputHeight_, frameIndex_); // 获取 RHI 上下文 auto *rhiModule = RHIModule::get(); @@ -213,8 +213,8 @@ void RenderGraph::execute(float deltaTime, const Mat4 &viewProjection) { } } - // 执行命令队列 - commandQueue_.execute(); + // 执行命令队列(传递帧索引用于双缓冲) + commandQueue_.execute(frameIndex_); // 结束帧 commandQueue_.endFrame(); diff --git a/src/renderer/renderer_module.cpp b/src/renderer/renderer_module.cpp index c4cfe52..ed7fb22 100644 --- a/src/renderer/renderer_module.cpp +++ b/src/renderer/renderer_module.cpp @@ -257,7 +257,16 @@ void RendererModule::onRenderEnd() { // 获取统计信息 if (commandQueue_) { - stats_.drawCalls = commandQueue_->getCommandCount(); + const auto& stats = commandQueue_->getStats(); + stats_.drawCalls = stats.drawCalls; + + // 每60帧输出一次统计信息 + static uint32_t frameCount = 0; + if (++frameCount % 60 == 0) { + E2D_LOG_INFO("Render Stats: DrawCalls={}, Triangles={}, Vertices={}, PipelineBinds={}, TextureBinds={}, BufferBinds={}", + stats.drawCalls, stats.triangles, stats.vertices, + stats.pipelineBinds, stats.textureBinds, stats.bufferBinds); + } } // 渲染完成 diff --git a/src/renderer/rhi/opengl/gl_command_list.cpp b/src/renderer/rhi/opengl/gl_command_list.cpp index e5b70e6..2c2fe9d 100644 --- a/src/renderer/rhi/opengl/gl_command_list.cpp +++ b/src/renderer/rhi/opengl/gl_command_list.cpp @@ -15,6 +15,12 @@ GLCommandList::~GLCommandList() = default; void GLCommandList::begin() { // 开始记录命令 recording_ = true; + + // 重置状态缓存 + stateCache_.reset(); + + // 重置统计 + stats_.reset(); } void GLCommandList::end() { @@ -34,21 +40,21 @@ void GLCommandList::beginRenderPass(RHIFramebuffer *framebuffer, if (framebuffer) { auto *glFramebuffer = static_cast(framebuffer); glFramebuffer->bind(); - currentFramebuffer_ = glFramebuffer; + stateCache_.framebuffer = glFramebuffer; } else { glBindFramebuffer(GL_FRAMEBUFFER, 0); - currentFramebuffer_ = nullptr; + stateCache_.framebuffer = nullptr; } // 禁用2D渲染不需要的OpenGL状态 glDisable(GL_DEPTH_TEST); // 禁用深度测试 glDisable(GL_CULL_FACE); // 禁用面剔除 glDisable(GL_STENCIL_TEST); // 禁用模板测试 - + // 启用混合(2D渲染通常需要透明度混合) glEnable(GL_BLEND); glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); - + // 禁用裁剪测试(除非需要) glDisable(GL_SCISSOR_TEST); @@ -60,13 +66,27 @@ void GLCommandList::beginRenderPass(RHIFramebuffer *framebuffer, void GLCommandList::endRenderPass() { // 结束渲染通道 - currentFramebuffer_ = nullptr; + stateCache_.framebuffer = nullptr; } void GLCommandList::setViewport(const Viewport &viewport) { + // 状态缓存检查 + if (stateCache_.viewportValid && + stateCache_.viewport.x == viewport.x && + stateCache_.viewport.y == viewport.y && + stateCache_.viewport.width == viewport.width && + stateCache_.viewport.height == viewport.height) { + stats_.redundantPipelineChanges++; + return; + } + glViewport(static_cast(viewport.x), static_cast(viewport.y), static_cast(viewport.width), static_cast(viewport.height)); + + stateCache_.viewport = viewport; + stateCache_.viewportValid = true; + stats_.viewportChanges++; } void GLCommandList::setScissor(const ScissorRect &scissor) { @@ -74,28 +94,40 @@ void GLCommandList::setScissor(const ScissorRect &scissor) { } void GLCommandList::setPipeline(RHIPipeline *pipeline) { - if (pipeline) { - auto *glPipeline = static_cast(pipeline); - glPipeline->bind(); - currentPipeline_ = glPipeline; - - // 获取 shader program - currentShaderProgram_ = glPipeline->getGLProgram(); + if (!pipeline) return; + + auto *glPipeline = static_cast(pipeline); + + // 状态缓存检查 + if (stateCache_.pipeline == glPipeline) { + stats_.redundantPipelineChanges++; + return; } + + glPipeline->bind(); + stateCache_.pipeline = glPipeline; + stateCache_.shaderProgram = glPipeline->getGLProgram(); + stats_.pipelineChanges++; } void GLCommandList::setVertexBuffer(uint32_t slot, RHIBuffer *buffer, uint32_t offset, uint32_t stride) { - if (!buffer || !currentPipeline_) { + if (!buffer || slot >= 4) return; + + auto *glBuffer = static_cast(buffer); + + // 状态缓存检查 + if (stateCache_.vertexBuffers[slot] == glBuffer && + stateCache_.vertexBufferOffsets[slot] == offset) { + stats_.redundantBufferChanges++; return; } - auto *glBuffer = static_cast(buffer); glBindBuffer(GL_ARRAY_BUFFER, glBuffer->getGLBuffer()); - // 只配置属于当前 slot 的属性 - const auto &layout = currentPipeline_->getVertexLayout(); - // 使用传入的步长,如果为0则使用布局中的步长 + // 配置属于当前 slot 的属性 + // 注意:虽然 VAO 缓存了部分配置,但我们需要绑定具体的缓冲区 + const auto &layout = stateCache_.pipeline ? stateCache_.pipeline->getVertexLayout() : VertexLayout{}; uint32_t actualStride = stride > 0 ? stride : layout.stride; for (const auto &attr : layout.attributes) { @@ -132,44 +164,95 @@ void GLCommandList::setVertexBuffer(uint32_t slot, RHIBuffer *buffer, // 设置实例化除数 glVertexAttribDivisor(attr.location, attr.divisor); } + + stateCache_.vertexBuffers[slot] = glBuffer; + stateCache_.vertexBufferOffsets[slot] = offset; + stats_.bufferChanges++; } void GLCommandList::setIndexBuffer(RHIBuffer *buffer, IndexType type, uint32_t offset) { - if (buffer) { - auto *glBuffer = static_cast(buffer); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, glBuffer->getGLBuffer()); - currentIndexBuffer_ = glBuffer; + if (!buffer) return; + + auto *glBuffer = static_cast(buffer); + + // 状态缓存检查 + if (stateCache_.indexBuffer == glBuffer) { + stats_.redundantBufferChanges++; + return; } + + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, glBuffer->getGLBuffer()); + stateCache_.indexBuffer = glBuffer; + stats_.bufferChanges++; } void GLCommandList::setUniformBuffer(uint32_t slot, RHIBuffer *buffer) { - if (buffer) { - auto *glBuffer = static_cast(buffer); - glBindBufferBase(GL_UNIFORM_BUFFER, slot, glBuffer->getGLBuffer()); + if (!buffer || slot >= 4) return; + + auto *glBuffer = static_cast(buffer); + + // 状态缓存检查 + if (stateCache_.uniformBuffers[slot].valid && + stateCache_.uniformBuffers[slot].buffer == glBuffer && + stateCache_.uniformBuffers[slot].offset == 0) { + return; } + + glBindBufferBase(GL_UNIFORM_BUFFER, slot, glBuffer->getGLBuffer()); + + stateCache_.uniformBuffers[slot].buffer = glBuffer; + stateCache_.uniformBuffers[slot].offset = 0; + stateCache_.uniformBuffers[slot].size = 0; + stateCache_.uniformBuffers[slot].valid = true; } void GLCommandList::setUniformBuffer(uint32_t slot, RHIBuffer *buffer, uint32_t offset, uint32_t size) { - if (buffer) { - auto *glBuffer = static_cast(buffer); - GLuint glBufferId = glBuffer->getGLBuffer(); - if (size == 0) { - // 绑定整个缓冲区 - glBindBufferBase(GL_UNIFORM_BUFFER, slot, glBufferId); - } else { - // 绑定缓冲区范围 - glBindBufferRange(GL_UNIFORM_BUFFER, slot, glBufferId, offset, size); - } + if (!buffer || slot >= 4) return; + + auto *glBuffer = static_cast(buffer); + GLuint glBufferId = glBuffer->getGLBuffer(); + + // 状态缓存检查 + if (stateCache_.uniformBuffers[slot].valid && + stateCache_.uniformBuffers[slot].buffer == glBuffer && + stateCache_.uniformBuffers[slot].offset == offset && + stateCache_.uniformBuffers[slot].size == size) { + return; } + + if (size == 0) { + // 绑定整个缓冲区 + glBindBufferBase(GL_UNIFORM_BUFFER, slot, glBufferId); + } else { + // 绑定缓冲区范围 + glBindBufferRange(GL_UNIFORM_BUFFER, slot, glBufferId, offset, size); + } + + stateCache_.uniformBuffers[slot].buffer = glBuffer; + stateCache_.uniformBuffers[slot].offset = offset; + stateCache_.uniformBuffers[slot].size = size; + stateCache_.uniformBuffers[slot].valid = true; } void GLCommandList::setTexture(uint32_t slot, RHITexture *texture) { - if (texture) { - auto *glTexture = static_cast(texture); - glActiveTexture(GL_TEXTURE0 + slot); - glBindTexture(GL_TEXTURE_2D, glTexture->getGLTexture()); + if (!texture || slot >= 8) return; + + auto *glTexture = static_cast(texture); + + // 状态缓存检查 + if (stateCache_.textures[slot].valid && + stateCache_.textures[slot].texture == glTexture) { + stats_.redundantTextureChanges++; + return; } + + glActiveTexture(GL_TEXTURE0 + slot); + glBindTexture(GL_TEXTURE_2D, glTexture->getGLTexture()); + + stateCache_.textures[slot].texture = glTexture; + stateCache_.textures[slot].valid = true; + stats_.textureChanges++; } void GLCommandList::setSampler(uint32_t slot, TextureFilter minFilter, @@ -192,7 +275,7 @@ void GLCommandList::drawIndexed(uint32_t indexCount, uint32_t firstIndex, int32_t vertexOffset, uint32_t instanceCount, uint32_t firstInstance) { GLenum indexType = GL_UNSIGNED_SHORT; - if (currentIndexBuffer_) { + if (stateCache_.indexBuffer) { // 根据缓冲区类型确定索引类型 // 这里简化处理,使用 GL_UNSIGNED_SHORT } @@ -237,8 +320,8 @@ bool GLCommandList::isRecording() const { } void GLCommandList::setUniform(const char* name, float value) { - if (currentShaderProgram_ != 0) { - GLint location = glGetUniformLocation(currentShaderProgram_, name); + if (stateCache_.shaderProgram != 0) { + GLint location = glGetUniformLocation(stateCache_.shaderProgram, name); if (location != -1) { glUniform1f(location, value); } @@ -246,8 +329,8 @@ void GLCommandList::setUniform(const char* name, float value) { } void GLCommandList::setUniform(const char* name, const Vec2& value) { - if (currentShaderProgram_ != 0) { - GLint location = glGetUniformLocation(currentShaderProgram_, name); + if (stateCache_.shaderProgram != 0) { + GLint location = glGetUniformLocation(stateCache_.shaderProgram, name); if (location != -1) { glUniform2f(location, value.x, value.y); } @@ -255,8 +338,8 @@ void GLCommandList::setUniform(const char* name, const Vec2& value) { } void GLCommandList::setUniform(const char* name, const Vec3& value) { - if (currentShaderProgram_ != 0) { - GLint location = glGetUniformLocation(currentShaderProgram_, name); + if (stateCache_.shaderProgram != 0) { + GLint location = glGetUniformLocation(stateCache_.shaderProgram, name); if (location != -1) { glUniform3f(location, value.x, value.y, value.z); } @@ -264,8 +347,8 @@ void GLCommandList::setUniform(const char* name, const Vec3& value) { } void GLCommandList::setUniform(const char* name, const Color& value) { - if (currentShaderProgram_ != 0) { - GLint location = glGetUniformLocation(currentShaderProgram_, name); + if (stateCache_.shaderProgram != 0) { + GLint location = glGetUniformLocation(stateCache_.shaderProgram, name); if (location != -1) { glUniform4f(location, value.r, value.g, value.b, value.a); } @@ -273,8 +356,8 @@ void GLCommandList::setUniform(const char* name, const Color& value) { } void GLCommandList::setUniform(const char* name, const Mat4& value) { - if (currentShaderProgram_ != 0) { - GLint location = glGetUniformLocation(currentShaderProgram_, name); + if (stateCache_.shaderProgram != 0) { + GLint location = glGetUniformLocation(stateCache_.shaderProgram, name); if (location != -1) { glUniformMatrix4fv(location, 1, GL_FALSE, glm::value_ptr(value)); } diff --git a/src/renderer/rhi/opengl/gl_pipeline.cpp b/src/renderer/rhi/opengl/gl_pipeline.cpp index 5009809..5d956b9 100644 --- a/src/renderer/rhi/opengl/gl_pipeline.cpp +++ b/src/renderer/rhi/opengl/gl_pipeline.cpp @@ -62,9 +62,10 @@ void GLPipeline::bind() { if (shaderProgram_ != 0) { glUseProgram(shaderProgram_); } - - // 注意:不再绑定VAO,因为我们使用动态顶点属性配置 - // 顶点属性在 GLCommandList::setVertexBuffer 中配置 + + // 注意:VAO 需要与具体的顶点缓冲区绑定才能工作 + // 所以我们不在此处绑定 VAO,而是在渲染时动态配置顶点属性 + // 这样可以支持不同的 Mesh 使用相同的 Pipeline // 应用混合状态 if (desc_.blendState.enabled) { @@ -83,7 +84,7 @@ void GLPipeline::bind() { } else { glDisable(GL_DEPTH_TEST); } - + // 应用光栅化状态 if (desc_.rasterizerState.cullEnabled) { glEnable(GL_CULL_FACE); diff --git a/src/renderer/uniform_buffer.cpp b/src/renderer/uniform_buffer.cpp index a893d88..40b80e0 100644 --- a/src/renderer/uniform_buffer.cpp +++ b/src/renderer/uniform_buffer.cpp @@ -87,10 +87,15 @@ UniformBufferManager::~UniformBufferManager() { shutdown(); } UniformBufferManager::UniformBufferManager( UniformBufferManager &&other) noexcept - : globalUBO_(std::move(other.globalUBO_)), + : globalUBOs_(std::move(other.globalUBOs_)), materialUBOPool_(std::move(other.materialUBOPool_)), - currentUBOIndex_(other.currentUBOIndex_) { + currentUBOIndex_(other.currentUBOIndex_), + materialUBOBuffer_(std::move(other.materialUBOBuffer_)), + materialUBOBufferOffset_(other.materialUBOBufferOffset_), + currentMaterialUBO_(other.currentMaterialUBO_) { other.currentUBOIndex_ = 0; + other.materialUBOBufferOffset_ = 0; + other.currentMaterialUBO_ = nullptr; } UniformBufferManager & @@ -98,40 +103,59 @@ UniformBufferManager::operator=(UniformBufferManager &&other) noexcept { if (this != &other) { shutdown(); - globalUBO_ = std::move(other.globalUBO_); + globalUBOs_ = std::move(other.globalUBOs_); materialUBOPool_ = std::move(other.materialUBOPool_); currentUBOIndex_ = other.currentUBOIndex_; + materialUBOBuffer_ = std::move(other.materialUBOBuffer_); + materialUBOBufferOffset_ = other.materialUBOBufferOffset_; + currentMaterialUBO_ = other.currentMaterialUBO_; other.currentUBOIndex_ = 0; + other.materialUBOBufferOffset_ = 0; + other.currentMaterialUBO_ = nullptr; } return *this; } bool UniformBufferManager::initialize() { - // 创建全局 UBO - globalUBO_ = std::make_unique(); - if (!globalUBO_->create(UniformBufferManager::GLOBAL_UBO_SIZE, - UniformBufferManager::GLOBAL_UBO_BINDING)) { - E2D_LOG_ERROR("Failed to create global UBO"); - return false; + // 创建全局 UBO 双缓冲 + for (size_t i = 0; i < globalUBOs_.size(); ++i) { + globalUBOs_[i] = std::make_unique(); + if (!globalUBOs_[i]->create(UniformBufferManager::GLOBAL_UBO_SIZE, + UniformBufferManager::GLOBAL_UBO_BINDING)) { + E2D_LOG_ERROR("Failed to create global UBO {}", i); + return false; + } } // 预分配材质 UBO 池 materialUBOPool_.reserve(INITIAL_UBO_POOL_SIZE); - E2D_LOG_INFO("UniformBufferManager initialized"); + // 预分配材质 UBO CPU 缓冲区 + materialUBOBuffer_.resize(MATERIAL_UBO_BUFFER_SIZE); + + E2D_LOG_INFO("UniformBufferManager initialized with double buffering"); return true; } void UniformBufferManager::shutdown() { materialUBOPool_.clear(); - globalUBO_.reset(); + for (auto& ubo : globalUBOs_) { + ubo.reset(); + } currentUBOIndex_ = 0; + materialUBOBuffer_.clear(); + materialUBOBufferOffset_ = 0; + currentMaterialUBO_ = nullptr; E2D_LOG_INFO("UniformBufferManager shutdown"); } -UniformBuffer *UniformBufferManager::getGlobalUBO() { return globalUBO_.get(); } +UniformBuffer *UniformBufferManager::getGlobalUBO(uint32_t frameIndex) { + // 使用双缓冲,根据帧索引选择 UBO + size_t index = frameIndex % 2; + return globalUBOs_[index].get(); +} UniformBuffer *UniformBufferManager::acquireMaterialUBO(uint32_t size) { // 查找或创建合适大小的 UBO @@ -156,13 +180,39 @@ UniformBuffer *UniformBufferManager::acquireMaterialUBO(uint32_t size) { return result; } -void UniformBufferManager::resetMaterialUBOs() { currentUBOIndex_ = 0; } +void UniformBufferManager::resetMaterialUBOs() { + currentUBOIndex_ = 0; + materialUBOBufferOffset_ = 0; + currentMaterialUBO_ = nullptr; +} -void UniformBufferManager::updateGlobalUBO(const void *data, uint32_t size) { - if (globalUBO_) { - globalUBO_->update(data, size); - globalUBO_->bind(UniformBufferManager::GLOBAL_UBO_BINDING); +void UniformBufferManager::updateGlobalUBO(const void *data, uint32_t size, uint32_t frameIndex) { + // 使用双缓冲更新全局 UBO + size_t index = frameIndex % 2; + if (globalUBOs_[index]) { + globalUBOs_[index]->update(data, size); + globalUBOs_[index]->bind(UniformBufferManager::GLOBAL_UBO_BINDING); } } +void UniformBufferManager::batchUpdateMaterialUBO(const void *data, uint32_t size, uint32_t offset) { + if (!data || size == 0 || offset + size > materialUBOBuffer_.size()) { + E2D_LOG_WARN("Invalid batch update parameters"); + return; + } + + // 复制数据到 CPU 缓冲区 + std::memcpy(materialUBOBuffer_.data() + offset, data, size); +} + +void UniformBufferManager::flushMaterialUBO() { + if (materialUBOBufferOffset_ == 0 || !currentMaterialUBO_) { + return; + } + + // 一次性将 CPU 缓冲区数据更新到 GPU + currentMaterialUBO_->update(materialUBOBuffer_.data(), materialUBOBufferOffset_, 0); + materialUBOBufferOffset_ = 0; +} + } // namespace extra2d