/****************************************************************************
Copyright (c) 2020-2023 Xiamen Yaji Software Co., Ltd.
http://www.cocos.com
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
****************************************************************************/
#include <algorithm>
#include <boost/functional/hash.hpp>
#include <thread>
#include "VKStd.h"
#include "base/std/container/map.h"
#include "base/std/container/unordered_map.h"
#include "base/std/container/unordered_set.h"
#include "VKCommands.h"
#include "VKDevice.h"
#include "VKGPUObjects.h"
#include "VKPipelineCache.h"
#include "gfx-base/GFXDef.h"
#include "states/VKBufferBarrier.h"
#include "states/VKGeneralBarrier.h"
#include "states/VKTextureBarrier.h"
#include "gfx-base/SPIRVUtils.h"
namespace cc {
namespace gfx {
namespace {
constexpr bool ENABLE_LAZY_ALLOCATION = true;
} // namespace
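// Command buffer pools are looked up by a hash of the calling thread's id, so each
// recording thread gets its own pool and no cross-thread synchronization is needed here.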
CCVKGPUCommandBufferPool *CCVKGPUDevice::getCommandBufferPool() {
static thread_local size_t threadID = std::hash<std::thread::id>{}(std::this_thread::get_id());
if (!_commandBufferPools.count(threadID)) {
_commandBufferPools[threadID] = ccnew CCVKGPUCommandBufferPool(this);
}
return _commandBufferPools[threadID];
}
CCVKGPUDescriptorSetPool *CCVKGPUDevice::getDescriptorSetPool(uint32_t layoutID) {
if (_descriptorSetPools.find(layoutID) == _descriptorSetPools.end()) {
_descriptorSetPools[layoutID] = std::make_unique<CCVKGPUDescriptorSetPool>();
}
return _descriptorSetPools[layoutID].get();
}
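// Translates engine-side dynamic state flags into the corresponding VkDynamicState entries.
// Note that STENCIL_COMPARE_MASK also enables VK_DYNAMIC_STATE_STENCIL_REFERENCE, presumably
// because the engine API updates the stencil reference together with the compare mask.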
void insertVkDynamicStates(ccstd::vector<VkDynamicState> *out, const ccstd::vector<DynamicStateFlagBit> &dynamicStates) {
for (DynamicStateFlagBit dynamicState : dynamicStates) {
switch (dynamicState) {
case DynamicStateFlagBit::LINE_WIDTH: out->push_back(VK_DYNAMIC_STATE_LINE_WIDTH); break;
case DynamicStateFlagBit::DEPTH_BIAS: out->push_back(VK_DYNAMIC_STATE_DEPTH_BIAS); break;
case DynamicStateFlagBit::BLEND_CONSTANTS: out->push_back(VK_DYNAMIC_STATE_BLEND_CONSTANTS); break;
case DynamicStateFlagBit::DEPTH_BOUNDS: out->push_back(VK_DYNAMIC_STATE_DEPTH_BOUNDS); break;
case DynamicStateFlagBit::STENCIL_WRITE_MASK: out->push_back(VK_DYNAMIC_STATE_STENCIL_WRITE_MASK); break;
case DynamicStateFlagBit::STENCIL_COMPARE_MASK:
out->push_back(VK_DYNAMIC_STATE_STENCIL_REFERENCE);
out->push_back(VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK);
break;
default: {
CC_ABORT();
break;
}
}
}
}
void cmdFuncCCVKGetDeviceQueue(CCVKDevice *device, CCVKGPUQueue *gpuQueue) {
if (gpuQueue->possibleQueueFamilyIndices.empty()) {
uint32_t queueType = 0U;
switch (gpuQueue->type) {
case QueueType::GRAPHICS: queueType = VK_QUEUE_GRAPHICS_BIT; break;
case QueueType::COMPUTE: queueType = VK_QUEUE_COMPUTE_BIT; break;
case QueueType::TRANSFER: queueType = VK_QUEUE_TRANSFER_BIT; break;
}
const CCVKGPUContext *context = device->gpuContext();
uint32_t queueCount = utils::toUint(context->queueFamilyProperties.size());
for (uint32_t i = 0U; i < queueCount; ++i) {
const VkQueueFamilyProperties &properties = context->queueFamilyProperties[i];
if (properties.queueCount > 0 && (properties.queueFlags & queueType)) {
gpuQueue->possibleQueueFamilyIndices.push_back(i);
}
}
}
vkGetDeviceQueue(device->gpuDevice()->vkDevice, gpuQueue->possibleQueueFamilyIndices[0], 0, &gpuQueue->vkQueue);
gpuQueue->queueFamilyIndex = gpuQueue->possibleQueueFamilyIndices[0];
}
void cmdFuncCCVKCreateQueryPool(CCVKDevice *device, CCVKGPUQueryPool *gpuQueryPool) {
VkQueryPoolCreateInfo queryPoolInfo = {};
queryPoolInfo.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
queryPoolInfo.queryType = mapVkQueryType(gpuQueryPool->type);
queryPoolInfo.queryCount = gpuQueryPool->maxQueryObjects;
VK_CHECK(vkCreateQueryPool(device->gpuDevice()->vkDevice, &queryPoolInfo, nullptr, &gpuQueryPool->vkPool));
}
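// Creates the backing VkImage(s) for a texture. Swapchain textures reuse the images owned by
// the swapchain (except depth-stencil, which gets one image per back buffer), external textures
// adopt the provided VkImage, and everything else is allocated through VMA, preferring lazily
// allocated memory for transient attachments when available.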
void cmdFuncCCVKCreateTexture(CCVKDevice *device, CCVKGPUTexture *gpuTexture) {
if (!gpuTexture->size) return;
gpuTexture->aspectMask = mapVkImageAspectFlags(gpuTexture->format);
auto createFn = [device, gpuTexture](VkImage *pVkImage, VmaAllocation *pVmaAllocation) {
VkFormat vkFormat = mapVkFormat(gpuTexture->format, device->gpuDevice());
VkFormatFeatureFlags features = mapVkFormatFeatureFlags(gpuTexture->usage);
VkFormatProperties formatProperties;
vkGetPhysicalDeviceFormatProperties(device->gpuContext()->physicalDevice, vkFormat, &formatProperties);
if (!(formatProperties.optimalTilingFeatures & features)) {
const char *formatName = GFX_FORMAT_INFOS[toNumber(gpuTexture->format)].name.c_str();
CC_LOG_ERROR("cmdFuncCCVKCreateTexture: The specified usage for %s is not supported on this platform", formatName);
return;
}
VkImageUsageFlags usageFlags = mapVkImageUsageFlags(gpuTexture->usage, gpuTexture->flags);
VkImageCreateInfo createInfo{VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO};
createInfo.flags = mapVkImageCreateFlags(gpuTexture->type);
createInfo.imageType = mapVkImageType(gpuTexture->type);
createInfo.format = vkFormat;
createInfo.extent = {gpuTexture->width, gpuTexture->height, gpuTexture->depth};
createInfo.mipLevels = gpuTexture->mipLevels;
createInfo.arrayLayers = gpuTexture->arrayLayers;
createInfo.samples = static_cast<VkSampleCountFlagBits>(gpuTexture->samples);
createInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
createInfo.usage = usageFlags;
createInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
VmaAllocationCreateInfo allocInfo{};
allocInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
VmaAllocationInfo res;
const VkFlags lazilyAllocatedFilterFlags = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT |
VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT;
if (hasFlag(gpuTexture->flags, TextureFlagBit::LAZILY_ALLOCATED) &&
(lazilyAllocatedFilterFlags & usageFlags) == usageFlags) {
allocInfo.usage = VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED;
VkResult result = vmaCreateImage(device->gpuDevice()->memoryAllocator, &createInfo, &allocInfo,
pVkImage, pVmaAllocation, &res);
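// VK_SUCCESS is 0, so a falsy result means the lazily allocated image was created successfully;
// mark it as having no committed device memory and return.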
if (!result) {
gpuTexture->memoryAllocated = false;
return;
}
// lazily allocated memory not available here, fall back to regular device memory
allocInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
}
gpuTexture->memoryAllocated = true;
VK_CHECK(vmaCreateImage(device->gpuDevice()->memoryAllocator, &createInfo, &allocInfo,
pVkImage, pVmaAllocation, &res));
};
if (gpuTexture->swapchain) {
size_t backBufferCount = gpuTexture->swapchain->swapchainImages.size();
gpuTexture->swapchainVkImages.resize(backBufferCount);
if (GFX_FORMAT_INFOS[toNumber(gpuTexture->format)].hasDepth) {
gpuTexture->swapchainVmaAllocations.resize(backBufferCount);
for (size_t i = 0; i < backBufferCount; ++i) {
createFn(&gpuTexture->swapchainVkImages[i], &gpuTexture->swapchainVmaAllocations[i]);
}
} else {
for (size_t i = 0; i < backBufferCount; ++i) {
gpuTexture->swapchainVkImages[i] = gpuTexture->swapchain->swapchainImages[i];
}
}
gpuTexture->memoryAllocated = false;
} else if (hasFlag(gpuTexture->flags, TextureFlagBit::EXTERNAL_OES) || hasFlag(gpuTexture->flags, TextureFlagBit::EXTERNAL_NORMAL)) {
gpuTexture->vkImage = gpuTexture->externalVKImage;
} else {
createFn(&gpuTexture->vkImage, &gpuTexture->vmaAllocation);
}
}
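// Creates VkImageView(s) for a texture view. For depth-stencil formats the aspect mask is derived
// from basePlane/planeCount; swapchain-backed textures get one view per back buffer image.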
void cmdFuncCCVKCreateTextureView(CCVKDevice *device, CCVKGPUTextureView *gpuTextureView) {
if (!gpuTextureView->gpuTexture) return;
auto createFn = [device, gpuTextureView](VkImage vkImage, VkImageView *pVkImageView) {
auto format = gpuTextureView->format;
auto mapAspect = [](CCVKGPUTextureView *gpuTextureView) {
auto aspectMask = gpuTextureView->gpuTexture->aspectMask;
if (gpuTextureView->gpuTexture->format == Format::DEPTH_STENCIL) {
uint32_t planeIndex = gpuTextureView->basePlane;
uint32_t planeCount = gpuTextureView->planeCount;
aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT << planeIndex;
CC_ASSERT(planeIndex + planeCount <= 2);
CC_ASSERT(planeCount > 0);
while (planeCount && --planeCount) {
aspectMask |= (aspectMask << 1);
}
}
return aspectMask;
};
VkImageViewCreateInfo createInfo{VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
createInfo.image = vkImage;
createInfo.viewType = mapVkImageViewType(gpuTextureView->type);
createInfo.subresourceRange.aspectMask = mapAspect(gpuTextureView);
createInfo.subresourceRange.baseMipLevel = gpuTextureView->baseLevel;
createInfo.subresourceRange.levelCount = gpuTextureView->levelCount;
createInfo.subresourceRange.baseArrayLayer = gpuTextureView->baseLayer;
createInfo.subresourceRange.layerCount = gpuTextureView->layerCount;
createInfo.format = mapVkFormat(format, device->gpuDevice());
VK_CHECK(vkCreateImageView(device->gpuDevice()->vkDevice, &createInfo, nullptr, pVkImageView));
};
if (gpuTextureView->gpuTexture->swapchain) {
size_t backBufferCount = gpuTextureView->gpuTexture->swapchain->swapchainImages.size();
gpuTextureView->swapchainVkImageViews.resize(backBufferCount);
for (size_t i = 0; i < backBufferCount; ++i) {
createFn(gpuTextureView->gpuTexture->swapchainVkImages[i], &gpuTextureView->swapchainVkImageViews[i]);
}
} else if (gpuTextureView->gpuTexture->vkImage) {
createFn(gpuTextureView->gpuTexture->vkImage, &gpuTextureView->vkImageView);
}
}
void cmdFuncCCVKCreateSampler(CCVKDevice *device, CCVKGPUSampler *gpuSampler) {
VkSamplerCreateInfo createInfo{VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO};
CCVKGPUContext *context = device->gpuContext();
float maxAnisotropy = context->physicalDeviceProperties.limits.maxSamplerAnisotropy;
createInfo.magFilter = VK_FILTERS[toNumber(gpuSampler->magFilter)];
createInfo.minFilter = VK_FILTERS[toNumber(gpuSampler->minFilter)];
createInfo.mipmapMode = VK_SAMPLER_MIPMAP_MODES[toNumber(gpuSampler->mipFilter)];
createInfo.addressModeU = VK_SAMPLER_ADDRESS_MODES[toNumber(gpuSampler->addressU)];
createInfo.addressModeV = VK_SAMPLER_ADDRESS_MODES[toNumber(gpuSampler->addressV)];
createInfo.addressModeW = VK_SAMPLER_ADDRESS_MODES[toNumber(gpuSampler->addressW)];
createInfo.mipLodBias = 0.F;
createInfo.anisotropyEnable = gpuSampler->maxAnisotropy && context->physicalDeviceFeatures.samplerAnisotropy;
createInfo.maxAnisotropy = std::min(maxAnisotropy, static_cast<float>(gpuSampler->maxAnisotropy));
createInfo.compareEnable = gpuSampler->cmpFunc != ComparisonFunc::ALWAYS;
createInfo.compareOp = VK_CMP_FUNCS[toNumber(gpuSampler->cmpFunc)];
// From UNASSIGNED-BestPractices-vkCreateSampler-lod-clamping:
// Should use image views with baseMipLevel & levelCount in favor of this
createInfo.minLod = 0.0;
createInfo.maxLod = VK_LOD_CLAMP_NONE;
VK_CHECK(vkCreateSampler(device->gpuDevice()->vkDevice, &createInfo, nullptr, &gpuSampler->vkSampler));
}
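// Allocates the VkBuffer through VMA. HOST buffers are persistently mapped staging memory,
// DEVICE buffers live in GPU-only memory, and HOST|DEVICE buffers are instanced per back buffer
// (instanceSize aligned to the UBO offset alignment) in host-visible, device-usable memory.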
void cmdFuncCCVKCreateBuffer(CCVKDevice *device, CCVKGPUBuffer *gpuBuffer) {
if (!gpuBuffer->size) {
return;
}
gpuBuffer->instanceSize = 0U;
VkBufferCreateInfo bufferInfo{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
bufferInfo.size = gpuBuffer->size;
bufferInfo.usage = mapVkBufferUsageFlagBits(gpuBuffer->usage);
VmaAllocationCreateInfo allocInfo{};
if (gpuBuffer->memUsage == MemoryUsage::HOST) {
bufferInfo.usage |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
allocInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
allocInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
} else if (gpuBuffer->memUsage == MemoryUsage::DEVICE) {
bufferInfo.usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
allocInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
} else if (gpuBuffer->memUsage == (MemoryUsage::HOST | MemoryUsage::DEVICE)) {
gpuBuffer->instanceSize = roundUp(gpuBuffer->size, device->getCapabilities().uboOffsetAlignment);
bufferInfo.size = gpuBuffer->instanceSize * device->gpuDevice()->backBufferCount;
allocInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
allocInfo.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
bufferInfo.usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
}
VmaAllocationInfo res;
VK_CHECK(vmaCreateBuffer(device->gpuDevice()->memoryAllocator, &bufferInfo, &allocInfo,
&gpuBuffer->vkBuffer, &gpuBuffer->vmaAllocation, &res));
gpuBuffer->mappedData = reinterpret_cast<uint8_t *>(res.pMappedData);
// add special access types directly from usage
if (hasFlag(gpuBuffer->usage, BufferUsageBit::VERTEX)) gpuBuffer->renderAccessTypes.push_back(THSVS_ACCESS_VERTEX_BUFFER);
if (hasFlag(gpuBuffer->usage, BufferUsageBit::INDEX)) gpuBuffer->renderAccessTypes.push_back(THSVS_ACCESS_INDEX_BUFFER);
if (hasFlag(gpuBuffer->usage, BufferUsageBit::INDIRECT)) gpuBuffer->renderAccessTypes.push_back(THSVS_ACCESS_INDIRECT_BUFFER);
}
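// Tracks, per attachment, the first/last subpasses that touch it and how each subpass uses it.
// This is the bookkeeping used below to deduce subpass dependencies when none are provided.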
struct AttachmentStatistics final {
enum class SubpassUsage {
COLOR = 0x1,
COLOR_RESOLVE = 0x2,
DEPTH = 0x4,
DEPTH_RESOLVE = 0x8,
INPUT = 0x10,
SHADING_RATE = 0x20,
};
struct SubpassRef final {
VkImageLayout layout{VK_IMAGE_LAYOUT_UNDEFINED};
SubpassUsage usage{SubpassUsage::COLOR};
bool hasDepth() const { return usage == SubpassUsage::DEPTH || usage == SubpassUsage::DEPTH_RESOLVE; }
};
uint32_t loadSubpass{VK_SUBPASS_EXTERNAL};
uint32_t storeSubpass{VK_SUBPASS_EXTERNAL};
ccstd::map<uint32_t, SubpassRef> records; // ordered
void clear() {
loadSubpass = VK_SUBPASS_EXTERNAL;
storeSubpass = VK_SUBPASS_EXTERNAL;
records.clear();
}
};
CC_ENUM_BITWISE_OPERATORS(AttachmentStatistics::SubpassUsage)
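// Collects VkSubpassDependency2 entries while filtering out duplicates, using a hash/equality
// over the fields from srcSubpass through dependencyFlags.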
struct SubpassDependencyManager final {
ccstd::vector<VkSubpassDependency2> subpassDependencies;
void clear() {
subpassDependencies.clear();
_hashes.clear();
}
void append(const VkSubpassDependency2 &info) {
if (_hashes.count(info)) return;
subpassDependencies.push_back(info);
_hashes.insert(info);
}
private:
// only the src/dst attributes differ between entries
struct DependencyHasher {
ccstd::hash_t operator()(const VkSubpassDependency2 &info) const {
static_assert(std::is_trivially_copyable<VkSubpassDependency2>::value && sizeof(VkSubpassDependency2) % 8 == 0, "VkSubpassDependency2 must be 8 bytes aligned and trivially copyable");
return ccstd::hash_range(reinterpret_cast<const uint64_t *>(&info.srcSubpass),
reinterpret_cast<const uint64_t *>(&info.dependencyFlags));
}
};
struct DependencyComparer {
size_t operator()(const VkSubpassDependency2 &lhs, const VkSubpassDependency2 &rhs) const {
auto size = static_cast<size_t>(reinterpret_cast<const uint8_t *>(&lhs.dependencyFlags) - reinterpret_cast<const uint8_t *>(&lhs.srcSubpass));
return !memcmp(&lhs.srcSubpass, &rhs.srcSubpass, size);
}
};
ccstd::unordered_set<VkSubpassDependency2, DependencyHasher, DependencyComparer> _hashes;
};
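// Computes the initial/final VkImageLayouts for an attachment by feeding its barrier (or the
// device default color/depth-stencil barrier) through thsvs and reading back the old/new layouts.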
std::pair<VkImageLayout, VkImageLayout> getInitialFinalLayout(CCVKDevice *device, CCVKGeneralBarrier *barrier, bool depthStencil) {
const auto *gpuBarrier = barrier ? barrier->gpuBarrier() : (depthStencil ? &device->gpuDevice()->defaultDepthStencilBarrier : &device->gpuDevice()->defaultColorBarrier);
ThsvsImageBarrier imageBarrier = {};
imageBarrier.prevAccessCount = utils::toUint(gpuBarrier->prevAccesses.size());
imageBarrier.pPrevAccesses = gpuBarrier->prevAccesses.data();
imageBarrier.nextAccessCount = utils::toUint(gpuBarrier->nextAccesses.size());
imageBarrier.pNextAccesses = gpuBarrier->nextAccesses.data();
imageBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
imageBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
imageBarrier.prevLayout = barrier ? getAccessLayout(barrier->getInfo().prevAccesses) : THSVS_IMAGE_LAYOUT_OPTIMAL;
imageBarrier.nextLayout = barrier ? getAccessLayout(barrier->getInfo().nextAccesses) : THSVS_IMAGE_LAYOUT_OPTIMAL;
VkPipelineStageFlags srcStages = {};
VkPipelineStageFlags dstStages = {};
VkImageMemoryBarrier vkImageBarrier = {};
thsvsGetVulkanImageMemoryBarrier(imageBarrier, &srcStages, &dstStages, &vkImageBarrier);
return {vkImageBarrier.oldLayout, vkImageBarrier.newLayout};
}
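// Builds a VkRenderPass2: fills attachment descriptions, gathers per-subpass attachment references
// (inputs, colors, resolves, depth-stencil, shading rate), then either translates the explicitly
// provided dependencies or deduces begin/end/transition dependencies from attachment usage.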
void cmdFuncCCVKCreateRenderPass(CCVKDevice *device, CCVKGPURenderPass *gpuRenderPass) {
static ccstd::vector<VkSubpassDescriptionDepthStencilResolve> depthStencilResolves;
static ccstd::vector<VkAttachmentDescription2> attachmentDescriptions;
static ccstd::vector<VkAttachmentReference2> attachmentReferences;
static ccstd::vector<VkSubpassDescription2> subpassDescriptions;
static ccstd::vector<CCVKAccessInfo> beginAccessInfos;
static ccstd::vector<CCVKAccessInfo> endAccessInfos;
static ccstd::vector<AttachmentStatistics> attachmentStatistics;
static SubpassDependencyManager dependencyManager;
ccstd::vector<VkFragmentShadingRateAttachmentInfoKHR> shadingRateReferences;
const size_t colorAttachmentCount = gpuRenderPass->colorAttachments.size();
const size_t hasDepthStencil = gpuRenderPass->depthStencilAttachment.format != Format::UNKNOWN ? 1 : 0;
const size_t hasDepthResolve = gpuRenderPass->depthStencilResolveAttachment.format != Format::UNKNOWN ? 1 : 0;
auto attachmentCount = static_cast<uint32_t>(colorAttachmentCount + hasDepthStencil + hasDepthResolve);
uint32_t depthIndex = colorAttachmentCount;
uint32_t stencilIndex = colorAttachmentCount + 1;
const bool hasStencil = GFX_FORMAT_INFOS[toNumber(gpuRenderPass->depthStencilAttachment.format)].hasStencil;
attachmentDescriptions.assign(attachmentCount, {VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2});
gpuRenderPass->clearValues.resize(attachmentCount);
beginAccessInfos.resize(attachmentCount);
endAccessInfos.resize(attachmentCount);
shadingRateReferences.resize(gpuRenderPass->subpasses.size(), {VK_STRUCTURE_TYPE_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR});
for (size_t i = 0U; i < colorAttachmentCount; ++i) {
const auto &attachment{gpuRenderPass->colorAttachments[i]};
auto [initialLayout, finalLayout] =
getInitialFinalLayout(device, static_cast<CCVKGeneralBarrier *>(attachment.barrier), false);
VkFormat vkFormat = mapVkFormat(attachment.format, device->gpuDevice());
attachmentDescriptions[i].format = vkFormat;
attachmentDescriptions[i].samples = static_cast<VkSampleCountFlagBits>(attachment.sampleCount);
attachmentDescriptions[i].loadOp = mapVkLoadOp(attachment.loadOp);
attachmentDescriptions[i].storeOp = mapVkStoreOp(attachment.storeOp);
attachmentDescriptions[i].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
attachmentDescriptions[i].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
attachmentDescriptions[i].initialLayout = attachment.loadOp == gfx::LoadOp::DISCARD ? VK_IMAGE_LAYOUT_UNDEFINED : initialLayout;
attachmentDescriptions[i].finalLayout = finalLayout;
}
if (hasDepthStencil) {
const DepthStencilAttachment &attachment = gpuRenderPass->depthStencilAttachment;
auto [initialLayout, finalLayout] = getInitialFinalLayout(device, static_cast<CCVKGeneralBarrier *>(attachment.barrier), true);
VkFormat vkFormat = mapVkFormat(attachment.format, device->gpuDevice());
attachmentDescriptions[depthIndex].format = vkFormat;
attachmentDescriptions[depthIndex].samples = static_cast<VkSampleCountFlagBits>(attachment.sampleCount);
attachmentDescriptions[depthIndex].loadOp = mapVkLoadOp(attachment.depthLoadOp);
attachmentDescriptions[depthIndex].storeOp = mapVkStoreOp(attachment.depthStoreOp);
attachmentDescriptions[depthIndex].stencilLoadOp = hasStencil ? mapVkLoadOp(attachment.stencilLoadOp) : VK_ATTACHMENT_LOAD_OP_DONT_CARE;
attachmentDescriptions[depthIndex].stencilStoreOp = hasStencil ? mapVkStoreOp(attachment.stencilStoreOp) : VK_ATTACHMENT_STORE_OP_DONT_CARE;
attachmentDescriptions[depthIndex].initialLayout = attachment.depthLoadOp == gfx::LoadOp::DISCARD ? VK_IMAGE_LAYOUT_UNDEFINED : initialLayout;
attachmentDescriptions[depthIndex].finalLayout = finalLayout;
}
if (hasDepthResolve) {
const DepthStencilAttachment &attachment = gpuRenderPass->depthStencilResolveAttachment;
auto [initialLayout, finalLayout] = getInitialFinalLayout(device, static_cast<CCVKGeneralBarrier *>(attachment.barrier), true);
VkFormat vkFormat = mapVkFormat(attachment.format, device->gpuDevice());
attachmentDescriptions[stencilIndex].format = vkFormat;
attachmentDescriptions[stencilIndex].samples = VK_SAMPLE_COUNT_1_BIT;
attachmentDescriptions[stencilIndex].loadOp = mapVkLoadOp(attachment.depthLoadOp);
attachmentDescriptions[stencilIndex].storeOp = mapVkStoreOp(attachment.depthStoreOp);
attachmentDescriptions[stencilIndex].stencilLoadOp = hasStencil ? mapVkLoadOp(attachment.stencilLoadOp) : VK_ATTACHMENT_LOAD_OP_DONT_CARE;
attachmentDescriptions[stencilIndex].stencilStoreOp = hasStencil ? mapVkStoreOp(attachment.stencilStoreOp) : VK_ATTACHMENT_STORE_OP_DONT_CARE;
attachmentDescriptions[stencilIndex].initialLayout = attachment.depthLoadOp == gfx::LoadOp::DISCARD ? VK_IMAGE_LAYOUT_UNDEFINED : initialLayout;
attachmentDescriptions[stencilIndex].finalLayout = finalLayout;
}
size_t subpassCount = gpuRenderPass->subpasses.size();
attachmentReferences.clear();
gpuRenderPass->sampleCounts.clear();
for (const auto &subpassInfo : gpuRenderPass->subpasses) {
VkSampleCountFlagBits sampleCount = VK_SAMPLE_COUNT_1_BIT;
for (uint32_t input : subpassInfo.inputs) {
bool appearsInOutput = std::find(subpassInfo.colors.begin(), subpassInfo.colors.end(), input) != subpassInfo.colors.end();
VkImageLayout layout = appearsInOutput ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
VkImageAspectFlags aspectFlag = VK_IMAGE_ASPECT_COLOR_BIT;
if (input == gpuRenderPass->colorAttachments.size()) {
layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL;
aspectFlag = VK_IMAGE_ASPECT_STENCIL_BIT | VK_IMAGE_ASPECT_DEPTH_BIT;
}
attachmentReferences.push_back({VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2, nullptr, input, layout, aspectFlag});
}
for (uint32_t color : subpassInfo.colors) {
const VkAttachmentDescription2 &attachment = attachmentDescriptions[color];
bool appearsInInput = std::find(subpassInfo.inputs.begin(), subpassInfo.inputs.end(), color) != subpassInfo.inputs.end();
VkImageLayout layout = appearsInInput ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
attachmentReferences.push_back({VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2, nullptr, color, layout, VK_IMAGE_ASPECT_COLOR_BIT});
sampleCount = std::max(sampleCount, attachment.samples);
}
for (uint32_t resolveIn : subpassInfo.resolves) {
VkImageLayout layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
auto resolve = resolveIn == gfx::INVALID_BINDING ? VK_ATTACHMENT_UNUSED : resolveIn;
CC_ASSERT(INVALID_BINDING == VK_ATTACHMENT_UNUSED);
attachmentReferences.push_back({VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2, nullptr, resolve, layout, VK_IMAGE_ASPECT_COLOR_BIT});
}
if (subpassInfo.depthStencil != INVALID_BINDING) {
const VkAttachmentDescription2 &attachment = attachmentDescriptions[subpassInfo.depthStencil];
sampleCount = std::max(sampleCount, attachment.samples);
bool appearsInInput = std::find(subpassInfo.inputs.begin(), subpassInfo.inputs.end(), subpassInfo.depthStencil) != subpassInfo.inputs.end();
VkImageAspectFlags aspect = hasStencil ? VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT;
VkImageLayout layout = appearsInInput ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
attachmentReferences.push_back({VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2, nullptr, subpassInfo.depthStencil, layout, aspect});
}
if (subpassInfo.depthStencilResolve != INVALID_BINDING) {
VkImageAspectFlags aspect = hasStencil ? VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT;
VkImageLayout layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
attachmentReferences.push_back({VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2, nullptr, subpassInfo.depthStencilResolve, layout, aspect});
}
if (subpassInfo.shadingRate != INVALID_BINDING && subpassInfo.shadingRate < colorAttachmentCount) {
// layout is guaranteed
attachmentDescriptions[subpassInfo.shadingRate].initialLayout = VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR;
attachmentDescriptions[subpassInfo.shadingRate].finalLayout = VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR;
const ColorAttachment &desc = gpuRenderPass->colorAttachments[subpassInfo.shadingRate];
attachmentReferences.push_back({VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2, nullptr, subpassInfo.shadingRate, VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR, VK_IMAGE_ASPECT_COLOR_BIT});
}
gpuRenderPass->sampleCounts.push_back(sampleCount);
}
size_t offset{0U};
subpassDescriptions.assign(subpassCount, {VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2}); // init to zeros first
depthStencilResolves.resize(subpassCount, {VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE});
const VkPhysicalDeviceDepthStencilResolveProperties &prop{device->gpuContext()->physicalDeviceDepthStencilResolveProperties};
for (uint32_t i = 0U; i < gpuRenderPass->subpasses.size(); ++i) {
const SubpassInfo &subpassInfo = gpuRenderPass->subpasses[i];
VkSubpassDescription2 &desc = subpassDescriptions[i];
desc.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
if (!subpassInfo.inputs.empty()) {
desc.inputAttachmentCount = utils::toUint(subpassInfo.inputs.size());
desc.pInputAttachments = attachmentReferences.data() + offset;
offset += subpassInfo.inputs.size();
}
if (!subpassInfo.colors.empty()) {
desc.colorAttachmentCount = utils::toUint(subpassInfo.colors.size());
desc.pColorAttachments = attachmentReferences.data() + offset;
offset += subpassInfo.colors.size();
if (!subpassInfo.resolves.empty()) {
desc.pResolveAttachments = attachmentReferences.data() + offset;
offset += subpassInfo.resolves.size();
}
}
if (!subpassInfo.preserves.empty()) {
desc.preserveAttachmentCount = utils::toUint(subpassInfo.preserves.size());
desc.pPreserveAttachments = subpassInfo.preserves.data();
}
if (subpassInfo.depthStencil != INVALID_BINDING) {
desc.pDepthStencilAttachment = attachmentReferences.data() + offset++;
} else {
desc.pDepthStencilAttachment = nullptr;
}
if (subpassInfo.depthStencilResolve != INVALID_BINDING) {
VkSubpassDescriptionDepthStencilResolve &resolveDesc{depthStencilResolves[i]};
VkResolveModeFlagBits depthResolveMode = VK_RESOLVE_MODES[toNumber(subpassInfo.depthResolveMode)];
VkResolveModeFlagBits stencilResolveMode = VK_RESOLVE_MODES[toNumber(subpassInfo.stencilResolveMode)];
if ((depthResolveMode & prop.supportedDepthResolveModes) == 0) {
depthResolveMode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
CC_LOG_WARNING("render pass depth resolve mode [%u] not supported, use Sample0 instead.", toNumber(subpassInfo.depthResolveMode));
}
if ((stencilResolveMode & prop.supportedStencilResolveModes) == 0) {
stencilResolveMode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
CC_LOG_WARNING("render pass stencil resolve mode [%u] not supported, use Sample0 instead.", toNumber(subpassInfo.stencilResolveMode));
}
if (!prop.independentResolveNone && stencilResolveMode != depthResolveMode) {
stencilResolveMode = depthResolveMode;
} else if (prop.independentResolveNone && !prop.independentResolve && stencilResolveMode &&
depthResolveMode && stencilResolveMode != depthResolveMode) {
stencilResolveMode = VK_RESOLVE_MODE_NONE;
}
resolveDesc.depthResolveMode = depthResolveMode;
resolveDesc.stencilResolveMode = stencilResolveMode;
resolveDesc.pDepthStencilResolveAttachment = attachmentReferences.data() + offset++;
desc.pNext = &resolveDesc;
}
if (subpassInfo.shadingRate != INVALID_BINDING) {
VkFragmentShadingRateAttachmentInfoKHR &attachment = shadingRateReferences[i];
attachment.pFragmentShadingRateAttachment = attachmentReferences.data() + offset++;
attachment.shadingRateAttachmentTexelSize = {16, 16}; // todo
desc.pNext = &attachment;
}
}
size_t dependencyCount = gpuRenderPass->dependencies.size();
gpuRenderPass->hasSelfDependency.resize(subpassCount, false);
dependencyManager.clear();
bool manuallyDeduce = true;
if constexpr (ENABLE_GRAPH_AUTO_BARRIER) {
// a single pass costs 2 slots (one front, one rear).
manuallyDeduce = dependencyCount <= 2;
} else {
manuallyDeduce = dependencyCount == 0;
}
if (!manuallyDeduce) {
// offset = 0U;
ccstd::unordered_set<const GFXObject *> subpassExternalFilter;
for (uint32_t i = 0U; i < dependencyCount; ++i) {
const auto &dependency{gpuRenderPass->dependencies[i]};
VkSubpassDependency2 vkDependency{VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2};
vkDependency.srcSubpass = dependency.srcSubpass;
vkDependency.dstSubpass = dependency.dstSubpass;
vkDependency.dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;
if (dependency.srcSubpass == dependency.dstSubpass && dependency.srcSubpass < subpassCount) {
gpuRenderPass->hasSelfDependency[dependency.srcSubpass] = true;
}
auto addStageAccessMask = [&vkDependency](const SubpassDependency &deps) {
ccstd::vector<ThsvsAccessType> prevAccesses;
ccstd::vector<ThsvsAccessType> nextAccesses;
getAccessTypes(deps.prevAccesses, prevAccesses);
getAccessTypes(deps.nextAccesses, nextAccesses);
ThsvsImageBarrier imageBarrier = {};
imageBarrier.prevAccessCount = utils::toUint(prevAccesses.size());
imageBarrier.pPrevAccesses = prevAccesses.data();
imageBarrier.nextAccessCount = utils::toUint(nextAccesses.size());
imageBarrier.pNextAccesses = nextAccesses.data();
imageBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
imageBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
imageBarrier.prevLayout = getAccessLayout(deps.prevAccesses);
imageBarrier.nextLayout = getAccessLayout(deps.nextAccesses);
VkImageMemoryBarrier vkImageBarrier = {};
thsvsGetVulkanImageMemoryBarrier(imageBarrier, &vkDependency.srcStageMask, &vkDependency.dstStageMask, &vkImageBarrier);
vkDependency.srcAccessMask = vkImageBarrier.srcAccessMask;
vkDependency.dstAccessMask = vkImageBarrier.dstAccessMask;
dependencyManager.append(vkDependency);
};
if (vkDependency.srcStageMask == 0) {
vkDependency.srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
}
addStageAccessMask(dependency);
}
} else {
// try to deduce dependencies if not specified
// first, gather necessary statistics for each attachment
auto updateLifeCycle = [](AttachmentStatistics &statistics, uint32_t index, VkImageLayout layout, AttachmentStatistics::SubpassUsage usage) {
if (statistics.records.count(index)) {
statistics.records[index].usage |= usage;
} else {
statistics.records[index] = {layout, usage};
}
if (statistics.loadSubpass == VK_SUBPASS_EXTERNAL) statistics.loadSubpass = index;
statistics.storeSubpass = index;
};
auto calculateLifeCycle = [&](uint32_t targetAttachment, AttachmentStatistics &statistics) {
for (uint32_t j = 0U; j < utils::toUint(subpassCount); ++j) {
auto &subpass = subpassDescriptions[j];
for (size_t k = 0U; k < subpass.colorAttachmentCount; ++k) {
if (subpass.pColorAttachments[k].attachment == targetAttachment) {
updateLifeCycle(statistics, j, subpass.pColorAttachments[k].layout, AttachmentStatistics::SubpassUsage::COLOR);
}
if (subpass.pResolveAttachments && subpass.pResolveAttachments[k].attachment == targetAttachment) {
updateLifeCycle(statistics, j, subpass.pResolveAttachments[k].layout, AttachmentStatistics::SubpassUsage::COLOR_RESOLVE);
}
}
for (size_t k = 0U; k < subpass.inputAttachmentCount; ++k) {
if (subpass.pInputAttachments[k].attachment == targetAttachment) {
updateLifeCycle(statistics, j, subpass.pInputAttachments[k].layout, AttachmentStatistics::SubpassUsage::INPUT);
}
}
const auto *vrsDesc = static_cast<const VkFragmentShadingRateAttachmentInfoKHR *>(subpass.pNext);
if (vrsDesc != nullptr && vrsDesc->sType == VK_STRUCTURE_TYPE_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR && vrsDesc->pFragmentShadingRateAttachment->attachment == targetAttachment) {
updateLifeCycle(statistics, j, vrsDesc->pFragmentShadingRateAttachment->layout, AttachmentStatistics::SubpassUsage::SHADING_RATE);
}
if (subpass.pDepthStencilAttachment && subpass.pDepthStencilAttachment->attachment == targetAttachment) {
updateLifeCycle(statistics, j, subpass.pDepthStencilAttachment->layout, AttachmentStatistics::SubpassUsage::DEPTH);
}
if (depthStencilResolves[j].pDepthStencilResolveAttachment &&
depthStencilResolves[j].pDepthStencilResolveAttachment->attachment == targetAttachment) {
updateLifeCycle(statistics, j, depthStencilResolves[j].pDepthStencilResolveAttachment->layout, AttachmentStatistics::SubpassUsage::DEPTH_RESOLVE);
}
}
};
attachmentStatistics.resize(attachmentCount);
for (uint32_t i = 0U; i < utils::toUint(attachmentCount); ++i) {
attachmentStatistics[i].clear();
calculateLifeCycle(i, attachmentStatistics[i]);
CC_ASSERT(attachmentStatistics[i].loadSubpass != VK_SUBPASS_EXTERNAL &&
attachmentStatistics[i].storeSubpass != VK_SUBPASS_EXTERNAL);
}
// wait for resources to become available (begin accesses)
auto beginDependencyCheck = [](VkSubpassDependency2 &dependency, uint32_t attachment, const AttachmentStatistics::SubpassRef &ref) {
const VkAttachmentDescription2 &desc = attachmentDescriptions[attachment];
const CCVKAccessInfo &info = beginAccessInfos[attachment];
if (desc.initialLayout != ref.layout || info.hasWriteAccess || desc.loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
VkPipelineStageFlagBits dstStage{ref.hasDepth() ? VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT : VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT};
VkAccessFlagBits dstAccessRead{ref.hasDepth() ? VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT : VK_ACCESS_COLOR_ATTACHMENT_READ_BIT};
VkAccessFlagBits dstAccessWrite{ref.hasDepth() ? VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT : VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT};
dependency.srcStageMask |= info.stageMask;
dependency.dstStageMask |= dstStage;
dependency.srcAccessMask |= info.hasWriteAccess ? info.accessMask : 0;
dependency.dstAccessMask |= dstAccessRead;
if (desc.loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR || desc.initialLayout != ref.layout) dependency.dstAccessMask |= dstAccessWrite;
return true;
}
return false;
};
VkSubpassDependency2 beginDependency;
uint32_t lastLoadSubpass{VK_SUBPASS_EXTERNAL};
bool beginDependencyValid{false};
for (uint32_t i = 0U; i < attachmentCount; ++i) {
auto &statistics = attachmentStatistics[i];
if (lastLoadSubpass != statistics.loadSubpass) {
if (beginDependencyValid) dependencyManager.append(beginDependency);
beginDependency = {VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2, nullptr,
VK_SUBPASS_EXTERNAL, statistics.loadSubpass,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT};
lastLoadSubpass = statistics.loadSubpass;
beginDependencyValid = false;
}
beginDependencyValid |= beginDependencyCheck(beginDependency, i, statistics.records[statistics.loadSubpass]);
}
if (beginDependencyValid) dependencyManager.append(beginDependency);
// make rendering result visible (end accesses)
auto endDependencyCheck = [](VkSubpassDependency2 &dependency, uint32_t attachment, const AttachmentStatistics::SubpassRef &ref) {
const VkAttachmentDescription2 &desc = attachmentDescriptions[attachment];
const CCVKAccessInfo &info = endAccessInfos[attachment];
if (desc.initialLayout != ref.layout || info.hasWriteAccess || desc.storeOp == VK_ATTACHMENT_STORE_OP_STORE) {
VkPipelineStageFlagBits srcStage{ref.hasDepth() ? VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT : VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT};
VkAccessFlagBits srcAccess{ref.hasDepth() ? VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT : VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT};
dependency.srcStageMask |= srcStage;
dependency.srcAccessMask |= srcAccess;
dependency.dstStageMask |= info.stageMask;
dependency.dstAccessMask |= info.accessMask;
return true;
}
return false;
};
VkSubpassDependency2 endDependency;
uint32_t lastStoreSubpass{VK_SUBPASS_EXTERNAL};
bool endDependencyValid{false};
for (uint32_t i = 0U; i < attachmentCount; ++i) {
auto &statistics = attachmentStatistics[i];
if (lastStoreSubpass != statistics.storeSubpass) {
if (endDependencyValid) dependencyManager.append(endDependency);
endDependency = {VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2, nullptr,
statistics.storeSubpass, VK_SUBPASS_EXTERNAL,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT};
lastStoreSubpass = statistics.storeSubpass;
endDependencyValid = false;
}
endDependencyValid |= endDependencyCheck(endDependency, i, statistics.records[statistics.storeSubpass]);
}
if (endDependencyValid) dependencyManager.append(endDependency);
// other transitioning dependencies
auto mapAccessFlags = [](AttachmentStatistics::SubpassUsage usage) {
// there may be more kinds of dependencies
if (hasFlag(usage, AttachmentStatistics::SubpassUsage::INPUT)) {
return std::make_pair(VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_ACCESS_INPUT_ATTACHMENT_READ_BIT);
}
return std::make_pair(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT);
};
auto genDependency = [&](uint32_t srcIdx, AttachmentStatistics::SubpassUsage srcUsage,
uint32_t dstIdx, AttachmentStatistics::SubpassUsage dstUsage) {
VkSubpassDependency2 dependency{VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2, nullptr, srcIdx, dstIdx};
std::tie(dependency.srcStageMask, dependency.srcAccessMask) = mapAccessFlags(srcUsage);
std::tie(dependency.dstStageMask, dependency.dstAccessMask) = mapAccessFlags(dstUsage);
dependency.dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;
return dependency;
};
for (size_t i = 0U; i < attachmentCount; ++i) {
auto &statistics{attachmentStatistics[i]};
const AttachmentStatistics::SubpassRef *prevRef{nullptr};
uint32_t prevIdx{0U};
for (const auto &it : statistics.records) {
if (prevRef && prevRef->usage != it.second.usage) {
dependencyManager.append(genDependency(prevIdx, prevRef->usage, it.first, it.second.usage));
}
prevIdx = it.first;
prevRef = &it.second;
}
}
}
VkRenderPassCreateInfo2 renderPassCreateInfo{VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2};
renderPassCreateInfo.attachmentCount = utils::toUint(attachmentDescriptions.size());
renderPassCreateInfo.pAttachments = attachmentDescriptions.data();
renderPassCreateInfo.subpassCount = utils::toUint(subpassDescriptions.size());
renderPassCreateInfo.pSubpasses = subpassDescriptions.data();
renderPassCreateInfo.dependencyCount = utils::toUint(dependencyManager.subpassDependencies.size());
renderPassCreateInfo.pDependencies = dependencyManager.subpassDependencies.data();
VK_CHECK(device->gpuDevice()->createRenderPass2(device->gpuDevice()->vkDevice, &renderPassCreateInfo,
nullptr, &gpuRenderPass->vkRenderPass));
}
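// Creates the framebuffer(s). swapchainImageIndices is a bitmask of attachment slots that are
// swapchain-backed; if any slot is set, one VkFramebuffer is created per swapchain image with
// the corresponding views swapped in, otherwise a single offscreen framebuffer is created.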
void cmdFuncCCVKCreateFramebuffer(CCVKDevice *device, CCVKGPUFramebuffer *gpuFramebuffer) {
size_t colorViewCount = gpuFramebuffer->gpuColorViews.size();
const auto *gpuRenderPass = gpuFramebuffer->gpuRenderPass.get();
const size_t hasDepthStencil = gpuRenderPass->depthStencilAttachment.format != Format::UNKNOWN ? 1 : 0;
const size_t hasDepthResolve = gpuRenderPass->depthStencilResolveAttachment.format != Format::UNKNOWN ? 1 : 0;
auto attachmentCount = static_cast<uint32_t>(colorViewCount + hasDepthStencil + hasDepthResolve);
ccstd::vector<VkImageView> attachments(attachmentCount);
VkFramebufferCreateInfo createInfo{VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO};
createInfo.width = createInfo.height = UINT_MAX;
uint32_t swapchainImageIndices = 0;
for (size_t i = 0U; i < colorViewCount; ++i) {
const CCVKGPUTextureView *texView = gpuFramebuffer->gpuColorViews[i];
if (texView->gpuTexture->swapchain) {
gpuFramebuffer->swapchain = texView->gpuTexture->swapchain;
swapchainImageIndices |= (1 << i);
} else {
attachments[i] = gpuFramebuffer->gpuColorViews[i]->vkImageView;
}
if (!hasFlag(texView->gpuTexture->usage, TextureUsageBit::SHADING_RATE)) {
createInfo.width = std::min(createInfo.width, std::max(1U, gpuFramebuffer->gpuColorViews[i]->gpuTexture->width >> gpuFramebuffer->gpuColorViews[i]->baseLevel));
createInfo.height = std::min(createInfo.height, std::max(1U, gpuFramebuffer->gpuColorViews[i]->gpuTexture->height >> gpuFramebuffer->gpuColorViews[i]->baseLevel));
}
}
if (hasDepthStencil) {
if (gpuFramebuffer->gpuDepthStencilView->gpuTexture->swapchain) {
gpuFramebuffer->swapchain = gpuFramebuffer->gpuDepthStencilView->gpuTexture->swapchain;
swapchainImageIndices |= (1 << colorViewCount);
} else {
attachments[colorViewCount] = gpuFramebuffer->gpuDepthStencilView->vkImageView;
}
createInfo.width = std::min(createInfo.width, std::max(1U, gpuFramebuffer->gpuDepthStencilView->gpuTexture->width >> gpuFramebuffer->gpuDepthStencilView->baseLevel));
createInfo.height = std::min(createInfo.height, std::max(1U, gpuFramebuffer->gpuDepthStencilView->gpuTexture->height >> gpuFramebuffer->gpuDepthStencilView->baseLevel));
}
if (hasDepthResolve) {
attachments[colorViewCount + 1] = gpuFramebuffer->gpuDepthStencilResolveView->vkImageView;
}
gpuFramebuffer->isOffscreen = !swapchainImageIndices;
gpuFramebuffer->width = createInfo.width;
gpuFramebuffer->height = createInfo.height;
if (gpuFramebuffer->isOffscreen) {
createInfo.renderPass = gpuFramebuffer->gpuRenderPass->vkRenderPass;
createInfo.attachmentCount = utils::toUint(attachments.size());
createInfo.pAttachments = attachments.data();
createInfo.layers = 1;
VK_CHECK(vkCreateFramebuffer(device->gpuDevice()->vkDevice, &createInfo, nullptr, &gpuFramebuffer->vkFramebuffer));
} else {
size_t swapChainImageCount = gpuFramebuffer->swapchain->swapchainImages.size();
gpuFramebuffer->vkFrameBuffers.resize(swapChainImageCount);
createInfo.renderPass = gpuFramebuffer->gpuRenderPass->vkRenderPass;
createInfo.attachmentCount = utils::toUint(attachments.size());
createInfo.pAttachments = attachments.data();
createInfo.layers = 1;
for (size_t i = 0U; i < swapChainImageCount; ++i) {
for (size_t j = 0U; j < colorViewCount; ++j) {
if (swapchainImageIndices & (1 << j)) {
attachments[j] = gpuFramebuffer->gpuColorViews[j]->swapchainVkImageViews[i];
}
}
if (swapchainImageIndices & (1 << colorViewCount)) {
attachments[colorViewCount] = gpuFramebuffer->gpuDepthStencilView->swapchainVkImageViews[i];
}
VK_CHECK(vkCreateFramebuffer(device->gpuDevice()->vkDevice, &createInfo, nullptr, &gpuFramebuffer->vkFrameBuffers[i]));
}
}
}
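// Compiles each GLSL stage to SPIR-V at runtime (prepending "#version 450") and creates the
// shader modules; vertex stages also run compressInputLocations on the shader's attribute list.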
void cmdFuncCCVKCreateShader(CCVKDevice *device, CCVKGPUShader *gpuShader) {
SPIRVUtils *spirv = SPIRVUtils::getInstance();
for (CCVKGPUShaderStage &stage : gpuShader->gpuStages) {
spirv->compileGLSL(stage.type, "#version 450\n" + stage.source);
if (stage.type == ShaderStageFlagBit::VERTEX) spirv->compressInputLocations(gpuShader->attributes);
VkShaderModuleCreateInfo createInfo{VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO};
createInfo.codeSize = spirv->getOutputSize();
createInfo.pCode = spirv->getOutputData();
VK_CHECK(vkCreateShaderModule(device->gpuDevice()->vkDevice, &createInfo, nullptr, &stage.vkShader));
}
CC_LOG_INFO("Shader '%s' compilation succeeded.", gpuShader->name.c_str());
}
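// Creates the VkDescriptorSetLayout, links it to the per-layout descriptor set pool, requests a
// default set, and (when supported) builds a VkDescriptorUpdateTemplate whose entries index into
// a tightly packed array of CCVKDescriptorInfo for fast descriptor updates.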
void cmdFuncCCVKCreateDescriptorSetLayout(CCVKDevice *device, CCVKGPUDescriptorSetLayout *gpuDescriptorSetLayout) {
CCVKGPUDevice *gpuDevice = device->gpuDevice();
size_t bindingCount = gpuDescriptorSetLayout->bindings.size();
gpuDescriptorSetLayout->vkBindings.resize(bindingCount);
for (size_t i = 0U; i < bindingCount; ++i) {
const DescriptorSetLayoutBinding &binding = gpuDescriptorSetLayout->bindings[i];
VkDescriptorSetLayoutBinding &vkBinding = gpuDescriptorSetLayout->vkBindings[i];
vkBinding.stageFlags = mapVkShaderStageFlags(binding.stageFlags);
vkBinding.descriptorType = mapVkDescriptorType(binding.descriptorType);
vkBinding.binding = binding.binding;
vkBinding.descriptorCount = binding.count;
}
VkDescriptorSetLayoutCreateInfo setCreateInfo{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
setCreateInfo.bindingCount = utils::toUint(bindingCount);
setCreateInfo.pBindings = gpuDescriptorSetLayout->vkBindings.data();
VK_CHECK(vkCreateDescriptorSetLayout(gpuDevice->vkDevice, &setCreateInfo, nullptr, &gpuDescriptorSetLayout->vkDescriptorSetLayout));
CCVKGPUDescriptorSetPool *pool = gpuDevice->getDescriptorSetPool(gpuDescriptorSetLayout->id);
pool->link(gpuDevice, gpuDescriptorSetLayout->maxSetsPerPool, gpuDescriptorSetLayout->vkBindings, gpuDescriptorSetLayout->vkDescriptorSetLayout);
gpuDescriptorSetLayout->defaultDescriptorSet = pool->request();
if (gpuDevice->useDescriptorUpdateTemplate && bindingCount) {
const ccstd::vector<VkDescriptorSetLayoutBinding> &bindings = gpuDescriptorSetLayout->vkBindings;
ccstd::vector<VkDescriptorUpdateTemplateEntry> entries(bindingCount);
for (size_t j = 0U, k = 0U; j < bindingCount; ++j) {
const VkDescriptorSetLayoutBinding &binding = bindings[j];
if (binding.descriptorType != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
entries[j].dstBinding = binding.binding;
entries[j].dstArrayElement = 0;
entries[j].descriptorCount = binding.descriptorCount;
entries[j].descriptorType = binding.descriptorType;
entries[j].offset = sizeof(CCVKDescriptorInfo) * k;
entries[j].stride = sizeof(CCVKDescriptorInfo);
k += binding.descriptorCount;
}
}
VkDescriptorUpdateTemplateCreateInfo createInfo = {VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO};
createInfo.descriptorUpdateEntryCount = utils::toUint(bindingCount);
createInfo.pDescriptorUpdateEntries = entries.data();
createInfo.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET;
createInfo.descriptorSetLayout = gpuDescriptorSetLayout->vkDescriptorSetLayout;
if (gpuDevice->minorVersion > 0) {
VK_CHECK(vkCreateDescriptorUpdateTemplate(gpuDevice->vkDevice, &createInfo, nullptr, &gpuDescriptorSetLayout->vkDescriptorUpdateTemplate));
} else {
VK_CHECK(vkCreateDescriptorUpdateTemplateKHR(gpuDevice->vkDevice, &createInfo, nullptr, &gpuDescriptorSetLayout->vkDescriptorUpdateTemplate));
}
}
}
void cmdFuncCCVKCreatePipelineLayout(CCVKDevice *device, CCVKGPUPipelineLayout *gpuPipelineLayout) {
CCVKGPUDevice *gpuDevice = device->gpuDevice();
size_t layoutCount = gpuPipelineLayout->setLayouts.size();
ccstd::vector<VkDescriptorSetLayout> descriptorSetLayouts(layoutCount);
for (uint32_t i = 0; i < layoutCount; ++i) {
descriptorSetLayouts[i] = gpuPipelineLayout->setLayouts[i]->vkDescriptorSetLayout;
}
VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
pipelineLayoutCreateInfo.setLayoutCount = utils::toUint(layoutCount);
pipelineLayoutCreateInfo.pSetLayouts = descriptorSetLayouts.data();
VK_CHECK(vkCreatePipelineLayout(gpuDevice->vkDevice, &pipelineLayoutCreateInfo, nullptr, &gpuPipelineLayout->vkPipelineLayout));
}
void cmdFuncCCVKCreateComputePipelineState(CCVKDevice *device, CCVKGPUPipelineState *gpuPipelineState) {
VkComputePipelineCreateInfo createInfo{VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO};
///////////////////// Shader Stage /////////////////////
const auto &stages = gpuPipelineState->gpuShader->gpuStages;
VkPipelineShaderStageCreateInfo stageInfo{VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO};
stageInfo.stage = mapVkShaderStageFlagBits(stages[0].type);
stageInfo.module = stages[0].vkShader;
stageInfo.pName = "main";
createInfo.stage = stageInfo;
createInfo.layout = gpuPipelineState->gpuPipelineLayout->vkPipelineLayout;
///////////////////// Creation /////////////////////
auto *pipelineCache = device->pipelineCache();
CC_ASSERT(pipelineCache != nullptr);
pipelineCache->setDirty();
VK_CHECK(vkCreateComputePipelines(device->gpuDevice()->vkDevice, pipelineCache->getHandle(),
1, &createInfo, nullptr, &gpuPipelineState->vkPipeline));
}
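// Assembles the full VkGraphicsPipelineCreateInfo: shader stages, vertex input (bindings derived
// from attribute streams, missing shader attributes bound to dummy data at offset 0), dynamic
// state, rasterization, multisampling, depth-stencil, blending, and optional shading rate state,
// then creates the pipeline through the shared pipeline cache.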
void cmdFuncCCVKCreateGraphicsPipelineState(CCVKDevice *device, CCVKGPUPipelineState *gpuPipelineState) {
static ccstd::vector<VkPipelineShaderStageCreateInfo> stageInfos;
static ccstd::vector<VkVertexInputBindingDescription> bindingDescriptions;
static ccstd::vector<VkVertexInputAttributeDescription> attributeDescriptions;
static ccstd::vector<uint32_t> offsets;
static ccstd::vector<VkDynamicState> dynamicStates;
static ccstd::vector<VkPipelineColorBlendAttachmentState> blendTargets;
VkGraphicsPipelineCreateInfo createInfo{VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO};
///////////////////// Shader Stage /////////////////////
const auto &stages = gpuPipelineState->gpuShader->gpuStages;
const size_t stageCount = stages.size();
stageInfos.resize(stageCount, {VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO});
for (size_t i = 0U; i < stageCount; ++i) {
stageInfos[i].stage = mapVkShaderStageFlagBits(stages[i].type);
stageInfos[i].module = stages[i].vkShader;
stageInfos[i].pName = "main";
}
createInfo.stageCount = utils::toUint(stageCount);
createInfo.pStages = stageInfos.data();
///////////////////// Input State /////////////////////
const AttributeList &attributes = gpuPipelineState->inputState.attributes;
const size_t attributeCount = attributes.size();
uint32_t bindingCount = 1U;
for (size_t i = 0U; i < attributeCount; ++i) {
const Attribute &attr = attributes[i];
bindingCount = std::max(bindingCount, attr.stream + 1);
}
bindingDescriptions.resize(bindingCount);
for (uint32_t i = 0U; i < bindingCount; ++i) {
bindingDescriptions[i].binding = i;
bindingDescriptions[i].stride = 0;
bindingDescriptions[i].inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
}
for (size_t i = 0U; i < attributeCount; ++i) {
const Attribute &attr = attributes[i];
bindingDescriptions[attr.stream].stride += GFX_FORMAT_INFOS[toNumber(attr.format)].size;
if (attr.isInstanced) {
bindingDescriptions[attr.stream].inputRate = VK_VERTEX_INPUT_RATE_INSTANCE;
}
}
const AttributeList &shaderAttrs = gpuPipelineState->gpuShader->attributes;
const size_t shaderAttrCount = shaderAttrs.size();
attributeDescriptions.resize(shaderAttrCount);
for (size_t i = 0; i < shaderAttrCount; ++i) {
bool attributeFound = false;
offsets.assign(bindingCount, 0);
for (const Attribute &attr : attributes) {
if (shaderAttrs[i].name == attr.name) {
attributeDescriptions[i].location = shaderAttrs[i].location;
attributeDescriptions[i].binding = attr.stream;
attributeDescriptions[i].format = mapVkFormat(attr.format, device->gpuDevice());
attributeDescriptions[i].offset = offsets[attr.stream];
attributeFound = true;
break;
}
offsets[attr.stream] += GFX_FORMAT_INFOS[toNumber(attr.format)].size;
}
if (!attributeFound) { // handle absent attribute
attributeDescriptions[i].location = shaderAttrs[i].location;
attributeDescriptions[i].binding = 0;
attributeDescriptions[i].format = mapVkFormat(shaderAttrs[i].format, device->gpuDevice());
attributeDescriptions[i].offset = 0; // reuse the first attribute as dummy data
}
}
VkPipelineVertexInputStateCreateInfo vertexInput{VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO};
vertexInput.vertexBindingDescriptionCount = bindingCount;
vertexInput.pVertexBindingDescriptions = bindingDescriptions.data();
vertexInput.vertexAttributeDescriptionCount = utils::toUint(shaderAttrCount);
vertexInput.pVertexAttributeDescriptions = attributeDescriptions.data();
createInfo.pVertexInputState = &vertexInput;
///////////////////// Input Assembly State /////////////////////
VkPipelineInputAssemblyStateCreateInfo inputAssembly{VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO};
inputAssembly.topology = VK_PRIMITIVE_MODES[toNumber(gpuPipelineState->primitive)];
createInfo.pInputAssemblyState = &inputAssembly;
///////////////////// Dynamic State /////////////////////
dynamicStates.assign({VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR});
insertVkDynamicStates(&dynamicStates, gpuPipelineState->dynamicStates);
VkPipelineDynamicStateCreateInfo dynamicState{VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO};
dynamicState.dynamicStateCount = utils::toUint(dynamicStates.size());
dynamicState.pDynamicStates = dynamicStates.data();
createInfo.pDynamicState = &dynamicState;
///////////////////// Viewport State /////////////////////
VkPipelineViewportStateCreateInfo viewportState{VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO};
viewportState.viewportCount = 1; // dynamic by default
viewportState.scissorCount = 1; // dynamic by default
createInfo.pViewportState = &viewportState;
///////////////////// Rasterization State /////////////////////
VkPipelineRasterizationStateCreateInfo rasterizationState{VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO};
// rasterizationState.depthClampEnable;
rasterizationState.rasterizerDiscardEnable = gpuPipelineState->rs.isDiscard;
rasterizationState.polygonMode = VK_POLYGON_MODES[toNumber(gpuPipelineState->rs.polygonMode)];
rasterizationState.cullMode = VK_CULL_MODES[toNumber(gpuPipelineState->rs.cullMode)];
rasterizationState.frontFace = gpuPipelineState->rs.isFrontFaceCCW ? VK_FRONT_FACE_COUNTER_CLOCKWISE : VK_FRONT_FACE_CLOCKWISE;
rasterizationState.depthBiasEnable = gpuPipelineState->rs.depthBiasEnabled;
rasterizationState.depthBiasConstantFactor = gpuPipelineState->rs.depthBias;
rasterizationState.depthBiasClamp = gpuPipelineState->rs.depthBiasClamp;
rasterizationState.depthBiasSlopeFactor = gpuPipelineState->rs.depthBiasSlop;
rasterizationState.lineWidth = gpuPipelineState->rs.lineWidth;
createInfo.pRasterizationState = &rasterizationState;
///////////////////// Multisample State /////////////////////
VkPipelineMultisampleStateCreateInfo multisampleState{VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO};
multisampleState.rasterizationSamples = gpuPipelineState->gpuRenderPass->sampleCounts[gpuPipelineState->subpass];
multisampleState.alphaToCoverageEnable = gpuPipelineState->bs.isA2C;
// multisampleState.sampleShadingEnable;
// multisampleState.minSampleShading;
// multisampleState.pSampleMask;
// multisampleState.alphaToOneEnable;
createInfo.pMultisampleState = &multisampleState;
///////////////////// Depth Stencil State /////////////////////
VkPipelineDepthStencilStateCreateInfo depthStencilState = {VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO};
depthStencilState.depthTestEnable = gpuPipelineState->dss.depthTest;
depthStencilState.depthWriteEnable = gpuPipelineState->dss.depthWrite;
depthStencilState.depthCompareOp = VK_CMP_FUNCS[toNumber(gpuPipelineState->dss.depthFunc)];
depthStencilState.stencilTestEnable = gpuPipelineState->dss.stencilTestFront;
depthStencilState.front = {
VK_STENCIL_OPS[toNumber(gpuPipelineState->dss.stencilFailOpFront)],
VK_STENCIL_OPS[toNumber(gpuPipelineState->dss.stencilPassOpFront)],
VK_STENCIL_OPS[toNumber(gpuPipelineState->dss.stencilZFailOpFront)],
VK_CMP_FUNCS[toNumber(gpuPipelineState->dss.stencilFuncFront)],
gpuPipelineState->dss.stencilReadMaskFront,
gpuPipelineState->dss.stencilWriteMaskFront,
gpuPipelineState->dss.stencilRefFront,
};
depthStencilState.back = {
VK_STENCIL_OPS[toNumber(gpuPipelineState->dss.stencilFailOpBack)],
VK_STENCIL_OPS[toNumber(gpuPipelineState->dss.stencilPassOpBack)],
VK_STENCIL_OPS[toNumber(gpuPipelineState->dss.stencilZFailOpBack)],
VK_CMP_FUNCS[toNumber(gpuPipelineState->dss.stencilFuncBack)],
gpuPipelineState->dss.stencilReadMaskBack,
gpuPipelineState->dss.stencilWriteMaskBack,
gpuPipelineState->dss.stencilRefBack,
};
// depthStencilState.depthBoundsTestEnable;
// depthStencilState.minDepthBounds;
// depthStencilState.maxDepthBounds;
createInfo.pDepthStencilState = &depthStencilState;
///////////////////// Blend State /////////////////////
size_t blendTargetCount = gpuPipelineState->gpuRenderPass->subpasses[gpuPipelineState->subpass].colors.size();
blendTargets.resize(blendTargetCount, {});
for (size_t i = 0U; i < blendTargetCount; ++i) {
BlendTarget &target = i >= gpuPipelineState->bs.targets.size()
? gpuPipelineState->bs.targets[0]
: gpuPipelineState->bs.targets[i];
blendTargets[i].blendEnable = target.blend;
blendTargets[i].srcColorBlendFactor = VK_BLEND_FACTORS[toNumber(target.blendSrc)];
blendTargets[i].dstColorBlendFactor = VK_BLEND_FACTORS[toNumber(target.blendDst)];
blendTargets[i].colorBlendOp = VK_BLEND_OPS[toNumber(target.blendEq)];
blendTargets[i].srcAlphaBlendFactor = VK_BLEND_FACTORS[toNumber(target.blendSrcAlpha)];
blendTargets[i].dstAlphaBlendFactor = VK_BLEND_FACTORS[toNumber(target.blendDstAlpha)];
blendTargets[i].alphaBlendOp = VK_BLEND_OPS[toNumber(target.blendAlphaEq)];
blendTargets[i].colorWriteMask = mapVkColorComponentFlags(target.blendColorMask);
}
Color &blendColor = gpuPipelineState->bs.blendColor;
VkPipelineColorBlendStateCreateInfo colorBlendState{VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO};
// colorBlendState.logicOpEnable;
// colorBlendState.logicOp;
colorBlendState.attachmentCount = utils::toUint(blendTargetCount);
colorBlendState.pAttachments = blendTargets.data();
colorBlendState.blendConstants[0] = blendColor.x;
colorBlendState.blendConstants[1] = blendColor.y;
colorBlendState.blendConstants[2] = blendColor.z;
colorBlendState.blendConstants[3] = blendColor.w;
createInfo.pColorBlendState = &colorBlendState;
///////////////////// ShadingRate /////////////////////
VkPipelineFragmentShadingRateStateCreateInfoKHR shadingRateInfo = {VK_STRUCTURE_TYPE_PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR};
if (device->getCapabilities().supportVariableRateShading &&
gpuPipelineState->gpuRenderPass->hasShadingAttachment(gpuPipelineState->subpass)) {
shadingRateInfo.fragmentSize = {1, 1}; // per-draw and per-vertex shading rates are not supported
shadingRateInfo.combinerOps[0] = VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR;
shadingRateInfo.combinerOps[1] = VK_FRAGMENT_SHADING_RATE_COMBINER_OP_REPLACE_KHR;
createInfo.pNext = &shadingRateInfo;
}
///////////////////// References /////////////////////
createInfo.layout = gpuPipelineState->gpuPipelineLayout->vkPipelineLayout;
createInfo.renderPass = gpuPipelineState->gpuRenderPass->vkRenderPass;
createInfo.subpass = gpuPipelineState->subpass;
///////////////////// Creation /////////////////////
auto *pipelineCache = device->pipelineCache();
CC_ASSERT(pipelineCache != nullptr);
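// flag the pipeline cache as dirty so its contents can be re-serialized once the new pipeline is added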
pipelineCache->setDirty();
VK_CHECK(vkCreateGraphicsPipelines(device->gpuDevice()->vkDevice, pipelineCache->getHandle(),
1, &createInfo, nullptr, &gpuPipelineState->vkPipeline));
}
void cmdFuncCCVKCreateGeneralBarrier(CCVKDevice * /*device*/, CCVKGPUGeneralBarrier *gpuGeneralBarrier) {
gpuGeneralBarrier->barrier.prevAccessCount = utils::toUint(gpuGeneralBarrier->prevAccesses.size());
gpuGeneralBarrier->barrier.pPrevAccesses = gpuGeneralBarrier->prevAccesses.data();
gpuGeneralBarrier->barrier.nextAccessCount = utils::toUint(gpuGeneralBarrier->nextAccesses.size());
gpuGeneralBarrier->barrier.pNextAccesses = gpuGeneralBarrier->nextAccesses.data();
thsvsGetVulkanMemoryBarrier(gpuGeneralBarrier->barrier, &gpuGeneralBarrier->srcStageMask, &gpuGeneralBarrier->dstStageMask, &gpuGeneralBarrier->vkBarrier);
}
namespace {
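// records a staging-buffer-to-device-buffer copy; with basic barrier deduction enabled, a transfer-to-transfer
// memory barrier is emitted first to serialize back-to-back writes into the same destination buffer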
void bufferUpload(const CCVKGPUBufferView &stagingBuffer, CCVKGPUBuffer &gpuBuffer, VkBufferCopy region, const CCVKGPUCommandBuffer *gpuCommandBuffer) {
#if BARRIER_DEDUCTION_LEVEL >= BARRIER_DEDUCTION_LEVEL_BASIC
if (gpuBuffer.transferAccess) {
// guard against WAW hazard
VkMemoryBarrier vkBarrier{VK_STRUCTURE_TYPE_MEMORY_BARRIER};
vkBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
vkBarrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
vkCmdPipelineBarrier(gpuCommandBuffer->vkCommandBuffer,
VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT,
0, 1, &vkBarrier, 0, nullptr, 0, nullptr);
}
#endif
vkCmdCopyBuffer(gpuCommandBuffer->vkCommandBuffer, stagingBuffer.gpuBuffer->vkBuffer, gpuBuffer.vkBuffer, 1, &region);
}
} // namespace
void cmdFuncCCVKUpdateBuffer(CCVKDevice *device, CCVKGPUBuffer *gpuBuffer, const void *buffer, uint32_t size, const CCVKGPUCommandBuffer *cmdBuffer) {
if (!gpuBuffer) return;
const void *dataToUpload = nullptr;
size_t sizeToUpload = 0U;
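// indirect buffers receive DrawInfo records from the engine: translate them into indexed or non-indexed
// Vulkan indirect commands depending on whether the first record uses an index buffer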
if (hasFlag(gpuBuffer->usage, BufferUsageBit::INDIRECT)) {
size_t drawInfoCount = size / sizeof(DrawInfo);
const auto *drawInfo = static_cast<const DrawInfo *>(buffer);
if (drawInfoCount > 0) {
if (drawInfo->indexCount) {
for (size_t i = 0; i < drawInfoCount; ++i) {
gpuBuffer->indexedIndirectCmds[i].indexCount = drawInfo->indexCount;
gpuBuffer->indexedIndirectCmds[i].instanceCount = std::max(drawInfo->instanceCount, 1U);
gpuBuffer->indexedIndirectCmds[i].firstIndex = drawInfo->firstIndex;
gpuBuffer->indexedIndirectCmds[i].vertexOffset = drawInfo->vertexOffset;
gpuBuffer->indexedIndirectCmds[i].firstInstance = drawInfo->firstInstance;
drawInfo++;
}
dataToUpload = gpuBuffer->indexedIndirectCmds.data();
sizeToUpload = drawInfoCount * sizeof(VkDrawIndexedIndirectCommand);
gpuBuffer->isDrawIndirectByIndex = true;
} else {
for (size_t i = 0; i < drawInfoCount; ++i) {
gpuBuffer->indirectCmds[i].vertexCount = drawInfo->vertexCount;
gpuBuffer->indirectCmds[i].instanceCount = std::max(drawInfo->instanceCount, 1U);
gpuBuffer->indirectCmds[i].firstVertex = drawInfo->firstVertex;
gpuBuffer->indirectCmds[i].firstInstance = drawInfo->firstInstance;
drawInfo++;
}
dataToUpload = gpuBuffer->indirectCmds.data();
sizeToUpload = drawInfoCount * sizeof(VkDrawIndirectCommand);
gpuBuffer->isDrawIndirectByIndex = false;
}
}
} else {
dataToUpload = buffer;
sizeToUpload = size;
}
// multi-instance buffers: record this update so the other back-buffer instances can be synced in CCVKGPUBufferHub::flush
uint32_t backBufferIndex = device->gpuDevice()->curBackBufferIndex;
if (gpuBuffer->instanceSize) {
device->gpuBufferHub()->record(gpuBuffer, backBufferIndex, sizeToUpload, !cmdBuffer);
if (!cmdBuffer) {
uint8_t *dst = gpuBuffer->mappedData + backBufferIndex * gpuBuffer->instanceSize;
memcpy(dst, dataToUpload, sizeToUpload);
return;
}
}
// upload buffer by chunks
uint32_t chunkSize = static_cast<uint32_t>(std::min(static_cast<VkDeviceSize>(sizeToUpload), CCVKGPUStagingBufferPool::CHUNK_SIZE));
uint32_t chunkOffset = 0U;
while (sizeToUpload) {
uint32_t chunkSizeToUpload = std::min(chunkSize, static_cast<uint32_t>(sizeToUpload));
sizeToUpload -= chunkSizeToUpload;
IntrusivePtr<CCVKGPUBufferView> stagingBuffer = device->gpuStagingBufferPool()->alloc(chunkSizeToUpload);
memcpy(stagingBuffer->mappedData(), static_cast<const char *>(dataToUpload) + chunkOffset, chunkSizeToUpload);
VkBufferCopy region{
stagingBuffer->offset,
gpuBuffer->getStartOffset(backBufferIndex) + chunkOffset,
chunkSizeToUpload,
};
chunkOffset += chunkSizeToUpload;
if (cmdBuffer) {
bufferUpload(*stagingBuffer, *gpuBuffer, region, cmdBuffer);
} else {
device->gpuTransportHub()->checkIn(
// capture by ref is safe here since the transport function will be executed immediately in the same thread
[&stagingBuffer, &gpuBuffer, region](CCVKGPUCommandBuffer *gpuCommandBuffer) {
bufferUpload(*stagingBuffer, *gpuBuffer, region, gpuCommandBuffer);
});
}
}
gpuBuffer->transferAccess = THSVS_ACCESS_TRANSFER_WRITE;
device->gpuBarrierManager()->checkIn(gpuBuffer);
}
void cmdFuncCCVKCopyBuffersToTexture(CCVKDevice *device, const uint8_t *const *buffers, CCVKGPUTexture *gpuTexture,
const BufferTextureCopy *regions, uint32_t count, const CCVKGPUCommandBuffer *gpuCommandBuffer) {
ccstd::vector<ThsvsAccessType> &curTypes = gpuTexture->currentAccessTypes;
ThsvsImageBarrier barrier{};
barrier.image = gpuTexture->vkImage;
barrier.discardContents = false;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS;
barrier.subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS;
barrier.subresourceRange.aspectMask = gpuTexture->aspectMask;
barrier.prevAccessCount = utils::toUint(curTypes.size());
barrier.pPrevAccesses = curTypes.data();
barrier.nextAccessCount = 1;
barrier.pNextAccesses = getAccessType(AccessFlagBit::TRANSFER_WRITE);
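// transition the whole image into TRANSFER_WRITE unless it is already there, in which case
// a transfer-to-transfer memory barrier below is enough to guard against WAW hazards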
if (gpuTexture->transferAccess != THSVS_ACCESS_TRANSFER_WRITE) {
cmdFuncCCVKImageMemoryBarrier(gpuCommandBuffer, barrier);
} else {
// guard against WAW hazard
VkMemoryBarrier vkBarrier{VK_STRUCTURE_TYPE_MEMORY_BARRIER};
vkBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
vkBarrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
vkCmdPipelineBarrier(gpuCommandBuffer->vkCommandBuffer,
VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT,
0, 1, &vkBarrier, 0, nullptr, 0, nullptr);
}
uint32_t optimalOffsetAlignment = device->gpuContext()->physicalDeviceProperties.limits.optimalBufferCopyOffsetAlignment;
uint32_t optimalRowPitchAlignment = device->gpuContext()->physicalDeviceProperties.limits.optimalBufferCopyRowPitchAlignment;
uint32_t offsetAlignment = lcm(GFX_FORMAT_INFOS[toNumber(gpuTexture->format)].size, optimalRowPitchAlignment);
auto blockSize = formatAlignment(gpuTexture->format);
uint32_t idx = 0;
for (size_t i = 0U; i < count; ++i) {
const BufferTextureCopy &region{regions[i]};
Offset offset{
region.texOffset.x == 0 ? 0 : utils::alignTo(region.texOffset.x, static_cast<int32_t>(blockSize.first)),
region.texOffset.y == 0 ? 0 : utils::alignTo(region.texOffset.y, static_cast<int32_t>(blockSize.second)),
region.texOffset.z,
};
Extent extent{
utils::alignTo(region.texExtent.width, static_cast<uint32_t>(blockSize.first)),
utils::alignTo(region.texExtent.height, static_cast<uint32_t>(blockSize.second)),
region.texExtent.depth,
};
Extent stride{
region.buffStride > 0 ? region.buffStride : extent.width,
region.buffTexHeight > 0 ? region.buffTexHeight : extent.height,
0, // depth stride is unused here
};
uint32_t layerCount = region.texSubres.layerCount;
uint32_t baseLayer = region.texSubres.baseArrayLayer;
uint32_t mipLevel = region.texSubres.mipLevel;
uint32_t rowPitchSize = formatSize(gpuTexture->format, extent.width, 1, 1);
rowPitchSize = utils::alignTo(rowPitchSize, optimalRowPitchAlignment);
// what if the optimal alignment is smaller than a block size
uint32_t rowPitch = rowPitchSize / formatSize(gpuTexture->format, 1, 1, 1) * blockSize.first;
uint32_t destRowSize = formatSize(gpuTexture->format, extent.width, 1, 1);
uint32_t destSliceSize = formatSize(gpuTexture->format, extent.width, extent.height, 1);
uint32_t buffStrideSize = formatSize(gpuTexture->format, stride.width, 1, 1);
uint32_t buffSliceSize = formatSize(gpuTexture->format, stride.width, stride.height, 1);
// calculate the max height to upload per staging buffer chunk
uint32_t chunkHeight = extent.height;
size_t chunkSize = rowPitchSize * (extent.height / blockSize.second);
while (chunkSize > CCVKGPUStagingBufferPool::CHUNK_SIZE) {
chunkHeight = utils::alignTo((chunkHeight - 1) / 2 + 1, blockSize.second);
chunkSize = rowPitchSize * (chunkHeight / blockSize.second);
}
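// copy layer by layer and slice by slice: each chunk of rows is packed into a staging allocation
// at the aligned row pitch and then copied into the image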
uint32_t destOffset = 0;
uint32_t buffOffset = 0;
uint32_t destWidth = (region.texExtent.width + offset.x == (gpuTexture->width >> mipLevel)) ? region.texExtent.width : extent.width;
uint32_t destHeight = (region.texExtent.height + offset.y == (gpuTexture->height >> mipLevel)) ? region.texExtent.height : extent.height;
int32_t heightOffset = 0;
uint32_t stepHeight = 0;
for (uint32_t l = 0; l < layerCount; l++) {
for (uint32_t depth = 0; depth < extent.depth; ++depth) {
buffOffset = region.buffOffset + depth * buffSliceSize;
// upload in chunks
for (uint32_t h = 0U; h < extent.height; h += chunkHeight) {
destOffset = 0;
heightOffset = static_cast<int32_t>(h);
stepHeight = std::min(chunkHeight, extent.height - h);
uint32_t stagingBufferSize = rowPitchSize * (stepHeight / blockSize.second);
IntrusivePtr<CCVKGPUBufferView> stagingBuffer = device->gpuStagingBufferPool()->alloc(stagingBufferSize, offsetAlignment);
for (uint32_t j = 0; j < stepHeight; j += blockSize.second) {
memcpy(stagingBuffer->mappedData() + destOffset, buffers[idx] + buffOffset, destRowSize);
destOffset += rowPitchSize;
buffOffset += buffStrideSize;
}
VkBufferImageCopy stagingRegion;
stagingRegion.bufferOffset = stagingBuffer->offset;
stagingRegion.bufferRowLength = rowPitch;
stagingRegion.bufferImageHeight = stepHeight;
stagingRegion.imageSubresource = {gpuTexture->aspectMask, mipLevel, l + baseLayer, 1};
stagingRegion.imageOffset = {offset.x, offset.y + heightOffset, offset.z + static_cast<int>(depth)};
stagingRegion.imageExtent = {destWidth, std::min(stepHeight, destHeight - heightOffset), 1};
vkCmdCopyBufferToImage(gpuCommandBuffer->vkCommandBuffer, stagingBuffer->gpuBuffer->vkBuffer, gpuTexture->vkImage,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &stagingRegion);
}
}
idx++;
}
}
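// generate the remaining mip levels on the GPU via vkCmdBlitImage when the format supports linear-filtered blits:
// each level is transitioned to TRANSFER_READ, blitted into the next level, and the blitted chain is finally restored to TRANSFER_WRITE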
if (hasFlag(gpuTexture->flags, TextureFlags::GEN_MIPMAP)) {
VkFormatProperties formatProperties;
vkGetPhysicalDeviceFormatProperties(device->gpuContext()->physicalDevice, mapVkFormat(gpuTexture->format, device->gpuDevice()), &formatProperties);
VkFormatFeatureFlags mipmapFeatures = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT | VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
if (formatProperties.optimalTilingFeatures & mipmapFeatures) {
int width = static_cast<int>(gpuTexture->width);
int height = static_cast<int>(gpuTexture->height);
VkImageBlit blitInfo{};
blitInfo.srcSubresource.aspectMask = gpuTexture->aspectMask;
blitInfo.srcSubresource.layerCount = gpuTexture->arrayLayers;
blitInfo.dstSubresource.aspectMask = gpuTexture->aspectMask;
blitInfo.dstSubresource.layerCount = gpuTexture->arrayLayers;
blitInfo.srcOffsets[1] = {width, height, 1};
blitInfo.dstOffsets[1] = {std::max(width >> 1, 1), std::max(height >> 1, 1), 1};
barrier.subresourceRange.levelCount = 1;
barrier.prevAccessCount = 1;
barrier.pPrevAccesses = getAccessType(AccessFlagBit::TRANSFER_WRITE);
barrier.pNextAccesses = getAccessType(AccessFlagBit::TRANSFER_READ);
for (uint32_t i = 1U; i < gpuTexture->mipLevels; ++i) {
barrier.subresourceRange.baseMipLevel = i - 1;
cmdFuncCCVKImageMemoryBarrier(gpuCommandBuffer, barrier);
blitInfo.srcSubresource.mipLevel = i - 1;
blitInfo.dstSubresource.mipLevel = i;
vkCmdBlitImage(gpuCommandBuffer->vkCommandBuffer, gpuTexture->vkImage, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
gpuTexture->vkImage, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &blitInfo, VK_FILTER_LINEAR);
const int32_t w = blitInfo.srcOffsets[1].x = blitInfo.dstOffsets[1].x;
const int32_t h = blitInfo.srcOffsets[1].y = blitInfo.dstOffsets[1].y;
blitInfo.dstOffsets[1].x = std::max(w >> 1, 1);
blitInfo.dstOffsets[1].y = std::max(h >> 1, 1);
}
barrier.subresourceRange.baseMipLevel = 0;
barrier.subresourceRange.levelCount = gpuTexture->mipLevels - 1;
barrier.pPrevAccesses = getAccessType(AccessFlagBit::TRANSFER_READ);
barrier.pNextAccesses = getAccessType(AccessFlagBit::TRANSFER_WRITE);
cmdFuncCCVKImageMemoryBarrier(gpuCommandBuffer, barrier);
} else {
const char *formatName = GFX_FORMAT_INFOS[toNumber(gpuTexture->format)].name.c_str();
CC_LOG_WARNING("cmdFuncCCVKCopyBuffersToTexture: generating mipmaps for %s is not supported on this device", formatName);
}
}
curTypes.assign({THSVS_ACCESS_TRANSFER_WRITE});
gpuTexture->transferAccess = THSVS_ACCESS_TRANSFER_WRITE;
device->gpuBarrierManager()->checkIn(gpuTexture);
}
void cmdFuncCCVKCopyTextureToBuffers(CCVKDevice *device, CCVKGPUTexture *srcTexture, CCVKGPUBufferView *destBuffer,
const BufferTextureCopy *regions, uint32_t count, const CCVKGPUCommandBuffer *gpuCommandBuffer) {
ccstd::vector<ThsvsAccessType> &curTypes = srcTexture->currentAccessTypes;
ThsvsImageBarrier barrier{};
barrier.image = srcTexture->vkImage;
barrier.discardContents = false;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS;
barrier.subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS;
barrier.subresourceRange.aspectMask = srcTexture->aspectMask;
barrier.prevAccessCount = utils::toUint(curTypes.size());
barrier.pPrevAccesses = curTypes.data();
barrier.nextAccessCount = 1;
barrier.pNextAccesses = getAccessType(AccessFlagBit::TRANSFER_READ);
if (srcTexture->transferAccess != THSVS_ACCESS_TRANSFER_READ) {
cmdFuncCCVKImageMemoryBarrier(gpuCommandBuffer, barrier);
}
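// pack the regions tightly one after another into the destination buffer; a bufferRowLength/bufferImageHeight
// of zero makes Vulkan treat the data as tightly packed with the image extent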
ccstd::vector<VkBufferImageCopy> stagingRegions(count);
VkDeviceSize offset = 0;
for (size_t i = 0U; i < count; ++i) {
const BufferTextureCopy &region = regions[i];
VkBufferImageCopy &stagingRegion = stagingRegions[i];
stagingRegion.bufferOffset = destBuffer->offset + offset;
stagingRegion.bufferRowLength = region.buffStride;
stagingRegion.bufferImageHeight = region.buffTexHeight;
stagingRegion.imageSubresource = {srcTexture->aspectMask, region.texSubres.mipLevel, region.texSubres.baseArrayLayer, region.texSubres.layerCount};
stagingRegion.imageOffset = {region.texOffset.x, region.texOffset.y, region.texOffset.z};
stagingRegion.imageExtent = {region.texExtent.width, region.texExtent.height, region.texExtent.depth};
uint32_t w = region.buffStride > 0 ? region.buffStride : region.texExtent.width;
uint32_t h = region.buffTexHeight > 0 ? region.buffTexHeight : region.texExtent.height;
uint32_t regionSize = formatSize(srcTexture->format, w, h, region.texExtent.depth);
offset += regionSize;
}
vkCmdCopyImageToBuffer(gpuCommandBuffer->vkCommandBuffer, srcTexture->vkImage, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
destBuffer->gpuBuffer->vkBuffer, utils::toUint(stagingRegions.size()), stagingRegions.data());
curTypes.assign({THSVS_ACCESS_TRANSFER_READ});
srcTexture->transferAccess = THSVS_ACCESS_TRANSFER_READ;
device->gpuBarrierManager()->checkIn(srcTexture);
}
void cmdFuncCCVKDestroyQueryPool(CCVKGPUDevice *gpuDevice, CCVKGPUQueryPool *gpuQueryPool) {
if (gpuQueryPool->vkPool != VK_NULL_HANDLE) {
vkDestroyQueryPool(gpuDevice->vkDevice, gpuQueryPool->vkPool, nullptr);
gpuQueryPool->vkPool = VK_NULL_HANDLE;
}
}
void cmdFuncCCVKDestroyRenderPass(CCVKGPUDevice *gpuDevice, CCVKGPURenderPass *gpuRenderPass) {
if (gpuRenderPass->vkRenderPass != VK_NULL_HANDLE) {
vkDestroyRenderPass(gpuDevice->vkDevice, gpuRenderPass->vkRenderPass, nullptr);
gpuRenderPass->vkRenderPass = VK_NULL_HANDLE;
}
}
void cmdFuncCCVKDestroySampler(CCVKGPUDevice *gpuDevice, CCVKGPUSampler *gpuSampler) {
if (gpuSampler->vkSampler != VK_NULL_HANDLE) {
vkDestroySampler(gpuDevice->vkDevice, gpuSampler->vkSampler, nullptr);
gpuSampler->vkSampler = VK_NULL_HANDLE;
}
}
void cmdFuncCCVKDestroyShader(CCVKGPUDevice *gpuDevice, CCVKGPUShader *gpuShader) {
for (CCVKGPUShaderStage &stage : gpuShader->gpuStages) {
vkDestroyShaderModule(gpuDevice->vkDevice, stage.vkShader, nullptr);
stage.vkShader = VK_NULL_HANDLE;
}
}
void cmdFuncCCVKDestroyDescriptorSetLayout(CCVKGPUDevice *gpuDevice, CCVKGPUDescriptorSetLayout *gpuDescriptorSetLayout) {
if (gpuDescriptorSetLayout->vkDescriptorUpdateTemplate != VK_NULL_HANDLE) {
if (gpuDevice->minorVersion > 0) {
vkDestroyDescriptorUpdateTemplate(gpuDevice->vkDevice, gpuDescriptorSetLayout->vkDescriptorUpdateTemplate, nullptr);
} else {
vkDestroyDescriptorUpdateTemplateKHR(gpuDevice->vkDevice, gpuDescriptorSetLayout->vkDescriptorUpdateTemplate, nullptr);
}
gpuDescriptorSetLayout->vkDescriptorUpdateTemplate = VK_NULL_HANDLE;
}
if (gpuDescriptorSetLayout->vkDescriptorSetLayout != VK_NULL_HANDLE) {
vkDestroyDescriptorSetLayout(gpuDevice->vkDevice, gpuDescriptorSetLayout->vkDescriptorSetLayout, nullptr);
gpuDescriptorSetLayout->vkDescriptorSetLayout = VK_NULL_HANDLE;
}
}
void cmdFuncCCVKDestroyPipelineLayout(CCVKGPUDevice *gpuDevice, CCVKGPUPipelineLayout *gpuPipelineLayout) {
if (gpuPipelineLayout->vkPipelineLayout != VK_NULL_HANDLE) {
vkDestroyPipelineLayout(gpuDevice->vkDevice, gpuPipelineLayout->vkPipelineLayout, nullptr);
gpuPipelineLayout->vkPipelineLayout = VK_NULL_HANDLE;
}
}
void cmdFuncCCVKDestroyPipelineState(CCVKGPUDevice *gpuDevice, CCVKGPUPipelineState *gpuPipelineState) {
if (gpuPipelineState->vkPipeline != VK_NULL_HANDLE) {
vkDestroyPipeline(gpuDevice->vkDevice, gpuPipelineState->vkPipeline, nullptr);
gpuPipelineState->vkPipeline = VK_NULL_HANDLE;
}
}
void cmdFuncCCVKImageMemoryBarrier(const CCVKGPUCommandBuffer *gpuCommandBuffer, const ThsvsImageBarrier &imageBarrier) {
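// seed the stage masks with TOP/BOTTOM_OF_PIPE so they are never zero, which vkCmdPipelineBarrier does not allow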
VkPipelineStageFlags srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
VkPipelineStageFlags dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
VkPipelineStageFlags tempSrcStageMask = 0;
VkPipelineStageFlags tempDstStageMask = 0;
VkImageMemoryBarrier vkBarrier;
thsvsGetVulkanImageMemoryBarrier(imageBarrier, &tempSrcStageMask, &tempDstStageMask, &vkBarrier);
srcStageMask |= tempSrcStageMask;
dstStageMask |= tempDstStageMask;
vkCmdPipelineBarrier(gpuCommandBuffer->vkCommandBuffer, srcStageMask, dstStageMask, 0, 0, nullptr, 0, nullptr, 1, &vkBarrier);
}
const CCVKGPUGeneralBarrier *CCVKGPURenderPass::getBarrier(size_t index, CCVKGPUDevice *gpuDevice) const {
if (index < colorAttachments.size()) {
return colorAttachments[index].barrier ? static_cast<CCVKGeneralBarrier *>(colorAttachments[index].barrier)->gpuBarrier() : &gpuDevice->defaultColorBarrier;
}
return depthStencilAttachment.barrier ? static_cast<CCVKGeneralBarrier *>(depthStencilAttachment.barrier)->gpuBarrier() : &gpuDevice->defaultDepthStencilBarrier;
}
bool CCVKGPURenderPass::hasShadingAttachment(uint32_t subPassId) const {
CC_ASSERT(subPassId < subpasses.size());
return subpasses[subPassId].shadingRate != INVALID_BINDING;
}
void CCVKGPUBarrierManager::update(CCVKGPUTransportHub *transportHub) {
if (_buffersToBeChecked.empty() && _texturesToBeChecked.empty()) return;
static ccstd::vector<ThsvsAccessType> prevAccesses;
static ccstd::vector<ThsvsAccessType> nextAccesses;
static ccstd::vector<VkImageMemoryBarrier> vkImageBarriers;
VkPipelineStageFlags srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
VkPipelineStageFlags dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
vkImageBarriers.clear();
prevAccesses.clear();
nextAccesses.clear();
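// buffers only need one global memory barrier: gather every pending transfer access as the source
// and all recorded render accesses as the destination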
for (CCVKGPUBuffer *gpuBuffer : _buffersToBeChecked) {
ccstd::vector<ThsvsAccessType> &render = gpuBuffer->renderAccessTypes;
if (gpuBuffer->transferAccess == THSVS_ACCESS_NONE) continue;
if (std::find(prevAccesses.begin(), prevAccesses.end(), gpuBuffer->transferAccess) == prevAccesses.end()) {
prevAccesses.push_back(gpuBuffer->transferAccess);
}
nextAccesses.insert(nextAccesses.end(), render.begin(), render.end());
gpuBuffer->transferAccess = THSVS_ACCESS_NONE;
}
VkMemoryBarrier vkBarrier;
VkMemoryBarrier *pVkBarrier = nullptr;
if (!prevAccesses.empty()) {
ThsvsGlobalBarrier globalBarrier{};
globalBarrier.prevAccessCount = utils::toUint(prevAccesses.size());
globalBarrier.pPrevAccesses = prevAccesses.data();
globalBarrier.nextAccessCount = utils::toUint(nextAccesses.size());
globalBarrier.pNextAccesses = nextAccesses.data();
VkPipelineStageFlags tempSrcStageMask = 0;
VkPipelineStageFlags tempDstStageMask = 0;
thsvsGetVulkanMemoryBarrier(globalBarrier, &tempSrcStageMask, &tempDstStageMask, &vkBarrier);
srcStageMask |= tempSrcStageMask;
dstStageMask |= tempDstStageMask;
pVkBarrier = &vkBarrier;
}
ThsvsImageBarrier imageBarrier{};
imageBarrier.discardContents = false;
imageBarrier.prevLayout = THSVS_IMAGE_LAYOUT_OPTIMAL;
imageBarrier.nextLayout = THSVS_IMAGE_LAYOUT_OPTIMAL;
imageBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
imageBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
imageBarrier.subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS;
imageBarrier.subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS;
imageBarrier.prevAccessCount = 1;
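// each texture gets its own image barrier, from its pending transfer access to its recorded render accesses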
for (CCVKGPUTexture *gpuTexture : _texturesToBeChecked) {
ccstd::vector<ThsvsAccessType> &render = gpuTexture->renderAccessTypes;
if (gpuTexture->transferAccess == THSVS_ACCESS_NONE || render.empty()) continue;
ccstd::vector<ThsvsAccessType> &current = gpuTexture->currentAccessTypes;
imageBarrier.pPrevAccesses = &gpuTexture->transferAccess;
imageBarrier.nextAccessCount = utils::toUint(render.size());
imageBarrier.pNextAccesses = render.data();
imageBarrier.image = gpuTexture->vkImage;
imageBarrier.subresourceRange.aspectMask = gpuTexture->aspectMask;
VkPipelineStageFlags tempSrcStageMask = 0;
VkPipelineStageFlags tempDstStageMask = 0;
vkImageBarriers.emplace_back();
thsvsGetVulkanImageMemoryBarrier(imageBarrier, &tempSrcStageMask, &tempDstStageMask, &(vkImageBarriers.back()));
srcStageMask |= tempSrcStageMask;
dstStageMask |= tempDstStageMask;
// don't override any other access changes since this barrier always happens first
if (current.size() == 1 && current[0] == gpuTexture->transferAccess) {
current = render;
}
gpuTexture->transferAccess = THSVS_ACCESS_NONE;
}
if (pVkBarrier || !vkImageBarriers.empty()) {
transportHub->checkIn([&](CCVKGPUCommandBuffer *gpuCommandBuffer) {
vkCmdPipelineBarrier(gpuCommandBuffer->vkCommandBuffer, srcStageMask, dstStageMask, 0,
pVkBarrier ? 1 : 0, pVkBarrier, 0, nullptr, utils::toUint(vkImageBarriers.size()), vkImageBarriers.data());
});
}
_buffersToBeChecked.clear();
_texturesToBeChecked.clear();
}
void CCVKGPUBufferHub::flush(CCVKGPUTransportHub *transportHub) {
auto &buffers = _buffersToBeUpdated[_device->curBackBufferIndex];
if (buffers.empty()) return;
bool needTransferCmds = false;
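// sync this frame's back-buffer instance from the source instance: updates that were written via plain memcpy
// can be copied on the host, the rest need a GPU-side vkCmdCopyBuffer recorded through the transport hub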
for (auto &buffer : buffers) {
if (buffer.second.canMemcpy) {
uint8_t *src = buffer.first->mappedData + buffer.second.srcIndex * buffer.first->instanceSize;
uint8_t *dst = buffer.first->mappedData + _device->curBackBufferIndex * buffer.first->instanceSize;
memcpy(dst, src, buffer.second.size);
} else {
needTransferCmds = true;
}
}
if (needTransferCmds) {
transportHub->checkIn([&](const CCVKGPUCommandBuffer *gpuCommandBuffer) {
VkBufferCopy region;
for (auto &buffer : buffers) {
if (buffer.second.canMemcpy) continue;
region.srcOffset = buffer.first->getStartOffset(buffer.second.srcIndex);
region.dstOffset = buffer.first->getStartOffset(_device->curBackBufferIndex);
region.size = buffer.second.size;
vkCmdCopyBuffer(gpuCommandBuffer->vkCommandBuffer, buffer.first->vkBuffer, buffer.first->vkBuffer, 1, &region);
}
});
}
buffers.clear();
}
} // namespace gfx
} // namespace cc