/****************************************************************************
 Copyright (c) 2020-2023 Xiamen Yaji Software Co., Ltd.

 http://www.cocos.com

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights to
 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 of the Software, and to permit persons to whom the Software is furnished to do so,
 subject to the following conditions:

 The above copyright notice and this permission notice shall be included in
 all copies or substantial portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
****************************************************************************/

#include "VKDevice.h"
|
|
#include "VKBuffer.h"
|
|
#include "VKCommandBuffer.h"
|
|
#include "VKCommands.h"
|
|
#include "VKDescriptorSet.h"
|
|
#include "VKDescriptorSetLayout.h"
|
|
#include "VKFramebuffer.h"
|
|
#include "VKGPUObjects.h"
|
|
#include "VKInputAssembler.h"
|
|
#include "VKPipelineCache.h"
|
|
#include "VKPipelineLayout.h"
|
|
#include "VKPipelineState.h"
|
|
#include "VKQueryPool.h"
|
|
#include "VKQueue.h"
|
|
#include "VKRenderPass.h"
|
|
#include "VKShader.h"
|
|
#include "VKSwapchain.h"
|
|
#include "VKTexture.h"
|
|
#include "VKUtils.h"
|
|
#include "base/Utils.h"
|
|
#include "gfx-base/GFXDef-common.h"
|
|
#include "states/VKBufferBarrier.h"
|
|
#include "states/VKGeneralBarrier.h"
|
|
#include "states/VKSampler.h"
|
|
#include "states/VKTextureBarrier.h"
|
|
|
|
#include "application/ApplicationManager.h"
|
|
#include "gfx-base/SPIRVUtils.h"
|
|
#include "platform/interfaces/modules/IXRInterface.h"
|
|
#include "profiler/Profiler.h"
|
|
|
|
#if CC_SWAPPY_ENABLED
|
|
#include "swappy/swappyVk.h"
|
|
#endif
|
|
|
|
CC_DISABLE_WARNINGS()
#define VMA_IMPLEMENTATION
#include "vk_mem_alloc.h"
#define THSVS_ERROR_CHECK_MIXED_IMAGE_LAYOUT
// remove the potential-hazard checks because of programmable blend
//#define THSVS_ERROR_CHECK_POTENTIAL_HAZARD
#define THSVS_SIMPLER_VULKAN_SYNCHRONIZATION_IMPLEMENTATION
#include "thsvs_simpler_vulkan_synchronization.h"
CC_ENABLE_WARNINGS()

namespace cc {
namespace gfx {

static VkResult VKAPI_PTR vkCreateRenderPass2KHRFallback(
    VkDevice device,
    const VkRenderPassCreateInfo2 *pCreateInfo,
    const VkAllocationCallbacks *pAllocator,
    VkRenderPass *pRenderPass);

CCVKDevice *CCVKDevice::instance = nullptr;

CCVKDevice *CCVKDevice::getInstance() {
    return CCVKDevice::instance;
}

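// Vulkan clip space differs from GL: depth spans [0, 1] rather than [-1, 1],
// and the Y axis points down in both clip space and screen space, hence the
// sign factors set up below.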
CCVKDevice::CCVKDevice() {
    _api = API::VULKAN;
    _deviceName = "Vulkan";

    _caps.supportQuery = true;
    _caps.clipSpaceMinZ = 0.0F;
    _caps.screenSpaceSignY = -1.0F;
    _caps.clipSpaceSignY = -1.0F;
    CCVKDevice::instance = this;
}

CCVKDevice::~CCVKDevice() {
    CCVKDevice::instance = nullptr;
}

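// Device bring-up: create the logical device with a minimal extension set,
// gather capability limits, hook up the VMA allocator, and build the default
// resources (sampler, texture, buffer) the rest of the backend relies on.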
bool CCVKDevice::doInit(const DeviceInfo & /*info*/) {
    _xr = CC_GET_XR_INTERFACE();
    if (_xr) {
        _xr->preGFXDeviceInitialize(_api);
    }
    _gpuContext = std::make_unique<CCVKGPUContext>();
    if (!_gpuContext->initialize()) {
        return false;
    }

    const VkPhysicalDeviceFeatures2 &deviceFeatures2 = _gpuContext->physicalDeviceFeatures2;
    const VkPhysicalDeviceFeatures &deviceFeatures = deviceFeatures2.features;
    // const VkPhysicalDeviceVulkan11Features &deviceVulkan11Features = _gpuContext->physicalDeviceVulkan11Features;
    // const VkPhysicalDeviceVulkan12Features &deviceVulkan12Features = _gpuContext->physicalDeviceVulkan12Features;

    ///////////////////// Device Creation /////////////////////

    _gpuDevice = std::make_unique<CCVKGPUDevice>();
    _gpuDevice->minorVersion = _gpuContext->minorVersion;

    // only enable the absolute essentials
    ccstd::vector<const char *> requestedLayers{};
    ccstd::vector<const char *> requestedExtensions{
        VK_KHR_SWAPCHAIN_EXTENSION_NAME,
    };
    requestedExtensions.push_back(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME);
#if CC_DEBUG
    requestedExtensions.push_back(VK_EXT_DEBUG_MARKER_EXTENSION_NAME);
#endif
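    // On older API versions, request the extensions whose functionality was
    // later promoted to core: render pass 2 became core in 1.2; dedicated
    // allocations, memory-requirement queries, and descriptor update
    // templates became core in 1.1.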
    if (_gpuDevice->minorVersion < 2) {
        requestedExtensions.push_back(VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME);
    }
    if (_gpuDevice->minorVersion < 1) {
        requestedExtensions.push_back(VK_KHR_DEDICATED_ALLOCATION_EXTENSION_NAME);
        requestedExtensions.push_back(VK_KHR_GET_MEMORY_REQUIREMENTS_2_EXTENSION_NAME);
        requestedExtensions.push_back(VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME);
    }

    VkPhysicalDeviceFeatures2 requestedFeatures2{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2};
    VkPhysicalDeviceVulkan11Features requestedVulkan11Features{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES};
    VkPhysicalDeviceVulkan12Features requestedVulkan12Features{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES};
    // only request the features the physical device actually reported as supported:
    requestedFeatures2.features.textureCompressionASTC_LDR = deviceFeatures.textureCompressionASTC_LDR;
    requestedFeatures2.features.textureCompressionBC = deviceFeatures.textureCompressionBC;
    requestedFeatures2.features.textureCompressionETC2 = deviceFeatures.textureCompressionETC2;
    requestedFeatures2.features.samplerAnisotropy = deviceFeatures.samplerAnisotropy;
    requestedFeatures2.features.depthBounds = deviceFeatures.depthBounds;
    requestedFeatures2.features.multiDrawIndirect = deviceFeatures.multiDrawIndirect;
    // requestedFeatures2.features.se
    requestedVulkan12Features.separateDepthStencilLayouts = _gpuContext->physicalDeviceVulkan12Features.separateDepthStencilLayouts;

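    // Fragment shading rate is an extension feature, so its struct is chained
    // onto the Vulkan 1.2 features via pNext rather than set directly.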
    VkPhysicalDeviceFragmentShadingRateFeaturesKHR shadingRateRequest = {};
    shadingRateRequest.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR;
    shadingRateRequest.attachmentFragmentShadingRate = _gpuContext->physicalDeviceFragmentShadingRateFeatures.attachmentFragmentShadingRate;
    shadingRateRequest.pipelineFragmentShadingRate = _gpuContext->physicalDeviceFragmentShadingRateFeatures.pipelineFragmentShadingRate;

    requestedVulkan12Features.pNext = &shadingRateRequest;

    if (_gpuContext->validationEnabled) {
        requestedLayers.push_back("VK_LAYER_KHRONOS_validation");
    }

    // check the available layers & extensions
    uint32_t availableLayerCount;
    VK_CHECK(vkEnumerateDeviceLayerProperties(_gpuContext->physicalDevice, &availableLayerCount, nullptr));
    _gpuDevice->layers.resize(availableLayerCount);
    VK_CHECK(vkEnumerateDeviceLayerProperties(_gpuContext->physicalDevice, &availableLayerCount, _gpuDevice->layers.data()));

    uint32_t availableExtensionCount;
    VK_CHECK(vkEnumerateDeviceExtensionProperties(_gpuContext->physicalDevice, nullptr, &availableExtensionCount, nullptr));
    _gpuDevice->extensions.resize(availableExtensionCount);
    VK_CHECK(vkEnumerateDeviceExtensionProperties(_gpuContext->physicalDevice, nullptr, &availableExtensionCount, _gpuDevice->extensions.data()));

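    // SwappyVk is consulted twice: first to learn how many device extensions
    // it needs, then to fill in the actual extension names.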
#if CC_SWAPPY_ENABLED
    uint32_t swappyRequiredExtensionCount = 0;
    SwappyVk_determineDeviceExtensions(_gpuContext->physicalDevice, availableExtensionCount,
                                       _gpuDevice->extensions.data(), &swappyRequiredExtensionCount, nullptr);
    ccstd::vector<char *> swappyRequiredExtensions(swappyRequiredExtensionCount);
    ccstd::vector<char> swappyRequiredExtensionsData(swappyRequiredExtensionCount * (VK_MAX_EXTENSION_NAME_SIZE + 1));
    for (uint32_t i = 0; i < swappyRequiredExtensionCount; i++) {
        swappyRequiredExtensions[i] = &swappyRequiredExtensionsData[i * (VK_MAX_EXTENSION_NAME_SIZE + 1)];
    }
    SwappyVk_determineDeviceExtensions(_gpuContext->physicalDevice, availableExtensionCount,
                                       _gpuDevice->extensions.data(), &swappyRequiredExtensionCount, swappyRequiredExtensions.data());
    ccstd::vector<ccstd::string> swappyRequiredExtList(swappyRequiredExtensionCount);

    for (size_t i = 0; i < swappyRequiredExtensionCount; ++i) {
        swappyRequiredExtList[i] = swappyRequiredExtensions[i];
        requestedExtensions.push_back(swappyRequiredExtList[i].c_str());
    }
#endif

    // just filter out the unsupported layers & extensions
    for (const char *layer : requestedLayers) {
        if (isLayerSupported(layer, _gpuDevice->layers)) {
            _layers.push_back(layer);
        }
    }
    for (const char *extension : requestedExtensions) {
        if (isExtensionSupported(extension, _gpuDevice->extensions)) {
            _extensions.push_back(extension);
        }
    }

    // prepare the device queues
    uint32_t queueFamilyPropertiesCount = utils::toUint(_gpuContext->queueFamilyProperties.size());
    ccstd::vector<VkDeviceQueueCreateInfo> queueCreateInfos(queueFamilyPropertiesCount, {VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO});
    ccstd::vector<ccstd::vector<float>> queuePriorities(queueFamilyPropertiesCount);

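    // Request every queue from every family, all at equal priority, so the
    // backend is free to pick a suitable family later.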
    for (uint32_t queueFamilyIndex = 0U; queueFamilyIndex < queueFamilyPropertiesCount; ++queueFamilyIndex) {
        const VkQueueFamilyProperties &queueFamilyProperty = _gpuContext->queueFamilyProperties[queueFamilyIndex];

        queuePriorities[queueFamilyIndex].resize(queueFamilyProperty.queueCount, 1.0F);

        VkDeviceQueueCreateInfo &queueCreateInfo = queueCreateInfos[queueFamilyIndex];

        queueCreateInfo.queueFamilyIndex = queueFamilyIndex;
        queueCreateInfo.queueCount = queueFamilyProperty.queueCount;
        queueCreateInfo.pQueuePriorities = queuePriorities[queueFamilyIndex].data();
    }

    VkDeviceCreateInfo deviceCreateInfo{VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO};

    deviceCreateInfo.queueCreateInfoCount = utils::toUint(queueCreateInfos.size());
    deviceCreateInfo.pQueueCreateInfos = queueCreateInfos.data();
    deviceCreateInfo.enabledLayerCount = utils::toUint(_layers.size());
    deviceCreateInfo.ppEnabledLayerNames = _layers.data();
    deviceCreateInfo.enabledExtensionCount = utils::toUint(_extensions.size());
    deviceCreateInfo.ppEnabledExtensionNames = _extensions.data();
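    // Vulkan 1.0 without VK_KHR_get_physical_device_properties2 only accepts
    // the plain VkPhysicalDeviceFeatures pointer; anything newer takes the
    // features2 struct through pNext, with the 1.1/1.2 feature structs
    // chained on when the runtime is 1.2+.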
    if (_gpuDevice->minorVersion < 1 && !_gpuContext->checkExtension(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME)) {
        deviceCreateInfo.pEnabledFeatures = &requestedFeatures2.features;
    } else {
        deviceCreateInfo.pNext = &requestedFeatures2;
        if (_gpuDevice->minorVersion >= 2) {
            requestedFeatures2.pNext = &requestedVulkan11Features;
            requestedVulkan11Features.pNext = &requestedVulkan12Features;
        }
    }

    if (_xr) {
        _gpuDevice->vkDevice = _xr->createXRVulkanDevice(&deviceCreateInfo);
    } else {
        VK_CHECK(vkCreateDevice(_gpuContext->physicalDevice, &deviceCreateInfo, nullptr, &_gpuDevice->vkDevice));
    }
    volkLoadDevice(_gpuDevice->vkDevice);

    SPIRVUtils::getInstance()->initialize(static_cast<int>(_gpuDevice->minorVersion));

    ///////////////////// Gather Device Properties /////////////////////

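    // Choose the depth and depth-stencil formats by probing optimal-tiling
    // support; the first candidate usable both as a depth-stencil attachment
    // and as a sampled image wins.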
    auto findPreferredDepthFormat = [this](const VkFormat *formats, uint32_t count, VkFormat *pFormat) {
        for (uint32_t i = 0; i < count; ++i) {
            VkFormat format = formats[i];
            VkFormatProperties formatProperties;
            vkGetPhysicalDeviceFormatProperties(_gpuContext->physicalDevice, format, &formatProperties);
            // Format must support depth stencil attachment for optimal tiling
            if (formatProperties.optimalTilingFeatures & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT) {
                if (formatProperties.optimalTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT) {
                    *pFormat = format;
                    break;
                }
            }
        }
    };

    VkFormat depthFormatPriorityList[]{
        VK_FORMAT_D32_SFLOAT,
        VK_FORMAT_X8_D24_UNORM_PACK32,
        VK_FORMAT_D16_UNORM,
    };
    findPreferredDepthFormat(depthFormatPriorityList, 3, &_gpuDevice->depthFormat);

    VkFormat depthStencilFormatPriorityList[]{
        VK_FORMAT_D24_UNORM_S8_UINT,
        VK_FORMAT_D32_SFLOAT_S8_UINT,
        VK_FORMAT_D16_UNORM_S8_UINT,
    };
    findPreferredDepthFormat(depthStencilFormatPriorityList, 3, &_gpuDevice->depthStencilFormat);

    initDeviceFeature();
    initFormatFeature();

    ccstd::string compressedFmts;

    if (getFormatFeatures(Format::BC1_SRGB_ALPHA) != FormatFeature::NONE) {
        compressedFmts += "dxt ";
    }

    if (getFormatFeatures(Format::ETC2_RGBA8) != FormatFeature::NONE) {
        compressedFmts += "etc2 ";
    }

    if (getFormatFeatures(Format::ASTC_RGBA_4X4) != FormatFeature::NONE) {
        compressedFmts += "astc ";
    }

    if (getFormatFeatures(Format::PVRTC_RGBA2) != FormatFeature::NONE) {
        compressedFmts += "pvrtc ";
    }

    _gpuDevice->useMultiDrawIndirect = deviceFeatures.multiDrawIndirect;
    _gpuDevice->useDescriptorUpdateTemplate = _gpuDevice->minorVersion > 0 || checkExtension(VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME);

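    // Resolve vkCreateRenderPass2: prefer the core entry point (1.2+), then
    // the KHR extension, and finally a local fallback that down-converts the
    // *2 structures to plain Vulkan 1.0 calls.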
    if (_gpuDevice->minorVersion > 1) {
        _gpuDevice->createRenderPass2 = vkCreateRenderPass2;
    } else if (checkExtension(VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME)) {
        _gpuDevice->createRenderPass2 = vkCreateRenderPass2KHR;
    } else {
        _gpuDevice->createRenderPass2 = vkCreateRenderPass2KHRFallback;
    }

    const VkPhysicalDeviceLimits &limits = _gpuContext->physicalDeviceProperties.limits;
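    // Uniform "vectors" are counted in vec4s (16 bytes each), hence the
    // division of maxUniformBufferRange by 16 below.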
    _caps.maxVertexAttributes = limits.maxVertexInputAttributes;
    _caps.maxVertexUniformVectors = limits.maxUniformBufferRange / 16;
    _caps.maxFragmentUniformVectors = limits.maxUniformBufferRange / 16;
    _caps.maxUniformBufferBindings = limits.maxDescriptorSetUniformBuffers;
    _caps.maxUniformBlockSize = limits.maxUniformBufferRange;
    _caps.maxShaderStorageBlockSize = limits.maxStorageBufferRange;
    _caps.maxShaderStorageBufferBindings = limits.maxDescriptorSetStorageBuffers;
    _caps.maxTextureUnits = limits.maxDescriptorSetSampledImages;
    _caps.maxVertexTextureUnits = limits.maxPerStageDescriptorSampledImages;
    _caps.maxColorRenderTargets = limits.maxColorAttachments;
    _caps.maxTextureSize = limits.maxImageDimension2D;
    _caps.maxCubeMapTextureSize = limits.maxImageDimensionCube;
    _caps.maxArrayTextureLayers = limits.maxImageArrayLayers;
    _caps.max3DTextureSize = limits.maxImageDimension3D;
    _caps.uboOffsetAlignment = utils::toUint(limits.minUniformBufferOffsetAlignment);
    // compute shaders
    _caps.maxComputeSharedMemorySize = limits.maxComputeSharedMemorySize;
    _caps.maxComputeWorkGroupInvocations = limits.maxComputeWorkGroupInvocations;
    _caps.maxComputeWorkGroupCount = {limits.maxComputeWorkGroupCount[0], limits.maxComputeWorkGroupCount[1], limits.maxComputeWorkGroupCount[2]};
    _caps.maxComputeWorkGroupSize = {limits.maxComputeWorkGroupSize[0], limits.maxComputeWorkGroupSize[1], limits.maxComputeWorkGroupSize[2]};
#if defined(VK_USE_PLATFORM_ANDROID_KHR)
    // UNASSIGNED-BestPractices-vkCreateComputePipelines-compute-work-group-size
    _caps.maxComputeWorkGroupInvocations = std::min(_caps.maxComputeWorkGroupInvocations, 64U);
#endif // defined(VK_USE_PLATFORM_ANDROID_KHR)
    initExtensionCapability();

    ///////////////////// Resource Initialization /////////////////////

    QueueInfo queueInfo;
    queueInfo.type = QueueType::GRAPHICS;
    _queue = createQueue(queueInfo);

    QueryPoolInfo queryPoolInfo{QueryType::OCCLUSION, DEFAULT_MAX_QUERY_OBJECTS, false};
    _queryPool = createQueryPool(queryPoolInfo);

    CommandBufferInfo cmdBuffInfo;
    cmdBuffInfo.type = CommandBufferType::PRIMARY;
    cmdBuffInfo.queue = _queue;
    _cmdBuff = createCommandBuffer(cmdBuffInfo);

    VmaAllocatorCreateInfo allocatorInfo{};
    allocatorInfo.physicalDevice = _gpuContext->physicalDevice;
    allocatorInfo.device = _gpuDevice->vkDevice;
    allocatorInfo.instance = _gpuContext->vkInstance;

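    // Hand VMA the function pointers volk loaded for this device; on 1.1+ the
    // core *2 entry points are used, otherwise the KHR extension variants
    // when available.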
    VmaVulkanFunctions vmaVulkanFunc{};
    vmaVulkanFunc.vkAllocateMemory = vkAllocateMemory;
    vmaVulkanFunc.vkBindBufferMemory = vkBindBufferMemory;
    vmaVulkanFunc.vkBindImageMemory = vkBindImageMemory;
    vmaVulkanFunc.vkCreateBuffer = vkCreateBuffer;
    vmaVulkanFunc.vkCreateImage = vkCreateImage;
    vmaVulkanFunc.vkDestroyBuffer = vkDestroyBuffer;
    vmaVulkanFunc.vkDestroyImage = vkDestroyImage;
    vmaVulkanFunc.vkFlushMappedMemoryRanges = vkFlushMappedMemoryRanges;
    vmaVulkanFunc.vkFreeMemory = vkFreeMemory;
    vmaVulkanFunc.vkGetBufferMemoryRequirements = vkGetBufferMemoryRequirements;
    vmaVulkanFunc.vkGetImageMemoryRequirements = vkGetImageMemoryRequirements;
    vmaVulkanFunc.vkGetPhysicalDeviceMemoryProperties = vkGetPhysicalDeviceMemoryProperties;
    vmaVulkanFunc.vkGetPhysicalDeviceProperties = vkGetPhysicalDeviceProperties;
    vmaVulkanFunc.vkInvalidateMappedMemoryRanges = vkInvalidateMappedMemoryRanges;
    vmaVulkanFunc.vkMapMemory = vkMapMemory;
    vmaVulkanFunc.vkUnmapMemory = vkUnmapMemory;
    vmaVulkanFunc.vkCmdCopyBuffer = vkCmdCopyBuffer;

    if (_gpuDevice->minorVersion > 0) {
        allocatorInfo.flags |= VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT;
        vmaVulkanFunc.vkGetBufferMemoryRequirements2KHR = vkGetBufferMemoryRequirements2;
        vmaVulkanFunc.vkGetImageMemoryRequirements2KHR = vkGetImageMemoryRequirements2;
        vmaVulkanFunc.vkBindBufferMemory2KHR = vkBindBufferMemory2;
        vmaVulkanFunc.vkBindImageMemory2KHR = vkBindImageMemory2;
    } else {
        if (checkExtension(VK_KHR_DEDICATED_ALLOCATION_EXTENSION_NAME) &&
            checkExtension(VK_KHR_GET_MEMORY_REQUIREMENTS_2_EXTENSION_NAME)) {
            allocatorInfo.flags |= VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT;
            vmaVulkanFunc.vkGetBufferMemoryRequirements2KHR = vkGetBufferMemoryRequirements2KHR;
            vmaVulkanFunc.vkGetImageMemoryRequirements2KHR = vkGetImageMemoryRequirements2KHR;
        }
        if (checkExtension(VK_KHR_BIND_MEMORY_2_EXTENSION_NAME)) {
            vmaVulkanFunc.vkBindBufferMemory2KHR = vkBindBufferMemory2KHR;
            vmaVulkanFunc.vkBindImageMemory2KHR = vkBindImageMemory2KHR;
        }
    }
    if (checkExtension(VK_EXT_MEMORY_BUDGET_EXTENSION_NAME)) {
        if (_gpuDevice->minorVersion > 0) {
            vmaVulkanFunc.vkGetPhysicalDeviceMemoryProperties2KHR = vkGetPhysicalDeviceMemoryProperties2;
        } else if (checkExtension(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME)) {
            vmaVulkanFunc.vkGetPhysicalDeviceMemoryProperties2KHR = vkGetPhysicalDeviceMemoryProperties2KHR;
        }
    }

    allocatorInfo.pVulkanFunctions = &vmaVulkanFunc;

    VK_CHECK(vmaCreateAllocator(&allocatorInfo, &_gpuDevice->memoryAllocator));

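    // One fence pool, recycle bin, and staging buffer pool per back buffer,
    // so each in-flight frame recycles its own transient resources.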
    uint32_t backBufferCount = _gpuDevice->backBufferCount;
    for (uint32_t i = 0U; i < backBufferCount; i++) {
        _gpuFencePools.push_back(std::make_unique<CCVKGPUFencePool>(_gpuDevice.get()));
        _gpuRecycleBins.push_back(std::make_unique<CCVKGPURecycleBin>(_gpuDevice.get()));
        _gpuStagingBufferPools.push_back(std::make_unique<CCVKGPUStagingBufferPool>(_gpuDevice.get()));
    }

    _gpuBufferHub = std::make_unique<CCVKGPUBufferHub>(_gpuDevice.get());
    _gpuIAHub = std::make_unique<CCVKGPUInputAssemblerHub>(_gpuDevice.get());
    _gpuTransportHub = std::make_unique<CCVKGPUTransportHub>(_gpuDevice.get(), static_cast<CCVKQueue *>(_queue)->gpuQueue());
    _gpuDescriptorHub = std::make_unique<CCVKGPUDescriptorHub>(_gpuDevice.get());
    _gpuSemaphorePool = std::make_unique<CCVKGPUSemaphorePool>(_gpuDevice.get());
    _gpuBarrierManager = std::make_unique<CCVKGPUBarrierManager>(_gpuDevice.get());
    _gpuDescriptorSetHub = std::make_unique<CCVKGPUDescriptorSetHub>(_gpuDevice.get());

    _gpuDevice->defaultSampler = ccnew CCVKGPUSampler();
    _gpuDevice->defaultSampler->init();

    _gpuDevice->defaultTexture = ccnew CCVKGPUTexture();
    _gpuDevice->defaultTexture->format = Format::RGBA8;
    _gpuDevice->defaultTexture->usage = TextureUsageBit::SAMPLED | TextureUsage::STORAGE;
    _gpuDevice->defaultTexture->width = _gpuDevice->defaultTexture->height = 1U;
    _gpuDevice->defaultTexture->size = formatSize(Format::RGBA8, 1U, 1U, 1U);
    _gpuDevice->defaultTexture->init();

    _gpuDevice->defaultTextureView = ccnew CCVKGPUTextureView();
    _gpuDevice->defaultTextureView->gpuTexture = _gpuDevice->defaultTexture;
    _gpuDevice->defaultTextureView->format = Format::RGBA8;
    _gpuDevice->defaultTextureView->init();

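    // Transition the default texture so it can be read as a texture from the
    // vertex shader stage onward; the barrier is recorded through the
    // transport hub.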
    ThsvsImageBarrier barrier{};
    barrier.nextAccessCount = 1;
    barrier.pNextAccesses = getAccessType(AccessFlagBit::VERTEX_SHADER_READ_TEXTURE);
    barrier.image = _gpuDevice->defaultTexture->vkImage;
    barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
    barrier.subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS;
    barrier.subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS;
    gpuTransportHub()->checkIn(
        [&barrier](const CCVKGPUCommandBuffer *gpuCommandBuffer) {
            cmdFuncCCVKImageMemoryBarrier(gpuCommandBuffer, barrier);
        },
        true);

    _gpuDevice->defaultBuffer = ccnew CCVKGPUBuffer();
    _gpuDevice->defaultBuffer->usage = BufferUsage::UNIFORM | BufferUsage::STORAGE;
    _gpuDevice->defaultBuffer->memUsage = MemoryUsage::HOST | MemoryUsage::DEVICE;
    _gpuDevice->defaultBuffer->size = _gpuDevice->defaultBuffer->stride = 16U;
    _gpuDevice->defaultBuffer->count = 1U;
    _gpuDevice->defaultBuffer->init();

    getAccessTypes(AccessFlagBit::COLOR_ATTACHMENT_WRITE, _gpuDevice->defaultColorBarrier.nextAccesses);
    cmdFuncCCVKCreateGeneralBarrier(this, &_gpuDevice->defaultColorBarrier);

    getAccessTypes(AccessFlagBit::DEPTH_STENCIL_ATTACHMENT_WRITE, _gpuDevice->defaultDepthStencilBarrier.nextAccesses);
    cmdFuncCCVKCreateGeneralBarrier(this, &_gpuDevice->defaultDepthStencilBarrier);

    _pipelineCache = std::make_unique<CCVKPipelineCache>();
    _pipelineCache->init(_gpuDevice->vkDevice);

    ///////////////////// Print Debug Info /////////////////////

    ccstd::string instanceLayers;
    ccstd::string instanceExtensions;
    ccstd::string deviceLayers;
    ccstd::string deviceExtensions;
    for (const char *layer : _gpuContext->layers) {
        instanceLayers += layer + ccstd::string(" ");
    }
    for (const char *extension : _gpuContext->extensions) {
        instanceExtensions += extension + ccstd::string(" ");
    }
    for (const char *layer : _layers) {
        deviceLayers += layer + ccstd::string(" ");
    }
    for (const char *extension : _extensions) {
        deviceExtensions += extension + ccstd::string(" ");
    }

    uint32_t apiVersion = _gpuContext->physicalDeviceProperties.apiVersion;
    _renderer = _gpuContext->physicalDeviceProperties.deviceName;
    _vendor = mapVendorName(_gpuContext->physicalDeviceProperties.vendorID);
    _version = StringUtil::format("%d.%d.%d", VK_VERSION_MAJOR(apiVersion),
                                  VK_VERSION_MINOR(apiVersion), VK_VERSION_PATCH(apiVersion));

    CC_LOG_INFO("Vulkan device initialized.");
    CC_LOG_INFO("RENDERER: %s", _renderer.c_str());
    CC_LOG_INFO("VENDOR: %s", _vendor.c_str());
    CC_LOG_INFO("VERSION: %s", _version.c_str());
    CC_LOG_INFO("INSTANCE_LAYERS: %s", instanceLayers.c_str());
    CC_LOG_INFO("INSTANCE_EXTENSIONS: %s", instanceExtensions.c_str());
    CC_LOG_INFO("DEVICE_LAYERS: %s", deviceLayers.c_str());
    CC_LOG_INFO("DEVICE_EXTENSIONS: %s", deviceExtensions.c_str());
    CC_LOG_INFO("COMPRESSED_FORMATS: %s", compressedFmts.c_str());

    if (_xr) {
        cc::gfx::CCVKGPUQueue *vkQueue = static_cast<cc::gfx::CCVKQueue *>(getQueue())->gpuQueue();
        _xr->setXRConfig(xr::XRConfigKey::VK_QUEUE_FAMILY_INDEX, static_cast<int>(vkQueue->queueFamilyIndex));
        _xr->postGFXDeviceInitialize(_api);
    }
    return true;
}

void CCVKDevice::doDestroy() {
    waitAllFences();

    SPIRVUtils::getInstance()->destroy();

    if (_gpuDevice) {
        _gpuDevice->defaultBuffer = nullptr;
        _gpuDevice->defaultTexture = nullptr;
        _gpuDevice->defaultTextureView = nullptr;
        _gpuDevice->defaultSampler = nullptr;
    }

    CC_SAFE_DESTROY_AND_DELETE(_queryPool)
    CC_SAFE_DESTROY_AND_DELETE(_queue)
    CC_SAFE_DESTROY_AND_DELETE(_cmdBuff)

    _gpuStagingBufferPools.clear();
    _gpuFencePools.clear();

    _gpuBufferHub = nullptr;
    _gpuTransportHub = nullptr;
    _gpuSemaphorePool = nullptr;
    _gpuDescriptorHub = nullptr;
    _gpuBarrierManager = nullptr;
    _gpuDescriptorSetHub = nullptr;
    _gpuIAHub = nullptr;

    if (_gpuDevice) {
        uint32_t backBufferCount = _gpuDevice->backBufferCount;
        for (uint32_t i = 0U; i < backBufferCount; i++) {
            _gpuRecycleBins[i]->clear();
        }
    }
    _gpuStagingBufferPools.clear();
    _gpuRecycleBins.clear();
    _gpuFencePools.clear();

    if (_gpuDevice) {
        _pipelineCache.reset();

        if (_gpuDevice->memoryAllocator != VK_NULL_HANDLE) {
            VmaStats stats;
            vmaCalculateStats(_gpuDevice->memoryAllocator, &stats);
            CC_LOG_INFO("Total device memory leaked: %d bytes.", stats.total.usedBytes);
            CC_ASSERT(!_memoryStatus.bufferSize);  // Buffer memory leaked.
            CC_ASSERT(!_memoryStatus.textureSize); // Texture memory leaked.

            vmaDestroyAllocator(_gpuDevice->memoryAllocator);
            _gpuDevice->memoryAllocator = VK_NULL_HANDLE;
        }

        for (auto it = _gpuDevice->_commandBufferPools.begin(); it != _gpuDevice->_commandBufferPools.end(); ++it) {
            CC_SAFE_DELETE(it->second)
        }
        _gpuDevice->_commandBufferPools.clear();
        _gpuDevice->_descriptorSetPools.clear();

        if (_gpuDevice->vkDevice != VK_NULL_HANDLE) {
            vkDestroyDevice(_gpuDevice->vkDevice, nullptr);
            _gpuDevice->vkDevice = VK_NULL_HANDLE;
        }

        _gpuDevice = nullptr;
    }

    _gpuContext = nullptr;
}

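// Frame-scoped scratch state shared between acquire() and present(), plus
// template barriers: acquire transitions each swapchain image from
// PRESENT_SRC to COLOR_ATTACHMENT_OPTIMAL, present transitions it back.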
namespace {
ccstd::vector<VkSwapchainKHR> vkSwapchains;
ccstd::vector<uint32_t> vkSwapchainIndices;
ccstd::vector<CCVKGPUSwapchain *> gpuSwapchains;
ccstd::vector<VkImageMemoryBarrier> vkAcquireBarriers;
ccstd::vector<VkImageMemoryBarrier> vkPresentBarriers;

VkImageMemoryBarrier acquireBarrier{
    VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
    nullptr,
    0,
    VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
    VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
    VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
    VK_QUEUE_FAMILY_IGNORED,
    VK_QUEUE_FAMILY_IGNORED,
    0, // NOLINT(modernize-use-nullptr) platform dependent type
    {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1},
};
VkImageMemoryBarrier presentBarrier{
    VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
    nullptr,
    VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
    0,
    VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
    VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
    VK_QUEUE_FAMILY_IGNORED,
    VK_QUEUE_FAMILY_IGNORED,
    0, // NOLINT(modernize-use-nullptr) platform dependent type
    {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1},
};
} // namespace

void CCVKDevice::acquire(Swapchain *const *swapchains, uint32_t count) {
    if (_onAcquire) _onAcquire->execute();

    auto *queue = static_cast<CCVKQueue *>(_queue);
    queue->gpuQueue()->lastSignaledSemaphores.clear();
    vkSwapchainIndices.clear();
    gpuSwapchains.clear();
    vkSwapchains.clear();
    vkAcquireBarriers.resize(count, acquireBarrier);
    vkPresentBarriers.resize(count, presentBarrier);
    for (uint32_t i = 0U; i < count; ++i) {
        auto *swapchain = static_cast<CCVKSwapchain *>(swapchains[i]);
        if (swapchain->gpuSwapchain()->lastPresentResult == VK_NOT_READY) {
            if (!swapchain->checkSwapchainStatus()) {
                continue;
            }
        }

        if (_xr) {
            xr::XRSwapchain xrSwapchain = _xr->doGFXDeviceAcquire(_api);
            swapchain->gpuSwapchain()->curImageIndex = xrSwapchain.swapchainImageIndex;
        }
        if (swapchain->gpuSwapchain()->vkSwapchain) {
            vkSwapchains.push_back(swapchain->gpuSwapchain()->vkSwapchain);
        }
        if (swapchain->gpuSwapchain()) {
            gpuSwapchains.push_back(swapchain->gpuSwapchain());
        }
        vkSwapchainIndices.push_back(swapchain->gpuSwapchain()->curImageIndex);
    }

    _gpuDescriptorSetHub->flush();
    _gpuSemaphorePool->reset();

    for (uint32_t i = 0; i < vkSwapchains.size(); ++i) {
        VkSemaphore acquireSemaphore = _gpuSemaphorePool->alloc();
        VkResult res = vkAcquireNextImageKHR(_gpuDevice->vkDevice, vkSwapchains[i], ~0ULL,
                                             acquireSemaphore, VK_NULL_HANDLE, &vkSwapchainIndices[i]);
        CC_ASSERT(res == VK_SUCCESS || res == VK_SUBOPTIMAL_KHR);
        gpuSwapchains[i]->curImageIndex = vkSwapchainIndices[i];
        queue->gpuQueue()->lastSignaledSemaphores.push_back(acquireSemaphore);

        vkAcquireBarriers[i].image = gpuSwapchains[i]->swapchainImages[vkSwapchainIndices[i]];
        vkPresentBarriers[i].image = gpuSwapchains[i]->swapchainImages[vkSwapchainIndices[i]];
    }

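    // When automatic barrier deduction is enabled, record the acquire barriers
    // at the front of the frame and the present barriers at its tail through
    // the transport hub.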
    if (this->_options.enableBarrierDeduce) {
        _gpuTransportHub->checkIn(
            [&](const CCVKGPUCommandBuffer *gpuCommandBuffer) {
                vkCmdPipelineBarrier(gpuCommandBuffer->vkCommandBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
                                     0, 0, nullptr, 0, nullptr, utils::toUint(vkSwapchains.size()), vkAcquireBarriers.data());
            },
            false, false);

        _gpuTransportHub->checkIn(
            [&](const CCVKGPUCommandBuffer *gpuCommandBuffer) {
                vkCmdPipelineBarrier(gpuCommandBuffer->vkCommandBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
                                     0, 0, nullptr, 0, nullptr, utils::toUint(vkSwapchains.size()), vkPresentBarriers.data());
            },
            false, true);
    }
}

void CCVKDevice::present() {
    CC_PROFILE(CCVKDevicePresent);
    bool isGFXDeviceNeedsPresent = _xr ? _xr->isGFXDeviceNeedsPresent(_api) : true;
    auto *queue = static_cast<CCVKQueue *>(_queue);
    _numDrawCalls = queue->_numDrawCalls;
    _numInstances = queue->_numInstances;
    _numTriangles = queue->_numTriangles;
    queue->_numDrawCalls = 0;
    queue->_numInstances = 0;
    queue->_numTriangles = 0;

    if (!_gpuTransportHub->empty(false)) _gpuTransportHub->packageForFlight(false);
    if (!_gpuTransportHub->empty(true)) _gpuTransportHub->packageForFlight(true);

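    // With Swappy enabled, presentation goes through SwappyVk_queuePresent for
    // Android frame pacing; otherwise plain vkQueuePresentKHR is used.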
#if CC_SWAPPY_ENABLED
    // triple buffer?
    // static vector<uint8_t> queueFmlIdxBuff(_gpuDevice->backBufferCount);
    // std::iota(std::begin(queueFmlIdxBuff), std::end(queueFmlIdxBuff), 0);
    SwappyVk_setQueueFamilyIndex(_gpuDevice->vkDevice, queue->gpuQueue()->vkQueue, queue->gpuQueue()->queueFamilyIndex);
    auto vkCCPresentFunc = SwappyVk_queuePresent;
#else
    auto vkCCPresentFunc = vkQueuePresentKHR;
#endif

    if (!vkSwapchains.empty()) { // don't present if not acquired
        VkPresentInfoKHR presentInfo{VK_STRUCTURE_TYPE_PRESENT_INFO_KHR};
        presentInfo.waitSemaphoreCount = utils::toUint(queue->gpuQueue()->lastSignaledSemaphores.size());
        presentInfo.pWaitSemaphores = queue->gpuQueue()->lastSignaledSemaphores.data();
        presentInfo.swapchainCount = utils::toUint(vkSwapchains.size());
        presentInfo.pSwapchains = vkSwapchains.data();
        presentInfo.pImageIndices = vkSwapchainIndices.data();

        VkResult res = !isGFXDeviceNeedsPresent ? VK_SUCCESS : vkCCPresentFunc(queue->gpuQueue()->vkQueue, &presentInfo);
        for (auto *gpuSwapchain : gpuSwapchains) {
            gpuSwapchain->lastPresentResult = res;
        }
    }

    _gpuDevice->curBackBufferIndex = (_gpuDevice->curBackBufferIndex + 1) % _gpuDevice->backBufferCount;

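    // Wait on the fences of the frame that previously used the now-current
    // back buffer before recycling its pools, so the CPU never runs more than
    // backBufferCount frames ahead of the GPU.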
    uint32_t fenceCount = gpuFencePool()->size();
    if (fenceCount) {
        VK_CHECK(vkWaitForFences(_gpuDevice->vkDevice, fenceCount,
                                 gpuFencePool()->data(), VK_TRUE, DEFAULT_TIMEOUT));
    }

    gpuFencePool()->reset();
    gpuRecycleBin()->clear();
    gpuStagingBufferPool()->reset();
    if (_xr) {
        _xr->postGFXDevicePresent(_api);
    }
}

void CCVKDevice::frameSync() {
}

CCVKGPUFencePool *CCVKDevice::gpuFencePool() { return _gpuFencePools[_gpuDevice->curBackBufferIndex].get(); }
CCVKGPURecycleBin *CCVKDevice::gpuRecycleBin() { return _gpuRecycleBins[_gpuDevice->curBackBufferIndex].get(); }
CCVKGPUStagingBufferPool *CCVKDevice::gpuStagingBufferPool() { return _gpuStagingBufferPools[_gpuDevice->curBackBufferIndex].get(); }

void CCVKDevice::waitAllFences() {
    static ccstd::vector<VkFence> fences;
    fences.clear();

    for (auto &fencePool : _gpuFencePools) {
        fences.insert(fences.end(), fencePool->data(), fencePool->data() + fencePool->size());
    }

    if (!fences.empty()) {
        VK_CHECK(vkWaitForFences(_gpuDevice->vkDevice, utils::toUint(fences.size()), fences.data(), VK_TRUE, DEFAULT_TIMEOUT));

        for (auto &fencePool : _gpuFencePools) {
            fencePool->reset();
        }
    }
}

void CCVKDevice::updateBackBufferCount(uint32_t backBufferCount) {
    if (backBufferCount <= _gpuDevice->backBufferCount) return;
    for (uint32_t i = _gpuDevice->backBufferCount; i < backBufferCount; i++) {
        _gpuFencePools.push_back(std::make_unique<CCVKGPUFencePool>(_gpuDevice.get()));
        _gpuRecycleBins.push_back(std::make_unique<CCVKGPURecycleBin>(_gpuDevice.get()));
        _gpuStagingBufferPools.push_back(std::make_unique<CCVKGPUStagingBufferPool>(_gpuDevice.get()));
    }
    _gpuBufferHub->updateBackBufferCount(backBufferCount);
    _gpuDescriptorSetHub->updateBackBufferCount(backBufferCount);
    _gpuDevice->backBufferCount = backBufferCount;
}

void CCVKDevice::initDeviceFeature() {
    _features[toNumber(Feature::ELEMENT_INDEX_UINT)] = true;
    _features[toNumber(Feature::INSTANCED_ARRAYS)] = true;
    _features[toNumber(Feature::MULTIPLE_RENDER_TARGETS)] = true;
    _features[toNumber(Feature::BLEND_MINMAX)] = true;
    _features[toNumber(Feature::COMPUTE_SHADER)] = true;
    _features[toNumber(Feature::INPUT_ATTACHMENT_BENEFIT)] = true;
    _features[toNumber(Feature::SUBPASS_COLOR_INPUT)] = true;
    _features[toNumber(Feature::SUBPASS_DEPTH_STENCIL_INPUT)] = true;
    _features[toNumber(Feature::RASTERIZATION_ORDER_NOCOHERENT)] = true;
    _features[toNumber(Feature::MULTI_SAMPLE_RESOLVE_DEPTH_STENCIL)] = checkExtension("VK_KHR_depth_stencil_resolve");

    _gpuContext->debugReport = _gpuContext->checkExtension(VK_EXT_DEBUG_REPORT_EXTENSION_NAME) &&
                               checkExtension(VK_EXT_DEBUG_MARKER_EXTENSION_NAME) &&
                               (vkCmdDebugMarkerBeginEXT != nullptr) &&
                               (vkCmdDebugMarkerInsertEXT != nullptr) &&
                               (vkCmdDebugMarkerEndEXT != nullptr);
    _gpuContext->debugUtils = _gpuContext->checkExtension(VK_EXT_DEBUG_UTILS_EXTENSION_NAME) &&
                              (vkCmdBeginDebugUtilsLabelEXT != nullptr) &&
                              (vkCmdInsertDebugUtilsLabelEXT != nullptr) &&
                              (vkCmdEndDebugUtilsLabelEXT != nullptr);
}

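// Walk every gfx::Format, query its VkFormatProperties once, and translate
// the reported tiling/buffer feature bits into the engine's FormatFeature mask.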
void CCVKDevice::initFormatFeature() {
    const auto formatLen = static_cast<size_t>(Format::COUNT);
    VkFormatProperties properties = {};
    VkFormat format = {};
    VkFormatFeatureFlags formatFeature = {};
    for (uint32_t i = toNumber(Format::R8); i < formatLen; ++i) {
        if (static_cast<Format>(i) == Format::ETC_RGB8) continue;
        format = mapVkFormat(static_cast<Format>(i), _gpuDevice.get());
        vkGetPhysicalDeviceFormatProperties(_gpuContext->physicalDevice, format, &properties);

        // render target support
        formatFeature = VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT;
        if (properties.optimalTilingFeatures & formatFeature) {
            _formatFeatures[i] |= FormatFeature::RENDER_TARGET;
        }
        // texture storage support
        formatFeature = VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT;
        if (properties.optimalTilingFeatures & formatFeature) {
            _formatFeatures[i] |= FormatFeature::STORAGE_TEXTURE;
        }
        // sampled texture support
        formatFeature = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT;
        if (properties.optimalTilingFeatures & formatFeature) {
            _formatFeatures[i] |= FormatFeature::SAMPLED_TEXTURE;
        }
        // linear filter support
        formatFeature = VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
        if (properties.optimalTilingFeatures & formatFeature) {
            _formatFeatures[i] |= FormatFeature::LINEAR_FILTER;
        }
        // vertex attribute support
        formatFeature = VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT;
        if (properties.bufferFeatures & formatFeature) {
            _formatFeatures[i] |= FormatFeature::VERTEX_ATTRIBUTE;
        }
        // shading rate support
        formatFeature = VK_FORMAT_FEATURE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR;
        if (properties.optimalTilingFeatures & formatFeature) {
            _formatFeatures[i] |= FormatFeature::SHADING_RATE;
        }
    }
}

void CCVKDevice::initExtensionCapability() {
    _caps.supportVariableRateShading = checkExtension(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME);
    _caps.supportVariableRateShading &= _gpuContext->physicalDeviceFragmentShadingRateFeatures.pipelineFragmentShadingRate &&
                                        _gpuContext->physicalDeviceFragmentShadingRateFeatures.attachmentFragmentShadingRate;
    _caps.supportVariableRateShading &= hasFlag(_formatFeatures[static_cast<uint32_t>(Format::R8UI)], FormatFeatureBit::SHADING_RATE);

    _caps.supportSubPassShading = checkExtension(VK_HUAWEI_SUBPASS_SHADING_EXTENSION_NAME);
}

CommandBuffer *CCVKDevice::createCommandBuffer(const CommandBufferInfo & /*info*/, bool /*hasAgent*/) {
    return ccnew CCVKCommandBuffer;
}

Queue *CCVKDevice::createQueue() {
    return ccnew CCVKQueue;
}

QueryPool *CCVKDevice::createQueryPool() {
    return ccnew CCVKQueryPool;
}

Swapchain *CCVKDevice::createSwapchain() {
    if (_xr) {
        _xr->createXRSwapchains();
    }
    return ccnew CCVKSwapchain;
}

Buffer *CCVKDevice::createBuffer() {
    return ccnew CCVKBuffer;
}

Texture *CCVKDevice::createTexture() {
    return ccnew CCVKTexture;
}

Shader *CCVKDevice::createShader() {
    return ccnew CCVKShader;
}

InputAssembler *CCVKDevice::createInputAssembler() {
    return ccnew CCVKInputAssembler;
}

RenderPass *CCVKDevice::createRenderPass() {
    return ccnew CCVKRenderPass;
}

Framebuffer *CCVKDevice::createFramebuffer() {
    return ccnew CCVKFramebuffer;
}

DescriptorSet *CCVKDevice::createDescriptorSet() {
    return ccnew CCVKDescriptorSet;
}

DescriptorSetLayout *CCVKDevice::createDescriptorSetLayout() {
    return ccnew CCVKDescriptorSetLayout;
}

PipelineLayout *CCVKDevice::createPipelineLayout() {
    return ccnew CCVKPipelineLayout;
}

PipelineState *CCVKDevice::createPipelineState() {
    return ccnew CCVKPipelineState;
}

Sampler *CCVKDevice::createSampler(const SamplerInfo &info) {
    return ccnew CCVKSampler(info);
}

GeneralBarrier *CCVKDevice::createGeneralBarrier(const GeneralBarrierInfo &info) {
    return ccnew CCVKGeneralBarrier(info);
}

TextureBarrier *CCVKDevice::createTextureBarrier(const TextureBarrierInfo &info) {
    return ccnew CCVKTextureBarrier(info);
}

BufferBarrier *CCVKDevice::createBufferBarrier(const BufferBarrierInfo &info) {
    return ccnew CCVKBufferBarrier(info);
}

void CCVKDevice::copyBuffersToTexture(const uint8_t *const *buffers, Texture *dst, const BufferTextureCopy *regions, uint32_t count) {
    CC_PROFILE(CCVKDeviceCopyBuffersToTexture);
    gpuTransportHub()->checkIn([this, buffers, dst, regions, count](CCVKGPUCommandBuffer *gpuCommandBuffer) {
        cmdFuncCCVKCopyBuffersToTexture(this, buffers, static_cast<CCVKTexture *>(dst)->gpuTexture(), regions, count, gpuCommandBuffer);
    });
}

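// Readback path: compute per-region offsets into one staging allocation, copy
// the texture into it on the transport queue, then memcpy each region out to
// the caller's buffers.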
void CCVKDevice::copyTextureToBuffers(Texture *srcTexture, uint8_t *const *buffers, const BufferTextureCopy *regions, uint32_t count) {
    CC_PROFILE(CCVKDeviceCopyTextureToBuffers);
    uint32_t totalSize = 0U;
    Format format = srcTexture->getFormat();
    ccstd::vector<std::pair<uint32_t, uint32_t>> regionOffsetSizes(count);
    for (size_t i = 0U; i < count; ++i) {
        const BufferTextureCopy &region = regions[i];
        uint32_t w = region.buffStride > 0 ? region.buffStride : region.texExtent.width;
        uint32_t h = region.buffTexHeight > 0 ? region.buffTexHeight : region.texExtent.height;
        uint32_t regionSize = formatSize(format, w, h, region.texExtent.depth);
        regionOffsetSizes[i] = {totalSize, regionSize};
        totalSize += regionSize;
    }

    uint32_t texelSize = GFX_FORMAT_INFOS[toNumber(format)].size;
    IntrusivePtr<CCVKGPUBufferView> stagingBuffer = gpuStagingBufferPool()->alloc(totalSize, texelSize);

    // make sure the src texture is up-to-date
    waitAllFences();

    _gpuTransportHub->checkIn(
        [&](CCVKGPUCommandBuffer *cmdBuffer) {
            cmdFuncCCVKCopyTextureToBuffers(this, static_cast<const CCVKTexture *>(srcTexture)->gpuTexture(), stagingBuffer, regions, count, cmdBuffer);
        },
        true);

    for (uint32_t i = 0; i < count; ++i) {
        uint32_t regionOffset = 0;
        uint32_t regionSize = 0;
        std::tie(regionOffset, regionSize) = regionOffsetSizes[i];
        memcpy(buffers[i], stagingBuffer->mappedData() + regionOffset, regionSize);
    }
}

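// Occlusion query readback: with forceWait the driver blocks until results
// land; otherwise each result is paired with an availability word and only
// available entries are accumulated.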
void CCVKDevice::getQueryPoolResults(QueryPool *queryPool) {
    CC_PROFILE(CCVKDeviceGetQueryPoolResults);
    auto *vkQueryPool = static_cast<CCVKQueryPool *>(queryPool);
    auto queryCount = static_cast<uint32_t>(vkQueryPool->_ids.size());
    CC_ASSERT(queryCount <= vkQueryPool->getMaxQueryObjects());

    const bool bWait = queryPool->getForceWait();
    uint32_t width = bWait ? 1U : 2U;
    uint64_t stride = sizeof(uint64_t) * width;
    VkQueryResultFlagBits flag = bWait ? VK_QUERY_RESULT_WAIT_BIT : VK_QUERY_RESULT_WITH_AVAILABILITY_BIT;
    ccstd::vector<uint64_t> results(queryCount * width, 0);

    if (queryCount > 0U) {
        VkResult result = vkGetQueryPoolResults(
            gpuDevice()->vkDevice,
            vkQueryPool->_gpuQueryPool->vkPool,
            0,
            queryCount,
            static_cast<size_t>(queryCount * stride),
            results.data(),
            stride,
            VK_QUERY_RESULT_64_BIT | flag);
        CC_ASSERT(result == VK_SUCCESS || result == VK_NOT_READY);
    }

    ccstd::unordered_map<uint32_t, uint64_t> mapResults;
    for (auto queryId = 0U; queryId < queryCount; queryId++) {
        uint32_t offset = queryId * width;
        if (bWait || results[offset + 1] > 0) {
            uint32_t id = vkQueryPool->_ids[queryId];
            auto iter = mapResults.find(id);
            if (iter != mapResults.end()) {
                iter->second += results[offset];
            } else {
                mapResults[id] = results[offset];
            }
        }
    }

    {
        std::lock_guard<std::mutex> lock(vkQueryPool->_mutex);
        vkQueryPool->_results = std::move(mapResults);
    }
}

//////////////////////////// Function Fallbacks /////////////////////////////////////////

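// Emulates vkCreateRenderPass2 on Vulkan 1.0 by flattening the *2 create-info
// structures into their original counterparts and calling vkCreateRenderPass.
// The static scratch vectors avoid per-call allocations but make the helper
// non-reentrant.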
static VkResult VKAPI_PTR vkCreateRenderPass2KHRFallback(
    VkDevice device,
    const VkRenderPassCreateInfo2 *pCreateInfo,
    const VkAllocationCallbacks *pAllocator,
    VkRenderPass *pRenderPass) {
    static ccstd::vector<VkAttachmentDescription> attachmentDescriptions;
    static ccstd::vector<VkSubpassDescription> subpassDescriptions;
    static ccstd::vector<VkAttachmentReference> attachmentReferences;
    static ccstd::vector<VkSubpassDependency> subpassDependencies;
    static ccstd::vector<size_t> inputs;
    static ccstd::vector<size_t> colors;
    static ccstd::vector<size_t> resolves;
    static ccstd::vector<size_t> depths;

    attachmentDescriptions.resize(pCreateInfo->attachmentCount);
    for (uint32_t i = 0; i < pCreateInfo->attachmentCount; ++i) {
        VkAttachmentDescription &desc{attachmentDescriptions[i]};
        const VkAttachmentDescription2 &desc2{pCreateInfo->pAttachments[i]};
        desc.flags = desc2.flags;
        desc.format = desc2.format;
        desc.samples = desc2.samples;
        desc.loadOp = desc2.loadOp;
        desc.storeOp = desc2.storeOp;
        desc.stencilLoadOp = desc2.stencilLoadOp;
        desc.stencilStoreOp = desc2.stencilStoreOp;
        desc.initialLayout = desc2.initialLayout;
        desc.finalLayout = desc2.finalLayout;
    }

    subpassDescriptions.resize(pCreateInfo->subpassCount);
    attachmentReferences.clear();
    inputs.assign(pCreateInfo->subpassCount, std::numeric_limits<size_t>::max());
    colors.assign(pCreateInfo->subpassCount, std::numeric_limits<size_t>::max());
    resolves.assign(pCreateInfo->subpassCount, std::numeric_limits<size_t>::max());
    depths.assign(pCreateInfo->subpassCount, std::numeric_limits<size_t>::max());
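    // size_t max() marks "this subpass has no such attachments"; valid entries
    // record where that subpass's references start within attachmentReferences,
    // which the second pass below turns back into pointers.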
    for (uint32_t i = 0; i < pCreateInfo->subpassCount; ++i) {
        const VkSubpassDescription2 &desc2{pCreateInfo->pSubpasses[i]};
        if (desc2.inputAttachmentCount) {
            inputs[i] = attachmentReferences.size();
            for (uint32_t j = 0; j < desc2.inputAttachmentCount; ++j) {
                attachmentReferences.push_back({desc2.pInputAttachments[j].attachment, desc2.pInputAttachments[j].layout});
            }
        }
        if (desc2.colorAttachmentCount) {
            colors[i] = attachmentReferences.size();
            for (uint32_t j = 0; j < desc2.colorAttachmentCount; ++j) {
                attachmentReferences.push_back({desc2.pColorAttachments[j].attachment, desc2.pColorAttachments[j].layout});
            }
            if (desc2.pResolveAttachments) {
                resolves[i] = attachmentReferences.size();
                for (uint32_t j = 0; j < desc2.colorAttachmentCount; ++j) {
                    attachmentReferences.push_back({desc2.pResolveAttachments[j].attachment, desc2.pResolveAttachments[j].layout});
                }
            }
        }
        if (desc2.pDepthStencilAttachment) {
            depths[i] = attachmentReferences.size();
            attachmentReferences.push_back({desc2.pDepthStencilAttachment->attachment, desc2.pDepthStencilAttachment->layout});
        }
    }
    for (uint32_t i = 0; i < pCreateInfo->subpassCount; ++i) {
        VkSubpassDescription &desc{subpassDescriptions[i]};
        const VkSubpassDescription2 &desc2{pCreateInfo->pSubpasses[i]};
        desc.flags = desc2.flags;
        desc.pipelineBindPoint = desc2.pipelineBindPoint;
        desc.inputAttachmentCount = desc2.inputAttachmentCount;
        desc.pInputAttachments = inputs[i] > attachmentReferences.size() ? nullptr : &attachmentReferences[inputs[i]];
        desc.colorAttachmentCount = desc2.colorAttachmentCount;
        desc.pColorAttachments = colors[i] > attachmentReferences.size() ? nullptr : &attachmentReferences[colors[i]];
        desc.pResolveAttachments = resolves[i] > attachmentReferences.size() ? nullptr : &attachmentReferences[resolves[i]];
        desc.pDepthStencilAttachment = depths[i] > attachmentReferences.size() ? nullptr : &attachmentReferences[depths[i]];
        desc.preserveAttachmentCount = desc2.preserveAttachmentCount;
        desc.pPreserveAttachments = desc2.pPreserveAttachments;
    }

    subpassDependencies.resize(pCreateInfo->dependencyCount);
    for (uint32_t i = 0; i < pCreateInfo->dependencyCount; ++i) {
        VkSubpassDependency &desc{subpassDependencies[i]};
        const VkSubpassDependency2 &desc2{pCreateInfo->pDependencies[i]};
        desc.srcSubpass = desc2.srcSubpass;
        desc.dstSubpass = desc2.dstSubpass;
        desc.srcStageMask = desc2.srcStageMask;
        desc.dstStageMask = desc2.dstStageMask;
        desc.srcAccessMask = desc2.srcAccessMask;
        desc.dstAccessMask = desc2.dstAccessMask;
        desc.dependencyFlags = desc2.dependencyFlags;
    }

    VkRenderPassCreateInfo renderPassCreateInfo{VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO};
    renderPassCreateInfo.attachmentCount = utils::toUint(attachmentDescriptions.size());
    renderPassCreateInfo.pAttachments = attachmentDescriptions.data();
    renderPassCreateInfo.subpassCount = utils::toUint(subpassDescriptions.size());
    renderPassCreateInfo.pSubpasses = subpassDescriptions.data();
    renderPassCreateInfo.dependencyCount = utils::toUint(subpassDependencies.size());
    renderPassCreateInfo.pDependencies = subpassDependencies.data();

    return vkCreateRenderPass(device, &renderPassCreateInfo, pAllocator, pRenderPass);
}

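// Reports the highest MSAA sample count the implementation supports for a
// 2D optimal-tiling image of the given format and usage.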
SampleCount CCVKDevice::getMaxSampleCount(Format format, TextureUsage usage, TextureFlags flags) const {
    auto vkFormat = mapVkFormat(format, gpuDevice());
    auto usages = mapVkImageUsageFlags(usage, flags);

    VkImageFormatProperties imageFormatProperties = {};
    vkGetPhysicalDeviceImageFormatProperties(_gpuContext->physicalDevice, vkFormat, VK_IMAGE_TYPE_2D,
                                             VK_IMAGE_TILING_OPTIMAL, usages, 0, &imageFormatProperties);

    if (imageFormatProperties.sampleCounts & VK_SAMPLE_COUNT_64_BIT) return SampleCount::X64;
    if (imageFormatProperties.sampleCounts & VK_SAMPLE_COUNT_32_BIT) return SampleCount::X32;
    if (imageFormatProperties.sampleCounts & VK_SAMPLE_COUNT_16_BIT) return SampleCount::X16;
    if (imageFormatProperties.sampleCounts & VK_SAMPLE_COUNT_8_BIT) return SampleCount::X8;
    if (imageFormatProperties.sampleCounts & VK_SAMPLE_COUNT_4_BIT) return SampleCount::X4;
    if (imageFormatProperties.sampleCounts & VK_SAMPLE_COUNT_2_BIT) return SampleCount::X2;

    return SampleCount::X1;
}

} // namespace gfx
} // namespace cc