From b30a4c6bac999c0528a5fe19b3df14bcce9af8db Mon Sep 17 00:00:00 2001 From: yuchen Date: Tue, 31 Dec 2024 16:27:55 +0800 Subject: [PATCH] =?UTF-8?q?skia=E9=80=82=E9=85=8Dvulkan?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: yuchen --- src/gpu/ganesh/GrGpuBuffer.cpp | 2 +- src/gpu/ganesh/ops/SoftwarePathRenderer.cpp | 2 +- src/gpu/ganesh/vk/GrVkCaps.cpp | 23 ++++++++++++--------- src/gpu/ganesh/vk/GrVkCaps.h | 1 + src/gpu/ganesh/vk/GrVkRenderPass.cpp | 15 +++++++++++--- src/gpu/graphite/vk/VulkanCaps.cpp | 2 +- 6 files changed, 29 insertions(+), 16 deletions(-) diff --git a/src/gpu/ganesh/GrGpuBuffer.cpp b/src/gpu/ganesh/GrGpuBuffer.cpp index 011e3ae1..1bfd6b4c 100644 --- a/src/gpu/ganesh/GrGpuBuffer.cpp +++ b/src/gpu/ganesh/GrGpuBuffer.cpp @@ -89,7 +89,7 @@ void GrGpuBuffer::ComputeScratchKeyForDynamicBuffer(size_t size, } void GrGpuBuffer::computeScratchKey(skgpu::ScratchKey* key) const { - if (SkIsPow2(fSizeInBytes) && kDynamic_GrAccessPattern == fAccessPattern) { + if (kDynamic_GrAccessPattern == fAccessPattern) { ComputeScratchKeyForDynamicBuffer(fSizeInBytes, fIntendedType, key); } } diff --git a/src/gpu/ganesh/ops/SoftwarePathRenderer.cpp b/src/gpu/ganesh/ops/SoftwarePathRenderer.cpp index b6a9e5a6..b1f32870 100644 --- a/src/gpu/ganesh/ops/SoftwarePathRenderer.cpp +++ b/src/gpu/ganesh/ops/SoftwarePathRenderer.cpp @@ -297,7 +297,7 @@ bool SoftwarePathRenderer::onDrawPath(const DrawPathArgs& args) { builder[0] = boundsForMask->width(); builder[1] = boundsForMask->height(); -#ifdef SK_BUILD_FOR_ANDROID_FRAMEWORK +#if defined(SK_BUILD_FOR_ANDROID_FRAMEWORK) || defined(SK_BUILD_FOR_OHOS) // Fractional translate does not affect caching on Android. This is done for better cache // hit ratio and speed, but it is matching HWUI behavior, which doesn't consider the matrix // at all when caching paths. 
diff --git a/src/gpu/ganesh/vk/GrVkCaps.cpp b/src/gpu/ganesh/vk/GrVkCaps.cpp index 9e19ec17..b5d225ec 100644 --- a/src/gpu/ganesh/vk/GrVkCaps.cpp +++ b/src/gpu/ganesh/vk/GrVkCaps.cpp @@ -420,7 +420,7 @@ void GrVkCaps::init(const GrContextOptions& contextOptions, // we do expect this to be a big win on tilers. // // On ARM devices we are seeing an average perf win of around 50%-60% across the board. - if (kARM_VkVendor == properties.vendorID) { + if (kARM_VkVendor == properties.vendorID || kHisi_VkVendor == properties.vendorID) { fPreferDiscardableMSAAAttachment = true; fSupportsMemorylessAttachments = true; } @@ -524,13 +524,13 @@ void GrVkCaps::applyDriverCorrectnessWorkarounds(const VkPhysicalDevicePropertie } // On Mali galaxy s7 we see lots of rendering issues when we suballocate VkImages. - if (kARM_VkVendor == properties.vendorID && androidAPIVersion <= 28) { + if ((kARM_VkVendor == properties.vendorID && androidAPIVersion <= 28) || kHisi_VkVendor == properties.vendorID) { fShouldAlwaysUseDedicatedImageMemory = true; } // On Mali galaxy s7 and s9 we see lots of rendering issues with image filters dropping out when // using only primary command buffers. We also see issues on the P30 running android 28. - if (kARM_VkVendor == properties.vendorID && androidAPIVersion <= 28) { + if ((kARM_VkVendor == properties.vendorID && androidAPIVersion <= 28) || kHisi_VkVendor == properties.vendorID) { fPreferPrimaryOverSecondaryCommandBuffers = false; // If we are using secondary command buffers our code isn't setup to insert barriers into // the secondary cb so we need to disable support for them. @@ -548,7 +548,7 @@ void GrVkCaps::applyDriverCorrectnessWorkarounds(const VkPhysicalDevicePropertie // On the Mali G76 and T880, the Perlin noise code needs to aggressively snap to multiples // of 1/255 to avoid artifacts in the double table lookup. 
- if (kARM_VkVendor == properties.vendorID) { + if (kARM_VkVendor == properties.vendorID || kHisi_VkVendor == properties.vendorID) { fShaderCaps->fPerlinNoiseRoundingFix = true; } @@ -562,7 +562,8 @@ void GrVkCaps::applyDriverCorrectnessWorkarounds(const VkPhysicalDevicePropertie // On Qualcomm and Arm the gpu resolves an area larger than the render pass bounds when using // discardable msaa attachments. This causes the resolve to resolve uninitialized data from the // msaa image into the resolve image. - if (kQualcomm_VkVendor == properties.vendorID || kARM_VkVendor == properties.vendorID) { + if (kQualcomm_VkVendor == properties.vendorID || kARM_VkVendor == properties.vendorID + || kHisi_VkVendor == properties.vendorID) { fMustLoadFullImageWithDiscardableMSAA = true; } @@ -587,7 +588,9 @@ void GrVkCaps::applyDriverCorrectnessWorkarounds(const VkPhysicalDevicePropertie if (kARM_VkVendor == properties.vendorID) { fAvoidWritePixelsFastPath = true; // bugs.skia.org/8064 } - + if (kHisi_VkVendor == properties.vendorID) { + fAvoidWritePixelsFastPath = false; // Hisi: explicitly clears the flag; the ARM workaround (bugs.skia.org/8064) is not applied + } // AMD advertises support for MAX_UINT vertex input attributes, but in reality only supports 32. if (kAMD_VkVendor == properties.vendorID) { fMaxVertexAttributes = std::min(fMaxVertexAttributes, 32); @@ -618,7 +621,7 @@ void GrVkCaps::applyDriverCorrectnessWorkarounds(const VkPhysicalDevicePropertie // On ARM indirect draws are broken on Android 9 and earlier. This was tested on a P30 and // Mate 20x running android 9. - if (properties.vendorID == kARM_VkVendor && androidAPIVersion <= 28) { + if ((properties.vendorID == kARM_VkVendor && androidAPIVersion <= 28) || kHisi_VkVendor == properties.vendorID) { fNativeDrawIndirectSupport = false; } @@ -632,7 +635,7 @@ void GrVkCaps::applyDriverCorrectnessWorkarounds(const VkPhysicalDevicePropertie // ARM GPUs calculate `matrix * vector` in SPIR-V at full precision, even when the inputs are // RelaxedPrecision. 
Rewriting the multiply as a sum of vector*scalar fixes this. (skia:11769) - if (kARM_VkVendor == properties.vendorID) { + if (kARM_VkVendor == properties.vendorID || kHisi_VkVendor == properties.vendorID) { fShaderCaps->fRewriteMatrixVectorMultiply = true; } } @@ -711,7 +714,7 @@ void GrVkCaps::initGrCaps(const skgpu::VulkanInterface* vkInterface, } } - if (kARM_VkVendor == properties.vendorID) { + if (kARM_VkVendor == properties.vendorID || kHisi_VkVendor == properties.vendorID) { fShouldCollapseSrcOverToSrcWhenAble = true; } } @@ -746,7 +749,7 @@ void GrVkCaps::initShaderCaps(const VkPhysicalDeviceProperties& properties, // Assume the minimum precisions mandated by the SPIR-V spec. shaderCaps->fFloatIs32Bits = true; shaderCaps->fHalfIs32Bits = false; - + shaderCaps->fFBFetchSupport = true; shaderCaps->fMaxFragmentSamplers = std::min( std::min(properties.limits.maxPerStageDescriptorSampledImages, properties.limits.maxPerStageDescriptorSamplers), diff --git a/src/gpu/ganesh/vk/GrVkCaps.h b/src/gpu/ganesh/vk/GrVkCaps.h index a6086eda..1608d1a1 100644 --- a/src/gpu/ganesh/vk/GrVkCaps.h +++ b/src/gpu/ganesh/vk/GrVkCaps.h @@ -280,6 +280,7 @@ private: enum VkVendor { kAMD_VkVendor = 4098, kARM_VkVendor = 5045, + kHisi_VkVendor = 6629, kImagination_VkVendor = 4112, kIntel_VkVendor = 32902, kNvidia_VkVendor = 4318, diff --git a/src/gpu/ganesh/vk/GrVkRenderPass.cpp b/src/gpu/ganesh/vk/GrVkRenderPass.cpp index 468b72f6..aa72b0e6 100644 --- a/src/gpu/ganesh/vk/GrVkRenderPass.cpp +++ b/src/gpu/ganesh/vk/GrVkRenderPass.cpp @@ -154,6 +154,7 @@ GrVkRenderPass* GrVkRenderPass::Create(GrVkGpu* gpu, VkSubpassDependency dependencies[2]; int currentDependency = 0; + bool skipSetSubpassDep = false; if (attachmentFlags & kColor_AttachmentFlag) { // set up color attachment @@ -195,6 +196,8 @@ GrVkRenderPass* GrVkRenderPass::Create(GrVkGpu* gpu, dependency.dstStageMask |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; dependency.dstAccessMask |= VK_ACCESS_INPUT_ATTACHMENT_READ_BIT; + + 
skipSetSubpassDep = true; } } @@ -297,9 +300,15 @@ GrVkRenderPass* GrVkRenderPass::Create(GrVkGpu* gpu, createInfo.pAttachments = attachments.begin(); createInfo.subpassCount = subpassCount; createInfo.pSubpasses = subpassDescs; - createInfo.dependencyCount = currentDependency; - createInfo.pDependencies = dependencies; - + // Non-spec workaround: when skipSetSubpassDep is set and it is the only dependency recorded, create the render pass with no subpass dependencies. + if (skipSetSubpassDep && currentDependency == 1) { + createInfo.dependencyCount = 0; + createInfo.pDependencies = nullptr; + } else { + createInfo.dependencyCount = currentDependency; + createInfo.pDependencies = dependencies; + } + VkResult result; VkRenderPass renderPass; GR_VK_CALL_RESULT(gpu, result, CreateRenderPass(gpu->device(), diff --git a/src/gpu/graphite/vk/VulkanCaps.cpp b/src/gpu/graphite/vk/VulkanCaps.cpp index b3540fc8..3d9fed72 100644 --- a/src/gpu/graphite/vk/VulkanCaps.cpp +++ b/src/gpu/graphite/vk/VulkanCaps.cpp @@ -54,7 +54,7 @@ void VulkanCaps::init(const skgpu::VulkanInterface* vkInterface, // Enable the use of memoryless attachments for tiler GPUs (ARM Mali and Qualcomm Adreno). if (physDevProperties.vendorID == kARM_VkVendor || - physDevProperties.vendorID == kQualcomm_VkVendor) { + physDevProperties.vendorID == kQualcomm_VkVendor || kHisi_VkVendor == physDevProperties.vendorID) { fSupportsMemorylessAttachments = true; } -- Gitee