From b30a4c6bac999c0528a5fe19b3df14bcce9af8db Mon Sep 17 00:00:00 2001
From: yuchen <yuchen49@huawei.com>
Date: Tue, 31 Dec 2024 16:27:55 +0800
Subject: [PATCH] =?UTF-8?q?skia=E9=80=82=E9=85=8Dvulkan?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: yuchen <yuchen49@huawei.com>
---
 src/gpu/ganesh/GrGpuBuffer.cpp              |  2 +-
 src/gpu/ganesh/ops/SoftwarePathRenderer.cpp |  2 +-
 src/gpu/ganesh/vk/GrVkCaps.cpp              | 23 ++++++++++++---------
 src/gpu/ganesh/vk/GrVkCaps.h                |  1 +
 src/gpu/ganesh/vk/GrVkRenderPass.cpp        | 15 +++++++++++---
 src/gpu/graphite/vk/VulkanCaps.cpp          |  2 +-
 6 files changed, 29 insertions(+), 16 deletions(-)

diff --git a/src/gpu/ganesh/GrGpuBuffer.cpp b/src/gpu/ganesh/GrGpuBuffer.cpp
index 011e3ae1..1bfd6b4c 100644
--- a/src/gpu/ganesh/GrGpuBuffer.cpp
+++ b/src/gpu/ganesh/GrGpuBuffer.cpp
@@ -89,7 +89,7 @@ void GrGpuBuffer::ComputeScratchKeyForDynamicBuffer(size_t size,
 }
 
 void GrGpuBuffer::computeScratchKey(skgpu::ScratchKey* key) const {
-    if (SkIsPow2(fSizeInBytes) && kDynamic_GrAccessPattern == fAccessPattern) {
+    if (kDynamic_GrAccessPattern == fAccessPattern) {
         ComputeScratchKeyForDynamicBuffer(fSizeInBytes, fIntendedType, key);
     }
 }
diff --git a/src/gpu/ganesh/ops/SoftwarePathRenderer.cpp b/src/gpu/ganesh/ops/SoftwarePathRenderer.cpp
index b6a9e5a6..b1f32870 100644
--- a/src/gpu/ganesh/ops/SoftwarePathRenderer.cpp
+++ b/src/gpu/ganesh/ops/SoftwarePathRenderer.cpp
@@ -297,7 +297,7 @@ bool SoftwarePathRenderer::onDrawPath(const DrawPathArgs& args) {
         builder[0] = boundsForMask->width();
         builder[1] = boundsForMask->height();
 
-#ifdef SK_BUILD_FOR_ANDROID_FRAMEWORK
+#if defined(SK_BUILD_FOR_ANDROID_FRAMEWORK) || defined(SK_BUILD_FOR_OHOS)
         // Fractional translate does not affect caching on Android. This is done for better cache
         // hit ratio and speed, but it is matching HWUI behavior, which doesn't consider the matrix
         // at all when caching paths.
diff --git a/src/gpu/ganesh/vk/GrVkCaps.cpp b/src/gpu/ganesh/vk/GrVkCaps.cpp
index 9e19ec17..b5d225ec 100644
--- a/src/gpu/ganesh/vk/GrVkCaps.cpp
+++ b/src/gpu/ganesh/vk/GrVkCaps.cpp
@@ -420,7 +420,7 @@ void GrVkCaps::init(const GrContextOptions& contextOptions,
     // we do expect this to be a big win on tilers.
     //
     // On ARM devices we are seeing an average perf win of around 50%-60% across the board.
-    if (kARM_VkVendor == properties.vendorID) {
+    if (kARM_VkVendor == properties.vendorID || kHisi_VkVendor == properties.vendorID) {
         fPreferDiscardableMSAAAttachment = true;
         fSupportsMemorylessAttachments = true;
     }
@@ -524,13 +524,13 @@ void GrVkCaps::applyDriverCorrectnessWorkarounds(const VkPhysicalDevicePropertie
     }
 
     // On Mali galaxy s7 we see lots of rendering issues when we suballocate VkImages.
-    if (kARM_VkVendor == properties.vendorID && androidAPIVersion <= 28) {
+    if ((kARM_VkVendor == properties.vendorID && androidAPIVersion <= 28) || kHisi_VkVendor == properties.vendorID) {
         fShouldAlwaysUseDedicatedImageMemory = true;
     }
 
     // On Mali galaxy s7 and s9 we see lots of rendering issues with image filters dropping out when
     // using only primary command buffers. We also see issues on the P30 running android 28.
-    if (kARM_VkVendor == properties.vendorID && androidAPIVersion <= 28) {
+    if ((kARM_VkVendor == properties.vendorID && androidAPIVersion <= 28) || kHisi_VkVendor == properties.vendorID) {
         fPreferPrimaryOverSecondaryCommandBuffers = false;
         // If we are using secondary command buffers our code isn't setup to insert barriers into
         // the secondary cb so we need to disable support for them.
@@ -548,7 +548,7 @@ void GrVkCaps::applyDriverCorrectnessWorkarounds(const VkPhysicalDevicePropertie
 
     // On the Mali G76 and T880, the Perlin noise code needs to aggressively snap to multiples
     // of 1/255 to avoid artifacts in the double table lookup.
-    if (kARM_VkVendor == properties.vendorID) {
+    if (kARM_VkVendor == properties.vendorID || kHisi_VkVendor  == properties.vendorID) {
         fShaderCaps->fPerlinNoiseRoundingFix = true;
     }
 
@@ -562,7 +562,8 @@ void GrVkCaps::applyDriverCorrectnessWorkarounds(const VkPhysicalDevicePropertie
     // On Qualcomm and Arm the gpu resolves an area larger than the render pass bounds when using
     // discardable msaa attachments. This causes the resolve to resolve uninitialized data from the
     // msaa image into the resolve image.
-    if (kQualcomm_VkVendor == properties.vendorID || kARM_VkVendor == properties.vendorID) {
+    if (kQualcomm_VkVendor == properties.vendorID || kARM_VkVendor == properties.vendorID
+        || kHisi_VkVendor == properties.vendorID) {
         fMustLoadFullImageWithDiscardableMSAA = true;
     }
 
@@ -587,7 +588,9 @@ void GrVkCaps::applyDriverCorrectnessWorkarounds(const VkPhysicalDevicePropertie
     if (kARM_VkVendor == properties.vendorID) {
         fAvoidWritePixelsFastPath = true; // bugs.skia.org/8064
     }
-
+    if (kHisi_VkVendor == properties.vendorID) {
+        fAvoidWritePixelsFastPath = false; // unlike ARM, do not avoid the writePixels fast path
+    }
     // AMD advertises support for MAX_UINT vertex input attributes, but in reality only supports 32.
     if (kAMD_VkVendor == properties.vendorID) {
         fMaxVertexAttributes = std::min(fMaxVertexAttributes, 32);
@@ -618,7 +621,7 @@ void GrVkCaps::applyDriverCorrectnessWorkarounds(const VkPhysicalDevicePropertie
 
     // On ARM indirect draws are broken on Android 9 and earlier. This was tested on a P30 and
     // Mate 20x running android 9.
-    if (properties.vendorID == kARM_VkVendor && androidAPIVersion <= 28) {
+    if ((properties.vendorID == kARM_VkVendor && androidAPIVersion <= 28) || kHisi_VkVendor == properties.vendorID) {
         fNativeDrawIndirectSupport = false;
     }
 
@@ -632,7 +635,7 @@ void GrVkCaps::applyDriverCorrectnessWorkarounds(const VkPhysicalDevicePropertie
 
     // ARM GPUs calculate `matrix * vector` in SPIR-V at full precision, even when the inputs are
     // RelaxedPrecision. Rewriting the multiply as a sum of vector*scalar fixes this. (skia:11769)
-    if (kARM_VkVendor == properties.vendorID) {
+    if (kARM_VkVendor == properties.vendorID || kHisi_VkVendor == properties.vendorID) {
         fShaderCaps->fRewriteMatrixVectorMultiply = true;
     }
 }
@@ -711,7 +714,7 @@ void GrVkCaps::initGrCaps(const skgpu::VulkanInterface* vkInterface,
         }
     }
 
-    if (kARM_VkVendor == properties.vendorID) {
+    if (kARM_VkVendor == properties.vendorID || kHisi_VkVendor == properties.vendorID) {
         fShouldCollapseSrcOverToSrcWhenAble = true;
     }
 }
@@ -746,7 +749,7 @@ void GrVkCaps::initShaderCaps(const VkPhysicalDeviceProperties& properties,
     // Assume the minimum precisions mandated by the SPIR-V spec.
     shaderCaps->fFloatIs32Bits = true;
     shaderCaps->fHalfIs32Bits = false;
-
+    shaderCaps->fFBFetchSupport = true;
     shaderCaps->fMaxFragmentSamplers = std::min(
                                        std::min(properties.limits.maxPerStageDescriptorSampledImages,
                                               properties.limits.maxPerStageDescriptorSamplers),
diff --git a/src/gpu/ganesh/vk/GrVkCaps.h b/src/gpu/ganesh/vk/GrVkCaps.h
index a6086eda..1608d1a1 100644
--- a/src/gpu/ganesh/vk/GrVkCaps.h
+++ b/src/gpu/ganesh/vk/GrVkCaps.h
@@ -280,6 +280,7 @@ private:
     enum VkVendor {
         kAMD_VkVendor = 4098,
         kARM_VkVendor = 5045,
+        kHisi_VkVendor = 6629,
         kImagination_VkVendor = 4112,
         kIntel_VkVendor = 32902,
         kNvidia_VkVendor = 4318,
diff --git a/src/gpu/ganesh/vk/GrVkRenderPass.cpp b/src/gpu/ganesh/vk/GrVkRenderPass.cpp
index 468b72f6..aa72b0e6 100644
--- a/src/gpu/ganesh/vk/GrVkRenderPass.cpp
+++ b/src/gpu/ganesh/vk/GrVkRenderPass.cpp
@@ -154,6 +154,7 @@ GrVkRenderPass* GrVkRenderPass::Create(GrVkGpu* gpu,
 
     VkSubpassDependency dependencies[2];
     int currentDependency = 0;
+    bool skipSetSubpassDep = false;
 
     if (attachmentFlags & kColor_AttachmentFlag) {
         // set up color attachment
@@ -195,6 +196,8 @@ GrVkRenderPass* GrVkRenderPass::Create(GrVkGpu* gpu,
 
                 dependency.dstStageMask |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
                 dependency.dstAccessMask |= VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
+
+                skipSetSubpassDep = true;
             }
         }
 
@@ -297,9 +300,15 @@ GrVkRenderPass* GrVkRenderPass::Create(GrVkGpu* gpu,
     createInfo.pAttachments = attachments.begin();
     createInfo.subpassCount = subpassCount;
     createInfo.pSubpasses = subpassDescs;
-    createInfo.dependencyCount = currentDependency;
-    createInfo.pDependencies = dependencies;
-
+    // Not spec-conformant: pass no dependencies when the sole one covers input-attachment reads.
+    if (skipSetSubpassDep && currentDependency == 1) {
+        createInfo.dependencyCount = 0;
+        createInfo.pDependencies = nullptr;
+    } else {
+        createInfo.dependencyCount = currentDependency;
+        createInfo.pDependencies = dependencies;
+    }
+    
     VkResult result;
     VkRenderPass renderPass;
     GR_VK_CALL_RESULT(gpu, result, CreateRenderPass(gpu->device(),
diff --git a/src/gpu/graphite/vk/VulkanCaps.cpp b/src/gpu/graphite/vk/VulkanCaps.cpp
index b3540fc8..3d9fed72 100644
--- a/src/gpu/graphite/vk/VulkanCaps.cpp
+++ b/src/gpu/graphite/vk/VulkanCaps.cpp
@@ -54,7 +54,7 @@ void VulkanCaps::init(const skgpu::VulkanInterface* vkInterface,
 
     // Enable the use of memoryless attachments for tiler GPUs (ARM Mali and Qualcomm Adreno).
     if (physDevProperties.vendorID == kARM_VkVendor ||
-        physDevProperties.vendorID == kQualcomm_VkVendor) {
+        physDevProperties.vendorID == kQualcomm_VkVendor || kHisi_VkVendor == physDevProperties.vendorID) {
         fSupportsMemorylessAttachments = true;
     }
 
-- 
Gitee