diff --git a/_build/backend.py b/_build/backend.py index 9e40d0817ef8b7b950487f3cbfce0d2183f14255..c0077bc0a2ce49195c57d409f3bf58a3876d8fe5 100644 --- a/_build/backend.py +++ b/_build/backend.py @@ -328,7 +328,17 @@ def build_wheel(wheel_directory, config_settings=None, metadata_directory=None): 'manylinux_2_27_x86_64', 'manylinux_2_28_x86_64', 'manylinux_2_31_x86_64', + 'manylinux_2_34_x86_64', + 'manylinux_2_35_x86_64', 'linux_x86_64', + 'manylinux2014_aarch64', # NB: equivalent to manylinux_2_17_aarch64 + 'manylinux_2_24_aarch64', + 'manylinux_2_27_aarch64', + 'manylinux_2_28_aarch64', + 'manylinux_2_31_aarch64', + 'manylinux_2_34_aarch64', + 'manylinux_2_35_aarch64', + 'linux_aarch64', ): logging.info('----------------------------------------') logging.info('Trying to delocate to platform: %s', plat) diff --git a/ccsrc/include/CMakeLists.txt b/ccsrc/include/CMakeLists.txt index 0e449bc8c91aabf2f0f498eade03ca1fc50f6a5b..0f805ed9e2d696dca5e01cd37fe60a406b41e73a 100644 --- a/ccsrc/include/CMakeLists.txt +++ b/ccsrc/include/CMakeLists.txt @@ -47,8 +47,14 @@ append_to_property(mq_install_targets GLOBAL include_lib) # ------------------------------------------------------------------------------ install( - DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/config ${CMAKE_CURRENT_LIST_DIR}/core ${CMAKE_CURRENT_LIST_DIR}/ops - ${CMAKE_CURRENT_LIST_DIR}/simulator ${CMAKE_CURRENT_LIST_DIR}/device ${CMAKE_CURRENT_LIST_DIR}/math + DIRECTORY + ${CMAKE_CURRENT_LIST_DIR}/algorithm + ${CMAKE_CURRENT_LIST_DIR}/config + ${CMAKE_CURRENT_LIST_DIR}/core + ${CMAKE_CURRENT_LIST_DIR}/ops + ${CMAKE_CURRENT_LIST_DIR}/simulator + ${CMAKE_CURRENT_LIST_DIR}/device + ${CMAKE_CURRENT_LIST_DIR}/math DESTINATION ${MQ_INSTALL_INCLUDEDIR}/ PATTERN "CPPLINT.cfg" EXCLUDE) diff --git a/ccsrc/include/algorithm/qaia/csr_base.h b/ccsrc/include/algorithm/qaia/csr_base.h new file mode 100644 index 0000000000000000000000000000000000000000..c694701e2b38fa8680f3d61f7e97cfe1ed18f5f4 --- /dev/null +++ b/ccsrc/include/algorithm/qaia/csr_base.h @@ -0,0 +1,40 @@ +/** + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CSR_BASE_H +#define CSR_BASE_H + +#include "core/mq_base_types.h" + +namespace mindquantum::sparse { +template +struct CsrBase { + int dim_; + int nnz_; + Index *indptr_; + Index *indices_; + T *data_; + + CsrBase() : dim_(0), nnz_(0), indptr_(nullptr), indices_(nullptr), data_(nullptr) { + } + + CsrBase(int dim, int nnz, Index *indptr, Index *indices, T *data) + : dim_(dim), nnz_(nnz), indptr_(indptr), indices_(indices), data_(data) { + } +}; +} // namespace mindquantum::sparse + +#endif diff --git a/ccsrc/include/algorithm/qaia/detail/common.cuh b/ccsrc/include/algorithm/qaia/detail/common.cuh new file mode 100644 index 0000000000000000000000000000000000000000..6df3ceb1bc9254d22e198f72a6c66f80fda252f1 --- /dev/null +++ b/ccsrc/include/algorithm/qaia/detail/common.cuh @@ -0,0 +1,77 @@ +/** + * Copyright (c) Huawei Technologies Co., Ltd. 2022. 
All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +static void HandleError(cudaError_t err, const char* file, int line) { + if (err != cudaSuccess) { + printf("%s in %s at line %d\n", cudaGetErrorString(err), file, line); + exit(EXIT_FAILURE); + } + return; +} +#define HANDLE_ERROR(err) (HandleError(err, __FILE__, __LINE__)) + +#define CHECK_CUDA(func) \ + { \ + cudaError_t status = (func); \ + if (status != cudaSuccess) { \ + printf("CUDA API failed at line %d with error: %s (%d)\n", __LINE__, cudaGetErrorString(status), status); \ + } \ + } + +#define CHECK_CUSPARSE(func) \ + { \ + cusparseStatus_t status = (func); \ + if (status != CUSPARSE_STATUS_SUCCESS) { \ + printf("CUSPARSE API failed at line %d with error: %s (%d)\n", __LINE__, cusparseGetErrorString(status), \ + status); \ + } \ + } + +#define CHECK_CUBLAS(call) \ + { \ + cublasStatus_t err; \ + if ((err = (call)) != CUBLAS_STATUS_SUCCESS) { \ + std::cerr << "CUBLAS error in file '" << __FILE__ << "' in line " << __LINE__ << ": " \ + << getCublasErrorString(err) << "." << std::endl; \ + std::exit(EXIT_FAILURE); \ + } \ + } + +const char* getCublasErrorString(cublasStatus_t status) { + switch (status) { + case CUBLAS_STATUS_SUCCESS: + return "CUBLAS_STATUS_SUCCESS"; + case CUBLAS_STATUS_NOT_INITIALIZED: + return "CUBLAS_STATUS_NOT_INITIALIZED"; + case CUBLAS_STATUS_ALLOC_FAILED: + return "CUBLAS_STATUS_ALLOC_FAILED"; + case CUBLAS_STATUS_INVALID_VALUE: + return "CUBLAS_STATUS_INVALID_VALUE"; + case CUBLAS_STATUS_ARCH_MISMATCH: + return "CUBLAS_STATUS_ARCH_MISMATCH"; + case CUBLAS_STATUS_MAPPING_ERROR: + return "CUBLAS_STATUS_MAPPING_ERROR"; + case CUBLAS_STATUS_EXECUTION_FAILED: + return "CUBLAS_STATUS_EXECUTION_FAILED"; + case CUBLAS_STATUS_INTERNAL_ERROR: + return "CUBLAS_STATUS_INTERNAL_ERROR"; + default: + return "UNKNOWN CUBLAS ERROR"; + } +} diff --git a/ccsrc/include/algorithm/qaia/detail/gpu_sb.cuh b/ccsrc/include/algorithm/qaia/detail/gpu_sb.cuh new file mode 100644 index 0000000000000000000000000000000000000000..62a5cff14dfc456227f811b2111de7296cb19c67 --- /dev/null +++ b/ccsrc/include/algorithm/qaia/detail/gpu_sb.cuh @@ -0,0 +1,47 @@ +/** + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef INCLUDE_QAIA_DETAIL_GPU_SB_CUH
+#define INCLUDE_QAIA_DETAIL_GPU_SB_CUH
+
+#include "algorithm/qaia/csr_base.h"
+#include "algorithm/qaia/detail/para.h"
+
+namespace mindquantum::algorithm::qaia::detail {
+
+struct SBBase {
+    static void dSB_update_int8(mindquantum::sparse::CsrBase<double> csr, double* x, Para paras);
+
+    static void bSB_update_int8(mindquantum::sparse::CsrBase<double> csr, double* x, Para paras);
+
+    static void bSB_update_fp16(mindquantum::sparse::CsrBase<double> csr, double* x, Para paras);
+
+    static void dSB_update_fp16(mindquantum::sparse::CsrBase<double> csr, double* x, Para paras);
+
+    static void dSB_update_h_int8(mindquantum::sparse::CsrBase<double> csr, double* x, Para paras, double* h, int ndim);
+
+    static void bSB_update_h_int8(mindquantum::sparse::CsrBase<double> csr, double* x, Para paras, double* h, int ndim);
+
+    static void bSB_update_h_fp16(mindquantum::sparse::CsrBase<double> csr, double* x, Para paras, double* h, int ndim);
+
+    static void dSB_update_h_fp16(mindquantum::sparse::CsrBase<double> csr, double* x, Para paras, double* h, int ndim);
+
+    static void cublas_warmup(int N, int B);
+};
+
+}  // namespace mindquantum::algorithm::qaia::detail
+
+#endif
diff --git a/ccsrc/include/algorithm/qaia/detail/para.h b/ccsrc/include/algorithm/qaia/detail/para.h
new file mode 100644
index 0000000000000000000000000000000000000000..956473829dbebeb0a126081eb6a452af24c856a0
--- /dev/null
+++ b/ccsrc/include/algorithm/qaia/detail/para.h
@@ -0,0 +1,39 @@
+/**
+ * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef PARA_H
+#define PARA_H
+
+namespace mindquantum::algorithm::qaia::detail {
+
+struct Para {
+    int B;
+    float xi;
+    float delta;
+    float dt;
+    int n_iter;
+
+    Para() : B(0), xi(0.0f), delta(0.0f), dt(0.0f), n_iter(0) {
+    }
+
+    Para(int B_, float xi_, float delta_, float dt_, int n_iter_)
+        : B(B_), xi(xi_), delta(delta_), dt(dt_), n_iter(n_iter_) {
+    }
+};
+
+}  // namespace mindquantum::algorithm::qaia::detail
+
+#endif
diff --git a/ccsrc/include/algorithm/qaia/detail/sb_helper.h b/ccsrc/include/algorithm/qaia/detail/sb_helper.h
new file mode 100644
index 0000000000000000000000000000000000000000..decc60325fb3e81ae7e39d0234199f0a08eaee3f
--- /dev/null
+++ b/ccsrc/include/algorithm/qaia/detail/sb_helper.h
@@ -0,0 +1,97 @@
+/**
+ * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
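For orientation, here is a minimal host-side sketch of how the pieces declared above fit together. This is a hedged illustration, not code from the patch: the function name `run_bsb_once` and all parameter values are invented for the example, and `CsrBase<double>` follows the element type the update functions read back as `double*`.

```cpp
// Hedged sketch: driving one ballistic-SB run through the API declared above.
// Assumes a CUDA-enabled build; J is a symmetric Ising coupling matrix in CSR form.
#include <vector>

#include "algorithm/qaia/csr_base.h"
#include "algorithm/qaia/detail/gpu_sb.cuh"
#include "algorithm/qaia/detail/para.h"

using mindquantum::Index;
using namespace mindquantum::algorithm::qaia::detail;

void run_bsb_once(Index* indptr, Index* indices, double* values, int dim, int nnz) {
    mindquantum::sparse::CsrBase<double> csr(dim, nnz, indptr, indices, values);
    // B = batch size (parallel trajectories); xi, delta, dt are SB hyperparameters.
    Para paras(/*B=*/32, /*xi=*/0.1f, /*delta=*/1.0f, /*dt=*/0.3f, /*n_iter=*/1000);
    // Receives the final spin amplitudes, laid out as dim x B row-major.
    std::vector<double> x(static_cast<size_t>(dim) * paras.B, 0.0);
    SBBase::cublas_warmup(dim, paras.B);        // prime cuBLAS before timing runs
    SBBase::bSB_update_int8(csr, x.data(), paras);
}
```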
+ */
+
+#ifndef SB_HELPER_H
+#define SB_HELPER_H
+
+#include <cuda_fp16.h>
+
+#include "algorithm/qaia/csr_base.h"
+#include "algorithm/qaia/detail/gpu_sb.cuh"
+#include "algorithm/qaia/detail/para.h"
+
+namespace mindquantum::algorithm::qaia::detail {
+
+// Helper structure, using template specialization to select different update functions
+template <int SB, typename T, bool H>
+struct SBUpdater;
+
+template <>
+struct SBUpdater<0, half, true> {
+    static void update(const mindquantum::sparse::CsrBase<double>& csr_matrix, double* raw_x,
+                       const mindquantum::algorithm::qaia::detail::Para& paras, double* raw_h, int ndim) {
+        mindquantum::algorithm::qaia::detail::SBBase::bSB_update_h_fp16(csr_matrix, raw_x, paras, raw_h, ndim);
+    }
+};
+
+template <>
+struct SBUpdater<0, int8_t, true> {
+    static void update(const mindquantum::sparse::CsrBase<double>& csr_matrix, double* raw_x,
+                       const mindquantum::algorithm::qaia::detail::Para& paras, double* raw_h, int ndim) {
+        mindquantum::algorithm::qaia::detail::SBBase::bSB_update_h_int8(csr_matrix, raw_x, paras, raw_h, ndim);
+    }
+};
+
+template <>
+struct SBUpdater<1, half, true> {
+    static void update(const mindquantum::sparse::CsrBase<double>& csr_matrix, double* raw_x,
+                       const mindquantum::algorithm::qaia::detail::Para& paras, double* raw_h, int ndim) {
+        mindquantum::algorithm::qaia::detail::SBBase::dSB_update_h_fp16(csr_matrix, raw_x, paras, raw_h, ndim);
+    }
+};
+
+template <>
+struct SBUpdater<1, int8_t, true> {
+    static void update(const mindquantum::sparse::CsrBase<double>& csr_matrix, double* raw_x,
+                       const mindquantum::algorithm::qaia::detail::Para& paras, double* raw_h, int ndim) {
+        mindquantum::algorithm::qaia::detail::SBBase::dSB_update_h_int8(csr_matrix, raw_x, paras, raw_h, ndim);
+    }
+};
+
+template <>
+struct SBUpdater<0, half, false> {
+    static void update(const mindquantum::sparse::CsrBase<double>& csr_matrix, double* raw_x,
+                       const mindquantum::algorithm::qaia::detail::Para& paras, double* raw_h, int ndim) {
+        mindquantum::algorithm::qaia::detail::SBBase::bSB_update_fp16(csr_matrix, raw_x, paras);
+    }
+};
+
+template <>
+struct SBUpdater<0, int8_t, false> {
+    static void update(const mindquantum::sparse::CsrBase<double>& csr_matrix, double* raw_x,
+                       const mindquantum::algorithm::qaia::detail::Para& paras, double* raw_h, int ndim) {
+        mindquantum::algorithm::qaia::detail::SBBase::bSB_update_int8(csr_matrix, raw_x, paras);
+    }
+};
+
+template <>
+struct SBUpdater<1, half, false> {
+    static void update(const mindquantum::sparse::CsrBase<double>& csr_matrix, double* raw_x,
+                       const mindquantum::algorithm::qaia::detail::Para& paras, double* raw_h, int ndim) {
+        mindquantum::algorithm::qaia::detail::SBBase::dSB_update_fp16(csr_matrix, raw_x, paras);
+    }
+};
+
+template <>
+struct SBUpdater<1, int8_t, false> {
+    static void update(const mindquantum::sparse::CsrBase<double>& csr_matrix, double* raw_x,
+                       const mindquantum::algorithm::qaia::detail::Para& paras, double* raw_h, int ndim) {
+        mindquantum::algorithm::qaia::detail::SBBase::dSB_update_int8(csr_matrix, raw_x, paras);
+    }
+};
+
+}  // namespace mindquantum::algorithm::qaia::detail
+#endif
diff --git a/ccsrc/include/algorithm/qaia/detail/tools.cuh b/ccsrc/include/algorithm/qaia/detail/tools.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..35e0be49e45ec5b929375b048e98cb10c679bd76
--- /dev/null
+++ b/ccsrc/include/algorithm/qaia/detail/tools.cuh
@@ -0,0 +1,216 @@
+/**
+ * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved.
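The specializations above reduce the run-time branching in the Python bindings to a single compile-time lookup. A hedged sketch of the dispatch pattern (the wrapper name `run` and the template parameter names are illustrative, not taken from the patch):

```cpp
// Hedged sketch: SB selects ballistic (0) or discrete (1) simulated bifurcation,
// T the GEMM precision (int8_t or half), H whether an external field h is used.
// All three are fixed at compile time, so the call below has no branch at all.
template <int SB, typename T, bool H>
void run(const mindquantum::sparse::CsrBase<double>& csr, double* x,
         const mindquantum::algorithm::qaia::detail::Para& paras, double* h, int ndim) {
    mindquantum::algorithm::qaia::detail::SBUpdater<SB, T, H>::update(csr, x, paras, h, ndim);
}
// e.g. run<1, int8_t, true>(csr, x, paras, h, ndim) lands in dSB_update_h_int8.
```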
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef TOOLS_CUH +#define TOOLS_CUH + +#include + +#include + +#include +#include + +#include "algorithm/qaia/csr_base.h" +#include "core/mq_base_types.h" + +using mindquantum::Index; + +template +struct Bounds; + +// Specialization for int8_t +template <> +struct Bounds { + static __device__ int8_t UP() { + return 127; + } + static __device__ int8_t LOW() { + return -127; + } +}; + +// Specialization for half +template <> +struct Bounds { + static __device__ half UP() { + return __float2half(1.0f); + } + static __device__ half LOW() { + return __float2half(-1.0f); + } +}; + +// Templated CUDA kernel functions +template +__global__ void sign_kernel(const T* __restrict__ x, T* __restrict__ signx, int size) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= size) + return; + + T zero = static_cast(0); + if (x[idx] > zero) { + signx[idx] = Bounds::UP(); + } else if (x[idx] < zero) { + signx[idx] = Bounds::LOW(); + } else { + signx[idx] = zero; + } +} + +#define UP 127 +#define LOW -127 + +__global__ void update_tail_half(const half* __restrict__ tmp, half* x, half* y, int size, float xi, float beta, + float delta, float dt) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= size) + return; + y[idx] += static_cast(xi * static_cast(tmp[idx]) + beta * static_cast(x[idx])); + half x_idx = static_cast(x[idx]) + y[idx] * static_cast(delta * dt); + if (x_idx >= static_cast(1.0)) { + x[idx] = 1.0; + y[idx] = 0; + } else if (x_idx <= static_cast(-1.0)) { + x[idx] = -1.0; + y[idx] = 0; + } else { + x[idx] = x_idx; + } +} + +__global__ void update_h_tail_half(const half* __restrict__ tmp, half* x, half* y, half* h, int size, float xi, + float beta, float delta, float dt) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= size) + return; + y[idx] += static_cast(xi * static_cast(tmp[idx] + h[idx]) + beta * static_cast(x[idx])); + half x_idx = static_cast(x[idx]) + y[idx] * static_cast(delta * dt); + if (x_idx >= static_cast(1.0)) { + x[idx] = 1.0; + y[idx] = 0; + } else if (x_idx <= static_cast(-1.0)) { + x[idx] = -1.0; + y[idx] = 0; + } else { + x[idx] = x_idx; + } +} + +__global__ void update_tail(const int* __restrict__ tmp, int8_t* x, int* y, int size, float xi, float beta, float delta, + float dt) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= size) + return; + y[idx] += static_cast(xi * static_cast(tmp[idx] / 127) + beta * static_cast(x[idx])); + int x_idx = static_cast(x[idx]) + y[idx] * delta * dt; + if (x_idx >= UP) { + x[idx] = UP; + y[idx] = 0; + } else if (x_idx <= LOW) { + x[idx] = LOW; + y[idx] = 0; + } else { + x[idx] = x_idx; + } +} + +__global__ void update_h_tail(const int* __restrict__ tmp, int8_t* x, int* y, int* h, int size, float xi, float beta, + float delta, float dt) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= size) + return; + y[idx] += static_cast(xi * static_cast((tmp[idx] + h[idx]) / 127) + beta * static_cast(x[idx])); + int 
x_idx = static_cast(x[idx]) + y[idx] * delta * dt; + if (x_idx >= UP) { + x[idx] = UP; + y[idx] = 0; + } else if (x_idx <= LOW) { + x[idx] = LOW; + y[idx] = 0; + } else { + x[idx] = x_idx; + } +} + +// Base template (left undefined) +template +__global__ void init_xy(T* array, int size, uint64_t seed); +// Specialization for int +template <> +__global__ void init_xy(int* array, int size, uint64_t seed) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < size) { + curandState state; + curand_init(seed, idx, 0, &state); + int randomValue = curand(&state) % 7 - 3; + array[idx] = randomValue; + } +} +// Specialization for int8_t +template <> +__global__ void init_xy(int8_t* array, int size, uint64_t seed) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < size) { + curandState state; + curand_init(seed, idx, 0, &state); + int randomValue = curand(&state) % 7 - 3; + array[idx] = static_cast(randomValue); + } +} +// Specialization for half +template <> +__global__ void init_xy(half* array, int size, uint64_t seed) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < size) { + curandState state; + curand_init(seed, idx, 0, &state); + float rand_float = curand_uniform(&state) * 0.02f - 0.01f; + array[idx] = __float2half(rand_float); + } +} + +template +struct Value; + +template <> +struct Value { + static int8_t v(double value) { + return static_cast(value * 127.0); + } +}; + +template <> +struct Value { + static half v(double value) { + return static_cast(value); + } +}; + +template +void fill_J(Index* indices, Index* indptr, double* data, std::vector* h_J, int N) { + for (int i = 0; i < N; ++i) { + Index start = indptr[i]; + Index end = indptr[i + 1]; + for (Index j = start; j < end; ++j) { + Index col = indices[j]; + double value = data[j]; + (*h_J)[i * N + col] = Value::v(value); + } + } +} + +#endif diff --git a/ccsrc/lib/CMakeLists.txt b/ccsrc/lib/CMakeLists.txt index e4d688cbe239bec2708903b07ea1f8ac55fe90a6..bbbdf89bd4f356567377fdf3406bd0a8b0f67c3e 100644 --- a/ccsrc/lib/CMakeLists.txt +++ b/ccsrc/lib/CMakeLists.txt @@ -16,6 +16,7 @@ # # ============================================================================== +add_subdirectory(algorithm) add_subdirectory(mq_base) add_subdirectory(simulator) add_subdirectory(device) diff --git a/ccsrc/lib/algorithm/CMakeLists.txt b/ccsrc/lib/algorithm/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..3353242ced1eabdcfb7380b5e5b429ac72f27632 --- /dev/null +++ b/ccsrc/lib/algorithm/CMakeLists.txt @@ -0,0 +1,26 @@ +# ============================================================================== +# +# Copyright 2022 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# ============================================================================== + +if(ENABLE_CUDA) + add_library(qaia_sb_gpu STATIC) + target_link_libraries(qaia_sb_gpu PUBLIC CUDA_mindquantum mq_math cublas) + set_target_properties(qaia_sb_gpu PROPERTIES CUDA_RUNTIME_LIBRARY $,Static,Shared>) + force_at_least_cxx17_workaround(qaia_sb_gpu) + append_to_property(mq_install_targets GLOBAL qaia_sb_gpu) + add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/qaia/detail) +endif() diff --git a/ccsrc/lib/algorithm/qaia/detail/CMakeLists.txt b/ccsrc/lib/algorithm/qaia/detail/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..612e6cc98a27c3f533a7b988f5d1e158212621b1 --- /dev/null +++ b/ccsrc/lib/algorithm/qaia/detail/CMakeLists.txt @@ -0,0 +1,21 @@ +# ============================================================================== +# +# Copyright 2020 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# ============================================================================== + +# lint_cmake: -whitespace/indent + +target_sources(qaia_sb_gpu PRIVATE ${CMAKE_CURRENT_LIST_DIR}/gpu_sb_update.cu) diff --git a/ccsrc/lib/algorithm/qaia/detail/gpu_sb_update.cu b/ccsrc/lib/algorithm/qaia/detail/gpu_sb_update.cu new file mode 100644 index 0000000000000000000000000000000000000000..18f986ba662582693977024a0b41c1277bbdb8c0 --- /dev/null +++ b/ccsrc/lib/algorithm/qaia/detail/gpu_sb_update.cu @@ -0,0 +1,552 @@ +/** + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
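The two listings above wire the CUDA sources into a static helper library gated on `ENABLE_CUDA`. A hedged sketch of how another target in the same build tree could consume it; the target name `sb_demo` and its source path are hypothetical:

```cmake
# Hypothetical consumer target; only meaningful when ENABLE_CUDA is set,
# since qaia_sb_gpu is only defined inside the if(ENABLE_CUDA) block above.
if(ENABLE_CUDA)
  add_executable(sb_demo demo/sb_demo.cu)           # hypothetical source file
  target_link_libraries(sb_demo PRIVATE qaia_sb_gpu)
endif()
```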
+ */ + +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include "algorithm/qaia/csr_base.h" +#include "algorithm/qaia/detail/common.cuh" +#include "algorithm/qaia/detail/gpu_sb.cuh" +#include "algorithm/qaia/detail/para.h" +#include "algorithm/qaia/detail/tools.cuh" + +using mindquantum::Index; + +namespace mindquantum::algorithm::qaia::detail { + +void SBBase::dSB_update_int8(mindquantum::sparse::CsrBase csr, double* x, Para paras) { + Index* indptr = csr.indptr_; + Index* indices = csr.indices_; + double* data = csr.data_; + int N = csr.dim_; + int B = paras.B; + float xi = paras.xi; + float delta = paras.delta; + float dt = paras.dt; + int n_iter = paras.n_iter; + int NN = N * N; + int NB = N * B; + + std::vector h_J(NN, 0); + std::vector h_x(NB); + std::vector h_y(NB); + + fill_J(indices, indptr, data, &h_J, N); + + int8_t *d_J, *d_x, *signx; + int *d_y, *tmp; + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&d_J), NN * sizeof(int8_t))); + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&d_x), NB * sizeof(int8_t))); + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&d_y), NB * sizeof(int))); + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&tmp), NB * sizeof(int))); + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&signx), NB * sizeof(int8_t))); + HANDLE_ERROR(cudaMemcpy(d_J, h_J.data(), NN * sizeof(int8_t), cudaMemcpyHostToDevice)); + + init_xy<<<(NB + 255) / 256, 256>>>(d_x, NB, time(NULL)); + init_xy<<<(NB + 255) / 256, 256>>>(d_y, NB, time(NULL)); + + cublasHandle_t handle; + cublasCreate(&handle); + int alpha = 1; + int b = 0; + for (int i = 0; i < n_iter; i++) { + float beta = (n_iter == 1) ? -delta : static_cast(i) / (n_iter - 1) - delta; + + sign_kernel<<<(NB + 255) / 256, 256>>>(d_x, signx, NB); + + CHECK_CUBLAS(cublasGemmEx(handle, CUBLAS_OP_T, CUBLAS_OP_N, N, B, N, &alpha, d_J, CUDA_R_8I, N, signx, + CUDA_R_8I, N, &b, tmp, CUDA_R_32I, N, CUDA_R_32I, CUBLAS_GEMM_ALGO15_TENSOR_OP)); + + update_tail<<<(NB + 255) / 256, 256>>>(tmp, d_x, d_y, NB, xi, beta, delta, dt); + } + CHECK_CUDA(cudaMemcpy(h_x.data(), d_x, NB * sizeof(int8_t), cudaMemcpyDeviceToHost)) + + for (int i = 0; i < N; i++) { + for (int j = 0; j < B; j++) { + x[i * B + j] = static_cast(h_x[j * N + i]) / 127.0; + } + } + + HANDLE_ERROR(cudaFree(d_J)); + HANDLE_ERROR(cudaFree(d_x)); + HANDLE_ERROR(cudaFree(d_y)); + HANDLE_ERROR(cudaFree(tmp)); + HANDLE_ERROR(cudaFree(signx)); +} + +void SBBase::bSB_update_int8(mindquantum::sparse::CsrBase csr, double* x, Para paras) { + Index* indptr = csr.indptr_; + Index* indices = csr.indices_; + double* data = csr.data_; + int N = csr.dim_; + int B = paras.B; + float xi = paras.xi; + float delta = paras.delta; + float dt = paras.dt; + int n_iter = paras.n_iter; + int NN = N * N; + int NB = N * B; + + std::vector h_J(NN, 0); + std::vector h_x(NB); + std::vector h_y(NB); + + fill_J(indices, indptr, data, &h_J, N); + + int8_t *d_J, *d_x; + int *d_y, *tmp; + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&d_J), NN * sizeof(int8_t))); + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&d_x), NB * sizeof(int8_t))); + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&d_y), NB * sizeof(int))); + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&tmp), NB * sizeof(int))); + HANDLE_ERROR(cudaMemcpy(d_J, h_J.data(), NN * sizeof(int8_t), cudaMemcpyHostToDevice)); + + init_xy<<<(NB + 255) / 256, 256>>>(d_x, NB, time(NULL)); + init_xy<<<(NB + 255) / 256, 256>>>(d_y, NB, time(NULL)); + + cublasHandle_t handle; + cublasCreate(&handle); + int alpha = 1; + int b = 0; + for (int i = 0; i < n_iter; i++) { + float beta = 
(n_iter == 1) ? -delta : static_cast(i) / (n_iter - 1) - delta; + + CHECK_CUBLAS(cublasGemmEx(handle, CUBLAS_OP_T, CUBLAS_OP_N, N, B, N, &alpha, d_J, CUDA_R_8I, N, d_x, CUDA_R_8I, + N, &b, tmp, CUDA_R_32I, N, CUDA_R_32I, CUBLAS_GEMM_ALGO15_TENSOR_OP)); + + update_tail<<<(NB + 255) / 256, 256>>>(tmp, d_x, d_y, NB, xi, beta, delta, dt); + } + CHECK_CUDA(cudaMemcpy(h_x.data(), d_x, NB * sizeof(int8_t), cudaMemcpyDeviceToHost)) + + for (int i = 0; i < N; i++) { + for (int j = 0; j < B; j++) { + x[i * B + j] = static_cast(h_x[j * N + i]) / 127.0; + } + } + HANDLE_ERROR(cudaFree(d_J)); + HANDLE_ERROR(cudaFree(d_x)); + HANDLE_ERROR(cudaFree(d_y)); + HANDLE_ERROR(cudaFree(tmp)); +} + +void SBBase::bSB_update_fp16(mindquantum::sparse::CsrBase csr, double* x, Para paras) { + Index* indptr = csr.indptr_; + Index* indices = csr.indices_; + double* data = csr.data_; + int N = csr.dim_; + int B = paras.B; + float xi = paras.xi; + float delta = paras.delta; + float dt = paras.dt; + int n_iter = paras.n_iter; + int NN = N * N; + int NB = N * B; + + std::vector h_J(NN, 0); + std::vector h_x(NB); + std::vector h_y(NB); + + fill_J(indices, indptr, data, &h_J, N); + + half *d_J, *d_x; + half *d_y, *tmp; + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&d_J), NN * sizeof(half))); + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&d_x), NB * sizeof(half))); + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&d_y), NB * sizeof(half))); + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&tmp), NB * sizeof(half))); + HANDLE_ERROR(cudaMemcpy(d_J, h_J.data(), NN * sizeof(half), cudaMemcpyHostToDevice)); + + init_xy<<<(NB + 255) / 256, 256>>>(d_x, NB, time(NULL)); + init_xy<<<(NB + 255) / 256, 256>>>(d_y, NB, time(NULL)); + + cublasHandle_t handle; + cublasCreate(&handle); + // cublasSetMathMode(handle, CUBLAS_TENSOR_OP_MATH); + half b = 0.0; + half alpha = 1.0; + for (int i = 0; i < n_iter; i++) { + float beta = (n_iter == 1) ? 
-delta : static_cast(i) / (n_iter - 1) - delta; + + CHECK_CUBLAS(cublasGemmEx(handle, CUBLAS_OP_N, CUBLAS_OP_N, B, N, N, &alpha, d_x, CUDA_R_16F, B, d_J, + CUDA_R_16F, N, &b, tmp, CUDA_R_16F, B, CUBLAS_COMPUTE_16F, + CUBLAS_GEMM_ALGO15_TENSOR_OP)); + + update_tail_half<<<(NB + 255) / 256, 256>>>(tmp, d_x, d_y, NB, xi, beta, delta, dt); + } + CHECK_CUDA(cudaMemcpy(h_x.data(), d_x, NB * sizeof(half), cudaMemcpyDeviceToHost)) + + for (int i = 0; i < N; i++) { + for (int j = 0; j < B; j++) { + x[i * B + j] = static_cast(h_x[i * B + j]); + } + } + + HANDLE_ERROR(cudaFree(d_J)); + HANDLE_ERROR(cudaFree(d_x)); + HANDLE_ERROR(cudaFree(d_y)); + HANDLE_ERROR(cudaFree(tmp)); +} + +void SBBase::dSB_update_fp16(mindquantum::sparse::CsrBase csr, double* x, Para paras) { + Index* indptr = csr.indptr_; + Index* indices = csr.indices_; + double* data = csr.data_; + int N = csr.dim_; + int B = paras.B; + float xi = paras.xi; + float delta = paras.delta; + float dt = paras.dt; + int n_iter = paras.n_iter; + int NN = N * N; + int NB = N * B; + + std::vector h_J(NN, 0); + std::vector h_x(NB); + std::vector h_y(NB); + + fill_J(indices, indptr, data, &h_J, N); + + half *d_J, *d_x; + half *d_y, *tmp, *signx; + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&d_J), NN * sizeof(half))); + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&d_x), NB * sizeof(half))); + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&d_y), NB * sizeof(half))); + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&tmp), NB * sizeof(half))); + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&signx), NB * sizeof(half))); + HANDLE_ERROR(cudaMemcpy(d_J, h_J.data(), NN * sizeof(half), cudaMemcpyHostToDevice)); + + init_xy<<<(NB + 255) / 256, 256>>>(d_x, NB, time(NULL)); + init_xy<<<(NB + 255) / 256, 256>>>(d_y, NB, time(NULL)); + + cublasHandle_t handle; + cublasCreate(&handle); + half b = 0.0; + half alpha = 1.0; + for (int i = 0; i < n_iter; i++) { + float beta = (n_iter == 1) ? 
-delta : static_cast(i) / (n_iter - 1) - delta; + + sign_kernel<<<(NB + 255) / 256, 256>>>(d_x, signx, NB); + + CHECK_CUBLAS(cublasGemmEx(handle, CUBLAS_OP_N, CUBLAS_OP_N, B, N, N, &alpha, signx, CUDA_R_16F, B, d_J, + CUDA_R_16F, N, &b, tmp, CUDA_R_16F, B, CUBLAS_COMPUTE_16F, + CUBLAS_GEMM_ALGO15_TENSOR_OP)); + + update_tail_half<<<(NB + 255) / 256, 256>>>(tmp, d_x, d_y, NB, xi, beta, delta, dt); + } + CHECK_CUDA(cudaMemcpy(h_x.data(), d_x, NB * sizeof(half), cudaMemcpyDeviceToHost)) + + for (int i = 0; i < N; i++) { + for (int j = 0; j < B; j++) { + x[i * B + j] = static_cast(h_x[i * B + j]); + } + } + HANDLE_ERROR(cudaFree(d_J)); + HANDLE_ERROR(cudaFree(d_x)); + HANDLE_ERROR(cudaFree(d_y)); + HANDLE_ERROR(cudaFree(tmp)); + HANDLE_ERROR(cudaFree(signx)); +} + +void SBBase::dSB_update_h_int8(mindquantum::sparse::CsrBase csr, double* x, Para paras, double* h, int ndim) { + Index* indptr = csr.indptr_; + Index* indices = csr.indices_; + double* data = csr.data_; + int N = csr.dim_; + int B = paras.B; + float xi = paras.xi; + float delta = paras.delta; + float dt = paras.dt; + int n_iter = paras.n_iter; + int NN = N * N; + int NB = N * B; + + std::vector h_J(NN, 0); + std::vector h_x(NB); + std::vector h_y(NB); + + fill_J(indices, indptr, data, &h_J, N); + + int8_t *d_J, *d_x, *signx; + int *d_y, *tmp, *d_h; + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&d_J), NN * sizeof(int8_t))); + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&d_x), NB * sizeof(int8_t))); + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&d_y), NB * sizeof(int))); + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&tmp), NB * sizeof(int))); + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&signx), NB * sizeof(int8_t))); + HANDLE_ERROR(cudaMemcpy(d_J, h_J.data(), NN * sizeof(int8_t), cudaMemcpyHostToDevice)); + std::vector h_h(NB, 0); + for (int i = 0; i < NB; i++) { + h_h[i] = static_cast(h[i] * 127); + } + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&d_h), NB * sizeof(int))); + HANDLE_ERROR(cudaMemcpy(d_h, h_h.data(), NB * sizeof(int), cudaMemcpyHostToDevice)); + + init_xy<<<(NB + 255) / 256, 256>>>(d_x, NB, time(NULL)); + init_xy<<<(NB + 255) / 256, 256>>>(d_y, NB, time(NULL)); + + cublasHandle_t handle; + cublasCreate(&handle); + int alpha = 1; + int b = 0; + for (int i = 0; i < n_iter; i++) { + float beta = (n_iter == 1) ? 
-delta : static_cast(i) / (n_iter - 1) - delta; + + sign_kernel<<<(NB + 255) / 256, 256>>>(d_x, signx, NB); + + CHECK_CUBLAS(cublasGemmEx(handle, CUBLAS_OP_T, CUBLAS_OP_N, N, B, N, &alpha, d_J, CUDA_R_8I, N, signx, + CUDA_R_8I, N, &b, tmp, CUDA_R_32I, N, CUDA_R_32I, CUBLAS_GEMM_ALGO15_TENSOR_OP)); + + update_h_tail<<<(NB + 255) / 256, 256>>>(tmp, d_x, d_y, d_h, NB, xi, beta, delta, dt); + } + CHECK_CUDA(cudaMemcpy(h_x.data(), d_x, NB * sizeof(int8_t), cudaMemcpyDeviceToHost)) + + for (int i = 0; i < N; i++) { + for (int j = 0; j < B; j++) { + x[i * B + j] = static_cast(h_x[j * N + i]) / 127.0; + } + } + HANDLE_ERROR(cudaFree(d_h)); + HANDLE_ERROR(cudaFree(d_J)); + HANDLE_ERROR(cudaFree(d_x)); + HANDLE_ERROR(cudaFree(d_y)); + HANDLE_ERROR(cudaFree(tmp)); + HANDLE_ERROR(cudaFree(signx)); +} + +void SBBase::bSB_update_h_int8(mindquantum::sparse::CsrBase csr, double* x, Para paras, double* h, int ndim) { + Index* indptr = csr.indptr_; + Index* indices = csr.indices_; + double* data = csr.data_; + int N = csr.dim_; + int B = paras.B; + float xi = paras.xi; + float delta = paras.delta; + float dt = paras.dt; + int n_iter = paras.n_iter; + int NN = N * N; + int NB = N * B; + + std::vector h_J(NN, 0); + std::vector h_x(NB); + std::vector h_y(NB); + + fill_J(indices, indptr, data, &h_J, N); + + int8_t *d_J, *d_x; + int *d_y, *tmp, *d_h; + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&d_J), NN * sizeof(int8_t))); + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&d_x), NB * sizeof(int8_t))); + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&d_y), NB * sizeof(int))); + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&tmp), NB * sizeof(int))); + HANDLE_ERROR(cudaMemcpy(d_J, h_J.data(), NN * sizeof(int8_t), cudaMemcpyHostToDevice)); + std::vector h_h(NB, 0); + for (int i = 0; i < NB; i++) { + h_h[i] = static_cast(data[i] * 127); + } + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&d_h), NB * sizeof(int))); + HANDLE_ERROR(cudaMemcpy(d_h, h_h.data(), NB * sizeof(int), cudaMemcpyHostToDevice)); + + init_xy<<<(NB + 255) / 256, 256>>>(d_x, NB, time(NULL)); + init_xy<<<(NB + 255) / 256, 256>>>(d_y, NB, time(NULL)); + + cublasHandle_t handle; + cublasCreate(&handle); + int alpha = 1; + int b = 0; + for (int i = 0; i < n_iter; i++) { + float beta = (n_iter == 1) ? 
-delta : static_cast(i) / (n_iter - 1) - delta; + + CHECK_CUBLAS(cublasGemmEx(handle, CUBLAS_OP_T, CUBLAS_OP_N, N, B, N, &alpha, d_J, CUDA_R_8I, N, d_x, CUDA_R_8I, + N, &b, tmp, CUDA_R_32I, N, CUDA_R_32I, CUBLAS_GEMM_ALGO15_TENSOR_OP)); + + update_h_tail<<<(NB + 255) / 256, 256>>>(tmp, d_x, d_y, d_h, NB, xi, beta, delta, dt); + } + CHECK_CUDA(cudaMemcpy(h_x.data(), d_x, NB * sizeof(int8_t), cudaMemcpyDeviceToHost)) + + for (int i = 0; i < N; i++) { + for (int j = 0; j < B; j++) { + x[i * B + j] = static_cast(h_x[j * N + i]) / 127.0; + } + } + + HANDLE_ERROR(cudaFree(d_h)); + HANDLE_ERROR(cudaFree(d_J)); + HANDLE_ERROR(cudaFree(d_x)); + HANDLE_ERROR(cudaFree(d_y)); + HANDLE_ERROR(cudaFree(tmp)); +} + +void SBBase::bSB_update_h_fp16(mindquantum::sparse::CsrBase csr, double* x, Para paras, double* h, int ndim) { + Index* indptr = csr.indptr_; + Index* indices = csr.indices_; + double* data = csr.data_; + int N = csr.dim_; + int B = paras.B; + float xi = paras.xi; + float delta = paras.delta; + float dt = paras.dt; + int n_iter = paras.n_iter; + int NN = N * N; + int NB = N * B; + + std::vector h_J(NN, 0); + std::vector h_x(NB); + std::vector h_y(NB); + + fill_J(indices, indptr, data, &h_J, N); + + half *d_J, *d_x; + half *d_y, *tmp, *d_h; + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&d_J), NN * sizeof(half))); + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&d_x), NB * sizeof(half))); + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&d_y), NB * sizeof(half))); + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&tmp), NB * sizeof(half))); + HANDLE_ERROR(cudaMemcpy(d_J, h_J.data(), NN * sizeof(half), cudaMemcpyHostToDevice)); + std::vector h_h(NB, 0); + for (int i = 0; i < NB; i++) { + h_h[i] = static_cast(h[i]); + } + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&d_h), NB * sizeof(half))); + HANDLE_ERROR(cudaMemcpy(d_h, h_h.data(), NB * sizeof(half), cudaMemcpyHostToDevice)); + + init_xy<<<(NB + 255) / 256, 256>>>(d_x, NB, time(NULL)); + init_xy<<<(NB + 255) / 256, 256>>>(d_y, NB, time(NULL)); + + cublasHandle_t handle; + cublasCreate(&handle); + // cublasSetMathMode(handle, CUBLAS_TENSOR_OP_MATH); + half b = 0.0; + half alpha = 1.0; + for (int i = 0; i < n_iter; i++) { + float beta = (n_iter == 1) ? 
-delta : static_cast(i) / (n_iter - 1) - delta; + + CHECK_CUBLAS(cublasGemmEx(handle, CUBLAS_OP_N, CUBLAS_OP_N, B, N, N, &alpha, d_x, CUDA_R_16F, B, d_J, + CUDA_R_16F, N, &b, tmp, CUDA_R_16F, B, CUBLAS_COMPUTE_16F, + CUBLAS_GEMM_ALGO15_TENSOR_OP)); + + update_h_tail_half<<<(NB + 255) / 256, 256>>>(tmp, d_x, d_y, d_h, NB, xi, beta, delta, dt); + } + CHECK_CUDA(cudaMemcpy(h_x.data(), d_x, NB * sizeof(half), cudaMemcpyDeviceToHost)) + + for (int i = 0; i < N; i++) { + for (int j = 0; j < B; j++) { + x[i * B + j] = static_cast(h_x[i * B + j]); + } + } + + HANDLE_ERROR(cudaFree(d_h)); + HANDLE_ERROR(cudaFree(d_J)); + HANDLE_ERROR(cudaFree(d_x)); + HANDLE_ERROR(cudaFree(d_y)); + HANDLE_ERROR(cudaFree(tmp)); +} + +void SBBase::dSB_update_h_fp16(mindquantum::sparse::CsrBase csr, double* x, Para paras, double* h, int ndim) { + Index* indptr = csr.indptr_; + Index* indices = csr.indices_; + double* data = csr.data_; + int N = csr.dim_; + int B = paras.B; + float xi = paras.xi; + float delta = paras.delta; + float dt = paras.dt; + int n_iter = paras.n_iter; + int NN = N * N; + int NB = N * B; + + std::vector h_J(NN, 0); + std::vector h_x(NB); + std::vector h_y(NB); + + fill_J(indices, indptr, data, &h_J, N); + + half *d_J, *d_x; + half *d_y, *tmp, *d_h, *signx; + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&d_J), NN * sizeof(half))); + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&d_x), NB * sizeof(half))); + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&d_y), NB * sizeof(half))); + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&tmp), NB * sizeof(half))); + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&signx), NB * sizeof(half))); + HANDLE_ERROR(cudaMemcpy(d_J, h_J.data(), NN * sizeof(half), cudaMemcpyHostToDevice)); + std::vector h_h(NB, 0); + for (int i = 0; i < NB; i++) { + h_h[i] = static_cast(h[i]); + } + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&d_h), NB * sizeof(half))); + HANDLE_ERROR(cudaMemcpy(d_h, h_h.data(), NB * sizeof(half), cudaMemcpyHostToDevice)); + + init_xy<<<(NB + 255) / 256, 256>>>(d_x, NB, time(NULL)); + init_xy<<<(NB + 255) / 256, 256>>>(d_y, NB, time(NULL)); + + cublasHandle_t handle; + cublasCreate(&handle); + half b = 0.0; + half alpha = 1.0; + for (int i = 0; i < n_iter; i++) { + float beta = (n_iter == 1) ? 
-delta : static_cast(i) / (n_iter - 1) - delta; + + sign_kernel<<<(NB + 255) / 256, 256>>>(d_x, signx, NB); + + CHECK_CUBLAS(cublasGemmEx(handle, CUBLAS_OP_N, CUBLAS_OP_N, B, N, N, &alpha, signx, CUDA_R_16F, B, d_J, + CUDA_R_16F, N, &b, tmp, CUDA_R_16F, B, CUBLAS_COMPUTE_16F, + CUBLAS_GEMM_ALGO15_TENSOR_OP)); + + update_h_tail_half<<<(NB + 255) / 256, 256>>>(tmp, d_x, d_y, d_h, NB, xi, beta, delta, dt); + } + CHECK_CUDA(cudaMemcpy(h_x.data(), d_x, NB * sizeof(half), cudaMemcpyDeviceToHost)) + + for (int i = 0; i < N; i++) { + for (int j = 0; j < B; j++) { + x[i * B + j] = static_cast(h_x[i * B + j]); + } + } + + HANDLE_ERROR(cudaFree(d_h)); + HANDLE_ERROR(cudaFree(d_J)); + HANDLE_ERROR(cudaFree(d_x)); + HANDLE_ERROR(cudaFree(d_y)); + HANDLE_ERROR(cudaFree(tmp)); + HANDLE_ERROR(cudaFree(signx)); +} + +void SBBase::cublas_warmup(int N, int B) { + int8_t *d_J, *d_x; + int* tmp; + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&d_J), N * N * sizeof(int8_t))); + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&d_x), N * B * sizeof(int8_t))); + HANDLE_ERROR(cudaMalloc(reinterpret_cast(&tmp), N * B * sizeof(int))); + cublasHandle_t handle; + cublasCreate(&handle); + int alpha = 1; + int beta = 0; + for (int i = 0; i < 100; i++) { + CHECK_CUBLAS(cublasGemmEx(handle, CUBLAS_OP_T, CUBLAS_OP_N, N, B, N, &alpha, d_J, CUDA_R_8I, N, d_x, CUDA_R_8I, + N, &beta, tmp, CUDA_R_32I, N, CUDA_R_32I, CUBLAS_GEMM_ALGO15_TENSOR_OP)); + } + HANDLE_ERROR(cudaFree(d_J)); + HANDLE_ERROR(cudaFree(d_x)); + HANDLE_ERROR(cudaFree(tmp)); +} + +} // namespace mindquantum::algorithm::qaia::detail diff --git a/ccsrc/python/CMakeLists.txt b/ccsrc/python/CMakeLists.txt index 1cea0d7bf2556d75840cddb57f026b2f5b30de5b..b2fa5403da5d6b53ff5cdebf8b02100ba2d0b910 100644 --- a/ccsrc/python/CMakeLists.txt +++ b/ccsrc/python/CMakeLists.txt @@ -20,6 +20,7 @@ add_subdirectory(core) # ============================================================================== +add_subdirectory(algorithm) add_subdirectory(mqbackend) add_subdirectory(simulator) add_subdirectory(device) diff --git a/ccsrc/python/algorithm/CMakeLists.txt b/ccsrc/python/algorithm/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..66da5c23b0a27b4c4cfa06be39b79ab4f172e687 --- /dev/null +++ b/ccsrc/python/algorithm/CMakeLists.txt @@ -0,0 +1,26 @@ +# ============================================================================== +# +# Copyright 2022 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
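Before the Python bindings below, it is worth summarizing what the kernels above compute. The following is a hedged CPU reference of one ballistic-SB run, matching the update order of the `bSB_update_*` functions (linear beta ramp from `-delta`, momentum update from the cuBLAS product, position update, inelastic walls at ±1 with momentum reset); the function name `bsb_reference` and the default hyperparameters are illustrative assumptions. The discrete variant (`dSB_update_*`) differs only in multiplying `J` by `sign(x)` instead of `x`.

```python
import numpy as np

def bsb_reference(J, B=1, xi=0.1, delta=1.0, dt=0.3, n_iter=1000, rng=None):
    """CPU sketch of one bSB run; columns of x are independent trajectories."""
    rng = np.random.default_rng() if rng is None else rng
    N = J.shape[0]
    x = rng.uniform(-0.01, 0.01, size=(N, B))  # small random start, mirroring init_xy
    y = rng.uniform(-0.01, 0.01, size=(N, B))
    for i in range(n_iter):
        beta = -delta if n_iter == 1 else i / (n_iter - 1) - delta
        y += xi * (J @ x) + beta * x   # J @ x is the cublasGemmEx product (tmp)
        x += y * delta * dt
        hit = np.abs(x) >= 1.0         # walls at +/-1 (+/-127 in the int8 kernels)
        x = np.clip(x, -1.0, 1.0)
        y[hit] = 0.0                   # inelastic wall: momentum reset, as in update_tail
    return np.sign(x)                  # candidate spin configuration per trajectory
```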
+# +# ============================================================================== + +if(ENABLE_CUDA) + pybind11_add_module(_qaia_sb MODULE + ${CMAKE_CURRENT_SOURCE_DIR}/lib/_qaia_sb.cu + OUTPUT_HINT "${MQ_PYTHON_PACKAGE_NAME}") + force_at_least_cxx17_workaround(_qaia_sb) + target_link_libraries(_qaia_sb PUBLIC mq_python_core qaia_sb_gpu) + set_target_properties(_qaia_sb PROPERTIES CUDA_RUNTIME_LIBRARY $,Static,Shared>) +endif() diff --git a/ccsrc/python/algorithm/lib/_qaia_sb.cu b/ccsrc/python/algorithm/lib/_qaia_sb.cu new file mode 100644 index 0000000000000000000000000000000000000000..de5cbc44bfb97e283223beed742d6dfa9c71d692 --- /dev/null +++ b/ccsrc/python/algorithm/lib/_qaia_sb.cu @@ -0,0 +1,80 @@ +/** + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "algorithm/qaia/csr_base.h" +#include "algorithm/qaia/detail/gpu_sb.cuh" +#include "algorithm/qaia/detail/para.h" +#include "algorithm/qaia/detail/sb_helper.h" +#include "config/constexpr_type_name.h" +#include "config/format/std_complex.h" +#include "config/type_traits.h" +#include "core/mq_base_types.h" + +namespace py = pybind11; +using namespace pybind11::literals; // NOLINT(build/namespaces_literals) +using mindquantum::Index; + +template +void sb_update(const py::object& csr, const py::array_t& x, const py::array_t& h, int B, float xi, + float delta, float dt, int n_iter) { + auto indices = csr.attr("indices").cast>(); + auto indptr = csr.attr("indptr").cast>(); + auto data = csr.attr("data").cast>(); + int nnz = data.size(); + Index* raw_indices = static_cast(indices.request().ptr); + Index* raw_indptr = static_cast(indptr.request().ptr); + double* raw_data = static_cast(data.request().ptr); + auto shape = csr.attr("shape").cast(); + int nrows = shape[0].cast(); + int ncols = shape[1].cast(); + int N = nrows; + mindquantum::sparse::CsrBase csr_matrix(N, nnz, raw_indptr, raw_indices, raw_data); + + double* raw_x = static_cast(x.request().ptr); + mindquantum::algorithm::qaia::detail::Para paras(B, xi, delta, dt, n_iter); + + double* raw_h = static_cast(h.request().ptr); + int ndim = h.ndim(); + + mindquantum::algorithm::qaia::detail::SBUpdater::update(csr_matrix, raw_x, paras, raw_h, ndim); +} + +PYBIND11_MODULE(_qaia_sb, module) { + module.def("cuda_init", mindquantum::algorithm::qaia::detail::SBBase::cublas_warmup, "warmup cuBLAS"); + module.def("bsb_update_h_int8", &sb_update<0, int8_t, true>, "BSB update func(int8_t) with h"); + module.def("bsb_update_h_half", &sb_update<0, half, true>, "BSB update func(half) with h"); + module.def("dsb_update_h_int8", &sb_update<1, int8_t, true>, "DSB update func(int8_t) with h"); + module.def("dsb_update_h_half", &sb_update<1, half, true>, "DSB update func(half) with h"); + + module.def("bsb_update_int8", &sb_update<0, int8_t, false>, "BSB update func(int8_t) without h"); + 
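These `module.def` entries back the `device`/`precision` options documented for BSB and DSB later in this patch. A hedged sketch of the user-facing path they serve; the toy couplings are illustrative, and note that the int8 path quantizes `J` onto [-127, 127], so couplings should be scaled to roughly unit magnitude:

```python
# Hedged usage sketch of the high-level entry point served by these bindings.
import numpy as np
from scipy.sparse import csr_matrix
from mindquantum.algorithm.qaia import BSB

J = csr_matrix(np.array([[0., -1.], [-1., 0.]]))   # toy Ising couplings
solver = BSB(J, batch_size=64, n_iter=1000, device='gpu', precision='int8')
solver.update()   # one full run; dispatches to bsb_update_int8 defined above
```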
module.def("bsb_update_half", &sb_update<0, half, false>, "BSB update func(half) without h"); + module.def("dsb_update_int8", &sb_update<1, int8_t, false>, "DSB update func(int8_t) without h"); + module.def("dsb_update_half", &sb_update<1, half, false>, "DSB update func(half) without h"); +} diff --git a/docs/api_python/algorithm/compiler/mindquantum.algorithm.compiler.cnry_decompose.rst b/docs/api_python/algorithm/compiler/mindquantum.algorithm.compiler.cnry_decompose.rst new file mode 100644 index 0000000000000000000000000000000000000000..672cb844f33851d81ef56c760481b05f6d3a9313 --- /dev/null +++ b/docs/api_python/algorithm/compiler/mindquantum.algorithm.compiler.cnry_decompose.rst @@ -0,0 +1,12 @@ +mindquantum.algorithm.compiler.cnry_decompose +============================================= + +.. py:function:: mindquantum.algorithm.compiler.cnry_decompose(gate: gates.RY) + + 分解一个受控的 :class:`~.core.gates.RY` 门。 + + 参数: + - **gate** (:class:`~.core.gates.RY`) - 有零或多个控制位的 :class:`~.core.gates.RY` 门。 + + 返回: + List[:class:`~.core.circuit.Circuit`],可能的分解方式。 diff --git a/docs/api_python/algorithm/qaia/mindquantum.algorithm.qaia.BSB.rst b/docs/api_python/algorithm/qaia/mindquantum.algorithm.qaia.BSB.rst index 13396cd74605b60001517b0229a9d742e8a88f41..a5d5b9eff840d7853f6c476fcfdc2f7ed2320b27 100644 --- a/docs/api_python/algorithm/qaia/mindquantum.algorithm.qaia.BSB.rst +++ b/docs/api_python/algorithm/qaia/mindquantum.algorithm.qaia.BSB.rst @@ -1,7 +1,7 @@ mindquantum.algorithm.qaia.BSB =============================== -.. py:class:: mindquantum.algorithm.qaia.BSB(J, h=None, x=None, n_iter=1000, batch_size=1, dt=1, xi=None) +.. py:class:: mindquantum.algorithm.qaia.BSB(J, h=None, x=None, n_iter=1000, batch_size=1, dt=1, xi=None, device='cpu', precision='float32') 弹道模拟分叉算法。 @@ -12,9 +12,11 @@ mindquantum.algorithm.qaia.BSB - **h** (numpy.array) - 外场强度,维度为 :math:`(N, )`。 - **x** (numpy.array) - 自旋初始化配置,维度为 :math:`(N x batch_size)`。默认值: ``None``。 - **n_iter** (int) - 迭代步数。默认值: ``1000``。 - - **batch_size** (int) - 样本个数。默认值为: ``1``。 - - **dt** (float) - 迭代步长。默认值: ``0.1``。 + - **batch_size** (int) - 样本个数。默认值: ``1``。 + - **dt** (float) - 迭代步长。默认值: ``1``。 - **xi** (float) - 频率维数,正的常数。默认值: ``None``。 + - **device** (str) - 计算设备('cpu'或'gpu')。默认值: ``'cpu'``。 + - **precision** (str) - 使用GPU时的精度类型('float32'、'float16'或'int8')。默认值: ``'float32'``。 .. py:method:: update() diff --git a/docs/api_python/algorithm/qaia/mindquantum.algorithm.qaia.DSB.rst b/docs/api_python/algorithm/qaia/mindquantum.algorithm.qaia.DSB.rst index e5463048b601f97d9fae6fdd285ac7472dabfd30..6695fd54de22a3e451a8ef6c5e044c084fb509df 100644 --- a/docs/api_python/algorithm/qaia/mindquantum.algorithm.qaia.DSB.rst +++ b/docs/api_python/algorithm/qaia/mindquantum.algorithm.qaia.DSB.rst @@ -1,7 +1,7 @@ mindquantum.algorithm.qaia.DSB =============================== -.. py:class:: mindquantum.algorithm.qaia.DSB(J, h=None, x=None, n_iter=1000, batch_size=1, dt=1, xi=None) +.. 
py:class:: mindquantum.algorithm.qaia.DSB(J, h=None, x=None, n_iter=1000, batch_size=1, dt=1, xi=None)
+.. py:class:: mindquantum.algorithm.qaia.DSB(J, h=None, x=None, n_iter=1000, batch_size=1, dt=1, xi=None, device='cpu', precision='float32')
 
     离散模拟分叉算法。
 
@@ -12,9 +12,11 @@ mindquantum.algorithm.qaia.DSB
     - **h** (numpy.array) - 外场强度,维度为 :math:`(N, )`。
     - **x** (numpy.array) - 自旋初始化配置,维度为 :math:`(N x batch_size)`。默认值: ``None``。
     - **n_iter** (int) - 迭代步数。默认值: ``1000``。
-    - **batch_size** (int) - 样本个数。默认值为: ``1``。
-    - **dt** (float) - 迭代步长。默认值: ``0.1``。
+    - **batch_size** (int) - 样本个数。默认值: ``1``。
+    - **dt** (float) - 迭代步长。默认值: ``1``。
     - **xi** (float) - 频率维数,正的常数。默认值: ``None``。
+    - **device** (str) - 计算设备('cpu' 或 'gpu')。默认值: ``'cpu'``。
+    - **precision** (str) - 使用 GPU 时的精度类型('float32'、'float16'、'int8')。默认值: ``'float32'``。
 
 .. py:method:: update()
diff --git a/docs/api_python_en/algorithm/mindquantum.algorithm.compiler.rst b/docs/api_python_en/algorithm/mindquantum.algorithm.compiler.rst
index 8098cfe59bfc066aaa8897580bfb3583875d02b4..4db7a8197e69cb22ae4fe17a2b21fc1225109b2b 100644
--- a/docs/api_python_en/algorithm/mindquantum.algorithm.compiler.rst
+++ b/docs/api_python_en/algorithm/mindquantum.algorithm.compiler.rst
@@ -16,6 +16,7 @@ Fixed decompose rules
     mindquantum.algorithm.compiler.rxx_decompose
     mindquantum.algorithm.compiler.crxx_decompose
     mindquantum.algorithm.compiler.cry_decompose
+    mindquantum.algorithm.compiler.cnry_decompose
     mindquantum.algorithm.compiler.ryy_decompose
     mindquantum.algorithm.compiler.cryy_decompose
     mindquantum.algorithm.compiler.rzz_decompose
diff --git a/mindquantum/algorithm/compiler/decompose/fixed_decompose/__init__.py b/mindquantum/algorithm/compiler/decompose/fixed_decompose/__init__.py
index 713d5c93bb601e8c16311639e3c9f0ca4c04b434..6330de08464feb3ea638cfddbb343770037005ea 100644
--- a/mindquantum/algorithm/compiler/decompose/fixed_decompose/__init__.py
+++ b/mindquantum/algorithm/compiler/decompose/fixed_decompose/__init__.py
@@ -33,7 +33,7 @@ from . import (
 from .h_related import ch_decompose
 from .rx_related import crx_decompose
 from .rxx_related import crxx_decompose, rxx_decompose
-from .ry_related import cry_decompose
+from .ry_related import cry_decompose, cnry_decompose
 from .ryy_related import cryy_decompose, ryy_decompose
 from .rz_related import crz_decompose
 from .rzz_related import rzz_decompose
diff --git a/mindquantum/algorithm/compiler/decompose/fixed_decompose/ry_related.py b/mindquantum/algorithm/compiler/decompose/fixed_decompose/ry_related.py
index 316f0a9d59373ec1c4bef60ada679805f6a7ba70..c2c78078d6a1726931fc0d444ffd74430796669f 100644
--- a/mindquantum/algorithm/compiler/decompose/fixed_decompose/ry_related.py
+++ b/mindquantum/algorithm/compiler/decompose/fixed_decompose/ry_related.py
@@ -60,6 +60,74 @@ def cry_decompose(gate: gates.RY):
         circuit += gates.X.on(target, control)
     return [circuit]
 
+def _cnry_frame(ctrl_qubits, obj_qubit, angle):
+    """Iteratively construct the circuit used by the cnry_decompose compiler rule.
-decompose_rules = ['cry_decompose']
+
+    Args:
+        ctrl_qubits (List[int]): The remaining control qubits in the current iteration.
+        obj_qubit (int): The object qubit of the RY gate to be decomposed.
+        angle (:class:`~.core.parameterresolver.ParameterResolver`): The rotation angle in the current iteration.
+
+    Returns:
+        :class:`~.core.circuit.Circuit`, the decomposed circuit for the current iteration.
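+
+    Note:
+        This is the standard recursion for multi-controlled rotations: with
+        controls [q0, rest...] and target t, the circuit unfolds as
+
+            _cnry_frame(rest, t, angle/2), X(t, q0),
+            _cnry_frame(rest, t, -angle/2), X(t, q0),
+
+        so each extra control halves the rotation angles and roughly doubles the
+        gate count, while the single-control base case is the textbook CRY
+        decomposition implemented in the if-branch below.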
+ """ + circ = Circuit() + if len(ctrl_qubits) == 1: + circ.ry(angle/2, obj_qubit) + circ.x(obj_qubit, ctrl_qubits[0]) + circ.ry(-angle/2, obj_qubit) + circ.x(obj_qubit, ctrl_qubits[0]) + else: + circ += _cnry_frame(ctrl_qubits[1:], obj_qubit, angle/2) + circ.x(obj_qubit, ctrl_qubits[0]) + circ += _cnry_frame(ctrl_qubits[1:], obj_qubit, -angle/2) + circ.x(obj_qubit, ctrl_qubits[0]) + return circ + +def cnry_decompose(gate: gates.RY): + """ + Decompose controlled :class:`~.core.gates.RY` gate. + + Args: + gate (:class:`~.core.gates.RY`): A :class:`~.core.gates.RY` gate with zero or more control qubits. + + Returns: + List[:class:`~.core.circuit.Circuit`], all possible decompose solution. + + Examples: + >>> from mindquantum.algorithm.compiler import cnry_decompose + >>> from mindquantum.core.circuit import Circuit + >>> from mindquantum.core.gates import RY + >>> cry = RY(1).on(2, [0, 1]) + >>> origin_circ = Circuit() + cry + >>> decomposed_circ = cnry_decompose(cry)[0] + >>> origin_circ + q0: ──────■─────── + ┃ + ┃ + q1: ──────■─────── + ┃ + ┏━━━┻━━━┓ + q2: ──┨ RY(1) ┠─── + ┗━━━━━━━┛ + >>> decomposed_circ + q0: ─────────────────────────────────────────■──────────────────────────────────────────■──── + ┃ ┃ + ┃ ┃ + q1: ────────────────■──────────────────■─────╂──────────────────■─────────────────■─────╂──── + ┃ ┃ ┃ ┃ ┃ ┃ + ┏━━━━━━━━━┓ ┏━┻━┓ ┏━━━━━━━━━━┓ ┏━┻━┓ ┏━┻━┓ ┏━━━━━━━━━━┓ ┏━┻━┓ ┏━━━━━━━━━┓ ┏━┻━┓ ┏━┻━┓ + q2: ──┨ RY(1/4) ┠─┨╺╋╸┠─┨ RY(-1/4) ┠─┨╺╋╸┠─┨╺╋╸┠─┨ RY(-1/4) ┠─┨╺╋╸┠─┨ RY(1/4) ┠─┨╺╋╸┠─┨╺╋╸┠── + ┗━━━━━━━━━┛ ┗━━━┛ ┗━━━━━━━━━━┛ ┗━━━┛ ┗━━━┛ ┗━━━━━━━━━━┛ ┗━━━┛ ┗━━━━━━━━━┛ ┗━━━┛ ┗━━━┛ + """ + _check_input_type('gate', gates.RY, gate) + + if not gate.ctrl_qubits: + return [Circuit(gate)] + + circuit = _cnry_frame(gate.ctrl_qubits, gate.obj_qubits[0], gate.coeff) + return [circuit] + + +decompose_rules = ['cry_decompose', 'cnry_decompose'] __all__ = decompose_rules diff --git a/mindquantum/algorithm/error_mitigation/VD.py b/mindquantum/algorithm/error_mitigation/VD.py new file mode 100644 index 0000000000000000000000000000000000000000..8dde252480a3e723312400de6b35d5ceee65502f --- /dev/null +++ b/mindquantum/algorithm/error_mitigation/VD.py @@ -0,0 +1,195 @@ +""" +VD.py - Virtual Distillation Algorithm Implementation +===================================================== + +This module implements the virtual distillation algorithm for error mitigation in quantum circuits. + +Example: + >>> from mindquantum.core.gates import AmplitudeDampingChannel, H, CNOT, RX, Z, RY + >>> from mindquantum.core.circuit.channel_adder import BitFlipAdder + >>> from mindquantum.core.circuit import Circuit + >>> from mindquantum.simulator import Simulator + >>> from mindquantum.core.operators import Hamiltonian, QubitOperator + >>> import numpy as np + + >>> circ = Circuit() + >>> circ += RY(1).on(0) + >>> from mindquantum.core.gates import BitFlipChannel + >>> noise_circ = circ + BitFlipChannel(0.005).on(0) + >>> position = [0] + >>> vd_calculator = VDCalculator(noise_circ, position, M=3, nshots=100000, para=None) + >>> result = vd_calculator.calculate() + >>> print(result) + + +Note: + This module assumes that the input observable operator is sigma_z. +""" + +from mindquantum.core.gates import SWAP, Givens +from mindquantum.core.circuit import Circuit, apply +from mindquantum.simulator import Simulator +import numpy as np +from mindquantum.core.gates import RY +from mindquantum.core.gates import BitFlipChannel + +class VDCalculator: + """ + VDCalculator class for virtual distillation. 
+
+    Attributes:
+        noise_circ: The circuit preparing the noisy state.
+        position: The qubit positions that the expectation operator acts on.
+        Mp: The power M of the density matrix used for distillation.
+        nshots: The number of sampling shots.
+        para: The parameters of the parameterized circuit, if any.
+        nqubits: The qubit number of the original circuit.
+        circ: The extended circuit built by prepare_circuit().
+
+    Methods:
+        __init__(noise_circ, position, Mp, nshots, para): Initializes the VDCalculator instance.
+        prepare_circuit(): Prepares the extended circuit for virtual distillation.
+        product_swap(res_l, res_r): Calculates the expectation of the SWAP observable.
+        product_with_z(res_l, res_r, position): Calculates the expectation with Sigma_Z inserted.
+        calculate(): Calculates the virtual distillation result.
+    """
+    def __init__(self, noise_circ, position, Mp=2, nshots=1000000, para=None):
+        self.noise_circ = noise_circ  # The circuit preparing the noisy state
+        self.position = position  # The positions the expectation operator acts on
+        self.Mp = Mp  # The power of the density matrix
+        self.nshots = nshots  # The number of sampling shots
+        self.para = para  # The parameters of the parameterized circuit, if any
+        self.nqubits = noise_circ.n_qubits  # The qubit number of the original circuit
+        self.circ = self.prepare_circuit()  # Extended circuit with Mp copies of the state
+
+    def prepare_circuit(self):
+        '''
+        Construct the circuit that prepares Mp copies of the noisy state (a direct product state).
+        '''
+        circ = Circuit()
+        circ += apply(self.noise_circ, list(range(self.nqubits)))
+        circ = circ.remove_measure()
+
+        if self.para is not None:
+            para_name = circ.params_name
+            pr = dict(zip(para_name, self.para))
+            circ = circ.apply_value(pr)
+
+        temp = Circuit()
+        for i in range(self.Mp):
+            temp += apply(circ, [i * self.nqubits + j for j in range(self.nqubits)])
+        return temp
+
+    def product_swap(self, res_l, res_r):
+        '''
+        Estimate the trace of the density matrix to the Mp-th power from two sampling results.
+        '''
+        pr_l = res_l.data
+        qvec_l = np.zeros(2 ** (self.nqubits * self.Mp))
+        for key in pr_l:
+            qvec_l[int(key, 2)] = np.sqrt(pr_l[key] / self.nshots)
+
+        pr_r = res_r.data
+        qvec_r = np.zeros(2 ** (self.nqubits * self.Mp))
+
+        for key in pr_r:
+            qvec_r[int(key, 2)] = np.sqrt(pr_r[key] / self.nshots)
+
+        # Sign bookkeeping for the Givens/SWAP network between neighbouring copies.
+        for key in pr_r:
+            for j in range(self.Mp - 1):
+                for i in range(self.nqubits):
+                    if (int(key[i + j * self.nqubits], 2) == 1 and int(key[i + (j + 1) * self.nqubits], 2) == 0):
+                        qvec_r[int(key, 2)] = -qvec_r[int(key, 2)]
+
+        return np.inner(qvec_l, qvec_r)
+
+    def product_with_z(self, res_l, res_r, position):
+        '''
+        Estimate the expectation with the observable Pauli-Z inserted at the given positions.
+        '''
+        pr_l = res_l.data
+        qvec_l = np.zeros(2 ** (self.nqubits * self.Mp))
+        for key in pr_l:
+            qvec_l[int(key, 2)] = np.sqrt(pr_l[key] / self.nshots)
+
+        pr_r = res_r.data
+        qvec_r = np.zeros(2 ** (self.nqubits * self.Mp))
+
+        for key in pr_r:
+            qvec_r[int(key, 2)] = np.sqrt(pr_r[key] / self.nshots)
+
+        # Same sign bookkeeping as in product_swap.
+        for key in pr_r:
+            for j in range(self.Mp - 1):
+                for i in range(self.nqubits):
+                    if (int(key[i + j * self.nqubits], 2) == 1 and int(key[i + (j + 1) * self.nqubits], 2) == 0):
+                        qvec_r[int(key, 2)] = -qvec_r[int(key, 2)]
+
+        # Additional sign flips implementing Pauli-Z on the chosen positions.
+        for key in pr_r:
+            for i in range(len(position)):
+                if int(key[position[i]]) == 1:
+                    qvec_r[int(key, 2)] = -qvec_r[int(key, 2)]
+        return np.inner(qvec_l, qvec_r)
+
+    def calculate(self):
+        """
+        Calculate the VD result from the sampling outcomes.
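+
+        The returned value estimates the virtual-distillation expectation
+        (with the observable O = Pauli-Z on `position`):
+
+            <O>_VD = Tr(O * rho^Mp) / Tr(rho^Mp),
+
+        where the numerator is accumulated by product_with_z over the Mp cyclic
+        placements of O and the denominator comes from product_swap.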
+        """
+        circ_swap = apply(self.circ, list(range(self.circ.n_qubits)))
+        circ_copy = apply(self.circ, list(range(self.circ.n_qubits)))
+
+        for i in range(self.Mp - 1):
+            for j in range(self.nqubits):
+                circ_copy += Givens(np.pi / 4).on([j + i * self.nqubits, j + (i + 1) * self.nqubits])
+
+        for i in range(self.Mp - 1):
+            for j in range(self.nqubits):
+                circ_swap += Givens(np.pi / 4).on([j + i * self.nqubits, j + (i + 1) * self.nqubits])
+                circ_swap += SWAP([j + i * self.nqubits, j + (i + 1) * self.nqubits])
+
+        sim = Simulator('mqvector', self.circ.n_qubits)
+        sim.reset()
+        res = sim.sampling(circ_copy.measure_all(), shots=self.nshots, seed=42)
+
+        sim.reset()
+        circ_swap = circ_swap.remove_measure()
+        res_s = sim.sampling(circ_swap.measure_all(), shots=self.nshots, seed=42)
+
+        trpm = self.product_swap(res, res_s)  # estimate of Tr(rho**Mp)
+        frac = 0
+
+        for t in range(self.Mp):
+            circ_o_swap = Circuit()
+            circ_o_swap += apply(circ_swap, list(range(circ_swap.n_qubits)))
+
+            sim.reset()
+            res_r = sim.sampling(circ_o_swap, shots=self.nshots)
+            sim.reset()
+            pos = [self.position[i] + t * self.nqubits for i in range(len(self.position))]
+            frac += self.product_with_z(res_s, res_r, pos)
+
+        tropm = frac / self.Mp  # estimate of Tr(O * rho**Mp)
+        exp = tropm / trpm
+        return exp
+
+
+# Usage example
+# noise_circ, position, Mp, nshots, para should be defined for the actual use case
+# vd_calculator = VDCalculator(noise_circ, position, Mp, nshots, para)
+# result = vd_calculator.calculate()
+# print(result)
+
+if __name__ == '__main__':
+    # Small self-check: a single noisy RY rotation followed by a bit-flip channel.
+    circuit = Circuit()
+    circuit += RY(1).on(0)
+
+    noise_c = circuit + BitFlipChannel(0.005).on(0)
+    posi = [0]
+
+    vd_calculator = VDCalculator(noise_c, posi, Mp=3, nshots=100000, para=None)
+    result = vd_calculator.calculate()
+    print(result)
diff --git a/mindquantum/algorithm/library/amplitude_encoder.py b/mindquantum/algorithm/library/amplitude_encoder.py
index 36958dc6159e91076373ceedf0c16f5ded0db02f..376f0508cdf7d6cdf6490f1edbe82c0f2d9eb356 100644
--- a/mindquantum/algorithm/library/amplitude_encoder.py
+++ b/mindquantum/algorithm/library/amplitude_encoder.py
@@ -21,29 +21,31 @@
 from mindquantum.core.parameterresolver import ParameterResolver
 from mindquantum.utils.type_value_check import _check_input_type
 from mindquantum.utils import normalize
+
 def amp_circuit(n_qubits):
     '''Construct the quantum circuit of the amplitude_encoder.'''
     circ = Circuit()
-    circ += RY(f'alpha_{n_qubits}0').on(n_qubits-1)
-    circ += RZ(f'lambda_{n_qubits}0').on(n_qubits-1)
+    circ += RY(f'alpha_{n_qubits}_0').on(n_qubits - 1)
+    circ += RZ(f'lambda_{n_qubits}_0').on(n_qubits - 1)
     for i in range(1, n_qubits):
         for j in range(int(2**i)):
             string = bin(j)[2:].zfill(i)
             for tem_qubit, bit in enumerate(string):
                 qubit = int(n_qubits - tem_qubit)
                 if bit == '0':
-                    circ += X.on(qubit-1)
+                    circ += X.on(qubit - 1)
 
-            circ += RY(f'alpha_{int(n_qubits-i)}{j}').on(int(n_qubits-1-i), list(range(n_qubits-i, n_qubits)))
-            circ += RZ(f'lambda_{int(n_qubits-i)}{j}').on(int(n_qubits-1-i), list(range(n_qubits-i, n_qubits)))
+            circ += RY(f'alpha_{int(n_qubits-i)}_{j}').on(int(n_qubits - 1 - i), list(range(n_qubits - i, n_qubits)))
+            circ += RZ(f'lambda_{int(n_qubits-i)}_{j}').on(int(n_qubits - 1 - i), list(range(n_qubits - i, n_qubits)))
 
             string = bin(j)[2:].zfill(i)
             for tem_qubit, bit in enumerate(string):
                 qubit = int(n_qubits - tem_qubit)
                 if bit == '0':
-                    circ += X.on(qubit-1)
+                    circ += X.on(qubit - 1)
 
     return circ
 
+
 # pylint: disable=too-many-locals
 def amplitude_encoder(x, n_qubits):
     """
@@ -97,10 +99,10 @@ def amplitude_encoder(x, n_qubits):
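+    # The recursion below builds a binary tree of norms: eta[v][i] is the
+    # l2-norm of the 2**v amplitudes in subtree i, so the controlled-RY angle
+    # alpha_{v}_{i} = 2*arcsin(eta[v-1][2*i+1] / eta[v][i]) splits the norm
+    # between the two child subtrees, while lambda_{v}_{i} (from omega) fixes
+    # the relative phases.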
     for i in range(len(vec)):
         eta[0].append(abs(vec[i]))
 
-    for v in range(1, n_qubits+1):
+    for v in range(1, n_qubits + 1):
         eta.append([])
-        for i in range(int(2**(n_qubits-v))):
-            eta[v].append(np.sqrt(eta[v-1][2*i]**2 + eta[v-1][2*i+1]**2))
+        for i in range(int(2 ** (n_qubits - v))):
+            eta[v].append(np.sqrt(eta[v - 1][2 * i] ** 2 + eta[v - 1][2 * i + 1] ** 2))
 
     omega_0 = []
     for i in range(len(vec)):
@@ -108,26 +110,26 @@ def amplitude_encoder(x, n_qubits):
     omega = [[]]
     for i in range(len(vec)):
-        omega[0].append(2*np.angle(vec[i]))
+        omega[0].append(2 * np.angle(vec[i]))
 
-    for v in range(1, n_qubits+1):
+    for v in range(1, n_qubits + 1):
         omega.append([])
-        for i in range(2**(n_qubits-v)):
-            omega[v].append(0.)
+        for i in range(2 ** (n_qubits - v)):
+            omega[v].append(0.0)
             for j in range(2**v):
-                omega[v][i] += omega_0[i*2**v+j]/2**(v-1)
+                omega[v][i] += omega_0[i * 2**v + j] / 2 ** (v - 1)
 
     alphas = {}
     for v in range(n_qubits, 0, -1):
-        for i in range(2**(n_qubits-v)):
+        for i in range(2 ** (n_qubits - v)):
             if eta[v][i] < 1e-6:
-                alphas[f'alpha_{v}{i}'] = 0
+                alphas[f'alpha_{v}_{i}'] = 0
             else:
-                alphas[f'alpha_{v}{i}'] = 2*np.arcsin(eta[v-1][2*i+1]/eta[v][i])
+                alphas[f'alpha_{v}_{i}'] = 2 * np.arcsin(eta[v - 1][2 * i + 1] / eta[v][i])
 
     lambs = {}
     for v in range(n_qubits, 0, -1):
-        for i in range(2**(n_qubits-v)):
-            lambs[f'lambda_{v}{i}'] = omega[v-1][2*i+1] - omega[v][i]
+        for i in range(2 ** (n_qubits - v)):
+            lambs[f'lambda_{v}_{i}'] = omega[v - 1][2 * i + 1] - omega[v][i]
 
     return amp_circuit(n_qubits), ParameterResolver({**alphas, **lambs})
diff --git a/mindquantum/algorithm/nisq/qnn/qn.py b/mindquantum/algorithm/nisq/qnn/qn.py
new file mode 100644
index 0000000000000000000000000000000000000000..4bcaf0619dcfc68bd4fca286e8ab30125c7320bf
--- /dev/null
+++ b/mindquantum/algorithm/nisq/qnn/qn.py
@@ -0,0 +1,294 @@
+'''
+This module implements the quantum neuron unit from arXiv:1711.11240v1 and the
+quantum Hopfield network built on top of it.
+'''
+
+import numpy as np
+from numpy import pi, cos, sin, sqrt
+import matplotlib.pyplot as plt
+
+from mindquantum.core import Circuit, X, H, RY, Y, RZ, Measure
+from mindquantum.core.circuit import dagger
+from mindquantum.simulator import Simulator  # pylint: disable=unused-import
+
+
+def load_w():
+    '''
+    Load the network weights from the attractor patterns given in the paper.
+    '''
+    mat = np.array([[0, 1, 1], [1, 0, 0], [0, 1, 1]])
+    mat1 = np.array([[1, 0, 1], [0, 1, 0], [0, 1, 0]])
+    data = np.array([mat, mat1])
+    w = w_matrix(data)
+    return w
+
+
+def mat2vec(mat):
+    '''
+    Convert a matrix to a vector.
+
+    Args:
+        mat: the input matrix of shape (m, m).
+
+    Returns:
+        the flattened vector of length m * m, as a Python list.
+    '''
+    return mat.flatten().tolist()
+
+
+def w_matrix(data):
+    '''
+    Build the weight matrix from the (n, m, m) pattern array using the Hebbian rule.
+    '''
+    n_data = len(data)
+    dim = len(data[0]) ** 2
+    w = np.zeros((dim, dim))
+    for i in range(n_data):
+        vec = mat2vec(data[i])
+        w += np.outer(vec, vec)
+    diag_w = np.diag(np.diag(w))
+    w = w - diag_w
+    w /= n_data
+    return w
+
+
+def input_rotation(w, gamma, bias, update_qubit: int, num_update: int):
+    '''
+    Args:
+        w: the weight matrix.
+        gamma: the scaling factor.
+        bias: the bias vector.
+        update_qubit: the qubit that needs to be updated.
+        num_update: the index of the current update (selects the ancilla qubit).
+
+    Returns:
+        the circuit of the input rotation.
+    '''
+    assert len(w) == len(bias)
+    n = len(w)
+    circ = Circuit()
+    for ctrl_qubit in range(n):
+        if w[update_qubit][ctrl_qubit] == 0:
+            continue
+        circ += RY(4 * gamma * w[update_qubit][ctrl_qubit]).on(n + num_update, ctrl_qubit)
+    circ += RY(2 * bias[update_qubit]).on(n + num_update)
+    return circ
+
+
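+# The quantum neuron below follows the repeat-until-success construction of
+# arXiv:1711.11240: controlled RY rotations load theta = b + sum_i w_i * x_i
+# onto an ancilla, and the controlled-Y / RZ(-pi/2) / uncompute sequence
+# implements one RUS step, which on success (ancilla measured 0) applies an
+# effective rotation by the nonlinear activation q(theta) = arctan(tan(theta)**2).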
+def quantum_neuron(w, gamma, bias, update_qubit: int, num_update: int):
+    '''
+    The quantum neuron unit.
+
+    Args:
+        w: the weight matrix.
+        gamma: the scaling factor.
+        bias: the bias vector.
+        update_qubit: the qubit that needs to be updated.
+        num_update: the index of the current update (selects the ancilla qubit).
+    '''
+    n = len(w)  # n data qubits; ancilla n + num_update hosts this update
+    rotationcirc = input_rotation(w, gamma, bias, update_qubit, num_update)
+    rotationcirc_dag = dagger(rotationcirc)
+    circ = Circuit()  # initial circuit instance
+    circ += rotationcirc
+    circ += Y.on(update_qubit, n + num_update)
+    circ += RZ(-pi/2).on(n + num_update)
+    circ += rotationcirc_dag
+    circ += Measure().on(n + num_update)
+    return circ
+
+
+def initial_state(vec):
+    '''
+    Convert a state string of '0', '1' and '+' into a preparation circuit.
+    '''
+    circ = Circuit()
+    for i, x in enumerate(vec):
+        if x == '+':
+            circ += H.on(i)
+        elif x == '0':
+            continue
+        elif x == '1':
+            circ += X.on(i)
+    return circ
+
+
+def q(theta, k):
+    '''
+    The nonlinear activation function after k repeat-until-success steps.
+    '''
+    return np.arctan(np.tan(0.7 * theta) ** (2**k))
+
+
+def ry(theta):
+    '''
+    Matrix form of the RY quantum gate.
+    '''
+    return np.array([[cos(theta/2), -sin(theta/2)], [sin(theta/2), cos(theta/2)]])
+
+
+def ry_activate(theta, psi):
+    '''
+    Apply RY(2 * theta) to the state psi that needs to be activated.
+    '''
+    act_psi = ry(2 * theta) @ psi
+    return act_psi
+
+
+def first_update(vec1, w, gamma, bias, update_qubit=0, num_update=0):
+    '''
+    The first update.
+    '''
+    # The corresponding quantum circuit is constructed for reference; the
+    # update itself is emulated classically below.
+    circ = Circuit()
+    circ += initial_state(vec1)
+    circ += quantum_neuron(w, gamma, bias, update_qubit, num_update)
+
+    psi = vec1[update_qubit]
+    if psi == '+':
+        psi = 1/sqrt(2) * np.array([1, 1])
+    elif psi == '1':
+        psi = np.array([0, 1])
+    elif psi == '0':
+        psi = np.array([1, 0])
+
+    theta0 = 4*gamma*0.5 + 2*bias[update_qubit]
+    theta1 = 4*gamma*1 + 2*bias[update_qubit]
+
+    act_psi = 1/sqrt(2) * ry_activate(q(theta0, 1), psi) + 1/sqrt(2) * ry_activate(q(theta1, 1), psi)
+    if abs(act_psi[0]) >= abs(act_psi[1]):
+        vec1[update_qubit] = '1'
+    else:
+        vec1[update_qubit] = '0'
+    return vec1
+
+
+def second_update(vec2, w, gamma, bias, update_qubit=1, num_update=1):
+    '''
+    The second update.
+    '''
+    circ = Circuit()
+    circ += initial_state(vec2)
+    circ += quantum_neuron(w, gamma, bias, update_qubit, num_update)
+    psi = vec2[update_qubit]
+
+    if psi == '+':
+        psi = 1/sqrt(2) * np.array([1, 1])
+    elif psi == '1':
+        psi = np.array([0, 1])
+    elif psi == '0':
+        psi = np.array([1, 0])
+    theta0 = 4*gamma*1.5 + 2*bias[update_qubit]
+    theta1 = 4*gamma*2 + 2*bias[update_qubit]
+    act_psi = 1/sqrt(2) * ry_activate(q(theta0, 1), psi) + 1/sqrt(2) * ry_activate(q(theta1, 1), psi)
+    if abs(act_psi[0]) <= abs(act_psi[1]):
+        vec2[update_qubit] = '1'
+    else:
+        vec2[update_qubit] = '0'
+    return vec2
+
+
+def third_update(vec3, w, gamma, bias, update_qubit=2, num_update=2):
+    '''
+    The third update.
+    '''
+    circ = Circuit()
+    circ += initial_state(vec3)
+    circ += quantum_neuron(w, gamma, bias, update_qubit, num_update)
+    psi = vec3[update_qubit]
+
+    if psi == '+':
+        psi = 1/sqrt(2) * np.array([1, 1])
+    elif psi == '1':
+        psi = np.array([0, 1])
+    elif psi == '0':
+        psi = np.array([1, 0])
+    theta0 = 4*gamma*2.5 + 2*bias[update_qubit]
+    act_psi = ry_activate(q(theta0, 1), psi)
+    if abs(act_psi[0]) <= abs(act_psi[1]):
+        vec3[update_qubit] = '1'
+    else:
+        vec3[update_qubit] = '0'
+    return vec3
+
+
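+# Note: the hard-coded theta0/theta1 values in the three update functions above
+# appear to enumerate the possible weighted inputs b + sum_i w_i * x_i of this
+# particular 9-pixel network (a '+' pixel contributes both a 0 and a 1 branch),
+# so each classical emulation averages the activated state over the branches.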
+def vec2mat(vec):
+    '''
+    Convert a vector back to a square matrix.
+    '''
+    vec = np.array(vec)
+    dim = len(vec)
+    data = np.reshape(vec, (int(sqrt(dim)), int(sqrt(dim))))
+    return data
+
+
+def str2int(vec):
+    '''
+    Convert the state string to numeric values for plotting.
+    '''
+    new_vec = []
+    for i in vec:
+        if i == '0':
+            new_vec.append(0)
+        elif i == '+':
+            new_vec.append(0.5)
+        elif i == '1':
+            new_vec.append(1)
+    return new_vec
+
+
+def plot(vec):
+    '''
+    Plot a single state as an image.
+    '''
+    new_vec = str2int(vec)
+    mat = vec2mat(new_vec)
+    plt.imshow(mat)
+    plt.axis('off')
+    plt.colorbar()
+    # plt.savefig(f'../fig/result/update_{update}.png')
+    plt.show()
+
+
+def total_plot(total_state):
+    '''
+    Plot the whole sequence of updates.
+    '''
+    total_state1 = []
+    for i in total_state:
+        total_state1.append(str2int(i))
+    fig, axs = plt.subplots(1, 4, figsize=(20, 6))  # 1 row, 4 columns of plots
+    titles = ['initial input', 'after 1 update', 'after 2 updates', 'after 3 updates']
+    cmap = plt.cm.viridis  # pylint: disable=no-member
+    norm = plt.Normalize(0, 1)
+    cbar = axs[0].imshow(vec2mat(total_state1[0]), cmap=cmap, norm=norm)
+    axs[0].set_title(titles[0])
+    for i in range(1, len(total_state1)):
+        updated_matrix = vec2mat(total_state1[i])
+        axs[i].imshow(updated_matrix, cmap=cmap, norm=norm)
+        axs[i].set_title(titles[i])
+
+    fig.colorbar(cbar, ax=axs, orientation='vertical', fraction=0.02, pad=0.04)
+    for ax in axs:
+        ax.axis('off')
+    # plt.tight_layout()
+    plt.savefig('../fig/result/total_update1.png')
+    plt.show()
+
+
+def main():
+    '''
+    Run the full Hopfield-network demo: three neuron updates and the plots.
+    '''
+    initial_state_vec = ['+', '+', '+', '1', '0', '0', '0', '1', '1']
+    w = load_w()
+    bias = np.array([np.pi/4] * 9)
+    gamma = 0.7 / (w.max()*len(w) + bias.max())
+    total_state = []
+    total_state.append(initial_state_vec.copy())
+    for i in range(3):
+        if i == 0:
+            update_state = first_update(initial_state_vec, w, gamma, bias)
+            plot(update_state)
+            total_state.append(update_state.copy())
+        elif i == 1:
+            update_state = second_update(update_state, w, gamma, bias)
+            plot(update_state)
+            total_state.append(update_state.copy())
+        elif i == 2:
+            update_state = third_update(update_state, w, gamma, bias)
+            plot(update_state)
+            total_state.append(update_state.copy())
+    total_plot(total_state)
+
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff --git a/mindquantum/algorithm/qaia/SB.py b/mindquantum/algorithm/qaia/SB.py
index a7b0a82ef08276dc50b0a51d9301a03f4c4ed60e..50143160709fd4d3fc7b618ba0bd2b1fb479df19 100644
--- a/mindquantum/algorithm/qaia/SB.py
+++ b/mindquantum/algorithm/qaia/SB.py
@@ -14,6 +14,7 @@
 # ============================================================================
 """Simulated bifurcation (SB) algorithms and its variants."""
 # pylint: disable=invalid-name
+import warnings
 import numpy as np
 from scipy.sparse import csr_matrix
@@ -143,36 +144,70 @@ class BSB(SB):  # noqa: N801
         batch_size (int): The number of sampling. Default: ``1``.
         dt (float): The step size. Default: ``1``.
         xi (float): positive constant with the dimension of frequency. Default: ``None``.
+        device (str): The device to use for computation ('cpu' or 'gpu'). Default: ``'cpu'``.
+        precision (str): Precision type when using GPU ('float32', 'float16', 'int8'). Default: ``'float32'``.
     """
 
     # pylint: disable=too-many-arguments
-    def __init__(
-        self,
-        J,
-        h=None,
-        x=None,
-        n_iter=1000,
-        batch_size=1,
-        dt=1,
-        xi=None,
-    ):
+    def __init__(self, J, h=None, x=None, n_iter=1000, batch_size=1, dt=1, xi=None, device='cpu', precision='float32'):
         """Construct BSB algorithm."""
         super().__init__(J, h, x, n_iter, batch_size, dt, xi)
+
+        self.device = device.lower()
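+        # Note (the same applies to DSB below): if the optional _qaia_sb CUDA
+        # extension is unavailable, the solver silently falls back to the NumPy
+        # implementation in _cpu_update; in that case a non-float32 `precision`
+        # is effectively ignored rather than raising, because the check further
+        # down inspects the originally requested device.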
+        if self.device == 'gpu':
+            try:
+                from mindquantum import _qaia_sb
+            except ImportError:
+                warnings.warn(
+                    "Unable to import bSB GPU backend. This could be due to your environment "
+                    "not satisfying the requirements. Will use CPU backend instead.",
+                    stacklevel=2,
+                )
+                self.device = 'cpu'
+            except RuntimeError as err:
+                warnings.warn(f"Disable bSB GPU backend due to: {err}. Will use CPU backend instead.", stacklevel=2)
+                self.device = 'cpu'
+
+        self.precision = precision.lower()
+        if self.precision not in ['float32', 'float16', 'int8']:
+            raise ValueError("precision must be one of 'float32', 'float16', 'int8'")
+
+        if self.precision != 'float32' and device.lower() == 'cpu':
+            raise NotImplementedError(f"BSB with {precision} precision only supports GPU computation")
+
+        self.initialize()
+
+        if self.device == 'gpu':
+            _qaia_sb.cuda_init(self.J.shape[0], self.batch_size)
+            if self.precision == 'float32':
+                self._update_func = _qaia_sb.bsb_update
+            elif self.precision == 'float16':
+                self._update_func = _qaia_sb.bsb_update_half if self.h is None else _qaia_sb.bsb_update_h_half
+            else:  # int8
+                self._update_func = _qaia_sb.bsb_update_int8 if self.h is None else _qaia_sb.bsb_update_h_int8
+            self._is_gpu = True
+        else:
+            self._update_func = self._cpu_update
+            self._is_gpu = False
+
+    # pylint: disable=unused-argument
+    def _cpu_update(self, J, x, h, batch_size, xi, delta, dt, n_iter):
+        """CPU implementation of update."""
+        # Modified explicit symplectic Euler step of ballistic SB:
+        # y += (-(delta - p_i) * x + xi * J @ x) * dt, then x += delta * y * dt,
+        # followed by inelastic walls at |x| = 1.
+        for i in range(n_iter):
+            if h is None:
+                self.y += (-(delta - self.p[i]) * x + xi * J.dot(x)) * dt
+            else:
+                self.y += (-(delta - self.p[i]) * x + xi * (J.dot(x) + h)) * dt
+            x += dt * self.y * delta
+
+            cond = np.abs(x) > 1
+            # Clamp in place so the caller's self.x sees the walls as well;
+            # rebinding x here would silently drop the clamped values.
+            x[cond] = np.sign(x[cond])
+            self.y = np.where(cond, np.zeros_like(x), self.y)
 
     # pylint: disable=attribute-defined-outside-init
     def update(self):
         """Dynamical evolution based on Modified explicit symplectic Euler method."""
-        for i in range(self.n_iter):
-            if self.h is None:
-                self.y += (-(self.delta - self.p[i]) * self.x + self.xi * self.J.dot(self.x)) * self.dt
-            else:
-                self.y += (-(self.delta - self.p[i]) * self.x + self.xi * (self.J.dot(self.x) + self.h)) * self.dt
-            self.x += self.dt * self.y * self.delta
-
-            cond = np.abs(self.x) > 1
-            self.x = np.where(cond, np.sign(self.x), self.x)
-            self.y = np.where(cond, np.zeros_like(self.x), self.y)
+        self._update_func(self.J, self.x, self.h, self.batch_size, self.xi, self.delta, self.dt, self.n_iter)
 
 
 class DSB(SB):  # noqa: N801
@@ -190,36 +225,66 @@
         batch_size (int): The number of sampling. Default: ``1``.
         dt (float): The step size. Default: ``1``.
         xi (float): positive constant with the dimension of frequency. Default: ``None``.
+        device (str): The device to use for computation ('cpu' or 'gpu'). Default: ``'cpu'``.
+        precision (str): Precision type when using GPU ('float32', 'float16', 'int8'). Default: ``'float32'``.
     """
 
     # pylint: disable=too-many-arguments
-    def __init__(
-        self,
-        J,
-        h=None,
-        x=None,
-        n_iter=1000,
-        batch_size=1,
-        dt=1,
-        xi=None,
-    ):
+    def __init__(self, J, h=None, x=None, n_iter=1000, batch_size=1, dt=1, xi=None, device='cpu', precision='float32'):
        """Construct DSB algorithm."""
         super().__init__(J, h, x, n_iter, batch_size, dt, xi)
+
+        self.device = device.lower()
+        if self.device == 'gpu':
+            try:
+                from mindquantum import _qaia_sb
+            except ImportError:
+                warnings.warn(
+                    "Unable to import dSB GPU backend. This could be due to your environment "
+                    "not satisfying the requirements. Will use CPU backend instead.",
+                    stacklevel=2,
+                )
+                self.device = 'cpu'
+            except RuntimeError as err:
+                warnings.warn(f"Disable dSB GPU backend due to: {err}. Will use CPU backend instead.", stacklevel=2)
+                self.device = 'cpu'
+
+        self.precision = precision.lower()
+        if self.precision not in ['float32', 'float16', 'int8']:
+            raise ValueError("precision must be one of 'float32', 'float16', 'int8'")
+
+        if self.precision != 'float32' and device.lower() == 'cpu':
+            raise NotImplementedError(f"DSB with {precision} precision only supports GPU computation")
+
+        self.initialize()
+
+        if self.device == 'gpu':
+            _qaia_sb.cuda_init(self.J.shape[0], self.batch_size)
+            if self.precision == 'float32':
+                self._update_func = _qaia_sb.dsb_update
+            elif self.precision == 'float16':
+                self._update_func = _qaia_sb.dsb_update_half if self.h is None else _qaia_sb.dsb_update_h_half
+            else:  # int8
+                self._update_func = _qaia_sb.dsb_update_int8 if self.h is None else _qaia_sb.dsb_update_h_int8
+            self._is_gpu = True
+        else:
+            self._update_func = self._cpu_update
+            self._is_gpu = False
+
+    # pylint: disable=unused-argument
+    def _cpu_update(self, J, x, h, batch_size, xi, delta, dt, n_iter):
+        """CPU implementation of update."""
+        for i in range(n_iter):
+            # Discrete SB couples through sign(x) instead of x.
+            if h is None:
+                self.y += (-(delta - self.p[i]) * x + xi * J.dot(np.sign(x))) * dt
+            else:
+                self.y += (-(delta - self.p[i]) * x + xi * (J.dot(np.sign(x)) + h)) * dt
+            x += dt * self.y * delta
+
+            cond = np.abs(x) > 1
+            # Clamp in place so the caller's self.x sees the walls as well.
+            x[cond] = np.sign(x[cond])
+            self.y = np.where(cond, np.zeros_like(x), self.y)
 
     # pylint: disable=attribute-defined-outside-init
     def update(self):
         """Dynamical evolution based on Modified explicit symplectic Euler method."""
-        for i in range(self.n_iter):
-            if self.h is None:
-                self.y += (-(self.delta - self.p[i]) * self.x + self.xi * self.J.dot(np.sign(self.x))) * self.dt
-            else:
-                self.y += (
-                    -(self.delta - self.p[i]) * self.x + self.xi * (self.J.dot(np.sign(self.x)) + self.h)
-                ) * self.dt
-
-            self.x += self.dt * self.y * self.delta
-
-            cond = np.abs(self.x) > 1
-            self.x = np.where(cond, np.sign(self.x), self.x)
-            self.y = np.where(cond, np.zeros_like(self.y), self.y)
+        self._update_func(self.J, self.x, self.h, self.batch_size, self.xi, self.delta, self.dt, self.n_iter)
diff --git a/setup.py b/setup.py
index f142323cf42bff5bf4dca7c7ce7c11b56c445c68..1b881a9e09e807fed6e8ef28b452cdc58c52f6c8 100644
--- a/setup.py
+++ b/setup.py
@@ -504,6 +504,7 @@ ext_modules = [
     CMakeExtension(pymod='mindquantum._mq_vector_gpu', optional=True),
     CMakeExtension(pymod='mindquantum._mq_matrix'),
     CMakeExtension(pymod='mindquantum._math'),
+    CMakeExtension(pymod='mindquantum._qaia_sb', optional=True),
 ]
diff --git a/tests/st/test_algorithm/test_compiler/test_decompose/test_fixed_decompose/test_ry_decompose.py b/tests/st/test_algorithm/test_compiler/test_decompose/test_fixed_decompose/test_ry_decompose.py
index a2532b21497253b16457ebbb5b4ade6f45b88759..74f4000a5a1d5b8cd8b3ebd6d66c4b9d0863a3d9 100644
--- a/tests/st/test_algorithm/test_compiler/test_decompose/test_fixed_decompose/test_ry_decompose.py
+++ b/tests/st/test_algorithm/test_compiler/test_decompose/test_fixed_decompose/test_ry_decompose.py
@@ -16,7 +16,7 @@
 import numpy as np
 import pytest
 
-from mindquantum.algorithm.compiler.decompose import cry_decompose
+from mindquantum.algorithm.compiler.decompose import cry_decompose, cnry_decompose
 from mindquantum.core.circuit import Circuit
 from mindquantum.core.gates import RY
 
@@ -39,3 +39,15 @@ def test_cry():
     cry = RY(1.23).on(1, 0)
     for solution in cry_decompose(cry):
         circuit_equal_test(cry, solution)
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_cpu
+def test_cnry():
+    """
+    Description: Test cnry decompose
+    Expectation: success
+    """
+    cnry = RY(1.23).on(2, [0, 1])
+    for solution in cnry_decompose(cnry):
+        circuit_equal_test(cnry, solution)
diff --git a/tests/st/test_algorithm/test_qaia/test_qaia.py b/tests/st/test_algorithm/test_qaia/test_qaia.py
index 9e0412285b72314728490da73245401270c1e090..f9374f79d8729fe120faae9b6e32c572baa8df78 100644
--- a/tests/st/test_algorithm/test_qaia/test_qaia.py
+++ b/tests/st/test_algorithm/test_qaia/test_qaia.py
@@ -22,7 +22,19 @@
 from scipy.sparse import coo_matrix
 
 from mindquantum.algorithm.qaia import ASB, BSB, CAC, CFC, DSB, LQA, SFC
 from mindquantum.utils.fdopen import fdopen
+import pytest
 
+try:
+    # pylint: disable=unused-import
+    from mindquantum import _qaia_sb
+
+    GPU_AVAILABLE = True
+except (ImportError, RuntimeError):
+    GPU_AVAILABLE = False
+
+
 def read_gset(filename, negate=True):
     """
     Reading Gset and transform it into sparse matrix
@@ -65,6 +77,8 @@
 G = read_gset(str(Path(__file__).parent.parent.parent / 'G1.txt'))
 
 
+@pytest.mark.level0
+@pytest.mark.platform_x86_cpu
 def test_aSB():
     """
     Description: Test ASB
@@ -84,6 +98,8 @@
     np.allclose(x, solver.x)
 
 
+@pytest.mark.level0
+@pytest.mark.platform_x86_cpu
 def test_bSB():
     """
     Description: Test BSB
@@ -103,6 +119,68 @@
     np.allclose(x, solver.x)
 
 
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+@pytest.mark.skipif(not GPU_AVAILABLE, reason="GPU backend not available")
+def test_bSB_gpu():
+    """
+    Description: Test BSB GPU implementation
+    Expectation: success
+    """
+    N = G.shape[0]
+    np.random.seed(666)
+    x = 0.01 * (np.random.rand(N, 1) - 0.5)
+    y = 0.01 * (np.random.rand(N, 1) - 0.5)
+
+    # Test float32 precision
+    solver_gpu = BSB(G, n_iter=1, device='gpu', precision='float32')
+    solver_cpu = BSB(G, n_iter=1, device='cpu')
+
+    solver_gpu.x = x.copy()
+    solver_gpu.y = y.copy()
+    solver_cpu.x = x.copy()
+    solver_cpu.y = y.copy()
+
+    solver_gpu.update()
+    solver_cpu.update()
+    assert np.allclose(solver_gpu.x, solver_cpu.x, rtol=1e-5, atol=1e-5)
+
+    # Test with external field
+    h = np.random.rand(N, 1)
+    solver_gpu_h = BSB(G, h=h, n_iter=1, device='gpu', precision='float32')
+    solver_cpu_h = BSB(G, h=h, n_iter=1, device='cpu')
+
+    solver_gpu_h.x = x.copy()
+    solver_gpu_h.y = y.copy()
+    solver_cpu_h.x = x.copy()
+    solver_cpu_h.y = y.copy()
+
+    solver_gpu_h.update()
+    solver_cpu_h.update()
+    assert np.allclose(solver_gpu_h.x, solver_cpu_h.x, rtol=1e-5, atol=1e-5)
+
+    # Test float16 precision
+    solver_fp16 = BSB(G, n_iter=1, device='gpu', precision='float16')
+    solver_fp16.x = x.copy()
+    solver_fp16.y = y.copy()
+    solver_fp16.update()
+    assert np.allclose(
+        solver_fp16.x, solver_cpu.x, rtol=1e-3, atol=1e-3
+    )  # Lower precision for float16, relaxed error bounds
+
+    # Test int8 precision
+    solver_int8 = BSB(G, n_iter=1, device='gpu', precision='int8')
+    solver_int8.x = x.copy()
+    solver_int8.y = y.copy()
+    solver_int8.update()
+    assert np.allclose(
+        solver_int8.x, solver_cpu.x, rtol=1e-2, atol=1e-2
+    )  # Lowest precision for int8, further relaxed error bounds
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_cpu
 def test_dSB():
     """
     Description: Test DSB
@@ -122,6 +200,68 @@
     np.allclose(x, solver.x)
 
 
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+@pytest.mark.skipif(not GPU_AVAILABLE, reason="GPU backend not available")
+def test_dSB_gpu():
+    """
+    Description: Test DSB GPU implementation
+    
Expectation: success + """ + N = G.shape[0] + np.random.seed(666) + x = 0.01 * (np.random.rand(N, 1) - 0.5) + y = 0.01 * (np.random.rand(N, 1) - 0.5) + + # Test float32 precision + solver_gpu = DSB(G, n_iter=1, device='gpu', precision='float32') + solver_cpu = DSB(G, n_iter=1, device='cpu') + + solver_gpu.x = x.copy() + solver_gpu.y = y.copy() + solver_cpu.x = x.copy() + solver_cpu.y = y.copy() + + solver_gpu.update() + solver_cpu.update() + assert np.allclose(solver_gpu.x, solver_cpu.x, rtol=1e-5, atol=1e-5) + + # Test with external field + h = np.random.rand(N, 1) + solver_gpu_h = DSB(G, h=h, n_iter=1, device='gpu', precision='float32') + solver_cpu_h = DSB(G, h=h, n_iter=1, device='cpu') + + solver_gpu_h.x = x.copy() + solver_gpu_h.y = y.copy() + solver_cpu_h.x = x.copy() + solver_cpu_h.y = y.copy() + + solver_gpu_h.update() + solver_cpu_h.update() + assert np.allclose(solver_gpu_h.x, solver_cpu_h.x, rtol=1e-5, atol=1e-5) + + # Test float16 precision + solver_fp16 = DSB(G, n_iter=1, device='gpu', precision='float16') + solver_fp16.x = x.copy() + solver_fp16.y = y.copy() + solver_fp16.update() + assert np.allclose( + solver_fp16.x, solver_cpu.x, rtol=1e-3, atol=1e-3 + ) # Lower precision for float16, relaxed error bounds + + # Test int8 precision + solver_int8 = DSB(G, n_iter=1, device='gpu', precision='int8') + solver_int8.x = x.copy() + solver_int8.y = y.copy() + solver_int8.update() + assert np.allclose( + solver_int8.x, solver_cpu.x, rtol=1e-2, atol=1e-2 + ) # Lowest precision for int8, further relaxed error bounds + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu def test_LQA(): """ Description: Test LQA @@ -154,6 +294,8 @@ def test_LQA(): np.allclose(x, solver.x) +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu def test_CAC(): """ Description: Test CAC @@ -172,6 +314,8 @@ def test_CAC(): np.allclose(x, solver.x) +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu def test_CFC(): """ Description: Test CFC @@ -191,6 +335,8 @@ def test_CFC(): np.allclose(x, solver.x) +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu def test_SFC(): """ Description: Test SFC