diff --git a/quest/src/core/fastmath.hpp b/quest/src/core/fastmath.hpp index 6367e116..31f44315 100644 --- a/quest/src/core/fastmath.hpp +++ b/quest/src/core/fastmath.hpp @@ -59,6 +59,37 @@ INLINE int fast_getPlusOrMinusMaskedBitParity(qindex num, qindex mask) { } +#ifdef USE_CU_QCOMP + +INLINE QCOMP_ALIAS fast_addQcompAlias(QCOMP_ALIAS a, QCOMP_ALIAS b) { + return addCuQcomp(a, b); +} + +INLINE QCOMP_ALIAS fast_mulQcompAlias(QCOMP_ALIAS a, QCOMP_ALIAS b) { + return mulCuQcomp(a, b); +} + +INLINE QCOMP_ALIAS fast_divQcompAlias(QCOMP_ALIAS a, QCOMP_ALIAS b) { + return divCuQcomp(a, b); +} + +#else + +INLINE QCOMP_ALIAS fast_addQcompAlias(QCOMP_ALIAS a, QCOMP_ALIAS b) { + return a + b; +} + +INLINE QCOMP_ALIAS fast_mulQcompAlias(QCOMP_ALIAS a, QCOMP_ALIAS b) { + return a * b; +} + +INLINE QCOMP_ALIAS fast_divQcompAlias(QCOMP_ALIAS a, QCOMP_ALIAS b) { + return a / b; +} + +#endif + + /* * INDEX ALGEBRA @@ -165,7 +196,7 @@ INLINE QCOMP_ALIAS fast_getPauliStrElem(PauliStr str, qindex row, qindex col) { int p = getTwoAdjacentBits(str.lowPaulis, 2*t); int i = getBit(row, t); int j = getBit(col, t); - elem = elem * matrices[p][i][j]; // HIP-friendly avoiding *= + elem = fast_mulQcompAlias(elem, matrices[p][i][j]); } // could be compile-time unrolled into 32 iterations @@ -173,7 +204,7 @@ INLINE QCOMP_ALIAS fast_getPauliStrElem(PauliStr str, qindex row, qindex col) { int p = getTwoAdjacentBits(str.highPaulis, 2*t); int i = getBit(row, t + numPaulisPerMask); int j = getBit(col, t + numPaulisPerMask); - elem = elem * matrices[p][i][j]; + elem = fast_mulQcompAlias(elem, matrices[p][i][j]); } return elem; @@ -190,7 +221,10 @@ INLINE QCOMP_ALIAS fast_getPauliStrSumElem(QCOMP_ALIAS* coeffs, PauliStr* string // this loop is expected exponentially smaller than caller's loop for (qindex n=0; n x, vect cu_qcomp value = thrust::transform_reduce(indIter, endIter, functor, init, thrust::plus()); - return value * toCuQcomp(util_getPowerOfI(y.size())); + return mulCuQcomp(value, toCuQcomp(util_getPowerOfI(y.size()))); } @@ -943,7 +946,7 @@ cu_qcomp thrust_statevec_calcExpecPauliStr_subB(Qureg qureg, vector x, vect cu_qcomp value = thrust::transform_reduce(indIter, endIter, functor, init, thrust::plus()); - return value * toCuQcomp(util_getPowerOfI(y.size())); + return mulCuQcomp(value, toCuQcomp(util_getPowerOfI(y.size()))); } @@ -961,7 +964,7 @@ cu_qcomp thrust_densmatr_calcExpecPauliStr_sub(Qureg qureg, vector x, vecto cu_qcomp value = thrust::transform_reduce(indIter, endIter, functor, init, thrust::plus()); - return value * toCuQcomp(util_getPowerOfI(y.size())); + return mulCuQcomp(value, toCuQcomp(util_getPowerOfI(y.size()))); } @@ -1082,4 +1085,4 @@ void thrust_statevec_initUnnormalisedUniformlyRandomPureStateAmps_sub(Qureg qure -#endif // GPU_THRUST_HPP \ No newline at end of file +#endif // GPU_THRUST_HPP diff --git a/quest/src/gpu/gpu_types.cuh b/quest/src/gpu/gpu_types.cuh index a934ecef..60c233cd 100644 --- a/quest/src/gpu/gpu_types.cuh +++ b/quest/src/gpu/gpu_types.cuh @@ -134,14 +134,63 @@ __host__ inline std::array unpackMatrixToCuQcomps(CompMatr2 in) { +/* + * cu_qcomp ARITHMETIC HELPERS + * + * These wrap the cuComplex helper API and are used in critical kernels/functors + * where backend operator overload behaviour has varied across toolchains. + */ + + +INLINE cu_qcomp addCuQcomp(cu_qcomp a, cu_qcomp b) { +#if (FLOAT_PRECISION == 1) + return cuCaddf(a, b); +#else + return cuCadd(a, b); +#endif +} + +INLINE cu_qcomp subCuQcomp(cu_qcomp a, cu_qcomp b) { +#if (FLOAT_PRECISION == 1) + return cuCsubf(a, b); +#else + return cuCsub(a, b); +#endif +} + +INLINE cu_qcomp mulCuQcomp(cu_qcomp a, cu_qcomp b) { +#if (FLOAT_PRECISION == 1) + return cuCmulf(a, b); +#else + return cuCmul(a, b); +#endif +} + +INLINE cu_qcomp divCuQcomp(cu_qcomp a, cu_qcomp b) { +#if (FLOAT_PRECISION == 1) + return cuCdivf(a, b); +#else + return cuCdiv(a, b); +#endif +} + +INLINE cu_qcomp mulCuQcomp(cu_qcomp a, qreal b) { + return getCuQcomp(a.x * b, a.y * b); +} + +INLINE cu_qcomp mulCuQcomp(qreal b, cu_qcomp a) { + return mulCuQcomp(a, b); +} + + + /* * cu_qcomp ARITHMETIC OVERLOADS * - * which are only needed by NVCC because - * HIP defines them for us. This good deed - * goes punished; a HIP bug disables our - * use of *= and += overloads, so kernels.cuh - * has disgusting (x = x * y) statements. Bah! + * which are only needed by NVCC because HIP + * defines them for us. GPU kernels should still + * prefer the helpers above for complex-complex + * multiply and divide to avoid backend quirks. */ @@ -233,12 +282,19 @@ INLINE cu_qcomp operator * (const qreal& b, const cu_qcomp& a) { INLINE qreal getCompReal(cu_qcomp num) { - return num.x; +#if (FLOAT_PRECISION == 1) + return cuCrealf(num); +#else + return cuCreal(num); +#endif } INLINE cu_qcomp getCompConj(cu_qcomp num) { - num.y *= -1; - return num; +#if (FLOAT_PRECISION == 1) + return cuConjf(num); +#else + return cuConj(num); +#endif } INLINE qreal getCompNorm(cu_qcomp num) { @@ -271,4 +327,4 @@ INLINE cu_qcomp getCompPower(cu_qcomp base, cu_qcomp exponent) { -#endif // GPU_TYPES_HPP \ No newline at end of file +#endif // GPU_TYPES_HPP diff --git a/tests/unit/calculations.cpp b/tests/unit/calculations.cpp index 4b4db284..c08c98cc 100644 --- a/tests/unit/calculations.cpp +++ b/tests/unit/calculations.cpp @@ -178,6 +178,39 @@ TEST_CASE( "calcExpecPauliStr", TEST_CATEGORY ) { } +TEST_CASE( "calcExpecPauliStr GPU regression preserves complex contributions", TEST_CATEGORY ) { + + SECTION( LABEL_CORRECTNESS ) { + + if (!getQuESTEnv().isGpuAccelerated) + return; + + int numQubits = getNumCachedQubits(); + qindex dim = getPow2(numQubits); + qreal amp = 1 / std::sqrt((qreal) 2); + qvector ref = getZeroVector(dim); + PauliStr str = getPauliStr("Y", {0}); + + ref[0] = qcomp(amp, 0); + ref[1] = qcomp(0, amp); + + qreal expected = std::real(getReferenceExpectationValue(ref, str)); + + for (auto& [label, qureg] : getCachedStatevecs()) { + + if (!qureg.isGpuAccelerated) + continue; + + DYNAMIC_SECTION( label ) { + + setQuregToReference(qureg, ref); + REQUIRE_AGREE( calcExpecPauliStr(qureg, str), expected ); + } + } + } +} + + TEST_CASE( "calcExpecPauliStrSum", TEST_CATEGORY ) { diff --git a/tests/unit/initialisations.cpp b/tests/unit/initialisations.cpp index ac1f1abd..40a6c712 100644 --- a/tests/unit/initialisations.cpp +++ b/tests/unit/initialisations.cpp @@ -406,6 +406,42 @@ TEST_CASE( "setQuregToPauliStrSum", TEST_CATEGORY ) { } +TEST_CASE( "setQuregToPauliStrSum GPU regression preserves imaginary amplitudes", TEST_CATEGORY ) { + + SECTION( LABEL_CORRECTNESS ) { + + if (!getQuESTEnv().isGpuAccelerated) + return; + + int numQubits = getNumCachedQubits(); + std::vector strings = { + getPauliStr("X", {0}), + getPauliStr("Y", {0}) + }; + std::vector coeffs = { + qcomp(0.25, 0.50), + qcomp(-0.50, 0.75) + }; + PauliStrSum sum = createPauliStrSum(strings, coeffs); + qmatrix refMat = getMatrix(sum, numQubits); + + for (auto& [label, qureg] : getCachedDensmatrs()) { + + if (!qureg.isGpuAccelerated) + continue; + + DYNAMIC_SECTION( label ) { + + setQuregToPauliStrSum(qureg, sum); + REQUIRE_AGREE( qureg, refMat ); + } + } + + destroyPauliStrSum(sum); + } +} + + TEST_CASE( "setQuregToWeightedSum", TEST_CATEGORY ) { SECTION( LABEL_CORRECTNESS ) { diff --git a/tests/unit/operations.cpp b/tests/unit/operations.cpp index 0e33220d..ab449581 100644 --- a/tests/unit/operations.cpp +++ b/tests/unit/operations.cpp @@ -2365,6 +2365,39 @@ TEST_CASE( "applyNonUnitaryPauliGadget", TEST_CATEGORY_OPS ) { } +TEST_CASE( "applyPauliY GPU regression preserves imaginary amplitudes", TEST_CATEGORY_OPS ) { + + SECTION( LABEL_CORRECTNESS ) { + + if (!getQuESTEnv().isGpuAccelerated) + return; + + int numQubits = getNumCachedQubits(); + qindex dim = getPow2(numQubits); + qreal amp = 1 / std::sqrt((qreal) dim); + qvector in = getConstantVector(dim, qcomp(amp, 0)); + qvector ref = in; + + applyReferenceOperator(ref, {0}, FixedMatrices::Y); + + for (auto& [label, qureg] : getCachedStatevecs()) { + + if (!qureg.isGpuAccelerated) + continue; + + DYNAMIC_SECTION( label ) { + + setQuregToReference(qureg, in); + applyPauliY(qureg, 0); + + REQUIRE_AGREE( qureg, ref ); + REQUIRE_AGREE( calcTotalProb(qureg), getReferenceProbability(ref) ); + } + } + } +} + + /** @} (end defgroup) */