diff --git a/CMakeLists.txt b/CMakeLists.txt index d2aca45..a793828 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -46,7 +46,7 @@ install(TARGETS cusignm DESTINATION lib) install(FILES cusignm.h DESTINATION include) # examples -foreach(x d) +foreach(x s d c z) # Newton add_executable(example_cusignm_${x}Newton example_cusignm_${x}Newton.cu) target_link_libraries(example_cusignm_${x}Newton PUBLIC cusignm) diff --git a/cusignm_halley.cu b/cusignm_halley.cu index f6ada88..d794892 100644 --- a/cusignm_halley.cu +++ b/cusignm_halley.cu @@ -272,10 +272,17 @@ static int cusignm_Halley(const int n, const T *A, void *d_buffer, void *h_buffe *-----------------------------------------------------------------------------*/ // workspace query for LU factorization CHECK_CUSOLVER(cusolverDnXgetrf_bufferSize(cusolverH, params, n, n, cusignm_traits::dataType, Stmp, n, cusignm_traits::computeType, &lworkdevice, &lworkhost)); + // compute LU factorization CHECK_CUSOLVER(cusolverDnXgetrf(cusolverH, params, n, n, cusignm_traits::dataType, Stmp, n, d_ipiv, cusignm_traits::computeType, d_work, lworkdevice, h_work, lworkhost, d_info)); - // solve the linear system + // int d_info_h; + // CHECK_CUDA(cudaMemcpy(&d_info_h, d_info, sizeof(int), cudaMemcpyDeviceToHost)); + // printf("iter=%d, d_info=%d\n", iter, d_info_h); + + // solve the linear system CHECK_CUSOLVER(cusolverDnXgetrs(cusolverH, params, CUBLAS_OP_N, n, n, cusignm_traits::dataType, Stmp, n, d_ipiv, cusignm_traits::computeType, Stmp2, n, d_info)); + // CHECK_CUDA(cudaMemcpy(&d_info_h, d_info, sizeof(int), cudaMemcpyDeviceToHost)); + // printf("iter=%d, d_info=%d\n", iter, d_info_h); /*----------------------------------------------------------------------------- * update S as S <- Sold*(I + c * Sold*Sold)\(a * I + b * Sold*Sold) @@ -288,7 +295,7 @@ static int cusignm_Halley(const int n, const T *A, void *d_buffer, void *h_buffe Scalar diffSSold, nrmS; CHECK_CUSIGNM(cusignm_diffnormFro(n, n, S, Sold, &diffSSold)); CHECK_CUSIGNM(cusignm_normFro(n, n, S, &nrmS)); - printf("iter=%d, diffSSold=%e, nrmS=%e, rel. change=%e\n", iter, diffSSold, nrmS, diffSSold / nrmS); + // printf("iter=%d, diffSSold=%e, nrmS=%e, rel. change=%e\n", iter, diffSSold, nrmS, diffSSold / nrmS); /*----------------------------------------------------------------------------- * stopping criteria @@ -298,6 +305,14 @@ static int cusignm_Halley(const int n, const T *A, void *d_buffer, void *h_buffe break; } + // NaN detected + if (isnan(diffSSold) || isnan(nrmS)) { + fprintf(stderr, "%s-%s:%d no convergence - NaN detected\n", __func__, __FILE__, __LINE__); + fflush(stderr); + ret = -1; + break; + } + // maximum number of iterations reached if (iter == maxiter) { fprintf(stderr, "%s-%s:%d no convergence - maximum number of iterations reached\n", __func__, __FILE__, __LINE__); diff --git a/cusignm_newton.cu b/cusignm_newton.cu index ebaba24..4f0a156 100644 --- a/cusignm_newton.cu +++ b/cusignm_newton.cu @@ -227,7 +227,7 @@ static int cusignm_Newton(const int n, const T *A, void *d_buffer, void *h_buffe Scalar diffSSold, nrmS; CHECK_CUSIGNM(cusignm_diffnormFro(n, n, S, Sold, &diffSSold)); CHECK_CUSIGNM(cusignm_normFro(n, n, S, &nrmS)); - printf("iter=%d, diffSSold=%e, nrmS=%e, rel. change=%e\n", iter, diffSSold, nrmS, diffSSold / nrmS); + // printf("iter=%d, diffSSold=%e, nrmS=%e, rel. change=%e\n", iter, diffSSold, nrmS, diffSSold / nrmS); /*----------------------------------------------------------------------------- * stopping criteria @@ -237,6 +237,13 @@ static int cusignm_Newton(const int n, const T *A, void *d_buffer, void *h_buffe break; } + if (isnan(diffSSold) || isnan(nrmS)) { + fprintf(stderr, "%s-%s:%d no convergence - NaN detected\n", __func__, __FILE__, __LINE__); + fflush(stderr); + ret = -1; + break; + } + // maximum number of iterations reached if (iter == maxiter) { fprintf(stderr, "%s-%s:%d no convergence - maximum number of iterations reached\n", __func__, __FILE__, __LINE__); diff --git a/example_cusignm_cHalley.cu b/example_cusignm_cHalley.cu new file mode 100644 index 0000000..af7d0ed --- /dev/null +++ b/example_cusignm_cHalley.cu @@ -0,0 +1,108 @@ +/* MIT License + * + * Copyright (c) 2024 Maximilian Behr + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include + +#include "cusignm.h" + +int main(void) { + /*----------------------------------------------------------------------------- + * variables + *-----------------------------------------------------------------------------*/ + int ret = 0; // return value + const int n = 10; // size of the input matrix A n-by-n + cuComplex *A, *S; // input matrix and sign matrix + void *d_buffer = NULL; // device buffer + void *h_buffer = NULL; // host buffer + + /*----------------------------------------------------------------------------- + * allocate A, Q and H + *-----------------------------------------------------------------------------*/ + cudaMallocManaged((void **)&A, sizeof(*A) * n * n); + cudaMallocManaged((void **)&S, sizeof(*S) * n * n); + + /*----------------------------------------------------------------------------- + * create a random matrix A + *-----------------------------------------------------------------------------*/ + srand(0); + for (int i = 0; i < n; ++i) { + for (int j = 0; j < n; ++j) { + A[i + j * n] = cuComplex({(float)rand() / RAND_MAX, (float)rand() / RAND_MAX}); + } + } + + /*----------------------------------------------------------------------------- + * perform a workspace query and allocate memory buffer on the host and device + *-----------------------------------------------------------------------------*/ + size_t d_bufferSize = 0, h_bufferSize = 0; + cusignm_cHalleyBufferSize(n, &d_bufferSize, &h_bufferSize); + + if (d_bufferSize > 0) { + cudaMalloc((void **)&d_buffer, d_bufferSize); + } + + if (h_bufferSize > 0) { + h_buffer = malloc(h_bufferSize); + } + + /*----------------------------------------------------------------------------- + * compute Sign Function S = sign(A) + *-----------------------------------------------------------------------------*/ + cusignm_cHalley(n, A, d_buffer, h_buffer, S); + + /*----------------------------------------------------------------------------- + * check sign function ||S^2 - I||_F + *-----------------------------------------------------------------------------*/ + float fronrmdiff = 0.0f; + for (int i = 0; i < n; ++i) { + for (int j = 0; j < n; ++j) { + cuComplex sum = cuComplex{0.0f, 0.0f}; + for (int k = 0; k < n; ++k) { + sum = cuCaddf(sum, cuCmulf(S[i + k * n], S[k + j * n])); + } + if (i == j) { + sum = cuCsubf(sum, cuComplex{1.0f, 0.0f}); + } + float diff = cuCabsf(sum); + fronrmdiff += diff * diff; + } + } + float error = sqrtf(fronrmdiff / sqrtf((float)n)); + printf("rel. error ||S^2 - I ||_F / ||I||_F = %e\n", error); + if (error < 1e-4) { + printf("Sign Function Approximation successful\n"); + } else { + printf("Sign Function Approximation failed\n"); + ret = 1; + } + + /*----------------------------------------------------------------------------- + * clear memory + *-----------------------------------------------------------------------------*/ + cudaFree(A); + cudaFree(S); + cudaFree(d_buffer); + free(h_buffer); + return ret; +} diff --git a/example_cusignm_cNewton.cu b/example_cusignm_cNewton.cu new file mode 100644 index 0000000..270248f --- /dev/null +++ b/example_cusignm_cNewton.cu @@ -0,0 +1,108 @@ +/* MIT License + * + * Copyright (c) 2024 Maximilian Behr + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include + +#include "cusignm.h" + +int main(void) { + /*----------------------------------------------------------------------------- + * variables + *-----------------------------------------------------------------------------*/ + int ret = 0; // return value + const int n = 10; // size of the input matrix A n-by-n + cuComplex *A, *S; // input matrix and sign matrix + void *d_buffer = NULL; // device buffer + void *h_buffer = NULL; // host buffer + + /*----------------------------------------------------------------------------- + * allocate A, Q and H + *-----------------------------------------------------------------------------*/ + cudaMallocManaged((void **)&A, sizeof(*A) * n * n); + cudaMallocManaged((void **)&S, sizeof(*S) * n * n); + + /*----------------------------------------------------------------------------- + * create a random matrix A + *-----------------------------------------------------------------------------*/ + srand(0); + for (int i = 0; i < n; ++i) { + for (int j = 0; j < n; ++j) { + A[i + j * n] = cuComplex({(float)rand() / RAND_MAX, (float)rand() / RAND_MAX}); + } + } + + /*----------------------------------------------------------------------------- + * perform a workspace query and allocate memory buffer on the host and device + *-----------------------------------------------------------------------------*/ + size_t d_bufferSize = 0, h_bufferSize = 0; + cusignm_cNewtonBufferSize(n, &d_bufferSize, &h_bufferSize); + + if (d_bufferSize > 0) { + cudaMalloc((void **)&d_buffer, d_bufferSize); + } + + if (h_bufferSize > 0) { + h_buffer = malloc(h_bufferSize); + } + + /*----------------------------------------------------------------------------- + * compute Sign Function S = sign(A) + *-----------------------------------------------------------------------------*/ + cusignm_cNewton(n, A, d_buffer, h_buffer, S); + + /*----------------------------------------------------------------------------- + * check sign function ||S^2 - I||_F + *-----------------------------------------------------------------------------*/ + float fronrmdiff = 0.0f; + for (int i = 0; i < n; ++i) { + for (int j = 0; j < n; ++j) { + cuComplex sum = cuComplex{0.0f, 0.0f}; + for (int k = 0; k < n; ++k) { + sum = cuCaddf(sum, cuCmulf(S[i + k * n], S[k + j * n])); + } + if (i == j) { + sum = cuCsubf(sum, cuComplex{1.0f, 0.0f}); + } + float diff = cuCabsf(sum); + fronrmdiff += diff * diff; + } + } + float error = sqrtf(fronrmdiff / sqrtf((float)n)); + printf("rel. error ||S^2 - I ||_F / ||I||_F = %e\n", error); + if (error < 1e-4) { + printf("Sign Function Approximation successful\n"); + } else { + printf("Sign Function Approximation failed\n"); + ret = 1; + } + + /*----------------------------------------------------------------------------- + * clear memory + *-----------------------------------------------------------------------------*/ + cudaFree(A); + cudaFree(S); + cudaFree(d_buffer); + free(h_buffer); + return ret; +} diff --git a/example_cusignm_dHalley.cu b/example_cusignm_dHalley.cu index 5ff82f9..4807fdb 100644 --- a/example_cusignm_dHalley.cu +++ b/example_cusignm_dHalley.cu @@ -31,7 +31,7 @@ int main(void) { * variables *-----------------------------------------------------------------------------*/ int ret = 0; // return value - const int n = 100; // size of the input matrix A n-by-n + const int n = 10; // size of the input matrix A n-by-n double *A, *S; // input matrix and sign matrix void *d_buffer = NULL; // device buffer void *h_buffer = NULL; // host buffer diff --git a/example_cusignm_dNewton.cu b/example_cusignm_dNewton.cu index cb29418..5dae9f6 100644 --- a/example_cusignm_dNewton.cu +++ b/example_cusignm_dNewton.cu @@ -31,7 +31,7 @@ int main(void) { * variables *-----------------------------------------------------------------------------*/ int ret = 0; // return value - const int n = 100; // size of the input matrix A n-by-n + const int n = 10; // size of the input matrix A n-by-n double *A, *S; // input matrix and sign matrix void *d_buffer = NULL; // device buffer void *h_buffer = NULL; // host buffer diff --git a/example_cusignm_sHalley.cu b/example_cusignm_sHalley.cu index 24af445..2b11bfe 100644 --- a/example_cusignm_sHalley.cu +++ b/example_cusignm_sHalley.cu @@ -31,7 +31,7 @@ int main(void) { * variables *-----------------------------------------------------------------------------*/ int ret = 0; // return value - const int n = 100; // size of the input matrix A n-by-n + const int n = 10; // size of the input matrix A n-by-n float *A, *S; // input matrix and sign matrix void *d_buffer = NULL; // device buffer void *h_buffer = NULL; // host buffer @@ -84,7 +84,7 @@ int main(void) { if (i == j) { sum -= 1.0f; } - sum = absf(sum); + sum = fabs(sum); fronrmdiff += sum * sum; } } diff --git a/example_cusignm_sNewton.cu b/example_cusignm_sNewton.cu new file mode 100644 index 0000000..270248f --- /dev/null +++ b/example_cusignm_sNewton.cu @@ -0,0 +1,108 @@ +/* MIT License + * + * Copyright (c) 2024 Maximilian Behr + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include + +#include "cusignm.h" + +int main(void) { + /*----------------------------------------------------------------------------- + * variables + *-----------------------------------------------------------------------------*/ + int ret = 0; // return value + const int n = 10; // size of the input matrix A n-by-n + cuComplex *A, *S; // input matrix and sign matrix + void *d_buffer = NULL; // device buffer + void *h_buffer = NULL; // host buffer + + /*----------------------------------------------------------------------------- + * allocate A, Q and H + *-----------------------------------------------------------------------------*/ + cudaMallocManaged((void **)&A, sizeof(*A) * n * n); + cudaMallocManaged((void **)&S, sizeof(*S) * n * n); + + /*----------------------------------------------------------------------------- + * create a random matrix A + *-----------------------------------------------------------------------------*/ + srand(0); + for (int i = 0; i < n; ++i) { + for (int j = 0; j < n; ++j) { + A[i + j * n] = cuComplex({(float)rand() / RAND_MAX, (float)rand() / RAND_MAX}); + } + } + + /*----------------------------------------------------------------------------- + * perform a workspace query and allocate memory buffer on the host and device + *-----------------------------------------------------------------------------*/ + size_t d_bufferSize = 0, h_bufferSize = 0; + cusignm_cNewtonBufferSize(n, &d_bufferSize, &h_bufferSize); + + if (d_bufferSize > 0) { + cudaMalloc((void **)&d_buffer, d_bufferSize); + } + + if (h_bufferSize > 0) { + h_buffer = malloc(h_bufferSize); + } + + /*----------------------------------------------------------------------------- + * compute Sign Function S = sign(A) + *-----------------------------------------------------------------------------*/ + cusignm_cNewton(n, A, d_buffer, h_buffer, S); + + /*----------------------------------------------------------------------------- + * check sign function ||S^2 - I||_F + *-----------------------------------------------------------------------------*/ + float fronrmdiff = 0.0f; + for (int i = 0; i < n; ++i) { + for (int j = 0; j < n; ++j) { + cuComplex sum = cuComplex{0.0f, 0.0f}; + for (int k = 0; k < n; ++k) { + sum = cuCaddf(sum, cuCmulf(S[i + k * n], S[k + j * n])); + } + if (i == j) { + sum = cuCsubf(sum, cuComplex{1.0f, 0.0f}); + } + float diff = cuCabsf(sum); + fronrmdiff += diff * diff; + } + } + float error = sqrtf(fronrmdiff / sqrtf((float)n)); + printf("rel. error ||S^2 - I ||_F / ||I||_F = %e\n", error); + if (error < 1e-4) { + printf("Sign Function Approximation successful\n"); + } else { + printf("Sign Function Approximation failed\n"); + ret = 1; + } + + /*----------------------------------------------------------------------------- + * clear memory + *-----------------------------------------------------------------------------*/ + cudaFree(A); + cudaFree(S); + cudaFree(d_buffer); + free(h_buffer); + return ret; +} diff --git a/example_cusignm_zHalley.cu b/example_cusignm_zHalley.cu new file mode 100644 index 0000000..9674f04 --- /dev/null +++ b/example_cusignm_zHalley.cu @@ -0,0 +1,108 @@ +/* MIT License + * + * Copyright (c) 2024 Maximilian Behr + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include + +#include "cusignm.h" + +int main(void) { + /*----------------------------------------------------------------------------- + * variables + *-----------------------------------------------------------------------------*/ + int ret = 0; // return value + const int n = 10; // size of the input matrix A n-by-n + cuDoubleComplex *A, *S; // input matrix and sign matrix + void *d_buffer = NULL; // device buffer + void *h_buffer = NULL; // host buffer + + /*----------------------------------------------------------------------------- + * allocate A, Q and H + *-----------------------------------------------------------------------------*/ + cudaMallocManaged((void **)&A, sizeof(*A) * n * n); + cudaMallocManaged((void **)&S, sizeof(*S) * n * n); + + /*----------------------------------------------------------------------------- + * create a random matrix A + *-----------------------------------------------------------------------------*/ + srand(0); + for (int i = 0; i < n; ++i) { + for (int j = 0; j < n; ++j) { + A[i + j * n] = cuDoubleComplex({(double)rand() / RAND_MAX, (double)rand() / RAND_MAX}); + } + } + + /*----------------------------------------------------------------------------- + * perform a workspace query and allocate memory buffer on the host and device + *-----------------------------------------------------------------------------*/ + size_t d_bufferSize = 0, h_bufferSize = 0; + cusignm_zHalleyBufferSize(n, &d_bufferSize, &h_bufferSize); + + if (d_bufferSize > 0) { + cudaMalloc((void **)&d_buffer, d_bufferSize); + } + + if (h_bufferSize > 0) { + h_buffer = malloc(h_bufferSize); + } + + /*----------------------------------------------------------------------------- + * compute Sign Function S = sign(A) + *-----------------------------------------------------------------------------*/ + cusignm_zHalley(n, A, d_buffer, h_buffer, S); + + /*----------------------------------------------------------------------------- + * check sign function ||S^2 - I||_F + *-----------------------------------------------------------------------------*/ + double fronrmdiff = 0.0; + for (int i = 0; i < n; ++i) { + for (int j = 0; j < n; ++j) { + cuDoubleComplex sum = cuDoubleComplex{0.0, 0.0}; + for (int k = 0; k < n; ++k) { + sum = cuCadd(sum, cuCmul(S[i + k * n], S[k + j * n])); + } + if (i == j) { + sum = cuCsub(sum, cuDoubleComplex{1.0, 0.0}); + } + double diff = cuCabs(sum); + fronrmdiff += diff * diff; + } + } + double error = sqrtf(fronrmdiff / sqrtf((double)n)); + printf("rel. error ||S^2 - I ||_F / ||I||_F = %e\n", error); + if (error < 1e-8) { + printf("Sign Function Approximation successful\n"); + } else { + printf("Sign Function Approximation failed\n"); + ret = 1; + } + + /*----------------------------------------------------------------------------- + * clear memory + *-----------------------------------------------------------------------------*/ + cudaFree(A); + cudaFree(S); + cudaFree(d_buffer); + free(h_buffer); + return ret; +} diff --git a/example_cusignm_zNewton.cu b/example_cusignm_zNewton.cu new file mode 100644 index 0000000..b872ace --- /dev/null +++ b/example_cusignm_zNewton.cu @@ -0,0 +1,108 @@ +/* MIT License + * + * Copyright (c) 2024 Maximilian Behr + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include + +#include "cusignm.h" + +int main(void) { + /*----------------------------------------------------------------------------- + * variables + *-----------------------------------------------------------------------------*/ + int ret = 0; // return value + const int n = 10; // size of the input matrix A n-by-n + cuDoubleComplex *A, *S; // input matrix and sign matrix + void *d_buffer = NULL; // device buffer + void *h_buffer = NULL; // host buffer + + /*----------------------------------------------------------------------------- + * allocate A, Q and H + *-----------------------------------------------------------------------------*/ + cudaMallocManaged((void **)&A, sizeof(*A) * n * n); + cudaMallocManaged((void **)&S, sizeof(*S) * n * n); + + /*----------------------------------------------------------------------------- + * create a random matrix A + *-----------------------------------------------------------------------------*/ + srand(0); + for (int i = 0; i < n; ++i) { + for (int j = 0; j < n; ++j) { + A[i + j * n] = cuDoubleComplex({(double)rand() / RAND_MAX, (double)rand() / RAND_MAX}); + } + } + + /*----------------------------------------------------------------------------- + * perform a workspace query and allocate memory buffer on the host and device + *-----------------------------------------------------------------------------*/ + size_t d_bufferSize = 0, h_bufferSize = 0; + cusignm_zNewtonBufferSize(n, &d_bufferSize, &h_bufferSize); + + if (d_bufferSize > 0) { + cudaMalloc((void **)&d_buffer, d_bufferSize); + } + + if (h_bufferSize > 0) { + h_buffer = malloc(h_bufferSize); + } + + /*----------------------------------------------------------------------------- + * compute Sign Function S = sign(A) + *-----------------------------------------------------------------------------*/ + cusignm_zNewton(n, A, d_buffer, h_buffer, S); + + /*----------------------------------------------------------------------------- + * check sign function ||S^2 - I||_F + *-----------------------------------------------------------------------------*/ + double fronrmdiff = 0.0; + for (int i = 0; i < n; ++i) { + for (int j = 0; j < n; ++j) { + cuDoubleComplex sum = cuDoubleComplex{0.0, 0.0}; + for (int k = 0; k < n; ++k) { + sum = cuCadd(sum, cuCmul(S[i + k * n], S[k + j * n])); + } + if (i == j) { + sum = cuCsub(sum, cuDoubleComplex{1.0, 0.0}); + } + double diff = cuCabs(sum); + fronrmdiff += diff * diff; + } + } + double error = sqrt(fronrmdiff / sqrt((double)n)); + printf("rel. error ||S^2 - I ||_F / ||I||_F = %e\n", error); + if (error < 1e-8) { + printf("Sign Function Approximation successful\n"); + } else { + printf("Sign Function Approximation failed\n"); + ret = 1; + } + + /*----------------------------------------------------------------------------- + * clear memory + *-----------------------------------------------------------------------------*/ + cudaFree(A); + cudaFree(S); + cudaFree(d_buffer); + free(h_buffer); + return ret; +}