Skip to content

Commit

Permalink
finalized examples
Browse files Browse the repository at this point in the history
  • Loading branch information
maximilianbehr committed Apr 2, 2024
1 parent 84b49b2 commit 97d5686
Show file tree
Hide file tree
Showing 11 changed files with 570 additions and 8 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ install(TARGETS cusignm DESTINATION lib)
install(FILES cusignm.h DESTINATION include)

# examples
foreach(x d)
foreach(x s d c z)
# Newton
add_executable(example_cusignm_${x}Newton example_cusignm_${x}Newton.cu)
target_link_libraries(example_cusignm_${x}Newton PUBLIC cusignm)
Expand Down
19 changes: 17 additions & 2 deletions cusignm_halley.cu
Original file line number Diff line number Diff line change
Expand Up @@ -272,10 +272,17 @@ static int cusignm_Halley(const int n, const T *A, void *d_buffer, void *h_buffe
*-----------------------------------------------------------------------------*/
// workspace query for LU factorization
CHECK_CUSOLVER(cusolverDnXgetrf_bufferSize(cusolverH, params, n, n, cusignm_traits<T>::dataType, Stmp, n, cusignm_traits<T>::computeType, &lworkdevice, &lworkhost));

// compute LU factorization
CHECK_CUSOLVER(cusolverDnXgetrf(cusolverH, params, n, n, cusignm_traits<T>::dataType, Stmp, n, d_ipiv, cusignm_traits<T>::computeType, d_work, lworkdevice, h_work, lworkhost, d_info));
// solve the linear system
// int d_info_h;
// CHECK_CUDA(cudaMemcpy(&d_info_h, d_info, sizeof(int), cudaMemcpyDeviceToHost));
// printf("iter=%d, d_info=%d\n", iter, d_info_h);

// solve the linear system
CHECK_CUSOLVER(cusolverDnXgetrs(cusolverH, params, CUBLAS_OP_N, n, n, cusignm_traits<T>::dataType, Stmp, n, d_ipiv, cusignm_traits<T>::computeType, Stmp2, n, d_info));
// CHECK_CUDA(cudaMemcpy(&d_info_h, d_info, sizeof(int), cudaMemcpyDeviceToHost));
// printf("iter=%d, d_info=%d\n", iter, d_info_h);

/*-----------------------------------------------------------------------------
* update S as S <- Sold*(I + c * Sold*Sold)\(a * I + b * Sold*Sold)
Expand All @@ -288,7 +295,7 @@ static int cusignm_Halley(const int n, const T *A, void *d_buffer, void *h_buffe
Scalar diffSSold, nrmS;
CHECK_CUSIGNM(cusignm_diffnormFro(n, n, S, Sold, &diffSSold));
CHECK_CUSIGNM(cusignm_normFro(n, n, S, &nrmS));
printf("iter=%d, diffSSold=%e, nrmS=%e, rel. change=%e\n", iter, diffSSold, nrmS, diffSSold / nrmS);
// printf("iter=%d, diffSSold=%e, nrmS=%e, rel. change=%e\n", iter, diffSSold, nrmS, diffSSold / nrmS);

/*-----------------------------------------------------------------------------
* stopping criteria
Expand All @@ -298,6 +305,14 @@ static int cusignm_Halley(const int n, const T *A, void *d_buffer, void *h_buffe
break;
}

// NaN detected
if (isnan(diffSSold) || isnan(nrmS)) {
fprintf(stderr, "%s-%s:%d no convergence - NaN detected\n", __func__, __FILE__, __LINE__);
fflush(stderr);
ret = -1;
break;
}

// maximum number of iterations reached
if (iter == maxiter) {
fprintf(stderr, "%s-%s:%d no convergence - maximum number of iterations reached\n", __func__, __FILE__, __LINE__);
Expand Down
9 changes: 8 additions & 1 deletion cusignm_newton.cu
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ static int cusignm_Newton(const int n, const T *A, void *d_buffer, void *h_buffe
Scalar diffSSold, nrmS;
CHECK_CUSIGNM(cusignm_diffnormFro(n, n, S, Sold, &diffSSold));
CHECK_CUSIGNM(cusignm_normFro(n, n, S, &nrmS));
printf("iter=%d, diffSSold=%e, nrmS=%e, rel. change=%e\n", iter, diffSSold, nrmS, diffSSold / nrmS);
// printf("iter=%d, diffSSold=%e, nrmS=%e, rel. change=%e\n", iter, diffSSold, nrmS, diffSSold / nrmS);

/*-----------------------------------------------------------------------------
* stopping criteria
Expand All @@ -237,6 +237,13 @@ static int cusignm_Newton(const int n, const T *A, void *d_buffer, void *h_buffe
break;
}

if (isnan(diffSSold) || isnan(nrmS)) {
fprintf(stderr, "%s-%s:%d no convergence - NaN detected\n", __func__, __FILE__, __LINE__);
fflush(stderr);
ret = -1;
break;
}

// maximum number of iterations reached
if (iter == maxiter) {
fprintf(stderr, "%s-%s:%d no convergence - maximum number of iterations reached\n", __func__, __FILE__, __LINE__);
Expand Down
108 changes: 108 additions & 0 deletions example_cusignm_cHalley.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
/* MIT License
*
* Copyright (c) 2024 Maximilian Behr
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/

#include <stdio.h>
#include <stdlib.h>

#include "cusignm.h"

int main(void) {
/*-----------------------------------------------------------------------------
* variables
*-----------------------------------------------------------------------------*/
int ret = 0; // return value
const int n = 10; // size of the input matrix A n-by-n
cuComplex *A, *S; // input matrix and sign matrix
void *d_buffer = NULL; // device buffer
void *h_buffer = NULL; // host buffer

/*-----------------------------------------------------------------------------
* allocate A, Q and H
*-----------------------------------------------------------------------------*/
cudaMallocManaged((void **)&A, sizeof(*A) * n * n);
cudaMallocManaged((void **)&S, sizeof(*S) * n * n);

/*-----------------------------------------------------------------------------
* create a random matrix A
*-----------------------------------------------------------------------------*/
srand(0);
for (int i = 0; i < n; ++i) {
for (int j = 0; j < n; ++j) {
A[i + j * n] = cuComplex({(float)rand() / RAND_MAX, (float)rand() / RAND_MAX});
}
}

/*-----------------------------------------------------------------------------
* perform a workspace query and allocate memory buffer on the host and device
*-----------------------------------------------------------------------------*/
size_t d_bufferSize = 0, h_bufferSize = 0;
cusignm_cHalleyBufferSize(n, &d_bufferSize, &h_bufferSize);

if (d_bufferSize > 0) {
cudaMalloc((void **)&d_buffer, d_bufferSize);
}

if (h_bufferSize > 0) {
h_buffer = malloc(h_bufferSize);
}

/*-----------------------------------------------------------------------------
* compute Sign Function S = sign(A)
*-----------------------------------------------------------------------------*/
cusignm_cHalley(n, A, d_buffer, h_buffer, S);

/*-----------------------------------------------------------------------------
* check sign function ||S^2 - I||_F
*-----------------------------------------------------------------------------*/
float fronrmdiff = 0.0f;
for (int i = 0; i < n; ++i) {
for (int j = 0; j < n; ++j) {
cuComplex sum = cuComplex{0.0f, 0.0f};
for (int k = 0; k < n; ++k) {
sum = cuCaddf(sum, cuCmulf(S[i + k * n], S[k + j * n]));
}
if (i == j) {
sum = cuCsubf(sum, cuComplex{1.0f, 0.0f});
}
float diff = cuCabsf(sum);
fronrmdiff += diff * diff;
}
}
float error = sqrtf(fronrmdiff / sqrtf((float)n));
printf("rel. error ||S^2 - I ||_F / ||I||_F = %e\n", error);
if (error < 1e-4) {
printf("Sign Function Approximation successful\n");
} else {
printf("Sign Function Approximation failed\n");
ret = 1;
}

/*-----------------------------------------------------------------------------
* clear memory
*-----------------------------------------------------------------------------*/
cudaFree(A);
cudaFree(S);
cudaFree(d_buffer);
free(h_buffer);
return ret;
}
108 changes: 108 additions & 0 deletions example_cusignm_cNewton.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
/* MIT License
*
* Copyright (c) 2024 Maximilian Behr
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/

#include <stdio.h>
#include <stdlib.h>

#include "cusignm.h"

int main(void) {
/*-----------------------------------------------------------------------------
* variables
*-----------------------------------------------------------------------------*/
int ret = 0; // return value
const int n = 10; // size of the input matrix A n-by-n
cuComplex *A, *S; // input matrix and sign matrix
void *d_buffer = NULL; // device buffer
void *h_buffer = NULL; // host buffer

/*-----------------------------------------------------------------------------
* allocate A, Q and H
*-----------------------------------------------------------------------------*/
cudaMallocManaged((void **)&A, sizeof(*A) * n * n);
cudaMallocManaged((void **)&S, sizeof(*S) * n * n);

/*-----------------------------------------------------------------------------
* create a random matrix A
*-----------------------------------------------------------------------------*/
srand(0);
for (int i = 0; i < n; ++i) {
for (int j = 0; j < n; ++j) {
A[i + j * n] = cuComplex({(float)rand() / RAND_MAX, (float)rand() / RAND_MAX});
}
}

/*-----------------------------------------------------------------------------
* perform a workspace query and allocate memory buffer on the host and device
*-----------------------------------------------------------------------------*/
size_t d_bufferSize = 0, h_bufferSize = 0;
cusignm_cNewtonBufferSize(n, &d_bufferSize, &h_bufferSize);

if (d_bufferSize > 0) {
cudaMalloc((void **)&d_buffer, d_bufferSize);
}

if (h_bufferSize > 0) {
h_buffer = malloc(h_bufferSize);
}

/*-----------------------------------------------------------------------------
* compute Sign Function S = sign(A)
*-----------------------------------------------------------------------------*/
cusignm_cNewton(n, A, d_buffer, h_buffer, S);

/*-----------------------------------------------------------------------------
* check sign function ||S^2 - I||_F
*-----------------------------------------------------------------------------*/
float fronrmdiff = 0.0f;
for (int i = 0; i < n; ++i) {
for (int j = 0; j < n; ++j) {
cuComplex sum = cuComplex{0.0f, 0.0f};
for (int k = 0; k < n; ++k) {
sum = cuCaddf(sum, cuCmulf(S[i + k * n], S[k + j * n]));
}
if (i == j) {
sum = cuCsubf(sum, cuComplex{1.0f, 0.0f});
}
float diff = cuCabsf(sum);
fronrmdiff += diff * diff;
}
}
float error = sqrtf(fronrmdiff / sqrtf((float)n));
printf("rel. error ||S^2 - I ||_F / ||I||_F = %e\n", error);
if (error < 1e-4) {
printf("Sign Function Approximation successful\n");
} else {
printf("Sign Function Approximation failed\n");
ret = 1;
}

/*-----------------------------------------------------------------------------
* clear memory
*-----------------------------------------------------------------------------*/
cudaFree(A);
cudaFree(S);
cudaFree(d_buffer);
free(h_buffer);
return ret;
}
2 changes: 1 addition & 1 deletion example_cusignm_dHalley.cu
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ int main(void) {
* variables
*-----------------------------------------------------------------------------*/
int ret = 0; // return value
const int n = 100; // size of the input matrix A n-by-n
const int n = 10; // size of the input matrix A n-by-n
double *A, *S; // input matrix and sign matrix
void *d_buffer = NULL; // device buffer
void *h_buffer = NULL; // host buffer
Expand Down
2 changes: 1 addition & 1 deletion example_cusignm_dNewton.cu
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ int main(void) {
* variables
*-----------------------------------------------------------------------------*/
int ret = 0; // return value
const int n = 100; // size of the input matrix A n-by-n
const int n = 10; // size of the input matrix A n-by-n
double *A, *S; // input matrix and sign matrix
void *d_buffer = NULL; // device buffer
void *h_buffer = NULL; // host buffer
Expand Down
4 changes: 2 additions & 2 deletions example_cusignm_sHalley.cu
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ int main(void) {
* variables
*-----------------------------------------------------------------------------*/
int ret = 0; // return value
const int n = 100; // size of the input matrix A n-by-n
const int n = 10; // size of the input matrix A n-by-n
float *A, *S; // input matrix and sign matrix
void *d_buffer = NULL; // device buffer
void *h_buffer = NULL; // host buffer
Expand Down Expand Up @@ -84,7 +84,7 @@ int main(void) {
if (i == j) {
sum -= 1.0f;
}
sum = absf(sum);
sum = fabs(sum);
fronrmdiff += sum * sum;
}
}
Expand Down
Loading

0 comments on commit 97d5686

Please sign in to comment.