Actual source code: fninvsqrt.c
slepc-3.18.2 2023-01-26
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
10: /*
11: Inverse square root function x^(-1/2)
12: */
14: #include <slepc/private/fnimpl.h>
15: #include <slepcblaslapack.h>
17: PetscErrorCode FNEvaluateFunction_Invsqrt(FN fn,PetscScalar x,PetscScalar *y)
18: {
20: #if !defined(PETSC_USE_COMPLEX)
22: #endif
23: *y = 1.0/PetscSqrtScalar(x);
24: return 0;
25: }
27: PetscErrorCode FNEvaluateDerivative_Invsqrt(FN fn,PetscScalar x,PetscScalar *y)
28: {
30: #if !defined(PETSC_USE_COMPLEX)
32: #endif
33: *y = -1.0/(2.0*PetscPowScalarReal(x,1.5));
34: return 0;
35: }
37: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_Schur(FN fn,Mat A,Mat B)
38: {
39: PetscBLASInt n=0,ld,*ipiv,info;
40: PetscScalar *Ba,*Wa;
41: PetscInt m;
42: Mat W;
44: FN_AllocateWorkMat(fn,A,&W);
45: if (A!=B) MatCopy(A,B,SAME_NONZERO_PATTERN);
46: MatDenseGetArray(B,&Ba);
47: MatDenseGetArray(W,&Wa);
48: /* compute B = sqrtm(A) */
49: MatGetSize(A,&m,NULL);
50: PetscBLASIntCast(m,&n);
51: ld = n;
52: FNSqrtmSchur(fn,n,Ba,n,PETSC_FALSE);
53: /* compute B = A\B */
54: PetscMalloc1(ld,&ipiv);
55: PetscCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,Wa,&ld,ipiv,Ba,&ld,&info));
56: SlepcCheckLapackInfo("gesv",info);
57: PetscLogFlops(2.0*n*n*n/3.0+2.0*n*n*n);
58: PetscFree(ipiv);
59: MatDenseRestoreArray(W,&Wa);
60: MatDenseRestoreArray(B,&Ba);
61: FN_FreeWorkMat(fn,&W);
62: return 0;
63: }
65: PetscErrorCode FNEvaluateFunctionMatVec_Invsqrt_Schur(FN fn,Mat A,Vec v)
66: {
67: PetscBLASInt n=0,ld,*ipiv,info,one=1;
68: PetscScalar *Ba,*Wa;
69: PetscInt m;
70: Mat B,W;
72: FN_AllocateWorkMat(fn,A,&B);
73: FN_AllocateWorkMat(fn,A,&W);
74: MatDenseGetArray(B,&Ba);
75: MatDenseGetArray(W,&Wa);
76: /* compute B_1 = sqrtm(A)*e_1 */
77: MatGetSize(A,&m,NULL);
78: PetscBLASIntCast(m,&n);
79: ld = n;
80: FNSqrtmSchur(fn,n,Ba,n,PETSC_TRUE);
81: /* compute B_1 = A\B_1 */
82: PetscMalloc1(ld,&ipiv);
83: PetscCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&one,Wa,&ld,ipiv,Ba,&ld,&info));
84: SlepcCheckLapackInfo("gesv",info);
85: PetscFree(ipiv);
86: MatDenseRestoreArray(W,&Wa);
87: MatDenseRestoreArray(B,&Ba);
88: MatGetColumnVector(B,v,0);
89: FN_FreeWorkMat(fn,&W);
90: FN_FreeWorkMat(fn,&B);
91: return 0;
92: }
94: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_DBP(FN fn,Mat A,Mat B)
95: {
96: PetscBLASInt n=0;
97: PetscScalar *T;
98: PetscInt m;
100: if (A!=B) MatCopy(A,B,SAME_NONZERO_PATTERN);
101: MatDenseGetArray(B,&T);
102: MatGetSize(A,&m,NULL);
103: PetscBLASIntCast(m,&n);
104: FNSqrtmDenmanBeavers(fn,n,T,n,PETSC_TRUE);
105: MatDenseRestoreArray(B,&T);
106: return 0;
107: }
109: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_NS(FN fn,Mat A,Mat B)
110: {
111: PetscBLASInt n=0;
112: PetscScalar *T;
113: PetscInt m;
115: if (A!=B) MatCopy(A,B,SAME_NONZERO_PATTERN);
116: MatDenseGetArray(B,&T);
117: MatGetSize(A,&m,NULL);
118: PetscBLASIntCast(m,&n);
119: FNSqrtmNewtonSchulz(fn,n,T,n,PETSC_TRUE);
120: MatDenseRestoreArray(B,&T);
121: return 0;
122: }
124: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_Sadeghi(FN fn,Mat A,Mat B)
125: {
126: PetscBLASInt n=0,ld,*ipiv,info;
127: PetscScalar *Ba,*Wa;
128: PetscInt m;
129: Mat W;
131: FN_AllocateWorkMat(fn,A,&W);
132: if (A!=B) MatCopy(A,B,SAME_NONZERO_PATTERN);
133: MatDenseGetArray(B,&Ba);
134: MatDenseGetArray(W,&Wa);
135: /* compute B = sqrtm(A) */
136: MatGetSize(A,&m,NULL);
137: PetscBLASIntCast(m,&n);
138: ld = n;
139: FNSqrtmSadeghi(fn,n,Ba,n);
140: /* compute B = A\B */
141: PetscMalloc1(ld,&ipiv);
142: PetscCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,Wa,&ld,ipiv,Ba,&ld,&info));
143: SlepcCheckLapackInfo("gesv",info);
144: PetscLogFlops(2.0*n*n*n/3.0+2.0*n*n*n);
145: PetscFree(ipiv);
146: MatDenseRestoreArray(W,&Wa);
147: MatDenseRestoreArray(B,&Ba);
148: FN_FreeWorkMat(fn,&W);
149: return 0;
150: }
152: #if defined(PETSC_HAVE_CUDA)
153: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_NS_CUDA(FN fn,Mat A,Mat B)
154: {
155: PetscBLASInt n=0;
156: PetscScalar *Ba;
157: PetscInt m;
159: if (A!=B) MatCopy(A,B,SAME_NONZERO_PATTERN);
160: MatDenseCUDAGetArray(B,&Ba);
161: MatGetSize(A,&m,NULL);
162: PetscBLASIntCast(m,&n);
163: FNSqrtmNewtonSchulz_CUDA(fn,n,Ba,n,PETSC_TRUE);
164: MatDenseCUDARestoreArray(B,&Ba);
165: return 0;
166: }
168: #if defined(PETSC_HAVE_MAGMA)
169: #include <slepcmagma.h>
171: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_DBP_CUDAm(FN fn,Mat A,Mat B)
172: {
173: PetscBLASInt n=0;
174: PetscScalar *T;
175: PetscInt m;
177: if (A!=B) MatCopy(A,B,SAME_NONZERO_PATTERN);
178: MatDenseCUDAGetArray(B,&T);
179: MatGetSize(A,&m,NULL);
180: PetscBLASIntCast(m,&n);
181: FNSqrtmDenmanBeavers_CUDAm(fn,n,T,n,PETSC_TRUE);
182: MatDenseCUDARestoreArray(B,&T);
183: return 0;
184: }
186: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_Sadeghi_CUDAm(FN fn,Mat A,Mat B)
187: {
188: PetscBLASInt n=0,ld,*ipiv;
189: PetscScalar *Ba,*Wa;
190: PetscInt m;
191: Mat W;
193: FN_AllocateWorkMat(fn,A,&W);
194: if (A!=B) MatCopy(A,B,SAME_NONZERO_PATTERN);
195: MatDenseCUDAGetArray(B,&Ba);
196: MatDenseCUDAGetArray(W,&Wa);
197: /* compute B = sqrtm(A) */
198: MatGetSize(A,&m,NULL);
199: PetscBLASIntCast(m,&n);
200: ld = n;
201: FNSqrtmSadeghi_CUDAm(fn,n,Ba,n);
202: /* compute B = A\B */
203: SlepcMagmaInit();
204: PetscMalloc1(ld,&ipiv);
205: PetscCallMAGMA(magma_xgesv_gpu,n,n,Wa,ld,ipiv,Ba,ld);
206: PetscLogFlops(2.0*n*n*n/3.0+2.0*n*n*n);
207: PetscFree(ipiv);
208: MatDenseCUDARestoreArray(W,&Wa);
209: MatDenseCUDARestoreArray(B,&Ba);
210: FN_FreeWorkMat(fn,&W);
211: return 0;
212: }
213: #endif /* PETSC_HAVE_MAGMA */
214: #endif /* PETSC_HAVE_CUDA */
216: PetscErrorCode FNView_Invsqrt(FN fn,PetscViewer viewer)
217: {
218: PetscBool isascii;
219: char str[50];
220: const char *methodname[] = {
221: "Schur method for inv(A)*sqrtm(A)",
222: "Denman-Beavers (product form)",
223: "Newton-Schulz iteration",
224: "Sadeghi iteration"
225: };
226: const int nmeth=PETSC_STATIC_ARRAY_LENGTH(methodname);
228: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
229: if (isascii) {
230: if (fn->beta==(PetscScalar)1.0) {
231: if (fn->alpha==(PetscScalar)1.0) PetscViewerASCIIPrintf(viewer," inverse square root: x^(-1/2)\n");
232: else {
233: SlepcSNPrintfScalar(str,sizeof(str),fn->alpha,PETSC_TRUE);
234: PetscViewerASCIIPrintf(viewer," inverse square root: (%s*x)^(-1/2)\n",str);
235: }
236: } else {
237: SlepcSNPrintfScalar(str,sizeof(str),fn->beta,PETSC_TRUE);
238: if (fn->alpha==(PetscScalar)1.0) PetscViewerASCIIPrintf(viewer," inverse square root: %s*x^(-1/2)\n",str);
239: else {
240: PetscViewerASCIIPrintf(viewer," inverse square root: %s",str);
241: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
242: SlepcSNPrintfScalar(str,sizeof(str),fn->alpha,PETSC_TRUE);
243: PetscViewerASCIIPrintf(viewer,"*(%s*x)^(-1/2)\n",str);
244: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
245: }
246: }
247: if (fn->method<nmeth) PetscViewerASCIIPrintf(viewer," computing matrix functions with: %s\n",methodname[fn->method]);
248: }
249: return 0;
250: }
252: SLEPC_EXTERN PetscErrorCode FNCreate_Invsqrt(FN fn)
253: {
254: fn->ops->evaluatefunction = FNEvaluateFunction_Invsqrt;
255: fn->ops->evaluatederivative = FNEvaluateDerivative_Invsqrt;
256: fn->ops->evaluatefunctionmat[0] = FNEvaluateFunctionMat_Invsqrt_Schur;
257: fn->ops->evaluatefunctionmat[1] = FNEvaluateFunctionMat_Invsqrt_DBP;
258: fn->ops->evaluatefunctionmat[2] = FNEvaluateFunctionMat_Invsqrt_NS;
259: fn->ops->evaluatefunctionmat[3] = FNEvaluateFunctionMat_Invsqrt_Sadeghi;
260: #if defined(PETSC_HAVE_CUDA)
261: fn->ops->evaluatefunctionmatcuda[2] = FNEvaluateFunctionMat_Invsqrt_NS_CUDA;
262: #if defined(PETSC_HAVE_MAGMA)
263: fn->ops->evaluatefunctionmatcuda[1] = FNEvaluateFunctionMat_Invsqrt_DBP_CUDAm;
264: fn->ops->evaluatefunctionmatcuda[3] = FNEvaluateFunctionMat_Invsqrt_Sadeghi_CUDAm;
265: #endif /* PETSC_HAVE_MAGMA */
266: #endif /* PETSC_HAVE_CUDA */
267: fn->ops->evaluatefunctionmatvec[0] = FNEvaluateFunctionMatVec_Invsqrt_Schur;
268: fn->ops->view = FNView_Invsqrt;
269: return 0;
270: }