xref: /petsc/src/mat/tests/ex70.c (revision 5aae2c7a20a4b66abfbf84daf10ee40366c74d20)
1 #include <petscmat.h>
2 
3 static char help[] = "Tests MatMatMult with MAT_REUSE_MATRIX and already allocated dense result.\n\n";
4 
5 static PetscErrorCode CheckLocal(Mat A, Mat B, PetscScalar *a, PetscScalar *b)
6 {
7   PetscErrorCode ierr;
8   PetscBool      wA = PETSC_FALSE, wB = PETSC_FALSE;
9 
10   PetscFunctionBegin;
11   if (a) {
12     const PetscScalar *Aa;
13     ierr = MatDenseGetArrayRead(A,&Aa);CHKERRQ(ierr);
14     wA   = (PetscBool)(a != Aa);
15     ierr = MatDenseRestoreArrayRead(A,&Aa);CHKERRQ(ierr);
16   }
17   if (b) {
18     const PetscScalar *Bb;
19     ierr = MatDenseGetArrayRead(B,&Bb);CHKERRQ(ierr);
20     wB   = (PetscBool)(b != Bb);
21     ierr = MatDenseRestoreArrayRead(B,&Bb);CHKERRQ(ierr);
22   }
23   if (wA || wB) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Wrong array in first Mat? %d, Wrong array in second Mat? %d",wA,wB);
24   PetscFunctionReturn(0);
25 }
26 
27 int main(int argc,char **args)
28 {
29   Mat            X,B,A,T,T2;
30   PetscInt       m,n,k,M = 10,N = 10,K = 5, ldx = 3, ldb = 5;
31   const char     *deft = MATAIJ;
32   char           mattype[256];
33   PetscBool      flg,symm = PETSC_FALSE,testtt = PETSC_TRUE, testnest = PETSC_TRUE, testtranspose = PETSC_TRUE, testcircular = PETSC_FALSE, local = PETSC_TRUE;
34   PetscBool      xgpu = PETSC_FALSE,bgpu = PETSC_FALSE;
35   PetscScalar    *dataX = NULL,*dataB = NULL;
36   PetscScalar    *aX,*aB;
37   PetscReal      err;
38   PetscErrorCode ierr;
39 
40   ierr = PetscInitialize(&argc,&args,NULL,help);if (ierr) return ierr;
41   ierr = PetscOptionsGetInt(NULL,NULL,"-N",&N,NULL);CHKERRQ(ierr);
42   ierr = PetscOptionsGetInt(NULL,NULL,"-M",&M,NULL);CHKERRQ(ierr);
43   ierr = PetscOptionsGetInt(NULL,NULL,"-K",&K,NULL);CHKERRQ(ierr);
44   ierr = PetscOptionsGetBool(NULL,NULL,"-symm",&symm,NULL);CHKERRQ(ierr);
45   ierr = PetscOptionsGetBool(NULL,NULL,"-local",&local,NULL);CHKERRQ(ierr);
46   ierr = PetscOptionsGetInt(NULL,NULL,"-ldx",&ldx,NULL);CHKERRQ(ierr);
47   ierr = PetscOptionsGetInt(NULL,NULL,"-ldb",&ldb,NULL);CHKERRQ(ierr);
48   ierr = PetscOptionsGetBool(NULL,NULL,"-testtranspose",&testtranspose,NULL);CHKERRQ(ierr);
49   ierr = PetscOptionsGetBool(NULL,NULL,"-testnest",&testnest,NULL);CHKERRQ(ierr);
50   ierr = PetscOptionsGetBool(NULL,NULL,"-testtt",&testtt,NULL);CHKERRQ(ierr);
51   ierr = PetscOptionsGetBool(NULL,NULL,"-testcircular",&testcircular,NULL);CHKERRQ(ierr);
52   ierr = PetscOptionsGetBool(NULL,NULL,"-xgpu",&xgpu,NULL);CHKERRQ(ierr);
53   ierr = PetscOptionsGetBool(NULL,NULL,"-bgpu",&bgpu,NULL);CHKERRQ(ierr);
54 #if defined(PETSC_USE_COMPLEX)
55   testtranspose = PETSC_FALSE;
56   testtt = PETSC_FALSE;
57 #endif
58   ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
59   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
60   ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
61   ierr = MatSetUp(A);CHKERRQ(ierr);
62   ierr = MatSetRandom(A,NULL);CHKERRQ(ierr);
63   if (M==N && symm) {
64     Mat AT;
65 
66     ierr = MatHermitianTranspose(A,MAT_INITIAL_MATRIX,&AT);CHKERRQ(ierr);
67     ierr = MatAXPY(A,1.0,AT,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
68     ierr = MatDestroy(&AT);CHKERRQ(ierr);
69     ierr = MatSetOption(A,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
70   }
71   ierr = MatViewFromOptions(A,NULL,"-A_init_view");CHKERRQ(ierr);
72   ierr = PetscOptionsBegin(PETSC_COMM_WORLD,"","","");CHKERRQ(ierr);
73   ierr = PetscOptionsFList("-A_mat_type","Matrix type","MatSetType",MatList,deft,mattype,256,&flg);CHKERRQ(ierr);
74   ierr = PetscOptionsEnd();CHKERRQ(ierr);
75   if (flg) {
76     Mat A2;
77 
78     /* MATSEQAIJCUSPARSE does not support MAT_INITIAL_MATRIX */
79     ierr = MatDuplicate(A,MAT_COPY_VALUES,&A2);CHKERRQ(ierr);
80     ierr = MatConvert(A,mattype,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
81     ierr = MatMultEqual(A,A2,10,&flg);CHKERRQ(ierr);
82     if (!flg) {
83       Mat AE,A2E;
84 
85       ierr = PetscPrintf(PETSC_COMM_WORLD,"Error with convert\n");CHKERRQ(ierr);
86       ierr = MatComputeOperator(A,MATDENSE,&AE);CHKERRQ(ierr);
87       ierr = MatComputeOperator(A2,MATDENSE,&A2E);CHKERRQ(ierr);
88       ierr = MatView(AE,NULL);CHKERRQ(ierr);
89       ierr = MatView(A2E,NULL);CHKERRQ(ierr);
90       ierr = MatAXPY(A2E,-1.0,A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
91       ierr = MatView(A2E,NULL);CHKERRQ(ierr);
92       ierr = MatDestroy(&A2E);CHKERRQ(ierr);
93       ierr = MatDestroy(&AE);CHKERRQ(ierr);
94     }
95     ierr = MatDestroy(&A2);CHKERRQ(ierr);
96   }
97   ierr = MatViewFromOptions(A,NULL,"-A_view");CHKERRQ(ierr);
98 
99   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
100   if (local) {
101     ierr = PetscMalloc1(PetscMax((m+ldx)*K,1),&dataX);CHKERRQ(ierr);
102     ierr = PetscMalloc1(PetscMax((n+ldb)*K,1),&dataB);CHKERRQ(ierr);
103   }
104   ierr = MatCreateDense(PETSC_COMM_WORLD,n,PETSC_DECIDE,N,K,dataB,&B);CHKERRQ(ierr);
105   ierr = MatCreateDense(PETSC_COMM_WORLD,m,PETSC_DECIDE,M,K,dataX,&X);CHKERRQ(ierr);
106 
107   /* store pointer to dense data for testing */
108   ierr = MatDenseGetArrayRead(B,(const PetscScalar**)&dataB);CHKERRQ(ierr);
109   ierr = MatDenseGetArrayRead(X,(const PetscScalar**)&dataX);CHKERRQ(ierr);
110   aX   = dataX;
111   aB   = dataB;
112   ierr = MatDenseRestoreArrayRead(B,(const PetscScalar**)&dataB);CHKERRQ(ierr);
113   ierr = MatDenseRestoreArrayRead(X,(const PetscScalar**)&dataX);CHKERRQ(ierr);
114   if (local) {
115     dataX = aX;
116     dataB = aB;
117   }
118   ierr = MatSetRandom(B,NULL);CHKERRQ(ierr);
119   /* convert to CUDA if needed */
120   if (bgpu) {
121     ierr = MatConvert(B,MATDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
122   }
123   if (xgpu) {
124     ierr = MatAssemblyBegin(X,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
125     ierr = MatAssemblyEnd(X,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
126     ierr = MatConvert(X,MATDENSECUDA,MAT_INPLACE_MATRIX,&X);CHKERRQ(ierr);
127   }
128   ierr = CheckLocal(B,X,aB,aX);CHKERRQ(ierr);
129 
130   /* Test reusing a previously allocated dense buffer */
131   ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr);
132   ierr = CheckLocal(B,X,aB,aX);CHKERRQ(ierr);
133   ierr = MatMatMultEqual(A,B,X,10,&flg);CHKERRQ(ierr);
134   if (!flg) {
135     ierr = PetscPrintf(PETSC_COMM_WORLD,"Error with reusage\n");CHKERRQ(ierr);
136     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T);CHKERRQ(ierr);
137     ierr = MatAXPY(T,-1.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
138     ierr = MatView(T,NULL);CHKERRQ(ierr);
139     ierr = MatDestroy(&T);CHKERRQ(ierr);
140   }
141 
142   /* Test MatDenseGetColumnVec and friends */
143   ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T);CHKERRQ(ierr);
144   ierr = MatDuplicate(T,MAT_DO_NOT_COPY_VALUES,&T2);CHKERRQ(ierr);
145   for (k=0;k<K;k++) {
146     Vec Xv,Tv,T2v;
147 
148     ierr = MatDenseGetColumnVecRead(X,k,&Xv);CHKERRQ(ierr);
149     ierr = MatDenseGetColumnVec(T,k,&Tv);CHKERRQ(ierr);
150     ierr = MatDenseGetColumnVecWrite(T2,k,&T2v);CHKERRQ(ierr);
151     ierr = VecCopy(Xv,T2v);CHKERRQ(ierr);
152     ierr = VecAXPY(Tv,-1.,Xv);CHKERRQ(ierr);
153     ierr = MatDenseRestoreColumnVecRead(X,k,&Xv);CHKERRQ(ierr);
154     ierr = MatDenseRestoreColumnVec(T,k,&Tv);CHKERRQ(ierr);
155     ierr = MatDenseRestoreColumnVecWrite(T2,k,&T2v);CHKERRQ(ierr);
156   }
157   ierr = MatNorm(T,NORM_FROBENIUS,&err);CHKERRQ(ierr);
158   if (err > PETSC_SMALL) {
159     ierr = PetscPrintf(PETSC_COMM_WORLD,"Error with MatDenseGetColumnVec\n");CHKERRQ(ierr);
160     ierr = MatView(T,NULL);CHKERRQ(ierr);
161   }
162   ierr = MatAXPY(T2,-1.,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
163   ierr = MatNorm(T2,NORM_FROBENIUS,&err);CHKERRQ(ierr);
164   if (err > PETSC_SMALL) {
165     ierr = PetscPrintf(PETSC_COMM_WORLD,"Error with MatDenseGetColumnVecWrite\n");CHKERRQ(ierr);
166     ierr = MatView(T2,NULL);CHKERRQ(ierr);
167   }
168   ierr = MatDestroy(&T);CHKERRQ(ierr);
169   ierr = MatDestroy(&T2);CHKERRQ(ierr);
170 
171   /* Test with MatShell */
172   ierr = MatConvert(A,MATSHELL,MAT_INITIAL_MATRIX,&T2);CHKERRQ(ierr);
173   ierr = MatMatMult(T2,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr);
174   ierr = CheckLocal(B,X,aB,aX);CHKERRQ(ierr);
175   ierr = MatMatMultEqual(T2,B,X,10,&flg);CHKERRQ(ierr);
176   if (!flg) {
177     ierr = PetscPrintf(PETSC_COMM_WORLD,"Error with reusage (MATSHELL)\n");CHKERRQ(ierr);
178     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T);CHKERRQ(ierr);
179     ierr = MatAXPY(T,-1.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
180     ierr = MatView(T,NULL);CHKERRQ(ierr);
181     ierr = MatDestroy(&T);CHKERRQ(ierr);
182   }
183   ierr = MatTransposeMatMult(T2,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
184   ierr = CheckLocal(B,X,aB,aX);CHKERRQ(ierr);
185   ierr = MatTransposeMatMultEqual(T2,X,B,10,&flg);CHKERRQ(ierr);
186   if (!flg) {
187     ierr = PetscPrintf(PETSC_COMM_WORLD,"Error with reusage (MatTranspose, MATSHELL)\n");CHKERRQ(ierr);
188   }
189   ierr = MatDestroy(&T2);CHKERRQ(ierr);
190 
191   if (testnest) { /* test with MatNest */
192     Mat        NA;
193     const char *vtype;
194 
195     ierr = MatCreateNest(PETSC_COMM_WORLD,1,NULL,1,NULL,&A,&NA);CHKERRQ(ierr);
196     /* needed to test against CUSPARSE matrices */
197     ierr = MatGetVecType(A,&vtype);CHKERRQ(ierr);
198     ierr = MatSetVecType(NA,vtype);CHKERRQ(ierr);
199     ierr = MatViewFromOptions(NA,NULL,"-NA_view");CHKERRQ(ierr);
200     ierr = MatMatMult(NA,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr);
201     ierr = CheckLocal(B,X,aB,aX);CHKERRQ(ierr);
202     ierr = MatMatMultEqual(NA,B,X,10,&flg);CHKERRQ(ierr);
203     if (!flg) {
204       ierr = PetscPrintf(PETSC_COMM_WORLD,"Error with Nest\n");CHKERRQ(ierr);
205       ierr = MatMatMult(NA,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T);CHKERRQ(ierr);
206       ierr = MatAXPY(T,-1.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
207       ierr = MatView(T,NULL);CHKERRQ(ierr);
208       ierr = MatDestroy(&T);CHKERRQ(ierr);
209     }
210     ierr = MatDestroy(&NA);CHKERRQ(ierr);
211   }
212 
213   if (testtranspose) { /* test with Transpose */
214     Mat TA;
215 
216     ierr = MatCreateHermitianTranspose(A,&TA);CHKERRQ(ierr);
217     ierr = MatMatMult(TA,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
218     ierr = CheckLocal(B,X,aB,aX);CHKERRQ(ierr);
219     ierr = MatMatMultEqual(TA,X,B,10,&flg);CHKERRQ(ierr);
220     if (!flg) {
221       ierr = PetscPrintf(PETSC_COMM_WORLD,"Error with Transpose\n");CHKERRQ(ierr);
222       ierr = MatMatMult(TA,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T);CHKERRQ(ierr);
223       ierr = MatAXPY(T,-1.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
224       ierr = MatView(T,NULL);CHKERRQ(ierr);
225       ierr = MatDestroy(&T);CHKERRQ(ierr);
226     }
227     ierr = MatDestroy(&TA);CHKERRQ(ierr);
228   }
229 
230   if (testtt) { /* test with Transpose(Transpose) */
231     Mat TA, TTA;
232 
233     ierr = MatCreateHermitianTranspose(A,&TA);CHKERRQ(ierr);
234     ierr = MatCreateHermitianTranspose(TA,&TTA);CHKERRQ(ierr);
235     ierr = MatMatMult(TTA,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr);
236     ierr = CheckLocal(B,X,aB,aX);CHKERRQ(ierr);
237     ierr = MatMatMultEqual(TTA,B,X,10,&flg);CHKERRQ(ierr);
238     if (!flg) {
239       ierr = PetscPrintf(PETSC_COMM_WORLD,"Error with Transpose(Transpose)\n");CHKERRQ(ierr);
240       ierr = MatMatMult(TTA,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T);CHKERRQ(ierr);
241       ierr = MatAXPY(T,-1.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
242       ierr = MatView(T,NULL);CHKERRQ(ierr);
243       ierr = MatDestroy(&T);CHKERRQ(ierr);
244     }
245     ierr = MatDestroy(&TA);CHKERRQ(ierr);
246     ierr = MatDestroy(&TTA);CHKERRQ(ierr);
247   }
248 
249   if (testcircular) { /* test circular */
250     Mat AB;
251 
252     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AB);CHKERRQ(ierr);
253     ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr);
254     ierr = CheckLocal(B,X,aB,aX);CHKERRQ(ierr);
255     if (M == N && N == K) {
256       ierr = MatMatMult(A,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
257     } else {
258       ierr = MatTransposeMatMult(A,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
259     }
260     ierr = CheckLocal(B,X,aB,aX);CHKERRQ(ierr);
261     ierr = MatDestroy(&AB);CHKERRQ(ierr);
262   }
263   ierr = MatDestroy(&X);CHKERRQ(ierr);
264   ierr = MatDestroy(&B);CHKERRQ(ierr);
265   ierr = MatDestroy(&A);CHKERRQ(ierr);
266   ierr = PetscFree(dataX);CHKERRQ(ierr);
267   ierr = PetscFree(dataB);CHKERRQ(ierr);
268   ierr = PetscFinalize();
269   return ierr;
270 }
271 
272 /*TEST
273 
274   test:
275     suffix: 1
276     args: -local {{0 1}}
277 
278   test:
279     output_file: output/ex70_1.out
280     requires: cuda
281     suffix: 1_cuda
282     args: -local {{0 1}} -xgpu {{0 1}} -bgpu {{0 1}} -A_mat_type {{seqaijcusparse seqaij}} -testnest 0
283 
284   test:
285     TODO: VecGetSubVector seems broken with CUDA
286     output_file: output/ex70_1.out
287     requires: cuda
288     suffix: 1_cuda_broken
289     args: -local {{0 1}} -xgpu {{0 1}} -bgpu {{0 1}} -A_mat_type seqaijcusparse -testnest
290 
291   test:
292     output_file: output/ex70_1.out
293     nsize: 2
294     suffix: 1_par
295     args: -testtranspose 0 -local {{0 1}}
296 
297   test:
298     output_file: output/ex70_1.out
299     requires: cuda
300     nsize: 2
301     suffix: 1_par_cuda
302     args: -testtranspose 0 -local {{0 1}} -xgpu {{0 1}} -bgpu {{0 1}} -A_mat_type {{mpiaijcusparse mpiaij}} -testnest 0
303 
304   test:
305     TODO: MPIAIJ x MPIDENSE broken for MatTransposeMatMult
306     output_file: output/ex70_1.out
307     nsize: 2
308     suffix: 1_par_broken
309     args: -testtranspose -local {{0 1}}
310 
311   test:
312     output_file: output/ex70_1.out
313     suffix: 2
314     nsize: 1
315     args: -M {{7 11}} -N {{12 9}} -K {{1 3}} -local {{0 1}}
316 
317   test:
318     requires: cuda
319     output_file: output/ex70_1.out
320     suffix: 2_cuda
321     nsize: 1
322     args: -M 7 -N 9 -K 2 -local {{0 1}} -testnest 0 -A_mat_type {{seqdensecuda seqdense}} -xgpu {{0 1}} -bgpu {{0 1}}
323 
324   test:
325     output_file: output/ex70_1.out
326     suffix: 2_par
327     nsize: 2
328     args: -M {{7 11}} -N {{12 9}} -K {{1 3}} -local {{0 1}} -testcircular 0
329 
330   test:
331     requires: cuda
332     output_file: output/ex70_1.out
333     suffix: 2_par_cuda
334     nsize: 2
335     args: -M 11 -N 9 -K 1 -local {{0 1}} -testcircular 0 -A_mat_type mpiaijcusparse -xgpu -bgpu -testnest 0
336 
337   test:
338     TODO: MatTransposeMatMultSymbolic_SeqAIJ_SeqDense plays with the destroy routine
339     output_file: output/ex70_1.out
340     suffix: 2_broken
341     nsize: 2
342     args: -M {{7 11}} -N {{12 9}} -K {{1 3}} -local {{0 1}} -testcircular 1
343 
344   test:
345     output_file: output/ex70_1.out
346     suffix: 3
347     nsize: {{1 3}}
348     args: -M 13 -N 13 -K {{1 3}} -local {{0 1}} -A_mat_type sbaij -symm -testtranspose 0
349 
350   test:
351     output_file: output/ex70_1.out
352     suffix: 4
353     nsize: 1
354     args: -M 3 -N 3 -K 3 -local {{0 1}} -testcircular
355 
356   test:
357     output_file: output/ex70_1.out
358     suffix: 5
359     nsize: {{2 4}}
360     args: -M 3 -N 3 -K 3 -local 1 -testcircular -testtranspose 0
361 
362   test:
363     TODO: MatCreateDense broken with some processors not having local rows
364     output_file: output/ex70_1.out
365     suffix: 5_broken
366     nsize: {{2 4}}
367     args: -M 3 -N 3 -K 3 -local 0 -testcircular -testtranspose 0
368 
369   test:
370     output_file: output/ex70_1.out
371     suffix: 6
372     nsize: 1
373     args: -M {{1 3}} -N {{2 5}} -K {{1 2}} -local {{0 1}} -testcircular -testtranspose 0
374 
375   test:
376     TODO: MatTransposeMatMultSymbolic_SeqAIJ_SeqDense plays with the destroy routine
377     output_file: output/ex70_1.out
378     suffix: 6_broken
379     nsize: 1
380     args: -M {{1 3}} -N {{2 5}} -K {{1 2}} -local {{0 1}} -testcircular -testtranspose
381 
382   test:
383     output_file: output/ex70_1.out
384     suffix: 7
385     nsize: 1
386     args: -M 13 -N 13 -K {{1 3}} -local {{0 1}} -A_mat_type dense -testnest 0 -testcircular
387 
388   test:
389     TODO: NEST x DENSE with dense nested matrices seems broken in this case
390     output_file: output/ex70_1.out
391     suffix: 7_broken
392     nsize: 1
393     args: -M 13 -N 13 -K {{1 3}} -local {{0 1}} -A_mat_type dense -testnest -testcircular
394 TEST*/
395