xref: /petsc/src/mat/impls/aij/seq/matrart.c (revision 3fc23ce8277e00a65c4b1be6e2b4535b56ba7978)
1 
2 /*
3   Defines projective product routines where A is a SeqAIJ matrix
4           C = R * A * R^T
5 */
6 
7 #include <../src/mat/impls/aij/seq/aij.h>
8 #include <../src/mat/utils/freespace.h>
9 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/
10 
11 PetscErrorCode MatDestroy_SeqAIJ_RARt(void *data)
12 {
13   Mat_RARt       *rart = (Mat_RARt*)data;
14 
15   PetscFunctionBegin;
16   PetscCall(MatTransposeColoringDestroy(&rart->matcoloring));
17   PetscCall(MatDestroy(&rart->Rt));
18   PetscCall(MatDestroy(&rart->RARt));
19   PetscCall(MatDestroy(&rart->ARt));
20   PetscCall(PetscFree(rart->work));
21   if (rart->destroy) {
22     PetscCall((*rart->destroy)(rart->data));
23   }
24   PetscCall(PetscFree(rart));
25   PetscFunctionReturn(0);
26 }
27 
28 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(Mat A,Mat R,PetscReal fill,Mat C)
29 {
30   Mat                  P;
31   PetscInt             *rti,*rtj;
32   Mat_RARt             *rart;
33   MatColoring          coloring;
34   MatTransposeColoring matcoloring;
35   ISColoring           iscoloring;
36   Mat                  Rt_dense,RARt_dense;
37 
38   PetscFunctionBegin;
39   MatCheckProduct(C,4);
40   PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
41   /* create symbolic P=Rt */
42   PetscCall(MatGetSymbolicTranspose_SeqAIJ(R,&rti,&rtj));
43   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,R->cmap->n,R->rmap->n,rti,rtj,NULL,&P));
44 
45   /* get symbolic C=Pt*A*P */
46   PetscCall(MatPtAPSymbolic_SeqAIJ_SeqAIJ_SparseAxpy(A,P,fill,C));
47   PetscCall(MatSetBlockSizes(C,PetscAbs(R->rmap->bs),PetscAbs(R->rmap->bs)));
48   C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart;
49 
50   /* create a supporting struct */
51   PetscCall(PetscNew(&rart));
52   C->product->data    = rart;
53   C->product->destroy = MatDestroy_SeqAIJ_RARt;
54 
55   /* ------ Use coloring ---------- */
56   /* inode causes memory problem */
57   PetscCall(MatSetOption(C,MAT_USE_INODES,PETSC_FALSE));
58 
59   /* Create MatTransposeColoring from symbolic C=R*A*R^T */
60   PetscCall(MatColoringCreate(C,&coloring));
61   PetscCall(MatColoringSetDistance(coloring,2));
62   PetscCall(MatColoringSetType(coloring,MATCOLORINGSL));
63   PetscCall(MatColoringSetFromOptions(coloring));
64   PetscCall(MatColoringApply(coloring,&iscoloring));
65   PetscCall(MatColoringDestroy(&coloring));
66   PetscCall(MatTransposeColoringCreate(C,iscoloring,&matcoloring));
67 
68   rart->matcoloring = matcoloring;
69   PetscCall(ISColoringDestroy(&iscoloring));
70 
71   /* Create Rt_dense */
72   PetscCall(MatCreate(PETSC_COMM_SELF,&Rt_dense));
73   PetscCall(MatSetSizes(Rt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors));
74   PetscCall(MatSetType(Rt_dense,MATSEQDENSE));
75   PetscCall(MatSeqDenseSetPreallocation(Rt_dense,NULL));
76 
77   Rt_dense->assembled = PETSC_TRUE;
78   rart->Rt            = Rt_dense;
79 
80   /* Create RARt_dense = R*A*Rt_dense */
81   PetscCall(MatCreate(PETSC_COMM_SELF,&RARt_dense));
82   PetscCall(MatSetSizes(RARt_dense,C->rmap->n,matcoloring->ncolors,C->rmap->n,matcoloring->ncolors));
83   PetscCall(MatSetType(RARt_dense,MATSEQDENSE));
84   PetscCall(MatSeqDenseSetPreallocation(RARt_dense,NULL));
85 
86   rart->RARt = RARt_dense;
87 
88   /* Allocate work array to store columns of A*R^T used in MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense() */
89   PetscCall(PetscMalloc1(A->rmap->n*4,&rart->work));
90 
91   /* clean up */
92   PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(R,&rti,&rtj));
93   PetscCall(MatDestroy(&P));
94 
95 #if defined(PETSC_USE_INFO)
96   {
97     Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
98     PetscReal density = (PetscReal)(c->nz)/(RARt_dense->rmap->n*RARt_dense->cmap->n);
99     PetscCall(PetscInfo(C,"C=R*(A*Rt) via coloring C - use sparse-dense inner products\n"));
100     PetscCall(PetscInfo(C,"RARt_den %" PetscInt_FMT " %" PetscInt_FMT "; Rt %" PetscInt_FMT " %" PetscInt_FMT " (RARt->nz %" PetscInt_FMT ")/(m*ncolors)=%g\n",RARt_dense->rmap->n,RARt_dense->cmap->n,R->cmap->n,R->rmap->n,c->nz,(double)density));
101   }
102 #endif
103   PetscFunctionReturn(0);
104 }
105 
106 /*
107  RAB = R * A * B, R and A in seqaij format, B in dense format;
108 */
109 PetscErrorCode MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(Mat R,Mat A,Mat B,Mat RAB,PetscScalar *work)
110 {
111   Mat_SeqAIJ        *a=(Mat_SeqAIJ*)A->data,*r=(Mat_SeqAIJ*)R->data;
112   PetscScalar       r1,r2,r3,r4;
113   const PetscScalar *b,*b1,*b2,*b3,*b4;
114   MatScalar         *aa,*ra;
115   PetscInt          cn =B->cmap->n,bm=B->rmap->n,col,i,j,n,*ai=a->i,*aj,am=A->rmap->n;
116   PetscInt          am2=2*am,am3=3*am,bm4=4*bm;
117   PetscScalar       *d,*c,*c2,*c3,*c4;
118   PetscInt          *rj,rm=R->rmap->n,dm=RAB->rmap->n,dn=RAB->cmap->n;
119   PetscInt         rm2=2*rm,rm3=3*rm,colrm;
120 
121   PetscFunctionBegin;
122   if (!dm || !dn) PetscFunctionReturn(0);
123   PetscCheck(bm == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %" PetscInt_FMT " not equal rows in B %" PetscInt_FMT,A->cmap->n,bm);
124   PetscCheck(am == R->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in R %" PetscInt_FMT " not equal rows in A %" PetscInt_FMT,R->cmap->n,am);
125   PetscCheck(R->rmap->n == RAB->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in RAB %" PetscInt_FMT " not equal rows in R %" PetscInt_FMT,RAB->rmap->n,R->rmap->n);
126   PetscCheck(B->cmap->n == RAB->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in RAB %" PetscInt_FMT " not equal columns in B %" PetscInt_FMT,RAB->cmap->n,B->cmap->n);
127 
128   { /*
129      This approach is not as good as original ones (will be removed later), but it reveals that
130      AB_den=A*B takes almost all execution time in R*A*B for src/ksp/ksp/tutorials/ex56.c
131      */
132     PetscBool via_matmatmult=PETSC_FALSE;
133     PetscCall(PetscOptionsGetBool(NULL,NULL,"-matrart_via_matmatmult",&via_matmatmult,NULL));
134     if (via_matmatmult) {
135       Mat AB_den = NULL;
136       PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&AB_den));
137       PetscCall(MatMatMultSymbolic_SeqAIJ_SeqDense(A,B,0.0,AB_den));
138       PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(A,B,AB_den));
139       PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(R,AB_den,RAB));
140       PetscCall(MatDestroy(&AB_den));
141       PetscFunctionReturn(0);
142     }
143   }
144 
145   PetscCall(MatDenseGetArrayRead(B,&b));
146   PetscCall(MatDenseGetArray(RAB,&d));
147   b1   = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm;
148   c    = work; c2 = c + am; c3 = c2 + am; c4 = c3 + am;
149   for (col=0; col<cn-4; col += 4) {  /* over columns of C */
150     for (i=0; i<am; i++) {        /* over rows of A in those columns */
151       r1 = r2 = r3 = r4 = 0.0;
152       n  = ai[i+1] - ai[i];
153       aj = a->j + ai[i];
154       aa = a->a + ai[i];
155       for (j=0; j<n; j++) {
156         r1 += (*aa)*b1[*aj];
157         r2 += (*aa)*b2[*aj];
158         r3 += (*aa)*b3[*aj];
159         r4 += (*aa++)*b4[*aj++];
160       }
161       c[i]       = r1;
162       c[am  + i] = r2;
163       c[am2 + i] = r3;
164       c[am3 + i] = r4;
165     }
166     b1 += bm4;
167     b2 += bm4;
168     b3 += bm4;
169     b4 += bm4;
170 
171     /* RAB[:,col] = R*C[:,col] */
172     colrm = col*rm;
173     for (i=0; i<rm; i++) {        /* over rows of R in those columns */
174       r1 = r2 = r3 = r4 = 0.0;
175       n  = r->i[i+1] - r->i[i];
176       rj = r->j + r->i[i];
177       ra = r->a + r->i[i];
178       for (j=0; j<n; j++) {
179         r1 += (*ra)*c[*rj];
180         r2 += (*ra)*c2[*rj];
181         r3 += (*ra)*c3[*rj];
182         r4 += (*ra++)*c4[*rj++];
183       }
184       d[colrm + i]       = r1;
185       d[colrm + rm + i]  = r2;
186       d[colrm + rm2 + i] = r3;
187       d[colrm + rm3 + i] = r4;
188     }
189   }
190   for (; col<cn; col++) {     /* over extra columns of C */
191     for (i=0; i<am; i++) {  /* over rows of A in those columns */
192       r1 = 0.0;
193       n  = a->i[i+1] - a->i[i];
194       aj = a->j + a->i[i];
195       aa = a->a + a->i[i];
196       for (j=0; j<n; j++) {
197         r1 += (*aa++)*b1[*aj++];
198       }
199       c[i] = r1;
200     }
201     b1 += bm;
202 
203     for (i=0; i<rm; i++) {  /* over rows of R in those columns */
204       r1 = 0.0;
205       n  = r->i[i+1] - r->i[i];
206       rj = r->j + r->i[i];
207       ra = r->a + r->i[i];
208       for (j=0; j<n; j++) {
209         r1 += (*ra++)*c[*rj++];
210       }
211       d[col*rm + i] = r1;
212     }
213   }
214   PetscCall(PetscLogFlops(cn*2.0*(a->nz + r->nz)));
215 
216   PetscCall(MatDenseRestoreArrayRead(B,&b));
217   PetscCall(MatDenseRestoreArray(RAB,&d));
218   PetscCall(MatAssemblyBegin(RAB,MAT_FINAL_ASSEMBLY));
219   PetscCall(MatAssemblyEnd(RAB,MAT_FINAL_ASSEMBLY));
220   PetscFunctionReturn(0);
221 }
222 
223 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart(Mat A,Mat R,Mat C)
224 {
225   Mat_RARt             *rart;
226   MatTransposeColoring matcoloring;
227   Mat                  Rt,RARt;
228 
229   PetscFunctionBegin;
230   MatCheckProduct(C,3);
231   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
232   rart = (Mat_RARt*)C->product->data;
233 
234   /* Get dense Rt by Apply MatTransposeColoring to R */
235   matcoloring = rart->matcoloring;
236   Rt          = rart->Rt;
237   PetscCall(MatTransColoringApplySpToDen(matcoloring,R,Rt));
238 
239   /* Get dense RARt = R*A*Rt -- dominates! */
240   RARt = rart->RARt;
241   PetscCall(MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(R,A,Rt,RARt,rart->work));
242 
243   /* Recover C from C_dense */
244   PetscCall(MatTransColoringApplyDenToSp(matcoloring,RARt,C));
245   PetscFunctionReturn(0);
246 }
247 
248 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(Mat A,Mat R,PetscReal fill,Mat C)
249 {
250   Mat            ARt;
251   Mat_RARt       *rart;
252   char           *alg;
253 
254   PetscFunctionBegin;
255   MatCheckProduct(C,4);
256   PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
257   /* create symbolic ARt = A*R^T  */
258   PetscCall(MatProductCreate(A,R,NULL,&ARt));
259   PetscCall(MatProductSetType(ARt,MATPRODUCT_ABt));
260   PetscCall(MatProductSetAlgorithm(ARt,"sorted"));
261   PetscCall(MatProductSetFill(ARt,fill));
262   PetscCall(MatProductSetFromOptions(ARt));
263   PetscCall(MatProductSymbolic(ARt));
264 
265   /* compute symbolic C = R*ARt */
266   /* set algorithm for C = R*ARt */
267   PetscCall(PetscStrallocpy(C->product->alg,&alg));
268   PetscCall(MatProductSetAlgorithm(C,"sorted"));
269   PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ(R,ARt,fill,C));
270   /* resume original algorithm for C */
271   PetscCall(MatProductSetAlgorithm(C,alg));
272   PetscCall(PetscFree(alg));
273 
274   C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult;
275 
276   PetscCall(PetscNew(&rart));
277   rart->ARt = ARt;
278   C->product->data    = rart;
279   C->product->destroy = MatDestroy_SeqAIJ_RARt;
280   PetscCall(PetscInfo(C,"Use ARt=A*R^T, C=R*ARt via MatMatTransposeMult(). Coloring can be applied to A*R^T.\n"));
281   PetscFunctionReturn(0);
282 }
283 
284 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult(Mat A,Mat R,Mat C)
285 {
286   Mat_RARt       *rart;
287 
288   PetscFunctionBegin;
289   MatCheckProduct(C,3);
290   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
291   rart = (Mat_RARt*)C->product->data;
292   PetscCall(MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(A,R,rart->ARt)); /* dominate! */
293   PetscCall(MatMatMultNumeric_SeqAIJ_SeqAIJ(R,rart->ARt,C));
294   PetscFunctionReturn(0);
295 }
296 
297 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat R,PetscReal fill,Mat C)
298 {
299   Mat            Rt;
300   Mat_RARt       *rart;
301 
302   PetscFunctionBegin;
303   MatCheckProduct(C,4);
304   PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
305   PetscCall(MatTranspose_SeqAIJ(R,MAT_INITIAL_MATRIX,&Rt));
306   PetscCall(MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ(R,A,Rt,fill,C));
307 
308   PetscCall(PetscNew(&rart));
309   rart->data = C->product->data;
310   rart->destroy = C->product->destroy;
311   rart->Rt = Rt;
312   C->product->data    = rart;
313   C->product->destroy = MatDestroy_SeqAIJ_RARt;
314   C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ;
315   PetscCall(PetscInfo(C,"Use Rt=R^T and C=R*A*Rt via MatMatMatMult() to avoid sparse inner products\n"));
316   PetscFunctionReturn(0);
317 }
318 
319 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ(Mat A,Mat R,Mat C)
320 {
321   Mat_RARt       *rart;
322 
323   PetscFunctionBegin;
324   MatCheckProduct(C,3);
325   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
326   rart = (Mat_RARt*)C->product->data;
327   PetscCall(MatTranspose_SeqAIJ(R,MAT_REUSE_MATRIX,&rart->Rt));
328   /* MatMatMatMultSymbolic used a different data */
329   C->product->data = rart->data;
330   PetscCall(MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqAIJ(R,A,rart->Rt,C));
331   C->product->data = rart;
332   PetscFunctionReturn(0);
333 }
334 
335 PetscErrorCode MatRARt_SeqAIJ_SeqAIJ(Mat A,Mat R,MatReuse scall,PetscReal fill,Mat *C)
336 {
337   const char     *algTypes[3] = {"matmatmatmult","matmattransposemult","coloring_rart"};
338   PetscInt       alg=0; /* set default algorithm */
339 
340   PetscFunctionBegin;
341   if (scall == MAT_INITIAL_MATRIX) {
342     PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MatRARt","Mat");
343     PetscCall(PetscOptionsEList("-matrart_via","Algorithmic approach","MatRARt",algTypes,3,algTypes[0],&alg,NULL));
344     PetscOptionsEnd();
345 
346     PetscCall(PetscLogEventBegin(MAT_RARtSymbolic,A,R,0,0));
347     PetscCall(MatCreate(PETSC_COMM_SELF,C));
348     switch (alg) {
349     case 1:
350       /* via matmattransposemult: ARt=A*R^T, C=R*ARt - matrix coloring can be applied to A*R^T */
351       PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A,R,fill,*C));
352       break;
353     case 2:
354       /* via coloring_rart: apply coloring C = R*A*R^T                          */
355       PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A,R,fill,*C));
356       break;
357     default:
358       /* via matmatmatmult: Rt=R^T, C=R*A*Rt - avoid inefficient sparse inner products */
359       PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ(A,R,fill,*C));
360       break;
361     }
362     PetscCall(PetscLogEventEnd(MAT_RARtSymbolic,A,R,0,0));
363   }
364 
365   PetscCall(PetscLogEventBegin(MAT_RARtNumeric,A,R,0,0));
366   PetscCall(((*C)->ops->rartnumeric)(A,R,*C));
367   PetscCall(PetscLogEventEnd(MAT_RARtNumeric,A,R,0,0));
368   PetscFunctionReturn(0);
369 }
370 
371 /* ------------------------------------------------------------- */
372 PetscErrorCode MatProductSymbolic_RARt_SeqAIJ_SeqAIJ(Mat C)
373 {
374   Mat_Product         *product = C->product;
375   Mat                 A=product->A,R=product->B;
376   MatProductAlgorithm alg=product->alg;
377   PetscReal           fill=product->fill;
378   PetscBool           flg;
379 
380   PetscFunctionBegin;
381   PetscCall(PetscStrcmp(alg,"r*a*rt",&flg));
382   if (flg) {
383     PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ(A,R,fill,C));
384     goto next;
385   }
386 
387   PetscCall(PetscStrcmp(alg,"r*art",&flg));
388   if (flg) {
389     PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A,R,fill,C));
390     goto next;
391   }
392 
393   PetscCall(PetscStrcmp(alg,"coloring_rart",&flg));
394   if (flg) {
395     PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A,R,fill,C));
396     goto next;
397   }
398 
399   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatProductAlgorithm is not supported");
400 
401 next:
402   C->ops->productnumeric = MatProductNumeric_RARt;
403   PetscFunctionReturn(0);
404 }
405