xref: /petsc/src/mat/impls/aij/seq/matrart.c (revision b94d7ded0a05f1bbd5e48daa6f92b28259c75b44)
1 
2 /*
3   Defines projective product routines where A is a SeqAIJ matrix
4           C = R * A * R^T
5 */
6 
7 #include <../src/mat/impls/aij/seq/aij.h>
8 #include <../src/mat/utils/freespace.h>
9 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/
10 
11 PetscErrorCode MatDestroy_SeqAIJ_RARt(void *data)
12 {
13   Mat_RARt       *rart = (Mat_RARt*)data;
14 
15   PetscFunctionBegin;
16   PetscCall(MatTransposeColoringDestroy(&rart->matcoloring));
17   PetscCall(MatDestroy(&rart->Rt));
18   PetscCall(MatDestroy(&rart->RARt));
19   PetscCall(MatDestroy(&rart->ARt));
20   PetscCall(PetscFree(rart->work));
21   if (rart->destroy) PetscCall((*rart->destroy)(rart->data));
22   PetscCall(PetscFree(rart));
23   PetscFunctionReturn(0);
24 }
25 
26 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(Mat A,Mat R,PetscReal fill,Mat C)
27 {
28   Mat                  P;
29   PetscInt             *rti,*rtj;
30   Mat_RARt             *rart;
31   MatColoring          coloring;
32   MatTransposeColoring matcoloring;
33   ISColoring           iscoloring;
34   Mat                  Rt_dense,RARt_dense;
35 
36   PetscFunctionBegin;
37   MatCheckProduct(C,4);
38   PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
39   /* create symbolic P=Rt */
40   PetscCall(MatGetSymbolicTranspose_SeqAIJ(R,&rti,&rtj));
41   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,R->cmap->n,R->rmap->n,rti,rtj,NULL,&P));
42 
43   /* get symbolic C=Pt*A*P */
44   PetscCall(MatPtAPSymbolic_SeqAIJ_SeqAIJ_SparseAxpy(A,P,fill,C));
45   PetscCall(MatSetBlockSizes(C,PetscAbs(R->rmap->bs),PetscAbs(R->rmap->bs)));
46   C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart;
47 
48   /* create a supporting struct */
49   PetscCall(PetscNew(&rart));
50   C->product->data    = rart;
51   C->product->destroy = MatDestroy_SeqAIJ_RARt;
52 
53   /* ------ Use coloring ---------- */
54   /* inode causes memory problem */
55   PetscCall(MatSetOption(C,MAT_USE_INODES,PETSC_FALSE));
56 
57   /* Create MatTransposeColoring from symbolic C=R*A*R^T */
58   PetscCall(MatColoringCreate(C,&coloring));
59   PetscCall(MatColoringSetDistance(coloring,2));
60   PetscCall(MatColoringSetType(coloring,MATCOLORINGSL));
61   PetscCall(MatColoringSetFromOptions(coloring));
62   PetscCall(MatColoringApply(coloring,&iscoloring));
63   PetscCall(MatColoringDestroy(&coloring));
64   PetscCall(MatTransposeColoringCreate(C,iscoloring,&matcoloring));
65 
66   rart->matcoloring = matcoloring;
67   PetscCall(ISColoringDestroy(&iscoloring));
68 
69   /* Create Rt_dense */
70   PetscCall(MatCreate(PETSC_COMM_SELF,&Rt_dense));
71   PetscCall(MatSetSizes(Rt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors));
72   PetscCall(MatSetType(Rt_dense,MATSEQDENSE));
73   PetscCall(MatSeqDenseSetPreallocation(Rt_dense,NULL));
74 
75   Rt_dense->assembled = PETSC_TRUE;
76   rart->Rt            = Rt_dense;
77 
78   /* Create RARt_dense = R*A*Rt_dense */
79   PetscCall(MatCreate(PETSC_COMM_SELF,&RARt_dense));
80   PetscCall(MatSetSizes(RARt_dense,C->rmap->n,matcoloring->ncolors,C->rmap->n,matcoloring->ncolors));
81   PetscCall(MatSetType(RARt_dense,MATSEQDENSE));
82   PetscCall(MatSeqDenseSetPreallocation(RARt_dense,NULL));
83 
84   rart->RARt = RARt_dense;
85 
86   /* Allocate work array to store columns of A*R^T used in MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense() */
87   PetscCall(PetscMalloc1(A->rmap->n*4,&rart->work));
88 
89   /* clean up */
90   PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(R,&rti,&rtj));
91   PetscCall(MatDestroy(&P));
92 
93 #if defined(PETSC_USE_INFO)
94   {
95     Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
96     PetscReal density = (PetscReal)(c->nz)/(RARt_dense->rmap->n*RARt_dense->cmap->n);
97     PetscCall(PetscInfo(C,"C=R*(A*Rt) via coloring C - use sparse-dense inner products\n"));
98     PetscCall(PetscInfo(C,"RARt_den %" PetscInt_FMT " %" PetscInt_FMT "; Rt %" PetscInt_FMT " %" PetscInt_FMT " (RARt->nz %" PetscInt_FMT ")/(m*ncolors)=%g\n",RARt_dense->rmap->n,RARt_dense->cmap->n,R->cmap->n,R->rmap->n,c->nz,(double)density));
99   }
100 #endif
101   PetscFunctionReturn(0);
102 }
103 
104 /*
105  RAB = R * A * B, R and A in seqaij format, B in dense format;
106 */
107 PetscErrorCode MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(Mat R,Mat A,Mat B,Mat RAB,PetscScalar *work)
108 {
109   Mat_SeqAIJ        *a=(Mat_SeqAIJ*)A->data,*r=(Mat_SeqAIJ*)R->data;
110   PetscScalar       r1,r2,r3,r4;
111   const PetscScalar *b,*b1,*b2,*b3,*b4;
112   MatScalar         *aa,*ra;
113   PetscInt          cn =B->cmap->n,bm=B->rmap->n,col,i,j,n,*ai=a->i,*aj,am=A->rmap->n;
114   PetscInt          am2=2*am,am3=3*am,bm4=4*bm;
115   PetscScalar       *d,*c,*c2,*c3,*c4;
116   PetscInt          *rj,rm=R->rmap->n,dm=RAB->rmap->n,dn=RAB->cmap->n;
117   PetscInt         rm2=2*rm,rm3=3*rm,colrm;
118 
119   PetscFunctionBegin;
120   if (!dm || !dn) PetscFunctionReturn(0);
121   PetscCheck(bm == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %" PetscInt_FMT " not equal rows in B %" PetscInt_FMT,A->cmap->n,bm);
122   PetscCheck(am == R->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in R %" PetscInt_FMT " not equal rows in A %" PetscInt_FMT,R->cmap->n,am);
123   PetscCheck(R->rmap->n == RAB->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in RAB %" PetscInt_FMT " not equal rows in R %" PetscInt_FMT,RAB->rmap->n,R->rmap->n);
124   PetscCheck(B->cmap->n == RAB->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in RAB %" PetscInt_FMT " not equal columns in B %" PetscInt_FMT,RAB->cmap->n,B->cmap->n);
125 
126   { /*
127      This approach is not as good as original ones (will be removed later), but it reveals that
128      AB_den=A*B takes almost all execution time in R*A*B for src/ksp/ksp/tutorials/ex56.c
129      */
130     PetscBool via_matmatmult=PETSC_FALSE;
131     PetscCall(PetscOptionsGetBool(NULL,NULL,"-matrart_via_matmatmult",&via_matmatmult,NULL));
132     if (via_matmatmult) {
133       Mat AB_den = NULL;
134       PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&AB_den));
135       PetscCall(MatMatMultSymbolic_SeqAIJ_SeqDense(A,B,0.0,AB_den));
136       PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(A,B,AB_den));
137       PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(R,AB_den,RAB));
138       PetscCall(MatDestroy(&AB_den));
139       PetscFunctionReturn(0);
140     }
141   }
142 
143   PetscCall(MatDenseGetArrayRead(B,&b));
144   PetscCall(MatDenseGetArray(RAB,&d));
145   b1   = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm;
146   c    = work; c2 = c + am; c3 = c2 + am; c4 = c3 + am;
147   for (col=0; col<cn-4; col += 4) {  /* over columns of C */
148     for (i=0; i<am; i++) {        /* over rows of A in those columns */
149       r1 = r2 = r3 = r4 = 0.0;
150       n  = ai[i+1] - ai[i];
151       aj = a->j + ai[i];
152       aa = a->a + ai[i];
153       for (j=0; j<n; j++) {
154         r1 += (*aa)*b1[*aj];
155         r2 += (*aa)*b2[*aj];
156         r3 += (*aa)*b3[*aj];
157         r4 += (*aa++)*b4[*aj++];
158       }
159       c[i]       = r1;
160       c[am  + i] = r2;
161       c[am2 + i] = r3;
162       c[am3 + i] = r4;
163     }
164     b1 += bm4;
165     b2 += bm4;
166     b3 += bm4;
167     b4 += bm4;
168 
169     /* RAB[:,col] = R*C[:,col] */
170     colrm = col*rm;
171     for (i=0; i<rm; i++) {        /* over rows of R in those columns */
172       r1 = r2 = r3 = r4 = 0.0;
173       n  = r->i[i+1] - r->i[i];
174       rj = r->j + r->i[i];
175       ra = r->a + r->i[i];
176       for (j=0; j<n; j++) {
177         r1 += (*ra)*c[*rj];
178         r2 += (*ra)*c2[*rj];
179         r3 += (*ra)*c3[*rj];
180         r4 += (*ra++)*c4[*rj++];
181       }
182       d[colrm + i]       = r1;
183       d[colrm + rm + i]  = r2;
184       d[colrm + rm2 + i] = r3;
185       d[colrm + rm3 + i] = r4;
186     }
187   }
188   for (; col<cn; col++) {     /* over extra columns of C */
189     for (i=0; i<am; i++) {  /* over rows of A in those columns */
190       r1 = 0.0;
191       n  = a->i[i+1] - a->i[i];
192       aj = a->j + a->i[i];
193       aa = a->a + a->i[i];
194       for (j=0; j<n; j++) {
195         r1 += (*aa++)*b1[*aj++];
196       }
197       c[i] = r1;
198     }
199     b1 += bm;
200 
201     for (i=0; i<rm; i++) {  /* over rows of R in those columns */
202       r1 = 0.0;
203       n  = r->i[i+1] - r->i[i];
204       rj = r->j + r->i[i];
205       ra = r->a + r->i[i];
206       for (j=0; j<n; j++) {
207         r1 += (*ra++)*c[*rj++];
208       }
209       d[col*rm + i] = r1;
210     }
211   }
212   PetscCall(PetscLogFlops(cn*2.0*(a->nz + r->nz)));
213 
214   PetscCall(MatDenseRestoreArrayRead(B,&b));
215   PetscCall(MatDenseRestoreArray(RAB,&d));
216   PetscCall(MatAssemblyBegin(RAB,MAT_FINAL_ASSEMBLY));
217   PetscCall(MatAssemblyEnd(RAB,MAT_FINAL_ASSEMBLY));
218   PetscFunctionReturn(0);
219 }
220 
221 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart(Mat A,Mat R,Mat C)
222 {
223   Mat_RARt             *rart;
224   MatTransposeColoring matcoloring;
225   Mat                  Rt,RARt;
226 
227   PetscFunctionBegin;
228   MatCheckProduct(C,3);
229   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
230   rart = (Mat_RARt*)C->product->data;
231 
232   /* Get dense Rt by Apply MatTransposeColoring to R */
233   matcoloring = rart->matcoloring;
234   Rt          = rart->Rt;
235   PetscCall(MatTransColoringApplySpToDen(matcoloring,R,Rt));
236 
237   /* Get dense RARt = R*A*Rt -- dominates! */
238   RARt = rart->RARt;
239   PetscCall(MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(R,A,Rt,RARt,rart->work));
240 
241   /* Recover C from C_dense */
242   PetscCall(MatTransColoringApplyDenToSp(matcoloring,RARt,C));
243   PetscFunctionReturn(0);
244 }
245 
246 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(Mat A,Mat R,PetscReal fill,Mat C)
247 {
248   Mat            ARt;
249   Mat_RARt       *rart;
250   char           *alg;
251 
252   PetscFunctionBegin;
253   MatCheckProduct(C,4);
254   PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
255   /* create symbolic ARt = A*R^T  */
256   PetscCall(MatProductCreate(A,R,NULL,&ARt));
257   PetscCall(MatProductSetType(ARt,MATPRODUCT_ABt));
258   PetscCall(MatProductSetAlgorithm(ARt,"sorted"));
259   PetscCall(MatProductSetFill(ARt,fill));
260   PetscCall(MatProductSetFromOptions(ARt));
261   PetscCall(MatProductSymbolic(ARt));
262 
263   /* compute symbolic C = R*ARt */
264   /* set algorithm for C = R*ARt */
265   PetscCall(PetscStrallocpy(C->product->alg,&alg));
266   PetscCall(MatProductSetAlgorithm(C,"sorted"));
267   PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ(R,ARt,fill,C));
268   /* resume original algorithm for C */
269   PetscCall(MatProductSetAlgorithm(C,alg));
270   PetscCall(PetscFree(alg));
271 
272   C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult;
273 
274   PetscCall(PetscNew(&rart));
275   rart->ARt = ARt;
276   C->product->data    = rart;
277   C->product->destroy = MatDestroy_SeqAIJ_RARt;
278   PetscCall(PetscInfo(C,"Use ARt=A*R^T, C=R*ARt via MatMatTransposeMult(). Coloring can be applied to A*R^T.\n"));
279   PetscFunctionReturn(0);
280 }
281 
282 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult(Mat A,Mat R,Mat C)
283 {
284   Mat_RARt       *rart;
285 
286   PetscFunctionBegin;
287   MatCheckProduct(C,3);
288   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
289   rart = (Mat_RARt*)C->product->data;
290   PetscCall(MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(A,R,rart->ARt)); /* dominate! */
291   PetscCall(MatMatMultNumeric_SeqAIJ_SeqAIJ(R,rart->ARt,C));
292   PetscFunctionReturn(0);
293 }
294 
295 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat R,PetscReal fill,Mat C)
296 {
297   Mat            Rt;
298   Mat_RARt       *rart;
299 
300   PetscFunctionBegin;
301   MatCheckProduct(C,4);
302   PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
303   PetscCall(MatTranspose_SeqAIJ(R,MAT_INITIAL_MATRIX,&Rt));
304   PetscCall(MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ(R,A,Rt,fill,C));
305 
306   PetscCall(PetscNew(&rart));
307   rart->data = C->product->data;
308   rart->destroy = C->product->destroy;
309   rart->Rt = Rt;
310   C->product->data    = rart;
311   C->product->destroy = MatDestroy_SeqAIJ_RARt;
312   C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ;
313   PetscCall(PetscInfo(C,"Use Rt=R^T and C=R*A*Rt via MatMatMatMult() to avoid sparse inner products\n"));
314   PetscFunctionReturn(0);
315 }
316 
317 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ(Mat A,Mat R,Mat C)
318 {
319   Mat_RARt       *rart;
320 
321   PetscFunctionBegin;
322   MatCheckProduct(C,3);
323   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
324   rart = (Mat_RARt*)C->product->data;
325   PetscCall(MatTranspose_SeqAIJ(R,MAT_REUSE_MATRIX,&rart->Rt));
326   /* MatMatMatMultSymbolic used a different data */
327   C->product->data = rart->data;
328   PetscCall(MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqAIJ(R,A,rart->Rt,C));
329   C->product->data = rart;
330   PetscFunctionReturn(0);
331 }
332 
333 PetscErrorCode MatRARt_SeqAIJ_SeqAIJ(Mat A,Mat R,MatReuse scall,PetscReal fill,Mat *C)
334 {
335   const char     *algTypes[3] = {"matmatmatmult","matmattransposemult","coloring_rart"};
336   PetscInt       alg=0; /* set default algorithm */
337 
338   PetscFunctionBegin;
339   if (scall == MAT_INITIAL_MATRIX) {
340     PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MatRARt","Mat");
341     PetscCall(PetscOptionsEList("-matrart_via","Algorithmic approach","MatRARt",algTypes,3,algTypes[0],&alg,NULL));
342     PetscOptionsEnd();
343 
344     PetscCall(PetscLogEventBegin(MAT_RARtSymbolic,A,R,0,0));
345     PetscCall(MatCreate(PETSC_COMM_SELF,C));
346     switch (alg) {
347     case 1:
348       /* via matmattransposemult: ARt=A*R^T, C=R*ARt - matrix coloring can be applied to A*R^T */
349       PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A,R,fill,*C));
350       break;
351     case 2:
352       /* via coloring_rart: apply coloring C = R*A*R^T                          */
353       PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A,R,fill,*C));
354       break;
355     default:
356       /* via matmatmatmult: Rt=R^T, C=R*A*Rt - avoid inefficient sparse inner products */
357       PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ(A,R,fill,*C));
358       break;
359     }
360     PetscCall(PetscLogEventEnd(MAT_RARtSymbolic,A,R,0,0));
361   }
362 
363   PetscCall(PetscLogEventBegin(MAT_RARtNumeric,A,R,0,0));
364   PetscCall(((*C)->ops->rartnumeric)(A,R,*C));
365   PetscCall(PetscLogEventEnd(MAT_RARtNumeric,A,R,0,0));
366   PetscFunctionReturn(0);
367 }
368 
369 /* ------------------------------------------------------------- */
370 PetscErrorCode MatProductSymbolic_RARt_SeqAIJ_SeqAIJ(Mat C)
371 {
372   Mat_Product         *product = C->product;
373   Mat                 A=product->A,R=product->B;
374   MatProductAlgorithm alg=product->alg;
375   PetscReal           fill=product->fill;
376   PetscBool           flg;
377 
378   PetscFunctionBegin;
379   PetscCall(PetscStrcmp(alg,"r*a*rt",&flg));
380   if (flg) {
381     PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ(A,R,fill,C));
382     goto next;
383   }
384 
385   PetscCall(PetscStrcmp(alg,"r*art",&flg));
386   if (flg) {
387     PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A,R,fill,C));
388     goto next;
389   }
390 
391   PetscCall(PetscStrcmp(alg,"coloring_rart",&flg));
392   if (flg) {
393     PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A,R,fill,C));
394     goto next;
395   }
396 
397   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatProductAlgorithm is not supported");
398 
399 next:
400   C->ops->productnumeric = MatProductNumeric_RARt;
401   PetscFunctionReturn(0);
402 }
403