xref: /petsc/src/mat/impls/aij/seq/matrart.c (revision ebead697dbf761eb322f829370bbe90b3bd93fa3)
1 
2 /*
3   Defines projective product routines where A is a SeqAIJ matrix
4           C = R * A * R^T
5 */
6 
7 #include <../src/mat/impls/aij/seq/aij.h>
8 #include <../src/mat/utils/freespace.h>
9 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/
10 
11 PetscErrorCode MatDestroy_SeqAIJ_RARt(void *data)
12 {
13   Mat_RARt       *rart = (Mat_RARt*)data;
14 
15   PetscFunctionBegin;
16   PetscCall(MatTransposeColoringDestroy(&rart->matcoloring));
17   PetscCall(MatDestroy(&rart->Rt));
18   PetscCall(MatDestroy(&rart->RARt));
19   PetscCall(MatDestroy(&rart->ARt));
20   PetscCall(PetscFree(rart->work));
21   if (rart->destroy) PetscCall((*rart->destroy)(rart->data));
22   PetscCall(PetscFree(rart));
23   PetscFunctionReturn(0);
24 }
25 
26 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(Mat A,Mat R,PetscReal fill,Mat C)
27 {
28   Mat                  P;
29   Mat_RARt             *rart;
30   MatColoring          coloring;
31   MatTransposeColoring matcoloring;
32   ISColoring           iscoloring;
33   Mat                  Rt_dense,RARt_dense;
34 
35   PetscFunctionBegin;
36   MatCheckProduct(C,4);
37   PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
38   /* create symbolic P=Rt */
39   PetscCall(MatTransposeSymbolic(R,&P));
40 
41   /* get symbolic C=Pt*A*P */
42   PetscCall(MatPtAPSymbolic_SeqAIJ_SeqAIJ_SparseAxpy(A,P,fill,C));
43   PetscCall(MatSetBlockSizes(C,PetscAbs(R->rmap->bs),PetscAbs(R->rmap->bs)));
44   C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart;
45 
46   /* create a supporting struct */
47   PetscCall(PetscNew(&rart));
48   C->product->data    = rart;
49   C->product->destroy = MatDestroy_SeqAIJ_RARt;
50 
51   /* ------ Use coloring ---------- */
52   /* inode causes memory problem */
53   PetscCall(MatSetOption(C,MAT_USE_INODES,PETSC_FALSE));
54 
55   /* Create MatTransposeColoring from symbolic C=R*A*R^T */
56   PetscCall(MatColoringCreate(C,&coloring));
57   PetscCall(MatColoringSetDistance(coloring,2));
58   PetscCall(MatColoringSetType(coloring,MATCOLORINGSL));
59   PetscCall(MatColoringSetFromOptions(coloring));
60   PetscCall(MatColoringApply(coloring,&iscoloring));
61   PetscCall(MatColoringDestroy(&coloring));
62   PetscCall(MatTransposeColoringCreate(C,iscoloring,&matcoloring));
63 
64   rart->matcoloring = matcoloring;
65   PetscCall(ISColoringDestroy(&iscoloring));
66 
67   /* Create Rt_dense */
68   PetscCall(MatCreate(PETSC_COMM_SELF,&Rt_dense));
69   PetscCall(MatSetSizes(Rt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors));
70   PetscCall(MatSetType(Rt_dense,MATSEQDENSE));
71   PetscCall(MatSeqDenseSetPreallocation(Rt_dense,NULL));
72 
73   Rt_dense->assembled = PETSC_TRUE;
74   rart->Rt            = Rt_dense;
75 
76   /* Create RARt_dense = R*A*Rt_dense */
77   PetscCall(MatCreate(PETSC_COMM_SELF,&RARt_dense));
78   PetscCall(MatSetSizes(RARt_dense,C->rmap->n,matcoloring->ncolors,C->rmap->n,matcoloring->ncolors));
79   PetscCall(MatSetType(RARt_dense,MATSEQDENSE));
80   PetscCall(MatSeqDenseSetPreallocation(RARt_dense,NULL));
81 
82   rart->RARt = RARt_dense;
83 
84   /* Allocate work array to store columns of A*R^T used in MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense() */
85   PetscCall(PetscMalloc1(A->rmap->n*4,&rart->work));
86 
87   /* clean up */
88   PetscCall(MatDestroy(&P));
89 
90 #if defined(PETSC_USE_INFO)
91   {
92     Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
93     PetscReal density = (PetscReal)(c->nz)/(RARt_dense->rmap->n*RARt_dense->cmap->n);
94     PetscCall(PetscInfo(C,"C=R*(A*Rt) via coloring C - use sparse-dense inner products\n"));
95     PetscCall(PetscInfo(C,"RARt_den %" PetscInt_FMT " %" PetscInt_FMT "; Rt %" PetscInt_FMT " %" PetscInt_FMT " (RARt->nz %" PetscInt_FMT ")/(m*ncolors)=%g\n",RARt_dense->rmap->n,RARt_dense->cmap->n,R->cmap->n,R->rmap->n,c->nz,(double)density));
96   }
97 #endif
98   PetscFunctionReturn(0);
99 }
100 
101 /*
102  RAB = R * A * B, R and A in seqaij format, B in dense format;
103 */
104 PetscErrorCode MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(Mat R,Mat A,Mat B,Mat RAB,PetscScalar *work)
105 {
106   Mat_SeqAIJ        *a=(Mat_SeqAIJ*)A->data,*r=(Mat_SeqAIJ*)R->data;
107   PetscScalar       r1,r2,r3,r4;
108   const PetscScalar *b,*b1,*b2,*b3,*b4;
109   MatScalar         *aa,*ra;
110   PetscInt          cn =B->cmap->n,bm=B->rmap->n,col,i,j,n,*ai=a->i,*aj,am=A->rmap->n;
111   PetscInt          am2=2*am,am3=3*am,bm4=4*bm;
112   PetscScalar       *d,*c,*c2,*c3,*c4;
113   PetscInt          *rj,rm=R->rmap->n,dm=RAB->rmap->n,dn=RAB->cmap->n;
114   PetscInt         rm2=2*rm,rm3=3*rm,colrm;
115 
116   PetscFunctionBegin;
117   if (!dm || !dn) PetscFunctionReturn(0);
118   PetscCheck(bm == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %" PetscInt_FMT " not equal rows in B %" PetscInt_FMT,A->cmap->n,bm);
119   PetscCheck(am == R->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in R %" PetscInt_FMT " not equal rows in A %" PetscInt_FMT,R->cmap->n,am);
120   PetscCheck(R->rmap->n == RAB->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in RAB %" PetscInt_FMT " not equal rows in R %" PetscInt_FMT,RAB->rmap->n,R->rmap->n);
121   PetscCheck(B->cmap->n == RAB->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in RAB %" PetscInt_FMT " not equal columns in B %" PetscInt_FMT,RAB->cmap->n,B->cmap->n);
122 
123   { /*
124      This approach is not as good as original ones (will be removed later), but it reveals that
125      AB_den=A*B takes almost all execution time in R*A*B for src/ksp/ksp/tutorials/ex56.c
126      */
127     PetscBool via_matmatmult=PETSC_FALSE;
128     PetscCall(PetscOptionsGetBool(NULL,NULL,"-matrart_via_matmatmult",&via_matmatmult,NULL));
129     if (via_matmatmult) {
130       Mat AB_den = NULL;
131       PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&AB_den));
132       PetscCall(MatMatMultSymbolic_SeqAIJ_SeqDense(A,B,0.0,AB_den));
133       PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(A,B,AB_den));
134       PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(R,AB_den,RAB));
135       PetscCall(MatDestroy(&AB_den));
136       PetscFunctionReturn(0);
137     }
138   }
139 
140   PetscCall(MatDenseGetArrayRead(B,&b));
141   PetscCall(MatDenseGetArray(RAB,&d));
142   b1   = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm;
143   c    = work; c2 = c + am; c3 = c2 + am; c4 = c3 + am;
144   for (col=0; col<cn-4; col += 4) {  /* over columns of C */
145     for (i=0; i<am; i++) {        /* over rows of A in those columns */
146       r1 = r2 = r3 = r4 = 0.0;
147       n  = ai[i+1] - ai[i];
148       aj = a->j + ai[i];
149       aa = a->a + ai[i];
150       for (j=0; j<n; j++) {
151         r1 += (*aa)*b1[*aj];
152         r2 += (*aa)*b2[*aj];
153         r3 += (*aa)*b3[*aj];
154         r4 += (*aa++)*b4[*aj++];
155       }
156       c[i]       = r1;
157       c[am  + i] = r2;
158       c[am2 + i] = r3;
159       c[am3 + i] = r4;
160     }
161     b1 += bm4;
162     b2 += bm4;
163     b3 += bm4;
164     b4 += bm4;
165 
166     /* RAB[:,col] = R*C[:,col] */
167     colrm = col*rm;
168     for (i=0; i<rm; i++) {        /* over rows of R in those columns */
169       r1 = r2 = r3 = r4 = 0.0;
170       n  = r->i[i+1] - r->i[i];
171       rj = r->j + r->i[i];
172       ra = r->a + r->i[i];
173       for (j=0; j<n; j++) {
174         r1 += (*ra)*c[*rj];
175         r2 += (*ra)*c2[*rj];
176         r3 += (*ra)*c3[*rj];
177         r4 += (*ra++)*c4[*rj++];
178       }
179       d[colrm + i]       = r1;
180       d[colrm + rm + i]  = r2;
181       d[colrm + rm2 + i] = r3;
182       d[colrm + rm3 + i] = r4;
183     }
184   }
185   for (; col<cn; col++) {     /* over extra columns of C */
186     for (i=0; i<am; i++) {  /* over rows of A in those columns */
187       r1 = 0.0;
188       n  = a->i[i+1] - a->i[i];
189       aj = a->j + a->i[i];
190       aa = a->a + a->i[i];
191       for (j=0; j<n; j++) {
192         r1 += (*aa++)*b1[*aj++];
193       }
194       c[i] = r1;
195     }
196     b1 += bm;
197 
198     for (i=0; i<rm; i++) {  /* over rows of R in those columns */
199       r1 = 0.0;
200       n  = r->i[i+1] - r->i[i];
201       rj = r->j + r->i[i];
202       ra = r->a + r->i[i];
203       for (j=0; j<n; j++) {
204         r1 += (*ra++)*c[*rj++];
205       }
206       d[col*rm + i] = r1;
207     }
208   }
209   PetscCall(PetscLogFlops(cn*2.0*(a->nz + r->nz)));
210 
211   PetscCall(MatDenseRestoreArrayRead(B,&b));
212   PetscCall(MatDenseRestoreArray(RAB,&d));
213   PetscCall(MatAssemblyBegin(RAB,MAT_FINAL_ASSEMBLY));
214   PetscCall(MatAssemblyEnd(RAB,MAT_FINAL_ASSEMBLY));
215   PetscFunctionReturn(0);
216 }
217 
218 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart(Mat A,Mat R,Mat C)
219 {
220   Mat_RARt             *rart;
221   MatTransposeColoring matcoloring;
222   Mat                  Rt,RARt;
223 
224   PetscFunctionBegin;
225   MatCheckProduct(C,3);
226   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
227   rart = (Mat_RARt*)C->product->data;
228 
229   /* Get dense Rt by Apply MatTransposeColoring to R */
230   matcoloring = rart->matcoloring;
231   Rt          = rart->Rt;
232   PetscCall(MatTransColoringApplySpToDen(matcoloring,R,Rt));
233 
234   /* Get dense RARt = R*A*Rt -- dominates! */
235   RARt = rart->RARt;
236   PetscCall(MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(R,A,Rt,RARt,rart->work));
237 
238   /* Recover C from C_dense */
239   PetscCall(MatTransColoringApplyDenToSp(matcoloring,RARt,C));
240   PetscFunctionReturn(0);
241 }
242 
243 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(Mat A,Mat R,PetscReal fill,Mat C)
244 {
245   Mat            ARt;
246   Mat_RARt       *rart;
247   char           *alg;
248 
249   PetscFunctionBegin;
250   MatCheckProduct(C,4);
251   PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
252   /* create symbolic ARt = A*R^T  */
253   PetscCall(MatProductCreate(A,R,NULL,&ARt));
254   PetscCall(MatProductSetType(ARt,MATPRODUCT_ABt));
255   PetscCall(MatProductSetAlgorithm(ARt,"sorted"));
256   PetscCall(MatProductSetFill(ARt,fill));
257   PetscCall(MatProductSetFromOptions(ARt));
258   PetscCall(MatProductSymbolic(ARt));
259 
260   /* compute symbolic C = R*ARt */
261   /* set algorithm for C = R*ARt */
262   PetscCall(PetscStrallocpy(C->product->alg,&alg));
263   PetscCall(MatProductSetAlgorithm(C,"sorted"));
264   PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ(R,ARt,fill,C));
265   /* resume original algorithm for C */
266   PetscCall(MatProductSetAlgorithm(C,alg));
267   PetscCall(PetscFree(alg));
268 
269   C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult;
270 
271   PetscCall(PetscNew(&rart));
272   rart->ARt = ARt;
273   C->product->data    = rart;
274   C->product->destroy = MatDestroy_SeqAIJ_RARt;
275   PetscCall(PetscInfo(C,"Use ARt=A*R^T, C=R*ARt via MatMatTransposeMult(). Coloring can be applied to A*R^T.\n"));
276   PetscFunctionReturn(0);
277 }
278 
279 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult(Mat A,Mat R,Mat C)
280 {
281   Mat_RARt       *rart;
282 
283   PetscFunctionBegin;
284   MatCheckProduct(C,3);
285   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
286   rart = (Mat_RARt*)C->product->data;
287   PetscCall(MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(A,R,rart->ARt)); /* dominate! */
288   PetscCall(MatMatMultNumeric_SeqAIJ_SeqAIJ(R,rart->ARt,C));
289   PetscFunctionReturn(0);
290 }
291 
292 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat R,PetscReal fill,Mat C)
293 {
294   Mat            Rt;
295   Mat_RARt       *rart;
296 
297   PetscFunctionBegin;
298   MatCheckProduct(C,4);
299   PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
300   PetscCall(MatTranspose(R,MAT_INITIAL_MATRIX,&Rt));
301   PetscCall(MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ(R,A,Rt,fill,C));
302 
303   PetscCall(PetscNew(&rart));
304   rart->data = C->product->data;
305   rart->destroy = C->product->destroy;
306   rart->Rt = Rt;
307   C->product->data    = rart;
308   C->product->destroy = MatDestroy_SeqAIJ_RARt;
309   C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ;
310   PetscCall(PetscInfo(C,"Use Rt=R^T and C=R*A*Rt via MatMatMatMult() to avoid sparse inner products\n"));
311   PetscFunctionReturn(0);
312 }
313 
314 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ(Mat A,Mat R,Mat C)
315 {
316   Mat_RARt       *rart;
317 
318   PetscFunctionBegin;
319   MatCheckProduct(C,3);
320   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
321   rart = (Mat_RARt*)C->product->data;
322   PetscCall(MatTranspose(R,MAT_REUSE_MATRIX,&rart->Rt));
323   /* MatMatMatMultSymbolic used a different data */
324   C->product->data = rart->data;
325   PetscCall(MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqAIJ(R,A,rart->Rt,C));
326   C->product->data = rart;
327   PetscFunctionReturn(0);
328 }
329 
330 PetscErrorCode MatRARt_SeqAIJ_SeqAIJ(Mat A,Mat R,MatReuse scall,PetscReal fill,Mat *C)
331 {
332   const char     *algTypes[3] = {"matmatmatmult","matmattransposemult","coloring_rart"};
333   PetscInt       alg=0; /* set default algorithm */
334 
335   PetscFunctionBegin;
336   if (scall == MAT_INITIAL_MATRIX) {
337     PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MatRARt","Mat");
338     PetscCall(PetscOptionsEList("-matrart_via","Algorithmic approach","MatRARt",algTypes,3,algTypes[0],&alg,NULL));
339     PetscOptionsEnd();
340 
341     PetscCall(PetscLogEventBegin(MAT_RARtSymbolic,A,R,0,0));
342     PetscCall(MatCreate(PETSC_COMM_SELF,C));
343     switch (alg) {
344     case 1:
345       /* via matmattransposemult: ARt=A*R^T, C=R*ARt - matrix coloring can be applied to A*R^T */
346       PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A,R,fill,*C));
347       break;
348     case 2:
349       /* via coloring_rart: apply coloring C = R*A*R^T                          */
350       PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A,R,fill,*C));
351       break;
352     default:
353       /* via matmatmatmult: Rt=R^T, C=R*A*Rt - avoid inefficient sparse inner products */
354       PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ(A,R,fill,*C));
355       break;
356     }
357     PetscCall(PetscLogEventEnd(MAT_RARtSymbolic,A,R,0,0));
358   }
359 
360   PetscCall(PetscLogEventBegin(MAT_RARtNumeric,A,R,0,0));
361   PetscCall(((*C)->ops->rartnumeric)(A,R,*C));
362   PetscCall(PetscLogEventEnd(MAT_RARtNumeric,A,R,0,0));
363   PetscFunctionReturn(0);
364 }
365 
366 /* ------------------------------------------------------------- */
367 PetscErrorCode MatProductSymbolic_RARt_SeqAIJ_SeqAIJ(Mat C)
368 {
369   Mat_Product         *product = C->product;
370   Mat                 A=product->A,R=product->B;
371   MatProductAlgorithm alg=product->alg;
372   PetscReal           fill=product->fill;
373   PetscBool           flg;
374 
375   PetscFunctionBegin;
376   PetscCall(PetscStrcmp(alg,"r*a*rt",&flg));
377   if (flg) {
378     PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ(A,R,fill,C));
379     goto next;
380   }
381 
382   PetscCall(PetscStrcmp(alg,"r*art",&flg));
383   if (flg) {
384     PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A,R,fill,C));
385     goto next;
386   }
387 
388   PetscCall(PetscStrcmp(alg,"coloring_rart",&flg));
389   if (flg) {
390     PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A,R,fill,C));
391     goto next;
392   }
393 
394   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatProductAlgorithm is not supported");
395 
396 next:
397   C->ops->productnumeric = MatProductNumeric_RARt;
398   PetscFunctionReturn(0);
399 }
400