xref: /petsc/src/mat/impls/aij/seq/matrart.c (revision d5b43468fb8780a8feea140ccd6fa3e6a50411cc)
1 
2 /*
3   Defines projective product routines where A is a SeqAIJ matrix
4           C = R * A * R^T
5 */
6 
7 #include <../src/mat/impls/aij/seq/aij.h>
8 #include <../src/mat/utils/freespace.h>
9 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/
10 
11 PetscErrorCode MatDestroy_SeqAIJ_RARt(void *data)
12 {
13   Mat_RARt *rart = (Mat_RARt *)data;
14 
15   PetscFunctionBegin;
16   PetscCall(MatTransposeColoringDestroy(&rart->matcoloring));
17   PetscCall(MatDestroy(&rart->Rt));
18   PetscCall(MatDestroy(&rart->RARt));
19   PetscCall(MatDestroy(&rart->ARt));
20   PetscCall(PetscFree(rart->work));
21   if (rart->destroy) PetscCall((*rart->destroy)(rart->data));
22   PetscCall(PetscFree(rart));
23   PetscFunctionReturn(0);
24 }
25 
26 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(Mat A, Mat R, PetscReal fill, Mat C)
27 {
28   Mat                  P;
29   Mat_RARt            *rart;
30   MatColoring          coloring;
31   MatTransposeColoring matcoloring;
32   ISColoring           iscoloring;
33   Mat                  Rt_dense, RARt_dense;
34 
35   PetscFunctionBegin;
36   MatCheckProduct(C, 4);
37   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
38   /* create symbolic P=Rt */
39   PetscCall(MatTransposeSymbolic(R, &P));
40 
41   /* get symbolic C=Pt*A*P */
42   PetscCall(MatPtAPSymbolic_SeqAIJ_SeqAIJ_SparseAxpy(A, P, fill, C));
43   PetscCall(MatSetBlockSizes(C, PetscAbs(R->rmap->bs), PetscAbs(R->rmap->bs)));
44   C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart;
45 
46   /* create a supporting struct */
47   PetscCall(PetscNew(&rart));
48   C->product->data    = rart;
49   C->product->destroy = MatDestroy_SeqAIJ_RARt;
50 
51   /* ------ Use coloring ---------- */
52   /* inode causes memory problem */
53   PetscCall(MatSetOption(C, MAT_USE_INODES, PETSC_FALSE));
54 
55   /* Create MatTransposeColoring from symbolic C=R*A*R^T */
56   PetscCall(MatColoringCreate(C, &coloring));
57   PetscCall(MatColoringSetDistance(coloring, 2));
58   PetscCall(MatColoringSetType(coloring, MATCOLORINGSL));
59   PetscCall(MatColoringSetFromOptions(coloring));
60   PetscCall(MatColoringApply(coloring, &iscoloring));
61   PetscCall(MatColoringDestroy(&coloring));
62   PetscCall(MatTransposeColoringCreate(C, iscoloring, &matcoloring));
63 
64   rart->matcoloring = matcoloring;
65   PetscCall(ISColoringDestroy(&iscoloring));
66 
67   /* Create Rt_dense */
68   PetscCall(MatCreate(PETSC_COMM_SELF, &Rt_dense));
69   PetscCall(MatSetSizes(Rt_dense, A->cmap->n, matcoloring->ncolors, A->cmap->n, matcoloring->ncolors));
70   PetscCall(MatSetType(Rt_dense, MATSEQDENSE));
71   PetscCall(MatSeqDenseSetPreallocation(Rt_dense, NULL));
72 
73   Rt_dense->assembled = PETSC_TRUE;
74   rart->Rt            = Rt_dense;
75 
76   /* Create RARt_dense = R*A*Rt_dense */
77   PetscCall(MatCreate(PETSC_COMM_SELF, &RARt_dense));
78   PetscCall(MatSetSizes(RARt_dense, C->rmap->n, matcoloring->ncolors, C->rmap->n, matcoloring->ncolors));
79   PetscCall(MatSetType(RARt_dense, MATSEQDENSE));
80   PetscCall(MatSeqDenseSetPreallocation(RARt_dense, NULL));
81 
82   rart->RARt = RARt_dense;
83 
84   /* Allocate work array to store columns of A*R^T used in MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense() */
85   PetscCall(PetscMalloc1(A->rmap->n * 4, &rart->work));
86 
87   /* clean up */
88   PetscCall(MatDestroy(&P));
89 
90 #if defined(PETSC_USE_INFO)
91   {
92     Mat_SeqAIJ *c       = (Mat_SeqAIJ *)C->data;
93     PetscReal   density = (PetscReal)(c->nz) / (RARt_dense->rmap->n * RARt_dense->cmap->n);
94     PetscCall(PetscInfo(C, "C=R*(A*Rt) via coloring C - use sparse-dense inner products\n"));
95     PetscCall(PetscInfo(C, "RARt_den %" PetscInt_FMT " %" PetscInt_FMT "; Rt %" PetscInt_FMT " %" PetscInt_FMT " (RARt->nz %" PetscInt_FMT ")/(m*ncolors)=%g\n", RARt_dense->rmap->n, RARt_dense->cmap->n, R->cmap->n, R->rmap->n, c->nz, (double)density));
96   }
97 #endif
98   PetscFunctionReturn(0);
99 }
100 
101 /*
102  RAB = R * A * B, R and A in seqaij format, B in dense format;
103 */
104 PetscErrorCode MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(Mat R, Mat A, Mat B, Mat RAB, PetscScalar *work)
105 {
106   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *r = (Mat_SeqAIJ *)R->data;
107   PetscScalar        r1, r2, r3, r4;
108   const PetscScalar *b, *b1, *b2, *b3, *b4;
109   MatScalar         *aa, *ra;
110   PetscInt           cn = B->cmap->n, bm = B->rmap->n, col, i, j, n, *ai = a->i, *aj, am = A->rmap->n;
111   PetscInt           am2 = 2 * am, am3 = 3 * am, bm4 = 4 * bm;
112   PetscScalar       *d, *c, *c2, *c3, *c4;
113   PetscInt          *rj, rm = R->rmap->n, dm = RAB->rmap->n, dn = RAB->cmap->n;
114   PetscInt           rm2 = 2 * rm, rm3 = 3 * rm, colrm;
115 
116   PetscFunctionBegin;
117   if (!dm || !dn) PetscFunctionReturn(0);
118   PetscCheck(bm == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in A %" PetscInt_FMT " not equal rows in B %" PetscInt_FMT, A->cmap->n, bm);
119   PetscCheck(am == R->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in R %" PetscInt_FMT " not equal rows in A %" PetscInt_FMT, R->cmap->n, am);
120   PetscCheck(R->rmap->n == RAB->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows in RAB %" PetscInt_FMT " not equal rows in R %" PetscInt_FMT, RAB->rmap->n, R->rmap->n);
121   PetscCheck(B->cmap->n == RAB->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in RAB %" PetscInt_FMT " not equal columns in B %" PetscInt_FMT, RAB->cmap->n, B->cmap->n);
122 
123   { /*
124      This approach is not as good as original ones (will be removed later), but it reveals that
125      AB_den=A*B takes almost all execution time in R*A*B for src/ksp/ksp/tutorials/ex56.c
126      */
127     PetscBool via_matmatmult = PETSC_FALSE;
128     PetscCall(PetscOptionsGetBool(NULL, NULL, "-matrart_via_matmatmult", &via_matmatmult, NULL));
129     if (via_matmatmult) {
130       Mat AB_den = NULL;
131       PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &AB_den));
132       PetscCall(MatMatMultSymbolic_SeqAIJ_SeqDense(A, B, 0.0, AB_den));
133       PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(A, B, AB_den));
134       PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(R, AB_den, RAB));
135       PetscCall(MatDestroy(&AB_den));
136       PetscFunctionReturn(0);
137     }
138   }
139 
140   PetscCall(MatDenseGetArrayRead(B, &b));
141   PetscCall(MatDenseGetArray(RAB, &d));
142   b1 = b;
143   b2 = b1 + bm;
144   b3 = b2 + bm;
145   b4 = b3 + bm;
146   c  = work;
147   c2 = c + am;
148   c3 = c2 + am;
149   c4 = c3 + am;
150   for (col = 0; col < cn - 4; col += 4) { /* over columns of C */
151     for (i = 0; i < am; i++) {            /* over rows of A in those columns */
152       r1 = r2 = r3 = r4 = 0.0;
153       n                 = ai[i + 1] - ai[i];
154       aj                = a->j + ai[i];
155       aa                = a->a + ai[i];
156       for (j = 0; j < n; j++) {
157         r1 += (*aa) * b1[*aj];
158         r2 += (*aa) * b2[*aj];
159         r3 += (*aa) * b3[*aj];
160         r4 += (*aa++) * b4[*aj++];
161       }
162       c[i]       = r1;
163       c[am + i]  = r2;
164       c[am2 + i] = r3;
165       c[am3 + i] = r4;
166     }
167     b1 += bm4;
168     b2 += bm4;
169     b3 += bm4;
170     b4 += bm4;
171 
172     /* RAB[:,col] = R*C[:,col] */
173     colrm = col * rm;
174     for (i = 0; i < rm; i++) { /* over rows of R in those columns */
175       r1 = r2 = r3 = r4 = 0.0;
176       n                 = r->i[i + 1] - r->i[i];
177       rj                = r->j + r->i[i];
178       ra                = r->a + r->i[i];
179       for (j = 0; j < n; j++) {
180         r1 += (*ra) * c[*rj];
181         r2 += (*ra) * c2[*rj];
182         r3 += (*ra) * c3[*rj];
183         r4 += (*ra++) * c4[*rj++];
184       }
185       d[colrm + i]       = r1;
186       d[colrm + rm + i]  = r2;
187       d[colrm + rm2 + i] = r3;
188       d[colrm + rm3 + i] = r4;
189     }
190   }
191   for (; col < cn; col++) {    /* over extra columns of C */
192     for (i = 0; i < am; i++) { /* over rows of A in those columns */
193       r1 = 0.0;
194       n  = a->i[i + 1] - a->i[i];
195       aj = a->j + a->i[i];
196       aa = a->a + a->i[i];
197       for (j = 0; j < n; j++) r1 += (*aa++) * b1[*aj++];
198       c[i] = r1;
199     }
200     b1 += bm;
201 
202     for (i = 0; i < rm; i++) { /* over rows of R in those columns */
203       r1 = 0.0;
204       n  = r->i[i + 1] - r->i[i];
205       rj = r->j + r->i[i];
206       ra = r->a + r->i[i];
207       for (j = 0; j < n; j++) r1 += (*ra++) * c[*rj++];
208       d[col * rm + i] = r1;
209     }
210   }
211   PetscCall(PetscLogFlops(cn * 2.0 * (a->nz + r->nz)));
212 
213   PetscCall(MatDenseRestoreArrayRead(B, &b));
214   PetscCall(MatDenseRestoreArray(RAB, &d));
215   PetscCall(MatAssemblyBegin(RAB, MAT_FINAL_ASSEMBLY));
216   PetscCall(MatAssemblyEnd(RAB, MAT_FINAL_ASSEMBLY));
217   PetscFunctionReturn(0);
218 }
219 
220 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart(Mat A, Mat R, Mat C)
221 {
222   Mat_RARt            *rart;
223   MatTransposeColoring matcoloring;
224   Mat                  Rt, RARt;
225 
226   PetscFunctionBegin;
227   MatCheckProduct(C, 3);
228   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
229   rart = (Mat_RARt *)C->product->data;
230 
231   /* Get dense Rt by Apply MatTransposeColoring to R */
232   matcoloring = rart->matcoloring;
233   Rt          = rart->Rt;
234   PetscCall(MatTransColoringApplySpToDen(matcoloring, R, Rt));
235 
236   /* Get dense RARt = R*A*Rt -- dominates! */
237   RARt = rart->RARt;
238   PetscCall(MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(R, A, Rt, RARt, rart->work));
239 
240   /* Recover C from C_dense */
241   PetscCall(MatTransColoringApplyDenToSp(matcoloring, RARt, C));
242   PetscFunctionReturn(0);
243 }
244 
245 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(Mat A, Mat R, PetscReal fill, Mat C)
246 {
247   Mat       ARt;
248   Mat_RARt *rart;
249   char     *alg;
250 
251   PetscFunctionBegin;
252   MatCheckProduct(C, 4);
253   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
254   /* create symbolic ARt = A*R^T  */
255   PetscCall(MatProductCreate(A, R, NULL, &ARt));
256   PetscCall(MatProductSetType(ARt, MATPRODUCT_ABt));
257   PetscCall(MatProductSetAlgorithm(ARt, "sorted"));
258   PetscCall(MatProductSetFill(ARt, fill));
259   PetscCall(MatProductSetFromOptions(ARt));
260   PetscCall(MatProductSymbolic(ARt));
261 
262   /* compute symbolic C = R*ARt */
263   /* set algorithm for C = R*ARt */
264   PetscCall(PetscStrallocpy(C->product->alg, &alg));
265   PetscCall(MatProductSetAlgorithm(C, "sorted"));
266   PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ(R, ARt, fill, C));
267   /* resume original algorithm for C */
268   PetscCall(MatProductSetAlgorithm(C, alg));
269   PetscCall(PetscFree(alg));
270 
271   C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult;
272 
273   PetscCall(PetscNew(&rart));
274   rart->ARt           = ARt;
275   C->product->data    = rart;
276   C->product->destroy = MatDestroy_SeqAIJ_RARt;
277   PetscCall(PetscInfo(C, "Use ARt=A*R^T, C=R*ARt via MatMatTransposeMult(). Coloring can be applied to A*R^T.\n"));
278   PetscFunctionReturn(0);
279 }
280 
281 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult(Mat A, Mat R, Mat C)
282 {
283   Mat_RARt *rart;
284 
285   PetscFunctionBegin;
286   MatCheckProduct(C, 3);
287   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
288   rart = (Mat_RARt *)C->product->data;
289   PetscCall(MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(A, R, rart->ARt)); /* dominate! */
290   PetscCall(MatMatMultNumeric_SeqAIJ_SeqAIJ(R, rart->ARt, C));
291   PetscFunctionReturn(0);
292 }
293 
294 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ(Mat A, Mat R, PetscReal fill, Mat C)
295 {
296   Mat       Rt;
297   Mat_RARt *rart;
298 
299   PetscFunctionBegin;
300   MatCheckProduct(C, 4);
301   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
302   PetscCall(MatTranspose(R, MAT_INITIAL_MATRIX, &Rt));
303   PetscCall(MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ(R, A, Rt, fill, C));
304 
305   PetscCall(PetscNew(&rart));
306   rart->data          = C->product->data;
307   rart->destroy       = C->product->destroy;
308   rart->Rt            = Rt;
309   C->product->data    = rart;
310   C->product->destroy = MatDestroy_SeqAIJ_RARt;
311   C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ;
312   PetscCall(PetscInfo(C, "Use Rt=R^T and C=R*A*Rt via MatMatMatMult() to avoid sparse inner products\n"));
313   PetscFunctionReturn(0);
314 }
315 
316 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ(Mat A, Mat R, Mat C)
317 {
318   Mat_RARt *rart;
319 
320   PetscFunctionBegin;
321   MatCheckProduct(C, 3);
322   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
323   rart = (Mat_RARt *)C->product->data;
324   PetscCall(MatTranspose(R, MAT_REUSE_MATRIX, &rart->Rt));
325   /* MatMatMatMultSymbolic used a different data */
326   C->product->data = rart->data;
327   PetscCall(MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqAIJ(R, A, rart->Rt, C));
328   C->product->data = rart;
329   PetscFunctionReturn(0);
330 }
331 
332 PetscErrorCode MatRARt_SeqAIJ_SeqAIJ(Mat A, Mat R, MatReuse scall, PetscReal fill, Mat *C)
333 {
334   const char *algTypes[3] = {"matmatmatmult", "matmattransposemult", "coloring_rart"};
335   PetscInt    alg         = 0; /* set default algorithm */
336 
337   PetscFunctionBegin;
338   if (scall == MAT_INITIAL_MATRIX) {
339     PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "MatRARt", "Mat");
340     PetscCall(PetscOptionsEList("-matrart_via", "Algorithmic approach", "MatRARt", algTypes, 3, algTypes[0], &alg, NULL));
341     PetscOptionsEnd();
342 
343     PetscCall(PetscLogEventBegin(MAT_RARtSymbolic, A, R, 0, 0));
344     PetscCall(MatCreate(PETSC_COMM_SELF, C));
345     switch (alg) {
346     case 1:
347       /* via matmattransposemult: ARt=A*R^T, C=R*ARt - matrix coloring can be applied to A*R^T */
348       PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A, R, fill, *C));
349       break;
350     case 2:
351       /* via coloring_rart: apply coloring C = R*A*R^T                          */
352       PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A, R, fill, *C));
353       break;
354     default:
355       /* via matmatmatmult: Rt=R^T, C=R*A*Rt - avoid inefficient sparse inner products */
356       PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ(A, R, fill, *C));
357       break;
358     }
359     PetscCall(PetscLogEventEnd(MAT_RARtSymbolic, A, R, 0, 0));
360   }
361 
362   PetscCall(PetscLogEventBegin(MAT_RARtNumeric, A, R, 0, 0));
363   PetscCall(((*C)->ops->rartnumeric)(A, R, *C));
364   PetscCall(PetscLogEventEnd(MAT_RARtNumeric, A, R, 0, 0));
365   PetscFunctionReturn(0);
366 }
367 
368 /* ------------------------------------------------------------- */
369 PetscErrorCode MatProductSymbolic_RARt_SeqAIJ_SeqAIJ(Mat C)
370 {
371   Mat_Product        *product = C->product;
372   Mat                 A = product->A, R = product->B;
373   MatProductAlgorithm alg  = product->alg;
374   PetscReal           fill = product->fill;
375   PetscBool           flg;
376 
377   PetscFunctionBegin;
378   PetscCall(PetscStrcmp(alg, "r*a*rt", &flg));
379   if (flg) {
380     PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ(A, R, fill, C));
381     goto next;
382   }
383 
384   PetscCall(PetscStrcmp(alg, "r*art", &flg));
385   if (flg) {
386     PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A, R, fill, C));
387     goto next;
388   }
389 
390   PetscCall(PetscStrcmp(alg, "coloring_rart", &flg));
391   if (flg) {
392     PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A, R, fill, C));
393     goto next;
394   }
395 
396   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatProductAlgorithm is not supported");
397 
398 next:
399   C->ops->productnumeric = MatProductNumeric_RARt;
400   PetscFunctionReturn(0);
401 }
402