xref: /petsc/src/mat/impls/aij/seq/matrart.c (revision 9371c9d470a9602b6d10a8bf50c9b2280a79e45a)
1807171c5SHong Zhang 
23b1b9624SHong Zhang /*
33b1b9624SHong Zhang   Defines projective product routines where A is a SeqAIJ matrix
43b1b9624SHong Zhang           C = R * A * R^T
53b1b9624SHong Zhang */
63b1b9624SHong Zhang 
7807171c5SHong Zhang #include <../src/mat/impls/aij/seq/aij.h>
8807171c5SHong Zhang #include <../src/mat/utils/freespace.h>
9807171c5SHong Zhang #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/
10807171c5SHong Zhang 
11*9371c9d4SSatish Balay PetscErrorCode MatDestroy_SeqAIJ_RARt(void *data) {
126718818eSStefano Zampini   Mat_RARt *rart = (Mat_RARt *)data;
13807171c5SHong Zhang 
14807171c5SHong Zhang   PetscFunctionBegin;
159566063dSJacob Faibussowitsch   PetscCall(MatTransposeColoringDestroy(&rart->matcoloring));
169566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&rart->Rt));
179566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&rart->RARt));
189566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&rart->ARt));
199566063dSJacob Faibussowitsch   PetscCall(PetscFree(rart->work));
201baa6e33SBarry Smith   if (rart->destroy) PetscCall((*rart->destroy)(rart->data));
219566063dSJacob Faibussowitsch   PetscCall(PetscFree(rart));
22807171c5SHong Zhang   PetscFunctionReturn(0);
23807171c5SHong Zhang }
24807171c5SHong Zhang 
25*9371c9d4SSatish Balay PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(Mat A, Mat R, PetscReal fill, Mat C) {
26807171c5SHong Zhang   Mat                  P;
27807171c5SHong Zhang   Mat_RARt            *rart;
28335efc43SPeter Brune   MatColoring          coloring;
29807171c5SHong Zhang   MatTransposeColoring matcoloring;
30807171c5SHong Zhang   ISColoring           iscoloring;
318d0a38e4SHong Zhang   Mat                  Rt_dense, RARt_dense;
32807171c5SHong Zhang 
33807171c5SHong Zhang   PetscFunctionBegin;
346718818eSStefano Zampini   MatCheckProduct(C, 4);
3528b400f6SJacob Faibussowitsch   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
36807171c5SHong Zhang   /* create symbolic P=Rt */
377fb60732SBarry Smith   PetscCall(MatTransposeSymbolic(R, &P));
38807171c5SHong Zhang 
39807171c5SHong Zhang   /* get symbolic C=Pt*A*P */
409566063dSJacob Faibussowitsch   PetscCall(MatPtAPSymbolic_SeqAIJ_SeqAIJ_SparseAxpy(A, P, fill, C));
419566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSizes(C, PetscAbs(R->rmap->bs), PetscAbs(R->rmap->bs)));
424222ddf1SHong Zhang   C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart;
43807171c5SHong Zhang 
44807171c5SHong Zhang   /* create a supporting struct */
459566063dSJacob Faibussowitsch   PetscCall(PetscNew(&rart));
466718818eSStefano Zampini   C->product->data    = rart;
476718818eSStefano Zampini   C->product->destroy = MatDestroy_SeqAIJ_RARt;
48807171c5SHong Zhang 
4922e94b5dSHong Zhang   /* ------ Use coloring ---------- */
504222ddf1SHong Zhang   /* inode causes memory problem */
519566063dSJacob Faibussowitsch   PetscCall(MatSetOption(C, MAT_USE_INODES, PETSC_FALSE));
524d478ae7SHong Zhang 
53807171c5SHong Zhang   /* Create MatTransposeColoring from symbolic C=R*A*R^T */
549566063dSJacob Faibussowitsch   PetscCall(MatColoringCreate(C, &coloring));
559566063dSJacob Faibussowitsch   PetscCall(MatColoringSetDistance(coloring, 2));
569566063dSJacob Faibussowitsch   PetscCall(MatColoringSetType(coloring, MATCOLORINGSL));
579566063dSJacob Faibussowitsch   PetscCall(MatColoringSetFromOptions(coloring));
589566063dSJacob Faibussowitsch   PetscCall(MatColoringApply(coloring, &iscoloring));
599566063dSJacob Faibussowitsch   PetscCall(MatColoringDestroy(&coloring));
609566063dSJacob Faibussowitsch   PetscCall(MatTransposeColoringCreate(C, iscoloring, &matcoloring));
612205254eSKarl Rupp 
62807171c5SHong Zhang   rart->matcoloring = matcoloring;
639566063dSJacob Faibussowitsch   PetscCall(ISColoringDestroy(&iscoloring));
643b1b9624SHong Zhang 
65807171c5SHong Zhang   /* Create Rt_dense */
669566063dSJacob Faibussowitsch   PetscCall(MatCreate(PETSC_COMM_SELF, &Rt_dense));
679566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(Rt_dense, A->cmap->n, matcoloring->ncolors, A->cmap->n, matcoloring->ncolors));
689566063dSJacob Faibussowitsch   PetscCall(MatSetType(Rt_dense, MATSEQDENSE));
699566063dSJacob Faibussowitsch   PetscCall(MatSeqDenseSetPreallocation(Rt_dense, NULL));
702205254eSKarl Rupp 
71807171c5SHong Zhang   Rt_dense->assembled = PETSC_TRUE;
72807171c5SHong Zhang   rart->Rt            = Rt_dense;
73807171c5SHong Zhang 
74807171c5SHong Zhang   /* Create RARt_dense = R*A*Rt_dense */
759566063dSJacob Faibussowitsch   PetscCall(MatCreate(PETSC_COMM_SELF, &RARt_dense));
769566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(RARt_dense, C->rmap->n, matcoloring->ncolors, C->rmap->n, matcoloring->ncolors));
779566063dSJacob Faibussowitsch   PetscCall(MatSetType(RARt_dense, MATSEQDENSE));
789566063dSJacob Faibussowitsch   PetscCall(MatSeqDenseSetPreallocation(RARt_dense, NULL));
792205254eSKarl Rupp 
80807171c5SHong Zhang   rart->RARt = RARt_dense;
81807171c5SHong Zhang 
82807171c5SHong Zhang   /* Allocate work array to store columns of A*R^T used in MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense() */
839566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(A->rmap->n * 4, &rart->work));
84807171c5SHong Zhang 
85807171c5SHong Zhang   /* clean up */
869566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&P));
87807171c5SHong Zhang 
88807171c5SHong Zhang #if defined(PETSC_USE_INFO)
891ce71dffSSatish Balay   {
906718818eSStefano Zampini     Mat_SeqAIJ *c       = (Mat_SeqAIJ *)C->data;
91807171c5SHong Zhang     PetscReal   density = (PetscReal)(c->nz) / (RARt_dense->rmap->n * RARt_dense->cmap->n);
929566063dSJacob Faibussowitsch     PetscCall(PetscInfo(C, "C=R*(A*Rt) via coloring C - use sparse-dense inner products\n"));
9363a3b9bcSJacob Faibussowitsch     PetscCall(PetscInfo(C, "RARt_den %" PetscInt_FMT " %" PetscInt_FMT "; Rt %" PetscInt_FMT " %" PetscInt_FMT " (RARt->nz %" PetscInt_FMT ")/(m*ncolors)=%g\n", RARt_dense->rmap->n, RARt_dense->cmap->n, R->cmap->n, R->rmap->n, c->nz, (double)density));
941ce71dffSSatish Balay   }
95807171c5SHong Zhang #endif
96807171c5SHong Zhang   PetscFunctionReturn(0);
97807171c5SHong Zhang }
98807171c5SHong Zhang 
99807171c5SHong Zhang /*
100807171c5SHong Zhang  RAB = R * A * B, R and A in seqaij format, B in dense format;
101807171c5SHong Zhang */
102*9371c9d4SSatish Balay PetscErrorCode MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(Mat R, Mat A, Mat B, Mat RAB, PetscScalar *work) {
103807171c5SHong Zhang   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *r = (Mat_SeqAIJ *)R->data;
1041683a169SBarry Smith   PetscScalar        r1, r2, r3, r4;
1051683a169SBarry Smith   const PetscScalar *b, *b1, *b2, *b3, *b4;
106807171c5SHong Zhang   MatScalar         *aa, *ra;
107807171c5SHong Zhang   PetscInt           cn = B->cmap->n, bm = B->rmap->n, col, i, j, n, *ai = a->i, *aj, am = A->rmap->n;
108807171c5SHong Zhang   PetscInt           am2 = 2 * am, am3 = 3 * am, bm4 = 4 * bm;
109807171c5SHong Zhang   PetscScalar       *d, *c, *c2, *c3, *c4;
110807171c5SHong Zhang   PetscInt          *rj, rm = R->rmap->n, dm = RAB->rmap->n, dn = RAB->cmap->n;
111807171c5SHong Zhang   PetscInt           rm2 = 2 * rm, rm3 = 3 * rm, colrm;
112807171c5SHong Zhang 
113807171c5SHong Zhang   PetscFunctionBegin;
114807171c5SHong Zhang   if (!dm || !dn) PetscFunctionReturn(0);
11508401ef6SPierre Jolivet   PetscCheck(bm == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in A %" PetscInt_FMT " not equal rows in B %" PetscInt_FMT, A->cmap->n, bm);
11608401ef6SPierre Jolivet   PetscCheck(am == R->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in R %" PetscInt_FMT " not equal rows in A %" PetscInt_FMT, R->cmap->n, am);
11708401ef6SPierre Jolivet   PetscCheck(R->rmap->n == RAB->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows in RAB %" PetscInt_FMT " not equal rows in R %" PetscInt_FMT, RAB->rmap->n, R->rmap->n);
11808401ef6SPierre Jolivet   PetscCheck(B->cmap->n == RAB->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in RAB %" PetscInt_FMT " not equal columns in B %" PetscInt_FMT, RAB->cmap->n, B->cmap->n);
119807171c5SHong Zhang 
120274010c0SHong Zhang   { /*
121274010c0SHong Zhang      This approach is not as good as original ones (will be removed later), but it reveals that
122c4762a1bSJed Brown      AB_den=A*B takes almost all execution time in R*A*B for src/ksp/ksp/tutorials/ex56.c
123274010c0SHong Zhang      */
124274010c0SHong Zhang     PetscBool via_matmatmult = PETSC_FALSE;
1259566063dSJacob Faibussowitsch     PetscCall(PetscOptionsGetBool(NULL, NULL, "-matrart_via_matmatmult", &via_matmatmult, NULL));
126274010c0SHong Zhang     if (via_matmatmult) {
1274222ddf1SHong Zhang       Mat AB_den = NULL;
1289566063dSJacob Faibussowitsch       PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &AB_den));
1299566063dSJacob Faibussowitsch       PetscCall(MatMatMultSymbolic_SeqAIJ_SeqDense(A, B, 0.0, AB_den));
1309566063dSJacob Faibussowitsch       PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(A, B, AB_den));
1319566063dSJacob Faibussowitsch       PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(R, AB_den, RAB));
1329566063dSJacob Faibussowitsch       PetscCall(MatDestroy(&AB_den));
133274010c0SHong Zhang       PetscFunctionReturn(0);
134274010c0SHong Zhang     }
135274010c0SHong Zhang   }
136274010c0SHong Zhang 
1379566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(B, &b));
1389566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArray(RAB, &d));
139*9371c9d4SSatish Balay   b1 = b;
140*9371c9d4SSatish Balay   b2 = b1 + bm;
141*9371c9d4SSatish Balay   b3 = b2 + bm;
142*9371c9d4SSatish Balay   b4 = b3 + bm;
143*9371c9d4SSatish Balay   c  = work;
144*9371c9d4SSatish Balay   c2 = c + am;
145*9371c9d4SSatish Balay   c3 = c2 + am;
146*9371c9d4SSatish Balay   c4 = c3 + am;
147807171c5SHong Zhang   for (col = 0; col < cn - 4; col += 4) { /* over columns of C */
148807171c5SHong Zhang     for (i = 0; i < am; i++) {            /* over rows of A in those columns */
149807171c5SHong Zhang       r1 = r2 = r3 = r4 = 0.0;
150807171c5SHong Zhang       n                 = ai[i + 1] - ai[i];
151807171c5SHong Zhang       aj                = a->j + ai[i];
152807171c5SHong Zhang       aa                = a->a + ai[i];
153807171c5SHong Zhang       for (j = 0; j < n; j++) {
154807171c5SHong Zhang         r1 += (*aa) * b1[*aj];
155807171c5SHong Zhang         r2 += (*aa) * b2[*aj];
156807171c5SHong Zhang         r3 += (*aa) * b3[*aj];
157807171c5SHong Zhang         r4 += (*aa++) * b4[*aj++];
158807171c5SHong Zhang       }
159807171c5SHong Zhang       c[i]       = r1;
160807171c5SHong Zhang       c[am + i]  = r2;
161807171c5SHong Zhang       c[am2 + i] = r3;
162807171c5SHong Zhang       c[am3 + i] = r4;
163807171c5SHong Zhang     }
164807171c5SHong Zhang     b1 += bm4;
165807171c5SHong Zhang     b2 += bm4;
166807171c5SHong Zhang     b3 += bm4;
167807171c5SHong Zhang     b4 += bm4;
168807171c5SHong Zhang 
169807171c5SHong Zhang     /* RAB[:,col] = R*C[:,col] */
170807171c5SHong Zhang     colrm = col * rm;
171807171c5SHong Zhang     for (i = 0; i < rm; i++) { /* over rows of R in those columns */
172807171c5SHong Zhang       r1 = r2 = r3 = r4 = 0.0;
173807171c5SHong Zhang       n                 = r->i[i + 1] - r->i[i];
174807171c5SHong Zhang       rj                = r->j + r->i[i];
175807171c5SHong Zhang       ra                = r->a + r->i[i];
176807171c5SHong Zhang       for (j = 0; j < n; j++) {
177807171c5SHong Zhang         r1 += (*ra) * c[*rj];
178807171c5SHong Zhang         r2 += (*ra) * c2[*rj];
179807171c5SHong Zhang         r3 += (*ra) * c3[*rj];
180807171c5SHong Zhang         r4 += (*ra++) * c4[*rj++];
181807171c5SHong Zhang       }
182807171c5SHong Zhang       d[colrm + i]       = r1;
183807171c5SHong Zhang       d[colrm + rm + i]  = r2;
184807171c5SHong Zhang       d[colrm + rm2 + i] = r3;
185807171c5SHong Zhang       d[colrm + rm3 + i] = r4;
186807171c5SHong Zhang     }
187807171c5SHong Zhang   }
188807171c5SHong Zhang   for (; col < cn; col++) {    /* over extra columns of C */
189807171c5SHong Zhang     for (i = 0; i < am; i++) { /* over rows of A in those columns */
190807171c5SHong Zhang       r1 = 0.0;
191807171c5SHong Zhang       n  = a->i[i + 1] - a->i[i];
192807171c5SHong Zhang       aj = a->j + a->i[i];
193807171c5SHong Zhang       aa = a->a + a->i[i];
194*9371c9d4SSatish Balay       for (j = 0; j < n; j++) { r1 += (*aa++) * b1[*aj++]; }
195807171c5SHong Zhang       c[i] = r1;
196807171c5SHong Zhang     }
197807171c5SHong Zhang     b1 += bm;
198807171c5SHong Zhang 
199807171c5SHong Zhang     for (i = 0; i < rm; i++) { /* over rows of R in those columns */
200807171c5SHong Zhang       r1 = 0.0;
201807171c5SHong Zhang       n  = r->i[i + 1] - r->i[i];
202807171c5SHong Zhang       rj = r->j + r->i[i];
203807171c5SHong Zhang       ra = r->a + r->i[i];
204*9371c9d4SSatish Balay       for (j = 0; j < n; j++) { r1 += (*ra++) * c[*rj++]; }
205807171c5SHong Zhang       d[col * rm + i] = r1;
206807171c5SHong Zhang     }
207807171c5SHong Zhang   }
2089566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(cn * 2.0 * (a->nz + r->nz)));
209807171c5SHong Zhang 
2109566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(B, &b));
2119566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(RAB, &d));
2129566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(RAB, MAT_FINAL_ASSEMBLY));
2139566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(RAB, MAT_FINAL_ASSEMBLY));
214807171c5SHong Zhang   PetscFunctionReturn(0);
215807171c5SHong Zhang }
216807171c5SHong Zhang 
217*9371c9d4SSatish Balay PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart(Mat A, Mat R, Mat C) {
2186718818eSStefano Zampini   Mat_RARt            *rart;
219807171c5SHong Zhang   MatTransposeColoring matcoloring;
2208d0a38e4SHong Zhang   Mat                  Rt, RARt;
221807171c5SHong Zhang 
222807171c5SHong Zhang   PetscFunctionBegin;
2236718818eSStefano Zampini   MatCheckProduct(C, 3);
22428b400f6SJacob Faibussowitsch   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2256718818eSStefano Zampini   rart = (Mat_RARt *)C->product->data;
2266718818eSStefano Zampini 
227807171c5SHong Zhang   /* Get dense Rt by Apply MatTransposeColoring to R */
228807171c5SHong Zhang   matcoloring = rart->matcoloring;
229807171c5SHong Zhang   Rt          = rart->Rt;
2309566063dSJacob Faibussowitsch   PetscCall(MatTransColoringApplySpToDen(matcoloring, R, Rt));
231807171c5SHong Zhang 
23250647e95SHong Zhang   /* Get dense RARt = R*A*Rt -- dominates! */
233807171c5SHong Zhang   RARt = rart->RARt;
2349566063dSJacob Faibussowitsch   PetscCall(MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(R, A, Rt, RARt, rart->work));
235807171c5SHong Zhang 
236807171c5SHong Zhang   /* Recover C from C_dense */
2379566063dSJacob Faibussowitsch   PetscCall(MatTransColoringApplyDenToSp(matcoloring, RARt, C));
238807171c5SHong Zhang   PetscFunctionReturn(0);
239807171c5SHong Zhang }
240807171c5SHong Zhang 
241*9371c9d4SSatish Balay PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(Mat A, Mat R, PetscReal fill, Mat C) {
2424222ddf1SHong Zhang   Mat       ARt;
2434fa85fdeSHong Zhang   Mat_RARt *rart;
2446718818eSStefano Zampini   char     *alg;
2454fa85fdeSHong Zhang 
24695a72cc5SHong Zhang   PetscFunctionBegin;
2476718818eSStefano Zampini   MatCheckProduct(C, 4);
24828b400f6SJacob Faibussowitsch   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2494222ddf1SHong Zhang   /* create symbolic ARt = A*R^T  */
2509566063dSJacob Faibussowitsch   PetscCall(MatProductCreate(A, R, NULL, &ARt));
2519566063dSJacob Faibussowitsch   PetscCall(MatProductSetType(ARt, MATPRODUCT_ABt));
2529566063dSJacob Faibussowitsch   PetscCall(MatProductSetAlgorithm(ARt, "sorted"));
2539566063dSJacob Faibussowitsch   PetscCall(MatProductSetFill(ARt, fill));
2549566063dSJacob Faibussowitsch   PetscCall(MatProductSetFromOptions(ARt));
2559566063dSJacob Faibussowitsch   PetscCall(MatProductSymbolic(ARt));
2564222ddf1SHong Zhang 
2574222ddf1SHong Zhang   /* compute symbolic C = R*ARt */
2587a3c3d58SStefano Zampini   /* set algorithm for C = R*ARt */
2599566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(C->product->alg, &alg));
2609566063dSJacob Faibussowitsch   PetscCall(MatProductSetAlgorithm(C, "sorted"));
2619566063dSJacob Faibussowitsch   PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ(R, ARt, fill, C));
2627a3c3d58SStefano Zampini   /* resume original algorithm for C */
2639566063dSJacob Faibussowitsch   PetscCall(MatProductSetAlgorithm(C, alg));
2649566063dSJacob Faibussowitsch   PetscCall(PetscFree(alg));
2654222ddf1SHong Zhang 
2664222ddf1SHong Zhang   C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult;
26755bea0ebSHong Zhang 
2689566063dSJacob Faibussowitsch   PetscCall(PetscNew(&rart));
2693b1b9624SHong Zhang   rart->ARt           = ARt;
2706718818eSStefano Zampini   C->product->data    = rart;
2716718818eSStefano Zampini   C->product->destroy = MatDestroy_SeqAIJ_RARt;
2729566063dSJacob Faibussowitsch   PetscCall(PetscInfo(C, "Use ARt=A*R^T, C=R*ARt via MatMatTransposeMult(). Coloring can be applied to A*R^T.\n"));
27355bea0ebSHong Zhang   PetscFunctionReturn(0);
27436104f73SHong Zhang }
27555bea0ebSHong Zhang 
276*9371c9d4SSatish Balay PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult(Mat A, Mat R, Mat C) {
2776718818eSStefano Zampini   Mat_RARt *rart;
27855bea0ebSHong Zhang 
27955bea0ebSHong Zhang   PetscFunctionBegin;
2806718818eSStefano Zampini   MatCheckProduct(C, 3);
28128b400f6SJacob Faibussowitsch   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2826718818eSStefano Zampini   rart = (Mat_RARt *)C->product->data;
2839566063dSJacob Faibussowitsch   PetscCall(MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(A, R, rart->ARt)); /* dominate! */
2849566063dSJacob Faibussowitsch   PetscCall(MatMatMultNumeric_SeqAIJ_SeqAIJ(R, rart->ARt, C));
28555bea0ebSHong Zhang   PetscFunctionReturn(0);
286807171c5SHong Zhang }
28755bea0ebSHong Zhang 
288*9371c9d4SSatish Balay PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ(Mat A, Mat R, PetscReal fill, Mat C) {
28995a72cc5SHong Zhang   Mat       Rt;
29055bea0ebSHong Zhang   Mat_RARt *rart;
29155bea0ebSHong Zhang 
29255bea0ebSHong Zhang   PetscFunctionBegin;
2936718818eSStefano Zampini   MatCheckProduct(C, 4);
29428b400f6SJacob Faibussowitsch   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
295acd337a6SBarry Smith   PetscCall(MatTranspose(R, MAT_INITIAL_MATRIX, &Rt));
2969566063dSJacob Faibussowitsch   PetscCall(MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ(R, A, Rt, fill, C));
29795a72cc5SHong Zhang 
2989566063dSJacob Faibussowitsch   PetscCall(PetscNew(&rart));
2996718818eSStefano Zampini   rart->data          = C->product->data;
3006718818eSStefano Zampini   rart->destroy       = C->product->destroy;
30195a72cc5SHong Zhang   rart->Rt            = Rt;
3026718818eSStefano Zampini   C->product->data    = rart;
3036718818eSStefano Zampini   C->product->destroy = MatDestroy_SeqAIJ_RARt;
3044222ddf1SHong Zhang   C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ;
3059566063dSJacob Faibussowitsch   PetscCall(PetscInfo(C, "Use Rt=R^T and C=R*A*Rt via MatMatMatMult() to avoid sparse inner products\n"));
30655bea0ebSHong Zhang   PetscFunctionReturn(0);
30755bea0ebSHong Zhang }
30855bea0ebSHong Zhang 
309*9371c9d4SSatish Balay PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ(Mat A, Mat R, Mat C) {
3106718818eSStefano Zampini   Mat_RARt *rart;
31155bea0ebSHong Zhang 
31255bea0ebSHong Zhang   PetscFunctionBegin;
3136718818eSStefano Zampini   MatCheckProduct(C, 3);
31428b400f6SJacob Faibussowitsch   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
3156718818eSStefano Zampini   rart = (Mat_RARt *)C->product->data;
316acd337a6SBarry Smith   PetscCall(MatTranspose(R, MAT_REUSE_MATRIX, &rart->Rt));
3176718818eSStefano Zampini   /* MatMatMatMultSymbolic used a different data */
3186718818eSStefano Zampini   C->product->data = rart->data;
3199566063dSJacob Faibussowitsch   PetscCall(MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqAIJ(R, A, rart->Rt, C));
3206718818eSStefano Zampini   C->product->data = rart;
32155bea0ebSHong Zhang   PetscFunctionReturn(0);
32255bea0ebSHong Zhang }
32355bea0ebSHong Zhang 
324*9371c9d4SSatish Balay PetscErrorCode MatRARt_SeqAIJ_SeqAIJ(Mat A, Mat R, MatReuse scall, PetscReal fill, Mat *C) {
32555bea0ebSHong Zhang   const char *algTypes[3] = {"matmatmatmult", "matmattransposemult", "coloring_rart"};
32655bea0ebSHong Zhang   PetscInt    alg         = 0; /* set default algorithm */
32755bea0ebSHong Zhang 
32855bea0ebSHong Zhang   PetscFunctionBegin;
32955bea0ebSHong Zhang   if (scall == MAT_INITIAL_MATRIX) {
330d0609cedSBarry Smith     PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "MatRARt", "Mat");
3319566063dSJacob Faibussowitsch     PetscCall(PetscOptionsEList("-matrart_via", "Algorithmic approach", "MatRARt", algTypes, 3, algTypes[0], &alg, NULL));
332d0609cedSBarry Smith     PetscOptionsEnd();
333b56132cbSHong Zhang 
3349566063dSJacob Faibussowitsch     PetscCall(PetscLogEventBegin(MAT_RARtSymbolic, A, R, 0, 0));
3359566063dSJacob Faibussowitsch     PetscCall(MatCreate(PETSC_COMM_SELF, C));
33655bea0ebSHong Zhang     switch (alg) {
33755bea0ebSHong Zhang     case 1:
33855bea0ebSHong Zhang       /* via matmattransposemult: ARt=A*R^T, C=R*ARt - matrix coloring can be applied to A*R^T */
3399566063dSJacob Faibussowitsch       PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A, R, fill, *C));
34055bea0ebSHong Zhang       break;
34155bea0ebSHong Zhang     case 2:
34255bea0ebSHong Zhang       /* via coloring_rart: apply coloring C = R*A*R^T                          */
3439566063dSJacob Faibussowitsch       PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A, R, fill, *C));
34455bea0ebSHong Zhang       break;
34555bea0ebSHong Zhang     default:
34655bea0ebSHong Zhang       /* via matmatmatmult: Rt=R^T, C=R*A*Rt - avoid inefficient sparse inner products */
3479566063dSJacob Faibussowitsch       PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ(A, R, fill, *C));
34855bea0ebSHong Zhang       break;
34955bea0ebSHong Zhang     }
3509566063dSJacob Faibussowitsch     PetscCall(PetscLogEventEnd(MAT_RARtSymbolic, A, R, 0, 0));
3515008f5a7SHong Zhang   }
3525008f5a7SHong Zhang 
3539566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(MAT_RARtNumeric, A, R, 0, 0));
3549566063dSJacob Faibussowitsch   PetscCall(((*C)->ops->rartnumeric)(A, R, *C));
3559566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(MAT_RARtNumeric, A, R, 0, 0));
356807171c5SHong Zhang   PetscFunctionReturn(0);
357807171c5SHong Zhang }
3584222ddf1SHong Zhang 
3594222ddf1SHong Zhang /* ------------------------------------------------------------- */
360*9371c9d4SSatish Balay PetscErrorCode MatProductSymbolic_RARt_SeqAIJ_SeqAIJ(Mat C) {
3614222ddf1SHong Zhang   Mat_Product        *product = C->product;
3624222ddf1SHong Zhang   Mat                 A = product->A, R = product->B;
3634222ddf1SHong Zhang   MatProductAlgorithm alg  = product->alg;
3644222ddf1SHong Zhang   PetscReal           fill = product->fill;
3654222ddf1SHong Zhang   PetscBool           flg;
3664222ddf1SHong Zhang 
3674222ddf1SHong Zhang   PetscFunctionBegin;
3689566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(alg, "r*a*rt", &flg));
3694222ddf1SHong Zhang   if (flg) {
3709566063dSJacob Faibussowitsch     PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ(A, R, fill, C));
3714222ddf1SHong Zhang     goto next;
3724222ddf1SHong Zhang   }
3734222ddf1SHong Zhang 
3749566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(alg, "r*art", &flg));
3754222ddf1SHong Zhang   if (flg) {
3769566063dSJacob Faibussowitsch     PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A, R, fill, C));
3774222ddf1SHong Zhang     goto next;
3784222ddf1SHong Zhang   }
3794222ddf1SHong Zhang 
3809566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(alg, "coloring_rart", &flg));
3814222ddf1SHong Zhang   if (flg) {
3829566063dSJacob Faibussowitsch     PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A, R, fill, C));
3834222ddf1SHong Zhang     goto next;
3844222ddf1SHong Zhang   }
3854222ddf1SHong Zhang 
3864222ddf1SHong Zhang   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatProductAlgorithm is not supported");
3874222ddf1SHong Zhang 
3884222ddf1SHong Zhang next:
3894222ddf1SHong Zhang   C->ops->productnumeric = MatProductNumeric_RARt;
3904222ddf1SHong Zhang   PetscFunctionReturn(0);
3914222ddf1SHong Zhang }
392