xref: /petsc/src/mat/impls/aij/seq/matrart.c (revision 2ef1f0ff6e3530e8731eb06ad663081f5844f49f)
1807171c5SHong Zhang 
23b1b9624SHong Zhang /*
33b1b9624SHong Zhang   Defines projective product routines where A is a SeqAIJ matrix
43b1b9624SHong Zhang           C = R * A * R^T
53b1b9624SHong Zhang */
63b1b9624SHong Zhang 
7807171c5SHong Zhang #include <../src/mat/impls/aij/seq/aij.h>
8807171c5SHong Zhang #include <../src/mat/utils/freespace.h>
9807171c5SHong Zhang #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/
10807171c5SHong Zhang 
11d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_SeqAIJ_RARt(void *data)
12d71ae5a4SJacob Faibussowitsch {
136718818eSStefano Zampini   Mat_RARt *rart = (Mat_RARt *)data;
14807171c5SHong Zhang 
15807171c5SHong Zhang   PetscFunctionBegin;
169566063dSJacob Faibussowitsch   PetscCall(MatTransposeColoringDestroy(&rart->matcoloring));
179566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&rart->Rt));
189566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&rart->RARt));
199566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&rart->ARt));
209566063dSJacob Faibussowitsch   PetscCall(PetscFree(rart->work));
211baa6e33SBarry Smith   if (rart->destroy) PetscCall((*rart->destroy)(rart->data));
229566063dSJacob Faibussowitsch   PetscCall(PetscFree(rart));
233ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
24807171c5SHong Zhang }
25807171c5SHong Zhang 
26d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(Mat A, Mat R, PetscReal fill, Mat C)
27d71ae5a4SJacob Faibussowitsch {
28807171c5SHong Zhang   Mat                  P;
29807171c5SHong Zhang   Mat_RARt            *rart;
30335efc43SPeter Brune   MatColoring          coloring;
31807171c5SHong Zhang   MatTransposeColoring matcoloring;
32807171c5SHong Zhang   ISColoring           iscoloring;
338d0a38e4SHong Zhang   Mat                  Rt_dense, RARt_dense;
34807171c5SHong Zhang 
35807171c5SHong Zhang   PetscFunctionBegin;
366718818eSStefano Zampini   MatCheckProduct(C, 4);
3728b400f6SJacob Faibussowitsch   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
38807171c5SHong Zhang   /* create symbolic P=Rt */
397fb60732SBarry Smith   PetscCall(MatTransposeSymbolic(R, &P));
40807171c5SHong Zhang 
41807171c5SHong Zhang   /* get symbolic C=Pt*A*P */
429566063dSJacob Faibussowitsch   PetscCall(MatPtAPSymbolic_SeqAIJ_SeqAIJ_SparseAxpy(A, P, fill, C));
439566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSizes(C, PetscAbs(R->rmap->bs), PetscAbs(R->rmap->bs)));
444222ddf1SHong Zhang   C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart;
45807171c5SHong Zhang 
46807171c5SHong Zhang   /* create a supporting struct */
479566063dSJacob Faibussowitsch   PetscCall(PetscNew(&rart));
486718818eSStefano Zampini   C->product->data    = rart;
496718818eSStefano Zampini   C->product->destroy = MatDestroy_SeqAIJ_RARt;
50807171c5SHong Zhang 
51*2ef1f0ffSBarry Smith   /* Use coloring  */
524222ddf1SHong Zhang   /* inode causes memory problem */
539566063dSJacob Faibussowitsch   PetscCall(MatSetOption(C, MAT_USE_INODES, PETSC_FALSE));
544d478ae7SHong Zhang 
55807171c5SHong Zhang   /* Create MatTransposeColoring from symbolic C=R*A*R^T */
569566063dSJacob Faibussowitsch   PetscCall(MatColoringCreate(C, &coloring));
579566063dSJacob Faibussowitsch   PetscCall(MatColoringSetDistance(coloring, 2));
589566063dSJacob Faibussowitsch   PetscCall(MatColoringSetType(coloring, MATCOLORINGSL));
599566063dSJacob Faibussowitsch   PetscCall(MatColoringSetFromOptions(coloring));
609566063dSJacob Faibussowitsch   PetscCall(MatColoringApply(coloring, &iscoloring));
619566063dSJacob Faibussowitsch   PetscCall(MatColoringDestroy(&coloring));
629566063dSJacob Faibussowitsch   PetscCall(MatTransposeColoringCreate(C, iscoloring, &matcoloring));
632205254eSKarl Rupp 
64807171c5SHong Zhang   rart->matcoloring = matcoloring;
659566063dSJacob Faibussowitsch   PetscCall(ISColoringDestroy(&iscoloring));
663b1b9624SHong Zhang 
67807171c5SHong Zhang   /* Create Rt_dense */
689566063dSJacob Faibussowitsch   PetscCall(MatCreate(PETSC_COMM_SELF, &Rt_dense));
699566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(Rt_dense, A->cmap->n, matcoloring->ncolors, A->cmap->n, matcoloring->ncolors));
709566063dSJacob Faibussowitsch   PetscCall(MatSetType(Rt_dense, MATSEQDENSE));
719566063dSJacob Faibussowitsch   PetscCall(MatSeqDenseSetPreallocation(Rt_dense, NULL));
722205254eSKarl Rupp 
73807171c5SHong Zhang   Rt_dense->assembled = PETSC_TRUE;
74807171c5SHong Zhang   rart->Rt            = Rt_dense;
75807171c5SHong Zhang 
76807171c5SHong Zhang   /* Create RARt_dense = R*A*Rt_dense */
779566063dSJacob Faibussowitsch   PetscCall(MatCreate(PETSC_COMM_SELF, &RARt_dense));
789566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(RARt_dense, C->rmap->n, matcoloring->ncolors, C->rmap->n, matcoloring->ncolors));
799566063dSJacob Faibussowitsch   PetscCall(MatSetType(RARt_dense, MATSEQDENSE));
809566063dSJacob Faibussowitsch   PetscCall(MatSeqDenseSetPreallocation(RARt_dense, NULL));
812205254eSKarl Rupp 
82807171c5SHong Zhang   rart->RARt = RARt_dense;
83807171c5SHong Zhang 
84807171c5SHong Zhang   /* Allocate work array to store columns of A*R^T used in MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense() */
859566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(A->rmap->n * 4, &rart->work));
86807171c5SHong Zhang 
87807171c5SHong Zhang   /* clean up */
889566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&P));
89807171c5SHong Zhang 
90807171c5SHong Zhang #if defined(PETSC_USE_INFO)
911ce71dffSSatish Balay   {
926718818eSStefano Zampini     Mat_SeqAIJ *c       = (Mat_SeqAIJ *)C->data;
93807171c5SHong Zhang     PetscReal   density = (PetscReal)(c->nz) / (RARt_dense->rmap->n * RARt_dense->cmap->n);
949566063dSJacob Faibussowitsch     PetscCall(PetscInfo(C, "C=R*(A*Rt) via coloring C - use sparse-dense inner products\n"));
9563a3b9bcSJacob Faibussowitsch     PetscCall(PetscInfo(C, "RARt_den %" PetscInt_FMT " %" PetscInt_FMT "; Rt %" PetscInt_FMT " %" PetscInt_FMT " (RARt->nz %" PetscInt_FMT ")/(m*ncolors)=%g\n", RARt_dense->rmap->n, RARt_dense->cmap->n, R->cmap->n, R->rmap->n, c->nz, (double)density));
961ce71dffSSatish Balay   }
97807171c5SHong Zhang #endif
983ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
99807171c5SHong Zhang }
100807171c5SHong Zhang 
101807171c5SHong Zhang /*
102807171c5SHong Zhang  RAB = R * A * B, R and A in seqaij format, B in dense format;
103807171c5SHong Zhang */
104d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(Mat R, Mat A, Mat B, Mat RAB, PetscScalar *work)
105d71ae5a4SJacob Faibussowitsch {
106807171c5SHong Zhang   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *r = (Mat_SeqAIJ *)R->data;
1071683a169SBarry Smith   PetscScalar        r1, r2, r3, r4;
1081683a169SBarry Smith   const PetscScalar *b, *b1, *b2, *b3, *b4;
109807171c5SHong Zhang   MatScalar         *aa, *ra;
110807171c5SHong Zhang   PetscInt           cn = B->cmap->n, bm = B->rmap->n, col, i, j, n, *ai = a->i, *aj, am = A->rmap->n;
111807171c5SHong Zhang   PetscInt           am2 = 2 * am, am3 = 3 * am, bm4 = 4 * bm;
112807171c5SHong Zhang   PetscScalar       *d, *c, *c2, *c3, *c4;
113807171c5SHong Zhang   PetscInt          *rj, rm = R->rmap->n, dm = RAB->rmap->n, dn = RAB->cmap->n;
114807171c5SHong Zhang   PetscInt           rm2 = 2 * rm, rm3 = 3 * rm, colrm;
115807171c5SHong Zhang 
116807171c5SHong Zhang   PetscFunctionBegin;
1173ba16761SJacob Faibussowitsch   if (!dm || !dn) PetscFunctionReturn(PETSC_SUCCESS);
11808401ef6SPierre Jolivet   PetscCheck(bm == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in A %" PetscInt_FMT " not equal rows in B %" PetscInt_FMT, A->cmap->n, bm);
11908401ef6SPierre Jolivet   PetscCheck(am == R->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in R %" PetscInt_FMT " not equal rows in A %" PetscInt_FMT, R->cmap->n, am);
12008401ef6SPierre Jolivet   PetscCheck(R->rmap->n == RAB->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows in RAB %" PetscInt_FMT " not equal rows in R %" PetscInt_FMT, RAB->rmap->n, R->rmap->n);
12108401ef6SPierre Jolivet   PetscCheck(B->cmap->n == RAB->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in RAB %" PetscInt_FMT " not equal columns in B %" PetscInt_FMT, RAB->cmap->n, B->cmap->n);
122807171c5SHong Zhang 
123274010c0SHong Zhang   { /*
124274010c0SHong Zhang      This approach is not as good as original ones (will be removed later), but it reveals that
125c4762a1bSJed Brown      AB_den=A*B takes almost all execution time in R*A*B for src/ksp/ksp/tutorials/ex56.c
126274010c0SHong Zhang      */
127274010c0SHong Zhang     PetscBool via_matmatmult = PETSC_FALSE;
1289566063dSJacob Faibussowitsch     PetscCall(PetscOptionsGetBool(NULL, NULL, "-matrart_via_matmatmult", &via_matmatmult, NULL));
129274010c0SHong Zhang     if (via_matmatmult) {
1304222ddf1SHong Zhang       Mat AB_den = NULL;
1319566063dSJacob Faibussowitsch       PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &AB_den));
1329566063dSJacob Faibussowitsch       PetscCall(MatMatMultSymbolic_SeqAIJ_SeqDense(A, B, 0.0, AB_den));
1339566063dSJacob Faibussowitsch       PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(A, B, AB_den));
1349566063dSJacob Faibussowitsch       PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(R, AB_den, RAB));
1359566063dSJacob Faibussowitsch       PetscCall(MatDestroy(&AB_den));
1363ba16761SJacob Faibussowitsch       PetscFunctionReturn(PETSC_SUCCESS);
137274010c0SHong Zhang     }
138274010c0SHong Zhang   }
139274010c0SHong Zhang 
1409566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(B, &b));
1419566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArray(RAB, &d));
1429371c9d4SSatish Balay   b1 = b;
1439371c9d4SSatish Balay   b2 = b1 + bm;
1449371c9d4SSatish Balay   b3 = b2 + bm;
1459371c9d4SSatish Balay   b4 = b3 + bm;
1469371c9d4SSatish Balay   c  = work;
1479371c9d4SSatish Balay   c2 = c + am;
1489371c9d4SSatish Balay   c3 = c2 + am;
1499371c9d4SSatish Balay   c4 = c3 + am;
150807171c5SHong Zhang   for (col = 0; col < cn - 4; col += 4) { /* over columns of C */
151807171c5SHong Zhang     for (i = 0; i < am; i++) {            /* over rows of A in those columns */
152807171c5SHong Zhang       r1 = r2 = r3 = r4 = 0.0;
153807171c5SHong Zhang       n                 = ai[i + 1] - ai[i];
154807171c5SHong Zhang       aj                = a->j + ai[i];
155807171c5SHong Zhang       aa                = a->a + ai[i];
156807171c5SHong Zhang       for (j = 0; j < n; j++) {
157807171c5SHong Zhang         r1 += (*aa) * b1[*aj];
158807171c5SHong Zhang         r2 += (*aa) * b2[*aj];
159807171c5SHong Zhang         r3 += (*aa) * b3[*aj];
160807171c5SHong Zhang         r4 += (*aa++) * b4[*aj++];
161807171c5SHong Zhang       }
162807171c5SHong Zhang       c[i]       = r1;
163807171c5SHong Zhang       c[am + i]  = r2;
164807171c5SHong Zhang       c[am2 + i] = r3;
165807171c5SHong Zhang       c[am3 + i] = r4;
166807171c5SHong Zhang     }
167807171c5SHong Zhang     b1 += bm4;
168807171c5SHong Zhang     b2 += bm4;
169807171c5SHong Zhang     b3 += bm4;
170807171c5SHong Zhang     b4 += bm4;
171807171c5SHong Zhang 
172807171c5SHong Zhang     /* RAB[:,col] = R*C[:,col] */
173807171c5SHong Zhang     colrm = col * rm;
174807171c5SHong Zhang     for (i = 0; i < rm; i++) { /* over rows of R in those columns */
175807171c5SHong Zhang       r1 = r2 = r3 = r4 = 0.0;
176807171c5SHong Zhang       n                 = r->i[i + 1] - r->i[i];
177807171c5SHong Zhang       rj                = r->j + r->i[i];
178807171c5SHong Zhang       ra                = r->a + r->i[i];
179807171c5SHong Zhang       for (j = 0; j < n; j++) {
180807171c5SHong Zhang         r1 += (*ra) * c[*rj];
181807171c5SHong Zhang         r2 += (*ra) * c2[*rj];
182807171c5SHong Zhang         r3 += (*ra) * c3[*rj];
183807171c5SHong Zhang         r4 += (*ra++) * c4[*rj++];
184807171c5SHong Zhang       }
185807171c5SHong Zhang       d[colrm + i]       = r1;
186807171c5SHong Zhang       d[colrm + rm + i]  = r2;
187807171c5SHong Zhang       d[colrm + rm2 + i] = r3;
188807171c5SHong Zhang       d[colrm + rm3 + i] = r4;
189807171c5SHong Zhang     }
190807171c5SHong Zhang   }
191807171c5SHong Zhang   for (; col < cn; col++) {    /* over extra columns of C */
192807171c5SHong Zhang     for (i = 0; i < am; i++) { /* over rows of A in those columns */
193807171c5SHong Zhang       r1 = 0.0;
194807171c5SHong Zhang       n  = a->i[i + 1] - a->i[i];
195807171c5SHong Zhang       aj = a->j + a->i[i];
196807171c5SHong Zhang       aa = a->a + a->i[i];
197ad540459SPierre Jolivet       for (j = 0; j < n; j++) r1 += (*aa++) * b1[*aj++];
198807171c5SHong Zhang       c[i] = r1;
199807171c5SHong Zhang     }
200807171c5SHong Zhang     b1 += bm;
201807171c5SHong Zhang 
202807171c5SHong Zhang     for (i = 0; i < rm; i++) { /* over rows of R in those columns */
203807171c5SHong Zhang       r1 = 0.0;
204807171c5SHong Zhang       n  = r->i[i + 1] - r->i[i];
205807171c5SHong Zhang       rj = r->j + r->i[i];
206807171c5SHong Zhang       ra = r->a + r->i[i];
207ad540459SPierre Jolivet       for (j = 0; j < n; j++) r1 += (*ra++) * c[*rj++];
208807171c5SHong Zhang       d[col * rm + i] = r1;
209807171c5SHong Zhang     }
210807171c5SHong Zhang   }
2119566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(cn * 2.0 * (a->nz + r->nz)));
212807171c5SHong Zhang 
2139566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(B, &b));
2149566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(RAB, &d));
2159566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(RAB, MAT_FINAL_ASSEMBLY));
2169566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(RAB, MAT_FINAL_ASSEMBLY));
2173ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
218807171c5SHong Zhang }
219807171c5SHong Zhang 
220d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart(Mat A, Mat R, Mat C)
221d71ae5a4SJacob Faibussowitsch {
2226718818eSStefano Zampini   Mat_RARt            *rart;
223807171c5SHong Zhang   MatTransposeColoring matcoloring;
2248d0a38e4SHong Zhang   Mat                  Rt, RARt;
225807171c5SHong Zhang 
226807171c5SHong Zhang   PetscFunctionBegin;
2276718818eSStefano Zampini   MatCheckProduct(C, 3);
22828b400f6SJacob Faibussowitsch   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2296718818eSStefano Zampini   rart = (Mat_RARt *)C->product->data;
2306718818eSStefano Zampini 
231807171c5SHong Zhang   /* Get dense Rt by Apply MatTransposeColoring to R */
232807171c5SHong Zhang   matcoloring = rart->matcoloring;
233807171c5SHong Zhang   Rt          = rart->Rt;
2349566063dSJacob Faibussowitsch   PetscCall(MatTransColoringApplySpToDen(matcoloring, R, Rt));
235807171c5SHong Zhang 
23650647e95SHong Zhang   /* Get dense RARt = R*A*Rt -- dominates! */
237807171c5SHong Zhang   RARt = rart->RARt;
2389566063dSJacob Faibussowitsch   PetscCall(MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(R, A, Rt, RARt, rart->work));
239807171c5SHong Zhang 
240807171c5SHong Zhang   /* Recover C from C_dense */
2419566063dSJacob Faibussowitsch   PetscCall(MatTransColoringApplyDenToSp(matcoloring, RARt, C));
2423ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
243807171c5SHong Zhang }
244807171c5SHong Zhang 
245d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(Mat A, Mat R, PetscReal fill, Mat C)
246d71ae5a4SJacob Faibussowitsch {
2474222ddf1SHong Zhang   Mat       ARt;
2484fa85fdeSHong Zhang   Mat_RARt *rart;
2496718818eSStefano Zampini   char     *alg;
2504fa85fdeSHong Zhang 
25195a72cc5SHong Zhang   PetscFunctionBegin;
2526718818eSStefano Zampini   MatCheckProduct(C, 4);
25328b400f6SJacob Faibussowitsch   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2544222ddf1SHong Zhang   /* create symbolic ARt = A*R^T  */
2559566063dSJacob Faibussowitsch   PetscCall(MatProductCreate(A, R, NULL, &ARt));
2569566063dSJacob Faibussowitsch   PetscCall(MatProductSetType(ARt, MATPRODUCT_ABt));
2579566063dSJacob Faibussowitsch   PetscCall(MatProductSetAlgorithm(ARt, "sorted"));
2589566063dSJacob Faibussowitsch   PetscCall(MatProductSetFill(ARt, fill));
2599566063dSJacob Faibussowitsch   PetscCall(MatProductSetFromOptions(ARt));
2609566063dSJacob Faibussowitsch   PetscCall(MatProductSymbolic(ARt));
2614222ddf1SHong Zhang 
2624222ddf1SHong Zhang   /* compute symbolic C = R*ARt */
2637a3c3d58SStefano Zampini   /* set algorithm for C = R*ARt */
2649566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(C->product->alg, &alg));
2659566063dSJacob Faibussowitsch   PetscCall(MatProductSetAlgorithm(C, "sorted"));
2669566063dSJacob Faibussowitsch   PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ(R, ARt, fill, C));
2677a3c3d58SStefano Zampini   /* resume original algorithm for C */
2689566063dSJacob Faibussowitsch   PetscCall(MatProductSetAlgorithm(C, alg));
2699566063dSJacob Faibussowitsch   PetscCall(PetscFree(alg));
2704222ddf1SHong Zhang 
2714222ddf1SHong Zhang   C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult;
27255bea0ebSHong Zhang 
2739566063dSJacob Faibussowitsch   PetscCall(PetscNew(&rart));
2743b1b9624SHong Zhang   rart->ARt           = ARt;
2756718818eSStefano Zampini   C->product->data    = rart;
2766718818eSStefano Zampini   C->product->destroy = MatDestroy_SeqAIJ_RARt;
2779566063dSJacob Faibussowitsch   PetscCall(PetscInfo(C, "Use ARt=A*R^T, C=R*ARt via MatMatTransposeMult(). Coloring can be applied to A*R^T.\n"));
2783ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
27936104f73SHong Zhang }
28055bea0ebSHong Zhang 
281d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult(Mat A, Mat R, Mat C)
282d71ae5a4SJacob Faibussowitsch {
2836718818eSStefano Zampini   Mat_RARt *rart;
28455bea0ebSHong Zhang 
28555bea0ebSHong Zhang   PetscFunctionBegin;
2866718818eSStefano Zampini   MatCheckProduct(C, 3);
28728b400f6SJacob Faibussowitsch   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2886718818eSStefano Zampini   rart = (Mat_RARt *)C->product->data;
2899566063dSJacob Faibussowitsch   PetscCall(MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(A, R, rart->ARt)); /* dominate! */
2909566063dSJacob Faibussowitsch   PetscCall(MatMatMultNumeric_SeqAIJ_SeqAIJ(R, rart->ARt, C));
2913ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
292807171c5SHong Zhang }
29355bea0ebSHong Zhang 
294d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ(Mat A, Mat R, PetscReal fill, Mat C)
295d71ae5a4SJacob Faibussowitsch {
29695a72cc5SHong Zhang   Mat       Rt;
29755bea0ebSHong Zhang   Mat_RARt *rart;
29855bea0ebSHong Zhang 
29955bea0ebSHong Zhang   PetscFunctionBegin;
3006718818eSStefano Zampini   MatCheckProduct(C, 4);
30128b400f6SJacob Faibussowitsch   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
302acd337a6SBarry Smith   PetscCall(MatTranspose(R, MAT_INITIAL_MATRIX, &Rt));
3039566063dSJacob Faibussowitsch   PetscCall(MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ(R, A, Rt, fill, C));
30495a72cc5SHong Zhang 
3059566063dSJacob Faibussowitsch   PetscCall(PetscNew(&rart));
3066718818eSStefano Zampini   rart->data          = C->product->data;
3076718818eSStefano Zampini   rart->destroy       = C->product->destroy;
30895a72cc5SHong Zhang   rart->Rt            = Rt;
3096718818eSStefano Zampini   C->product->data    = rart;
3106718818eSStefano Zampini   C->product->destroy = MatDestroy_SeqAIJ_RARt;
3114222ddf1SHong Zhang   C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ;
3129566063dSJacob Faibussowitsch   PetscCall(PetscInfo(C, "Use Rt=R^T and C=R*A*Rt via MatMatMatMult() to avoid sparse inner products\n"));
3133ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
31455bea0ebSHong Zhang }
31555bea0ebSHong Zhang 
316d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ(Mat A, Mat R, Mat C)
317d71ae5a4SJacob Faibussowitsch {
3186718818eSStefano Zampini   Mat_RARt *rart;
31955bea0ebSHong Zhang 
32055bea0ebSHong Zhang   PetscFunctionBegin;
3216718818eSStefano Zampini   MatCheckProduct(C, 3);
32228b400f6SJacob Faibussowitsch   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
3236718818eSStefano Zampini   rart = (Mat_RARt *)C->product->data;
324acd337a6SBarry Smith   PetscCall(MatTranspose(R, MAT_REUSE_MATRIX, &rart->Rt));
3256718818eSStefano Zampini   /* MatMatMatMultSymbolic used a different data */
3266718818eSStefano Zampini   C->product->data = rart->data;
3279566063dSJacob Faibussowitsch   PetscCall(MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqAIJ(R, A, rart->Rt, C));
3286718818eSStefano Zampini   C->product->data = rart;
3293ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
33055bea0ebSHong Zhang }
33155bea0ebSHong Zhang 
332d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRARt_SeqAIJ_SeqAIJ(Mat A, Mat R, MatReuse scall, PetscReal fill, Mat *C)
333d71ae5a4SJacob Faibussowitsch {
33455bea0ebSHong Zhang   const char *algTypes[3] = {"matmatmatmult", "matmattransposemult", "coloring_rart"};
33555bea0ebSHong Zhang   PetscInt    alg         = 0; /* set default algorithm */
33655bea0ebSHong Zhang 
33755bea0ebSHong Zhang   PetscFunctionBegin;
33855bea0ebSHong Zhang   if (scall == MAT_INITIAL_MATRIX) {
339d0609cedSBarry Smith     PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "MatRARt", "Mat");
3409566063dSJacob Faibussowitsch     PetscCall(PetscOptionsEList("-matrart_via", "Algorithmic approach", "MatRARt", algTypes, 3, algTypes[0], &alg, NULL));
341d0609cedSBarry Smith     PetscOptionsEnd();
342b56132cbSHong Zhang 
3439566063dSJacob Faibussowitsch     PetscCall(PetscLogEventBegin(MAT_RARtSymbolic, A, R, 0, 0));
3449566063dSJacob Faibussowitsch     PetscCall(MatCreate(PETSC_COMM_SELF, C));
34555bea0ebSHong Zhang     switch (alg) {
34655bea0ebSHong Zhang     case 1:
34755bea0ebSHong Zhang       /* via matmattransposemult: ARt=A*R^T, C=R*ARt - matrix coloring can be applied to A*R^T */
3489566063dSJacob Faibussowitsch       PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A, R, fill, *C));
34955bea0ebSHong Zhang       break;
35055bea0ebSHong Zhang     case 2:
35155bea0ebSHong Zhang       /* via coloring_rart: apply coloring C = R*A*R^T                          */
3529566063dSJacob Faibussowitsch       PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A, R, fill, *C));
35355bea0ebSHong Zhang       break;
35455bea0ebSHong Zhang     default:
35555bea0ebSHong Zhang       /* via matmatmatmult: Rt=R^T, C=R*A*Rt - avoid inefficient sparse inner products */
3569566063dSJacob Faibussowitsch       PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ(A, R, fill, *C));
35755bea0ebSHong Zhang       break;
35855bea0ebSHong Zhang     }
3599566063dSJacob Faibussowitsch     PetscCall(PetscLogEventEnd(MAT_RARtSymbolic, A, R, 0, 0));
3605008f5a7SHong Zhang   }
3615008f5a7SHong Zhang 
3629566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(MAT_RARtNumeric, A, R, 0, 0));
3639566063dSJacob Faibussowitsch   PetscCall(((*C)->ops->rartnumeric)(A, R, *C));
3649566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(MAT_RARtNumeric, A, R, 0, 0));
3653ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
366807171c5SHong Zhang }
3674222ddf1SHong Zhang 
368d71ae5a4SJacob Faibussowitsch PetscErrorCode MatProductSymbolic_RARt_SeqAIJ_SeqAIJ(Mat C)
369d71ae5a4SJacob Faibussowitsch {
3704222ddf1SHong Zhang   Mat_Product        *product = C->product;
3714222ddf1SHong Zhang   Mat                 A = product->A, R = product->B;
3724222ddf1SHong Zhang   MatProductAlgorithm alg  = product->alg;
3734222ddf1SHong Zhang   PetscReal           fill = product->fill;
3744222ddf1SHong Zhang   PetscBool           flg;
3754222ddf1SHong Zhang 
3764222ddf1SHong Zhang   PetscFunctionBegin;
3779566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(alg, "r*a*rt", &flg));
3784222ddf1SHong Zhang   if (flg) {
3799566063dSJacob Faibussowitsch     PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ(A, R, fill, C));
3804222ddf1SHong Zhang     goto next;
3814222ddf1SHong Zhang   }
3824222ddf1SHong Zhang 
3839566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(alg, "r*art", &flg));
3844222ddf1SHong Zhang   if (flg) {
3859566063dSJacob Faibussowitsch     PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A, R, fill, C));
3864222ddf1SHong Zhang     goto next;
3874222ddf1SHong Zhang   }
3884222ddf1SHong Zhang 
3899566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(alg, "coloring_rart", &flg));
3904222ddf1SHong Zhang   if (flg) {
3919566063dSJacob Faibussowitsch     PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A, R, fill, C));
3924222ddf1SHong Zhang     goto next;
3934222ddf1SHong Zhang   }
3944222ddf1SHong Zhang 
3954222ddf1SHong Zhang   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatProductAlgorithm is not supported");
3964222ddf1SHong Zhang 
3974222ddf1SHong Zhang next:
3984222ddf1SHong Zhang   C->ops->productnumeric = MatProductNumeric_RARt;
3993ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4004222ddf1SHong Zhang }
401