13b1b9624SHong Zhang /* 23b1b9624SHong Zhang Defines projective product routines where A is a SeqAIJ matrix 33b1b9624SHong Zhang C = R * A * R^T 43b1b9624SHong Zhang */ 53b1b9624SHong Zhang 6807171c5SHong Zhang #include <../src/mat/impls/aij/seq/aij.h> 7807171c5SHong Zhang #include <../src/mat/utils/freespace.h> 8807171c5SHong Zhang #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/ 9807171c5SHong Zhang 1066976f2fSJacob Faibussowitsch static PetscErrorCode MatDestroy_SeqAIJ_RARt(void *data) 11d71ae5a4SJacob Faibussowitsch { 126718818eSStefano Zampini Mat_RARt *rart = (Mat_RARt *)data; 13807171c5SHong Zhang 14807171c5SHong Zhang PetscFunctionBegin; 159566063dSJacob Faibussowitsch PetscCall(MatTransposeColoringDestroy(&rart->matcoloring)); 169566063dSJacob Faibussowitsch PetscCall(MatDestroy(&rart->Rt)); 179566063dSJacob Faibussowitsch PetscCall(MatDestroy(&rart->RARt)); 189566063dSJacob Faibussowitsch PetscCall(MatDestroy(&rart->ARt)); 199566063dSJacob Faibussowitsch PetscCall(PetscFree(rart->work)); 201baa6e33SBarry Smith if (rart->destroy) PetscCall((*rart->destroy)(rart->data)); 219566063dSJacob Faibussowitsch PetscCall(PetscFree(rart)); 223ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 23807171c5SHong Zhang } 24807171c5SHong Zhang 25d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(Mat A, Mat R, PetscReal fill, Mat C) 26d71ae5a4SJacob Faibussowitsch { 27807171c5SHong Zhang Mat P; 28807171c5SHong Zhang Mat_RARt *rart; 29335efc43SPeter Brune MatColoring coloring; 30807171c5SHong Zhang MatTransposeColoring matcoloring; 31807171c5SHong Zhang ISColoring iscoloring; 328d0a38e4SHong Zhang Mat Rt_dense, RARt_dense; 33807171c5SHong Zhang 34807171c5SHong Zhang PetscFunctionBegin; 356718818eSStefano Zampini MatCheckProduct(C, 4); 3628b400f6SJacob Faibussowitsch PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 37807171c5SHong Zhang /* create symbolic P=Rt */ 387fb60732SBarry Smith PetscCall(MatTransposeSymbolic(R, &P)); 39807171c5SHong Zhang 40807171c5SHong Zhang /* get symbolic C=Pt*A*P */ 419566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_SeqAIJ_SeqAIJ_SparseAxpy(A, P, fill, C)); 429566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizes(C, PetscAbs(R->rmap->bs), PetscAbs(R->rmap->bs))); 434222ddf1SHong Zhang C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart; 44807171c5SHong Zhang 45807171c5SHong Zhang /* create a supporting struct */ 469566063dSJacob Faibussowitsch PetscCall(PetscNew(&rart)); 476718818eSStefano Zampini C->product->data = rart; 486718818eSStefano Zampini C->product->destroy = MatDestroy_SeqAIJ_RARt; 49807171c5SHong Zhang 502ef1f0ffSBarry Smith /* Use coloring */ 514222ddf1SHong Zhang /* inode causes memory problem */ 529566063dSJacob Faibussowitsch PetscCall(MatSetOption(C, MAT_USE_INODES, PETSC_FALSE)); 534d478ae7SHong Zhang 54807171c5SHong Zhang /* Create MatTransposeColoring from symbolic C=R*A*R^T */ 559566063dSJacob Faibussowitsch PetscCall(MatColoringCreate(C, &coloring)); 569566063dSJacob Faibussowitsch PetscCall(MatColoringSetDistance(coloring, 2)); 579566063dSJacob Faibussowitsch PetscCall(MatColoringSetType(coloring, MATCOLORINGSL)); 589566063dSJacob Faibussowitsch PetscCall(MatColoringSetFromOptions(coloring)); 599566063dSJacob Faibussowitsch PetscCall(MatColoringApply(coloring, &iscoloring)); 609566063dSJacob Faibussowitsch PetscCall(MatColoringDestroy(&coloring)); 619566063dSJacob Faibussowitsch PetscCall(MatTransposeColoringCreate(C, iscoloring, &matcoloring)); 622205254eSKarl Rupp 63807171c5SHong Zhang rart->matcoloring = matcoloring; 649566063dSJacob Faibussowitsch PetscCall(ISColoringDestroy(&iscoloring)); 653b1b9624SHong Zhang 66807171c5SHong Zhang /* Create Rt_dense */ 679566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &Rt_dense)); 689566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Rt_dense, A->cmap->n, matcoloring->ncolors, A->cmap->n, matcoloring->ncolors)); 699566063dSJacob Faibussowitsch PetscCall(MatSetType(Rt_dense, MATSEQDENSE)); 709566063dSJacob Faibussowitsch PetscCall(MatSeqDenseSetPreallocation(Rt_dense, NULL)); 712205254eSKarl Rupp 72807171c5SHong Zhang Rt_dense->assembled = PETSC_TRUE; 73807171c5SHong Zhang rart->Rt = Rt_dense; 74807171c5SHong Zhang 75807171c5SHong Zhang /* Create RARt_dense = R*A*Rt_dense */ 769566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &RARt_dense)); 779566063dSJacob Faibussowitsch PetscCall(MatSetSizes(RARt_dense, C->rmap->n, matcoloring->ncolors, C->rmap->n, matcoloring->ncolors)); 789566063dSJacob Faibussowitsch PetscCall(MatSetType(RARt_dense, MATSEQDENSE)); 799566063dSJacob Faibussowitsch PetscCall(MatSeqDenseSetPreallocation(RARt_dense, NULL)); 802205254eSKarl Rupp 81807171c5SHong Zhang rart->RARt = RARt_dense; 82807171c5SHong Zhang 83807171c5SHong Zhang /* Allocate work array to store columns of A*R^T used in MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense() */ 849566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(A->rmap->n * 4, &rart->work)); 85807171c5SHong Zhang 86807171c5SHong Zhang /* clean up */ 879566063dSJacob Faibussowitsch PetscCall(MatDestroy(&P)); 88807171c5SHong Zhang 89807171c5SHong Zhang #if defined(PETSC_USE_INFO) 901ce71dffSSatish Balay { 916718818eSStefano Zampini Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 92*f4f49eeaSPierre Jolivet PetscReal density = (PetscReal)c->nz / (RARt_dense->rmap->n * RARt_dense->cmap->n); 939566063dSJacob Faibussowitsch PetscCall(PetscInfo(C, "C=R*(A*Rt) via coloring C - use sparse-dense inner products\n")); 9463a3b9bcSJacob Faibussowitsch PetscCall(PetscInfo(C, "RARt_den %" PetscInt_FMT " %" PetscInt_FMT "; Rt %" PetscInt_FMT " %" PetscInt_FMT " (RARt->nz %" PetscInt_FMT ")/(m*ncolors)=%g\n", RARt_dense->rmap->n, RARt_dense->cmap->n, R->cmap->n, R->rmap->n, c->nz, (double)density)); 951ce71dffSSatish Balay } 96807171c5SHong Zhang #endif 973ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 98807171c5SHong Zhang } 99807171c5SHong Zhang 100807171c5SHong Zhang /* 101807171c5SHong Zhang RAB = R * A * B, R and A in seqaij format, B in dense format; 102807171c5SHong Zhang */ 10366976f2fSJacob Faibussowitsch static PetscErrorCode MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(Mat R, Mat A, Mat B, Mat RAB, PetscScalar *work) 104d71ae5a4SJacob Faibussowitsch { 105807171c5SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *r = (Mat_SeqAIJ *)R->data; 1061683a169SBarry Smith PetscScalar r1, r2, r3, r4; 1071683a169SBarry Smith const PetscScalar *b, *b1, *b2, *b3, *b4; 108807171c5SHong Zhang MatScalar *aa, *ra; 109807171c5SHong Zhang PetscInt cn = B->cmap->n, bm = B->rmap->n, col, i, j, n, *ai = a->i, *aj, am = A->rmap->n; 110807171c5SHong Zhang PetscInt am2 = 2 * am, am3 = 3 * am, bm4 = 4 * bm; 111807171c5SHong Zhang PetscScalar *d, *c, *c2, *c3, *c4; 112807171c5SHong Zhang PetscInt *rj, rm = R->rmap->n, dm = RAB->rmap->n, dn = RAB->cmap->n; 113807171c5SHong Zhang PetscInt rm2 = 2 * rm, rm3 = 3 * rm, colrm; 114807171c5SHong Zhang 115807171c5SHong Zhang PetscFunctionBegin; 1163ba16761SJacob Faibussowitsch if (!dm || !dn) PetscFunctionReturn(PETSC_SUCCESS); 11708401ef6SPierre Jolivet PetscCheck(bm == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in A %" PetscInt_FMT " not equal rows in B %" PetscInt_FMT, A->cmap->n, bm); 11808401ef6SPierre Jolivet PetscCheck(am == R->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in R %" PetscInt_FMT " not equal rows in A %" PetscInt_FMT, R->cmap->n, am); 11908401ef6SPierre Jolivet PetscCheck(R->rmap->n == RAB->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows in RAB %" PetscInt_FMT " not equal rows in R %" PetscInt_FMT, RAB->rmap->n, R->rmap->n); 12008401ef6SPierre Jolivet PetscCheck(B->cmap->n == RAB->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in RAB %" PetscInt_FMT " not equal columns in B %" PetscInt_FMT, RAB->cmap->n, B->cmap->n); 121807171c5SHong Zhang 122274010c0SHong Zhang { /* 123274010c0SHong Zhang This approach is not as good as original ones (will be removed later), but it reveals that 124c4762a1bSJed Brown AB_den=A*B takes almost all execution time in R*A*B for src/ksp/ksp/tutorials/ex56.c 125274010c0SHong Zhang */ 126274010c0SHong Zhang PetscBool via_matmatmult = PETSC_FALSE; 1279566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(NULL, NULL, "-matrart_via_matmatmult", &via_matmatmult, NULL)); 128274010c0SHong Zhang if (via_matmatmult) { 1294222ddf1SHong Zhang Mat AB_den = NULL; 1309566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &AB_den)); 1319566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_SeqAIJ_SeqDense(A, B, 0.0, AB_den)); 1329566063dSJacob Faibussowitsch PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(A, B, AB_den)); 1339566063dSJacob Faibussowitsch PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(R, AB_den, RAB)); 1349566063dSJacob Faibussowitsch PetscCall(MatDestroy(&AB_den)); 1353ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 136274010c0SHong Zhang } 137274010c0SHong Zhang } 138274010c0SHong Zhang 1399566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(B, &b)); 1409566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(RAB, &d)); 1419371c9d4SSatish Balay b1 = b; 1429371c9d4SSatish Balay b2 = b1 + bm; 1439371c9d4SSatish Balay b3 = b2 + bm; 1449371c9d4SSatish Balay b4 = b3 + bm; 1459371c9d4SSatish Balay c = work; 1469371c9d4SSatish Balay c2 = c + am; 1479371c9d4SSatish Balay c3 = c2 + am; 1489371c9d4SSatish Balay c4 = c3 + am; 149807171c5SHong Zhang for (col = 0; col < cn - 4; col += 4) { /* over columns of C */ 150807171c5SHong Zhang for (i = 0; i < am; i++) { /* over rows of A in those columns */ 151807171c5SHong Zhang r1 = r2 = r3 = r4 = 0.0; 152807171c5SHong Zhang n = ai[i + 1] - ai[i]; 153807171c5SHong Zhang aj = a->j + ai[i]; 154807171c5SHong Zhang aa = a->a + ai[i]; 155807171c5SHong Zhang for (j = 0; j < n; j++) { 156807171c5SHong Zhang r1 += (*aa) * b1[*aj]; 157807171c5SHong Zhang r2 += (*aa) * b2[*aj]; 158807171c5SHong Zhang r3 += (*aa) * b3[*aj]; 159807171c5SHong Zhang r4 += (*aa++) * b4[*aj++]; 160807171c5SHong Zhang } 161807171c5SHong Zhang c[i] = r1; 162807171c5SHong Zhang c[am + i] = r2; 163807171c5SHong Zhang c[am2 + i] = r3; 164807171c5SHong Zhang c[am3 + i] = r4; 165807171c5SHong Zhang } 166807171c5SHong Zhang b1 += bm4; 167807171c5SHong Zhang b2 += bm4; 168807171c5SHong Zhang b3 += bm4; 169807171c5SHong Zhang b4 += bm4; 170807171c5SHong Zhang 171807171c5SHong Zhang /* RAB[:,col] = R*C[:,col] */ 172807171c5SHong Zhang colrm = col * rm; 173807171c5SHong Zhang for (i = 0; i < rm; i++) { /* over rows of R in those columns */ 174807171c5SHong Zhang r1 = r2 = r3 = r4 = 0.0; 175807171c5SHong Zhang n = r->i[i + 1] - r->i[i]; 176807171c5SHong Zhang rj = r->j + r->i[i]; 177807171c5SHong Zhang ra = r->a + r->i[i]; 178807171c5SHong Zhang for (j = 0; j < n; j++) { 179807171c5SHong Zhang r1 += (*ra) * c[*rj]; 180807171c5SHong Zhang r2 += (*ra) * c2[*rj]; 181807171c5SHong Zhang r3 += (*ra) * c3[*rj]; 182807171c5SHong Zhang r4 += (*ra++) * c4[*rj++]; 183807171c5SHong Zhang } 184807171c5SHong Zhang d[colrm + i] = r1; 185807171c5SHong Zhang d[colrm + rm + i] = r2; 186807171c5SHong Zhang d[colrm + rm2 + i] = r3; 187807171c5SHong Zhang d[colrm + rm3 + i] = r4; 188807171c5SHong Zhang } 189807171c5SHong Zhang } 190807171c5SHong Zhang for (; col < cn; col++) { /* over extra columns of C */ 191807171c5SHong Zhang for (i = 0; i < am; i++) { /* over rows of A in those columns */ 192807171c5SHong Zhang r1 = 0.0; 193807171c5SHong Zhang n = a->i[i + 1] - a->i[i]; 194807171c5SHong Zhang aj = a->j + a->i[i]; 195807171c5SHong Zhang aa = a->a + a->i[i]; 196ad540459SPierre Jolivet for (j = 0; j < n; j++) r1 += (*aa++) * b1[*aj++]; 197807171c5SHong Zhang c[i] = r1; 198807171c5SHong Zhang } 199807171c5SHong Zhang b1 += bm; 200807171c5SHong Zhang 201807171c5SHong Zhang for (i = 0; i < rm; i++) { /* over rows of R in those columns */ 202807171c5SHong Zhang r1 = 0.0; 203807171c5SHong Zhang n = r->i[i + 1] - r->i[i]; 204807171c5SHong Zhang rj = r->j + r->i[i]; 205807171c5SHong Zhang ra = r->a + r->i[i]; 206ad540459SPierre Jolivet for (j = 0; j < n; j++) r1 += (*ra++) * c[*rj++]; 207807171c5SHong Zhang d[col * rm + i] = r1; 208807171c5SHong Zhang } 209807171c5SHong Zhang } 2109566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(cn * 2.0 * (a->nz + r->nz))); 211807171c5SHong Zhang 2129566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(B, &b)); 2139566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(RAB, &d)); 2149566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(RAB, MAT_FINAL_ASSEMBLY)); 2159566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(RAB, MAT_FINAL_ASSEMBLY)); 2163ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 217807171c5SHong Zhang } 218807171c5SHong Zhang 219d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart(Mat A, Mat R, Mat C) 220d71ae5a4SJacob Faibussowitsch { 2216718818eSStefano Zampini Mat_RARt *rart; 222807171c5SHong Zhang MatTransposeColoring matcoloring; 2238d0a38e4SHong Zhang Mat Rt, RARt; 224807171c5SHong Zhang 225807171c5SHong Zhang PetscFunctionBegin; 2266718818eSStefano Zampini MatCheckProduct(C, 3); 22728b400f6SJacob Faibussowitsch PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2286718818eSStefano Zampini rart = (Mat_RARt *)C->product->data; 2296718818eSStefano Zampini 230807171c5SHong Zhang /* Get dense Rt by Apply MatTransposeColoring to R */ 231807171c5SHong Zhang matcoloring = rart->matcoloring; 232807171c5SHong Zhang Rt = rart->Rt; 2339566063dSJacob Faibussowitsch PetscCall(MatTransColoringApplySpToDen(matcoloring, R, Rt)); 234807171c5SHong Zhang 23550647e95SHong Zhang /* Get dense RARt = R*A*Rt -- dominates! */ 236807171c5SHong Zhang RARt = rart->RARt; 2379566063dSJacob Faibussowitsch PetscCall(MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(R, A, Rt, RARt, rart->work)); 238807171c5SHong Zhang 239807171c5SHong Zhang /* Recover C from C_dense */ 2409566063dSJacob Faibussowitsch PetscCall(MatTransColoringApplyDenToSp(matcoloring, RARt, C)); 2413ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 242807171c5SHong Zhang } 243807171c5SHong Zhang 244d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(Mat A, Mat R, PetscReal fill, Mat C) 245d71ae5a4SJacob Faibussowitsch { 2464222ddf1SHong Zhang Mat ARt; 2474fa85fdeSHong Zhang Mat_RARt *rart; 2486718818eSStefano Zampini char *alg; 2494fa85fdeSHong Zhang 25095a72cc5SHong Zhang PetscFunctionBegin; 2516718818eSStefano Zampini MatCheckProduct(C, 4); 25228b400f6SJacob Faibussowitsch PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2534222ddf1SHong Zhang /* create symbolic ARt = A*R^T */ 2549566063dSJacob Faibussowitsch PetscCall(MatProductCreate(A, R, NULL, &ARt)); 2559566063dSJacob Faibussowitsch PetscCall(MatProductSetType(ARt, MATPRODUCT_ABt)); 2569566063dSJacob Faibussowitsch PetscCall(MatProductSetAlgorithm(ARt, "sorted")); 2579566063dSJacob Faibussowitsch PetscCall(MatProductSetFill(ARt, fill)); 2589566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(ARt)); 2599566063dSJacob Faibussowitsch PetscCall(MatProductSymbolic(ARt)); 2604222ddf1SHong Zhang 2614222ddf1SHong Zhang /* compute symbolic C = R*ARt */ 2627a3c3d58SStefano Zampini /* set algorithm for C = R*ARt */ 2639566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(C->product->alg, &alg)); 2649566063dSJacob Faibussowitsch PetscCall(MatProductSetAlgorithm(C, "sorted")); 2659566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ(R, ARt, fill, C)); 2667a3c3d58SStefano Zampini /* resume original algorithm for C */ 2679566063dSJacob Faibussowitsch PetscCall(MatProductSetAlgorithm(C, alg)); 2689566063dSJacob Faibussowitsch PetscCall(PetscFree(alg)); 2694222ddf1SHong Zhang 2704222ddf1SHong Zhang C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult; 27155bea0ebSHong Zhang 2729566063dSJacob Faibussowitsch PetscCall(PetscNew(&rart)); 2733b1b9624SHong Zhang rart->ARt = ARt; 2746718818eSStefano Zampini C->product->data = rart; 2756718818eSStefano Zampini C->product->destroy = MatDestroy_SeqAIJ_RARt; 2769566063dSJacob Faibussowitsch PetscCall(PetscInfo(C, "Use ARt=A*R^T, C=R*ARt via MatMatTransposeMult(). Coloring can be applied to A*R^T.\n")); 2773ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 27836104f73SHong Zhang } 27955bea0ebSHong Zhang 280d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult(Mat A, Mat R, Mat C) 281d71ae5a4SJacob Faibussowitsch { 2826718818eSStefano Zampini Mat_RARt *rart; 28355bea0ebSHong Zhang 28455bea0ebSHong Zhang PetscFunctionBegin; 2856718818eSStefano Zampini MatCheckProduct(C, 3); 28628b400f6SJacob Faibussowitsch PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2876718818eSStefano Zampini rart = (Mat_RARt *)C->product->data; 2889566063dSJacob Faibussowitsch PetscCall(MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(A, R, rart->ARt)); /* dominate! */ 2899566063dSJacob Faibussowitsch PetscCall(MatMatMultNumeric_SeqAIJ_SeqAIJ(R, rart->ARt, C)); 2903ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 291807171c5SHong Zhang } 29255bea0ebSHong Zhang 293d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ(Mat A, Mat R, PetscReal fill, Mat C) 294d71ae5a4SJacob Faibussowitsch { 29595a72cc5SHong Zhang Mat Rt; 29655bea0ebSHong Zhang Mat_RARt *rart; 29755bea0ebSHong Zhang 29855bea0ebSHong Zhang PetscFunctionBegin; 2996718818eSStefano Zampini MatCheckProduct(C, 4); 30028b400f6SJacob Faibussowitsch PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 301acd337a6SBarry Smith PetscCall(MatTranspose(R, MAT_INITIAL_MATRIX, &Rt)); 3029566063dSJacob Faibussowitsch PetscCall(MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ(R, A, Rt, fill, C)); 30395a72cc5SHong Zhang 3049566063dSJacob Faibussowitsch PetscCall(PetscNew(&rart)); 3056718818eSStefano Zampini rart->data = C->product->data; 3066718818eSStefano Zampini rart->destroy = C->product->destroy; 30795a72cc5SHong Zhang rart->Rt = Rt; 3086718818eSStefano Zampini C->product->data = rart; 3096718818eSStefano Zampini C->product->destroy = MatDestroy_SeqAIJ_RARt; 3104222ddf1SHong Zhang C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ; 3119566063dSJacob Faibussowitsch PetscCall(PetscInfo(C, "Use Rt=R^T and C=R*A*Rt via MatMatMatMult() to avoid sparse inner products\n")); 3123ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 31355bea0ebSHong Zhang } 31455bea0ebSHong Zhang 315d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ(Mat A, Mat R, Mat C) 316d71ae5a4SJacob Faibussowitsch { 3176718818eSStefano Zampini Mat_RARt *rart; 31855bea0ebSHong Zhang 31955bea0ebSHong Zhang PetscFunctionBegin; 3206718818eSStefano Zampini MatCheckProduct(C, 3); 32128b400f6SJacob Faibussowitsch PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 3226718818eSStefano Zampini rart = (Mat_RARt *)C->product->data; 323acd337a6SBarry Smith PetscCall(MatTranspose(R, MAT_REUSE_MATRIX, &rart->Rt)); 3246718818eSStefano Zampini /* MatMatMatMultSymbolic used a different data */ 3256718818eSStefano Zampini C->product->data = rart->data; 3269566063dSJacob Faibussowitsch PetscCall(MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqAIJ(R, A, rart->Rt, C)); 3276718818eSStefano Zampini C->product->data = rart; 3283ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 32955bea0ebSHong Zhang } 33055bea0ebSHong Zhang 331d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRARt_SeqAIJ_SeqAIJ(Mat A, Mat R, MatReuse scall, PetscReal fill, Mat *C) 332d71ae5a4SJacob Faibussowitsch { 33355bea0ebSHong Zhang const char *algTypes[3] = {"matmatmatmult", "matmattransposemult", "coloring_rart"}; 33455bea0ebSHong Zhang PetscInt alg = 0; /* set default algorithm */ 33555bea0ebSHong Zhang 33655bea0ebSHong Zhang PetscFunctionBegin; 33755bea0ebSHong Zhang if (scall == MAT_INITIAL_MATRIX) { 338d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "MatRARt", "Mat"); 3399566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-matrart_via", "Algorithmic approach", "MatRARt", algTypes, 3, algTypes[0], &alg, NULL)); 340d0609cedSBarry Smith PetscOptionsEnd(); 341b56132cbSHong Zhang 3429566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(MAT_RARtSymbolic, A, R, 0, 0)); 3439566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, C)); 34455bea0ebSHong Zhang switch (alg) { 34555bea0ebSHong Zhang case 1: 34655bea0ebSHong Zhang /* via matmattransposemult: ARt=A*R^T, C=R*ARt - matrix coloring can be applied to A*R^T */ 3479566063dSJacob Faibussowitsch PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A, R, fill, *C)); 34855bea0ebSHong Zhang break; 34955bea0ebSHong Zhang case 2: 35055bea0ebSHong Zhang /* via coloring_rart: apply coloring C = R*A*R^T */ 3519566063dSJacob Faibussowitsch PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A, R, fill, *C)); 35255bea0ebSHong Zhang break; 35355bea0ebSHong Zhang default: 35455bea0ebSHong Zhang /* via matmatmatmult: Rt=R^T, C=R*A*Rt - avoid inefficient sparse inner products */ 3559566063dSJacob Faibussowitsch PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ(A, R, fill, *C)); 35655bea0ebSHong Zhang break; 35755bea0ebSHong Zhang } 3589566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(MAT_RARtSymbolic, A, R, 0, 0)); 3595008f5a7SHong Zhang } 3605008f5a7SHong Zhang 3619566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(MAT_RARtNumeric, A, R, 0, 0)); 3629566063dSJacob Faibussowitsch PetscCall(((*C)->ops->rartnumeric)(A, R, *C)); 3639566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(MAT_RARtNumeric, A, R, 0, 0)); 3643ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 365807171c5SHong Zhang } 3664222ddf1SHong Zhang 367d71ae5a4SJacob Faibussowitsch PetscErrorCode MatProductSymbolic_RARt_SeqAIJ_SeqAIJ(Mat C) 368d71ae5a4SJacob Faibussowitsch { 3694222ddf1SHong Zhang Mat_Product *product = C->product; 3704222ddf1SHong Zhang Mat A = product->A, R = product->B; 3714222ddf1SHong Zhang MatProductAlgorithm alg = product->alg; 3724222ddf1SHong Zhang PetscReal fill = product->fill; 3734222ddf1SHong Zhang PetscBool flg; 3744222ddf1SHong Zhang 3754222ddf1SHong Zhang PetscFunctionBegin; 3769566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(alg, "r*a*rt", &flg)); 3774222ddf1SHong Zhang if (flg) { 3789566063dSJacob Faibussowitsch PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ(A, R, fill, C)); 3794222ddf1SHong Zhang goto next; 3804222ddf1SHong Zhang } 3814222ddf1SHong Zhang 3829566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(alg, "r*art", &flg)); 3834222ddf1SHong Zhang if (flg) { 3849566063dSJacob Faibussowitsch PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A, R, fill, C)); 3854222ddf1SHong Zhang goto next; 3864222ddf1SHong Zhang } 3874222ddf1SHong Zhang 3889566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(alg, "coloring_rart", &flg)); 3894222ddf1SHong Zhang if (flg) { 3909566063dSJacob Faibussowitsch PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A, R, fill, C)); 3914222ddf1SHong Zhang goto next; 3924222ddf1SHong Zhang } 3934222ddf1SHong Zhang 3944222ddf1SHong Zhang SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatProductAlgorithm is not supported"); 3954222ddf1SHong Zhang 3964222ddf1SHong Zhang next: 3974222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_RARt; 3983ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3994222ddf1SHong Zhang } 400