1be1d678aSKris Buschelman 2c6db04a5SJed Brown #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 3c6db04a5SJed Brown #include <../src/mat/utils/freespace.h> 482b1193eSBarry Smith 5b350b9acSSatish Balay /*@ 611a5261eSBarry Smith MatMAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATMAIJ` matrix 7ff585edeSJed Brown 811a5261eSBarry Smith Not Collective, but if the `MATMAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel 9ff585edeSJed Brown 10ff585edeSJed Brown Input Parameter: 1111a5261eSBarry Smith . A - the `MATMAIJ` matrix 12ff585edeSJed Brown 13ff585edeSJed Brown Output Parameter: 1411a5261eSBarry Smith . B - the `MATAIJ` matrix 15ff585edeSJed Brown 16ff585edeSJed Brown Level: advanced 17ff585edeSJed Brown 1811a5261eSBarry Smith Note: 1911a5261eSBarry Smith The reference count on the `MATAIJ` matrix is not increased so you should not destroy it. 20ff585edeSJed Brown 2111a5261eSBarry Smith .seealso: `MATMAIJ`, `MATAIJ`, `MatCreateMAIJ()` 22ff585edeSJed Brown @*/ 239371c9d4SSatish Balay PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B) { 24ace3abfcSBarry Smith PetscBool ismpimaij, isseqmaij; 25b9b97703SBarry Smith 26b9b97703SBarry Smith PetscFunctionBegin; 279566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij)); 289566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij)); 29b9b97703SBarry Smith if (ismpimaij) { 30b9b97703SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 31b9b97703SBarry Smith 32b9b97703SBarry Smith *B = b->A; 33b9b97703SBarry Smith } else if (isseqmaij) { 34b9b97703SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 35b9b97703SBarry Smith 36b9b97703SBarry Smith *B = b->AIJ; 37b9b97703SBarry Smith } else { 38b9b97703SBarry Smith *B = A; 39b9b97703SBarry Smith } 40b9b97703SBarry Smith PetscFunctionReturn(0); 41b9b97703SBarry Smith } 42b9b97703SBarry Smith 43480dffcfSJed Brown /*@ 4411a5261eSBarry Smith MatMAIJRedimension - Get a new `MATMAIJ` matrix with the same action, but for a different block size 45ff585edeSJed Brown 463f9fe445SBarry Smith Logically Collective 47ff585edeSJed Brown 48d8d19677SJose E. Roman Input Parameters: 4911a5261eSBarry Smith + A - the `MATMAIJ` matrix 50ff585edeSJed Brown - dof - the block size for the new matrix 51ff585edeSJed Brown 52ff585edeSJed Brown Output Parameter: 5311a5261eSBarry Smith . B - the new `MATMAIJ` matrix 54ff585edeSJed Brown 55ff585edeSJed Brown Level: advanced 56ff585edeSJed Brown 5711a5261eSBarry Smith .seealso: `MATMAIJ`, `MatCreateMAIJ()` 58ff585edeSJed Brown @*/ 599371c9d4SSatish Balay PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B) { 600298fd71SBarry Smith Mat Aij = NULL; 61b9b97703SBarry Smith 62b9b97703SBarry Smith PetscFunctionBegin; 63c5eb9154SBarry Smith PetscValidLogicalCollectiveInt(A, dof, 2); 649566063dSJacob Faibussowitsch PetscCall(MatMAIJGetAIJ(A, &Aij)); 659566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(Aij, dof, B)); 66b9b97703SBarry Smith PetscFunctionReturn(0); 67b9b97703SBarry Smith } 68b9b97703SBarry Smith 699371c9d4SSatish Balay PetscErrorCode MatDestroy_SeqMAIJ(Mat A) { 704cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 7182b1193eSBarry Smith 7282b1193eSBarry Smith PetscFunctionBegin; 739566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->AIJ)); 749566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 752e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL)); 769566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL)); 779566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL)); 784cb9d9b8SBarry Smith PetscFunctionReturn(0); 794cb9d9b8SBarry Smith } 804cb9d9b8SBarry Smith 819371c9d4SSatish Balay PetscErrorCode MatSetUp_MAIJ(Mat A) { 82356c569eSBarry Smith PetscFunctionBegin; 83ce94432eSBarry Smith SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices"); 84356c569eSBarry Smith } 85356c569eSBarry Smith 869371c9d4SSatish Balay PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer) { 870fd73130SBarry Smith Mat B; 880fd73130SBarry Smith 890fd73130SBarry Smith PetscFunctionBegin; 909566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 919566063dSJacob Faibussowitsch PetscCall(MatView(B, viewer)); 929566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 930fd73130SBarry Smith PetscFunctionReturn(0); 940fd73130SBarry Smith } 950fd73130SBarry Smith 969371c9d4SSatish Balay PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer) { 970fd73130SBarry Smith Mat B; 980fd73130SBarry Smith 990fd73130SBarry Smith PetscFunctionBegin; 1009566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B)); 1019566063dSJacob Faibussowitsch PetscCall(MatView(B, viewer)); 1029566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 1030fd73130SBarry Smith PetscFunctionReturn(0); 1040fd73130SBarry Smith } 1050fd73130SBarry Smith 1069371c9d4SSatish Balay PetscErrorCode MatDestroy_MPIMAIJ(Mat A) { 1074cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 1084cb9d9b8SBarry Smith 1094cb9d9b8SBarry Smith PetscFunctionBegin; 1109566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->AIJ)); 1119566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->OAIJ)); 1129566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->A)); 1139566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&b->ctx)); 1149566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->w)); 1159566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 1162e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL)); 1179566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL)); 1189566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL)); 1199566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); 120b9b97703SBarry Smith PetscFunctionReturn(0); 121b9b97703SBarry Smith } 122b9b97703SBarry Smith 1230bad9183SKris Buschelman /*MC 124fafad747SKris Buschelman MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 1250bad9183SKris Buschelman multicomponent problems, interpolating or restricting each component the same way independently. 12611a5261eSBarry Smith The matrix type is based on `MATSEQAIJ` for sequential matrices, and `MATMPIAIJ` for distributed matrices. 1270bad9183SKris Buschelman 1280bad9183SKris Buschelman Operations provided: 12967b8a455SSatish Balay .vb 13011a5261eSBarry Smith MatMult() 13111a5261eSBarry Smith MatMultTranspose() 13211a5261eSBarry Smith MatMultAdd() 13311a5261eSBarry Smith MatMultTransposeAdd() 13467b8a455SSatish Balay .ve 1350bad9183SKris Buschelman 1360bad9183SKris Buschelman Level: advanced 1370bad9183SKris Buschelman 13811a5261eSBarry Smith .seealso: `MATAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()` 1390bad9183SKris Buschelman M*/ 1400bad9183SKris Buschelman 1419371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) { 1424cb9d9b8SBarry Smith Mat_MPIMAIJ *b; 14364b52464SHong Zhang PetscMPIInt size; 14482b1193eSBarry Smith 14582b1193eSBarry Smith PetscFunctionBegin; 146*4dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&b)); 147b0a32e0cSBarry Smith A->data = (void *)b; 14826fbe8dcSKarl Rupp 1499566063dSJacob Faibussowitsch PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps))); 15026fbe8dcSKarl Rupp 151356c569eSBarry Smith A->ops->setup = MatSetUp_MAIJ; 152f4a53059SBarry Smith 153f4259b30SLisandro Dalcin b->AIJ = NULL; 154cd3bbe55SBarry Smith b->dof = 0; 155f4259b30SLisandro Dalcin b->OAIJ = NULL; 156f4259b30SLisandro Dalcin b->ctx = NULL; 157f4259b30SLisandro Dalcin b->w = NULL; 1589566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 15964b52464SHong Zhang if (size == 1) { 1609566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ)); 16164b52464SHong Zhang } else { 1629566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ)); 16364b52464SHong Zhang } 16432e7c8b0SBarry Smith A->preallocated = PETSC_TRUE; 16532e7c8b0SBarry Smith A->assembled = PETSC_TRUE; 16682b1193eSBarry Smith PetscFunctionReturn(0); 16782b1193eSBarry Smith } 16882b1193eSBarry Smith 169cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 1709371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_2(Mat A, Vec xx, Vec yy) { 1714cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 172bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 173d2840e09SBarry Smith const PetscScalar *x, *v; 174d2840e09SBarry Smith PetscScalar *y, sum1, sum2; 175d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 176d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 17782b1193eSBarry Smith 178bcc973b7SBarry Smith PetscFunctionBegin; 1799566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 1809566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 181bcc973b7SBarry Smith idx = a->j; 182bcc973b7SBarry Smith v = a->a; 183bcc973b7SBarry Smith ii = a->i; 184bcc973b7SBarry Smith 185bcc973b7SBarry Smith for (i = 0; i < m; i++) { 186bcc973b7SBarry Smith jrow = ii[i]; 187bcc973b7SBarry Smith n = ii[i + 1] - jrow; 188bcc973b7SBarry Smith sum1 = 0.0; 189bcc973b7SBarry Smith sum2 = 0.0; 19026fbe8dcSKarl Rupp 191b3c51c66SMatthew Knepley nonzerorow += (n > 0); 192bcc973b7SBarry Smith for (j = 0; j < n; j++) { 193bcc973b7SBarry Smith sum1 += v[jrow] * x[2 * idx[jrow]]; 194bcc973b7SBarry Smith sum2 += v[jrow] * x[2 * idx[jrow] + 1]; 195bcc973b7SBarry Smith jrow++; 196bcc973b7SBarry Smith } 197bcc973b7SBarry Smith y[2 * i] = sum1; 198bcc973b7SBarry Smith y[2 * i + 1] = sum2; 199bcc973b7SBarry Smith } 200bcc973b7SBarry Smith 2019566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(4.0 * a->nz - 2.0 * nonzerorow)); 2029566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 2039566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 20482b1193eSBarry Smith PetscFunctionReturn(0); 20582b1193eSBarry Smith } 206bcc973b7SBarry Smith 2079371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A, Vec xx, Vec yy) { 2084cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 209bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 210d2840e09SBarry Smith const PetscScalar *x, *v; 211d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2; 212d2840e09SBarry Smith PetscInt n, i; 213d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 21482b1193eSBarry Smith 215bcc973b7SBarry Smith PetscFunctionBegin; 2169566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 2179566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 2189566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 219bfec09a0SHong Zhang 220bcc973b7SBarry Smith for (i = 0; i < m; i++) { 221bfec09a0SHong Zhang idx = a->j + a->i[i]; 222bfec09a0SHong Zhang v = a->a + a->i[i]; 223bcc973b7SBarry Smith n = a->i[i + 1] - a->i[i]; 224bcc973b7SBarry Smith alpha1 = x[2 * i]; 225bcc973b7SBarry Smith alpha2 = x[2 * i + 1]; 22626fbe8dcSKarl Rupp while (n-- > 0) { 22726fbe8dcSKarl Rupp y[2 * (*idx)] += alpha1 * (*v); 22826fbe8dcSKarl Rupp y[2 * (*idx) + 1] += alpha2 * (*v); 2299371c9d4SSatish Balay idx++; 2309371c9d4SSatish Balay v++; 23126fbe8dcSKarl Rupp } 232bcc973b7SBarry Smith } 2339566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(4.0 * a->nz)); 2349566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 2359566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 23682b1193eSBarry Smith PetscFunctionReturn(0); 23782b1193eSBarry Smith } 238bcc973b7SBarry Smith 2399371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz) { 2404cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 241bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 242d2840e09SBarry Smith const PetscScalar *x, *v; 243d2840e09SBarry Smith PetscScalar *y, sum1, sum2; 244b24ad042SBarry Smith PetscInt n, i, jrow, j; 245d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 24682b1193eSBarry Smith 247bcc973b7SBarry Smith PetscFunctionBegin; 2489566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 2499566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 2509566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 251bcc973b7SBarry Smith idx = a->j; 252bcc973b7SBarry Smith v = a->a; 253bcc973b7SBarry Smith ii = a->i; 254bcc973b7SBarry Smith 255bcc973b7SBarry Smith for (i = 0; i < m; i++) { 256bcc973b7SBarry Smith jrow = ii[i]; 257bcc973b7SBarry Smith n = ii[i + 1] - jrow; 258bcc973b7SBarry Smith sum1 = 0.0; 259bcc973b7SBarry Smith sum2 = 0.0; 260bcc973b7SBarry Smith for (j = 0; j < n; j++) { 261bcc973b7SBarry Smith sum1 += v[jrow] * x[2 * idx[jrow]]; 262bcc973b7SBarry Smith sum2 += v[jrow] * x[2 * idx[jrow] + 1]; 263bcc973b7SBarry Smith jrow++; 264bcc973b7SBarry Smith } 265bcc973b7SBarry Smith y[2 * i] += sum1; 266bcc973b7SBarry Smith y[2 * i + 1] += sum2; 267bcc973b7SBarry Smith } 268bcc973b7SBarry Smith 2699566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(4.0 * a->nz)); 2709566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 2719566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 272bcc973b7SBarry Smith PetscFunctionReturn(0); 27382b1193eSBarry Smith } 2749371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz) { 2754cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 276bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 277d2840e09SBarry Smith const PetscScalar *x, *v; 278d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2; 279d2840e09SBarry Smith PetscInt n, i; 280d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 28182b1193eSBarry Smith 282bcc973b7SBarry Smith PetscFunctionBegin; 2839566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 2849566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 2859566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 286bfec09a0SHong Zhang 287bcc973b7SBarry Smith for (i = 0; i < m; i++) { 288bfec09a0SHong Zhang idx = a->j + a->i[i]; 289bfec09a0SHong Zhang v = a->a + a->i[i]; 290bcc973b7SBarry Smith n = a->i[i + 1] - a->i[i]; 291bcc973b7SBarry Smith alpha1 = x[2 * i]; 292bcc973b7SBarry Smith alpha2 = x[2 * i + 1]; 29326fbe8dcSKarl Rupp while (n-- > 0) { 29426fbe8dcSKarl Rupp y[2 * (*idx)] += alpha1 * (*v); 29526fbe8dcSKarl Rupp y[2 * (*idx) + 1] += alpha2 * (*v); 2969371c9d4SSatish Balay idx++; 2979371c9d4SSatish Balay v++; 29826fbe8dcSKarl Rupp } 299bcc973b7SBarry Smith } 3009566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(4.0 * a->nz)); 3019566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 3029566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 303bcc973b7SBarry Smith PetscFunctionReturn(0); 30482b1193eSBarry Smith } 305cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 3069371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_3(Mat A, Vec xx, Vec yy) { 3074cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 308bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 309d2840e09SBarry Smith const PetscScalar *x, *v; 310d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3; 311d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 312d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 31382b1193eSBarry Smith 314bcc973b7SBarry Smith PetscFunctionBegin; 3159566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 3169566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 317bcc973b7SBarry Smith idx = a->j; 318bcc973b7SBarry Smith v = a->a; 319bcc973b7SBarry Smith ii = a->i; 320bcc973b7SBarry Smith 321bcc973b7SBarry Smith for (i = 0; i < m; i++) { 322bcc973b7SBarry Smith jrow = ii[i]; 323bcc973b7SBarry Smith n = ii[i + 1] - jrow; 324bcc973b7SBarry Smith sum1 = 0.0; 325bcc973b7SBarry Smith sum2 = 0.0; 326bcc973b7SBarry Smith sum3 = 0.0; 32726fbe8dcSKarl Rupp 328b3c51c66SMatthew Knepley nonzerorow += (n > 0); 329bcc973b7SBarry Smith for (j = 0; j < n; j++) { 330bcc973b7SBarry Smith sum1 += v[jrow] * x[3 * idx[jrow]]; 331bcc973b7SBarry Smith sum2 += v[jrow] * x[3 * idx[jrow] + 1]; 332bcc973b7SBarry Smith sum3 += v[jrow] * x[3 * idx[jrow] + 2]; 333bcc973b7SBarry Smith jrow++; 334bcc973b7SBarry Smith } 335bcc973b7SBarry Smith y[3 * i] = sum1; 336bcc973b7SBarry Smith y[3 * i + 1] = sum2; 337bcc973b7SBarry Smith y[3 * i + 2] = sum3; 338bcc973b7SBarry Smith } 339bcc973b7SBarry Smith 3409566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(6.0 * a->nz - 3.0 * nonzerorow)); 3419566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 3429566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 343bcc973b7SBarry Smith PetscFunctionReturn(0); 344bcc973b7SBarry Smith } 345bcc973b7SBarry Smith 3469371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A, Vec xx, Vec yy) { 3474cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 348bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 349d2840e09SBarry Smith const PetscScalar *x, *v; 350d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3; 351d2840e09SBarry Smith PetscInt n, i; 352d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 353bcc973b7SBarry Smith 354bcc973b7SBarry Smith PetscFunctionBegin; 3559566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 3569566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 3579566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 358bfec09a0SHong Zhang 359bcc973b7SBarry Smith for (i = 0; i < m; i++) { 360bfec09a0SHong Zhang idx = a->j + a->i[i]; 361bfec09a0SHong Zhang v = a->a + a->i[i]; 362bcc973b7SBarry Smith n = a->i[i + 1] - a->i[i]; 363bcc973b7SBarry Smith alpha1 = x[3 * i]; 364bcc973b7SBarry Smith alpha2 = x[3 * i + 1]; 365bcc973b7SBarry Smith alpha3 = x[3 * i + 2]; 366bcc973b7SBarry Smith while (n-- > 0) { 367bcc973b7SBarry Smith y[3 * (*idx)] += alpha1 * (*v); 368bcc973b7SBarry Smith y[3 * (*idx) + 1] += alpha2 * (*v); 369bcc973b7SBarry Smith y[3 * (*idx) + 2] += alpha3 * (*v); 3709371c9d4SSatish Balay idx++; 3719371c9d4SSatish Balay v++; 372bcc973b7SBarry Smith } 373bcc973b7SBarry Smith } 3749566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(6.0 * a->nz)); 3759566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 3769566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 377bcc973b7SBarry Smith PetscFunctionReturn(0); 378bcc973b7SBarry Smith } 379bcc973b7SBarry Smith 3809371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz) { 3814cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 382bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 383d2840e09SBarry Smith const PetscScalar *x, *v; 384d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3; 385b24ad042SBarry Smith PetscInt n, i, jrow, j; 386d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 387bcc973b7SBarry Smith 388bcc973b7SBarry Smith PetscFunctionBegin; 3899566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 3909566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 3919566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 392bcc973b7SBarry Smith idx = a->j; 393bcc973b7SBarry Smith v = a->a; 394bcc973b7SBarry Smith ii = a->i; 395bcc973b7SBarry Smith 396bcc973b7SBarry Smith for (i = 0; i < m; i++) { 397bcc973b7SBarry Smith jrow = ii[i]; 398bcc973b7SBarry Smith n = ii[i + 1] - jrow; 399bcc973b7SBarry Smith sum1 = 0.0; 400bcc973b7SBarry Smith sum2 = 0.0; 401bcc973b7SBarry Smith sum3 = 0.0; 402bcc973b7SBarry Smith for (j = 0; j < n; j++) { 403bcc973b7SBarry Smith sum1 += v[jrow] * x[3 * idx[jrow]]; 404bcc973b7SBarry Smith sum2 += v[jrow] * x[3 * idx[jrow] + 1]; 405bcc973b7SBarry Smith sum3 += v[jrow] * x[3 * idx[jrow] + 2]; 406bcc973b7SBarry Smith jrow++; 407bcc973b7SBarry Smith } 408bcc973b7SBarry Smith y[3 * i] += sum1; 409bcc973b7SBarry Smith y[3 * i + 1] += sum2; 410bcc973b7SBarry Smith y[3 * i + 2] += sum3; 411bcc973b7SBarry Smith } 412bcc973b7SBarry Smith 4139566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(6.0 * a->nz)); 4149566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 4159566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 416bcc973b7SBarry Smith PetscFunctionReturn(0); 417bcc973b7SBarry Smith } 4189371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz) { 4194cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 420bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 421d2840e09SBarry Smith const PetscScalar *x, *v; 422d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3; 423d2840e09SBarry Smith PetscInt n, i; 424d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 425bcc973b7SBarry Smith 426bcc973b7SBarry Smith PetscFunctionBegin; 4279566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 4289566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 4299566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 430bcc973b7SBarry Smith for (i = 0; i < m; i++) { 431bfec09a0SHong Zhang idx = a->j + a->i[i]; 432bfec09a0SHong Zhang v = a->a + a->i[i]; 433bcc973b7SBarry Smith n = a->i[i + 1] - a->i[i]; 434bcc973b7SBarry Smith alpha1 = x[3 * i]; 435bcc973b7SBarry Smith alpha2 = x[3 * i + 1]; 436bcc973b7SBarry Smith alpha3 = x[3 * i + 2]; 437bcc973b7SBarry Smith while (n-- > 0) { 438bcc973b7SBarry Smith y[3 * (*idx)] += alpha1 * (*v); 439bcc973b7SBarry Smith y[3 * (*idx) + 1] += alpha2 * (*v); 440bcc973b7SBarry Smith y[3 * (*idx) + 2] += alpha3 * (*v); 4419371c9d4SSatish Balay idx++; 4429371c9d4SSatish Balay v++; 443bcc973b7SBarry Smith } 444bcc973b7SBarry Smith } 4459566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(6.0 * a->nz)); 4469566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 4479566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 448bcc973b7SBarry Smith PetscFunctionReturn(0); 449bcc973b7SBarry Smith } 450bcc973b7SBarry Smith 451bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/ 4529371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_4(Mat A, Vec xx, Vec yy) { 4534cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 454bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 455d2840e09SBarry Smith const PetscScalar *x, *v; 456d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4; 457d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 458d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 459bcc973b7SBarry Smith 460bcc973b7SBarry Smith PetscFunctionBegin; 4619566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 4629566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 463bcc973b7SBarry Smith idx = a->j; 464bcc973b7SBarry Smith v = a->a; 465bcc973b7SBarry Smith ii = a->i; 466bcc973b7SBarry Smith 467bcc973b7SBarry Smith for (i = 0; i < m; i++) { 468bcc973b7SBarry Smith jrow = ii[i]; 469bcc973b7SBarry Smith n = ii[i + 1] - jrow; 470bcc973b7SBarry Smith sum1 = 0.0; 471bcc973b7SBarry Smith sum2 = 0.0; 472bcc973b7SBarry Smith sum3 = 0.0; 473bcc973b7SBarry Smith sum4 = 0.0; 474b3c51c66SMatthew Knepley nonzerorow += (n > 0); 475bcc973b7SBarry Smith for (j = 0; j < n; j++) { 476bcc973b7SBarry Smith sum1 += v[jrow] * x[4 * idx[jrow]]; 477bcc973b7SBarry Smith sum2 += v[jrow] * x[4 * idx[jrow] + 1]; 478bcc973b7SBarry Smith sum3 += v[jrow] * x[4 * idx[jrow] + 2]; 479bcc973b7SBarry Smith sum4 += v[jrow] * x[4 * idx[jrow] + 3]; 480bcc973b7SBarry Smith jrow++; 481bcc973b7SBarry Smith } 482bcc973b7SBarry Smith y[4 * i] = sum1; 483bcc973b7SBarry Smith y[4 * i + 1] = sum2; 484bcc973b7SBarry Smith y[4 * i + 2] = sum3; 485bcc973b7SBarry Smith y[4 * i + 3] = sum4; 486bcc973b7SBarry Smith } 487bcc973b7SBarry Smith 4889566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(8.0 * a->nz - 4.0 * nonzerorow)); 4899566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 4909566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 491bcc973b7SBarry Smith PetscFunctionReturn(0); 492bcc973b7SBarry Smith } 493bcc973b7SBarry Smith 4949371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A, Vec xx, Vec yy) { 4954cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 496bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 497d2840e09SBarry Smith const PetscScalar *x, *v; 498d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4; 499d2840e09SBarry Smith PetscInt n, i; 500d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 501bcc973b7SBarry Smith 502bcc973b7SBarry Smith PetscFunctionBegin; 5039566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 5049566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 5059566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 506bcc973b7SBarry Smith for (i = 0; i < m; i++) { 507bfec09a0SHong Zhang idx = a->j + a->i[i]; 508bfec09a0SHong Zhang v = a->a + a->i[i]; 509bcc973b7SBarry Smith n = a->i[i + 1] - a->i[i]; 510bcc973b7SBarry Smith alpha1 = x[4 * i]; 511bcc973b7SBarry Smith alpha2 = x[4 * i + 1]; 512bcc973b7SBarry Smith alpha3 = x[4 * i + 2]; 513bcc973b7SBarry Smith alpha4 = x[4 * i + 3]; 514bcc973b7SBarry Smith while (n-- > 0) { 515bcc973b7SBarry Smith y[4 * (*idx)] += alpha1 * (*v); 516bcc973b7SBarry Smith y[4 * (*idx) + 1] += alpha2 * (*v); 517bcc973b7SBarry Smith y[4 * (*idx) + 2] += alpha3 * (*v); 518bcc973b7SBarry Smith y[4 * (*idx) + 3] += alpha4 * (*v); 5199371c9d4SSatish Balay idx++; 5209371c9d4SSatish Balay v++; 521bcc973b7SBarry Smith } 522bcc973b7SBarry Smith } 5239566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(8.0 * a->nz)); 5249566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 5259566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 526bcc973b7SBarry Smith PetscFunctionReturn(0); 527bcc973b7SBarry Smith } 528bcc973b7SBarry Smith 5299371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz) { 5304cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 531f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 532d2840e09SBarry Smith const PetscScalar *x, *v; 533d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4; 534b24ad042SBarry Smith PetscInt n, i, jrow, j; 535d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 536f9fae5adSBarry Smith 537f9fae5adSBarry Smith PetscFunctionBegin; 5389566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 5399566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 5409566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 541f9fae5adSBarry Smith idx = a->j; 542f9fae5adSBarry Smith v = a->a; 543f9fae5adSBarry Smith ii = a->i; 544f9fae5adSBarry Smith 545f9fae5adSBarry Smith for (i = 0; i < m; i++) { 546f9fae5adSBarry Smith jrow = ii[i]; 547f9fae5adSBarry Smith n = ii[i + 1] - jrow; 548f9fae5adSBarry Smith sum1 = 0.0; 549f9fae5adSBarry Smith sum2 = 0.0; 550f9fae5adSBarry Smith sum3 = 0.0; 551f9fae5adSBarry Smith sum4 = 0.0; 552f9fae5adSBarry Smith for (j = 0; j < n; j++) { 553f9fae5adSBarry Smith sum1 += v[jrow] * x[4 * idx[jrow]]; 554f9fae5adSBarry Smith sum2 += v[jrow] * x[4 * idx[jrow] + 1]; 555f9fae5adSBarry Smith sum3 += v[jrow] * x[4 * idx[jrow] + 2]; 556f9fae5adSBarry Smith sum4 += v[jrow] * x[4 * idx[jrow] + 3]; 557f9fae5adSBarry Smith jrow++; 558f9fae5adSBarry Smith } 559f9fae5adSBarry Smith y[4 * i] += sum1; 560f9fae5adSBarry Smith y[4 * i + 1] += sum2; 561f9fae5adSBarry Smith y[4 * i + 2] += sum3; 562f9fae5adSBarry Smith y[4 * i + 3] += sum4; 563f9fae5adSBarry Smith } 564f9fae5adSBarry Smith 5659566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(8.0 * a->nz)); 5669566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 5679566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 568f9fae5adSBarry Smith PetscFunctionReturn(0); 569bcc973b7SBarry Smith } 5709371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz) { 5714cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 572f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 573d2840e09SBarry Smith const PetscScalar *x, *v; 574d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4; 575d2840e09SBarry Smith PetscInt n, i; 576d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 577f9fae5adSBarry Smith 578f9fae5adSBarry Smith PetscFunctionBegin; 5799566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 5809566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 5819566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 582bfec09a0SHong Zhang 583f9fae5adSBarry Smith for (i = 0; i < m; i++) { 584bfec09a0SHong Zhang idx = a->j + a->i[i]; 585bfec09a0SHong Zhang v = a->a + a->i[i]; 586f9fae5adSBarry Smith n = a->i[i + 1] - a->i[i]; 587f9fae5adSBarry Smith alpha1 = x[4 * i]; 588f9fae5adSBarry Smith alpha2 = x[4 * i + 1]; 589f9fae5adSBarry Smith alpha3 = x[4 * i + 2]; 590f9fae5adSBarry Smith alpha4 = x[4 * i + 3]; 591f9fae5adSBarry Smith while (n-- > 0) { 592f9fae5adSBarry Smith y[4 * (*idx)] += alpha1 * (*v); 593f9fae5adSBarry Smith y[4 * (*idx) + 1] += alpha2 * (*v); 594f9fae5adSBarry Smith y[4 * (*idx) + 2] += alpha3 * (*v); 595f9fae5adSBarry Smith y[4 * (*idx) + 3] += alpha4 * (*v); 5969371c9d4SSatish Balay idx++; 5979371c9d4SSatish Balay v++; 598f9fae5adSBarry Smith } 599f9fae5adSBarry Smith } 6009566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(8.0 * a->nz)); 6019566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 6029566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 603f9fae5adSBarry Smith PetscFunctionReturn(0); 604f9fae5adSBarry Smith } 605f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/ 6066cd98798SBarry Smith 6079371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_5(Mat A, Vec xx, Vec yy) { 6084cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 609f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 610d2840e09SBarry Smith const PetscScalar *x, *v; 611d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5; 612d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 613d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 614f9fae5adSBarry Smith 615f9fae5adSBarry Smith PetscFunctionBegin; 6169566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 6179566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 618f9fae5adSBarry Smith idx = a->j; 619f9fae5adSBarry Smith v = a->a; 620f9fae5adSBarry Smith ii = a->i; 621f9fae5adSBarry Smith 622f9fae5adSBarry Smith for (i = 0; i < m; i++) { 623f9fae5adSBarry Smith jrow = ii[i]; 624f9fae5adSBarry Smith n = ii[i + 1] - jrow; 625f9fae5adSBarry Smith sum1 = 0.0; 626f9fae5adSBarry Smith sum2 = 0.0; 627f9fae5adSBarry Smith sum3 = 0.0; 628f9fae5adSBarry Smith sum4 = 0.0; 629f9fae5adSBarry Smith sum5 = 0.0; 63026fbe8dcSKarl Rupp 631b3c51c66SMatthew Knepley nonzerorow += (n > 0); 632f9fae5adSBarry Smith for (j = 0; j < n; j++) { 633f9fae5adSBarry Smith sum1 += v[jrow] * x[5 * idx[jrow]]; 634f9fae5adSBarry Smith sum2 += v[jrow] * x[5 * idx[jrow] + 1]; 635f9fae5adSBarry Smith sum3 += v[jrow] * x[5 * idx[jrow] + 2]; 636f9fae5adSBarry Smith sum4 += v[jrow] * x[5 * idx[jrow] + 3]; 637f9fae5adSBarry Smith sum5 += v[jrow] * x[5 * idx[jrow] + 4]; 638f9fae5adSBarry Smith jrow++; 639f9fae5adSBarry Smith } 640f9fae5adSBarry Smith y[5 * i] = sum1; 641f9fae5adSBarry Smith y[5 * i + 1] = sum2; 642f9fae5adSBarry Smith y[5 * i + 2] = sum3; 643f9fae5adSBarry Smith y[5 * i + 3] = sum4; 644f9fae5adSBarry Smith y[5 * i + 4] = sum5; 645f9fae5adSBarry Smith } 646f9fae5adSBarry Smith 6479566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(10.0 * a->nz - 5.0 * nonzerorow)); 6489566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 6499566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 650f9fae5adSBarry Smith PetscFunctionReturn(0); 651f9fae5adSBarry Smith } 652f9fae5adSBarry Smith 6539371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A, Vec xx, Vec yy) { 6544cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 655f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 656d2840e09SBarry Smith const PetscScalar *x, *v; 657d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5; 658d2840e09SBarry Smith PetscInt n, i; 659d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 660f9fae5adSBarry Smith 661f9fae5adSBarry Smith PetscFunctionBegin; 6629566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 6639566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 6649566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 665bfec09a0SHong Zhang 666f9fae5adSBarry Smith for (i = 0; i < m; i++) { 667bfec09a0SHong Zhang idx = a->j + a->i[i]; 668bfec09a0SHong Zhang v = a->a + a->i[i]; 669f9fae5adSBarry Smith n = a->i[i + 1] - a->i[i]; 670f9fae5adSBarry Smith alpha1 = x[5 * i]; 671f9fae5adSBarry Smith alpha2 = x[5 * i + 1]; 672f9fae5adSBarry Smith alpha3 = x[5 * i + 2]; 673f9fae5adSBarry Smith alpha4 = x[5 * i + 3]; 674f9fae5adSBarry Smith alpha5 = x[5 * i + 4]; 675f9fae5adSBarry Smith while (n-- > 0) { 676f9fae5adSBarry Smith y[5 * (*idx)] += alpha1 * (*v); 677f9fae5adSBarry Smith y[5 * (*idx) + 1] += alpha2 * (*v); 678f9fae5adSBarry Smith y[5 * (*idx) + 2] += alpha3 * (*v); 679f9fae5adSBarry Smith y[5 * (*idx) + 3] += alpha4 * (*v); 680f9fae5adSBarry Smith y[5 * (*idx) + 4] += alpha5 * (*v); 6819371c9d4SSatish Balay idx++; 6829371c9d4SSatish Balay v++; 683f9fae5adSBarry Smith } 684f9fae5adSBarry Smith } 6859566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(10.0 * a->nz)); 6869566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 6879566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 688f9fae5adSBarry Smith PetscFunctionReturn(0); 689f9fae5adSBarry Smith } 690f9fae5adSBarry Smith 6919371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz) { 6924cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 693f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 694d2840e09SBarry Smith const PetscScalar *x, *v; 695d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5; 696b24ad042SBarry Smith PetscInt n, i, jrow, j; 697d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 698f9fae5adSBarry Smith 699f9fae5adSBarry Smith PetscFunctionBegin; 7009566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 7019566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 7029566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 703f9fae5adSBarry Smith idx = a->j; 704f9fae5adSBarry Smith v = a->a; 705f9fae5adSBarry Smith ii = a->i; 706f9fae5adSBarry Smith 707f9fae5adSBarry Smith for (i = 0; i < m; i++) { 708f9fae5adSBarry Smith jrow = ii[i]; 709f9fae5adSBarry Smith n = ii[i + 1] - jrow; 710f9fae5adSBarry Smith sum1 = 0.0; 711f9fae5adSBarry Smith sum2 = 0.0; 712f9fae5adSBarry Smith sum3 = 0.0; 713f9fae5adSBarry Smith sum4 = 0.0; 714f9fae5adSBarry Smith sum5 = 0.0; 715f9fae5adSBarry Smith for (j = 0; j < n; j++) { 716f9fae5adSBarry Smith sum1 += v[jrow] * x[5 * idx[jrow]]; 717f9fae5adSBarry Smith sum2 += v[jrow] * x[5 * idx[jrow] + 1]; 718f9fae5adSBarry Smith sum3 += v[jrow] * x[5 * idx[jrow] + 2]; 719f9fae5adSBarry Smith sum4 += v[jrow] * x[5 * idx[jrow] + 3]; 720f9fae5adSBarry Smith sum5 += v[jrow] * x[5 * idx[jrow] + 4]; 721f9fae5adSBarry Smith jrow++; 722f9fae5adSBarry Smith } 723f9fae5adSBarry Smith y[5 * i] += sum1; 724f9fae5adSBarry Smith y[5 * i + 1] += sum2; 725f9fae5adSBarry Smith y[5 * i + 2] += sum3; 726f9fae5adSBarry Smith y[5 * i + 3] += sum4; 727f9fae5adSBarry Smith y[5 * i + 4] += sum5; 728f9fae5adSBarry Smith } 729f9fae5adSBarry Smith 7309566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(10.0 * a->nz)); 7319566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 7329566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 733f9fae5adSBarry Smith PetscFunctionReturn(0); 734f9fae5adSBarry Smith } 735f9fae5adSBarry Smith 7369371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz) { 7374cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 738f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 739d2840e09SBarry Smith const PetscScalar *x, *v; 740d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5; 741d2840e09SBarry Smith PetscInt n, i; 742d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 743f9fae5adSBarry Smith 744f9fae5adSBarry Smith PetscFunctionBegin; 7459566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 7469566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 7479566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 748bfec09a0SHong Zhang 749f9fae5adSBarry Smith for (i = 0; i < m; i++) { 750bfec09a0SHong Zhang idx = a->j + a->i[i]; 751bfec09a0SHong Zhang v = a->a + a->i[i]; 752f9fae5adSBarry Smith n = a->i[i + 1] - a->i[i]; 753f9fae5adSBarry Smith alpha1 = x[5 * i]; 754f9fae5adSBarry Smith alpha2 = x[5 * i + 1]; 755f9fae5adSBarry Smith alpha3 = x[5 * i + 2]; 756f9fae5adSBarry Smith alpha4 = x[5 * i + 3]; 757f9fae5adSBarry Smith alpha5 = x[5 * i + 4]; 758f9fae5adSBarry Smith while (n-- > 0) { 759f9fae5adSBarry Smith y[5 * (*idx)] += alpha1 * (*v); 760f9fae5adSBarry Smith y[5 * (*idx) + 1] += alpha2 * (*v); 761f9fae5adSBarry Smith y[5 * (*idx) + 2] += alpha3 * (*v); 762f9fae5adSBarry Smith y[5 * (*idx) + 3] += alpha4 * (*v); 763f9fae5adSBarry Smith y[5 * (*idx) + 4] += alpha5 * (*v); 7649371c9d4SSatish Balay idx++; 7659371c9d4SSatish Balay v++; 766f9fae5adSBarry Smith } 767f9fae5adSBarry Smith } 7689566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(10.0 * a->nz)); 7699566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 7709566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 771f9fae5adSBarry Smith PetscFunctionReturn(0); 772bcc973b7SBarry Smith } 773bcc973b7SBarry Smith 7746cd98798SBarry Smith /* ------------------------------------------------------------------------------*/ 7759371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_6(Mat A, Vec xx, Vec yy) { 7766cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 7776cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 778d2840e09SBarry Smith const PetscScalar *x, *v; 779d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6; 780d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 781d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 7826cd98798SBarry Smith 7836cd98798SBarry Smith PetscFunctionBegin; 7849566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 7859566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 7866cd98798SBarry Smith idx = a->j; 7876cd98798SBarry Smith v = a->a; 7886cd98798SBarry Smith ii = a->i; 7896cd98798SBarry Smith 7906cd98798SBarry Smith for (i = 0; i < m; i++) { 7916cd98798SBarry Smith jrow = ii[i]; 7926cd98798SBarry Smith n = ii[i + 1] - jrow; 7936cd98798SBarry Smith sum1 = 0.0; 7946cd98798SBarry Smith sum2 = 0.0; 7956cd98798SBarry Smith sum3 = 0.0; 7966cd98798SBarry Smith sum4 = 0.0; 7976cd98798SBarry Smith sum5 = 0.0; 7986cd98798SBarry Smith sum6 = 0.0; 79926fbe8dcSKarl Rupp 800b3c51c66SMatthew Knepley nonzerorow += (n > 0); 8016cd98798SBarry Smith for (j = 0; j < n; j++) { 8026cd98798SBarry Smith sum1 += v[jrow] * x[6 * idx[jrow]]; 8036cd98798SBarry Smith sum2 += v[jrow] * x[6 * idx[jrow] + 1]; 8046cd98798SBarry Smith sum3 += v[jrow] * x[6 * idx[jrow] + 2]; 8056cd98798SBarry Smith sum4 += v[jrow] * x[6 * idx[jrow] + 3]; 8066cd98798SBarry Smith sum5 += v[jrow] * x[6 * idx[jrow] + 4]; 8076cd98798SBarry Smith sum6 += v[jrow] * x[6 * idx[jrow] + 5]; 8086cd98798SBarry Smith jrow++; 8096cd98798SBarry Smith } 8106cd98798SBarry Smith y[6 * i] = sum1; 8116cd98798SBarry Smith y[6 * i + 1] = sum2; 8126cd98798SBarry Smith y[6 * i + 2] = sum3; 8136cd98798SBarry Smith y[6 * i + 3] = sum4; 8146cd98798SBarry Smith y[6 * i + 4] = sum5; 8156cd98798SBarry Smith y[6 * i + 5] = sum6; 8166cd98798SBarry Smith } 8176cd98798SBarry Smith 8189566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(12.0 * a->nz - 6.0 * nonzerorow)); 8199566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 8209566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 8216cd98798SBarry Smith PetscFunctionReturn(0); 8226cd98798SBarry Smith } 8236cd98798SBarry Smith 8249371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A, Vec xx, Vec yy) { 8256cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 8266cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 827d2840e09SBarry Smith const PetscScalar *x, *v; 828d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6; 829d2840e09SBarry Smith PetscInt n, i; 830d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 8316cd98798SBarry Smith 8326cd98798SBarry Smith PetscFunctionBegin; 8339566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 8349566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 8359566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 836bfec09a0SHong Zhang 8376cd98798SBarry Smith for (i = 0; i < m; i++) { 838bfec09a0SHong Zhang idx = a->j + a->i[i]; 839bfec09a0SHong Zhang v = a->a + a->i[i]; 8406cd98798SBarry Smith n = a->i[i + 1] - a->i[i]; 8416cd98798SBarry Smith alpha1 = x[6 * i]; 8426cd98798SBarry Smith alpha2 = x[6 * i + 1]; 8436cd98798SBarry Smith alpha3 = x[6 * i + 2]; 8446cd98798SBarry Smith alpha4 = x[6 * i + 3]; 8456cd98798SBarry Smith alpha5 = x[6 * i + 4]; 8466cd98798SBarry Smith alpha6 = x[6 * i + 5]; 8476cd98798SBarry Smith while (n-- > 0) { 8486cd98798SBarry Smith y[6 * (*idx)] += alpha1 * (*v); 8496cd98798SBarry Smith y[6 * (*idx) + 1] += alpha2 * (*v); 8506cd98798SBarry Smith y[6 * (*idx) + 2] += alpha3 * (*v); 8516cd98798SBarry Smith y[6 * (*idx) + 3] += alpha4 * (*v); 8526cd98798SBarry Smith y[6 * (*idx) + 4] += alpha5 * (*v); 8536cd98798SBarry Smith y[6 * (*idx) + 5] += alpha6 * (*v); 8549371c9d4SSatish Balay idx++; 8559371c9d4SSatish Balay v++; 8566cd98798SBarry Smith } 8576cd98798SBarry Smith } 8589566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(12.0 * a->nz)); 8599566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 8609566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 8616cd98798SBarry Smith PetscFunctionReturn(0); 8626cd98798SBarry Smith } 8636cd98798SBarry Smith 8649371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz) { 8656cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 8666cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 867d2840e09SBarry Smith const PetscScalar *x, *v; 868d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6; 869b24ad042SBarry Smith PetscInt n, i, jrow, j; 870d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 8716cd98798SBarry Smith 8726cd98798SBarry Smith PetscFunctionBegin; 8739566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 8749566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 8759566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 8766cd98798SBarry Smith idx = a->j; 8776cd98798SBarry Smith v = a->a; 8786cd98798SBarry Smith ii = a->i; 8796cd98798SBarry Smith 8806cd98798SBarry Smith for (i = 0; i < m; i++) { 8816cd98798SBarry Smith jrow = ii[i]; 8826cd98798SBarry Smith n = ii[i + 1] - jrow; 8836cd98798SBarry Smith sum1 = 0.0; 8846cd98798SBarry Smith sum2 = 0.0; 8856cd98798SBarry Smith sum3 = 0.0; 8866cd98798SBarry Smith sum4 = 0.0; 8876cd98798SBarry Smith sum5 = 0.0; 8886cd98798SBarry Smith sum6 = 0.0; 8896cd98798SBarry Smith for (j = 0; j < n; j++) { 8906cd98798SBarry Smith sum1 += v[jrow] * x[6 * idx[jrow]]; 8916cd98798SBarry Smith sum2 += v[jrow] * x[6 * idx[jrow] + 1]; 8926cd98798SBarry Smith sum3 += v[jrow] * x[6 * idx[jrow] + 2]; 8936cd98798SBarry Smith sum4 += v[jrow] * x[6 * idx[jrow] + 3]; 8946cd98798SBarry Smith sum5 += v[jrow] * x[6 * idx[jrow] + 4]; 8956cd98798SBarry Smith sum6 += v[jrow] * x[6 * idx[jrow] + 5]; 8966cd98798SBarry Smith jrow++; 8976cd98798SBarry Smith } 8986cd98798SBarry Smith y[6 * i] += sum1; 8996cd98798SBarry Smith y[6 * i + 1] += sum2; 9006cd98798SBarry Smith y[6 * i + 2] += sum3; 9016cd98798SBarry Smith y[6 * i + 3] += sum4; 9026cd98798SBarry Smith y[6 * i + 4] += sum5; 9036cd98798SBarry Smith y[6 * i + 5] += sum6; 9046cd98798SBarry Smith } 9056cd98798SBarry Smith 9069566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(12.0 * a->nz)); 9079566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 9089566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 9096cd98798SBarry Smith PetscFunctionReturn(0); 9106cd98798SBarry Smith } 9116cd98798SBarry Smith 9129371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz) { 9136cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 9146cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 915d2840e09SBarry Smith const PetscScalar *x, *v; 916d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6; 917d2840e09SBarry Smith PetscInt n, i; 918d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 9196cd98798SBarry Smith 9206cd98798SBarry Smith PetscFunctionBegin; 9219566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 9229566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 9239566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 924bfec09a0SHong Zhang 9256cd98798SBarry Smith for (i = 0; i < m; i++) { 926bfec09a0SHong Zhang idx = a->j + a->i[i]; 927bfec09a0SHong Zhang v = a->a + a->i[i]; 9286cd98798SBarry Smith n = a->i[i + 1] - a->i[i]; 9296cd98798SBarry Smith alpha1 = x[6 * i]; 9306cd98798SBarry Smith alpha2 = x[6 * i + 1]; 9316cd98798SBarry Smith alpha3 = x[6 * i + 2]; 9326cd98798SBarry Smith alpha4 = x[6 * i + 3]; 9336cd98798SBarry Smith alpha5 = x[6 * i + 4]; 9346cd98798SBarry Smith alpha6 = x[6 * i + 5]; 9356cd98798SBarry Smith while (n-- > 0) { 9366cd98798SBarry Smith y[6 * (*idx)] += alpha1 * (*v); 9376cd98798SBarry Smith y[6 * (*idx) + 1] += alpha2 * (*v); 9386cd98798SBarry Smith y[6 * (*idx) + 2] += alpha3 * (*v); 9396cd98798SBarry Smith y[6 * (*idx) + 3] += alpha4 * (*v); 9406cd98798SBarry Smith y[6 * (*idx) + 4] += alpha5 * (*v); 9416cd98798SBarry Smith y[6 * (*idx) + 5] += alpha6 * (*v); 9429371c9d4SSatish Balay idx++; 9439371c9d4SSatish Balay v++; 9446cd98798SBarry Smith } 9456cd98798SBarry Smith } 9469566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(12.0 * a->nz)); 9479566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 9489566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 9496cd98798SBarry Smith PetscFunctionReturn(0); 9506cd98798SBarry Smith } 9516cd98798SBarry Smith 95266ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/ 9539371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_7(Mat A, Vec xx, Vec yy) { 954ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 955ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 956d2840e09SBarry Smith const PetscScalar *x, *v; 957d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7; 958d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 959d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 960ed8eea03SBarry Smith 961ed8eea03SBarry Smith PetscFunctionBegin; 9629566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 9639566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 964ed8eea03SBarry Smith idx = a->j; 965ed8eea03SBarry Smith v = a->a; 966ed8eea03SBarry Smith ii = a->i; 967ed8eea03SBarry Smith 968ed8eea03SBarry Smith for (i = 0; i < m; i++) { 969ed8eea03SBarry Smith jrow = ii[i]; 970ed8eea03SBarry Smith n = ii[i + 1] - jrow; 971ed8eea03SBarry Smith sum1 = 0.0; 972ed8eea03SBarry Smith sum2 = 0.0; 973ed8eea03SBarry Smith sum3 = 0.0; 974ed8eea03SBarry Smith sum4 = 0.0; 975ed8eea03SBarry Smith sum5 = 0.0; 976ed8eea03SBarry Smith sum6 = 0.0; 977ed8eea03SBarry Smith sum7 = 0.0; 97826fbe8dcSKarl Rupp 979b3c51c66SMatthew Knepley nonzerorow += (n > 0); 980ed8eea03SBarry Smith for (j = 0; j < n; j++) { 981ed8eea03SBarry Smith sum1 += v[jrow] * x[7 * idx[jrow]]; 982ed8eea03SBarry Smith sum2 += v[jrow] * x[7 * idx[jrow] + 1]; 983ed8eea03SBarry Smith sum3 += v[jrow] * x[7 * idx[jrow] + 2]; 984ed8eea03SBarry Smith sum4 += v[jrow] * x[7 * idx[jrow] + 3]; 985ed8eea03SBarry Smith sum5 += v[jrow] * x[7 * idx[jrow] + 4]; 986ed8eea03SBarry Smith sum6 += v[jrow] * x[7 * idx[jrow] + 5]; 987ed8eea03SBarry Smith sum7 += v[jrow] * x[7 * idx[jrow] + 6]; 988ed8eea03SBarry Smith jrow++; 989ed8eea03SBarry Smith } 990ed8eea03SBarry Smith y[7 * i] = sum1; 991ed8eea03SBarry Smith y[7 * i + 1] = sum2; 992ed8eea03SBarry Smith y[7 * i + 2] = sum3; 993ed8eea03SBarry Smith y[7 * i + 3] = sum4; 994ed8eea03SBarry Smith y[7 * i + 4] = sum5; 995ed8eea03SBarry Smith y[7 * i + 5] = sum6; 996ed8eea03SBarry Smith y[7 * i + 6] = sum7; 997ed8eea03SBarry Smith } 998ed8eea03SBarry Smith 9999566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(14.0 * a->nz - 7.0 * nonzerorow)); 10009566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 10019566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 1002ed8eea03SBarry Smith PetscFunctionReturn(0); 1003ed8eea03SBarry Smith } 1004ed8eea03SBarry Smith 10059371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A, Vec xx, Vec yy) { 1006ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1007ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1008d2840e09SBarry Smith const PetscScalar *x, *v; 1009d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7; 1010d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 1011d2840e09SBarry Smith PetscInt n, i; 1012ed8eea03SBarry Smith 1013ed8eea03SBarry Smith PetscFunctionBegin; 10149566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 10159566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 10169566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 1017ed8eea03SBarry Smith 1018ed8eea03SBarry Smith for (i = 0; i < m; i++) { 1019ed8eea03SBarry Smith idx = a->j + a->i[i]; 1020ed8eea03SBarry Smith v = a->a + a->i[i]; 1021ed8eea03SBarry Smith n = a->i[i + 1] - a->i[i]; 1022ed8eea03SBarry Smith alpha1 = x[7 * i]; 1023ed8eea03SBarry Smith alpha2 = x[7 * i + 1]; 1024ed8eea03SBarry Smith alpha3 = x[7 * i + 2]; 1025ed8eea03SBarry Smith alpha4 = x[7 * i + 3]; 1026ed8eea03SBarry Smith alpha5 = x[7 * i + 4]; 1027ed8eea03SBarry Smith alpha6 = x[7 * i + 5]; 1028ed8eea03SBarry Smith alpha7 = x[7 * i + 6]; 1029ed8eea03SBarry Smith while (n-- > 0) { 1030ed8eea03SBarry Smith y[7 * (*idx)] += alpha1 * (*v); 1031ed8eea03SBarry Smith y[7 * (*idx) + 1] += alpha2 * (*v); 1032ed8eea03SBarry Smith y[7 * (*idx) + 2] += alpha3 * (*v); 1033ed8eea03SBarry Smith y[7 * (*idx) + 3] += alpha4 * (*v); 1034ed8eea03SBarry Smith y[7 * (*idx) + 4] += alpha5 * (*v); 1035ed8eea03SBarry Smith y[7 * (*idx) + 5] += alpha6 * (*v); 1036ed8eea03SBarry Smith y[7 * (*idx) + 6] += alpha7 * (*v); 10379371c9d4SSatish Balay idx++; 10389371c9d4SSatish Balay v++; 1039ed8eea03SBarry Smith } 1040ed8eea03SBarry Smith } 10419566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(14.0 * a->nz)); 10429566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 10439566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 1044ed8eea03SBarry Smith PetscFunctionReturn(0); 1045ed8eea03SBarry Smith } 1046ed8eea03SBarry Smith 10479371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz) { 1048ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1049ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1050d2840e09SBarry Smith const PetscScalar *x, *v; 1051d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1052d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1053ed8eea03SBarry Smith PetscInt n, i, jrow, j; 1054ed8eea03SBarry Smith 1055ed8eea03SBarry Smith PetscFunctionBegin; 10569566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 10579566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 10589566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 1059ed8eea03SBarry Smith idx = a->j; 1060ed8eea03SBarry Smith v = a->a; 1061ed8eea03SBarry Smith ii = a->i; 1062ed8eea03SBarry Smith 1063ed8eea03SBarry Smith for (i = 0; i < m; i++) { 1064ed8eea03SBarry Smith jrow = ii[i]; 1065ed8eea03SBarry Smith n = ii[i + 1] - jrow; 1066ed8eea03SBarry Smith sum1 = 0.0; 1067ed8eea03SBarry Smith sum2 = 0.0; 1068ed8eea03SBarry Smith sum3 = 0.0; 1069ed8eea03SBarry Smith sum4 = 0.0; 1070ed8eea03SBarry Smith sum5 = 0.0; 1071ed8eea03SBarry Smith sum6 = 0.0; 1072ed8eea03SBarry Smith sum7 = 0.0; 1073ed8eea03SBarry Smith for (j = 0; j < n; j++) { 1074ed8eea03SBarry Smith sum1 += v[jrow] * x[7 * idx[jrow]]; 1075ed8eea03SBarry Smith sum2 += v[jrow] * x[7 * idx[jrow] + 1]; 1076ed8eea03SBarry Smith sum3 += v[jrow] * x[7 * idx[jrow] + 2]; 1077ed8eea03SBarry Smith sum4 += v[jrow] * x[7 * idx[jrow] + 3]; 1078ed8eea03SBarry Smith sum5 += v[jrow] * x[7 * idx[jrow] + 4]; 1079ed8eea03SBarry Smith sum6 += v[jrow] * x[7 * idx[jrow] + 5]; 1080ed8eea03SBarry Smith sum7 += v[jrow] * x[7 * idx[jrow] + 6]; 1081ed8eea03SBarry Smith jrow++; 1082ed8eea03SBarry Smith } 1083ed8eea03SBarry Smith y[7 * i] += sum1; 1084ed8eea03SBarry Smith y[7 * i + 1] += sum2; 1085ed8eea03SBarry Smith y[7 * i + 2] += sum3; 1086ed8eea03SBarry Smith y[7 * i + 3] += sum4; 1087ed8eea03SBarry Smith y[7 * i + 4] += sum5; 1088ed8eea03SBarry Smith y[7 * i + 5] += sum6; 1089ed8eea03SBarry Smith y[7 * i + 6] += sum7; 1090ed8eea03SBarry Smith } 1091ed8eea03SBarry Smith 10929566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(14.0 * a->nz)); 10939566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 10949566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 1095ed8eea03SBarry Smith PetscFunctionReturn(0); 1096ed8eea03SBarry Smith } 1097ed8eea03SBarry Smith 10989371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz) { 1099ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1100ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1101d2840e09SBarry Smith const PetscScalar *x, *v; 1102d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7; 1103d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 1104d2840e09SBarry Smith PetscInt n, i; 1105ed8eea03SBarry Smith 1106ed8eea03SBarry Smith PetscFunctionBegin; 11079566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 11089566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 11099566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 1110ed8eea03SBarry Smith for (i = 0; i < m; i++) { 1111ed8eea03SBarry Smith idx = a->j + a->i[i]; 1112ed8eea03SBarry Smith v = a->a + a->i[i]; 1113ed8eea03SBarry Smith n = a->i[i + 1] - a->i[i]; 1114ed8eea03SBarry Smith alpha1 = x[7 * i]; 1115ed8eea03SBarry Smith alpha2 = x[7 * i + 1]; 1116ed8eea03SBarry Smith alpha3 = x[7 * i + 2]; 1117ed8eea03SBarry Smith alpha4 = x[7 * i + 3]; 1118ed8eea03SBarry Smith alpha5 = x[7 * i + 4]; 1119ed8eea03SBarry Smith alpha6 = x[7 * i + 5]; 1120ed8eea03SBarry Smith alpha7 = x[7 * i + 6]; 1121ed8eea03SBarry Smith while (n-- > 0) { 1122ed8eea03SBarry Smith y[7 * (*idx)] += alpha1 * (*v); 1123ed8eea03SBarry Smith y[7 * (*idx) + 1] += alpha2 * (*v); 1124ed8eea03SBarry Smith y[7 * (*idx) + 2] += alpha3 * (*v); 1125ed8eea03SBarry Smith y[7 * (*idx) + 3] += alpha4 * (*v); 1126ed8eea03SBarry Smith y[7 * (*idx) + 4] += alpha5 * (*v); 1127ed8eea03SBarry Smith y[7 * (*idx) + 5] += alpha6 * (*v); 1128ed8eea03SBarry Smith y[7 * (*idx) + 6] += alpha7 * (*v); 11299371c9d4SSatish Balay idx++; 11309371c9d4SSatish Balay v++; 1131ed8eea03SBarry Smith } 1132ed8eea03SBarry Smith } 11339566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(14.0 * a->nz)); 11349566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 11359566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 1136ed8eea03SBarry Smith PetscFunctionReturn(0); 1137ed8eea03SBarry Smith } 1138ed8eea03SBarry Smith 11399371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_8(Mat A, Vec xx, Vec yy) { 114066ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 114166ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1142d2840e09SBarry Smith const PetscScalar *x, *v; 1143d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1144d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1145d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 114666ed3db0SBarry Smith 114766ed3db0SBarry Smith PetscFunctionBegin; 11489566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 11499566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 115066ed3db0SBarry Smith idx = a->j; 115166ed3db0SBarry Smith v = a->a; 115266ed3db0SBarry Smith ii = a->i; 115366ed3db0SBarry Smith 115466ed3db0SBarry Smith for (i = 0; i < m; i++) { 115566ed3db0SBarry Smith jrow = ii[i]; 115666ed3db0SBarry Smith n = ii[i + 1] - jrow; 115766ed3db0SBarry Smith sum1 = 0.0; 115866ed3db0SBarry Smith sum2 = 0.0; 115966ed3db0SBarry Smith sum3 = 0.0; 116066ed3db0SBarry Smith sum4 = 0.0; 116166ed3db0SBarry Smith sum5 = 0.0; 116266ed3db0SBarry Smith sum6 = 0.0; 116366ed3db0SBarry Smith sum7 = 0.0; 116466ed3db0SBarry Smith sum8 = 0.0; 116526fbe8dcSKarl Rupp 1166b3c51c66SMatthew Knepley nonzerorow += (n > 0); 116766ed3db0SBarry Smith for (j = 0; j < n; j++) { 116866ed3db0SBarry Smith sum1 += v[jrow] * x[8 * idx[jrow]]; 116966ed3db0SBarry Smith sum2 += v[jrow] * x[8 * idx[jrow] + 1]; 117066ed3db0SBarry Smith sum3 += v[jrow] * x[8 * idx[jrow] + 2]; 117166ed3db0SBarry Smith sum4 += v[jrow] * x[8 * idx[jrow] + 3]; 117266ed3db0SBarry Smith sum5 += v[jrow] * x[8 * idx[jrow] + 4]; 117366ed3db0SBarry Smith sum6 += v[jrow] * x[8 * idx[jrow] + 5]; 117466ed3db0SBarry Smith sum7 += v[jrow] * x[8 * idx[jrow] + 6]; 117566ed3db0SBarry Smith sum8 += v[jrow] * x[8 * idx[jrow] + 7]; 117666ed3db0SBarry Smith jrow++; 117766ed3db0SBarry Smith } 117866ed3db0SBarry Smith y[8 * i] = sum1; 117966ed3db0SBarry Smith y[8 * i + 1] = sum2; 118066ed3db0SBarry Smith y[8 * i + 2] = sum3; 118166ed3db0SBarry Smith y[8 * i + 3] = sum4; 118266ed3db0SBarry Smith y[8 * i + 4] = sum5; 118366ed3db0SBarry Smith y[8 * i + 5] = sum6; 118466ed3db0SBarry Smith y[8 * i + 6] = sum7; 118566ed3db0SBarry Smith y[8 * i + 7] = sum8; 118666ed3db0SBarry Smith } 118766ed3db0SBarry Smith 11889566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(16.0 * a->nz - 8.0 * nonzerorow)); 11899566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 11909566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 119166ed3db0SBarry Smith PetscFunctionReturn(0); 119266ed3db0SBarry Smith } 119366ed3db0SBarry Smith 11949371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A, Vec xx, Vec yy) { 119566ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 119666ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1197d2840e09SBarry Smith const PetscScalar *x, *v; 1198d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 1199d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 1200d2840e09SBarry Smith PetscInt n, i; 120166ed3db0SBarry Smith 120266ed3db0SBarry Smith PetscFunctionBegin; 12039566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 12049566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 12059566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 1206bfec09a0SHong Zhang 120766ed3db0SBarry Smith for (i = 0; i < m; i++) { 1208bfec09a0SHong Zhang idx = a->j + a->i[i]; 1209bfec09a0SHong Zhang v = a->a + a->i[i]; 121066ed3db0SBarry Smith n = a->i[i + 1] - a->i[i]; 121166ed3db0SBarry Smith alpha1 = x[8 * i]; 121266ed3db0SBarry Smith alpha2 = x[8 * i + 1]; 121366ed3db0SBarry Smith alpha3 = x[8 * i + 2]; 121466ed3db0SBarry Smith alpha4 = x[8 * i + 3]; 121566ed3db0SBarry Smith alpha5 = x[8 * i + 4]; 121666ed3db0SBarry Smith alpha6 = x[8 * i + 5]; 121766ed3db0SBarry Smith alpha7 = x[8 * i + 6]; 121866ed3db0SBarry Smith alpha8 = x[8 * i + 7]; 121966ed3db0SBarry Smith while (n-- > 0) { 122066ed3db0SBarry Smith y[8 * (*idx)] += alpha1 * (*v); 122166ed3db0SBarry Smith y[8 * (*idx) + 1] += alpha2 * (*v); 122266ed3db0SBarry Smith y[8 * (*idx) + 2] += alpha3 * (*v); 122366ed3db0SBarry Smith y[8 * (*idx) + 3] += alpha4 * (*v); 122466ed3db0SBarry Smith y[8 * (*idx) + 4] += alpha5 * (*v); 122566ed3db0SBarry Smith y[8 * (*idx) + 5] += alpha6 * (*v); 122666ed3db0SBarry Smith y[8 * (*idx) + 6] += alpha7 * (*v); 122766ed3db0SBarry Smith y[8 * (*idx) + 7] += alpha8 * (*v); 12289371c9d4SSatish Balay idx++; 12299371c9d4SSatish Balay v++; 123066ed3db0SBarry Smith } 123166ed3db0SBarry Smith } 12329566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(16.0 * a->nz)); 12339566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 12349566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 123566ed3db0SBarry Smith PetscFunctionReturn(0); 123666ed3db0SBarry Smith } 123766ed3db0SBarry Smith 12389371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz) { 123966ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 124066ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1241d2840e09SBarry Smith const PetscScalar *x, *v; 1242d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1243d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1244b24ad042SBarry Smith PetscInt n, i, jrow, j; 124566ed3db0SBarry Smith 124666ed3db0SBarry Smith PetscFunctionBegin; 12479566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 12489566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 12499566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 125066ed3db0SBarry Smith idx = a->j; 125166ed3db0SBarry Smith v = a->a; 125266ed3db0SBarry Smith ii = a->i; 125366ed3db0SBarry Smith 125466ed3db0SBarry Smith for (i = 0; i < m; i++) { 125566ed3db0SBarry Smith jrow = ii[i]; 125666ed3db0SBarry Smith n = ii[i + 1] - jrow; 125766ed3db0SBarry Smith sum1 = 0.0; 125866ed3db0SBarry Smith sum2 = 0.0; 125966ed3db0SBarry Smith sum3 = 0.0; 126066ed3db0SBarry Smith sum4 = 0.0; 126166ed3db0SBarry Smith sum5 = 0.0; 126266ed3db0SBarry Smith sum6 = 0.0; 126366ed3db0SBarry Smith sum7 = 0.0; 126466ed3db0SBarry Smith sum8 = 0.0; 126566ed3db0SBarry Smith for (j = 0; j < n; j++) { 126666ed3db0SBarry Smith sum1 += v[jrow] * x[8 * idx[jrow]]; 126766ed3db0SBarry Smith sum2 += v[jrow] * x[8 * idx[jrow] + 1]; 126866ed3db0SBarry Smith sum3 += v[jrow] * x[8 * idx[jrow] + 2]; 126966ed3db0SBarry Smith sum4 += v[jrow] * x[8 * idx[jrow] + 3]; 127066ed3db0SBarry Smith sum5 += v[jrow] * x[8 * idx[jrow] + 4]; 127166ed3db0SBarry Smith sum6 += v[jrow] * x[8 * idx[jrow] + 5]; 127266ed3db0SBarry Smith sum7 += v[jrow] * x[8 * idx[jrow] + 6]; 127366ed3db0SBarry Smith sum8 += v[jrow] * x[8 * idx[jrow] + 7]; 127466ed3db0SBarry Smith jrow++; 127566ed3db0SBarry Smith } 127666ed3db0SBarry Smith y[8 * i] += sum1; 127766ed3db0SBarry Smith y[8 * i + 1] += sum2; 127866ed3db0SBarry Smith y[8 * i + 2] += sum3; 127966ed3db0SBarry Smith y[8 * i + 3] += sum4; 128066ed3db0SBarry Smith y[8 * i + 4] += sum5; 128166ed3db0SBarry Smith y[8 * i + 5] += sum6; 128266ed3db0SBarry Smith y[8 * i + 6] += sum7; 128366ed3db0SBarry Smith y[8 * i + 7] += sum8; 128466ed3db0SBarry Smith } 128566ed3db0SBarry Smith 12869566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(16.0 * a->nz)); 12879566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 12889566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 128966ed3db0SBarry Smith PetscFunctionReturn(0); 129066ed3db0SBarry Smith } 129166ed3db0SBarry Smith 12929371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz) { 129366ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 129466ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1295d2840e09SBarry Smith const PetscScalar *x, *v; 1296d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 1297d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 1298d2840e09SBarry Smith PetscInt n, i; 129966ed3db0SBarry Smith 130066ed3db0SBarry Smith PetscFunctionBegin; 13019566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 13029566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 13039566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 130466ed3db0SBarry Smith for (i = 0; i < m; i++) { 1305bfec09a0SHong Zhang idx = a->j + a->i[i]; 1306bfec09a0SHong Zhang v = a->a + a->i[i]; 130766ed3db0SBarry Smith n = a->i[i + 1] - a->i[i]; 130866ed3db0SBarry Smith alpha1 = x[8 * i]; 130966ed3db0SBarry Smith alpha2 = x[8 * i + 1]; 131066ed3db0SBarry Smith alpha3 = x[8 * i + 2]; 131166ed3db0SBarry Smith alpha4 = x[8 * i + 3]; 131266ed3db0SBarry Smith alpha5 = x[8 * i + 4]; 131366ed3db0SBarry Smith alpha6 = x[8 * i + 5]; 131466ed3db0SBarry Smith alpha7 = x[8 * i + 6]; 131566ed3db0SBarry Smith alpha8 = x[8 * i + 7]; 131666ed3db0SBarry Smith while (n-- > 0) { 131766ed3db0SBarry Smith y[8 * (*idx)] += alpha1 * (*v); 131866ed3db0SBarry Smith y[8 * (*idx) + 1] += alpha2 * (*v); 131966ed3db0SBarry Smith y[8 * (*idx) + 2] += alpha3 * (*v); 132066ed3db0SBarry Smith y[8 * (*idx) + 3] += alpha4 * (*v); 132166ed3db0SBarry Smith y[8 * (*idx) + 4] += alpha5 * (*v); 132266ed3db0SBarry Smith y[8 * (*idx) + 5] += alpha6 * (*v); 132366ed3db0SBarry Smith y[8 * (*idx) + 6] += alpha7 * (*v); 132466ed3db0SBarry Smith y[8 * (*idx) + 7] += alpha8 * (*v); 13259371c9d4SSatish Balay idx++; 13269371c9d4SSatish Balay v++; 132766ed3db0SBarry Smith } 132866ed3db0SBarry Smith } 13299566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(16.0 * a->nz)); 13309566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 13319566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 13322f7816d4SBarry Smith PetscFunctionReturn(0); 13332f7816d4SBarry Smith } 13342f7816d4SBarry Smith 13352b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/ 13369371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_9(Mat A, Vec xx, Vec yy) { 13372b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 13382b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1339d2840e09SBarry Smith const PetscScalar *x, *v; 1340d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1341d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1342d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 13432b692628SMatthew Knepley 13442b692628SMatthew Knepley PetscFunctionBegin; 13459566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 13469566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 13472b692628SMatthew Knepley idx = a->j; 13482b692628SMatthew Knepley v = a->a; 13492b692628SMatthew Knepley ii = a->i; 13502b692628SMatthew Knepley 13512b692628SMatthew Knepley for (i = 0; i < m; i++) { 13522b692628SMatthew Knepley jrow = ii[i]; 13532b692628SMatthew Knepley n = ii[i + 1] - jrow; 13542b692628SMatthew Knepley sum1 = 0.0; 13552b692628SMatthew Knepley sum2 = 0.0; 13562b692628SMatthew Knepley sum3 = 0.0; 13572b692628SMatthew Knepley sum4 = 0.0; 13582b692628SMatthew Knepley sum5 = 0.0; 13592b692628SMatthew Knepley sum6 = 0.0; 13602b692628SMatthew Knepley sum7 = 0.0; 13612b692628SMatthew Knepley sum8 = 0.0; 13622b692628SMatthew Knepley sum9 = 0.0; 136326fbe8dcSKarl Rupp 1364b3c51c66SMatthew Knepley nonzerorow += (n > 0); 13652b692628SMatthew Knepley for (j = 0; j < n; j++) { 13662b692628SMatthew Knepley sum1 += v[jrow] * x[9 * idx[jrow]]; 13672b692628SMatthew Knepley sum2 += v[jrow] * x[9 * idx[jrow] + 1]; 13682b692628SMatthew Knepley sum3 += v[jrow] * x[9 * idx[jrow] + 2]; 13692b692628SMatthew Knepley sum4 += v[jrow] * x[9 * idx[jrow] + 3]; 13702b692628SMatthew Knepley sum5 += v[jrow] * x[9 * idx[jrow] + 4]; 13712b692628SMatthew Knepley sum6 += v[jrow] * x[9 * idx[jrow] + 5]; 13722b692628SMatthew Knepley sum7 += v[jrow] * x[9 * idx[jrow] + 6]; 13732b692628SMatthew Knepley sum8 += v[jrow] * x[9 * idx[jrow] + 7]; 13742b692628SMatthew Knepley sum9 += v[jrow] * x[9 * idx[jrow] + 8]; 13752b692628SMatthew Knepley jrow++; 13762b692628SMatthew Knepley } 13772b692628SMatthew Knepley y[9 * i] = sum1; 13782b692628SMatthew Knepley y[9 * i + 1] = sum2; 13792b692628SMatthew Knepley y[9 * i + 2] = sum3; 13802b692628SMatthew Knepley y[9 * i + 3] = sum4; 13812b692628SMatthew Knepley y[9 * i + 4] = sum5; 13822b692628SMatthew Knepley y[9 * i + 5] = sum6; 13832b692628SMatthew Knepley y[9 * i + 6] = sum7; 13842b692628SMatthew Knepley y[9 * i + 7] = sum8; 13852b692628SMatthew Knepley y[9 * i + 8] = sum9; 13862b692628SMatthew Knepley } 13872b692628SMatthew Knepley 13889566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(18.0 * a->nz - 9 * nonzerorow)); 13899566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 13909566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 13912b692628SMatthew Knepley PetscFunctionReturn(0); 13922b692628SMatthew Knepley } 13932b692628SMatthew Knepley 1394b9cda22cSBarry Smith /* ------------------------------------------------------------------------------*/ 1395b9cda22cSBarry Smith 13969371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A, Vec xx, Vec yy) { 13972b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 13982b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1399d2840e09SBarry Smith const PetscScalar *x, *v; 1400d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9; 1401d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 1402d2840e09SBarry Smith PetscInt n, i; 14032b692628SMatthew Knepley 14042b692628SMatthew Knepley PetscFunctionBegin; 14059566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 14069566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 14079566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 14082b692628SMatthew Knepley 14092b692628SMatthew Knepley for (i = 0; i < m; i++) { 14102b692628SMatthew Knepley idx = a->j + a->i[i]; 14112b692628SMatthew Knepley v = a->a + a->i[i]; 14122b692628SMatthew Knepley n = a->i[i + 1] - a->i[i]; 14132b692628SMatthew Knepley alpha1 = x[9 * i]; 14142b692628SMatthew Knepley alpha2 = x[9 * i + 1]; 14152b692628SMatthew Knepley alpha3 = x[9 * i + 2]; 14162b692628SMatthew Knepley alpha4 = x[9 * i + 3]; 14172b692628SMatthew Knepley alpha5 = x[9 * i + 4]; 14182b692628SMatthew Knepley alpha6 = x[9 * i + 5]; 14192b692628SMatthew Knepley alpha7 = x[9 * i + 6]; 14202b692628SMatthew Knepley alpha8 = x[9 * i + 7]; 14212f6bd0c6SMatthew Knepley alpha9 = x[9 * i + 8]; 14222b692628SMatthew Knepley while (n-- > 0) { 14232b692628SMatthew Knepley y[9 * (*idx)] += alpha1 * (*v); 14242b692628SMatthew Knepley y[9 * (*idx) + 1] += alpha2 * (*v); 14252b692628SMatthew Knepley y[9 * (*idx) + 2] += alpha3 * (*v); 14262b692628SMatthew Knepley y[9 * (*idx) + 3] += alpha4 * (*v); 14272b692628SMatthew Knepley y[9 * (*idx) + 4] += alpha5 * (*v); 14282b692628SMatthew Knepley y[9 * (*idx) + 5] += alpha6 * (*v); 14292b692628SMatthew Knepley y[9 * (*idx) + 6] += alpha7 * (*v); 14302b692628SMatthew Knepley y[9 * (*idx) + 7] += alpha8 * (*v); 14312b692628SMatthew Knepley y[9 * (*idx) + 8] += alpha9 * (*v); 14329371c9d4SSatish Balay idx++; 14339371c9d4SSatish Balay v++; 14342b692628SMatthew Knepley } 14352b692628SMatthew Knepley } 14369566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(18.0 * a->nz)); 14379566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 14389566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 14392b692628SMatthew Knepley PetscFunctionReturn(0); 14402b692628SMatthew Knepley } 14412b692628SMatthew Knepley 14429371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz) { 14432b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 14442b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1445d2840e09SBarry Smith const PetscScalar *x, *v; 1446d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1447d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1448b24ad042SBarry Smith PetscInt n, i, jrow, j; 14492b692628SMatthew Knepley 14502b692628SMatthew Knepley PetscFunctionBegin; 14519566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 14529566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 14539566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 14542b692628SMatthew Knepley idx = a->j; 14552b692628SMatthew Knepley v = a->a; 14562b692628SMatthew Knepley ii = a->i; 14572b692628SMatthew Knepley 14582b692628SMatthew Knepley for (i = 0; i < m; i++) { 14592b692628SMatthew Knepley jrow = ii[i]; 14602b692628SMatthew Knepley n = ii[i + 1] - jrow; 14612b692628SMatthew Knepley sum1 = 0.0; 14622b692628SMatthew Knepley sum2 = 0.0; 14632b692628SMatthew Knepley sum3 = 0.0; 14642b692628SMatthew Knepley sum4 = 0.0; 14652b692628SMatthew Knepley sum5 = 0.0; 14662b692628SMatthew Knepley sum6 = 0.0; 14672b692628SMatthew Knepley sum7 = 0.0; 14682b692628SMatthew Knepley sum8 = 0.0; 14692b692628SMatthew Knepley sum9 = 0.0; 14702b692628SMatthew Knepley for (j = 0; j < n; j++) { 14712b692628SMatthew Knepley sum1 += v[jrow] * x[9 * idx[jrow]]; 14722b692628SMatthew Knepley sum2 += v[jrow] * x[9 * idx[jrow] + 1]; 14732b692628SMatthew Knepley sum3 += v[jrow] * x[9 * idx[jrow] + 2]; 14742b692628SMatthew Knepley sum4 += v[jrow] * x[9 * idx[jrow] + 3]; 14752b692628SMatthew Knepley sum5 += v[jrow] * x[9 * idx[jrow] + 4]; 14762b692628SMatthew Knepley sum6 += v[jrow] * x[9 * idx[jrow] + 5]; 14772b692628SMatthew Knepley sum7 += v[jrow] * x[9 * idx[jrow] + 6]; 14782b692628SMatthew Knepley sum8 += v[jrow] * x[9 * idx[jrow] + 7]; 14792b692628SMatthew Knepley sum9 += v[jrow] * x[9 * idx[jrow] + 8]; 14802b692628SMatthew Knepley jrow++; 14812b692628SMatthew Knepley } 14822b692628SMatthew Knepley y[9 * i] += sum1; 14832b692628SMatthew Knepley y[9 * i + 1] += sum2; 14842b692628SMatthew Knepley y[9 * i + 2] += sum3; 14852b692628SMatthew Knepley y[9 * i + 3] += sum4; 14862b692628SMatthew Knepley y[9 * i + 4] += sum5; 14872b692628SMatthew Knepley y[9 * i + 5] += sum6; 14882b692628SMatthew Knepley y[9 * i + 6] += sum7; 14892b692628SMatthew Knepley y[9 * i + 7] += sum8; 14902b692628SMatthew Knepley y[9 * i + 8] += sum9; 14912b692628SMatthew Knepley } 14922b692628SMatthew Knepley 14939566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(18.0 * a->nz)); 14949566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 14959566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 14962b692628SMatthew Knepley PetscFunctionReturn(0); 14972b692628SMatthew Knepley } 14982b692628SMatthew Knepley 14999371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz) { 15002b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 15012b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1502d2840e09SBarry Smith const PetscScalar *x, *v; 1503d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9; 1504d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 1505d2840e09SBarry Smith PetscInt n, i; 15062b692628SMatthew Knepley 15072b692628SMatthew Knepley PetscFunctionBegin; 15089566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 15099566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 15109566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 15112b692628SMatthew Knepley for (i = 0; i < m; i++) { 15122b692628SMatthew Knepley idx = a->j + a->i[i]; 15132b692628SMatthew Knepley v = a->a + a->i[i]; 15142b692628SMatthew Knepley n = a->i[i + 1] - a->i[i]; 15152b692628SMatthew Knepley alpha1 = x[9 * i]; 15162b692628SMatthew Knepley alpha2 = x[9 * i + 1]; 15172b692628SMatthew Knepley alpha3 = x[9 * i + 2]; 15182b692628SMatthew Knepley alpha4 = x[9 * i + 3]; 15192b692628SMatthew Knepley alpha5 = x[9 * i + 4]; 15202b692628SMatthew Knepley alpha6 = x[9 * i + 5]; 15212b692628SMatthew Knepley alpha7 = x[9 * i + 6]; 15222b692628SMatthew Knepley alpha8 = x[9 * i + 7]; 15232b692628SMatthew Knepley alpha9 = x[9 * i + 8]; 15242b692628SMatthew Knepley while (n-- > 0) { 15252b692628SMatthew Knepley y[9 * (*idx)] += alpha1 * (*v); 15262b692628SMatthew Knepley y[9 * (*idx) + 1] += alpha2 * (*v); 15272b692628SMatthew Knepley y[9 * (*idx) + 2] += alpha3 * (*v); 15282b692628SMatthew Knepley y[9 * (*idx) + 3] += alpha4 * (*v); 15292b692628SMatthew Knepley y[9 * (*idx) + 4] += alpha5 * (*v); 15302b692628SMatthew Knepley y[9 * (*idx) + 5] += alpha6 * (*v); 15312b692628SMatthew Knepley y[9 * (*idx) + 6] += alpha7 * (*v); 15322b692628SMatthew Knepley y[9 * (*idx) + 7] += alpha8 * (*v); 15332b692628SMatthew Knepley y[9 * (*idx) + 8] += alpha9 * (*v); 15349371c9d4SSatish Balay idx++; 15359371c9d4SSatish Balay v++; 15362b692628SMatthew Knepley } 15372b692628SMatthew Knepley } 15389566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(18.0 * a->nz)); 15399566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 15409566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 15412b692628SMatthew Knepley PetscFunctionReturn(0); 15422b692628SMatthew Knepley } 15439371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_10(Mat A, Vec xx, Vec yy) { 1544b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1545b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1546d2840e09SBarry Smith const PetscScalar *x, *v; 1547d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1548d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1549d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 1550b9cda22cSBarry Smith 1551b9cda22cSBarry Smith PetscFunctionBegin; 15529566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 15539566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 1554b9cda22cSBarry Smith idx = a->j; 1555b9cda22cSBarry Smith v = a->a; 1556b9cda22cSBarry Smith ii = a->i; 1557b9cda22cSBarry Smith 1558b9cda22cSBarry Smith for (i = 0; i < m; i++) { 1559b9cda22cSBarry Smith jrow = ii[i]; 1560b9cda22cSBarry Smith n = ii[i + 1] - jrow; 1561b9cda22cSBarry Smith sum1 = 0.0; 1562b9cda22cSBarry Smith sum2 = 0.0; 1563b9cda22cSBarry Smith sum3 = 0.0; 1564b9cda22cSBarry Smith sum4 = 0.0; 1565b9cda22cSBarry Smith sum5 = 0.0; 1566b9cda22cSBarry Smith sum6 = 0.0; 1567b9cda22cSBarry Smith sum7 = 0.0; 1568b9cda22cSBarry Smith sum8 = 0.0; 1569b9cda22cSBarry Smith sum9 = 0.0; 1570b9cda22cSBarry Smith sum10 = 0.0; 157126fbe8dcSKarl Rupp 1572b3c51c66SMatthew Knepley nonzerorow += (n > 0); 1573b9cda22cSBarry Smith for (j = 0; j < n; j++) { 1574b9cda22cSBarry Smith sum1 += v[jrow] * x[10 * idx[jrow]]; 1575b9cda22cSBarry Smith sum2 += v[jrow] * x[10 * idx[jrow] + 1]; 1576b9cda22cSBarry Smith sum3 += v[jrow] * x[10 * idx[jrow] + 2]; 1577b9cda22cSBarry Smith sum4 += v[jrow] * x[10 * idx[jrow] + 3]; 1578b9cda22cSBarry Smith sum5 += v[jrow] * x[10 * idx[jrow] + 4]; 1579b9cda22cSBarry Smith sum6 += v[jrow] * x[10 * idx[jrow] + 5]; 1580b9cda22cSBarry Smith sum7 += v[jrow] * x[10 * idx[jrow] + 6]; 1581b9cda22cSBarry Smith sum8 += v[jrow] * x[10 * idx[jrow] + 7]; 1582b9cda22cSBarry Smith sum9 += v[jrow] * x[10 * idx[jrow] + 8]; 1583b9cda22cSBarry Smith sum10 += v[jrow] * x[10 * idx[jrow] + 9]; 1584b9cda22cSBarry Smith jrow++; 1585b9cda22cSBarry Smith } 1586b9cda22cSBarry Smith y[10 * i] = sum1; 1587b9cda22cSBarry Smith y[10 * i + 1] = sum2; 1588b9cda22cSBarry Smith y[10 * i + 2] = sum3; 1589b9cda22cSBarry Smith y[10 * i + 3] = sum4; 1590b9cda22cSBarry Smith y[10 * i + 4] = sum5; 1591b9cda22cSBarry Smith y[10 * i + 5] = sum6; 1592b9cda22cSBarry Smith y[10 * i + 6] = sum7; 1593b9cda22cSBarry Smith y[10 * i + 7] = sum8; 1594b9cda22cSBarry Smith y[10 * i + 8] = sum9; 1595b9cda22cSBarry Smith y[10 * i + 9] = sum10; 1596b9cda22cSBarry Smith } 1597b9cda22cSBarry Smith 15989566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(20.0 * a->nz - 10.0 * nonzerorow)); 15999566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 16009566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 1601b9cda22cSBarry Smith PetscFunctionReturn(0); 1602b9cda22cSBarry Smith } 1603b9cda22cSBarry Smith 16049371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz) { 1605b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1606b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1607d2840e09SBarry Smith const PetscScalar *x, *v; 1608d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1609d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1610b9cda22cSBarry Smith PetscInt n, i, jrow, j; 1611b9cda22cSBarry Smith 1612b9cda22cSBarry Smith PetscFunctionBegin; 16139566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 16149566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 16159566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 1616b9cda22cSBarry Smith idx = a->j; 1617b9cda22cSBarry Smith v = a->a; 1618b9cda22cSBarry Smith ii = a->i; 1619b9cda22cSBarry Smith 1620b9cda22cSBarry Smith for (i = 0; i < m; i++) { 1621b9cda22cSBarry Smith jrow = ii[i]; 1622b9cda22cSBarry Smith n = ii[i + 1] - jrow; 1623b9cda22cSBarry Smith sum1 = 0.0; 1624b9cda22cSBarry Smith sum2 = 0.0; 1625b9cda22cSBarry Smith sum3 = 0.0; 1626b9cda22cSBarry Smith sum4 = 0.0; 1627b9cda22cSBarry Smith sum5 = 0.0; 1628b9cda22cSBarry Smith sum6 = 0.0; 1629b9cda22cSBarry Smith sum7 = 0.0; 1630b9cda22cSBarry Smith sum8 = 0.0; 1631b9cda22cSBarry Smith sum9 = 0.0; 1632b9cda22cSBarry Smith sum10 = 0.0; 1633b9cda22cSBarry Smith for (j = 0; j < n; j++) { 1634b9cda22cSBarry Smith sum1 += v[jrow] * x[10 * idx[jrow]]; 1635b9cda22cSBarry Smith sum2 += v[jrow] * x[10 * idx[jrow] + 1]; 1636b9cda22cSBarry Smith sum3 += v[jrow] * x[10 * idx[jrow] + 2]; 1637b9cda22cSBarry Smith sum4 += v[jrow] * x[10 * idx[jrow] + 3]; 1638b9cda22cSBarry Smith sum5 += v[jrow] * x[10 * idx[jrow] + 4]; 1639b9cda22cSBarry Smith sum6 += v[jrow] * x[10 * idx[jrow] + 5]; 1640b9cda22cSBarry Smith sum7 += v[jrow] * x[10 * idx[jrow] + 6]; 1641b9cda22cSBarry Smith sum8 += v[jrow] * x[10 * idx[jrow] + 7]; 1642b9cda22cSBarry Smith sum9 += v[jrow] * x[10 * idx[jrow] + 8]; 1643b9cda22cSBarry Smith sum10 += v[jrow] * x[10 * idx[jrow] + 9]; 1644b9cda22cSBarry Smith jrow++; 1645b9cda22cSBarry Smith } 1646b9cda22cSBarry Smith y[10 * i] += sum1; 1647b9cda22cSBarry Smith y[10 * i + 1] += sum2; 1648b9cda22cSBarry Smith y[10 * i + 2] += sum3; 1649b9cda22cSBarry Smith y[10 * i + 3] += sum4; 1650b9cda22cSBarry Smith y[10 * i + 4] += sum5; 1651b9cda22cSBarry Smith y[10 * i + 5] += sum6; 1652b9cda22cSBarry Smith y[10 * i + 6] += sum7; 1653b9cda22cSBarry Smith y[10 * i + 7] += sum8; 1654b9cda22cSBarry Smith y[10 * i + 8] += sum9; 1655b9cda22cSBarry Smith y[10 * i + 9] += sum10; 1656b9cda22cSBarry Smith } 1657b9cda22cSBarry Smith 16589566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(20.0 * a->nz)); 16599566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 16609566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 1661b9cda22cSBarry Smith PetscFunctionReturn(0); 1662b9cda22cSBarry Smith } 1663b9cda22cSBarry Smith 16649371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A, Vec xx, Vec yy) { 1665b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1666b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1667d2840e09SBarry Smith const PetscScalar *x, *v; 1668d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10; 1669d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 1670d2840e09SBarry Smith PetscInt n, i; 1671b9cda22cSBarry Smith 1672b9cda22cSBarry Smith PetscFunctionBegin; 16739566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 16749566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 16759566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 1676b9cda22cSBarry Smith 1677b9cda22cSBarry Smith for (i = 0; i < m; i++) { 1678b9cda22cSBarry Smith idx = a->j + a->i[i]; 1679b9cda22cSBarry Smith v = a->a + a->i[i]; 1680b9cda22cSBarry Smith n = a->i[i + 1] - a->i[i]; 1681e29fdc22SBarry Smith alpha1 = x[10 * i]; 1682e29fdc22SBarry Smith alpha2 = x[10 * i + 1]; 1683e29fdc22SBarry Smith alpha3 = x[10 * i + 2]; 1684e29fdc22SBarry Smith alpha4 = x[10 * i + 3]; 1685e29fdc22SBarry Smith alpha5 = x[10 * i + 4]; 1686e29fdc22SBarry Smith alpha6 = x[10 * i + 5]; 1687e29fdc22SBarry Smith alpha7 = x[10 * i + 6]; 1688e29fdc22SBarry Smith alpha8 = x[10 * i + 7]; 1689e29fdc22SBarry Smith alpha9 = x[10 * i + 8]; 1690b9cda22cSBarry Smith alpha10 = x[10 * i + 9]; 1691b9cda22cSBarry Smith while (n-- > 0) { 1692e29fdc22SBarry Smith y[10 * (*idx)] += alpha1 * (*v); 1693e29fdc22SBarry Smith y[10 * (*idx) + 1] += alpha2 * (*v); 1694e29fdc22SBarry Smith y[10 * (*idx) + 2] += alpha3 * (*v); 1695e29fdc22SBarry Smith y[10 * (*idx) + 3] += alpha4 * (*v); 1696e29fdc22SBarry Smith y[10 * (*idx) + 4] += alpha5 * (*v); 1697e29fdc22SBarry Smith y[10 * (*idx) + 5] += alpha6 * (*v); 1698e29fdc22SBarry Smith y[10 * (*idx) + 6] += alpha7 * (*v); 1699e29fdc22SBarry Smith y[10 * (*idx) + 7] += alpha8 * (*v); 1700e29fdc22SBarry Smith y[10 * (*idx) + 8] += alpha9 * (*v); 1701b9cda22cSBarry Smith y[10 * (*idx) + 9] += alpha10 * (*v); 17029371c9d4SSatish Balay idx++; 17039371c9d4SSatish Balay v++; 1704b9cda22cSBarry Smith } 1705b9cda22cSBarry Smith } 17069566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(20.0 * a->nz)); 17079566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 17089566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 1709b9cda22cSBarry Smith PetscFunctionReturn(0); 1710b9cda22cSBarry Smith } 1711b9cda22cSBarry Smith 17129371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz) { 1713b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1714b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1715d2840e09SBarry Smith const PetscScalar *x, *v; 1716d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10; 1717d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 1718d2840e09SBarry Smith PetscInt n, i; 1719b9cda22cSBarry Smith 1720b9cda22cSBarry Smith PetscFunctionBegin; 17219566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 17229566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 17239566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 1724b9cda22cSBarry Smith for (i = 0; i < m; i++) { 1725b9cda22cSBarry Smith idx = a->j + a->i[i]; 1726b9cda22cSBarry Smith v = a->a + a->i[i]; 1727b9cda22cSBarry Smith n = a->i[i + 1] - a->i[i]; 1728b9cda22cSBarry Smith alpha1 = x[10 * i]; 1729b9cda22cSBarry Smith alpha2 = x[10 * i + 1]; 1730b9cda22cSBarry Smith alpha3 = x[10 * i + 2]; 1731b9cda22cSBarry Smith alpha4 = x[10 * i + 3]; 1732b9cda22cSBarry Smith alpha5 = x[10 * i + 4]; 1733b9cda22cSBarry Smith alpha6 = x[10 * i + 5]; 1734b9cda22cSBarry Smith alpha7 = x[10 * i + 6]; 1735b9cda22cSBarry Smith alpha8 = x[10 * i + 7]; 1736b9cda22cSBarry Smith alpha9 = x[10 * i + 8]; 1737b9cda22cSBarry Smith alpha10 = x[10 * i + 9]; 1738b9cda22cSBarry Smith while (n-- > 0) { 1739b9cda22cSBarry Smith y[10 * (*idx)] += alpha1 * (*v); 1740b9cda22cSBarry Smith y[10 * (*idx) + 1] += alpha2 * (*v); 1741b9cda22cSBarry Smith y[10 * (*idx) + 2] += alpha3 * (*v); 1742b9cda22cSBarry Smith y[10 * (*idx) + 3] += alpha4 * (*v); 1743b9cda22cSBarry Smith y[10 * (*idx) + 4] += alpha5 * (*v); 1744b9cda22cSBarry Smith y[10 * (*idx) + 5] += alpha6 * (*v); 1745b9cda22cSBarry Smith y[10 * (*idx) + 6] += alpha7 * (*v); 1746b9cda22cSBarry Smith y[10 * (*idx) + 7] += alpha8 * (*v); 1747b9cda22cSBarry Smith y[10 * (*idx) + 8] += alpha9 * (*v); 1748b9cda22cSBarry Smith y[10 * (*idx) + 9] += alpha10 * (*v); 17499371c9d4SSatish Balay idx++; 17509371c9d4SSatish Balay v++; 1751b9cda22cSBarry Smith } 1752b9cda22cSBarry Smith } 17539566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(20.0 * a->nz)); 17549566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 17559566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 1756b9cda22cSBarry Smith PetscFunctionReturn(0); 1757b9cda22cSBarry Smith } 1758b9cda22cSBarry Smith 17592f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/ 17609371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_11(Mat A, Vec xx, Vec yy) { 1761dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1762dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1763d2840e09SBarry Smith const PetscScalar *x, *v; 1764d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1765d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1766d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 1767dbdd7285SBarry Smith 1768dbdd7285SBarry Smith PetscFunctionBegin; 17699566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 17709566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 1771dbdd7285SBarry Smith idx = a->j; 1772dbdd7285SBarry Smith v = a->a; 1773dbdd7285SBarry Smith ii = a->i; 1774dbdd7285SBarry Smith 1775dbdd7285SBarry Smith for (i = 0; i < m; i++) { 1776dbdd7285SBarry Smith jrow = ii[i]; 1777dbdd7285SBarry Smith n = ii[i + 1] - jrow; 1778dbdd7285SBarry Smith sum1 = 0.0; 1779dbdd7285SBarry Smith sum2 = 0.0; 1780dbdd7285SBarry Smith sum3 = 0.0; 1781dbdd7285SBarry Smith sum4 = 0.0; 1782dbdd7285SBarry Smith sum5 = 0.0; 1783dbdd7285SBarry Smith sum6 = 0.0; 1784dbdd7285SBarry Smith sum7 = 0.0; 1785dbdd7285SBarry Smith sum8 = 0.0; 1786dbdd7285SBarry Smith sum9 = 0.0; 1787dbdd7285SBarry Smith sum10 = 0.0; 1788dbdd7285SBarry Smith sum11 = 0.0; 178926fbe8dcSKarl Rupp 1790dbdd7285SBarry Smith nonzerorow += (n > 0); 1791dbdd7285SBarry Smith for (j = 0; j < n; j++) { 1792dbdd7285SBarry Smith sum1 += v[jrow] * x[11 * idx[jrow]]; 1793dbdd7285SBarry Smith sum2 += v[jrow] * x[11 * idx[jrow] + 1]; 1794dbdd7285SBarry Smith sum3 += v[jrow] * x[11 * idx[jrow] + 2]; 1795dbdd7285SBarry Smith sum4 += v[jrow] * x[11 * idx[jrow] + 3]; 1796dbdd7285SBarry Smith sum5 += v[jrow] * x[11 * idx[jrow] + 4]; 1797dbdd7285SBarry Smith sum6 += v[jrow] * x[11 * idx[jrow] + 5]; 1798dbdd7285SBarry Smith sum7 += v[jrow] * x[11 * idx[jrow] + 6]; 1799dbdd7285SBarry Smith sum8 += v[jrow] * x[11 * idx[jrow] + 7]; 1800dbdd7285SBarry Smith sum9 += v[jrow] * x[11 * idx[jrow] + 8]; 1801dbdd7285SBarry Smith sum10 += v[jrow] * x[11 * idx[jrow] + 9]; 1802dbdd7285SBarry Smith sum11 += v[jrow] * x[11 * idx[jrow] + 10]; 1803dbdd7285SBarry Smith jrow++; 1804dbdd7285SBarry Smith } 1805dbdd7285SBarry Smith y[11 * i] = sum1; 1806dbdd7285SBarry Smith y[11 * i + 1] = sum2; 1807dbdd7285SBarry Smith y[11 * i + 2] = sum3; 1808dbdd7285SBarry Smith y[11 * i + 3] = sum4; 1809dbdd7285SBarry Smith y[11 * i + 4] = sum5; 1810dbdd7285SBarry Smith y[11 * i + 5] = sum6; 1811dbdd7285SBarry Smith y[11 * i + 6] = sum7; 1812dbdd7285SBarry Smith y[11 * i + 7] = sum8; 1813dbdd7285SBarry Smith y[11 * i + 8] = sum9; 1814dbdd7285SBarry Smith y[11 * i + 9] = sum10; 1815dbdd7285SBarry Smith y[11 * i + 10] = sum11; 1816dbdd7285SBarry Smith } 1817dbdd7285SBarry Smith 18189566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(22.0 * a->nz - 11 * nonzerorow)); 18199566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 18209566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 1821dbdd7285SBarry Smith PetscFunctionReturn(0); 1822dbdd7285SBarry Smith } 1823dbdd7285SBarry Smith 18249371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz) { 1825dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1826dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1827d2840e09SBarry Smith const PetscScalar *x, *v; 1828d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1829d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1830dbdd7285SBarry Smith PetscInt n, i, jrow, j; 1831dbdd7285SBarry Smith 1832dbdd7285SBarry Smith PetscFunctionBegin; 18339566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 18349566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 18359566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 1836dbdd7285SBarry Smith idx = a->j; 1837dbdd7285SBarry Smith v = a->a; 1838dbdd7285SBarry Smith ii = a->i; 1839dbdd7285SBarry Smith 1840dbdd7285SBarry Smith for (i = 0; i < m; i++) { 1841dbdd7285SBarry Smith jrow = ii[i]; 1842dbdd7285SBarry Smith n = ii[i + 1] - jrow; 1843dbdd7285SBarry Smith sum1 = 0.0; 1844dbdd7285SBarry Smith sum2 = 0.0; 1845dbdd7285SBarry Smith sum3 = 0.0; 1846dbdd7285SBarry Smith sum4 = 0.0; 1847dbdd7285SBarry Smith sum5 = 0.0; 1848dbdd7285SBarry Smith sum6 = 0.0; 1849dbdd7285SBarry Smith sum7 = 0.0; 1850dbdd7285SBarry Smith sum8 = 0.0; 1851dbdd7285SBarry Smith sum9 = 0.0; 1852dbdd7285SBarry Smith sum10 = 0.0; 1853dbdd7285SBarry Smith sum11 = 0.0; 1854dbdd7285SBarry Smith for (j = 0; j < n; j++) { 1855dbdd7285SBarry Smith sum1 += v[jrow] * x[11 * idx[jrow]]; 1856dbdd7285SBarry Smith sum2 += v[jrow] * x[11 * idx[jrow] + 1]; 1857dbdd7285SBarry Smith sum3 += v[jrow] * x[11 * idx[jrow] + 2]; 1858dbdd7285SBarry Smith sum4 += v[jrow] * x[11 * idx[jrow] + 3]; 1859dbdd7285SBarry Smith sum5 += v[jrow] * x[11 * idx[jrow] + 4]; 1860dbdd7285SBarry Smith sum6 += v[jrow] * x[11 * idx[jrow] + 5]; 1861dbdd7285SBarry Smith sum7 += v[jrow] * x[11 * idx[jrow] + 6]; 1862dbdd7285SBarry Smith sum8 += v[jrow] * x[11 * idx[jrow] + 7]; 1863dbdd7285SBarry Smith sum9 += v[jrow] * x[11 * idx[jrow] + 8]; 1864dbdd7285SBarry Smith sum10 += v[jrow] * x[11 * idx[jrow] + 9]; 1865dbdd7285SBarry Smith sum11 += v[jrow] * x[11 * idx[jrow] + 10]; 1866dbdd7285SBarry Smith jrow++; 1867dbdd7285SBarry Smith } 1868dbdd7285SBarry Smith y[11 * i] += sum1; 1869dbdd7285SBarry Smith y[11 * i + 1] += sum2; 1870dbdd7285SBarry Smith y[11 * i + 2] += sum3; 1871dbdd7285SBarry Smith y[11 * i + 3] += sum4; 1872dbdd7285SBarry Smith y[11 * i + 4] += sum5; 1873dbdd7285SBarry Smith y[11 * i + 5] += sum6; 1874dbdd7285SBarry Smith y[11 * i + 6] += sum7; 1875dbdd7285SBarry Smith y[11 * i + 7] += sum8; 1876dbdd7285SBarry Smith y[11 * i + 8] += sum9; 1877dbdd7285SBarry Smith y[11 * i + 9] += sum10; 1878dbdd7285SBarry Smith y[11 * i + 10] += sum11; 1879dbdd7285SBarry Smith } 1880dbdd7285SBarry Smith 18819566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(22.0 * a->nz)); 18829566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 18839566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 1884dbdd7285SBarry Smith PetscFunctionReturn(0); 1885dbdd7285SBarry Smith } 1886dbdd7285SBarry Smith 18879371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A, Vec xx, Vec yy) { 1888dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1889dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1890d2840e09SBarry Smith const PetscScalar *x, *v; 1891d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10, alpha11; 1892d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 1893d2840e09SBarry Smith PetscInt n, i; 1894dbdd7285SBarry Smith 1895dbdd7285SBarry Smith PetscFunctionBegin; 18969566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 18979566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 18989566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 1899dbdd7285SBarry Smith 1900dbdd7285SBarry Smith for (i = 0; i < m; i++) { 1901dbdd7285SBarry Smith idx = a->j + a->i[i]; 1902dbdd7285SBarry Smith v = a->a + a->i[i]; 1903dbdd7285SBarry Smith n = a->i[i + 1] - a->i[i]; 1904dbdd7285SBarry Smith alpha1 = x[11 * i]; 1905dbdd7285SBarry Smith alpha2 = x[11 * i + 1]; 1906dbdd7285SBarry Smith alpha3 = x[11 * i + 2]; 1907dbdd7285SBarry Smith alpha4 = x[11 * i + 3]; 1908dbdd7285SBarry Smith alpha5 = x[11 * i + 4]; 1909dbdd7285SBarry Smith alpha6 = x[11 * i + 5]; 1910dbdd7285SBarry Smith alpha7 = x[11 * i + 6]; 1911dbdd7285SBarry Smith alpha8 = x[11 * i + 7]; 1912dbdd7285SBarry Smith alpha9 = x[11 * i + 8]; 1913dbdd7285SBarry Smith alpha10 = x[11 * i + 9]; 1914dbdd7285SBarry Smith alpha11 = x[11 * i + 10]; 1915dbdd7285SBarry Smith while (n-- > 0) { 1916dbdd7285SBarry Smith y[11 * (*idx)] += alpha1 * (*v); 1917dbdd7285SBarry Smith y[11 * (*idx) + 1] += alpha2 * (*v); 1918dbdd7285SBarry Smith y[11 * (*idx) + 2] += alpha3 * (*v); 1919dbdd7285SBarry Smith y[11 * (*idx) + 3] += alpha4 * (*v); 1920dbdd7285SBarry Smith y[11 * (*idx) + 4] += alpha5 * (*v); 1921dbdd7285SBarry Smith y[11 * (*idx) + 5] += alpha6 * (*v); 1922dbdd7285SBarry Smith y[11 * (*idx) + 6] += alpha7 * (*v); 1923dbdd7285SBarry Smith y[11 * (*idx) + 7] += alpha8 * (*v); 1924dbdd7285SBarry Smith y[11 * (*idx) + 8] += alpha9 * (*v); 1925dbdd7285SBarry Smith y[11 * (*idx) + 9] += alpha10 * (*v); 1926dbdd7285SBarry Smith y[11 * (*idx) + 10] += alpha11 * (*v); 19279371c9d4SSatish Balay idx++; 19289371c9d4SSatish Balay v++; 1929dbdd7285SBarry Smith } 1930dbdd7285SBarry Smith } 19319566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(22.0 * a->nz)); 19329566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 19339566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 1934dbdd7285SBarry Smith PetscFunctionReturn(0); 1935dbdd7285SBarry Smith } 1936dbdd7285SBarry Smith 19379371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz) { 1938dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1939dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1940d2840e09SBarry Smith const PetscScalar *x, *v; 1941d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10, alpha11; 1942d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 1943d2840e09SBarry Smith PetscInt n, i; 1944dbdd7285SBarry Smith 1945dbdd7285SBarry Smith PetscFunctionBegin; 19469566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 19479566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 19489566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 1949dbdd7285SBarry Smith for (i = 0; i < m; i++) { 1950dbdd7285SBarry Smith idx = a->j + a->i[i]; 1951dbdd7285SBarry Smith v = a->a + a->i[i]; 1952dbdd7285SBarry Smith n = a->i[i + 1] - a->i[i]; 1953dbdd7285SBarry Smith alpha1 = x[11 * i]; 1954dbdd7285SBarry Smith alpha2 = x[11 * i + 1]; 1955dbdd7285SBarry Smith alpha3 = x[11 * i + 2]; 1956dbdd7285SBarry Smith alpha4 = x[11 * i + 3]; 1957dbdd7285SBarry Smith alpha5 = x[11 * i + 4]; 1958dbdd7285SBarry Smith alpha6 = x[11 * i + 5]; 1959dbdd7285SBarry Smith alpha7 = x[11 * i + 6]; 1960dbdd7285SBarry Smith alpha8 = x[11 * i + 7]; 1961dbdd7285SBarry Smith alpha9 = x[11 * i + 8]; 1962dbdd7285SBarry Smith alpha10 = x[11 * i + 9]; 1963dbdd7285SBarry Smith alpha11 = x[11 * i + 10]; 1964dbdd7285SBarry Smith while (n-- > 0) { 1965dbdd7285SBarry Smith y[11 * (*idx)] += alpha1 * (*v); 1966dbdd7285SBarry Smith y[11 * (*idx) + 1] += alpha2 * (*v); 1967dbdd7285SBarry Smith y[11 * (*idx) + 2] += alpha3 * (*v); 1968dbdd7285SBarry Smith y[11 * (*idx) + 3] += alpha4 * (*v); 1969dbdd7285SBarry Smith y[11 * (*idx) + 4] += alpha5 * (*v); 1970dbdd7285SBarry Smith y[11 * (*idx) + 5] += alpha6 * (*v); 1971dbdd7285SBarry Smith y[11 * (*idx) + 6] += alpha7 * (*v); 1972dbdd7285SBarry Smith y[11 * (*idx) + 7] += alpha8 * (*v); 1973dbdd7285SBarry Smith y[11 * (*idx) + 8] += alpha9 * (*v); 1974dbdd7285SBarry Smith y[11 * (*idx) + 9] += alpha10 * (*v); 1975dbdd7285SBarry Smith y[11 * (*idx) + 10] += alpha11 * (*v); 19769371c9d4SSatish Balay idx++; 19779371c9d4SSatish Balay v++; 1978dbdd7285SBarry Smith } 1979dbdd7285SBarry Smith } 19809566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(22.0 * a->nz)); 19819566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 19829566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 1983dbdd7285SBarry Smith PetscFunctionReturn(0); 1984dbdd7285SBarry Smith } 1985dbdd7285SBarry Smith 1986dbdd7285SBarry Smith /*--------------------------------------------------------------------------------------------*/ 19879371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_16(Mat A, Vec xx, Vec yy) { 19882f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 19892f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1990d2840e09SBarry Smith const PetscScalar *x, *v; 1991d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 19922f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1993d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1994d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 19952f7816d4SBarry Smith 19962f7816d4SBarry Smith PetscFunctionBegin; 19979566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 19989566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 19992f7816d4SBarry Smith idx = a->j; 20002f7816d4SBarry Smith v = a->a; 20012f7816d4SBarry Smith ii = a->i; 20022f7816d4SBarry Smith 20032f7816d4SBarry Smith for (i = 0; i < m; i++) { 20042f7816d4SBarry Smith jrow = ii[i]; 20052f7816d4SBarry Smith n = ii[i + 1] - jrow; 20062f7816d4SBarry Smith sum1 = 0.0; 20072f7816d4SBarry Smith sum2 = 0.0; 20082f7816d4SBarry Smith sum3 = 0.0; 20092f7816d4SBarry Smith sum4 = 0.0; 20102f7816d4SBarry Smith sum5 = 0.0; 20112f7816d4SBarry Smith sum6 = 0.0; 20122f7816d4SBarry Smith sum7 = 0.0; 20132f7816d4SBarry Smith sum8 = 0.0; 20142f7816d4SBarry Smith sum9 = 0.0; 20152f7816d4SBarry Smith sum10 = 0.0; 20162f7816d4SBarry Smith sum11 = 0.0; 20172f7816d4SBarry Smith sum12 = 0.0; 20182f7816d4SBarry Smith sum13 = 0.0; 20192f7816d4SBarry Smith sum14 = 0.0; 20202f7816d4SBarry Smith sum15 = 0.0; 20212f7816d4SBarry Smith sum16 = 0.0; 202226fbe8dcSKarl Rupp 2023b3c51c66SMatthew Knepley nonzerorow += (n > 0); 20242f7816d4SBarry Smith for (j = 0; j < n; j++) { 20252f7816d4SBarry Smith sum1 += v[jrow] * x[16 * idx[jrow]]; 20262f7816d4SBarry Smith sum2 += v[jrow] * x[16 * idx[jrow] + 1]; 20272f7816d4SBarry Smith sum3 += v[jrow] * x[16 * idx[jrow] + 2]; 20282f7816d4SBarry Smith sum4 += v[jrow] * x[16 * idx[jrow] + 3]; 20292f7816d4SBarry Smith sum5 += v[jrow] * x[16 * idx[jrow] + 4]; 20302f7816d4SBarry Smith sum6 += v[jrow] * x[16 * idx[jrow] + 5]; 20312f7816d4SBarry Smith sum7 += v[jrow] * x[16 * idx[jrow] + 6]; 20322f7816d4SBarry Smith sum8 += v[jrow] * x[16 * idx[jrow] + 7]; 20332f7816d4SBarry Smith sum9 += v[jrow] * x[16 * idx[jrow] + 8]; 20342f7816d4SBarry Smith sum10 += v[jrow] * x[16 * idx[jrow] + 9]; 20352f7816d4SBarry Smith sum11 += v[jrow] * x[16 * idx[jrow] + 10]; 20362f7816d4SBarry Smith sum12 += v[jrow] * x[16 * idx[jrow] + 11]; 20372f7816d4SBarry Smith sum13 += v[jrow] * x[16 * idx[jrow] + 12]; 20382f7816d4SBarry Smith sum14 += v[jrow] * x[16 * idx[jrow] + 13]; 20392f7816d4SBarry Smith sum15 += v[jrow] * x[16 * idx[jrow] + 14]; 20402f7816d4SBarry Smith sum16 += v[jrow] * x[16 * idx[jrow] + 15]; 20412f7816d4SBarry Smith jrow++; 20422f7816d4SBarry Smith } 20432f7816d4SBarry Smith y[16 * i] = sum1; 20442f7816d4SBarry Smith y[16 * i + 1] = sum2; 20452f7816d4SBarry Smith y[16 * i + 2] = sum3; 20462f7816d4SBarry Smith y[16 * i + 3] = sum4; 20472f7816d4SBarry Smith y[16 * i + 4] = sum5; 20482f7816d4SBarry Smith y[16 * i + 5] = sum6; 20492f7816d4SBarry Smith y[16 * i + 6] = sum7; 20502f7816d4SBarry Smith y[16 * i + 7] = sum8; 20512f7816d4SBarry Smith y[16 * i + 8] = sum9; 20522f7816d4SBarry Smith y[16 * i + 9] = sum10; 20532f7816d4SBarry Smith y[16 * i + 10] = sum11; 20542f7816d4SBarry Smith y[16 * i + 11] = sum12; 20552f7816d4SBarry Smith y[16 * i + 12] = sum13; 20562f7816d4SBarry Smith y[16 * i + 13] = sum14; 20572f7816d4SBarry Smith y[16 * i + 14] = sum15; 20582f7816d4SBarry Smith y[16 * i + 15] = sum16; 20592f7816d4SBarry Smith } 20602f7816d4SBarry Smith 20619566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(32.0 * a->nz - 16.0 * nonzerorow)); 20629566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 20639566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 20642f7816d4SBarry Smith PetscFunctionReturn(0); 20652f7816d4SBarry Smith } 20662f7816d4SBarry Smith 20679371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A, Vec xx, Vec yy) { 20682f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 20692f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2070d2840e09SBarry Smith const PetscScalar *x, *v; 2071d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 20722f7816d4SBarry Smith PetscScalar alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16; 2073d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 2074d2840e09SBarry Smith PetscInt n, i; 20752f7816d4SBarry Smith 20762f7816d4SBarry Smith PetscFunctionBegin; 20779566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 20789566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 20799566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 2080bfec09a0SHong Zhang 20812f7816d4SBarry Smith for (i = 0; i < m; i++) { 2082bfec09a0SHong Zhang idx = a->j + a->i[i]; 2083bfec09a0SHong Zhang v = a->a + a->i[i]; 20842f7816d4SBarry Smith n = a->i[i + 1] - a->i[i]; 20852f7816d4SBarry Smith alpha1 = x[16 * i]; 20862f7816d4SBarry Smith alpha2 = x[16 * i + 1]; 20872f7816d4SBarry Smith alpha3 = x[16 * i + 2]; 20882f7816d4SBarry Smith alpha4 = x[16 * i + 3]; 20892f7816d4SBarry Smith alpha5 = x[16 * i + 4]; 20902f7816d4SBarry Smith alpha6 = x[16 * i + 5]; 20912f7816d4SBarry Smith alpha7 = x[16 * i + 6]; 20922f7816d4SBarry Smith alpha8 = x[16 * i + 7]; 20932f7816d4SBarry Smith alpha9 = x[16 * i + 8]; 20942f7816d4SBarry Smith alpha10 = x[16 * i + 9]; 20952f7816d4SBarry Smith alpha11 = x[16 * i + 10]; 20962f7816d4SBarry Smith alpha12 = x[16 * i + 11]; 20972f7816d4SBarry Smith alpha13 = x[16 * i + 12]; 20982f7816d4SBarry Smith alpha14 = x[16 * i + 13]; 20992f7816d4SBarry Smith alpha15 = x[16 * i + 14]; 21002f7816d4SBarry Smith alpha16 = x[16 * i + 15]; 21012f7816d4SBarry Smith while (n-- > 0) { 21022f7816d4SBarry Smith y[16 * (*idx)] += alpha1 * (*v); 21032f7816d4SBarry Smith y[16 * (*idx) + 1] += alpha2 * (*v); 21042f7816d4SBarry Smith y[16 * (*idx) + 2] += alpha3 * (*v); 21052f7816d4SBarry Smith y[16 * (*idx) + 3] += alpha4 * (*v); 21062f7816d4SBarry Smith y[16 * (*idx) + 4] += alpha5 * (*v); 21072f7816d4SBarry Smith y[16 * (*idx) + 5] += alpha6 * (*v); 21082f7816d4SBarry Smith y[16 * (*idx) + 6] += alpha7 * (*v); 21092f7816d4SBarry Smith y[16 * (*idx) + 7] += alpha8 * (*v); 21102f7816d4SBarry Smith y[16 * (*idx) + 8] += alpha9 * (*v); 21112f7816d4SBarry Smith y[16 * (*idx) + 9] += alpha10 * (*v); 21122f7816d4SBarry Smith y[16 * (*idx) + 10] += alpha11 * (*v); 21132f7816d4SBarry Smith y[16 * (*idx) + 11] += alpha12 * (*v); 21142f7816d4SBarry Smith y[16 * (*idx) + 12] += alpha13 * (*v); 21152f7816d4SBarry Smith y[16 * (*idx) + 13] += alpha14 * (*v); 21162f7816d4SBarry Smith y[16 * (*idx) + 14] += alpha15 * (*v); 21172f7816d4SBarry Smith y[16 * (*idx) + 15] += alpha16 * (*v); 21189371c9d4SSatish Balay idx++; 21199371c9d4SSatish Balay v++; 21202f7816d4SBarry Smith } 21212f7816d4SBarry Smith } 21229566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(32.0 * a->nz)); 21239566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 21249566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 21252f7816d4SBarry Smith PetscFunctionReturn(0); 21262f7816d4SBarry Smith } 21272f7816d4SBarry Smith 21289371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz) { 21292f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 21302f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2131d2840e09SBarry Smith const PetscScalar *x, *v; 2132d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 21332f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2134d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 2135b24ad042SBarry Smith PetscInt n, i, jrow, j; 21362f7816d4SBarry Smith 21372f7816d4SBarry Smith PetscFunctionBegin; 21389566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 21399566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 21409566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 21412f7816d4SBarry Smith idx = a->j; 21422f7816d4SBarry Smith v = a->a; 21432f7816d4SBarry Smith ii = a->i; 21442f7816d4SBarry Smith 21452f7816d4SBarry Smith for (i = 0; i < m; i++) { 21462f7816d4SBarry Smith jrow = ii[i]; 21472f7816d4SBarry Smith n = ii[i + 1] - jrow; 21482f7816d4SBarry Smith sum1 = 0.0; 21492f7816d4SBarry Smith sum2 = 0.0; 21502f7816d4SBarry Smith sum3 = 0.0; 21512f7816d4SBarry Smith sum4 = 0.0; 21522f7816d4SBarry Smith sum5 = 0.0; 21532f7816d4SBarry Smith sum6 = 0.0; 21542f7816d4SBarry Smith sum7 = 0.0; 21552f7816d4SBarry Smith sum8 = 0.0; 21562f7816d4SBarry Smith sum9 = 0.0; 21572f7816d4SBarry Smith sum10 = 0.0; 21582f7816d4SBarry Smith sum11 = 0.0; 21592f7816d4SBarry Smith sum12 = 0.0; 21602f7816d4SBarry Smith sum13 = 0.0; 21612f7816d4SBarry Smith sum14 = 0.0; 21622f7816d4SBarry Smith sum15 = 0.0; 21632f7816d4SBarry Smith sum16 = 0.0; 21642f7816d4SBarry Smith for (j = 0; j < n; j++) { 21652f7816d4SBarry Smith sum1 += v[jrow] * x[16 * idx[jrow]]; 21662f7816d4SBarry Smith sum2 += v[jrow] * x[16 * idx[jrow] + 1]; 21672f7816d4SBarry Smith sum3 += v[jrow] * x[16 * idx[jrow] + 2]; 21682f7816d4SBarry Smith sum4 += v[jrow] * x[16 * idx[jrow] + 3]; 21692f7816d4SBarry Smith sum5 += v[jrow] * x[16 * idx[jrow] + 4]; 21702f7816d4SBarry Smith sum6 += v[jrow] * x[16 * idx[jrow] + 5]; 21712f7816d4SBarry Smith sum7 += v[jrow] * x[16 * idx[jrow] + 6]; 21722f7816d4SBarry Smith sum8 += v[jrow] * x[16 * idx[jrow] + 7]; 21732f7816d4SBarry Smith sum9 += v[jrow] * x[16 * idx[jrow] + 8]; 21742f7816d4SBarry Smith sum10 += v[jrow] * x[16 * idx[jrow] + 9]; 21752f7816d4SBarry Smith sum11 += v[jrow] * x[16 * idx[jrow] + 10]; 21762f7816d4SBarry Smith sum12 += v[jrow] * x[16 * idx[jrow] + 11]; 21772f7816d4SBarry Smith sum13 += v[jrow] * x[16 * idx[jrow] + 12]; 21782f7816d4SBarry Smith sum14 += v[jrow] * x[16 * idx[jrow] + 13]; 21792f7816d4SBarry Smith sum15 += v[jrow] * x[16 * idx[jrow] + 14]; 21802f7816d4SBarry Smith sum16 += v[jrow] * x[16 * idx[jrow] + 15]; 21812f7816d4SBarry Smith jrow++; 21822f7816d4SBarry Smith } 21832f7816d4SBarry Smith y[16 * i] += sum1; 21842f7816d4SBarry Smith y[16 * i + 1] += sum2; 21852f7816d4SBarry Smith y[16 * i + 2] += sum3; 21862f7816d4SBarry Smith y[16 * i + 3] += sum4; 21872f7816d4SBarry Smith y[16 * i + 4] += sum5; 21882f7816d4SBarry Smith y[16 * i + 5] += sum6; 21892f7816d4SBarry Smith y[16 * i + 6] += sum7; 21902f7816d4SBarry Smith y[16 * i + 7] += sum8; 21912f7816d4SBarry Smith y[16 * i + 8] += sum9; 21922f7816d4SBarry Smith y[16 * i + 9] += sum10; 21932f7816d4SBarry Smith y[16 * i + 10] += sum11; 21942f7816d4SBarry Smith y[16 * i + 11] += sum12; 21952f7816d4SBarry Smith y[16 * i + 12] += sum13; 21962f7816d4SBarry Smith y[16 * i + 13] += sum14; 21972f7816d4SBarry Smith y[16 * i + 14] += sum15; 21982f7816d4SBarry Smith y[16 * i + 15] += sum16; 21992f7816d4SBarry Smith } 22002f7816d4SBarry Smith 22019566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(32.0 * a->nz)); 22029566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 22039566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 22042f7816d4SBarry Smith PetscFunctionReturn(0); 22052f7816d4SBarry Smith } 22062f7816d4SBarry Smith 22079371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz) { 22082f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 22092f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2210d2840e09SBarry Smith const PetscScalar *x, *v; 2211d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 22122f7816d4SBarry Smith PetscScalar alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16; 2213d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 2214d2840e09SBarry Smith PetscInt n, i; 22152f7816d4SBarry Smith 22162f7816d4SBarry Smith PetscFunctionBegin; 22179566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 22189566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 22199566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 22202f7816d4SBarry Smith for (i = 0; i < m; i++) { 2221bfec09a0SHong Zhang idx = a->j + a->i[i]; 2222bfec09a0SHong Zhang v = a->a + a->i[i]; 22232f7816d4SBarry Smith n = a->i[i + 1] - a->i[i]; 22242f7816d4SBarry Smith alpha1 = x[16 * i]; 22252f7816d4SBarry Smith alpha2 = x[16 * i + 1]; 22262f7816d4SBarry Smith alpha3 = x[16 * i + 2]; 22272f7816d4SBarry Smith alpha4 = x[16 * i + 3]; 22282f7816d4SBarry Smith alpha5 = x[16 * i + 4]; 22292f7816d4SBarry Smith alpha6 = x[16 * i + 5]; 22302f7816d4SBarry Smith alpha7 = x[16 * i + 6]; 22312f7816d4SBarry Smith alpha8 = x[16 * i + 7]; 22322f7816d4SBarry Smith alpha9 = x[16 * i + 8]; 22332f7816d4SBarry Smith alpha10 = x[16 * i + 9]; 22342f7816d4SBarry Smith alpha11 = x[16 * i + 10]; 22352f7816d4SBarry Smith alpha12 = x[16 * i + 11]; 22362f7816d4SBarry Smith alpha13 = x[16 * i + 12]; 22372f7816d4SBarry Smith alpha14 = x[16 * i + 13]; 22382f7816d4SBarry Smith alpha15 = x[16 * i + 14]; 22392f7816d4SBarry Smith alpha16 = x[16 * i + 15]; 22402f7816d4SBarry Smith while (n-- > 0) { 22412f7816d4SBarry Smith y[16 * (*idx)] += alpha1 * (*v); 22422f7816d4SBarry Smith y[16 * (*idx) + 1] += alpha2 * (*v); 22432f7816d4SBarry Smith y[16 * (*idx) + 2] += alpha3 * (*v); 22442f7816d4SBarry Smith y[16 * (*idx) + 3] += alpha4 * (*v); 22452f7816d4SBarry Smith y[16 * (*idx) + 4] += alpha5 * (*v); 22462f7816d4SBarry Smith y[16 * (*idx) + 5] += alpha6 * (*v); 22472f7816d4SBarry Smith y[16 * (*idx) + 6] += alpha7 * (*v); 22482f7816d4SBarry Smith y[16 * (*idx) + 7] += alpha8 * (*v); 22492f7816d4SBarry Smith y[16 * (*idx) + 8] += alpha9 * (*v); 22502f7816d4SBarry Smith y[16 * (*idx) + 9] += alpha10 * (*v); 22512f7816d4SBarry Smith y[16 * (*idx) + 10] += alpha11 * (*v); 22522f7816d4SBarry Smith y[16 * (*idx) + 11] += alpha12 * (*v); 22532f7816d4SBarry Smith y[16 * (*idx) + 12] += alpha13 * (*v); 22542f7816d4SBarry Smith y[16 * (*idx) + 13] += alpha14 * (*v); 22552f7816d4SBarry Smith y[16 * (*idx) + 14] += alpha15 * (*v); 22562f7816d4SBarry Smith y[16 * (*idx) + 15] += alpha16 * (*v); 22579371c9d4SSatish Balay idx++; 22589371c9d4SSatish Balay v++; 22592f7816d4SBarry Smith } 22602f7816d4SBarry Smith } 22619566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(32.0 * a->nz)); 22629566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 22639566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 226466ed3db0SBarry Smith PetscFunctionReturn(0); 226566ed3db0SBarry Smith } 226666ed3db0SBarry Smith 2267ed1c418cSBarry Smith /*--------------------------------------------------------------------------------------------*/ 22689371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_18(Mat A, Vec xx, Vec yy) { 2269ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2270ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2271d2840e09SBarry Smith const PetscScalar *x, *v; 2272d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2273ed1c418cSBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2274d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 2275d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 2276ed1c418cSBarry Smith 2277ed1c418cSBarry Smith PetscFunctionBegin; 22789566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 22799566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 2280ed1c418cSBarry Smith idx = a->j; 2281ed1c418cSBarry Smith v = a->a; 2282ed1c418cSBarry Smith ii = a->i; 2283ed1c418cSBarry Smith 2284ed1c418cSBarry Smith for (i = 0; i < m; i++) { 2285ed1c418cSBarry Smith jrow = ii[i]; 2286ed1c418cSBarry Smith n = ii[i + 1] - jrow; 2287ed1c418cSBarry Smith sum1 = 0.0; 2288ed1c418cSBarry Smith sum2 = 0.0; 2289ed1c418cSBarry Smith sum3 = 0.0; 2290ed1c418cSBarry Smith sum4 = 0.0; 2291ed1c418cSBarry Smith sum5 = 0.0; 2292ed1c418cSBarry Smith sum6 = 0.0; 2293ed1c418cSBarry Smith sum7 = 0.0; 2294ed1c418cSBarry Smith sum8 = 0.0; 2295ed1c418cSBarry Smith sum9 = 0.0; 2296ed1c418cSBarry Smith sum10 = 0.0; 2297ed1c418cSBarry Smith sum11 = 0.0; 2298ed1c418cSBarry Smith sum12 = 0.0; 2299ed1c418cSBarry Smith sum13 = 0.0; 2300ed1c418cSBarry Smith sum14 = 0.0; 2301ed1c418cSBarry Smith sum15 = 0.0; 2302ed1c418cSBarry Smith sum16 = 0.0; 2303ed1c418cSBarry Smith sum17 = 0.0; 2304ed1c418cSBarry Smith sum18 = 0.0; 230526fbe8dcSKarl Rupp 2306ed1c418cSBarry Smith nonzerorow += (n > 0); 2307ed1c418cSBarry Smith for (j = 0; j < n; j++) { 2308ed1c418cSBarry Smith sum1 += v[jrow] * x[18 * idx[jrow]]; 2309ed1c418cSBarry Smith sum2 += v[jrow] * x[18 * idx[jrow] + 1]; 2310ed1c418cSBarry Smith sum3 += v[jrow] * x[18 * idx[jrow] + 2]; 2311ed1c418cSBarry Smith sum4 += v[jrow] * x[18 * idx[jrow] + 3]; 2312ed1c418cSBarry Smith sum5 += v[jrow] * x[18 * idx[jrow] + 4]; 2313ed1c418cSBarry Smith sum6 += v[jrow] * x[18 * idx[jrow] + 5]; 2314ed1c418cSBarry Smith sum7 += v[jrow] * x[18 * idx[jrow] + 6]; 2315ed1c418cSBarry Smith sum8 += v[jrow] * x[18 * idx[jrow] + 7]; 2316ed1c418cSBarry Smith sum9 += v[jrow] * x[18 * idx[jrow] + 8]; 2317ed1c418cSBarry Smith sum10 += v[jrow] * x[18 * idx[jrow] + 9]; 2318ed1c418cSBarry Smith sum11 += v[jrow] * x[18 * idx[jrow] + 10]; 2319ed1c418cSBarry Smith sum12 += v[jrow] * x[18 * idx[jrow] + 11]; 2320ed1c418cSBarry Smith sum13 += v[jrow] * x[18 * idx[jrow] + 12]; 2321ed1c418cSBarry Smith sum14 += v[jrow] * x[18 * idx[jrow] + 13]; 2322ed1c418cSBarry Smith sum15 += v[jrow] * x[18 * idx[jrow] + 14]; 2323ed1c418cSBarry Smith sum16 += v[jrow] * x[18 * idx[jrow] + 15]; 2324ed1c418cSBarry Smith sum17 += v[jrow] * x[18 * idx[jrow] + 16]; 2325ed1c418cSBarry Smith sum18 += v[jrow] * x[18 * idx[jrow] + 17]; 2326ed1c418cSBarry Smith jrow++; 2327ed1c418cSBarry Smith } 2328ed1c418cSBarry Smith y[18 * i] = sum1; 2329ed1c418cSBarry Smith y[18 * i + 1] = sum2; 2330ed1c418cSBarry Smith y[18 * i + 2] = sum3; 2331ed1c418cSBarry Smith y[18 * i + 3] = sum4; 2332ed1c418cSBarry Smith y[18 * i + 4] = sum5; 2333ed1c418cSBarry Smith y[18 * i + 5] = sum6; 2334ed1c418cSBarry Smith y[18 * i + 6] = sum7; 2335ed1c418cSBarry Smith y[18 * i + 7] = sum8; 2336ed1c418cSBarry Smith y[18 * i + 8] = sum9; 2337ed1c418cSBarry Smith y[18 * i + 9] = sum10; 2338ed1c418cSBarry Smith y[18 * i + 10] = sum11; 2339ed1c418cSBarry Smith y[18 * i + 11] = sum12; 2340ed1c418cSBarry Smith y[18 * i + 12] = sum13; 2341ed1c418cSBarry Smith y[18 * i + 13] = sum14; 2342ed1c418cSBarry Smith y[18 * i + 14] = sum15; 2343ed1c418cSBarry Smith y[18 * i + 15] = sum16; 2344ed1c418cSBarry Smith y[18 * i + 16] = sum17; 2345ed1c418cSBarry Smith y[18 * i + 17] = sum18; 2346ed1c418cSBarry Smith } 2347ed1c418cSBarry Smith 23489566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(36.0 * a->nz - 18.0 * nonzerorow)); 23499566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 23509566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 2351ed1c418cSBarry Smith PetscFunctionReturn(0); 2352ed1c418cSBarry Smith } 2353ed1c418cSBarry Smith 23549371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A, Vec xx, Vec yy) { 2355ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2356ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2357d2840e09SBarry Smith const PetscScalar *x, *v; 2358d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 2359ed1c418cSBarry Smith PetscScalar alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16, alpha17, alpha18; 2360d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 2361d2840e09SBarry Smith PetscInt n, i; 2362ed1c418cSBarry Smith 2363ed1c418cSBarry Smith PetscFunctionBegin; 23649566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 23659566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 23669566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 2367ed1c418cSBarry Smith 2368ed1c418cSBarry Smith for (i = 0; i < m; i++) { 2369ed1c418cSBarry Smith idx = a->j + a->i[i]; 2370ed1c418cSBarry Smith v = a->a + a->i[i]; 2371ed1c418cSBarry Smith n = a->i[i + 1] - a->i[i]; 2372ed1c418cSBarry Smith alpha1 = x[18 * i]; 2373ed1c418cSBarry Smith alpha2 = x[18 * i + 1]; 2374ed1c418cSBarry Smith alpha3 = x[18 * i + 2]; 2375ed1c418cSBarry Smith alpha4 = x[18 * i + 3]; 2376ed1c418cSBarry Smith alpha5 = x[18 * i + 4]; 2377ed1c418cSBarry Smith alpha6 = x[18 * i + 5]; 2378ed1c418cSBarry Smith alpha7 = x[18 * i + 6]; 2379ed1c418cSBarry Smith alpha8 = x[18 * i + 7]; 2380ed1c418cSBarry Smith alpha9 = x[18 * i + 8]; 2381ed1c418cSBarry Smith alpha10 = x[18 * i + 9]; 2382ed1c418cSBarry Smith alpha11 = x[18 * i + 10]; 2383ed1c418cSBarry Smith alpha12 = x[18 * i + 11]; 2384ed1c418cSBarry Smith alpha13 = x[18 * i + 12]; 2385ed1c418cSBarry Smith alpha14 = x[18 * i + 13]; 2386ed1c418cSBarry Smith alpha15 = x[18 * i + 14]; 2387ed1c418cSBarry Smith alpha16 = x[18 * i + 15]; 2388ed1c418cSBarry Smith alpha17 = x[18 * i + 16]; 2389ed1c418cSBarry Smith alpha18 = x[18 * i + 17]; 2390ed1c418cSBarry Smith while (n-- > 0) { 2391ed1c418cSBarry Smith y[18 * (*idx)] += alpha1 * (*v); 2392ed1c418cSBarry Smith y[18 * (*idx) + 1] += alpha2 * (*v); 2393ed1c418cSBarry Smith y[18 * (*idx) + 2] += alpha3 * (*v); 2394ed1c418cSBarry Smith y[18 * (*idx) + 3] += alpha4 * (*v); 2395ed1c418cSBarry Smith y[18 * (*idx) + 4] += alpha5 * (*v); 2396ed1c418cSBarry Smith y[18 * (*idx) + 5] += alpha6 * (*v); 2397ed1c418cSBarry Smith y[18 * (*idx) + 6] += alpha7 * (*v); 2398ed1c418cSBarry Smith y[18 * (*idx) + 7] += alpha8 * (*v); 2399ed1c418cSBarry Smith y[18 * (*idx) + 8] += alpha9 * (*v); 2400ed1c418cSBarry Smith y[18 * (*idx) + 9] += alpha10 * (*v); 2401ed1c418cSBarry Smith y[18 * (*idx) + 10] += alpha11 * (*v); 2402ed1c418cSBarry Smith y[18 * (*idx) + 11] += alpha12 * (*v); 2403ed1c418cSBarry Smith y[18 * (*idx) + 12] += alpha13 * (*v); 2404ed1c418cSBarry Smith y[18 * (*idx) + 13] += alpha14 * (*v); 2405ed1c418cSBarry Smith y[18 * (*idx) + 14] += alpha15 * (*v); 2406ed1c418cSBarry Smith y[18 * (*idx) + 15] += alpha16 * (*v); 2407ed1c418cSBarry Smith y[18 * (*idx) + 16] += alpha17 * (*v); 2408ed1c418cSBarry Smith y[18 * (*idx) + 17] += alpha18 * (*v); 24099371c9d4SSatish Balay idx++; 24109371c9d4SSatish Balay v++; 2411ed1c418cSBarry Smith } 2412ed1c418cSBarry Smith } 24139566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(36.0 * a->nz)); 24149566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 24159566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 2416ed1c418cSBarry Smith PetscFunctionReturn(0); 2417ed1c418cSBarry Smith } 2418ed1c418cSBarry Smith 24199371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz) { 2420ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2421ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2422d2840e09SBarry Smith const PetscScalar *x, *v; 2423d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2424ed1c418cSBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2425d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 2426ed1c418cSBarry Smith PetscInt n, i, jrow, j; 2427ed1c418cSBarry Smith 2428ed1c418cSBarry Smith PetscFunctionBegin; 24299566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 24309566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 24319566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 2432ed1c418cSBarry Smith idx = a->j; 2433ed1c418cSBarry Smith v = a->a; 2434ed1c418cSBarry Smith ii = a->i; 2435ed1c418cSBarry Smith 2436ed1c418cSBarry Smith for (i = 0; i < m; i++) { 2437ed1c418cSBarry Smith jrow = ii[i]; 2438ed1c418cSBarry Smith n = ii[i + 1] - jrow; 2439ed1c418cSBarry Smith sum1 = 0.0; 2440ed1c418cSBarry Smith sum2 = 0.0; 2441ed1c418cSBarry Smith sum3 = 0.0; 2442ed1c418cSBarry Smith sum4 = 0.0; 2443ed1c418cSBarry Smith sum5 = 0.0; 2444ed1c418cSBarry Smith sum6 = 0.0; 2445ed1c418cSBarry Smith sum7 = 0.0; 2446ed1c418cSBarry Smith sum8 = 0.0; 2447ed1c418cSBarry Smith sum9 = 0.0; 2448ed1c418cSBarry Smith sum10 = 0.0; 2449ed1c418cSBarry Smith sum11 = 0.0; 2450ed1c418cSBarry Smith sum12 = 0.0; 2451ed1c418cSBarry Smith sum13 = 0.0; 2452ed1c418cSBarry Smith sum14 = 0.0; 2453ed1c418cSBarry Smith sum15 = 0.0; 2454ed1c418cSBarry Smith sum16 = 0.0; 2455ed1c418cSBarry Smith sum17 = 0.0; 2456ed1c418cSBarry Smith sum18 = 0.0; 2457ed1c418cSBarry Smith for (j = 0; j < n; j++) { 2458ed1c418cSBarry Smith sum1 += v[jrow] * x[18 * idx[jrow]]; 2459ed1c418cSBarry Smith sum2 += v[jrow] * x[18 * idx[jrow] + 1]; 2460ed1c418cSBarry Smith sum3 += v[jrow] * x[18 * idx[jrow] + 2]; 2461ed1c418cSBarry Smith sum4 += v[jrow] * x[18 * idx[jrow] + 3]; 2462ed1c418cSBarry Smith sum5 += v[jrow] * x[18 * idx[jrow] + 4]; 2463ed1c418cSBarry Smith sum6 += v[jrow] * x[18 * idx[jrow] + 5]; 2464ed1c418cSBarry Smith sum7 += v[jrow] * x[18 * idx[jrow] + 6]; 2465ed1c418cSBarry Smith sum8 += v[jrow] * x[18 * idx[jrow] + 7]; 2466ed1c418cSBarry Smith sum9 += v[jrow] * x[18 * idx[jrow] + 8]; 2467ed1c418cSBarry Smith sum10 += v[jrow] * x[18 * idx[jrow] + 9]; 2468ed1c418cSBarry Smith sum11 += v[jrow] * x[18 * idx[jrow] + 10]; 2469ed1c418cSBarry Smith sum12 += v[jrow] * x[18 * idx[jrow] + 11]; 2470ed1c418cSBarry Smith sum13 += v[jrow] * x[18 * idx[jrow] + 12]; 2471ed1c418cSBarry Smith sum14 += v[jrow] * x[18 * idx[jrow] + 13]; 2472ed1c418cSBarry Smith sum15 += v[jrow] * x[18 * idx[jrow] + 14]; 2473ed1c418cSBarry Smith sum16 += v[jrow] * x[18 * idx[jrow] + 15]; 2474ed1c418cSBarry Smith sum17 += v[jrow] * x[18 * idx[jrow] + 16]; 2475ed1c418cSBarry Smith sum18 += v[jrow] * x[18 * idx[jrow] + 17]; 2476ed1c418cSBarry Smith jrow++; 2477ed1c418cSBarry Smith } 2478ed1c418cSBarry Smith y[18 * i] += sum1; 2479ed1c418cSBarry Smith y[18 * i + 1] += sum2; 2480ed1c418cSBarry Smith y[18 * i + 2] += sum3; 2481ed1c418cSBarry Smith y[18 * i + 3] += sum4; 2482ed1c418cSBarry Smith y[18 * i + 4] += sum5; 2483ed1c418cSBarry Smith y[18 * i + 5] += sum6; 2484ed1c418cSBarry Smith y[18 * i + 6] += sum7; 2485ed1c418cSBarry Smith y[18 * i + 7] += sum8; 2486ed1c418cSBarry Smith y[18 * i + 8] += sum9; 2487ed1c418cSBarry Smith y[18 * i + 9] += sum10; 2488ed1c418cSBarry Smith y[18 * i + 10] += sum11; 2489ed1c418cSBarry Smith y[18 * i + 11] += sum12; 2490ed1c418cSBarry Smith y[18 * i + 12] += sum13; 2491ed1c418cSBarry Smith y[18 * i + 13] += sum14; 2492ed1c418cSBarry Smith y[18 * i + 14] += sum15; 2493ed1c418cSBarry Smith y[18 * i + 15] += sum16; 2494ed1c418cSBarry Smith y[18 * i + 16] += sum17; 2495ed1c418cSBarry Smith y[18 * i + 17] += sum18; 2496ed1c418cSBarry Smith } 2497ed1c418cSBarry Smith 24989566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(36.0 * a->nz)); 24999566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 25009566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 2501ed1c418cSBarry Smith PetscFunctionReturn(0); 2502ed1c418cSBarry Smith } 2503ed1c418cSBarry Smith 25049371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz) { 2505ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2506ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2507d2840e09SBarry Smith const PetscScalar *x, *v; 2508d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 2509ed1c418cSBarry Smith PetscScalar alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16, alpha17, alpha18; 2510d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 2511d2840e09SBarry Smith PetscInt n, i; 2512ed1c418cSBarry Smith 2513ed1c418cSBarry Smith PetscFunctionBegin; 25149566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 25159566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 25169566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 2517ed1c418cSBarry Smith for (i = 0; i < m; i++) { 2518ed1c418cSBarry Smith idx = a->j + a->i[i]; 2519ed1c418cSBarry Smith v = a->a + a->i[i]; 2520ed1c418cSBarry Smith n = a->i[i + 1] - a->i[i]; 2521ed1c418cSBarry Smith alpha1 = x[18 * i]; 2522ed1c418cSBarry Smith alpha2 = x[18 * i + 1]; 2523ed1c418cSBarry Smith alpha3 = x[18 * i + 2]; 2524ed1c418cSBarry Smith alpha4 = x[18 * i + 3]; 2525ed1c418cSBarry Smith alpha5 = x[18 * i + 4]; 2526ed1c418cSBarry Smith alpha6 = x[18 * i + 5]; 2527ed1c418cSBarry Smith alpha7 = x[18 * i + 6]; 2528ed1c418cSBarry Smith alpha8 = x[18 * i + 7]; 2529ed1c418cSBarry Smith alpha9 = x[18 * i + 8]; 2530ed1c418cSBarry Smith alpha10 = x[18 * i + 9]; 2531ed1c418cSBarry Smith alpha11 = x[18 * i + 10]; 2532ed1c418cSBarry Smith alpha12 = x[18 * i + 11]; 2533ed1c418cSBarry Smith alpha13 = x[18 * i + 12]; 2534ed1c418cSBarry Smith alpha14 = x[18 * i + 13]; 2535ed1c418cSBarry Smith alpha15 = x[18 * i + 14]; 2536ed1c418cSBarry Smith alpha16 = x[18 * i + 15]; 2537ed1c418cSBarry Smith alpha17 = x[18 * i + 16]; 2538ed1c418cSBarry Smith alpha18 = x[18 * i + 17]; 2539ed1c418cSBarry Smith while (n-- > 0) { 2540ed1c418cSBarry Smith y[18 * (*idx)] += alpha1 * (*v); 2541ed1c418cSBarry Smith y[18 * (*idx) + 1] += alpha2 * (*v); 2542ed1c418cSBarry Smith y[18 * (*idx) + 2] += alpha3 * (*v); 2543ed1c418cSBarry Smith y[18 * (*idx) + 3] += alpha4 * (*v); 2544ed1c418cSBarry Smith y[18 * (*idx) + 4] += alpha5 * (*v); 2545ed1c418cSBarry Smith y[18 * (*idx) + 5] += alpha6 * (*v); 2546ed1c418cSBarry Smith y[18 * (*idx) + 6] += alpha7 * (*v); 2547ed1c418cSBarry Smith y[18 * (*idx) + 7] += alpha8 * (*v); 2548ed1c418cSBarry Smith y[18 * (*idx) + 8] += alpha9 * (*v); 2549ed1c418cSBarry Smith y[18 * (*idx) + 9] += alpha10 * (*v); 2550ed1c418cSBarry Smith y[18 * (*idx) + 10] += alpha11 * (*v); 2551ed1c418cSBarry Smith y[18 * (*idx) + 11] += alpha12 * (*v); 2552ed1c418cSBarry Smith y[18 * (*idx) + 12] += alpha13 * (*v); 2553ed1c418cSBarry Smith y[18 * (*idx) + 13] += alpha14 * (*v); 2554ed1c418cSBarry Smith y[18 * (*idx) + 14] += alpha15 * (*v); 2555ed1c418cSBarry Smith y[18 * (*idx) + 15] += alpha16 * (*v); 2556ed1c418cSBarry Smith y[18 * (*idx) + 16] += alpha17 * (*v); 2557ed1c418cSBarry Smith y[18 * (*idx) + 17] += alpha18 * (*v); 25589371c9d4SSatish Balay idx++; 25599371c9d4SSatish Balay v++; 2560ed1c418cSBarry Smith } 2561ed1c418cSBarry Smith } 25629566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(36.0 * a->nz)); 25639566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 25649566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 2565ed1c418cSBarry Smith PetscFunctionReturn(0); 2566ed1c418cSBarry Smith } 2567ed1c418cSBarry Smith 25689371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy) { 25696861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 25706861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 25716861f193SBarry Smith const PetscScalar *x, *v; 25726861f193SBarry Smith PetscScalar *y, *sums; 25736861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 25746861f193SBarry Smith PetscInt n, i, jrow, j, dof = b->dof, k; 25756861f193SBarry Smith 25766861f193SBarry Smith PetscFunctionBegin; 25779566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 25789566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 25799566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 25806861f193SBarry Smith idx = a->j; 25816861f193SBarry Smith v = a->a; 25826861f193SBarry Smith ii = a->i; 25836861f193SBarry Smith 25846861f193SBarry Smith for (i = 0; i < m; i++) { 25856861f193SBarry Smith jrow = ii[i]; 25866861f193SBarry Smith n = ii[i + 1] - jrow; 25876861f193SBarry Smith sums = y + dof * i; 25886861f193SBarry Smith for (j = 0; j < n; j++) { 2589ad540459SPierre Jolivet for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k]; 25906861f193SBarry Smith jrow++; 25916861f193SBarry Smith } 25926861f193SBarry Smith } 25936861f193SBarry Smith 25949566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 25959566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 25969566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 25976861f193SBarry Smith PetscFunctionReturn(0); 25986861f193SBarry Smith } 25996861f193SBarry Smith 26009371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) { 26016861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 26026861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 26036861f193SBarry Smith const PetscScalar *x, *v; 26046861f193SBarry Smith PetscScalar *y, *sums; 26056861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 26066861f193SBarry Smith PetscInt n, i, jrow, j, dof = b->dof, k; 26076861f193SBarry Smith 26086861f193SBarry Smith PetscFunctionBegin; 26099566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 26109566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 26119566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 26126861f193SBarry Smith idx = a->j; 26136861f193SBarry Smith v = a->a; 26146861f193SBarry Smith ii = a->i; 26156861f193SBarry Smith 26166861f193SBarry Smith for (i = 0; i < m; i++) { 26176861f193SBarry Smith jrow = ii[i]; 26186861f193SBarry Smith n = ii[i + 1] - jrow; 26196861f193SBarry Smith sums = y + dof * i; 26206861f193SBarry Smith for (j = 0; j < n; j++) { 2621ad540459SPierre Jolivet for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k]; 26226861f193SBarry Smith jrow++; 26236861f193SBarry Smith } 26246861f193SBarry Smith } 26256861f193SBarry Smith 26269566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 26279566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 26289566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 26296861f193SBarry Smith PetscFunctionReturn(0); 26306861f193SBarry Smith } 26316861f193SBarry Smith 26329371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy) { 26336861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 26346861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 26356861f193SBarry Smith const PetscScalar *x, *v, *alpha; 26366861f193SBarry Smith PetscScalar *y; 26376861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof; 26386861f193SBarry Smith PetscInt n, i, k; 26396861f193SBarry Smith 26406861f193SBarry Smith PetscFunctionBegin; 26419566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 26429566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 26439566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 26446861f193SBarry Smith for (i = 0; i < m; i++) { 26456861f193SBarry Smith idx = a->j + a->i[i]; 26466861f193SBarry Smith v = a->a + a->i[i]; 26476861f193SBarry Smith n = a->i[i + 1] - a->i[i]; 26486861f193SBarry Smith alpha = x + dof * i; 26496861f193SBarry Smith while (n-- > 0) { 2650ad540459SPierre Jolivet for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v); 26519371c9d4SSatish Balay idx++; 26529371c9d4SSatish Balay v++; 26536861f193SBarry Smith } 26546861f193SBarry Smith } 26559566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 26569566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 26579566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 26586861f193SBarry Smith PetscFunctionReturn(0); 26596861f193SBarry Smith } 26606861f193SBarry Smith 26619371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) { 26626861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 26636861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 26646861f193SBarry Smith const PetscScalar *x, *v, *alpha; 26656861f193SBarry Smith PetscScalar *y; 26666861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof; 26676861f193SBarry Smith PetscInt n, i, k; 26686861f193SBarry Smith 26696861f193SBarry Smith PetscFunctionBegin; 26709566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 26719566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 26729566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 26736861f193SBarry Smith for (i = 0; i < m; i++) { 26746861f193SBarry Smith idx = a->j + a->i[i]; 26756861f193SBarry Smith v = a->a + a->i[i]; 26766861f193SBarry Smith n = a->i[i + 1] - a->i[i]; 26776861f193SBarry Smith alpha = x + dof * i; 26786861f193SBarry Smith while (n-- > 0) { 2679ad540459SPierre Jolivet for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v); 26809371c9d4SSatish Balay idx++; 26819371c9d4SSatish Balay v++; 26826861f193SBarry Smith } 26836861f193SBarry Smith } 26849566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 26859566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 26869566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 26876861f193SBarry Smith PetscFunctionReturn(0); 26886861f193SBarry Smith } 26896861f193SBarry Smith 2690f4a53059SBarry Smith /*===================================================================================*/ 26919371c9d4SSatish Balay PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) { 26924cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 2693f4a53059SBarry Smith 2694b24ad042SBarry Smith PetscFunctionBegin; 2695f4a53059SBarry Smith /* start the scatter */ 26969566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 26979566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->mult)(b->AIJ, xx, yy)); 26989566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 26999566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy)); 2700f4a53059SBarry Smith PetscFunctionReturn(0); 2701f4a53059SBarry Smith } 2702f4a53059SBarry Smith 27039371c9d4SSatish Balay PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) { 27044cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 2705b24ad042SBarry Smith 27064cb9d9b8SBarry Smith PetscFunctionBegin; 27079566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w)); 27089566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy)); 27099566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE)); 27109566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE)); 27114cb9d9b8SBarry Smith PetscFunctionReturn(0); 27124cb9d9b8SBarry Smith } 27134cb9d9b8SBarry Smith 27149371c9d4SSatish Balay PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) { 27154cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 27164cb9d9b8SBarry Smith 2717b24ad042SBarry Smith PetscFunctionBegin; 27184cb9d9b8SBarry Smith /* start the scatter */ 27199566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 27209566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz)); 27219566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 27229566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz)); 27234cb9d9b8SBarry Smith PetscFunctionReturn(0); 27244cb9d9b8SBarry Smith } 27254cb9d9b8SBarry Smith 27269371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) { 27274cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 2728b24ad042SBarry Smith 27294cb9d9b8SBarry Smith PetscFunctionBegin; 27309566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w)); 27319566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz)); 27329566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE)); 27339566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE)); 27344cb9d9b8SBarry Smith PetscFunctionReturn(0); 27354cb9d9b8SBarry Smith } 27364cb9d9b8SBarry Smith 273795ddefa2SHong Zhang /* ----------------------------------------------------------------*/ 27389371c9d4SSatish Balay PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C) { 27394222ddf1SHong Zhang Mat_Product *product = C->product; 27404222ddf1SHong Zhang 27414222ddf1SHong Zhang PetscFunctionBegin; 27424222ddf1SHong Zhang if (product->type == MATPRODUCT_PtAP) { 27434222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ; 274498921bdaSJacob Faibussowitsch } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]); 27454222ddf1SHong Zhang PetscFunctionReturn(0); 27464222ddf1SHong Zhang } 27474222ddf1SHong Zhang 27489371c9d4SSatish Balay PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C) { 27494222ddf1SHong Zhang Mat_Product *product = C->product; 27504222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 27514222ddf1SHong Zhang Mat A = product->A, P = product->B; 27524222ddf1SHong Zhang PetscInt alg = 1; /* set default algorithm */ 27534222ddf1SHong Zhang #if !defined(PETSC_HAVE_HYPRE) 27544222ddf1SHong Zhang const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"}; 27554222ddf1SHong Zhang PetscInt nalg = 4; 27564222ddf1SHong Zhang #else 27574222ddf1SHong Zhang const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"}; 27584222ddf1SHong Zhang PetscInt nalg = 5; 27594222ddf1SHong Zhang #endif 27604222ddf1SHong Zhang 27614222ddf1SHong Zhang PetscFunctionBegin; 276208401ef6SPierre Jolivet PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices", MatProductTypes[product->type]); 27634222ddf1SHong Zhang 27644222ddf1SHong Zhang /* PtAP */ 27654222ddf1SHong Zhang /* Check matrix local sizes */ 27669371c9d4SSatish Balay PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")", 27679371c9d4SSatish Balay A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend); 27689371c9d4SSatish Balay PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")", 27699371c9d4SSatish Balay A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend); 27704222ddf1SHong Zhang 27714222ddf1SHong Zhang /* Set the default algorithm */ 27729566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg, "default", &flg)); 277348a46eb9SPierre Jolivet if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 27744222ddf1SHong Zhang 27754222ddf1SHong Zhang /* Get runtime option */ 2776d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 27779566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg)); 277848a46eb9SPierre Jolivet if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2779d0609cedSBarry Smith PetscOptionsEnd(); 27804222ddf1SHong Zhang 27819566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg, "allatonce", &flg)); 27824222ddf1SHong Zhang if (flg) { 27834222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 27844222ddf1SHong Zhang PetscFunctionReturn(0); 27854222ddf1SHong Zhang } 27864222ddf1SHong Zhang 27879566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg, "allatonce_merged", &flg)); 27884222ddf1SHong Zhang if (flg) { 27894222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 27904222ddf1SHong Zhang PetscFunctionReturn(0); 27914222ddf1SHong Zhang } 27924222ddf1SHong Zhang 27934222ddf1SHong Zhang /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */ 27949566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n")); 27959566063dSJacob Faibussowitsch PetscCall(MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P)); 27969566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(C)); 27974222ddf1SHong Zhang PetscFunctionReturn(0); 27984222ddf1SHong Zhang } 27994222ddf1SHong Zhang 28004222ddf1SHong Zhang /* ----------------------------------------------------------------*/ 28019371c9d4SSatish Balay PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C) { 28020298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 28037ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data; 28047ba1a0bfSKris Buschelman Mat P = pp->AIJ; 28057ba1a0bfSKris Buschelman Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c; 2806d2840e09SBarry Smith PetscInt *pti, *ptj, *ptJ; 28077ba1a0bfSKris Buschelman PetscInt *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj; 2808d2840e09SBarry Smith const PetscInt an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof; 2809d2840e09SBarry Smith PetscInt i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn; 28107ba1a0bfSKris Buschelman MatScalar *ca; 2811d2840e09SBarry Smith const PetscInt *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj; 28127ba1a0bfSKris Buschelman 28137ba1a0bfSKris Buschelman PetscFunctionBegin; 28147ba1a0bfSKris Buschelman /* Get ij structure of P^T */ 28159566063dSJacob Faibussowitsch PetscCall(MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj)); 28167ba1a0bfSKris Buschelman 28177ba1a0bfSKris Buschelman cn = pn * ppdof; 28187ba1a0bfSKris Buschelman /* Allocate ci array, arrays for fill computation and */ 28197ba1a0bfSKris Buschelman /* free space for accumulating nonzero column info */ 28209566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(cn + 1, &ci)); 28217ba1a0bfSKris Buschelman ci[0] = 0; 28227ba1a0bfSKris Buschelman 28237ba1a0bfSKris Buschelman /* Work arrays for rows of P^T*A */ 28249566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow)); 28259566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ptadenserow, an)); 28269566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(denserow, cn)); 28277ba1a0bfSKris Buschelman 28287ba1a0bfSKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 28297ba1a0bfSKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 28307ba1a0bfSKris Buschelman /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 28319566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space)); 28327ba1a0bfSKris Buschelman current_space = free_space; 28337ba1a0bfSKris Buschelman 28347ba1a0bfSKris Buschelman /* Determine symbolic info for each row of C: */ 28357ba1a0bfSKris Buschelman for (i = 0; i < pn; i++) { 28367ba1a0bfSKris Buschelman ptnzi = pti[i + 1] - pti[i]; 28377ba1a0bfSKris Buschelman ptJ = ptj + pti[i]; 28387ba1a0bfSKris Buschelman for (dof = 0; dof < ppdof; dof++) { 28397ba1a0bfSKris Buschelman ptanzi = 0; 28407ba1a0bfSKris Buschelman /* Determine symbolic row of PtA: */ 28417ba1a0bfSKris Buschelman for (j = 0; j < ptnzi; j++) { 28427ba1a0bfSKris Buschelman /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 28437ba1a0bfSKris Buschelman arow = ptJ[j] * ppdof + dof; 28447ba1a0bfSKris Buschelman /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 28457ba1a0bfSKris Buschelman anzj = ai[arow + 1] - ai[arow]; 28467ba1a0bfSKris Buschelman ajj = aj + ai[arow]; 28477ba1a0bfSKris Buschelman for (k = 0; k < anzj; k++) { 28487ba1a0bfSKris Buschelman if (!ptadenserow[ajj[k]]) { 28497ba1a0bfSKris Buschelman ptadenserow[ajj[k]] = -1; 28507ba1a0bfSKris Buschelman ptasparserow[ptanzi++] = ajj[k]; 28517ba1a0bfSKris Buschelman } 28527ba1a0bfSKris Buschelman } 28537ba1a0bfSKris Buschelman } 28547ba1a0bfSKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 28557ba1a0bfSKris Buschelman ptaj = ptasparserow; 28567ba1a0bfSKris Buschelman cnzi = 0; 28577ba1a0bfSKris Buschelman for (j = 0; j < ptanzi; j++) { 28587ba1a0bfSKris Buschelman /* Get offset within block of P */ 28597ba1a0bfSKris Buschelman pshift = *ptaj % ppdof; 28607ba1a0bfSKris Buschelman /* Get block row of P */ 28617ba1a0bfSKris Buschelman prow = (*ptaj++) / ppdof; /* integer division */ 28627ba1a0bfSKris Buschelman /* P has same number of nonzeros per row as the compressed form */ 28637ba1a0bfSKris Buschelman pnzj = pi[prow + 1] - pi[prow]; 28647ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 28657ba1a0bfSKris Buschelman for (k = 0; k < pnzj; k++) { 28667ba1a0bfSKris Buschelman /* Locations in C are shifted by the offset within the block */ 28677ba1a0bfSKris Buschelman /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 28687ba1a0bfSKris Buschelman if (!denserow[pjj[k] * ppdof + pshift]) { 28697ba1a0bfSKris Buschelman denserow[pjj[k] * ppdof + pshift] = -1; 28707ba1a0bfSKris Buschelman sparserow[cnzi++] = pjj[k] * ppdof + pshift; 28717ba1a0bfSKris Buschelman } 28727ba1a0bfSKris Buschelman } 28737ba1a0bfSKris Buschelman } 28747ba1a0bfSKris Buschelman 28757ba1a0bfSKris Buschelman /* sort sparserow */ 28769566063dSJacob Faibussowitsch PetscCall(PetscSortInt(cnzi, sparserow)); 28777ba1a0bfSKris Buschelman 28787ba1a0bfSKris Buschelman /* If free space is not available, make more free space */ 28797ba1a0bfSKris Buschelman /* Double the amount of total space in the list */ 288048a46eb9SPierre Jolivet if (current_space->local_remaining < cnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), ¤t_space)); 28817ba1a0bfSKris Buschelman 28827ba1a0bfSKris Buschelman /* Copy data into free space, and zero out denserows */ 28839566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(current_space->array, sparserow, cnzi)); 288426fbe8dcSKarl Rupp 28857ba1a0bfSKris Buschelman current_space->array += cnzi; 28867ba1a0bfSKris Buschelman current_space->local_used += cnzi; 28877ba1a0bfSKris Buschelman current_space->local_remaining -= cnzi; 28887ba1a0bfSKris Buschelman 288926fbe8dcSKarl Rupp for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0; 289026fbe8dcSKarl Rupp for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0; 289126fbe8dcSKarl Rupp 28927ba1a0bfSKris Buschelman /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 28937ba1a0bfSKris Buschelman /* For now, we will recompute what is needed. */ 28947ba1a0bfSKris Buschelman ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi; 28957ba1a0bfSKris Buschelman } 28967ba1a0bfSKris Buschelman } 28977ba1a0bfSKris Buschelman /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 28987ba1a0bfSKris Buschelman /* Allocate space for cj, initialize cj, and */ 28997ba1a0bfSKris Buschelman /* destroy list of free space and other temporary array(s) */ 29009566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ci[cn] + 1, &cj)); 29019566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space, cj)); 29029566063dSJacob Faibussowitsch PetscCall(PetscFree4(ptadenserow, ptasparserow, denserow, sparserow)); 29037ba1a0bfSKris Buschelman 29047ba1a0bfSKris Buschelman /* Allocate space for ca */ 29059566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(ci[cn] + 1, &ca)); 29067ba1a0bfSKris Buschelman 29077ba1a0bfSKris Buschelman /* put together the new matrix */ 29089566063dSJacob Faibussowitsch PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C)); 29099566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(C, pp->dof)); 29107ba1a0bfSKris Buschelman 29117ba1a0bfSKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 29127ba1a0bfSKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 29134222ddf1SHong Zhang c = (Mat_SeqAIJ *)(C->data); 2914e6b907acSBarry Smith c->free_a = PETSC_TRUE; 2915e6b907acSBarry Smith c->free_ij = PETSC_TRUE; 29167ba1a0bfSKris Buschelman c->nonew = 0; 291726fbe8dcSKarl Rupp 29184222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 29194222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 29207ba1a0bfSKris Buschelman 29217ba1a0bfSKris Buschelman /* Clean up. */ 29229566063dSJacob Faibussowitsch PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj)); 29237ba1a0bfSKris Buschelman PetscFunctionReturn(0); 29247ba1a0bfSKris Buschelman } 29257ba1a0bfSKris Buschelman 29269371c9d4SSatish Balay PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C) { 29277ba1a0bfSKris Buschelman /* This routine requires testing -- first draft only */ 29287ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data; 29297ba1a0bfSKris Buschelman Mat P = pp->AIJ; 29307ba1a0bfSKris Buschelman Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 29317ba1a0bfSKris Buschelman Mat_SeqAIJ *p = (Mat_SeqAIJ *)P->data; 29327ba1a0bfSKris Buschelman Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 2933a2ea699eSBarry Smith const PetscInt *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj; 2934d2840e09SBarry Smith const PetscInt *ci = c->i, *cj = c->j, *cjj; 2935d2840e09SBarry Smith const PetscInt am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof; 2936d2840e09SBarry Smith PetscInt i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense; 2937a2ea699eSBarry Smith const MatScalar *aa = a->a, *pa = p->a, *pA, *paj; 2938d2840e09SBarry Smith MatScalar *ca = c->a, *caj, *apa; 29397ba1a0bfSKris Buschelman 29407ba1a0bfSKris Buschelman PetscFunctionBegin; 29417ba1a0bfSKris Buschelman /* Allocate temporary array for storage of one row of A*P */ 29429566063dSJacob Faibussowitsch PetscCall(PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense)); 29437ba1a0bfSKris Buschelman 29447ba1a0bfSKris Buschelman /* Clear old values in C */ 29459566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ca, ci[cm])); 29467ba1a0bfSKris Buschelman 29477ba1a0bfSKris Buschelman for (i = 0; i < am; i++) { 29487ba1a0bfSKris Buschelman /* Form sparse row of A*P */ 29497ba1a0bfSKris Buschelman anzi = ai[i + 1] - ai[i]; 29507ba1a0bfSKris Buschelman apnzj = 0; 29517ba1a0bfSKris Buschelman for (j = 0; j < anzi; j++) { 29527ba1a0bfSKris Buschelman /* Get offset within block of P */ 29537ba1a0bfSKris Buschelman pshift = *aj % ppdof; 29547ba1a0bfSKris Buschelman /* Get block row of P */ 29557ba1a0bfSKris Buschelman prow = *aj++ / ppdof; /* integer division */ 29567ba1a0bfSKris Buschelman pnzj = pi[prow + 1] - pi[prow]; 29577ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 29587ba1a0bfSKris Buschelman paj = pa + pi[prow]; 29597ba1a0bfSKris Buschelman for (k = 0; k < pnzj; k++) { 29607ba1a0bfSKris Buschelman poffset = pjj[k] * ppdof + pshift; 29617ba1a0bfSKris Buschelman if (!apjdense[poffset]) { 29627ba1a0bfSKris Buschelman apjdense[poffset] = -1; 29637ba1a0bfSKris Buschelman apj[apnzj++] = poffset; 29647ba1a0bfSKris Buschelman } 29657ba1a0bfSKris Buschelman apa[poffset] += (*aa) * paj[k]; 29667ba1a0bfSKris Buschelman } 29679566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * pnzj)); 29687ba1a0bfSKris Buschelman aa++; 29697ba1a0bfSKris Buschelman } 29707ba1a0bfSKris Buschelman 29717ba1a0bfSKris Buschelman /* Sort the j index array for quick sparse axpy. */ 29727ba1a0bfSKris Buschelman /* Note: a array does not need sorting as it is in dense storage locations. */ 29739566063dSJacob Faibussowitsch PetscCall(PetscSortInt(apnzj, apj)); 29747ba1a0bfSKris Buschelman 29757ba1a0bfSKris Buschelman /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 29767ba1a0bfSKris Buschelman prow = i / ppdof; /* integer division */ 29777ba1a0bfSKris Buschelman pshift = i % ppdof; 29787ba1a0bfSKris Buschelman poffset = pi[prow]; 29797ba1a0bfSKris Buschelman pnzi = pi[prow + 1] - poffset; 29807ba1a0bfSKris Buschelman /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 29817ba1a0bfSKris Buschelman pJ = pj + poffset; 29827ba1a0bfSKris Buschelman pA = pa + poffset; 29837ba1a0bfSKris Buschelman for (j = 0; j < pnzi; j++) { 29847ba1a0bfSKris Buschelman crow = (*pJ) * ppdof + pshift; 29857ba1a0bfSKris Buschelman cjj = cj + ci[crow]; 29867ba1a0bfSKris Buschelman caj = ca + ci[crow]; 29877ba1a0bfSKris Buschelman pJ++; 29887ba1a0bfSKris Buschelman /* Perform sparse axpy operation. Note cjj includes apj. */ 29897ba1a0bfSKris Buschelman for (k = 0, nextap = 0; nextap < apnzj; k++) { 299026fbe8dcSKarl Rupp if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]]; 29917ba1a0bfSKris Buschelman } 29929566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * apnzj)); 29937ba1a0bfSKris Buschelman pA++; 29947ba1a0bfSKris Buschelman } 29957ba1a0bfSKris Buschelman 29967ba1a0bfSKris Buschelman /* Zero the current row info for A*P */ 29977ba1a0bfSKris Buschelman for (j = 0; j < apnzj; j++) { 29987ba1a0bfSKris Buschelman apa[apj[j]] = 0.; 29997ba1a0bfSKris Buschelman apjdense[apj[j]] = 0; 30007ba1a0bfSKris Buschelman } 30017ba1a0bfSKris Buschelman } 30027ba1a0bfSKris Buschelman 30037ba1a0bfSKris Buschelman /* Assemble the final matrix and clean up */ 30049566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 30059566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 30069566063dSJacob Faibussowitsch PetscCall(PetscFree3(apa, apj, apjdense)); 300795ddefa2SHong Zhang PetscFunctionReturn(0); 300895ddefa2SHong Zhang } 30097ba1a0bfSKris Buschelman 30109371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C) { 30114222ddf1SHong Zhang Mat_Product *product = C->product; 30124222ddf1SHong Zhang Mat A = product->A, P = product->B; 30132121bac1SHong Zhang 30142121bac1SHong Zhang PetscFunctionBegin; 30159566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C)); 30162121bac1SHong Zhang PetscFunctionReturn(0); 30172121bac1SHong Zhang } 30182121bac1SHong Zhang 30199371c9d4SSatish Balay PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A, Mat PP, PetscReal fill, Mat *C) { 302095ddefa2SHong Zhang PetscFunctionBegin; 30213e0c911fSBarry Smith SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet"); 302295ddefa2SHong Zhang } 302395ddefa2SHong Zhang 30249371c9d4SSatish Balay PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A, Mat PP, Mat C) { 302595ddefa2SHong Zhang PetscFunctionBegin; 30263e0c911fSBarry Smith SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 30277ba1a0bfSKris Buschelman } 30285c65b9ecSFande Kong 3029bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat); 3030bc8e477aSFande Kong 30319371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C) { 3032bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 3033bc8e477aSFande Kong 3034bc8e477aSFande Kong PetscFunctionBegin; 303534bcad68SFande Kong 30369566063dSJacob Faibussowitsch PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C)); 3037bc8e477aSFande Kong PetscFunctionReturn(0); 3038bc8e477aSFande Kong } 3039bc8e477aSFande Kong 30404222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat); 3041bc8e477aSFande Kong 30429371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C) { 3043bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 3044bc8e477aSFande Kong 3045bc8e477aSFande Kong PetscFunctionBegin; 30469566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C)); 30474222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce; 3048bc8e477aSFande Kong PetscFunctionReturn(0); 3049bc8e477aSFande Kong } 3050bc8e477aSFande Kong 3051bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat); 3052bc8e477aSFande Kong 30539371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C) { 3054bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 3055bc8e477aSFande Kong 3056bc8e477aSFande Kong PetscFunctionBegin; 305734bcad68SFande Kong 30589566063dSJacob Faibussowitsch PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C)); 3059bc8e477aSFande Kong PetscFunctionReturn(0); 3060bc8e477aSFande Kong } 3061bc8e477aSFande Kong 30624222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat); 3063bc8e477aSFande Kong 30649371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C) { 3065bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 3066bc8e477aSFande Kong 3067bc8e477aSFande Kong PetscFunctionBegin; 306834bcad68SFande Kong 30699566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C)); 30704222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged; 3071bc8e477aSFande Kong PetscFunctionReturn(0); 3072bc8e477aSFande Kong } 3073bc8e477aSFande Kong 30749371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C) { 30754222ddf1SHong Zhang Mat_Product *product = C->product; 30764222ddf1SHong Zhang Mat A = product->A, P = product->B; 30774222ddf1SHong Zhang PetscBool flg; 3078bc8e477aSFande Kong 3079bc8e477aSFande Kong PetscFunctionBegin; 30809566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg, "allatonce", &flg)); 30814222ddf1SHong Zhang if (flg) { 30829566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C)); 30834222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 30844222ddf1SHong Zhang PetscFunctionReturn(0); 3085bc8e477aSFande Kong } 308634bcad68SFande Kong 30879566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg, "allatonce_merged", &flg)); 30884222ddf1SHong Zhang if (flg) { 30899566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C)); 30904222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 30914222ddf1SHong Zhang PetscFunctionReturn(0); 30924222ddf1SHong Zhang } 309334bcad68SFande Kong 30944222ddf1SHong Zhang SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported"); 3095bc8e477aSFande Kong } 3096bc8e477aSFande Kong 30979371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { 30989c4fc4c7SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 3099ceb03754SKris Buschelman Mat a = b->AIJ, B; 31009c4fc4c7SBarry Smith Mat_SeqAIJ *aij = (Mat_SeqAIJ *)a->data; 31010fd73130SBarry Smith PetscInt m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof; 3102ba8c8a56SBarry Smith PetscInt *cols; 3103ba8c8a56SBarry Smith PetscScalar *vals; 31049c4fc4c7SBarry Smith 31059c4fc4c7SBarry Smith PetscFunctionBegin; 31069566063dSJacob Faibussowitsch PetscCall(MatGetSize(a, &m, &n)); 31079566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(dof * m, &ilen)); 31089c4fc4c7SBarry Smith for (i = 0; i < m; i++) { 31099c4fc4c7SBarry Smith nmax = PetscMax(nmax, aij->ilen[i]); 311026fbe8dcSKarl Rupp for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i]; 31119c4fc4c7SBarry Smith } 31129566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 31139566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, dof * m, dof * n, dof * m, dof * n)); 31149566063dSJacob Faibussowitsch PetscCall(MatSetType(B, newtype)); 31159566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(B, 0, ilen)); 31169566063dSJacob Faibussowitsch PetscCall(PetscFree(ilen)); 31179566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nmax, &icols)); 31189c4fc4c7SBarry Smith ii = 0; 31199c4fc4c7SBarry Smith for (i = 0; i < m; i++) { 31209566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals)); 31210fd73130SBarry Smith for (j = 0; j < dof; j++) { 312226fbe8dcSKarl Rupp for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j; 31239566063dSJacob Faibussowitsch PetscCall(MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES)); 31249c4fc4c7SBarry Smith ii++; 31259c4fc4c7SBarry Smith } 31269566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals)); 31279c4fc4c7SBarry Smith } 31289566063dSJacob Faibussowitsch PetscCall(PetscFree(icols)); 31299566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 31309566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3131ceb03754SKris Buschelman 3132511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 31339566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &B)); 3134c3d102feSKris Buschelman } else { 3135ceb03754SKris Buschelman *newmat = B; 3136c3d102feSKris Buschelman } 31379c4fc4c7SBarry Smith PetscFunctionReturn(0); 31389c4fc4c7SBarry Smith } 31399c4fc4c7SBarry Smith 3140c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h> 3141be1d678aSKris Buschelman 31429371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { 31430fd73130SBarry Smith Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)A->data; 3144ceb03754SKris Buschelman Mat MatAIJ = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B; 31450fd73130SBarry Smith Mat MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ; 31460fd73130SBarry Smith Mat_SeqAIJ *AIJ = (Mat_SeqAIJ *)MatAIJ->data; 31470fd73130SBarry Smith Mat_SeqAIJ *OAIJ = (Mat_SeqAIJ *)MatOAIJ->data; 31480fd73130SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)maij->A->data; 31490298fd71SBarry Smith PetscInt dof = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0; 31500298fd71SBarry Smith PetscInt *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL; 31510fd73130SBarry Smith PetscInt rstart, cstart, *garray, ii, k; 31520fd73130SBarry Smith PetscScalar *vals, *ovals; 31530fd73130SBarry Smith 31540fd73130SBarry Smith PetscFunctionBegin; 31559566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz)); 3156d0f46423SBarry Smith for (i = 0; i < A->rmap->n / dof; i++) { 31570fd73130SBarry Smith nmax = PetscMax(nmax, AIJ->ilen[i]); 31580fd73130SBarry Smith onmax = PetscMax(onmax, OAIJ->ilen[i]); 31590fd73130SBarry Smith for (j = 0; j < dof; j++) { 31600fd73130SBarry Smith dnz[dof * i + j] = AIJ->ilen[i]; 31610fd73130SBarry Smith onz[dof * i + j] = OAIJ->ilen[i]; 31620fd73130SBarry Smith } 31630fd73130SBarry Smith } 31649566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 31659566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 31669566063dSJacob Faibussowitsch PetscCall(MatSetType(B, newtype)); 31679566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz)); 31689566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(B, dof)); 31699566063dSJacob Faibussowitsch PetscCall(PetscFree2(dnz, onz)); 31700fd73130SBarry Smith 31719566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nmax, &icols, onmax, &oicols)); 3172d0f46423SBarry Smith rstart = dof * maij->A->rmap->rstart; 3173d0f46423SBarry Smith cstart = dof * maij->A->cmap->rstart; 31740fd73130SBarry Smith garray = mpiaij->garray; 31750fd73130SBarry Smith 31760fd73130SBarry Smith ii = rstart; 3177d0f46423SBarry Smith for (i = 0; i < A->rmap->n / dof; i++) { 31789566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals)); 31799566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals)); 31800fd73130SBarry Smith for (j = 0; j < dof; j++) { 3181ad540459SPierre Jolivet for (k = 0; k < ncols; k++) icols[k] = cstart + dof * cols[k] + j; 3182ad540459SPierre Jolivet for (k = 0; k < oncols; k++) oicols[k] = dof * garray[ocols[k]] + j; 31839566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES)); 31849566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES)); 31850fd73130SBarry Smith ii++; 31860fd73130SBarry Smith } 31879566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals)); 31889566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals)); 31890fd73130SBarry Smith } 31909566063dSJacob Faibussowitsch PetscCall(PetscFree2(icols, oicols)); 31910fd73130SBarry Smith 31929566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 31939566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3194ceb03754SKris Buschelman 3195511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 31967adad957SLisandro Dalcin PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 31977adad957SLisandro Dalcin ((PetscObject)A)->refct = 1; 319826fbe8dcSKarl Rupp 31999566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &B)); 320026fbe8dcSKarl Rupp 32017adad957SLisandro Dalcin ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 3202c3d102feSKris Buschelman } else { 3203ceb03754SKris Buschelman *newmat = B; 3204c3d102feSKris Buschelman } 32050fd73130SBarry Smith PetscFunctionReturn(0); 32060fd73130SBarry Smith } 32070fd73130SBarry Smith 32089371c9d4SSatish Balay PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat) { 32099e516c8fSBarry Smith Mat A; 32109e516c8fSBarry Smith 32119e516c8fSBarry Smith PetscFunctionBegin; 32129566063dSJacob Faibussowitsch PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 32139566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat)); 32149566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 32159e516c8fSBarry Smith PetscFunctionReturn(0); 32169e516c8fSBarry Smith } 32170fd73130SBarry Smith 32189371c9d4SSatish Balay PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[]) { 3219ec626240SStefano Zampini Mat A; 3220ec626240SStefano Zampini 3221ec626240SStefano Zampini PetscFunctionBegin; 32229566063dSJacob Faibussowitsch PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 32239566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrices(A, n, irow, icol, scall, submat)); 32249566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 3225ec626240SStefano Zampini PetscFunctionReturn(0); 3226ec626240SStefano Zampini } 3227ec626240SStefano Zampini 3228bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */ 3229480dffcfSJed Brown /*@ 32300bad9183SKris Buschelman MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 32310bad9183SKris Buschelman operations for multicomponent problems. It interpolates each component the same 323211a5261eSBarry Smith way independently. The matrix type is based on `MATSEQAIJ` for sequential matrices, 323311a5261eSBarry Smith and `MATMPIAIJ` for distributed matrices. 32340bad9183SKris Buschelman 3235ff585edeSJed Brown Collective 3236ff585edeSJed Brown 3237ff585edeSJed Brown Input Parameters: 323811a5261eSBarry Smith + A - the `MATAIJ` matrix describing the action on blocks 3239ff585edeSJed Brown - dof - the block size (number of components per node) 3240ff585edeSJed Brown 3241ff585edeSJed Brown Output Parameter: 324211a5261eSBarry Smith . maij - the new `MATMAIJ` matrix 3243ff585edeSJed Brown 32440bad9183SKris Buschelman Operations provided: 324567b8a455SSatish Balay .vb 324611a5261eSBarry Smith MatMult() 324711a5261eSBarry Smith MatMultTranspose() 324811a5261eSBarry Smith MatMultAdd() 324911a5261eSBarry Smith MatMultTransposeAdd() 325011a5261eSBarry Smith MatView() 325167b8a455SSatish Balay .ve 32520bad9183SKris Buschelman 32530bad9183SKris Buschelman Level: advanced 32540bad9183SKris Buschelman 325511a5261eSBarry Smith .seealso: `MATAIJ`, `MATMAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MATMAIJ` 3256ff585edeSJed Brown @*/ 32579371c9d4SSatish Balay PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij) { 3258b24ad042SBarry Smith PetscInt n; 325982b1193eSBarry Smith Mat B; 326090f67b22SBarry Smith PetscBool flg; 3261fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 3262fdc842d1SBarry Smith /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */ 3263fdc842d1SBarry Smith PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE; 3264fdc842d1SBarry Smith #endif 326582b1193eSBarry Smith 326682b1193eSBarry Smith PetscFunctionBegin; 3267fdc842d1SBarry Smith dof = PetscAbs(dof); 32689566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)A)); 3269d72c5c08SBarry Smith 327026fbe8dcSKarl Rupp if (dof == 1) *maij = A; 327126fbe8dcSKarl Rupp else { 32729566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3273c248e2fdSStefano Zampini /* propagate vec type */ 32749566063dSJacob Faibussowitsch PetscCall(MatSetVecType(B, A->defaultvectype)); 32759566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N)); 32769566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->rmap, dof)); 32779566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->cmap, dof)); 32789566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 32799566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 328026fbe8dcSKarl Rupp 3281cd3bbe55SBarry Smith B->assembled = PETSC_TRUE; 3282d72c5c08SBarry Smith 328390f67b22SBarry Smith PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg)); 328490f67b22SBarry Smith if (flg) { 3285feb9fe23SJed Brown Mat_SeqMAIJ *b; 3286feb9fe23SJed Brown 32879566063dSJacob Faibussowitsch PetscCall(MatSetType(B, MATSEQMAIJ)); 328826fbe8dcSKarl Rupp 32890298fd71SBarry Smith B->ops->setup = NULL; 32904cb9d9b8SBarry Smith B->ops->destroy = MatDestroy_SeqMAIJ; 32910fd73130SBarry Smith B->ops->view = MatView_SeqMAIJ; 32924222ddf1SHong Zhang 3293feb9fe23SJed Brown b = (Mat_SeqMAIJ *)B->data; 3294b9b97703SBarry Smith b->dof = dof; 32954cb9d9b8SBarry Smith b->AIJ = A; 329626fbe8dcSKarl Rupp 3297d72c5c08SBarry Smith if (dof == 2) { 3298cd3bbe55SBarry Smith B->ops->mult = MatMult_SeqMAIJ_2; 3299cd3bbe55SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_2; 3300cd3bbe55SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 3301cd3bbe55SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 3302bcc973b7SBarry Smith } else if (dof == 3) { 3303bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_3; 3304bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_3; 3305bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 3306bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 3307bcc973b7SBarry Smith } else if (dof == 4) { 3308bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_4; 3309bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_4; 3310bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 3311bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 3312f9fae5adSBarry Smith } else if (dof == 5) { 3313f9fae5adSBarry Smith B->ops->mult = MatMult_SeqMAIJ_5; 3314f9fae5adSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_5; 3315f9fae5adSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 3316f9fae5adSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 33176cd98798SBarry Smith } else if (dof == 6) { 33186cd98798SBarry Smith B->ops->mult = MatMult_SeqMAIJ_6; 33196cd98798SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_6; 33206cd98798SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 33216cd98798SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 3322ed8eea03SBarry Smith } else if (dof == 7) { 3323ed8eea03SBarry Smith B->ops->mult = MatMult_SeqMAIJ_7; 3324ed8eea03SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_7; 3325ed8eea03SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 3326ed8eea03SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 332766ed3db0SBarry Smith } else if (dof == 8) { 332866ed3db0SBarry Smith B->ops->mult = MatMult_SeqMAIJ_8; 332966ed3db0SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_8; 333066ed3db0SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 333166ed3db0SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 33322b692628SMatthew Knepley } else if (dof == 9) { 33332b692628SMatthew Knepley B->ops->mult = MatMult_SeqMAIJ_9; 33342b692628SMatthew Knepley B->ops->multadd = MatMultAdd_SeqMAIJ_9; 33352b692628SMatthew Knepley B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 33362b692628SMatthew Knepley B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 3337b9cda22cSBarry Smith } else if (dof == 10) { 3338b9cda22cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_10; 3339b9cda22cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_10; 3340b9cda22cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 3341b9cda22cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 3342dbdd7285SBarry Smith } else if (dof == 11) { 3343dbdd7285SBarry Smith B->ops->mult = MatMult_SeqMAIJ_11; 3344dbdd7285SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_11; 3345dbdd7285SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 3346dbdd7285SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 33472f7816d4SBarry Smith } else if (dof == 16) { 33482f7816d4SBarry Smith B->ops->mult = MatMult_SeqMAIJ_16; 33492f7816d4SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_16; 33502f7816d4SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 33512f7816d4SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 3352ed1c418cSBarry Smith } else if (dof == 18) { 3353ed1c418cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_18; 3354ed1c418cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_18; 3355ed1c418cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 3356ed1c418cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 335782b1193eSBarry Smith } else { 33586861f193SBarry Smith B->ops->mult = MatMult_SeqMAIJ_N; 33596861f193SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_N; 33606861f193SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 33616861f193SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 336282b1193eSBarry Smith } 3363fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 33649566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ)); 3365fdc842d1SBarry Smith #endif 33669566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ)); 33679566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ)); 3368f4a53059SBarry Smith } else { 3369f4a53059SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 3370feb9fe23SJed Brown Mat_MPIMAIJ *b; 3371f4a53059SBarry Smith IS from, to; 3372f4a53059SBarry Smith Vec gvec; 3373f4a53059SBarry Smith 33749566063dSJacob Faibussowitsch PetscCall(MatSetType(B, MATMPIMAIJ)); 337526fbe8dcSKarl Rupp 33760298fd71SBarry Smith B->ops->setup = NULL; 3377d72c5c08SBarry Smith B->ops->destroy = MatDestroy_MPIMAIJ; 33780fd73130SBarry Smith B->ops->view = MatView_MPIMAIJ; 337926fbe8dcSKarl Rupp 3380b9b97703SBarry Smith b = (Mat_MPIMAIJ *)B->data; 3381b9b97703SBarry Smith b->dof = dof; 3382b9b97703SBarry Smith b->A = A; 338326fbe8dcSKarl Rupp 33849566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ)); 33859566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ)); 33864cb9d9b8SBarry Smith 33879566063dSJacob Faibussowitsch PetscCall(VecGetSize(mpiaij->lvec, &n)); 33889566063dSJacob Faibussowitsch PetscCall(VecCreate(PETSC_COMM_SELF, &b->w)); 33899566063dSJacob Faibussowitsch PetscCall(VecSetSizes(b->w, n * dof, n * dof)); 33909566063dSJacob Faibussowitsch PetscCall(VecSetBlockSize(b->w, dof)); 33919566063dSJacob Faibussowitsch PetscCall(VecSetType(b->w, VECSEQ)); 3392f4a53059SBarry Smith 3393f4a53059SBarry Smith /* create two temporary Index sets for build scatter gather */ 33949566063dSJacob Faibussowitsch PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from)); 33959566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to)); 3396f4a53059SBarry Smith 3397f4a53059SBarry Smith /* create temporary global vector to generate scatter context */ 33989566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec)); 3399f4a53059SBarry Smith 3400f4a53059SBarry Smith /* generate the scatter context */ 34019566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(gvec, from, b->w, to, &b->ctx)); 3402f4a53059SBarry Smith 34039566063dSJacob Faibussowitsch PetscCall(ISDestroy(&from)); 34049566063dSJacob Faibussowitsch PetscCall(ISDestroy(&to)); 34059566063dSJacob Faibussowitsch PetscCall(VecDestroy(&gvec)); 3406f4a53059SBarry Smith 3407f4a53059SBarry Smith B->ops->mult = MatMult_MPIMAIJ_dof; 34084cb9d9b8SBarry Smith B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 34094cb9d9b8SBarry Smith B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 34104cb9d9b8SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 341126fbe8dcSKarl Rupp 3412fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 34139566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ)); 3414fdc842d1SBarry Smith #endif 34159566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ)); 34169566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ)); 3417f4a53059SBarry Smith } 34187dae84e0SHong Zhang B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ; 3419ec626240SStefano Zampini B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ; 34209566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 3421fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 3422fdc842d1SBarry Smith /* temporary until we have CUDA implementation of MAIJ */ 3423fdc842d1SBarry Smith { 3424fdc842d1SBarry Smith PetscBool flg; 3425fdc842d1SBarry Smith if (convert) { 34269566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, "")); 342748a46eb9SPierre Jolivet if (flg) PetscCall(MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B)); 3428fdc842d1SBarry Smith } 3429fdc842d1SBarry Smith } 3430fdc842d1SBarry Smith #endif 3431cd3bbe55SBarry Smith *maij = B; 3432d72c5c08SBarry Smith } 343382b1193eSBarry Smith PetscFunctionReturn(0); 343482b1193eSBarry Smith } 3435