1be1d678aSKris Buschelman 2c6db04a5SJed Brown #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 3c6db04a5SJed Brown #include <../src/mat/utils/freespace.h> 482b1193eSBarry Smith 5b350b9acSSatish Balay /*@ 6*11a5261eSBarry Smith MatMAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATMAIJ` matrix 7ff585edeSJed Brown 8*11a5261eSBarry Smith Not Collective, but if the `MATMAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel 9ff585edeSJed Brown 10ff585edeSJed Brown Input Parameter: 11*11a5261eSBarry Smith . A - the `MATMAIJ` matrix 12ff585edeSJed Brown 13ff585edeSJed Brown Output Parameter: 14*11a5261eSBarry Smith . B - the `MATAIJ` matrix 15ff585edeSJed Brown 16ff585edeSJed Brown Level: advanced 17ff585edeSJed Brown 18*11a5261eSBarry Smith Note: 19*11a5261eSBarry Smith The reference count on the `MATAIJ` matrix is not increased so you should not destroy it. 20ff585edeSJed Brown 21*11a5261eSBarry Smith .seealso: `MATMAIJ`, `MATAIJ`, `MatCreateMAIJ()` 22ff585edeSJed Brown @*/ 239371c9d4SSatish Balay PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B) { 24ace3abfcSBarry Smith PetscBool ismpimaij, isseqmaij; 25b9b97703SBarry Smith 26b9b97703SBarry Smith PetscFunctionBegin; 279566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij)); 289566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij)); 29b9b97703SBarry Smith if (ismpimaij) { 30b9b97703SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 31b9b97703SBarry Smith 32b9b97703SBarry Smith *B = b->A; 33b9b97703SBarry Smith } else if (isseqmaij) { 34b9b97703SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 35b9b97703SBarry Smith 36b9b97703SBarry Smith *B = b->AIJ; 37b9b97703SBarry Smith } else { 38b9b97703SBarry Smith *B = A; 39b9b97703SBarry Smith } 40b9b97703SBarry Smith PetscFunctionReturn(0); 41b9b97703SBarry Smith } 42b9b97703SBarry Smith 43480dffcfSJed Brown /*@ 44*11a5261eSBarry Smith MatMAIJRedimension - Get a new `MATMAIJ` matrix with the same action, but for a different block size 45ff585edeSJed Brown 463f9fe445SBarry Smith Logically Collective 47ff585edeSJed Brown 48d8d19677SJose E. Roman Input Parameters: 49*11a5261eSBarry Smith + A - the `MATMAIJ` matrix 50ff585edeSJed Brown - dof - the block size for the new matrix 51ff585edeSJed Brown 52ff585edeSJed Brown Output Parameter: 53*11a5261eSBarry Smith . B - the new `MATMAIJ` matrix 54ff585edeSJed Brown 55ff585edeSJed Brown Level: advanced 56ff585edeSJed Brown 57*11a5261eSBarry Smith .seealso: `MATMAIJ`, `MatCreateMAIJ()` 58ff585edeSJed Brown @*/ 599371c9d4SSatish Balay PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B) { 600298fd71SBarry Smith Mat Aij = NULL; 61b9b97703SBarry Smith 62b9b97703SBarry Smith PetscFunctionBegin; 63c5eb9154SBarry Smith PetscValidLogicalCollectiveInt(A, dof, 2); 649566063dSJacob Faibussowitsch PetscCall(MatMAIJGetAIJ(A, &Aij)); 659566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(Aij, dof, B)); 66b9b97703SBarry Smith PetscFunctionReturn(0); 67b9b97703SBarry Smith } 68b9b97703SBarry Smith 699371c9d4SSatish Balay PetscErrorCode MatDestroy_SeqMAIJ(Mat A) { 704cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 7182b1193eSBarry Smith 7282b1193eSBarry Smith PetscFunctionBegin; 739566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->AIJ)); 749566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 752e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL)); 769566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL)); 779566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL)); 784cb9d9b8SBarry Smith PetscFunctionReturn(0); 794cb9d9b8SBarry Smith } 804cb9d9b8SBarry Smith 819371c9d4SSatish Balay PetscErrorCode MatSetUp_MAIJ(Mat A) { 82356c569eSBarry Smith PetscFunctionBegin; 83ce94432eSBarry Smith SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices"); 84356c569eSBarry Smith } 85356c569eSBarry Smith 869371c9d4SSatish Balay PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer) { 870fd73130SBarry Smith Mat B; 880fd73130SBarry Smith 890fd73130SBarry Smith PetscFunctionBegin; 909566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 919566063dSJacob Faibussowitsch PetscCall(MatView(B, viewer)); 929566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 930fd73130SBarry Smith PetscFunctionReturn(0); 940fd73130SBarry Smith } 950fd73130SBarry Smith 969371c9d4SSatish Balay PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer) { 970fd73130SBarry Smith Mat B; 980fd73130SBarry Smith 990fd73130SBarry Smith PetscFunctionBegin; 1009566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B)); 1019566063dSJacob Faibussowitsch PetscCall(MatView(B, viewer)); 1029566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 1030fd73130SBarry Smith PetscFunctionReturn(0); 1040fd73130SBarry Smith } 1050fd73130SBarry Smith 1069371c9d4SSatish Balay PetscErrorCode MatDestroy_MPIMAIJ(Mat A) { 1074cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 1084cb9d9b8SBarry Smith 1094cb9d9b8SBarry Smith PetscFunctionBegin; 1109566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->AIJ)); 1119566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->OAIJ)); 1129566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->A)); 1139566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&b->ctx)); 1149566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->w)); 1159566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 1162e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL)); 1179566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL)); 1189566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL)); 1199566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); 120b9b97703SBarry Smith PetscFunctionReturn(0); 121b9b97703SBarry Smith } 122b9b97703SBarry Smith 1230bad9183SKris Buschelman /*MC 124fafad747SKris Buschelman MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 1250bad9183SKris Buschelman multicomponent problems, interpolating or restricting each component the same way independently. 126*11a5261eSBarry Smith The matrix type is based on `MATSEQAIJ` for sequential matrices, and `MATMPIAIJ` for distributed matrices. 1270bad9183SKris Buschelman 1280bad9183SKris Buschelman Operations provided: 12967b8a455SSatish Balay .vb 130*11a5261eSBarry Smith MatMult() 131*11a5261eSBarry Smith MatMultTranspose() 132*11a5261eSBarry Smith MatMultAdd() 133*11a5261eSBarry Smith MatMultTransposeAdd() 13467b8a455SSatish Balay .ve 1350bad9183SKris Buschelman 1360bad9183SKris Buschelman Level: advanced 1370bad9183SKris Buschelman 138*11a5261eSBarry Smith .seealso: `MATAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()` 1390bad9183SKris Buschelman M*/ 1400bad9183SKris Buschelman 1419371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) { 1424cb9d9b8SBarry Smith Mat_MPIMAIJ *b; 14364b52464SHong Zhang PetscMPIInt size; 14482b1193eSBarry Smith 14582b1193eSBarry Smith PetscFunctionBegin; 1469566063dSJacob Faibussowitsch PetscCall(PetscNewLog(A, &b)); 147b0a32e0cSBarry Smith A->data = (void *)b; 14826fbe8dcSKarl Rupp 1499566063dSJacob Faibussowitsch PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps))); 15026fbe8dcSKarl Rupp 151356c569eSBarry Smith A->ops->setup = MatSetUp_MAIJ; 152f4a53059SBarry Smith 153f4259b30SLisandro Dalcin b->AIJ = NULL; 154cd3bbe55SBarry Smith b->dof = 0; 155f4259b30SLisandro Dalcin b->OAIJ = NULL; 156f4259b30SLisandro Dalcin b->ctx = NULL; 157f4259b30SLisandro Dalcin b->w = NULL; 1589566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 15964b52464SHong Zhang if (size == 1) { 1609566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ)); 16164b52464SHong Zhang } else { 1629566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ)); 16364b52464SHong Zhang } 16432e7c8b0SBarry Smith A->preallocated = PETSC_TRUE; 16532e7c8b0SBarry Smith A->assembled = PETSC_TRUE; 16682b1193eSBarry Smith PetscFunctionReturn(0); 16782b1193eSBarry Smith } 16882b1193eSBarry Smith 169cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 1709371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_2(Mat A, Vec xx, Vec yy) { 1714cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 172bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 173d2840e09SBarry Smith const PetscScalar *x, *v; 174d2840e09SBarry Smith PetscScalar *y, sum1, sum2; 175d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 176d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 17782b1193eSBarry Smith 178bcc973b7SBarry Smith PetscFunctionBegin; 1799566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 1809566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 181bcc973b7SBarry Smith idx = a->j; 182bcc973b7SBarry Smith v = a->a; 183bcc973b7SBarry Smith ii = a->i; 184bcc973b7SBarry Smith 185bcc973b7SBarry Smith for (i = 0; i < m; i++) { 186bcc973b7SBarry Smith jrow = ii[i]; 187bcc973b7SBarry Smith n = ii[i + 1] - jrow; 188bcc973b7SBarry Smith sum1 = 0.0; 189bcc973b7SBarry Smith sum2 = 0.0; 19026fbe8dcSKarl Rupp 191b3c51c66SMatthew Knepley nonzerorow += (n > 0); 192bcc973b7SBarry Smith for (j = 0; j < n; j++) { 193bcc973b7SBarry Smith sum1 += v[jrow] * x[2 * idx[jrow]]; 194bcc973b7SBarry Smith sum2 += v[jrow] * x[2 * idx[jrow] + 1]; 195bcc973b7SBarry Smith jrow++; 196bcc973b7SBarry Smith } 197bcc973b7SBarry Smith y[2 * i] = sum1; 198bcc973b7SBarry Smith y[2 * i + 1] = sum2; 199bcc973b7SBarry Smith } 200bcc973b7SBarry Smith 2019566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(4.0 * a->nz - 2.0 * nonzerorow)); 2029566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 2039566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 20482b1193eSBarry Smith PetscFunctionReturn(0); 20582b1193eSBarry Smith } 206bcc973b7SBarry Smith 2079371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A, Vec xx, Vec yy) { 2084cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 209bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 210d2840e09SBarry Smith const PetscScalar *x, *v; 211d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2; 212d2840e09SBarry Smith PetscInt n, i; 213d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 21482b1193eSBarry Smith 215bcc973b7SBarry Smith PetscFunctionBegin; 2169566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 2179566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 2189566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 219bfec09a0SHong Zhang 220bcc973b7SBarry Smith for (i = 0; i < m; i++) { 221bfec09a0SHong Zhang idx = a->j + a->i[i]; 222bfec09a0SHong Zhang v = a->a + a->i[i]; 223bcc973b7SBarry Smith n = a->i[i + 1] - a->i[i]; 224bcc973b7SBarry Smith alpha1 = x[2 * i]; 225bcc973b7SBarry Smith alpha2 = x[2 * i + 1]; 22626fbe8dcSKarl Rupp while (n-- > 0) { 22726fbe8dcSKarl Rupp y[2 * (*idx)] += alpha1 * (*v); 22826fbe8dcSKarl Rupp y[2 * (*idx) + 1] += alpha2 * (*v); 2299371c9d4SSatish Balay idx++; 2309371c9d4SSatish Balay v++; 23126fbe8dcSKarl Rupp } 232bcc973b7SBarry Smith } 2339566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(4.0 * a->nz)); 2349566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 2359566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 23682b1193eSBarry Smith PetscFunctionReturn(0); 23782b1193eSBarry Smith } 238bcc973b7SBarry Smith 2399371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz) { 2404cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 241bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 242d2840e09SBarry Smith const PetscScalar *x, *v; 243d2840e09SBarry Smith PetscScalar *y, sum1, sum2; 244b24ad042SBarry Smith PetscInt n, i, jrow, j; 245d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 24682b1193eSBarry Smith 247bcc973b7SBarry Smith PetscFunctionBegin; 2489566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 2499566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 2509566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 251bcc973b7SBarry Smith idx = a->j; 252bcc973b7SBarry Smith v = a->a; 253bcc973b7SBarry Smith ii = a->i; 254bcc973b7SBarry Smith 255bcc973b7SBarry Smith for (i = 0; i < m; i++) { 256bcc973b7SBarry Smith jrow = ii[i]; 257bcc973b7SBarry Smith n = ii[i + 1] - jrow; 258bcc973b7SBarry Smith sum1 = 0.0; 259bcc973b7SBarry Smith sum2 = 0.0; 260bcc973b7SBarry Smith for (j = 0; j < n; j++) { 261bcc973b7SBarry Smith sum1 += v[jrow] * x[2 * idx[jrow]]; 262bcc973b7SBarry Smith sum2 += v[jrow] * x[2 * idx[jrow] + 1]; 263bcc973b7SBarry Smith jrow++; 264bcc973b7SBarry Smith } 265bcc973b7SBarry Smith y[2 * i] += sum1; 266bcc973b7SBarry Smith y[2 * i + 1] += sum2; 267bcc973b7SBarry Smith } 268bcc973b7SBarry Smith 2699566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(4.0 * a->nz)); 2709566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 2719566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 272bcc973b7SBarry Smith PetscFunctionReturn(0); 27382b1193eSBarry Smith } 2749371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz) { 2754cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 276bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 277d2840e09SBarry Smith const PetscScalar *x, *v; 278d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2; 279d2840e09SBarry Smith PetscInt n, i; 280d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 28182b1193eSBarry Smith 282bcc973b7SBarry Smith PetscFunctionBegin; 2839566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 2849566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 2859566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 286bfec09a0SHong Zhang 287bcc973b7SBarry Smith for (i = 0; i < m; i++) { 288bfec09a0SHong Zhang idx = a->j + a->i[i]; 289bfec09a0SHong Zhang v = a->a + a->i[i]; 290bcc973b7SBarry Smith n = a->i[i + 1] - a->i[i]; 291bcc973b7SBarry Smith alpha1 = x[2 * i]; 292bcc973b7SBarry Smith alpha2 = x[2 * i + 1]; 29326fbe8dcSKarl Rupp while (n-- > 0) { 29426fbe8dcSKarl Rupp y[2 * (*idx)] += alpha1 * (*v); 29526fbe8dcSKarl Rupp y[2 * (*idx) + 1] += alpha2 * (*v); 2969371c9d4SSatish Balay idx++; 2979371c9d4SSatish Balay v++; 29826fbe8dcSKarl Rupp } 299bcc973b7SBarry Smith } 3009566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(4.0 * a->nz)); 3019566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 3029566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 303bcc973b7SBarry Smith PetscFunctionReturn(0); 30482b1193eSBarry Smith } 305cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 3069371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_3(Mat A, Vec xx, Vec yy) { 3074cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 308bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 309d2840e09SBarry Smith const PetscScalar *x, *v; 310d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3; 311d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 312d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 31382b1193eSBarry Smith 314bcc973b7SBarry Smith PetscFunctionBegin; 3159566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 3169566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 317bcc973b7SBarry Smith idx = a->j; 318bcc973b7SBarry Smith v = a->a; 319bcc973b7SBarry Smith ii = a->i; 320bcc973b7SBarry Smith 321bcc973b7SBarry Smith for (i = 0; i < m; i++) { 322bcc973b7SBarry Smith jrow = ii[i]; 323bcc973b7SBarry Smith n = ii[i + 1] - jrow; 324bcc973b7SBarry Smith sum1 = 0.0; 325bcc973b7SBarry Smith sum2 = 0.0; 326bcc973b7SBarry Smith sum3 = 0.0; 32726fbe8dcSKarl Rupp 328b3c51c66SMatthew Knepley nonzerorow += (n > 0); 329bcc973b7SBarry Smith for (j = 0; j < n; j++) { 330bcc973b7SBarry Smith sum1 += v[jrow] * x[3 * idx[jrow]]; 331bcc973b7SBarry Smith sum2 += v[jrow] * x[3 * idx[jrow] + 1]; 332bcc973b7SBarry Smith sum3 += v[jrow] * x[3 * idx[jrow] + 2]; 333bcc973b7SBarry Smith jrow++; 334bcc973b7SBarry Smith } 335bcc973b7SBarry Smith y[3 * i] = sum1; 336bcc973b7SBarry Smith y[3 * i + 1] = sum2; 337bcc973b7SBarry Smith y[3 * i + 2] = sum3; 338bcc973b7SBarry Smith } 339bcc973b7SBarry Smith 3409566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(6.0 * a->nz - 3.0 * nonzerorow)); 3419566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 3429566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 343bcc973b7SBarry Smith PetscFunctionReturn(0); 344bcc973b7SBarry Smith } 345bcc973b7SBarry Smith 3469371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A, Vec xx, Vec yy) { 3474cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 348bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 349d2840e09SBarry Smith const PetscScalar *x, *v; 350d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3; 351d2840e09SBarry Smith PetscInt n, i; 352d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 353bcc973b7SBarry Smith 354bcc973b7SBarry Smith PetscFunctionBegin; 3559566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 3569566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 3579566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 358bfec09a0SHong Zhang 359bcc973b7SBarry Smith for (i = 0; i < m; i++) { 360bfec09a0SHong Zhang idx = a->j + a->i[i]; 361bfec09a0SHong Zhang v = a->a + a->i[i]; 362bcc973b7SBarry Smith n = a->i[i + 1] - a->i[i]; 363bcc973b7SBarry Smith alpha1 = x[3 * i]; 364bcc973b7SBarry Smith alpha2 = x[3 * i + 1]; 365bcc973b7SBarry Smith alpha3 = x[3 * i + 2]; 366bcc973b7SBarry Smith while (n-- > 0) { 367bcc973b7SBarry Smith y[3 * (*idx)] += alpha1 * (*v); 368bcc973b7SBarry Smith y[3 * (*idx) + 1] += alpha2 * (*v); 369bcc973b7SBarry Smith y[3 * (*idx) + 2] += alpha3 * (*v); 3709371c9d4SSatish Balay idx++; 3719371c9d4SSatish Balay v++; 372bcc973b7SBarry Smith } 373bcc973b7SBarry Smith } 3749566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(6.0 * a->nz)); 3759566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 3769566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 377bcc973b7SBarry Smith PetscFunctionReturn(0); 378bcc973b7SBarry Smith } 379bcc973b7SBarry Smith 3809371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz) { 3814cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 382bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 383d2840e09SBarry Smith const PetscScalar *x, *v; 384d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3; 385b24ad042SBarry Smith PetscInt n, i, jrow, j; 386d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 387bcc973b7SBarry Smith 388bcc973b7SBarry Smith PetscFunctionBegin; 3899566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 3909566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 3919566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 392bcc973b7SBarry Smith idx = a->j; 393bcc973b7SBarry Smith v = a->a; 394bcc973b7SBarry Smith ii = a->i; 395bcc973b7SBarry Smith 396bcc973b7SBarry Smith for (i = 0; i < m; i++) { 397bcc973b7SBarry Smith jrow = ii[i]; 398bcc973b7SBarry Smith n = ii[i + 1] - jrow; 399bcc973b7SBarry Smith sum1 = 0.0; 400bcc973b7SBarry Smith sum2 = 0.0; 401bcc973b7SBarry Smith sum3 = 0.0; 402bcc973b7SBarry Smith for (j = 0; j < n; j++) { 403bcc973b7SBarry Smith sum1 += v[jrow] * x[3 * idx[jrow]]; 404bcc973b7SBarry Smith sum2 += v[jrow] * x[3 * idx[jrow] + 1]; 405bcc973b7SBarry Smith sum3 += v[jrow] * x[3 * idx[jrow] + 2]; 406bcc973b7SBarry Smith jrow++; 407bcc973b7SBarry Smith } 408bcc973b7SBarry Smith y[3 * i] += sum1; 409bcc973b7SBarry Smith y[3 * i + 1] += sum2; 410bcc973b7SBarry Smith y[3 * i + 2] += sum3; 411bcc973b7SBarry Smith } 412bcc973b7SBarry Smith 4139566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(6.0 * a->nz)); 4149566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 4159566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 416bcc973b7SBarry Smith PetscFunctionReturn(0); 417bcc973b7SBarry Smith } 4189371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz) { 4194cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 420bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 421d2840e09SBarry Smith const PetscScalar *x, *v; 422d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3; 423d2840e09SBarry Smith PetscInt n, i; 424d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 425bcc973b7SBarry Smith 426bcc973b7SBarry Smith PetscFunctionBegin; 4279566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 4289566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 4299566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 430bcc973b7SBarry Smith for (i = 0; i < m; i++) { 431bfec09a0SHong Zhang idx = a->j + a->i[i]; 432bfec09a0SHong Zhang v = a->a + a->i[i]; 433bcc973b7SBarry Smith n = a->i[i + 1] - a->i[i]; 434bcc973b7SBarry Smith alpha1 = x[3 * i]; 435bcc973b7SBarry Smith alpha2 = x[3 * i + 1]; 436bcc973b7SBarry Smith alpha3 = x[3 * i + 2]; 437bcc973b7SBarry Smith while (n-- > 0) { 438bcc973b7SBarry Smith y[3 * (*idx)] += alpha1 * (*v); 439bcc973b7SBarry Smith y[3 * (*idx) + 1] += alpha2 * (*v); 440bcc973b7SBarry Smith y[3 * (*idx) + 2] += alpha3 * (*v); 4419371c9d4SSatish Balay idx++; 4429371c9d4SSatish Balay v++; 443bcc973b7SBarry Smith } 444bcc973b7SBarry Smith } 4459566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(6.0 * a->nz)); 4469566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 4479566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 448bcc973b7SBarry Smith PetscFunctionReturn(0); 449bcc973b7SBarry Smith } 450bcc973b7SBarry Smith 451bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/ 4529371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_4(Mat A, Vec xx, Vec yy) { 4534cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 454bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 455d2840e09SBarry Smith const PetscScalar *x, *v; 456d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4; 457d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 458d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 459bcc973b7SBarry Smith 460bcc973b7SBarry Smith PetscFunctionBegin; 4619566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 4629566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 463bcc973b7SBarry Smith idx = a->j; 464bcc973b7SBarry Smith v = a->a; 465bcc973b7SBarry Smith ii = a->i; 466bcc973b7SBarry Smith 467bcc973b7SBarry Smith for (i = 0; i < m; i++) { 468bcc973b7SBarry Smith jrow = ii[i]; 469bcc973b7SBarry Smith n = ii[i + 1] - jrow; 470bcc973b7SBarry Smith sum1 = 0.0; 471bcc973b7SBarry Smith sum2 = 0.0; 472bcc973b7SBarry Smith sum3 = 0.0; 473bcc973b7SBarry Smith sum4 = 0.0; 474b3c51c66SMatthew Knepley nonzerorow += (n > 0); 475bcc973b7SBarry Smith for (j = 0; j < n; j++) { 476bcc973b7SBarry Smith sum1 += v[jrow] * x[4 * idx[jrow]]; 477bcc973b7SBarry Smith sum2 += v[jrow] * x[4 * idx[jrow] + 1]; 478bcc973b7SBarry Smith sum3 += v[jrow] * x[4 * idx[jrow] + 2]; 479bcc973b7SBarry Smith sum4 += v[jrow] * x[4 * idx[jrow] + 3]; 480bcc973b7SBarry Smith jrow++; 481bcc973b7SBarry Smith } 482bcc973b7SBarry Smith y[4 * i] = sum1; 483bcc973b7SBarry Smith y[4 * i + 1] = sum2; 484bcc973b7SBarry Smith y[4 * i + 2] = sum3; 485bcc973b7SBarry Smith y[4 * i + 3] = sum4; 486bcc973b7SBarry Smith } 487bcc973b7SBarry Smith 4889566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(8.0 * a->nz - 4.0 * nonzerorow)); 4899566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 4909566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 491bcc973b7SBarry Smith PetscFunctionReturn(0); 492bcc973b7SBarry Smith } 493bcc973b7SBarry Smith 4949371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A, Vec xx, Vec yy) { 4954cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 496bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 497d2840e09SBarry Smith const PetscScalar *x, *v; 498d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4; 499d2840e09SBarry Smith PetscInt n, i; 500d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 501bcc973b7SBarry Smith 502bcc973b7SBarry Smith PetscFunctionBegin; 5039566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 5049566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 5059566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 506bcc973b7SBarry Smith for (i = 0; i < m; i++) { 507bfec09a0SHong Zhang idx = a->j + a->i[i]; 508bfec09a0SHong Zhang v = a->a + a->i[i]; 509bcc973b7SBarry Smith n = a->i[i + 1] - a->i[i]; 510bcc973b7SBarry Smith alpha1 = x[4 * i]; 511bcc973b7SBarry Smith alpha2 = x[4 * i + 1]; 512bcc973b7SBarry Smith alpha3 = x[4 * i + 2]; 513bcc973b7SBarry Smith alpha4 = x[4 * i + 3]; 514bcc973b7SBarry Smith while (n-- > 0) { 515bcc973b7SBarry Smith y[4 * (*idx)] += alpha1 * (*v); 516bcc973b7SBarry Smith y[4 * (*idx) + 1] += alpha2 * (*v); 517bcc973b7SBarry Smith y[4 * (*idx) + 2] += alpha3 * (*v); 518bcc973b7SBarry Smith y[4 * (*idx) + 3] += alpha4 * (*v); 5199371c9d4SSatish Balay idx++; 5209371c9d4SSatish Balay v++; 521bcc973b7SBarry Smith } 522bcc973b7SBarry Smith } 5239566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(8.0 * a->nz)); 5249566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 5259566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 526bcc973b7SBarry Smith PetscFunctionReturn(0); 527bcc973b7SBarry Smith } 528bcc973b7SBarry Smith 5299371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz) { 5304cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 531f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 532d2840e09SBarry Smith const PetscScalar *x, *v; 533d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4; 534b24ad042SBarry Smith PetscInt n, i, jrow, j; 535d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 536f9fae5adSBarry Smith 537f9fae5adSBarry Smith PetscFunctionBegin; 5389566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 5399566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 5409566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 541f9fae5adSBarry Smith idx = a->j; 542f9fae5adSBarry Smith v = a->a; 543f9fae5adSBarry Smith ii = a->i; 544f9fae5adSBarry Smith 545f9fae5adSBarry Smith for (i = 0; i < m; i++) { 546f9fae5adSBarry Smith jrow = ii[i]; 547f9fae5adSBarry Smith n = ii[i + 1] - jrow; 548f9fae5adSBarry Smith sum1 = 0.0; 549f9fae5adSBarry Smith sum2 = 0.0; 550f9fae5adSBarry Smith sum3 = 0.0; 551f9fae5adSBarry Smith sum4 = 0.0; 552f9fae5adSBarry Smith for (j = 0; j < n; j++) { 553f9fae5adSBarry Smith sum1 += v[jrow] * x[4 * idx[jrow]]; 554f9fae5adSBarry Smith sum2 += v[jrow] * x[4 * idx[jrow] + 1]; 555f9fae5adSBarry Smith sum3 += v[jrow] * x[4 * idx[jrow] + 2]; 556f9fae5adSBarry Smith sum4 += v[jrow] * x[4 * idx[jrow] + 3]; 557f9fae5adSBarry Smith jrow++; 558f9fae5adSBarry Smith } 559f9fae5adSBarry Smith y[4 * i] += sum1; 560f9fae5adSBarry Smith y[4 * i + 1] += sum2; 561f9fae5adSBarry Smith y[4 * i + 2] += sum3; 562f9fae5adSBarry Smith y[4 * i + 3] += sum4; 563f9fae5adSBarry Smith } 564f9fae5adSBarry Smith 5659566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(8.0 * a->nz)); 5669566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 5679566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 568f9fae5adSBarry Smith PetscFunctionReturn(0); 569bcc973b7SBarry Smith } 5709371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz) { 5714cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 572f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 573d2840e09SBarry Smith const PetscScalar *x, *v; 574d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4; 575d2840e09SBarry Smith PetscInt n, i; 576d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 577f9fae5adSBarry Smith 578f9fae5adSBarry Smith PetscFunctionBegin; 5799566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 5809566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 5819566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 582bfec09a0SHong Zhang 583f9fae5adSBarry Smith for (i = 0; i < m; i++) { 584bfec09a0SHong Zhang idx = a->j + a->i[i]; 585bfec09a0SHong Zhang v = a->a + a->i[i]; 586f9fae5adSBarry Smith n = a->i[i + 1] - a->i[i]; 587f9fae5adSBarry Smith alpha1 = x[4 * i]; 588f9fae5adSBarry Smith alpha2 = x[4 * i + 1]; 589f9fae5adSBarry Smith alpha3 = x[4 * i + 2]; 590f9fae5adSBarry Smith alpha4 = x[4 * i + 3]; 591f9fae5adSBarry Smith while (n-- > 0) { 592f9fae5adSBarry Smith y[4 * (*idx)] += alpha1 * (*v); 593f9fae5adSBarry Smith y[4 * (*idx) + 1] += alpha2 * (*v); 594f9fae5adSBarry Smith y[4 * (*idx) + 2] += alpha3 * (*v); 595f9fae5adSBarry Smith y[4 * (*idx) + 3] += alpha4 * (*v); 5969371c9d4SSatish Balay idx++; 5979371c9d4SSatish Balay v++; 598f9fae5adSBarry Smith } 599f9fae5adSBarry Smith } 6009566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(8.0 * a->nz)); 6019566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 6029566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 603f9fae5adSBarry Smith PetscFunctionReturn(0); 604f9fae5adSBarry Smith } 605f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/ 6066cd98798SBarry Smith 6079371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_5(Mat A, Vec xx, Vec yy) { 6084cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 609f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 610d2840e09SBarry Smith const PetscScalar *x, *v; 611d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5; 612d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 613d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 614f9fae5adSBarry Smith 615f9fae5adSBarry Smith PetscFunctionBegin; 6169566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 6179566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 618f9fae5adSBarry Smith idx = a->j; 619f9fae5adSBarry Smith v = a->a; 620f9fae5adSBarry Smith ii = a->i; 621f9fae5adSBarry Smith 622f9fae5adSBarry Smith for (i = 0; i < m; i++) { 623f9fae5adSBarry Smith jrow = ii[i]; 624f9fae5adSBarry Smith n = ii[i + 1] - jrow; 625f9fae5adSBarry Smith sum1 = 0.0; 626f9fae5adSBarry Smith sum2 = 0.0; 627f9fae5adSBarry Smith sum3 = 0.0; 628f9fae5adSBarry Smith sum4 = 0.0; 629f9fae5adSBarry Smith sum5 = 0.0; 63026fbe8dcSKarl Rupp 631b3c51c66SMatthew Knepley nonzerorow += (n > 0); 632f9fae5adSBarry Smith for (j = 0; j < n; j++) { 633f9fae5adSBarry Smith sum1 += v[jrow] * x[5 * idx[jrow]]; 634f9fae5adSBarry Smith sum2 += v[jrow] * x[5 * idx[jrow] + 1]; 635f9fae5adSBarry Smith sum3 += v[jrow] * x[5 * idx[jrow] + 2]; 636f9fae5adSBarry Smith sum4 += v[jrow] * x[5 * idx[jrow] + 3]; 637f9fae5adSBarry Smith sum5 += v[jrow] * x[5 * idx[jrow] + 4]; 638f9fae5adSBarry Smith jrow++; 639f9fae5adSBarry Smith } 640f9fae5adSBarry Smith y[5 * i] = sum1; 641f9fae5adSBarry Smith y[5 * i + 1] = sum2; 642f9fae5adSBarry Smith y[5 * i + 2] = sum3; 643f9fae5adSBarry Smith y[5 * i + 3] = sum4; 644f9fae5adSBarry Smith y[5 * i + 4] = sum5; 645f9fae5adSBarry Smith } 646f9fae5adSBarry Smith 6479566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(10.0 * a->nz - 5.0 * nonzerorow)); 6489566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 6499566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 650f9fae5adSBarry Smith PetscFunctionReturn(0); 651f9fae5adSBarry Smith } 652f9fae5adSBarry Smith 6539371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A, Vec xx, Vec yy) { 6544cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 655f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 656d2840e09SBarry Smith const PetscScalar *x, *v; 657d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5; 658d2840e09SBarry Smith PetscInt n, i; 659d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 660f9fae5adSBarry Smith 661f9fae5adSBarry Smith PetscFunctionBegin; 6629566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 6639566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 6649566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 665bfec09a0SHong Zhang 666f9fae5adSBarry Smith for (i = 0; i < m; i++) { 667bfec09a0SHong Zhang idx = a->j + a->i[i]; 668bfec09a0SHong Zhang v = a->a + a->i[i]; 669f9fae5adSBarry Smith n = a->i[i + 1] - a->i[i]; 670f9fae5adSBarry Smith alpha1 = x[5 * i]; 671f9fae5adSBarry Smith alpha2 = x[5 * i + 1]; 672f9fae5adSBarry Smith alpha3 = x[5 * i + 2]; 673f9fae5adSBarry Smith alpha4 = x[5 * i + 3]; 674f9fae5adSBarry Smith alpha5 = x[5 * i + 4]; 675f9fae5adSBarry Smith while (n-- > 0) { 676f9fae5adSBarry Smith y[5 * (*idx)] += alpha1 * (*v); 677f9fae5adSBarry Smith y[5 * (*idx) + 1] += alpha2 * (*v); 678f9fae5adSBarry Smith y[5 * (*idx) + 2] += alpha3 * (*v); 679f9fae5adSBarry Smith y[5 * (*idx) + 3] += alpha4 * (*v); 680f9fae5adSBarry Smith y[5 * (*idx) + 4] += alpha5 * (*v); 6819371c9d4SSatish Balay idx++; 6829371c9d4SSatish Balay v++; 683f9fae5adSBarry Smith } 684f9fae5adSBarry Smith } 6859566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(10.0 * a->nz)); 6869566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 6879566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 688f9fae5adSBarry Smith PetscFunctionReturn(0); 689f9fae5adSBarry Smith } 690f9fae5adSBarry Smith 6919371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz) { 6924cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 693f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 694d2840e09SBarry Smith const PetscScalar *x, *v; 695d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5; 696b24ad042SBarry Smith PetscInt n, i, jrow, j; 697d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 698f9fae5adSBarry Smith 699f9fae5adSBarry Smith PetscFunctionBegin; 7009566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 7019566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 7029566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 703f9fae5adSBarry Smith idx = a->j; 704f9fae5adSBarry Smith v = a->a; 705f9fae5adSBarry Smith ii = a->i; 706f9fae5adSBarry Smith 707f9fae5adSBarry Smith for (i = 0; i < m; i++) { 708f9fae5adSBarry Smith jrow = ii[i]; 709f9fae5adSBarry Smith n = ii[i + 1] - jrow; 710f9fae5adSBarry Smith sum1 = 0.0; 711f9fae5adSBarry Smith sum2 = 0.0; 712f9fae5adSBarry Smith sum3 = 0.0; 713f9fae5adSBarry Smith sum4 = 0.0; 714f9fae5adSBarry Smith sum5 = 0.0; 715f9fae5adSBarry Smith for (j = 0; j < n; j++) { 716f9fae5adSBarry Smith sum1 += v[jrow] * x[5 * idx[jrow]]; 717f9fae5adSBarry Smith sum2 += v[jrow] * x[5 * idx[jrow] + 1]; 718f9fae5adSBarry Smith sum3 += v[jrow] * x[5 * idx[jrow] + 2]; 719f9fae5adSBarry Smith sum4 += v[jrow] * x[5 * idx[jrow] + 3]; 720f9fae5adSBarry Smith sum5 += v[jrow] * x[5 * idx[jrow] + 4]; 721f9fae5adSBarry Smith jrow++; 722f9fae5adSBarry Smith } 723f9fae5adSBarry Smith y[5 * i] += sum1; 724f9fae5adSBarry Smith y[5 * i + 1] += sum2; 725f9fae5adSBarry Smith y[5 * i + 2] += sum3; 726f9fae5adSBarry Smith y[5 * i + 3] += sum4; 727f9fae5adSBarry Smith y[5 * i + 4] += sum5; 728f9fae5adSBarry Smith } 729f9fae5adSBarry Smith 7309566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(10.0 * a->nz)); 7319566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 7329566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 733f9fae5adSBarry Smith PetscFunctionReturn(0); 734f9fae5adSBarry Smith } 735f9fae5adSBarry Smith 7369371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz) { 7374cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 738f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 739d2840e09SBarry Smith const PetscScalar *x, *v; 740d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5; 741d2840e09SBarry Smith PetscInt n, i; 742d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 743f9fae5adSBarry Smith 744f9fae5adSBarry Smith PetscFunctionBegin; 7459566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 7469566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 7479566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 748bfec09a0SHong Zhang 749f9fae5adSBarry Smith for (i = 0; i < m; i++) { 750bfec09a0SHong Zhang idx = a->j + a->i[i]; 751bfec09a0SHong Zhang v = a->a + a->i[i]; 752f9fae5adSBarry Smith n = a->i[i + 1] - a->i[i]; 753f9fae5adSBarry Smith alpha1 = x[5 * i]; 754f9fae5adSBarry Smith alpha2 = x[5 * i + 1]; 755f9fae5adSBarry Smith alpha3 = x[5 * i + 2]; 756f9fae5adSBarry Smith alpha4 = x[5 * i + 3]; 757f9fae5adSBarry Smith alpha5 = x[5 * i + 4]; 758f9fae5adSBarry Smith while (n-- > 0) { 759f9fae5adSBarry Smith y[5 * (*idx)] += alpha1 * (*v); 760f9fae5adSBarry Smith y[5 * (*idx) + 1] += alpha2 * (*v); 761f9fae5adSBarry Smith y[5 * (*idx) + 2] += alpha3 * (*v); 762f9fae5adSBarry Smith y[5 * (*idx) + 3] += alpha4 * (*v); 763f9fae5adSBarry Smith y[5 * (*idx) + 4] += alpha5 * (*v); 7649371c9d4SSatish Balay idx++; 7659371c9d4SSatish Balay v++; 766f9fae5adSBarry Smith } 767f9fae5adSBarry Smith } 7689566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(10.0 * a->nz)); 7699566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 7709566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 771f9fae5adSBarry Smith PetscFunctionReturn(0); 772bcc973b7SBarry Smith } 773bcc973b7SBarry Smith 7746cd98798SBarry Smith /* ------------------------------------------------------------------------------*/ 7759371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_6(Mat A, Vec xx, Vec yy) { 7766cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 7776cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 778d2840e09SBarry Smith const PetscScalar *x, *v; 779d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6; 780d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 781d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 7826cd98798SBarry Smith 7836cd98798SBarry Smith PetscFunctionBegin; 7849566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 7859566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 7866cd98798SBarry Smith idx = a->j; 7876cd98798SBarry Smith v = a->a; 7886cd98798SBarry Smith ii = a->i; 7896cd98798SBarry Smith 7906cd98798SBarry Smith for (i = 0; i < m; i++) { 7916cd98798SBarry Smith jrow = ii[i]; 7926cd98798SBarry Smith n = ii[i + 1] - jrow; 7936cd98798SBarry Smith sum1 = 0.0; 7946cd98798SBarry Smith sum2 = 0.0; 7956cd98798SBarry Smith sum3 = 0.0; 7966cd98798SBarry Smith sum4 = 0.0; 7976cd98798SBarry Smith sum5 = 0.0; 7986cd98798SBarry Smith sum6 = 0.0; 79926fbe8dcSKarl Rupp 800b3c51c66SMatthew Knepley nonzerorow += (n > 0); 8016cd98798SBarry Smith for (j = 0; j < n; j++) { 8026cd98798SBarry Smith sum1 += v[jrow] * x[6 * idx[jrow]]; 8036cd98798SBarry Smith sum2 += v[jrow] * x[6 * idx[jrow] + 1]; 8046cd98798SBarry Smith sum3 += v[jrow] * x[6 * idx[jrow] + 2]; 8056cd98798SBarry Smith sum4 += v[jrow] * x[6 * idx[jrow] + 3]; 8066cd98798SBarry Smith sum5 += v[jrow] * x[6 * idx[jrow] + 4]; 8076cd98798SBarry Smith sum6 += v[jrow] * x[6 * idx[jrow] + 5]; 8086cd98798SBarry Smith jrow++; 8096cd98798SBarry Smith } 8106cd98798SBarry Smith y[6 * i] = sum1; 8116cd98798SBarry Smith y[6 * i + 1] = sum2; 8126cd98798SBarry Smith y[6 * i + 2] = sum3; 8136cd98798SBarry Smith y[6 * i + 3] = sum4; 8146cd98798SBarry Smith y[6 * i + 4] = sum5; 8156cd98798SBarry Smith y[6 * i + 5] = sum6; 8166cd98798SBarry Smith } 8176cd98798SBarry Smith 8189566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(12.0 * a->nz - 6.0 * nonzerorow)); 8199566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 8209566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 8216cd98798SBarry Smith PetscFunctionReturn(0); 8226cd98798SBarry Smith } 8236cd98798SBarry Smith 8249371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A, Vec xx, Vec yy) { 8256cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 8266cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 827d2840e09SBarry Smith const PetscScalar *x, *v; 828d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6; 829d2840e09SBarry Smith PetscInt n, i; 830d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 8316cd98798SBarry Smith 8326cd98798SBarry Smith PetscFunctionBegin; 8339566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 8349566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 8359566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 836bfec09a0SHong Zhang 8376cd98798SBarry Smith for (i = 0; i < m; i++) { 838bfec09a0SHong Zhang idx = a->j + a->i[i]; 839bfec09a0SHong Zhang v = a->a + a->i[i]; 8406cd98798SBarry Smith n = a->i[i + 1] - a->i[i]; 8416cd98798SBarry Smith alpha1 = x[6 * i]; 8426cd98798SBarry Smith alpha2 = x[6 * i + 1]; 8436cd98798SBarry Smith alpha3 = x[6 * i + 2]; 8446cd98798SBarry Smith alpha4 = x[6 * i + 3]; 8456cd98798SBarry Smith alpha5 = x[6 * i + 4]; 8466cd98798SBarry Smith alpha6 = x[6 * i + 5]; 8476cd98798SBarry Smith while (n-- > 0) { 8486cd98798SBarry Smith y[6 * (*idx)] += alpha1 * (*v); 8496cd98798SBarry Smith y[6 * (*idx) + 1] += alpha2 * (*v); 8506cd98798SBarry Smith y[6 * (*idx) + 2] += alpha3 * (*v); 8516cd98798SBarry Smith y[6 * (*idx) + 3] += alpha4 * (*v); 8526cd98798SBarry Smith y[6 * (*idx) + 4] += alpha5 * (*v); 8536cd98798SBarry Smith y[6 * (*idx) + 5] += alpha6 * (*v); 8549371c9d4SSatish Balay idx++; 8559371c9d4SSatish Balay v++; 8566cd98798SBarry Smith } 8576cd98798SBarry Smith } 8589566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(12.0 * a->nz)); 8599566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 8609566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 8616cd98798SBarry Smith PetscFunctionReturn(0); 8626cd98798SBarry Smith } 8636cd98798SBarry Smith 8649371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz) { 8656cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 8666cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 867d2840e09SBarry Smith const PetscScalar *x, *v; 868d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6; 869b24ad042SBarry Smith PetscInt n, i, jrow, j; 870d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 8716cd98798SBarry Smith 8726cd98798SBarry Smith PetscFunctionBegin; 8739566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 8749566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 8759566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 8766cd98798SBarry Smith idx = a->j; 8776cd98798SBarry Smith v = a->a; 8786cd98798SBarry Smith ii = a->i; 8796cd98798SBarry Smith 8806cd98798SBarry Smith for (i = 0; i < m; i++) { 8816cd98798SBarry Smith jrow = ii[i]; 8826cd98798SBarry Smith n = ii[i + 1] - jrow; 8836cd98798SBarry Smith sum1 = 0.0; 8846cd98798SBarry Smith sum2 = 0.0; 8856cd98798SBarry Smith sum3 = 0.0; 8866cd98798SBarry Smith sum4 = 0.0; 8876cd98798SBarry Smith sum5 = 0.0; 8886cd98798SBarry Smith sum6 = 0.0; 8896cd98798SBarry Smith for (j = 0; j < n; j++) { 8906cd98798SBarry Smith sum1 += v[jrow] * x[6 * idx[jrow]]; 8916cd98798SBarry Smith sum2 += v[jrow] * x[6 * idx[jrow] + 1]; 8926cd98798SBarry Smith sum3 += v[jrow] * x[6 * idx[jrow] + 2]; 8936cd98798SBarry Smith sum4 += v[jrow] * x[6 * idx[jrow] + 3]; 8946cd98798SBarry Smith sum5 += v[jrow] * x[6 * idx[jrow] + 4]; 8956cd98798SBarry Smith sum6 += v[jrow] * x[6 * idx[jrow] + 5]; 8966cd98798SBarry Smith jrow++; 8976cd98798SBarry Smith } 8986cd98798SBarry Smith y[6 * i] += sum1; 8996cd98798SBarry Smith y[6 * i + 1] += sum2; 9006cd98798SBarry Smith y[6 * i + 2] += sum3; 9016cd98798SBarry Smith y[6 * i + 3] += sum4; 9026cd98798SBarry Smith y[6 * i + 4] += sum5; 9036cd98798SBarry Smith y[6 * i + 5] += sum6; 9046cd98798SBarry Smith } 9056cd98798SBarry Smith 9069566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(12.0 * a->nz)); 9079566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 9089566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 9096cd98798SBarry Smith PetscFunctionReturn(0); 9106cd98798SBarry Smith } 9116cd98798SBarry Smith 9129371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz) { 9136cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 9146cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 915d2840e09SBarry Smith const PetscScalar *x, *v; 916d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6; 917d2840e09SBarry Smith PetscInt n, i; 918d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 9196cd98798SBarry Smith 9206cd98798SBarry Smith PetscFunctionBegin; 9219566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 9229566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 9239566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 924bfec09a0SHong Zhang 9256cd98798SBarry Smith for (i = 0; i < m; i++) { 926bfec09a0SHong Zhang idx = a->j + a->i[i]; 927bfec09a0SHong Zhang v = a->a + a->i[i]; 9286cd98798SBarry Smith n = a->i[i + 1] - a->i[i]; 9296cd98798SBarry Smith alpha1 = x[6 * i]; 9306cd98798SBarry Smith alpha2 = x[6 * i + 1]; 9316cd98798SBarry Smith alpha3 = x[6 * i + 2]; 9326cd98798SBarry Smith alpha4 = x[6 * i + 3]; 9336cd98798SBarry Smith alpha5 = x[6 * i + 4]; 9346cd98798SBarry Smith alpha6 = x[6 * i + 5]; 9356cd98798SBarry Smith while (n-- > 0) { 9366cd98798SBarry Smith y[6 * (*idx)] += alpha1 * (*v); 9376cd98798SBarry Smith y[6 * (*idx) + 1] += alpha2 * (*v); 9386cd98798SBarry Smith y[6 * (*idx) + 2] += alpha3 * (*v); 9396cd98798SBarry Smith y[6 * (*idx) + 3] += alpha4 * (*v); 9406cd98798SBarry Smith y[6 * (*idx) + 4] += alpha5 * (*v); 9416cd98798SBarry Smith y[6 * (*idx) + 5] += alpha6 * (*v); 9429371c9d4SSatish Balay idx++; 9439371c9d4SSatish Balay v++; 9446cd98798SBarry Smith } 9456cd98798SBarry Smith } 9469566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(12.0 * a->nz)); 9479566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 9489566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 9496cd98798SBarry Smith PetscFunctionReturn(0); 9506cd98798SBarry Smith } 9516cd98798SBarry Smith 95266ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/ 9539371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_7(Mat A, Vec xx, Vec yy) { 954ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 955ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 956d2840e09SBarry Smith const PetscScalar *x, *v; 957d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7; 958d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 959d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 960ed8eea03SBarry Smith 961ed8eea03SBarry Smith PetscFunctionBegin; 9629566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 9639566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 964ed8eea03SBarry Smith idx = a->j; 965ed8eea03SBarry Smith v = a->a; 966ed8eea03SBarry Smith ii = a->i; 967ed8eea03SBarry Smith 968ed8eea03SBarry Smith for (i = 0; i < m; i++) { 969ed8eea03SBarry Smith jrow = ii[i]; 970ed8eea03SBarry Smith n = ii[i + 1] - jrow; 971ed8eea03SBarry Smith sum1 = 0.0; 972ed8eea03SBarry Smith sum2 = 0.0; 973ed8eea03SBarry Smith sum3 = 0.0; 974ed8eea03SBarry Smith sum4 = 0.0; 975ed8eea03SBarry Smith sum5 = 0.0; 976ed8eea03SBarry Smith sum6 = 0.0; 977ed8eea03SBarry Smith sum7 = 0.0; 97826fbe8dcSKarl Rupp 979b3c51c66SMatthew Knepley nonzerorow += (n > 0); 980ed8eea03SBarry Smith for (j = 0; j < n; j++) { 981ed8eea03SBarry Smith sum1 += v[jrow] * x[7 * idx[jrow]]; 982ed8eea03SBarry Smith sum2 += v[jrow] * x[7 * idx[jrow] + 1]; 983ed8eea03SBarry Smith sum3 += v[jrow] * x[7 * idx[jrow] + 2]; 984ed8eea03SBarry Smith sum4 += v[jrow] * x[7 * idx[jrow] + 3]; 985ed8eea03SBarry Smith sum5 += v[jrow] * x[7 * idx[jrow] + 4]; 986ed8eea03SBarry Smith sum6 += v[jrow] * x[7 * idx[jrow] + 5]; 987ed8eea03SBarry Smith sum7 += v[jrow] * x[7 * idx[jrow] + 6]; 988ed8eea03SBarry Smith jrow++; 989ed8eea03SBarry Smith } 990ed8eea03SBarry Smith y[7 * i] = sum1; 991ed8eea03SBarry Smith y[7 * i + 1] = sum2; 992ed8eea03SBarry Smith y[7 * i + 2] = sum3; 993ed8eea03SBarry Smith y[7 * i + 3] = sum4; 994ed8eea03SBarry Smith y[7 * i + 4] = sum5; 995ed8eea03SBarry Smith y[7 * i + 5] = sum6; 996ed8eea03SBarry Smith y[7 * i + 6] = sum7; 997ed8eea03SBarry Smith } 998ed8eea03SBarry Smith 9999566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(14.0 * a->nz - 7.0 * nonzerorow)); 10009566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 10019566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 1002ed8eea03SBarry Smith PetscFunctionReturn(0); 1003ed8eea03SBarry Smith } 1004ed8eea03SBarry Smith 10059371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A, Vec xx, Vec yy) { 1006ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1007ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1008d2840e09SBarry Smith const PetscScalar *x, *v; 1009d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7; 1010d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 1011d2840e09SBarry Smith PetscInt n, i; 1012ed8eea03SBarry Smith 1013ed8eea03SBarry Smith PetscFunctionBegin; 10149566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 10159566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 10169566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 1017ed8eea03SBarry Smith 1018ed8eea03SBarry Smith for (i = 0; i < m; i++) { 1019ed8eea03SBarry Smith idx = a->j + a->i[i]; 1020ed8eea03SBarry Smith v = a->a + a->i[i]; 1021ed8eea03SBarry Smith n = a->i[i + 1] - a->i[i]; 1022ed8eea03SBarry Smith alpha1 = x[7 * i]; 1023ed8eea03SBarry Smith alpha2 = x[7 * i + 1]; 1024ed8eea03SBarry Smith alpha3 = x[7 * i + 2]; 1025ed8eea03SBarry Smith alpha4 = x[7 * i + 3]; 1026ed8eea03SBarry Smith alpha5 = x[7 * i + 4]; 1027ed8eea03SBarry Smith alpha6 = x[7 * i + 5]; 1028ed8eea03SBarry Smith alpha7 = x[7 * i + 6]; 1029ed8eea03SBarry Smith while (n-- > 0) { 1030ed8eea03SBarry Smith y[7 * (*idx)] += alpha1 * (*v); 1031ed8eea03SBarry Smith y[7 * (*idx) + 1] += alpha2 * (*v); 1032ed8eea03SBarry Smith y[7 * (*idx) + 2] += alpha3 * (*v); 1033ed8eea03SBarry Smith y[7 * (*idx) + 3] += alpha4 * (*v); 1034ed8eea03SBarry Smith y[7 * (*idx) + 4] += alpha5 * (*v); 1035ed8eea03SBarry Smith y[7 * (*idx) + 5] += alpha6 * (*v); 1036ed8eea03SBarry Smith y[7 * (*idx) + 6] += alpha7 * (*v); 10379371c9d4SSatish Balay idx++; 10389371c9d4SSatish Balay v++; 1039ed8eea03SBarry Smith } 1040ed8eea03SBarry Smith } 10419566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(14.0 * a->nz)); 10429566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 10439566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 1044ed8eea03SBarry Smith PetscFunctionReturn(0); 1045ed8eea03SBarry Smith } 1046ed8eea03SBarry Smith 10479371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz) { 1048ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1049ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1050d2840e09SBarry Smith const PetscScalar *x, *v; 1051d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1052d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1053ed8eea03SBarry Smith PetscInt n, i, jrow, j; 1054ed8eea03SBarry Smith 1055ed8eea03SBarry Smith PetscFunctionBegin; 10569566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 10579566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 10589566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 1059ed8eea03SBarry Smith idx = a->j; 1060ed8eea03SBarry Smith v = a->a; 1061ed8eea03SBarry Smith ii = a->i; 1062ed8eea03SBarry Smith 1063ed8eea03SBarry Smith for (i = 0; i < m; i++) { 1064ed8eea03SBarry Smith jrow = ii[i]; 1065ed8eea03SBarry Smith n = ii[i + 1] - jrow; 1066ed8eea03SBarry Smith sum1 = 0.0; 1067ed8eea03SBarry Smith sum2 = 0.0; 1068ed8eea03SBarry Smith sum3 = 0.0; 1069ed8eea03SBarry Smith sum4 = 0.0; 1070ed8eea03SBarry Smith sum5 = 0.0; 1071ed8eea03SBarry Smith sum6 = 0.0; 1072ed8eea03SBarry Smith sum7 = 0.0; 1073ed8eea03SBarry Smith for (j = 0; j < n; j++) { 1074ed8eea03SBarry Smith sum1 += v[jrow] * x[7 * idx[jrow]]; 1075ed8eea03SBarry Smith sum2 += v[jrow] * x[7 * idx[jrow] + 1]; 1076ed8eea03SBarry Smith sum3 += v[jrow] * x[7 * idx[jrow] + 2]; 1077ed8eea03SBarry Smith sum4 += v[jrow] * x[7 * idx[jrow] + 3]; 1078ed8eea03SBarry Smith sum5 += v[jrow] * x[7 * idx[jrow] + 4]; 1079ed8eea03SBarry Smith sum6 += v[jrow] * x[7 * idx[jrow] + 5]; 1080ed8eea03SBarry Smith sum7 += v[jrow] * x[7 * idx[jrow] + 6]; 1081ed8eea03SBarry Smith jrow++; 1082ed8eea03SBarry Smith } 1083ed8eea03SBarry Smith y[7 * i] += sum1; 1084ed8eea03SBarry Smith y[7 * i + 1] += sum2; 1085ed8eea03SBarry Smith y[7 * i + 2] += sum3; 1086ed8eea03SBarry Smith y[7 * i + 3] += sum4; 1087ed8eea03SBarry Smith y[7 * i + 4] += sum5; 1088ed8eea03SBarry Smith y[7 * i + 5] += sum6; 1089ed8eea03SBarry Smith y[7 * i + 6] += sum7; 1090ed8eea03SBarry Smith } 1091ed8eea03SBarry Smith 10929566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(14.0 * a->nz)); 10939566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 10949566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 1095ed8eea03SBarry Smith PetscFunctionReturn(0); 1096ed8eea03SBarry Smith } 1097ed8eea03SBarry Smith 10989371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz) { 1099ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1100ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1101d2840e09SBarry Smith const PetscScalar *x, *v; 1102d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7; 1103d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 1104d2840e09SBarry Smith PetscInt n, i; 1105ed8eea03SBarry Smith 1106ed8eea03SBarry Smith PetscFunctionBegin; 11079566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 11089566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 11099566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 1110ed8eea03SBarry Smith for (i = 0; i < m; i++) { 1111ed8eea03SBarry Smith idx = a->j + a->i[i]; 1112ed8eea03SBarry Smith v = a->a + a->i[i]; 1113ed8eea03SBarry Smith n = a->i[i + 1] - a->i[i]; 1114ed8eea03SBarry Smith alpha1 = x[7 * i]; 1115ed8eea03SBarry Smith alpha2 = x[7 * i + 1]; 1116ed8eea03SBarry Smith alpha3 = x[7 * i + 2]; 1117ed8eea03SBarry Smith alpha4 = x[7 * i + 3]; 1118ed8eea03SBarry Smith alpha5 = x[7 * i + 4]; 1119ed8eea03SBarry Smith alpha6 = x[7 * i + 5]; 1120ed8eea03SBarry Smith alpha7 = x[7 * i + 6]; 1121ed8eea03SBarry Smith while (n-- > 0) { 1122ed8eea03SBarry Smith y[7 * (*idx)] += alpha1 * (*v); 1123ed8eea03SBarry Smith y[7 * (*idx) + 1] += alpha2 * (*v); 1124ed8eea03SBarry Smith y[7 * (*idx) + 2] += alpha3 * (*v); 1125ed8eea03SBarry Smith y[7 * (*idx) + 3] += alpha4 * (*v); 1126ed8eea03SBarry Smith y[7 * (*idx) + 4] += alpha5 * (*v); 1127ed8eea03SBarry Smith y[7 * (*idx) + 5] += alpha6 * (*v); 1128ed8eea03SBarry Smith y[7 * (*idx) + 6] += alpha7 * (*v); 11299371c9d4SSatish Balay idx++; 11309371c9d4SSatish Balay v++; 1131ed8eea03SBarry Smith } 1132ed8eea03SBarry Smith } 11339566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(14.0 * a->nz)); 11349566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 11359566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 1136ed8eea03SBarry Smith PetscFunctionReturn(0); 1137ed8eea03SBarry Smith } 1138ed8eea03SBarry Smith 11399371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_8(Mat A, Vec xx, Vec yy) { 114066ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 114166ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1142d2840e09SBarry Smith const PetscScalar *x, *v; 1143d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1144d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1145d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 114666ed3db0SBarry Smith 114766ed3db0SBarry Smith PetscFunctionBegin; 11489566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 11499566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 115066ed3db0SBarry Smith idx = a->j; 115166ed3db0SBarry Smith v = a->a; 115266ed3db0SBarry Smith ii = a->i; 115366ed3db0SBarry Smith 115466ed3db0SBarry Smith for (i = 0; i < m; i++) { 115566ed3db0SBarry Smith jrow = ii[i]; 115666ed3db0SBarry Smith n = ii[i + 1] - jrow; 115766ed3db0SBarry Smith sum1 = 0.0; 115866ed3db0SBarry Smith sum2 = 0.0; 115966ed3db0SBarry Smith sum3 = 0.0; 116066ed3db0SBarry Smith sum4 = 0.0; 116166ed3db0SBarry Smith sum5 = 0.0; 116266ed3db0SBarry Smith sum6 = 0.0; 116366ed3db0SBarry Smith sum7 = 0.0; 116466ed3db0SBarry Smith sum8 = 0.0; 116526fbe8dcSKarl Rupp 1166b3c51c66SMatthew Knepley nonzerorow += (n > 0); 116766ed3db0SBarry Smith for (j = 0; j < n; j++) { 116866ed3db0SBarry Smith sum1 += v[jrow] * x[8 * idx[jrow]]; 116966ed3db0SBarry Smith sum2 += v[jrow] * x[8 * idx[jrow] + 1]; 117066ed3db0SBarry Smith sum3 += v[jrow] * x[8 * idx[jrow] + 2]; 117166ed3db0SBarry Smith sum4 += v[jrow] * x[8 * idx[jrow] + 3]; 117266ed3db0SBarry Smith sum5 += v[jrow] * x[8 * idx[jrow] + 4]; 117366ed3db0SBarry Smith sum6 += v[jrow] * x[8 * idx[jrow] + 5]; 117466ed3db0SBarry Smith sum7 += v[jrow] * x[8 * idx[jrow] + 6]; 117566ed3db0SBarry Smith sum8 += v[jrow] * x[8 * idx[jrow] + 7]; 117666ed3db0SBarry Smith jrow++; 117766ed3db0SBarry Smith } 117866ed3db0SBarry Smith y[8 * i] = sum1; 117966ed3db0SBarry Smith y[8 * i + 1] = sum2; 118066ed3db0SBarry Smith y[8 * i + 2] = sum3; 118166ed3db0SBarry Smith y[8 * i + 3] = sum4; 118266ed3db0SBarry Smith y[8 * i + 4] = sum5; 118366ed3db0SBarry Smith y[8 * i + 5] = sum6; 118466ed3db0SBarry Smith y[8 * i + 6] = sum7; 118566ed3db0SBarry Smith y[8 * i + 7] = sum8; 118666ed3db0SBarry Smith } 118766ed3db0SBarry Smith 11889566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(16.0 * a->nz - 8.0 * nonzerorow)); 11899566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 11909566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 119166ed3db0SBarry Smith PetscFunctionReturn(0); 119266ed3db0SBarry Smith } 119366ed3db0SBarry Smith 11949371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A, Vec xx, Vec yy) { 119566ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 119666ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1197d2840e09SBarry Smith const PetscScalar *x, *v; 1198d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 1199d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 1200d2840e09SBarry Smith PetscInt n, i; 120166ed3db0SBarry Smith 120266ed3db0SBarry Smith PetscFunctionBegin; 12039566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 12049566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 12059566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 1206bfec09a0SHong Zhang 120766ed3db0SBarry Smith for (i = 0; i < m; i++) { 1208bfec09a0SHong Zhang idx = a->j + a->i[i]; 1209bfec09a0SHong Zhang v = a->a + a->i[i]; 121066ed3db0SBarry Smith n = a->i[i + 1] - a->i[i]; 121166ed3db0SBarry Smith alpha1 = x[8 * i]; 121266ed3db0SBarry Smith alpha2 = x[8 * i + 1]; 121366ed3db0SBarry Smith alpha3 = x[8 * i + 2]; 121466ed3db0SBarry Smith alpha4 = x[8 * i + 3]; 121566ed3db0SBarry Smith alpha5 = x[8 * i + 4]; 121666ed3db0SBarry Smith alpha6 = x[8 * i + 5]; 121766ed3db0SBarry Smith alpha7 = x[8 * i + 6]; 121866ed3db0SBarry Smith alpha8 = x[8 * i + 7]; 121966ed3db0SBarry Smith while (n-- > 0) { 122066ed3db0SBarry Smith y[8 * (*idx)] += alpha1 * (*v); 122166ed3db0SBarry Smith y[8 * (*idx) + 1] += alpha2 * (*v); 122266ed3db0SBarry Smith y[8 * (*idx) + 2] += alpha3 * (*v); 122366ed3db0SBarry Smith y[8 * (*idx) + 3] += alpha4 * (*v); 122466ed3db0SBarry Smith y[8 * (*idx) + 4] += alpha5 * (*v); 122566ed3db0SBarry Smith y[8 * (*idx) + 5] += alpha6 * (*v); 122666ed3db0SBarry Smith y[8 * (*idx) + 6] += alpha7 * (*v); 122766ed3db0SBarry Smith y[8 * (*idx) + 7] += alpha8 * (*v); 12289371c9d4SSatish Balay idx++; 12299371c9d4SSatish Balay v++; 123066ed3db0SBarry Smith } 123166ed3db0SBarry Smith } 12329566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(16.0 * a->nz)); 12339566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 12349566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 123566ed3db0SBarry Smith PetscFunctionReturn(0); 123666ed3db0SBarry Smith } 123766ed3db0SBarry Smith 12389371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz) { 123966ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 124066ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1241d2840e09SBarry Smith const PetscScalar *x, *v; 1242d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1243d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1244b24ad042SBarry Smith PetscInt n, i, jrow, j; 124566ed3db0SBarry Smith 124666ed3db0SBarry Smith PetscFunctionBegin; 12479566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 12489566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 12499566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 125066ed3db0SBarry Smith idx = a->j; 125166ed3db0SBarry Smith v = a->a; 125266ed3db0SBarry Smith ii = a->i; 125366ed3db0SBarry Smith 125466ed3db0SBarry Smith for (i = 0; i < m; i++) { 125566ed3db0SBarry Smith jrow = ii[i]; 125666ed3db0SBarry Smith n = ii[i + 1] - jrow; 125766ed3db0SBarry Smith sum1 = 0.0; 125866ed3db0SBarry Smith sum2 = 0.0; 125966ed3db0SBarry Smith sum3 = 0.0; 126066ed3db0SBarry Smith sum4 = 0.0; 126166ed3db0SBarry Smith sum5 = 0.0; 126266ed3db0SBarry Smith sum6 = 0.0; 126366ed3db0SBarry Smith sum7 = 0.0; 126466ed3db0SBarry Smith sum8 = 0.0; 126566ed3db0SBarry Smith for (j = 0; j < n; j++) { 126666ed3db0SBarry Smith sum1 += v[jrow] * x[8 * idx[jrow]]; 126766ed3db0SBarry Smith sum2 += v[jrow] * x[8 * idx[jrow] + 1]; 126866ed3db0SBarry Smith sum3 += v[jrow] * x[8 * idx[jrow] + 2]; 126966ed3db0SBarry Smith sum4 += v[jrow] * x[8 * idx[jrow] + 3]; 127066ed3db0SBarry Smith sum5 += v[jrow] * x[8 * idx[jrow] + 4]; 127166ed3db0SBarry Smith sum6 += v[jrow] * x[8 * idx[jrow] + 5]; 127266ed3db0SBarry Smith sum7 += v[jrow] * x[8 * idx[jrow] + 6]; 127366ed3db0SBarry Smith sum8 += v[jrow] * x[8 * idx[jrow] + 7]; 127466ed3db0SBarry Smith jrow++; 127566ed3db0SBarry Smith } 127666ed3db0SBarry Smith y[8 * i] += sum1; 127766ed3db0SBarry Smith y[8 * i + 1] += sum2; 127866ed3db0SBarry Smith y[8 * i + 2] += sum3; 127966ed3db0SBarry Smith y[8 * i + 3] += sum4; 128066ed3db0SBarry Smith y[8 * i + 4] += sum5; 128166ed3db0SBarry Smith y[8 * i + 5] += sum6; 128266ed3db0SBarry Smith y[8 * i + 6] += sum7; 128366ed3db0SBarry Smith y[8 * i + 7] += sum8; 128466ed3db0SBarry Smith } 128566ed3db0SBarry Smith 12869566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(16.0 * a->nz)); 12879566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 12889566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 128966ed3db0SBarry Smith PetscFunctionReturn(0); 129066ed3db0SBarry Smith } 129166ed3db0SBarry Smith 12929371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz) { 129366ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 129466ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1295d2840e09SBarry Smith const PetscScalar *x, *v; 1296d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 1297d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 1298d2840e09SBarry Smith PetscInt n, i; 129966ed3db0SBarry Smith 130066ed3db0SBarry Smith PetscFunctionBegin; 13019566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 13029566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 13039566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 130466ed3db0SBarry Smith for (i = 0; i < m; i++) { 1305bfec09a0SHong Zhang idx = a->j + a->i[i]; 1306bfec09a0SHong Zhang v = a->a + a->i[i]; 130766ed3db0SBarry Smith n = a->i[i + 1] - a->i[i]; 130866ed3db0SBarry Smith alpha1 = x[8 * i]; 130966ed3db0SBarry Smith alpha2 = x[8 * i + 1]; 131066ed3db0SBarry Smith alpha3 = x[8 * i + 2]; 131166ed3db0SBarry Smith alpha4 = x[8 * i + 3]; 131266ed3db0SBarry Smith alpha5 = x[8 * i + 4]; 131366ed3db0SBarry Smith alpha6 = x[8 * i + 5]; 131466ed3db0SBarry Smith alpha7 = x[8 * i + 6]; 131566ed3db0SBarry Smith alpha8 = x[8 * i + 7]; 131666ed3db0SBarry Smith while (n-- > 0) { 131766ed3db0SBarry Smith y[8 * (*idx)] += alpha1 * (*v); 131866ed3db0SBarry Smith y[8 * (*idx) + 1] += alpha2 * (*v); 131966ed3db0SBarry Smith y[8 * (*idx) + 2] += alpha3 * (*v); 132066ed3db0SBarry Smith y[8 * (*idx) + 3] += alpha4 * (*v); 132166ed3db0SBarry Smith y[8 * (*idx) + 4] += alpha5 * (*v); 132266ed3db0SBarry Smith y[8 * (*idx) + 5] += alpha6 * (*v); 132366ed3db0SBarry Smith y[8 * (*idx) + 6] += alpha7 * (*v); 132466ed3db0SBarry Smith y[8 * (*idx) + 7] += alpha8 * (*v); 13259371c9d4SSatish Balay idx++; 13269371c9d4SSatish Balay v++; 132766ed3db0SBarry Smith } 132866ed3db0SBarry Smith } 13299566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(16.0 * a->nz)); 13309566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 13319566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 13322f7816d4SBarry Smith PetscFunctionReturn(0); 13332f7816d4SBarry Smith } 13342f7816d4SBarry Smith 13352b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/ 13369371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_9(Mat A, Vec xx, Vec yy) { 13372b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 13382b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1339d2840e09SBarry Smith const PetscScalar *x, *v; 1340d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1341d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1342d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 13432b692628SMatthew Knepley 13442b692628SMatthew Knepley PetscFunctionBegin; 13459566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 13469566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 13472b692628SMatthew Knepley idx = a->j; 13482b692628SMatthew Knepley v = a->a; 13492b692628SMatthew Knepley ii = a->i; 13502b692628SMatthew Knepley 13512b692628SMatthew Knepley for (i = 0; i < m; i++) { 13522b692628SMatthew Knepley jrow = ii[i]; 13532b692628SMatthew Knepley n = ii[i + 1] - jrow; 13542b692628SMatthew Knepley sum1 = 0.0; 13552b692628SMatthew Knepley sum2 = 0.0; 13562b692628SMatthew Knepley sum3 = 0.0; 13572b692628SMatthew Knepley sum4 = 0.0; 13582b692628SMatthew Knepley sum5 = 0.0; 13592b692628SMatthew Knepley sum6 = 0.0; 13602b692628SMatthew Knepley sum7 = 0.0; 13612b692628SMatthew Knepley sum8 = 0.0; 13622b692628SMatthew Knepley sum9 = 0.0; 136326fbe8dcSKarl Rupp 1364b3c51c66SMatthew Knepley nonzerorow += (n > 0); 13652b692628SMatthew Knepley for (j = 0; j < n; j++) { 13662b692628SMatthew Knepley sum1 += v[jrow] * x[9 * idx[jrow]]; 13672b692628SMatthew Knepley sum2 += v[jrow] * x[9 * idx[jrow] + 1]; 13682b692628SMatthew Knepley sum3 += v[jrow] * x[9 * idx[jrow] + 2]; 13692b692628SMatthew Knepley sum4 += v[jrow] * x[9 * idx[jrow] + 3]; 13702b692628SMatthew Knepley sum5 += v[jrow] * x[9 * idx[jrow] + 4]; 13712b692628SMatthew Knepley sum6 += v[jrow] * x[9 * idx[jrow] + 5]; 13722b692628SMatthew Knepley sum7 += v[jrow] * x[9 * idx[jrow] + 6]; 13732b692628SMatthew Knepley sum8 += v[jrow] * x[9 * idx[jrow] + 7]; 13742b692628SMatthew Knepley sum9 += v[jrow] * x[9 * idx[jrow] + 8]; 13752b692628SMatthew Knepley jrow++; 13762b692628SMatthew Knepley } 13772b692628SMatthew Knepley y[9 * i] = sum1; 13782b692628SMatthew Knepley y[9 * i + 1] = sum2; 13792b692628SMatthew Knepley y[9 * i + 2] = sum3; 13802b692628SMatthew Knepley y[9 * i + 3] = sum4; 13812b692628SMatthew Knepley y[9 * i + 4] = sum5; 13822b692628SMatthew Knepley y[9 * i + 5] = sum6; 13832b692628SMatthew Knepley y[9 * i + 6] = sum7; 13842b692628SMatthew Knepley y[9 * i + 7] = sum8; 13852b692628SMatthew Knepley y[9 * i + 8] = sum9; 13862b692628SMatthew Knepley } 13872b692628SMatthew Knepley 13889566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(18.0 * a->nz - 9 * nonzerorow)); 13899566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 13909566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 13912b692628SMatthew Knepley PetscFunctionReturn(0); 13922b692628SMatthew Knepley } 13932b692628SMatthew Knepley 1394b9cda22cSBarry Smith /* ------------------------------------------------------------------------------*/ 1395b9cda22cSBarry Smith 13969371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A, Vec xx, Vec yy) { 13972b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 13982b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1399d2840e09SBarry Smith const PetscScalar *x, *v; 1400d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9; 1401d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 1402d2840e09SBarry Smith PetscInt n, i; 14032b692628SMatthew Knepley 14042b692628SMatthew Knepley PetscFunctionBegin; 14059566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 14069566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 14079566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 14082b692628SMatthew Knepley 14092b692628SMatthew Knepley for (i = 0; i < m; i++) { 14102b692628SMatthew Knepley idx = a->j + a->i[i]; 14112b692628SMatthew Knepley v = a->a + a->i[i]; 14122b692628SMatthew Knepley n = a->i[i + 1] - a->i[i]; 14132b692628SMatthew Knepley alpha1 = x[9 * i]; 14142b692628SMatthew Knepley alpha2 = x[9 * i + 1]; 14152b692628SMatthew Knepley alpha3 = x[9 * i + 2]; 14162b692628SMatthew Knepley alpha4 = x[9 * i + 3]; 14172b692628SMatthew Knepley alpha5 = x[9 * i + 4]; 14182b692628SMatthew Knepley alpha6 = x[9 * i + 5]; 14192b692628SMatthew Knepley alpha7 = x[9 * i + 6]; 14202b692628SMatthew Knepley alpha8 = x[9 * i + 7]; 14212f6bd0c6SMatthew Knepley alpha9 = x[9 * i + 8]; 14222b692628SMatthew Knepley while (n-- > 0) { 14232b692628SMatthew Knepley y[9 * (*idx)] += alpha1 * (*v); 14242b692628SMatthew Knepley y[9 * (*idx) + 1] += alpha2 * (*v); 14252b692628SMatthew Knepley y[9 * (*idx) + 2] += alpha3 * (*v); 14262b692628SMatthew Knepley y[9 * (*idx) + 3] += alpha4 * (*v); 14272b692628SMatthew Knepley y[9 * (*idx) + 4] += alpha5 * (*v); 14282b692628SMatthew Knepley y[9 * (*idx) + 5] += alpha6 * (*v); 14292b692628SMatthew Knepley y[9 * (*idx) + 6] += alpha7 * (*v); 14302b692628SMatthew Knepley y[9 * (*idx) + 7] += alpha8 * (*v); 14312b692628SMatthew Knepley y[9 * (*idx) + 8] += alpha9 * (*v); 14329371c9d4SSatish Balay idx++; 14339371c9d4SSatish Balay v++; 14342b692628SMatthew Knepley } 14352b692628SMatthew Knepley } 14369566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(18.0 * a->nz)); 14379566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 14389566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 14392b692628SMatthew Knepley PetscFunctionReturn(0); 14402b692628SMatthew Knepley } 14412b692628SMatthew Knepley 14429371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz) { 14432b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 14442b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1445d2840e09SBarry Smith const PetscScalar *x, *v; 1446d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1447d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1448b24ad042SBarry Smith PetscInt n, i, jrow, j; 14492b692628SMatthew Knepley 14502b692628SMatthew Knepley PetscFunctionBegin; 14519566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 14529566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 14539566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 14542b692628SMatthew Knepley idx = a->j; 14552b692628SMatthew Knepley v = a->a; 14562b692628SMatthew Knepley ii = a->i; 14572b692628SMatthew Knepley 14582b692628SMatthew Knepley for (i = 0; i < m; i++) { 14592b692628SMatthew Knepley jrow = ii[i]; 14602b692628SMatthew Knepley n = ii[i + 1] - jrow; 14612b692628SMatthew Knepley sum1 = 0.0; 14622b692628SMatthew Knepley sum2 = 0.0; 14632b692628SMatthew Knepley sum3 = 0.0; 14642b692628SMatthew Knepley sum4 = 0.0; 14652b692628SMatthew Knepley sum5 = 0.0; 14662b692628SMatthew Knepley sum6 = 0.0; 14672b692628SMatthew Knepley sum7 = 0.0; 14682b692628SMatthew Knepley sum8 = 0.0; 14692b692628SMatthew Knepley sum9 = 0.0; 14702b692628SMatthew Knepley for (j = 0; j < n; j++) { 14712b692628SMatthew Knepley sum1 += v[jrow] * x[9 * idx[jrow]]; 14722b692628SMatthew Knepley sum2 += v[jrow] * x[9 * idx[jrow] + 1]; 14732b692628SMatthew Knepley sum3 += v[jrow] * x[9 * idx[jrow] + 2]; 14742b692628SMatthew Knepley sum4 += v[jrow] * x[9 * idx[jrow] + 3]; 14752b692628SMatthew Knepley sum5 += v[jrow] * x[9 * idx[jrow] + 4]; 14762b692628SMatthew Knepley sum6 += v[jrow] * x[9 * idx[jrow] + 5]; 14772b692628SMatthew Knepley sum7 += v[jrow] * x[9 * idx[jrow] + 6]; 14782b692628SMatthew Knepley sum8 += v[jrow] * x[9 * idx[jrow] + 7]; 14792b692628SMatthew Knepley sum9 += v[jrow] * x[9 * idx[jrow] + 8]; 14802b692628SMatthew Knepley jrow++; 14812b692628SMatthew Knepley } 14822b692628SMatthew Knepley y[9 * i] += sum1; 14832b692628SMatthew Knepley y[9 * i + 1] += sum2; 14842b692628SMatthew Knepley y[9 * i + 2] += sum3; 14852b692628SMatthew Knepley y[9 * i + 3] += sum4; 14862b692628SMatthew Knepley y[9 * i + 4] += sum5; 14872b692628SMatthew Knepley y[9 * i + 5] += sum6; 14882b692628SMatthew Knepley y[9 * i + 6] += sum7; 14892b692628SMatthew Knepley y[9 * i + 7] += sum8; 14902b692628SMatthew Knepley y[9 * i + 8] += sum9; 14912b692628SMatthew Knepley } 14922b692628SMatthew Knepley 14939566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(18.0 * a->nz)); 14949566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 14959566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 14962b692628SMatthew Knepley PetscFunctionReturn(0); 14972b692628SMatthew Knepley } 14982b692628SMatthew Knepley 14999371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz) { 15002b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 15012b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1502d2840e09SBarry Smith const PetscScalar *x, *v; 1503d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9; 1504d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 1505d2840e09SBarry Smith PetscInt n, i; 15062b692628SMatthew Knepley 15072b692628SMatthew Knepley PetscFunctionBegin; 15089566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 15099566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 15109566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 15112b692628SMatthew Knepley for (i = 0; i < m; i++) { 15122b692628SMatthew Knepley idx = a->j + a->i[i]; 15132b692628SMatthew Knepley v = a->a + a->i[i]; 15142b692628SMatthew Knepley n = a->i[i + 1] - a->i[i]; 15152b692628SMatthew Knepley alpha1 = x[9 * i]; 15162b692628SMatthew Knepley alpha2 = x[9 * i + 1]; 15172b692628SMatthew Knepley alpha3 = x[9 * i + 2]; 15182b692628SMatthew Knepley alpha4 = x[9 * i + 3]; 15192b692628SMatthew Knepley alpha5 = x[9 * i + 4]; 15202b692628SMatthew Knepley alpha6 = x[9 * i + 5]; 15212b692628SMatthew Knepley alpha7 = x[9 * i + 6]; 15222b692628SMatthew Knepley alpha8 = x[9 * i + 7]; 15232b692628SMatthew Knepley alpha9 = x[9 * i + 8]; 15242b692628SMatthew Knepley while (n-- > 0) { 15252b692628SMatthew Knepley y[9 * (*idx)] += alpha1 * (*v); 15262b692628SMatthew Knepley y[9 * (*idx) + 1] += alpha2 * (*v); 15272b692628SMatthew Knepley y[9 * (*idx) + 2] += alpha3 * (*v); 15282b692628SMatthew Knepley y[9 * (*idx) + 3] += alpha4 * (*v); 15292b692628SMatthew Knepley y[9 * (*idx) + 4] += alpha5 * (*v); 15302b692628SMatthew Knepley y[9 * (*idx) + 5] += alpha6 * (*v); 15312b692628SMatthew Knepley y[9 * (*idx) + 6] += alpha7 * (*v); 15322b692628SMatthew Knepley y[9 * (*idx) + 7] += alpha8 * (*v); 15332b692628SMatthew Knepley y[9 * (*idx) + 8] += alpha9 * (*v); 15349371c9d4SSatish Balay idx++; 15359371c9d4SSatish Balay v++; 15362b692628SMatthew Knepley } 15372b692628SMatthew Knepley } 15389566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(18.0 * a->nz)); 15399566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 15409566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 15412b692628SMatthew Knepley PetscFunctionReturn(0); 15422b692628SMatthew Knepley } 15439371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_10(Mat A, Vec xx, Vec yy) { 1544b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1545b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1546d2840e09SBarry Smith const PetscScalar *x, *v; 1547d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1548d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1549d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 1550b9cda22cSBarry Smith 1551b9cda22cSBarry Smith PetscFunctionBegin; 15529566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 15539566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 1554b9cda22cSBarry Smith idx = a->j; 1555b9cda22cSBarry Smith v = a->a; 1556b9cda22cSBarry Smith ii = a->i; 1557b9cda22cSBarry Smith 1558b9cda22cSBarry Smith for (i = 0; i < m; i++) { 1559b9cda22cSBarry Smith jrow = ii[i]; 1560b9cda22cSBarry Smith n = ii[i + 1] - jrow; 1561b9cda22cSBarry Smith sum1 = 0.0; 1562b9cda22cSBarry Smith sum2 = 0.0; 1563b9cda22cSBarry Smith sum3 = 0.0; 1564b9cda22cSBarry Smith sum4 = 0.0; 1565b9cda22cSBarry Smith sum5 = 0.0; 1566b9cda22cSBarry Smith sum6 = 0.0; 1567b9cda22cSBarry Smith sum7 = 0.0; 1568b9cda22cSBarry Smith sum8 = 0.0; 1569b9cda22cSBarry Smith sum9 = 0.0; 1570b9cda22cSBarry Smith sum10 = 0.0; 157126fbe8dcSKarl Rupp 1572b3c51c66SMatthew Knepley nonzerorow += (n > 0); 1573b9cda22cSBarry Smith for (j = 0; j < n; j++) { 1574b9cda22cSBarry Smith sum1 += v[jrow] * x[10 * idx[jrow]]; 1575b9cda22cSBarry Smith sum2 += v[jrow] * x[10 * idx[jrow] + 1]; 1576b9cda22cSBarry Smith sum3 += v[jrow] * x[10 * idx[jrow] + 2]; 1577b9cda22cSBarry Smith sum4 += v[jrow] * x[10 * idx[jrow] + 3]; 1578b9cda22cSBarry Smith sum5 += v[jrow] * x[10 * idx[jrow] + 4]; 1579b9cda22cSBarry Smith sum6 += v[jrow] * x[10 * idx[jrow] + 5]; 1580b9cda22cSBarry Smith sum7 += v[jrow] * x[10 * idx[jrow] + 6]; 1581b9cda22cSBarry Smith sum8 += v[jrow] * x[10 * idx[jrow] + 7]; 1582b9cda22cSBarry Smith sum9 += v[jrow] * x[10 * idx[jrow] + 8]; 1583b9cda22cSBarry Smith sum10 += v[jrow] * x[10 * idx[jrow] + 9]; 1584b9cda22cSBarry Smith jrow++; 1585b9cda22cSBarry Smith } 1586b9cda22cSBarry Smith y[10 * i] = sum1; 1587b9cda22cSBarry Smith y[10 * i + 1] = sum2; 1588b9cda22cSBarry Smith y[10 * i + 2] = sum3; 1589b9cda22cSBarry Smith y[10 * i + 3] = sum4; 1590b9cda22cSBarry Smith y[10 * i + 4] = sum5; 1591b9cda22cSBarry Smith y[10 * i + 5] = sum6; 1592b9cda22cSBarry Smith y[10 * i + 6] = sum7; 1593b9cda22cSBarry Smith y[10 * i + 7] = sum8; 1594b9cda22cSBarry Smith y[10 * i + 8] = sum9; 1595b9cda22cSBarry Smith y[10 * i + 9] = sum10; 1596b9cda22cSBarry Smith } 1597b9cda22cSBarry Smith 15989566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(20.0 * a->nz - 10.0 * nonzerorow)); 15999566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 16009566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 1601b9cda22cSBarry Smith PetscFunctionReturn(0); 1602b9cda22cSBarry Smith } 1603b9cda22cSBarry Smith 16049371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz) { 1605b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1606b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1607d2840e09SBarry Smith const PetscScalar *x, *v; 1608d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1609d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1610b9cda22cSBarry Smith PetscInt n, i, jrow, j; 1611b9cda22cSBarry Smith 1612b9cda22cSBarry Smith PetscFunctionBegin; 16139566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 16149566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 16159566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 1616b9cda22cSBarry Smith idx = a->j; 1617b9cda22cSBarry Smith v = a->a; 1618b9cda22cSBarry Smith ii = a->i; 1619b9cda22cSBarry Smith 1620b9cda22cSBarry Smith for (i = 0; i < m; i++) { 1621b9cda22cSBarry Smith jrow = ii[i]; 1622b9cda22cSBarry Smith n = ii[i + 1] - jrow; 1623b9cda22cSBarry Smith sum1 = 0.0; 1624b9cda22cSBarry Smith sum2 = 0.0; 1625b9cda22cSBarry Smith sum3 = 0.0; 1626b9cda22cSBarry Smith sum4 = 0.0; 1627b9cda22cSBarry Smith sum5 = 0.0; 1628b9cda22cSBarry Smith sum6 = 0.0; 1629b9cda22cSBarry Smith sum7 = 0.0; 1630b9cda22cSBarry Smith sum8 = 0.0; 1631b9cda22cSBarry Smith sum9 = 0.0; 1632b9cda22cSBarry Smith sum10 = 0.0; 1633b9cda22cSBarry Smith for (j = 0; j < n; j++) { 1634b9cda22cSBarry Smith sum1 += v[jrow] * x[10 * idx[jrow]]; 1635b9cda22cSBarry Smith sum2 += v[jrow] * x[10 * idx[jrow] + 1]; 1636b9cda22cSBarry Smith sum3 += v[jrow] * x[10 * idx[jrow] + 2]; 1637b9cda22cSBarry Smith sum4 += v[jrow] * x[10 * idx[jrow] + 3]; 1638b9cda22cSBarry Smith sum5 += v[jrow] * x[10 * idx[jrow] + 4]; 1639b9cda22cSBarry Smith sum6 += v[jrow] * x[10 * idx[jrow] + 5]; 1640b9cda22cSBarry Smith sum7 += v[jrow] * x[10 * idx[jrow] + 6]; 1641b9cda22cSBarry Smith sum8 += v[jrow] * x[10 * idx[jrow] + 7]; 1642b9cda22cSBarry Smith sum9 += v[jrow] * x[10 * idx[jrow] + 8]; 1643b9cda22cSBarry Smith sum10 += v[jrow] * x[10 * idx[jrow] + 9]; 1644b9cda22cSBarry Smith jrow++; 1645b9cda22cSBarry Smith } 1646b9cda22cSBarry Smith y[10 * i] += sum1; 1647b9cda22cSBarry Smith y[10 * i + 1] += sum2; 1648b9cda22cSBarry Smith y[10 * i + 2] += sum3; 1649b9cda22cSBarry Smith y[10 * i + 3] += sum4; 1650b9cda22cSBarry Smith y[10 * i + 4] += sum5; 1651b9cda22cSBarry Smith y[10 * i + 5] += sum6; 1652b9cda22cSBarry Smith y[10 * i + 6] += sum7; 1653b9cda22cSBarry Smith y[10 * i + 7] += sum8; 1654b9cda22cSBarry Smith y[10 * i + 8] += sum9; 1655b9cda22cSBarry Smith y[10 * i + 9] += sum10; 1656b9cda22cSBarry Smith } 1657b9cda22cSBarry Smith 16589566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(20.0 * a->nz)); 16599566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 16609566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 1661b9cda22cSBarry Smith PetscFunctionReturn(0); 1662b9cda22cSBarry Smith } 1663b9cda22cSBarry Smith 16649371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A, Vec xx, Vec yy) { 1665b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1666b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1667d2840e09SBarry Smith const PetscScalar *x, *v; 1668d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10; 1669d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 1670d2840e09SBarry Smith PetscInt n, i; 1671b9cda22cSBarry Smith 1672b9cda22cSBarry Smith PetscFunctionBegin; 16739566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 16749566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 16759566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 1676b9cda22cSBarry Smith 1677b9cda22cSBarry Smith for (i = 0; i < m; i++) { 1678b9cda22cSBarry Smith idx = a->j + a->i[i]; 1679b9cda22cSBarry Smith v = a->a + a->i[i]; 1680b9cda22cSBarry Smith n = a->i[i + 1] - a->i[i]; 1681e29fdc22SBarry Smith alpha1 = x[10 * i]; 1682e29fdc22SBarry Smith alpha2 = x[10 * i + 1]; 1683e29fdc22SBarry Smith alpha3 = x[10 * i + 2]; 1684e29fdc22SBarry Smith alpha4 = x[10 * i + 3]; 1685e29fdc22SBarry Smith alpha5 = x[10 * i + 4]; 1686e29fdc22SBarry Smith alpha6 = x[10 * i + 5]; 1687e29fdc22SBarry Smith alpha7 = x[10 * i + 6]; 1688e29fdc22SBarry Smith alpha8 = x[10 * i + 7]; 1689e29fdc22SBarry Smith alpha9 = x[10 * i + 8]; 1690b9cda22cSBarry Smith alpha10 = x[10 * i + 9]; 1691b9cda22cSBarry Smith while (n-- > 0) { 1692e29fdc22SBarry Smith y[10 * (*idx)] += alpha1 * (*v); 1693e29fdc22SBarry Smith y[10 * (*idx) + 1] += alpha2 * (*v); 1694e29fdc22SBarry Smith y[10 * (*idx) + 2] += alpha3 * (*v); 1695e29fdc22SBarry Smith y[10 * (*idx) + 3] += alpha4 * (*v); 1696e29fdc22SBarry Smith y[10 * (*idx) + 4] += alpha5 * (*v); 1697e29fdc22SBarry Smith y[10 * (*idx) + 5] += alpha6 * (*v); 1698e29fdc22SBarry Smith y[10 * (*idx) + 6] += alpha7 * (*v); 1699e29fdc22SBarry Smith y[10 * (*idx) + 7] += alpha8 * (*v); 1700e29fdc22SBarry Smith y[10 * (*idx) + 8] += alpha9 * (*v); 1701b9cda22cSBarry Smith y[10 * (*idx) + 9] += alpha10 * (*v); 17029371c9d4SSatish Balay idx++; 17039371c9d4SSatish Balay v++; 1704b9cda22cSBarry Smith } 1705b9cda22cSBarry Smith } 17069566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(20.0 * a->nz)); 17079566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 17089566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 1709b9cda22cSBarry Smith PetscFunctionReturn(0); 1710b9cda22cSBarry Smith } 1711b9cda22cSBarry Smith 17129371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz) { 1713b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1714b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1715d2840e09SBarry Smith const PetscScalar *x, *v; 1716d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10; 1717d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 1718d2840e09SBarry Smith PetscInt n, i; 1719b9cda22cSBarry Smith 1720b9cda22cSBarry Smith PetscFunctionBegin; 17219566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 17229566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 17239566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 1724b9cda22cSBarry Smith for (i = 0; i < m; i++) { 1725b9cda22cSBarry Smith idx = a->j + a->i[i]; 1726b9cda22cSBarry Smith v = a->a + a->i[i]; 1727b9cda22cSBarry Smith n = a->i[i + 1] - a->i[i]; 1728b9cda22cSBarry Smith alpha1 = x[10 * i]; 1729b9cda22cSBarry Smith alpha2 = x[10 * i + 1]; 1730b9cda22cSBarry Smith alpha3 = x[10 * i + 2]; 1731b9cda22cSBarry Smith alpha4 = x[10 * i + 3]; 1732b9cda22cSBarry Smith alpha5 = x[10 * i + 4]; 1733b9cda22cSBarry Smith alpha6 = x[10 * i + 5]; 1734b9cda22cSBarry Smith alpha7 = x[10 * i + 6]; 1735b9cda22cSBarry Smith alpha8 = x[10 * i + 7]; 1736b9cda22cSBarry Smith alpha9 = x[10 * i + 8]; 1737b9cda22cSBarry Smith alpha10 = x[10 * i + 9]; 1738b9cda22cSBarry Smith while (n-- > 0) { 1739b9cda22cSBarry Smith y[10 * (*idx)] += alpha1 * (*v); 1740b9cda22cSBarry Smith y[10 * (*idx) + 1] += alpha2 * (*v); 1741b9cda22cSBarry Smith y[10 * (*idx) + 2] += alpha3 * (*v); 1742b9cda22cSBarry Smith y[10 * (*idx) + 3] += alpha4 * (*v); 1743b9cda22cSBarry Smith y[10 * (*idx) + 4] += alpha5 * (*v); 1744b9cda22cSBarry Smith y[10 * (*idx) + 5] += alpha6 * (*v); 1745b9cda22cSBarry Smith y[10 * (*idx) + 6] += alpha7 * (*v); 1746b9cda22cSBarry Smith y[10 * (*idx) + 7] += alpha8 * (*v); 1747b9cda22cSBarry Smith y[10 * (*idx) + 8] += alpha9 * (*v); 1748b9cda22cSBarry Smith y[10 * (*idx) + 9] += alpha10 * (*v); 17499371c9d4SSatish Balay idx++; 17509371c9d4SSatish Balay v++; 1751b9cda22cSBarry Smith } 1752b9cda22cSBarry Smith } 17539566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(20.0 * a->nz)); 17549566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 17559566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 1756b9cda22cSBarry Smith PetscFunctionReturn(0); 1757b9cda22cSBarry Smith } 1758b9cda22cSBarry Smith 17592f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/ 17609371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_11(Mat A, Vec xx, Vec yy) { 1761dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1762dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1763d2840e09SBarry Smith const PetscScalar *x, *v; 1764d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1765d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1766d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 1767dbdd7285SBarry Smith 1768dbdd7285SBarry Smith PetscFunctionBegin; 17699566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 17709566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 1771dbdd7285SBarry Smith idx = a->j; 1772dbdd7285SBarry Smith v = a->a; 1773dbdd7285SBarry Smith ii = a->i; 1774dbdd7285SBarry Smith 1775dbdd7285SBarry Smith for (i = 0; i < m; i++) { 1776dbdd7285SBarry Smith jrow = ii[i]; 1777dbdd7285SBarry Smith n = ii[i + 1] - jrow; 1778dbdd7285SBarry Smith sum1 = 0.0; 1779dbdd7285SBarry Smith sum2 = 0.0; 1780dbdd7285SBarry Smith sum3 = 0.0; 1781dbdd7285SBarry Smith sum4 = 0.0; 1782dbdd7285SBarry Smith sum5 = 0.0; 1783dbdd7285SBarry Smith sum6 = 0.0; 1784dbdd7285SBarry Smith sum7 = 0.0; 1785dbdd7285SBarry Smith sum8 = 0.0; 1786dbdd7285SBarry Smith sum9 = 0.0; 1787dbdd7285SBarry Smith sum10 = 0.0; 1788dbdd7285SBarry Smith sum11 = 0.0; 178926fbe8dcSKarl Rupp 1790dbdd7285SBarry Smith nonzerorow += (n > 0); 1791dbdd7285SBarry Smith for (j = 0; j < n; j++) { 1792dbdd7285SBarry Smith sum1 += v[jrow] * x[11 * idx[jrow]]; 1793dbdd7285SBarry Smith sum2 += v[jrow] * x[11 * idx[jrow] + 1]; 1794dbdd7285SBarry Smith sum3 += v[jrow] * x[11 * idx[jrow] + 2]; 1795dbdd7285SBarry Smith sum4 += v[jrow] * x[11 * idx[jrow] + 3]; 1796dbdd7285SBarry Smith sum5 += v[jrow] * x[11 * idx[jrow] + 4]; 1797dbdd7285SBarry Smith sum6 += v[jrow] * x[11 * idx[jrow] + 5]; 1798dbdd7285SBarry Smith sum7 += v[jrow] * x[11 * idx[jrow] + 6]; 1799dbdd7285SBarry Smith sum8 += v[jrow] * x[11 * idx[jrow] + 7]; 1800dbdd7285SBarry Smith sum9 += v[jrow] * x[11 * idx[jrow] + 8]; 1801dbdd7285SBarry Smith sum10 += v[jrow] * x[11 * idx[jrow] + 9]; 1802dbdd7285SBarry Smith sum11 += v[jrow] * x[11 * idx[jrow] + 10]; 1803dbdd7285SBarry Smith jrow++; 1804dbdd7285SBarry Smith } 1805dbdd7285SBarry Smith y[11 * i] = sum1; 1806dbdd7285SBarry Smith y[11 * i + 1] = sum2; 1807dbdd7285SBarry Smith y[11 * i + 2] = sum3; 1808dbdd7285SBarry Smith y[11 * i + 3] = sum4; 1809dbdd7285SBarry Smith y[11 * i + 4] = sum5; 1810dbdd7285SBarry Smith y[11 * i + 5] = sum6; 1811dbdd7285SBarry Smith y[11 * i + 6] = sum7; 1812dbdd7285SBarry Smith y[11 * i + 7] = sum8; 1813dbdd7285SBarry Smith y[11 * i + 8] = sum9; 1814dbdd7285SBarry Smith y[11 * i + 9] = sum10; 1815dbdd7285SBarry Smith y[11 * i + 10] = sum11; 1816dbdd7285SBarry Smith } 1817dbdd7285SBarry Smith 18189566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(22.0 * a->nz - 11 * nonzerorow)); 18199566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 18209566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 1821dbdd7285SBarry Smith PetscFunctionReturn(0); 1822dbdd7285SBarry Smith } 1823dbdd7285SBarry Smith 18249371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz) { 1825dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1826dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1827d2840e09SBarry Smith const PetscScalar *x, *v; 1828d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1829d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1830dbdd7285SBarry Smith PetscInt n, i, jrow, j; 1831dbdd7285SBarry Smith 1832dbdd7285SBarry Smith PetscFunctionBegin; 18339566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 18349566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 18359566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 1836dbdd7285SBarry Smith idx = a->j; 1837dbdd7285SBarry Smith v = a->a; 1838dbdd7285SBarry Smith ii = a->i; 1839dbdd7285SBarry Smith 1840dbdd7285SBarry Smith for (i = 0; i < m; i++) { 1841dbdd7285SBarry Smith jrow = ii[i]; 1842dbdd7285SBarry Smith n = ii[i + 1] - jrow; 1843dbdd7285SBarry Smith sum1 = 0.0; 1844dbdd7285SBarry Smith sum2 = 0.0; 1845dbdd7285SBarry Smith sum3 = 0.0; 1846dbdd7285SBarry Smith sum4 = 0.0; 1847dbdd7285SBarry Smith sum5 = 0.0; 1848dbdd7285SBarry Smith sum6 = 0.0; 1849dbdd7285SBarry Smith sum7 = 0.0; 1850dbdd7285SBarry Smith sum8 = 0.0; 1851dbdd7285SBarry Smith sum9 = 0.0; 1852dbdd7285SBarry Smith sum10 = 0.0; 1853dbdd7285SBarry Smith sum11 = 0.0; 1854dbdd7285SBarry Smith for (j = 0; j < n; j++) { 1855dbdd7285SBarry Smith sum1 += v[jrow] * x[11 * idx[jrow]]; 1856dbdd7285SBarry Smith sum2 += v[jrow] * x[11 * idx[jrow] + 1]; 1857dbdd7285SBarry Smith sum3 += v[jrow] * x[11 * idx[jrow] + 2]; 1858dbdd7285SBarry Smith sum4 += v[jrow] * x[11 * idx[jrow] + 3]; 1859dbdd7285SBarry Smith sum5 += v[jrow] * x[11 * idx[jrow] + 4]; 1860dbdd7285SBarry Smith sum6 += v[jrow] * x[11 * idx[jrow] + 5]; 1861dbdd7285SBarry Smith sum7 += v[jrow] * x[11 * idx[jrow] + 6]; 1862dbdd7285SBarry Smith sum8 += v[jrow] * x[11 * idx[jrow] + 7]; 1863dbdd7285SBarry Smith sum9 += v[jrow] * x[11 * idx[jrow] + 8]; 1864dbdd7285SBarry Smith sum10 += v[jrow] * x[11 * idx[jrow] + 9]; 1865dbdd7285SBarry Smith sum11 += v[jrow] * x[11 * idx[jrow] + 10]; 1866dbdd7285SBarry Smith jrow++; 1867dbdd7285SBarry Smith } 1868dbdd7285SBarry Smith y[11 * i] += sum1; 1869dbdd7285SBarry Smith y[11 * i + 1] += sum2; 1870dbdd7285SBarry Smith y[11 * i + 2] += sum3; 1871dbdd7285SBarry Smith y[11 * i + 3] += sum4; 1872dbdd7285SBarry Smith y[11 * i + 4] += sum5; 1873dbdd7285SBarry Smith y[11 * i + 5] += sum6; 1874dbdd7285SBarry Smith y[11 * i + 6] += sum7; 1875dbdd7285SBarry Smith y[11 * i + 7] += sum8; 1876dbdd7285SBarry Smith y[11 * i + 8] += sum9; 1877dbdd7285SBarry Smith y[11 * i + 9] += sum10; 1878dbdd7285SBarry Smith y[11 * i + 10] += sum11; 1879dbdd7285SBarry Smith } 1880dbdd7285SBarry Smith 18819566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(22.0 * a->nz)); 18829566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 18839566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 1884dbdd7285SBarry Smith PetscFunctionReturn(0); 1885dbdd7285SBarry Smith } 1886dbdd7285SBarry Smith 18879371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A, Vec xx, Vec yy) { 1888dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1889dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1890d2840e09SBarry Smith const PetscScalar *x, *v; 1891d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10, alpha11; 1892d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 1893d2840e09SBarry Smith PetscInt n, i; 1894dbdd7285SBarry Smith 1895dbdd7285SBarry Smith PetscFunctionBegin; 18969566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 18979566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 18989566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 1899dbdd7285SBarry Smith 1900dbdd7285SBarry Smith for (i = 0; i < m; i++) { 1901dbdd7285SBarry Smith idx = a->j + a->i[i]; 1902dbdd7285SBarry Smith v = a->a + a->i[i]; 1903dbdd7285SBarry Smith n = a->i[i + 1] - a->i[i]; 1904dbdd7285SBarry Smith alpha1 = x[11 * i]; 1905dbdd7285SBarry Smith alpha2 = x[11 * i + 1]; 1906dbdd7285SBarry Smith alpha3 = x[11 * i + 2]; 1907dbdd7285SBarry Smith alpha4 = x[11 * i + 3]; 1908dbdd7285SBarry Smith alpha5 = x[11 * i + 4]; 1909dbdd7285SBarry Smith alpha6 = x[11 * i + 5]; 1910dbdd7285SBarry Smith alpha7 = x[11 * i + 6]; 1911dbdd7285SBarry Smith alpha8 = x[11 * i + 7]; 1912dbdd7285SBarry Smith alpha9 = x[11 * i + 8]; 1913dbdd7285SBarry Smith alpha10 = x[11 * i + 9]; 1914dbdd7285SBarry Smith alpha11 = x[11 * i + 10]; 1915dbdd7285SBarry Smith while (n-- > 0) { 1916dbdd7285SBarry Smith y[11 * (*idx)] += alpha1 * (*v); 1917dbdd7285SBarry Smith y[11 * (*idx) + 1] += alpha2 * (*v); 1918dbdd7285SBarry Smith y[11 * (*idx) + 2] += alpha3 * (*v); 1919dbdd7285SBarry Smith y[11 * (*idx) + 3] += alpha4 * (*v); 1920dbdd7285SBarry Smith y[11 * (*idx) + 4] += alpha5 * (*v); 1921dbdd7285SBarry Smith y[11 * (*idx) + 5] += alpha6 * (*v); 1922dbdd7285SBarry Smith y[11 * (*idx) + 6] += alpha7 * (*v); 1923dbdd7285SBarry Smith y[11 * (*idx) + 7] += alpha8 * (*v); 1924dbdd7285SBarry Smith y[11 * (*idx) + 8] += alpha9 * (*v); 1925dbdd7285SBarry Smith y[11 * (*idx) + 9] += alpha10 * (*v); 1926dbdd7285SBarry Smith y[11 * (*idx) + 10] += alpha11 * (*v); 19279371c9d4SSatish Balay idx++; 19289371c9d4SSatish Balay v++; 1929dbdd7285SBarry Smith } 1930dbdd7285SBarry Smith } 19319566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(22.0 * a->nz)); 19329566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 19339566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 1934dbdd7285SBarry Smith PetscFunctionReturn(0); 1935dbdd7285SBarry Smith } 1936dbdd7285SBarry Smith 19379371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz) { 1938dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1939dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1940d2840e09SBarry Smith const PetscScalar *x, *v; 1941d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10, alpha11; 1942d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 1943d2840e09SBarry Smith PetscInt n, i; 1944dbdd7285SBarry Smith 1945dbdd7285SBarry Smith PetscFunctionBegin; 19469566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 19479566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 19489566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 1949dbdd7285SBarry Smith for (i = 0; i < m; i++) { 1950dbdd7285SBarry Smith idx = a->j + a->i[i]; 1951dbdd7285SBarry Smith v = a->a + a->i[i]; 1952dbdd7285SBarry Smith n = a->i[i + 1] - a->i[i]; 1953dbdd7285SBarry Smith alpha1 = x[11 * i]; 1954dbdd7285SBarry Smith alpha2 = x[11 * i + 1]; 1955dbdd7285SBarry Smith alpha3 = x[11 * i + 2]; 1956dbdd7285SBarry Smith alpha4 = x[11 * i + 3]; 1957dbdd7285SBarry Smith alpha5 = x[11 * i + 4]; 1958dbdd7285SBarry Smith alpha6 = x[11 * i + 5]; 1959dbdd7285SBarry Smith alpha7 = x[11 * i + 6]; 1960dbdd7285SBarry Smith alpha8 = x[11 * i + 7]; 1961dbdd7285SBarry Smith alpha9 = x[11 * i + 8]; 1962dbdd7285SBarry Smith alpha10 = x[11 * i + 9]; 1963dbdd7285SBarry Smith alpha11 = x[11 * i + 10]; 1964dbdd7285SBarry Smith while (n-- > 0) { 1965dbdd7285SBarry Smith y[11 * (*idx)] += alpha1 * (*v); 1966dbdd7285SBarry Smith y[11 * (*idx) + 1] += alpha2 * (*v); 1967dbdd7285SBarry Smith y[11 * (*idx) + 2] += alpha3 * (*v); 1968dbdd7285SBarry Smith y[11 * (*idx) + 3] += alpha4 * (*v); 1969dbdd7285SBarry Smith y[11 * (*idx) + 4] += alpha5 * (*v); 1970dbdd7285SBarry Smith y[11 * (*idx) + 5] += alpha6 * (*v); 1971dbdd7285SBarry Smith y[11 * (*idx) + 6] += alpha7 * (*v); 1972dbdd7285SBarry Smith y[11 * (*idx) + 7] += alpha8 * (*v); 1973dbdd7285SBarry Smith y[11 * (*idx) + 8] += alpha9 * (*v); 1974dbdd7285SBarry Smith y[11 * (*idx) + 9] += alpha10 * (*v); 1975dbdd7285SBarry Smith y[11 * (*idx) + 10] += alpha11 * (*v); 19769371c9d4SSatish Balay idx++; 19779371c9d4SSatish Balay v++; 1978dbdd7285SBarry Smith } 1979dbdd7285SBarry Smith } 19809566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(22.0 * a->nz)); 19819566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 19829566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 1983dbdd7285SBarry Smith PetscFunctionReturn(0); 1984dbdd7285SBarry Smith } 1985dbdd7285SBarry Smith 1986dbdd7285SBarry Smith /*--------------------------------------------------------------------------------------------*/ 19879371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_16(Mat A, Vec xx, Vec yy) { 19882f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 19892f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1990d2840e09SBarry Smith const PetscScalar *x, *v; 1991d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 19922f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1993d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1994d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 19952f7816d4SBarry Smith 19962f7816d4SBarry Smith PetscFunctionBegin; 19979566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 19989566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 19992f7816d4SBarry Smith idx = a->j; 20002f7816d4SBarry Smith v = a->a; 20012f7816d4SBarry Smith ii = a->i; 20022f7816d4SBarry Smith 20032f7816d4SBarry Smith for (i = 0; i < m; i++) { 20042f7816d4SBarry Smith jrow = ii[i]; 20052f7816d4SBarry Smith n = ii[i + 1] - jrow; 20062f7816d4SBarry Smith sum1 = 0.0; 20072f7816d4SBarry Smith sum2 = 0.0; 20082f7816d4SBarry Smith sum3 = 0.0; 20092f7816d4SBarry Smith sum4 = 0.0; 20102f7816d4SBarry Smith sum5 = 0.0; 20112f7816d4SBarry Smith sum6 = 0.0; 20122f7816d4SBarry Smith sum7 = 0.0; 20132f7816d4SBarry Smith sum8 = 0.0; 20142f7816d4SBarry Smith sum9 = 0.0; 20152f7816d4SBarry Smith sum10 = 0.0; 20162f7816d4SBarry Smith sum11 = 0.0; 20172f7816d4SBarry Smith sum12 = 0.0; 20182f7816d4SBarry Smith sum13 = 0.0; 20192f7816d4SBarry Smith sum14 = 0.0; 20202f7816d4SBarry Smith sum15 = 0.0; 20212f7816d4SBarry Smith sum16 = 0.0; 202226fbe8dcSKarl Rupp 2023b3c51c66SMatthew Knepley nonzerorow += (n > 0); 20242f7816d4SBarry Smith for (j = 0; j < n; j++) { 20252f7816d4SBarry Smith sum1 += v[jrow] * x[16 * idx[jrow]]; 20262f7816d4SBarry Smith sum2 += v[jrow] * x[16 * idx[jrow] + 1]; 20272f7816d4SBarry Smith sum3 += v[jrow] * x[16 * idx[jrow] + 2]; 20282f7816d4SBarry Smith sum4 += v[jrow] * x[16 * idx[jrow] + 3]; 20292f7816d4SBarry Smith sum5 += v[jrow] * x[16 * idx[jrow] + 4]; 20302f7816d4SBarry Smith sum6 += v[jrow] * x[16 * idx[jrow] + 5]; 20312f7816d4SBarry Smith sum7 += v[jrow] * x[16 * idx[jrow] + 6]; 20322f7816d4SBarry Smith sum8 += v[jrow] * x[16 * idx[jrow] + 7]; 20332f7816d4SBarry Smith sum9 += v[jrow] * x[16 * idx[jrow] + 8]; 20342f7816d4SBarry Smith sum10 += v[jrow] * x[16 * idx[jrow] + 9]; 20352f7816d4SBarry Smith sum11 += v[jrow] * x[16 * idx[jrow] + 10]; 20362f7816d4SBarry Smith sum12 += v[jrow] * x[16 * idx[jrow] + 11]; 20372f7816d4SBarry Smith sum13 += v[jrow] * x[16 * idx[jrow] + 12]; 20382f7816d4SBarry Smith sum14 += v[jrow] * x[16 * idx[jrow] + 13]; 20392f7816d4SBarry Smith sum15 += v[jrow] * x[16 * idx[jrow] + 14]; 20402f7816d4SBarry Smith sum16 += v[jrow] * x[16 * idx[jrow] + 15]; 20412f7816d4SBarry Smith jrow++; 20422f7816d4SBarry Smith } 20432f7816d4SBarry Smith y[16 * i] = sum1; 20442f7816d4SBarry Smith y[16 * i + 1] = sum2; 20452f7816d4SBarry Smith y[16 * i + 2] = sum3; 20462f7816d4SBarry Smith y[16 * i + 3] = sum4; 20472f7816d4SBarry Smith y[16 * i + 4] = sum5; 20482f7816d4SBarry Smith y[16 * i + 5] = sum6; 20492f7816d4SBarry Smith y[16 * i + 6] = sum7; 20502f7816d4SBarry Smith y[16 * i + 7] = sum8; 20512f7816d4SBarry Smith y[16 * i + 8] = sum9; 20522f7816d4SBarry Smith y[16 * i + 9] = sum10; 20532f7816d4SBarry Smith y[16 * i + 10] = sum11; 20542f7816d4SBarry Smith y[16 * i + 11] = sum12; 20552f7816d4SBarry Smith y[16 * i + 12] = sum13; 20562f7816d4SBarry Smith y[16 * i + 13] = sum14; 20572f7816d4SBarry Smith y[16 * i + 14] = sum15; 20582f7816d4SBarry Smith y[16 * i + 15] = sum16; 20592f7816d4SBarry Smith } 20602f7816d4SBarry Smith 20619566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(32.0 * a->nz - 16.0 * nonzerorow)); 20629566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 20639566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 20642f7816d4SBarry Smith PetscFunctionReturn(0); 20652f7816d4SBarry Smith } 20662f7816d4SBarry Smith 20679371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A, Vec xx, Vec yy) { 20682f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 20692f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2070d2840e09SBarry Smith const PetscScalar *x, *v; 2071d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 20722f7816d4SBarry Smith PetscScalar alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16; 2073d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 2074d2840e09SBarry Smith PetscInt n, i; 20752f7816d4SBarry Smith 20762f7816d4SBarry Smith PetscFunctionBegin; 20779566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 20789566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 20799566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 2080bfec09a0SHong Zhang 20812f7816d4SBarry Smith for (i = 0; i < m; i++) { 2082bfec09a0SHong Zhang idx = a->j + a->i[i]; 2083bfec09a0SHong Zhang v = a->a + a->i[i]; 20842f7816d4SBarry Smith n = a->i[i + 1] - a->i[i]; 20852f7816d4SBarry Smith alpha1 = x[16 * i]; 20862f7816d4SBarry Smith alpha2 = x[16 * i + 1]; 20872f7816d4SBarry Smith alpha3 = x[16 * i + 2]; 20882f7816d4SBarry Smith alpha4 = x[16 * i + 3]; 20892f7816d4SBarry Smith alpha5 = x[16 * i + 4]; 20902f7816d4SBarry Smith alpha6 = x[16 * i + 5]; 20912f7816d4SBarry Smith alpha7 = x[16 * i + 6]; 20922f7816d4SBarry Smith alpha8 = x[16 * i + 7]; 20932f7816d4SBarry Smith alpha9 = x[16 * i + 8]; 20942f7816d4SBarry Smith alpha10 = x[16 * i + 9]; 20952f7816d4SBarry Smith alpha11 = x[16 * i + 10]; 20962f7816d4SBarry Smith alpha12 = x[16 * i + 11]; 20972f7816d4SBarry Smith alpha13 = x[16 * i + 12]; 20982f7816d4SBarry Smith alpha14 = x[16 * i + 13]; 20992f7816d4SBarry Smith alpha15 = x[16 * i + 14]; 21002f7816d4SBarry Smith alpha16 = x[16 * i + 15]; 21012f7816d4SBarry Smith while (n-- > 0) { 21022f7816d4SBarry Smith y[16 * (*idx)] += alpha1 * (*v); 21032f7816d4SBarry Smith y[16 * (*idx) + 1] += alpha2 * (*v); 21042f7816d4SBarry Smith y[16 * (*idx) + 2] += alpha3 * (*v); 21052f7816d4SBarry Smith y[16 * (*idx) + 3] += alpha4 * (*v); 21062f7816d4SBarry Smith y[16 * (*idx) + 4] += alpha5 * (*v); 21072f7816d4SBarry Smith y[16 * (*idx) + 5] += alpha6 * (*v); 21082f7816d4SBarry Smith y[16 * (*idx) + 6] += alpha7 * (*v); 21092f7816d4SBarry Smith y[16 * (*idx) + 7] += alpha8 * (*v); 21102f7816d4SBarry Smith y[16 * (*idx) + 8] += alpha9 * (*v); 21112f7816d4SBarry Smith y[16 * (*idx) + 9] += alpha10 * (*v); 21122f7816d4SBarry Smith y[16 * (*idx) + 10] += alpha11 * (*v); 21132f7816d4SBarry Smith y[16 * (*idx) + 11] += alpha12 * (*v); 21142f7816d4SBarry Smith y[16 * (*idx) + 12] += alpha13 * (*v); 21152f7816d4SBarry Smith y[16 * (*idx) + 13] += alpha14 * (*v); 21162f7816d4SBarry Smith y[16 * (*idx) + 14] += alpha15 * (*v); 21172f7816d4SBarry Smith y[16 * (*idx) + 15] += alpha16 * (*v); 21189371c9d4SSatish Balay idx++; 21199371c9d4SSatish Balay v++; 21202f7816d4SBarry Smith } 21212f7816d4SBarry Smith } 21229566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(32.0 * a->nz)); 21239566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 21249566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 21252f7816d4SBarry Smith PetscFunctionReturn(0); 21262f7816d4SBarry Smith } 21272f7816d4SBarry Smith 21289371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz) { 21292f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 21302f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2131d2840e09SBarry Smith const PetscScalar *x, *v; 2132d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 21332f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2134d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 2135b24ad042SBarry Smith PetscInt n, i, jrow, j; 21362f7816d4SBarry Smith 21372f7816d4SBarry Smith PetscFunctionBegin; 21389566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 21399566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 21409566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 21412f7816d4SBarry Smith idx = a->j; 21422f7816d4SBarry Smith v = a->a; 21432f7816d4SBarry Smith ii = a->i; 21442f7816d4SBarry Smith 21452f7816d4SBarry Smith for (i = 0; i < m; i++) { 21462f7816d4SBarry Smith jrow = ii[i]; 21472f7816d4SBarry Smith n = ii[i + 1] - jrow; 21482f7816d4SBarry Smith sum1 = 0.0; 21492f7816d4SBarry Smith sum2 = 0.0; 21502f7816d4SBarry Smith sum3 = 0.0; 21512f7816d4SBarry Smith sum4 = 0.0; 21522f7816d4SBarry Smith sum5 = 0.0; 21532f7816d4SBarry Smith sum6 = 0.0; 21542f7816d4SBarry Smith sum7 = 0.0; 21552f7816d4SBarry Smith sum8 = 0.0; 21562f7816d4SBarry Smith sum9 = 0.0; 21572f7816d4SBarry Smith sum10 = 0.0; 21582f7816d4SBarry Smith sum11 = 0.0; 21592f7816d4SBarry Smith sum12 = 0.0; 21602f7816d4SBarry Smith sum13 = 0.0; 21612f7816d4SBarry Smith sum14 = 0.0; 21622f7816d4SBarry Smith sum15 = 0.0; 21632f7816d4SBarry Smith sum16 = 0.0; 21642f7816d4SBarry Smith for (j = 0; j < n; j++) { 21652f7816d4SBarry Smith sum1 += v[jrow] * x[16 * idx[jrow]]; 21662f7816d4SBarry Smith sum2 += v[jrow] * x[16 * idx[jrow] + 1]; 21672f7816d4SBarry Smith sum3 += v[jrow] * x[16 * idx[jrow] + 2]; 21682f7816d4SBarry Smith sum4 += v[jrow] * x[16 * idx[jrow] + 3]; 21692f7816d4SBarry Smith sum5 += v[jrow] * x[16 * idx[jrow] + 4]; 21702f7816d4SBarry Smith sum6 += v[jrow] * x[16 * idx[jrow] + 5]; 21712f7816d4SBarry Smith sum7 += v[jrow] * x[16 * idx[jrow] + 6]; 21722f7816d4SBarry Smith sum8 += v[jrow] * x[16 * idx[jrow] + 7]; 21732f7816d4SBarry Smith sum9 += v[jrow] * x[16 * idx[jrow] + 8]; 21742f7816d4SBarry Smith sum10 += v[jrow] * x[16 * idx[jrow] + 9]; 21752f7816d4SBarry Smith sum11 += v[jrow] * x[16 * idx[jrow] + 10]; 21762f7816d4SBarry Smith sum12 += v[jrow] * x[16 * idx[jrow] + 11]; 21772f7816d4SBarry Smith sum13 += v[jrow] * x[16 * idx[jrow] + 12]; 21782f7816d4SBarry Smith sum14 += v[jrow] * x[16 * idx[jrow] + 13]; 21792f7816d4SBarry Smith sum15 += v[jrow] * x[16 * idx[jrow] + 14]; 21802f7816d4SBarry Smith sum16 += v[jrow] * x[16 * idx[jrow] + 15]; 21812f7816d4SBarry Smith jrow++; 21822f7816d4SBarry Smith } 21832f7816d4SBarry Smith y[16 * i] += sum1; 21842f7816d4SBarry Smith y[16 * i + 1] += sum2; 21852f7816d4SBarry Smith y[16 * i + 2] += sum3; 21862f7816d4SBarry Smith y[16 * i + 3] += sum4; 21872f7816d4SBarry Smith y[16 * i + 4] += sum5; 21882f7816d4SBarry Smith y[16 * i + 5] += sum6; 21892f7816d4SBarry Smith y[16 * i + 6] += sum7; 21902f7816d4SBarry Smith y[16 * i + 7] += sum8; 21912f7816d4SBarry Smith y[16 * i + 8] += sum9; 21922f7816d4SBarry Smith y[16 * i + 9] += sum10; 21932f7816d4SBarry Smith y[16 * i + 10] += sum11; 21942f7816d4SBarry Smith y[16 * i + 11] += sum12; 21952f7816d4SBarry Smith y[16 * i + 12] += sum13; 21962f7816d4SBarry Smith y[16 * i + 13] += sum14; 21972f7816d4SBarry Smith y[16 * i + 14] += sum15; 21982f7816d4SBarry Smith y[16 * i + 15] += sum16; 21992f7816d4SBarry Smith } 22002f7816d4SBarry Smith 22019566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(32.0 * a->nz)); 22029566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 22039566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 22042f7816d4SBarry Smith PetscFunctionReturn(0); 22052f7816d4SBarry Smith } 22062f7816d4SBarry Smith 22079371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz) { 22082f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 22092f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2210d2840e09SBarry Smith const PetscScalar *x, *v; 2211d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 22122f7816d4SBarry Smith PetscScalar alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16; 2213d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 2214d2840e09SBarry Smith PetscInt n, i; 22152f7816d4SBarry Smith 22162f7816d4SBarry Smith PetscFunctionBegin; 22179566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 22189566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 22199566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 22202f7816d4SBarry Smith for (i = 0; i < m; i++) { 2221bfec09a0SHong Zhang idx = a->j + a->i[i]; 2222bfec09a0SHong Zhang v = a->a + a->i[i]; 22232f7816d4SBarry Smith n = a->i[i + 1] - a->i[i]; 22242f7816d4SBarry Smith alpha1 = x[16 * i]; 22252f7816d4SBarry Smith alpha2 = x[16 * i + 1]; 22262f7816d4SBarry Smith alpha3 = x[16 * i + 2]; 22272f7816d4SBarry Smith alpha4 = x[16 * i + 3]; 22282f7816d4SBarry Smith alpha5 = x[16 * i + 4]; 22292f7816d4SBarry Smith alpha6 = x[16 * i + 5]; 22302f7816d4SBarry Smith alpha7 = x[16 * i + 6]; 22312f7816d4SBarry Smith alpha8 = x[16 * i + 7]; 22322f7816d4SBarry Smith alpha9 = x[16 * i + 8]; 22332f7816d4SBarry Smith alpha10 = x[16 * i + 9]; 22342f7816d4SBarry Smith alpha11 = x[16 * i + 10]; 22352f7816d4SBarry Smith alpha12 = x[16 * i + 11]; 22362f7816d4SBarry Smith alpha13 = x[16 * i + 12]; 22372f7816d4SBarry Smith alpha14 = x[16 * i + 13]; 22382f7816d4SBarry Smith alpha15 = x[16 * i + 14]; 22392f7816d4SBarry Smith alpha16 = x[16 * i + 15]; 22402f7816d4SBarry Smith while (n-- > 0) { 22412f7816d4SBarry Smith y[16 * (*idx)] += alpha1 * (*v); 22422f7816d4SBarry Smith y[16 * (*idx) + 1] += alpha2 * (*v); 22432f7816d4SBarry Smith y[16 * (*idx) + 2] += alpha3 * (*v); 22442f7816d4SBarry Smith y[16 * (*idx) + 3] += alpha4 * (*v); 22452f7816d4SBarry Smith y[16 * (*idx) + 4] += alpha5 * (*v); 22462f7816d4SBarry Smith y[16 * (*idx) + 5] += alpha6 * (*v); 22472f7816d4SBarry Smith y[16 * (*idx) + 6] += alpha7 * (*v); 22482f7816d4SBarry Smith y[16 * (*idx) + 7] += alpha8 * (*v); 22492f7816d4SBarry Smith y[16 * (*idx) + 8] += alpha9 * (*v); 22502f7816d4SBarry Smith y[16 * (*idx) + 9] += alpha10 * (*v); 22512f7816d4SBarry Smith y[16 * (*idx) + 10] += alpha11 * (*v); 22522f7816d4SBarry Smith y[16 * (*idx) + 11] += alpha12 * (*v); 22532f7816d4SBarry Smith y[16 * (*idx) + 12] += alpha13 * (*v); 22542f7816d4SBarry Smith y[16 * (*idx) + 13] += alpha14 * (*v); 22552f7816d4SBarry Smith y[16 * (*idx) + 14] += alpha15 * (*v); 22562f7816d4SBarry Smith y[16 * (*idx) + 15] += alpha16 * (*v); 22579371c9d4SSatish Balay idx++; 22589371c9d4SSatish Balay v++; 22592f7816d4SBarry Smith } 22602f7816d4SBarry Smith } 22619566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(32.0 * a->nz)); 22629566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 22639566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 226466ed3db0SBarry Smith PetscFunctionReturn(0); 226566ed3db0SBarry Smith } 226666ed3db0SBarry Smith 2267ed1c418cSBarry Smith /*--------------------------------------------------------------------------------------------*/ 22689371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_18(Mat A, Vec xx, Vec yy) { 2269ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2270ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2271d2840e09SBarry Smith const PetscScalar *x, *v; 2272d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2273ed1c418cSBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2274d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 2275d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 2276ed1c418cSBarry Smith 2277ed1c418cSBarry Smith PetscFunctionBegin; 22789566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 22799566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 2280ed1c418cSBarry Smith idx = a->j; 2281ed1c418cSBarry Smith v = a->a; 2282ed1c418cSBarry Smith ii = a->i; 2283ed1c418cSBarry Smith 2284ed1c418cSBarry Smith for (i = 0; i < m; i++) { 2285ed1c418cSBarry Smith jrow = ii[i]; 2286ed1c418cSBarry Smith n = ii[i + 1] - jrow; 2287ed1c418cSBarry Smith sum1 = 0.0; 2288ed1c418cSBarry Smith sum2 = 0.0; 2289ed1c418cSBarry Smith sum3 = 0.0; 2290ed1c418cSBarry Smith sum4 = 0.0; 2291ed1c418cSBarry Smith sum5 = 0.0; 2292ed1c418cSBarry Smith sum6 = 0.0; 2293ed1c418cSBarry Smith sum7 = 0.0; 2294ed1c418cSBarry Smith sum8 = 0.0; 2295ed1c418cSBarry Smith sum9 = 0.0; 2296ed1c418cSBarry Smith sum10 = 0.0; 2297ed1c418cSBarry Smith sum11 = 0.0; 2298ed1c418cSBarry Smith sum12 = 0.0; 2299ed1c418cSBarry Smith sum13 = 0.0; 2300ed1c418cSBarry Smith sum14 = 0.0; 2301ed1c418cSBarry Smith sum15 = 0.0; 2302ed1c418cSBarry Smith sum16 = 0.0; 2303ed1c418cSBarry Smith sum17 = 0.0; 2304ed1c418cSBarry Smith sum18 = 0.0; 230526fbe8dcSKarl Rupp 2306ed1c418cSBarry Smith nonzerorow += (n > 0); 2307ed1c418cSBarry Smith for (j = 0; j < n; j++) { 2308ed1c418cSBarry Smith sum1 += v[jrow] * x[18 * idx[jrow]]; 2309ed1c418cSBarry Smith sum2 += v[jrow] * x[18 * idx[jrow] + 1]; 2310ed1c418cSBarry Smith sum3 += v[jrow] * x[18 * idx[jrow] + 2]; 2311ed1c418cSBarry Smith sum4 += v[jrow] * x[18 * idx[jrow] + 3]; 2312ed1c418cSBarry Smith sum5 += v[jrow] * x[18 * idx[jrow] + 4]; 2313ed1c418cSBarry Smith sum6 += v[jrow] * x[18 * idx[jrow] + 5]; 2314ed1c418cSBarry Smith sum7 += v[jrow] * x[18 * idx[jrow] + 6]; 2315ed1c418cSBarry Smith sum8 += v[jrow] * x[18 * idx[jrow] + 7]; 2316ed1c418cSBarry Smith sum9 += v[jrow] * x[18 * idx[jrow] + 8]; 2317ed1c418cSBarry Smith sum10 += v[jrow] * x[18 * idx[jrow] + 9]; 2318ed1c418cSBarry Smith sum11 += v[jrow] * x[18 * idx[jrow] + 10]; 2319ed1c418cSBarry Smith sum12 += v[jrow] * x[18 * idx[jrow] + 11]; 2320ed1c418cSBarry Smith sum13 += v[jrow] * x[18 * idx[jrow] + 12]; 2321ed1c418cSBarry Smith sum14 += v[jrow] * x[18 * idx[jrow] + 13]; 2322ed1c418cSBarry Smith sum15 += v[jrow] * x[18 * idx[jrow] + 14]; 2323ed1c418cSBarry Smith sum16 += v[jrow] * x[18 * idx[jrow] + 15]; 2324ed1c418cSBarry Smith sum17 += v[jrow] * x[18 * idx[jrow] + 16]; 2325ed1c418cSBarry Smith sum18 += v[jrow] * x[18 * idx[jrow] + 17]; 2326ed1c418cSBarry Smith jrow++; 2327ed1c418cSBarry Smith } 2328ed1c418cSBarry Smith y[18 * i] = sum1; 2329ed1c418cSBarry Smith y[18 * i + 1] = sum2; 2330ed1c418cSBarry Smith y[18 * i + 2] = sum3; 2331ed1c418cSBarry Smith y[18 * i + 3] = sum4; 2332ed1c418cSBarry Smith y[18 * i + 4] = sum5; 2333ed1c418cSBarry Smith y[18 * i + 5] = sum6; 2334ed1c418cSBarry Smith y[18 * i + 6] = sum7; 2335ed1c418cSBarry Smith y[18 * i + 7] = sum8; 2336ed1c418cSBarry Smith y[18 * i + 8] = sum9; 2337ed1c418cSBarry Smith y[18 * i + 9] = sum10; 2338ed1c418cSBarry Smith y[18 * i + 10] = sum11; 2339ed1c418cSBarry Smith y[18 * i + 11] = sum12; 2340ed1c418cSBarry Smith y[18 * i + 12] = sum13; 2341ed1c418cSBarry Smith y[18 * i + 13] = sum14; 2342ed1c418cSBarry Smith y[18 * i + 14] = sum15; 2343ed1c418cSBarry Smith y[18 * i + 15] = sum16; 2344ed1c418cSBarry Smith y[18 * i + 16] = sum17; 2345ed1c418cSBarry Smith y[18 * i + 17] = sum18; 2346ed1c418cSBarry Smith } 2347ed1c418cSBarry Smith 23489566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(36.0 * a->nz - 18.0 * nonzerorow)); 23499566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 23509566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 2351ed1c418cSBarry Smith PetscFunctionReturn(0); 2352ed1c418cSBarry Smith } 2353ed1c418cSBarry Smith 23549371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A, Vec xx, Vec yy) { 2355ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2356ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2357d2840e09SBarry Smith const PetscScalar *x, *v; 2358d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 2359ed1c418cSBarry Smith PetscScalar alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16, alpha17, alpha18; 2360d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 2361d2840e09SBarry Smith PetscInt n, i; 2362ed1c418cSBarry Smith 2363ed1c418cSBarry Smith PetscFunctionBegin; 23649566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 23659566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 23669566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 2367ed1c418cSBarry Smith 2368ed1c418cSBarry Smith for (i = 0; i < m; i++) { 2369ed1c418cSBarry Smith idx = a->j + a->i[i]; 2370ed1c418cSBarry Smith v = a->a + a->i[i]; 2371ed1c418cSBarry Smith n = a->i[i + 1] - a->i[i]; 2372ed1c418cSBarry Smith alpha1 = x[18 * i]; 2373ed1c418cSBarry Smith alpha2 = x[18 * i + 1]; 2374ed1c418cSBarry Smith alpha3 = x[18 * i + 2]; 2375ed1c418cSBarry Smith alpha4 = x[18 * i + 3]; 2376ed1c418cSBarry Smith alpha5 = x[18 * i + 4]; 2377ed1c418cSBarry Smith alpha6 = x[18 * i + 5]; 2378ed1c418cSBarry Smith alpha7 = x[18 * i + 6]; 2379ed1c418cSBarry Smith alpha8 = x[18 * i + 7]; 2380ed1c418cSBarry Smith alpha9 = x[18 * i + 8]; 2381ed1c418cSBarry Smith alpha10 = x[18 * i + 9]; 2382ed1c418cSBarry Smith alpha11 = x[18 * i + 10]; 2383ed1c418cSBarry Smith alpha12 = x[18 * i + 11]; 2384ed1c418cSBarry Smith alpha13 = x[18 * i + 12]; 2385ed1c418cSBarry Smith alpha14 = x[18 * i + 13]; 2386ed1c418cSBarry Smith alpha15 = x[18 * i + 14]; 2387ed1c418cSBarry Smith alpha16 = x[18 * i + 15]; 2388ed1c418cSBarry Smith alpha17 = x[18 * i + 16]; 2389ed1c418cSBarry Smith alpha18 = x[18 * i + 17]; 2390ed1c418cSBarry Smith while (n-- > 0) { 2391ed1c418cSBarry Smith y[18 * (*idx)] += alpha1 * (*v); 2392ed1c418cSBarry Smith y[18 * (*idx) + 1] += alpha2 * (*v); 2393ed1c418cSBarry Smith y[18 * (*idx) + 2] += alpha3 * (*v); 2394ed1c418cSBarry Smith y[18 * (*idx) + 3] += alpha4 * (*v); 2395ed1c418cSBarry Smith y[18 * (*idx) + 4] += alpha5 * (*v); 2396ed1c418cSBarry Smith y[18 * (*idx) + 5] += alpha6 * (*v); 2397ed1c418cSBarry Smith y[18 * (*idx) + 6] += alpha7 * (*v); 2398ed1c418cSBarry Smith y[18 * (*idx) + 7] += alpha8 * (*v); 2399ed1c418cSBarry Smith y[18 * (*idx) + 8] += alpha9 * (*v); 2400ed1c418cSBarry Smith y[18 * (*idx) + 9] += alpha10 * (*v); 2401ed1c418cSBarry Smith y[18 * (*idx) + 10] += alpha11 * (*v); 2402ed1c418cSBarry Smith y[18 * (*idx) + 11] += alpha12 * (*v); 2403ed1c418cSBarry Smith y[18 * (*idx) + 12] += alpha13 * (*v); 2404ed1c418cSBarry Smith y[18 * (*idx) + 13] += alpha14 * (*v); 2405ed1c418cSBarry Smith y[18 * (*idx) + 14] += alpha15 * (*v); 2406ed1c418cSBarry Smith y[18 * (*idx) + 15] += alpha16 * (*v); 2407ed1c418cSBarry Smith y[18 * (*idx) + 16] += alpha17 * (*v); 2408ed1c418cSBarry Smith y[18 * (*idx) + 17] += alpha18 * (*v); 24099371c9d4SSatish Balay idx++; 24109371c9d4SSatish Balay v++; 2411ed1c418cSBarry Smith } 2412ed1c418cSBarry Smith } 24139566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(36.0 * a->nz)); 24149566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 24159566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 2416ed1c418cSBarry Smith PetscFunctionReturn(0); 2417ed1c418cSBarry Smith } 2418ed1c418cSBarry Smith 24199371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz) { 2420ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2421ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2422d2840e09SBarry Smith const PetscScalar *x, *v; 2423d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2424ed1c418cSBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2425d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 2426ed1c418cSBarry Smith PetscInt n, i, jrow, j; 2427ed1c418cSBarry Smith 2428ed1c418cSBarry Smith PetscFunctionBegin; 24299566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 24309566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 24319566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 2432ed1c418cSBarry Smith idx = a->j; 2433ed1c418cSBarry Smith v = a->a; 2434ed1c418cSBarry Smith ii = a->i; 2435ed1c418cSBarry Smith 2436ed1c418cSBarry Smith for (i = 0; i < m; i++) { 2437ed1c418cSBarry Smith jrow = ii[i]; 2438ed1c418cSBarry Smith n = ii[i + 1] - jrow; 2439ed1c418cSBarry Smith sum1 = 0.0; 2440ed1c418cSBarry Smith sum2 = 0.0; 2441ed1c418cSBarry Smith sum3 = 0.0; 2442ed1c418cSBarry Smith sum4 = 0.0; 2443ed1c418cSBarry Smith sum5 = 0.0; 2444ed1c418cSBarry Smith sum6 = 0.0; 2445ed1c418cSBarry Smith sum7 = 0.0; 2446ed1c418cSBarry Smith sum8 = 0.0; 2447ed1c418cSBarry Smith sum9 = 0.0; 2448ed1c418cSBarry Smith sum10 = 0.0; 2449ed1c418cSBarry Smith sum11 = 0.0; 2450ed1c418cSBarry Smith sum12 = 0.0; 2451ed1c418cSBarry Smith sum13 = 0.0; 2452ed1c418cSBarry Smith sum14 = 0.0; 2453ed1c418cSBarry Smith sum15 = 0.0; 2454ed1c418cSBarry Smith sum16 = 0.0; 2455ed1c418cSBarry Smith sum17 = 0.0; 2456ed1c418cSBarry Smith sum18 = 0.0; 2457ed1c418cSBarry Smith for (j = 0; j < n; j++) { 2458ed1c418cSBarry Smith sum1 += v[jrow] * x[18 * idx[jrow]]; 2459ed1c418cSBarry Smith sum2 += v[jrow] * x[18 * idx[jrow] + 1]; 2460ed1c418cSBarry Smith sum3 += v[jrow] * x[18 * idx[jrow] + 2]; 2461ed1c418cSBarry Smith sum4 += v[jrow] * x[18 * idx[jrow] + 3]; 2462ed1c418cSBarry Smith sum5 += v[jrow] * x[18 * idx[jrow] + 4]; 2463ed1c418cSBarry Smith sum6 += v[jrow] * x[18 * idx[jrow] + 5]; 2464ed1c418cSBarry Smith sum7 += v[jrow] * x[18 * idx[jrow] + 6]; 2465ed1c418cSBarry Smith sum8 += v[jrow] * x[18 * idx[jrow] + 7]; 2466ed1c418cSBarry Smith sum9 += v[jrow] * x[18 * idx[jrow] + 8]; 2467ed1c418cSBarry Smith sum10 += v[jrow] * x[18 * idx[jrow] + 9]; 2468ed1c418cSBarry Smith sum11 += v[jrow] * x[18 * idx[jrow] + 10]; 2469ed1c418cSBarry Smith sum12 += v[jrow] * x[18 * idx[jrow] + 11]; 2470ed1c418cSBarry Smith sum13 += v[jrow] * x[18 * idx[jrow] + 12]; 2471ed1c418cSBarry Smith sum14 += v[jrow] * x[18 * idx[jrow] + 13]; 2472ed1c418cSBarry Smith sum15 += v[jrow] * x[18 * idx[jrow] + 14]; 2473ed1c418cSBarry Smith sum16 += v[jrow] * x[18 * idx[jrow] + 15]; 2474ed1c418cSBarry Smith sum17 += v[jrow] * x[18 * idx[jrow] + 16]; 2475ed1c418cSBarry Smith sum18 += v[jrow] * x[18 * idx[jrow] + 17]; 2476ed1c418cSBarry Smith jrow++; 2477ed1c418cSBarry Smith } 2478ed1c418cSBarry Smith y[18 * i] += sum1; 2479ed1c418cSBarry Smith y[18 * i + 1] += sum2; 2480ed1c418cSBarry Smith y[18 * i + 2] += sum3; 2481ed1c418cSBarry Smith y[18 * i + 3] += sum4; 2482ed1c418cSBarry Smith y[18 * i + 4] += sum5; 2483ed1c418cSBarry Smith y[18 * i + 5] += sum6; 2484ed1c418cSBarry Smith y[18 * i + 6] += sum7; 2485ed1c418cSBarry Smith y[18 * i + 7] += sum8; 2486ed1c418cSBarry Smith y[18 * i + 8] += sum9; 2487ed1c418cSBarry Smith y[18 * i + 9] += sum10; 2488ed1c418cSBarry Smith y[18 * i + 10] += sum11; 2489ed1c418cSBarry Smith y[18 * i + 11] += sum12; 2490ed1c418cSBarry Smith y[18 * i + 12] += sum13; 2491ed1c418cSBarry Smith y[18 * i + 13] += sum14; 2492ed1c418cSBarry Smith y[18 * i + 14] += sum15; 2493ed1c418cSBarry Smith y[18 * i + 15] += sum16; 2494ed1c418cSBarry Smith y[18 * i + 16] += sum17; 2495ed1c418cSBarry Smith y[18 * i + 17] += sum18; 2496ed1c418cSBarry Smith } 2497ed1c418cSBarry Smith 24989566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(36.0 * a->nz)); 24999566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 25009566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 2501ed1c418cSBarry Smith PetscFunctionReturn(0); 2502ed1c418cSBarry Smith } 2503ed1c418cSBarry Smith 25049371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz) { 2505ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2506ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2507d2840e09SBarry Smith const PetscScalar *x, *v; 2508d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 2509ed1c418cSBarry Smith PetscScalar alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16, alpha17, alpha18; 2510d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 2511d2840e09SBarry Smith PetscInt n, i; 2512ed1c418cSBarry Smith 2513ed1c418cSBarry Smith PetscFunctionBegin; 25149566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 25159566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 25169566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 2517ed1c418cSBarry Smith for (i = 0; i < m; i++) { 2518ed1c418cSBarry Smith idx = a->j + a->i[i]; 2519ed1c418cSBarry Smith v = a->a + a->i[i]; 2520ed1c418cSBarry Smith n = a->i[i + 1] - a->i[i]; 2521ed1c418cSBarry Smith alpha1 = x[18 * i]; 2522ed1c418cSBarry Smith alpha2 = x[18 * i + 1]; 2523ed1c418cSBarry Smith alpha3 = x[18 * i + 2]; 2524ed1c418cSBarry Smith alpha4 = x[18 * i + 3]; 2525ed1c418cSBarry Smith alpha5 = x[18 * i + 4]; 2526ed1c418cSBarry Smith alpha6 = x[18 * i + 5]; 2527ed1c418cSBarry Smith alpha7 = x[18 * i + 6]; 2528ed1c418cSBarry Smith alpha8 = x[18 * i + 7]; 2529ed1c418cSBarry Smith alpha9 = x[18 * i + 8]; 2530ed1c418cSBarry Smith alpha10 = x[18 * i + 9]; 2531ed1c418cSBarry Smith alpha11 = x[18 * i + 10]; 2532ed1c418cSBarry Smith alpha12 = x[18 * i + 11]; 2533ed1c418cSBarry Smith alpha13 = x[18 * i + 12]; 2534ed1c418cSBarry Smith alpha14 = x[18 * i + 13]; 2535ed1c418cSBarry Smith alpha15 = x[18 * i + 14]; 2536ed1c418cSBarry Smith alpha16 = x[18 * i + 15]; 2537ed1c418cSBarry Smith alpha17 = x[18 * i + 16]; 2538ed1c418cSBarry Smith alpha18 = x[18 * i + 17]; 2539ed1c418cSBarry Smith while (n-- > 0) { 2540ed1c418cSBarry Smith y[18 * (*idx)] += alpha1 * (*v); 2541ed1c418cSBarry Smith y[18 * (*idx) + 1] += alpha2 * (*v); 2542ed1c418cSBarry Smith y[18 * (*idx) + 2] += alpha3 * (*v); 2543ed1c418cSBarry Smith y[18 * (*idx) + 3] += alpha4 * (*v); 2544ed1c418cSBarry Smith y[18 * (*idx) + 4] += alpha5 * (*v); 2545ed1c418cSBarry Smith y[18 * (*idx) + 5] += alpha6 * (*v); 2546ed1c418cSBarry Smith y[18 * (*idx) + 6] += alpha7 * (*v); 2547ed1c418cSBarry Smith y[18 * (*idx) + 7] += alpha8 * (*v); 2548ed1c418cSBarry Smith y[18 * (*idx) + 8] += alpha9 * (*v); 2549ed1c418cSBarry Smith y[18 * (*idx) + 9] += alpha10 * (*v); 2550ed1c418cSBarry Smith y[18 * (*idx) + 10] += alpha11 * (*v); 2551ed1c418cSBarry Smith y[18 * (*idx) + 11] += alpha12 * (*v); 2552ed1c418cSBarry Smith y[18 * (*idx) + 12] += alpha13 * (*v); 2553ed1c418cSBarry Smith y[18 * (*idx) + 13] += alpha14 * (*v); 2554ed1c418cSBarry Smith y[18 * (*idx) + 14] += alpha15 * (*v); 2555ed1c418cSBarry Smith y[18 * (*idx) + 15] += alpha16 * (*v); 2556ed1c418cSBarry Smith y[18 * (*idx) + 16] += alpha17 * (*v); 2557ed1c418cSBarry Smith y[18 * (*idx) + 17] += alpha18 * (*v); 25589371c9d4SSatish Balay idx++; 25599371c9d4SSatish Balay v++; 2560ed1c418cSBarry Smith } 2561ed1c418cSBarry Smith } 25629566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(36.0 * a->nz)); 25639566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 25649566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 2565ed1c418cSBarry Smith PetscFunctionReturn(0); 2566ed1c418cSBarry Smith } 2567ed1c418cSBarry Smith 25689371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy) { 25696861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 25706861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 25716861f193SBarry Smith const PetscScalar *x, *v; 25726861f193SBarry Smith PetscScalar *y, *sums; 25736861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 25746861f193SBarry Smith PetscInt n, i, jrow, j, dof = b->dof, k; 25756861f193SBarry Smith 25766861f193SBarry Smith PetscFunctionBegin; 25779566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 25789566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 25799566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 25806861f193SBarry Smith idx = a->j; 25816861f193SBarry Smith v = a->a; 25826861f193SBarry Smith ii = a->i; 25836861f193SBarry Smith 25846861f193SBarry Smith for (i = 0; i < m; i++) { 25856861f193SBarry Smith jrow = ii[i]; 25866861f193SBarry Smith n = ii[i + 1] - jrow; 25876861f193SBarry Smith sums = y + dof * i; 25886861f193SBarry Smith for (j = 0; j < n; j++) { 2589ad540459SPierre Jolivet for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k]; 25906861f193SBarry Smith jrow++; 25916861f193SBarry Smith } 25926861f193SBarry Smith } 25936861f193SBarry Smith 25949566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 25959566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 25969566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 25976861f193SBarry Smith PetscFunctionReturn(0); 25986861f193SBarry Smith } 25996861f193SBarry Smith 26009371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) { 26016861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 26026861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 26036861f193SBarry Smith const PetscScalar *x, *v; 26046861f193SBarry Smith PetscScalar *y, *sums; 26056861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 26066861f193SBarry Smith PetscInt n, i, jrow, j, dof = b->dof, k; 26076861f193SBarry Smith 26086861f193SBarry Smith PetscFunctionBegin; 26099566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 26109566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 26119566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 26126861f193SBarry Smith idx = a->j; 26136861f193SBarry Smith v = a->a; 26146861f193SBarry Smith ii = a->i; 26156861f193SBarry Smith 26166861f193SBarry Smith for (i = 0; i < m; i++) { 26176861f193SBarry Smith jrow = ii[i]; 26186861f193SBarry Smith n = ii[i + 1] - jrow; 26196861f193SBarry Smith sums = y + dof * i; 26206861f193SBarry Smith for (j = 0; j < n; j++) { 2621ad540459SPierre Jolivet for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k]; 26226861f193SBarry Smith jrow++; 26236861f193SBarry Smith } 26246861f193SBarry Smith } 26256861f193SBarry Smith 26269566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 26279566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 26289566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 26296861f193SBarry Smith PetscFunctionReturn(0); 26306861f193SBarry Smith } 26316861f193SBarry Smith 26329371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy) { 26336861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 26346861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 26356861f193SBarry Smith const PetscScalar *x, *v, *alpha; 26366861f193SBarry Smith PetscScalar *y; 26376861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof; 26386861f193SBarry Smith PetscInt n, i, k; 26396861f193SBarry Smith 26406861f193SBarry Smith PetscFunctionBegin; 26419566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 26429566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 26439566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 26446861f193SBarry Smith for (i = 0; i < m; i++) { 26456861f193SBarry Smith idx = a->j + a->i[i]; 26466861f193SBarry Smith v = a->a + a->i[i]; 26476861f193SBarry Smith n = a->i[i + 1] - a->i[i]; 26486861f193SBarry Smith alpha = x + dof * i; 26496861f193SBarry Smith while (n-- > 0) { 2650ad540459SPierre Jolivet for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v); 26519371c9d4SSatish Balay idx++; 26529371c9d4SSatish Balay v++; 26536861f193SBarry Smith } 26546861f193SBarry Smith } 26559566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 26569566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 26579566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 26586861f193SBarry Smith PetscFunctionReturn(0); 26596861f193SBarry Smith } 26606861f193SBarry Smith 26619371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) { 26626861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 26636861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 26646861f193SBarry Smith const PetscScalar *x, *v, *alpha; 26656861f193SBarry Smith PetscScalar *y; 26666861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof; 26676861f193SBarry Smith PetscInt n, i, k; 26686861f193SBarry Smith 26696861f193SBarry Smith PetscFunctionBegin; 26709566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 26719566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 26729566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 26736861f193SBarry Smith for (i = 0; i < m; i++) { 26746861f193SBarry Smith idx = a->j + a->i[i]; 26756861f193SBarry Smith v = a->a + a->i[i]; 26766861f193SBarry Smith n = a->i[i + 1] - a->i[i]; 26776861f193SBarry Smith alpha = x + dof * i; 26786861f193SBarry Smith while (n-- > 0) { 2679ad540459SPierre Jolivet for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v); 26809371c9d4SSatish Balay idx++; 26819371c9d4SSatish Balay v++; 26826861f193SBarry Smith } 26836861f193SBarry Smith } 26849566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 26859566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 26869566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 26876861f193SBarry Smith PetscFunctionReturn(0); 26886861f193SBarry Smith } 26896861f193SBarry Smith 2690f4a53059SBarry Smith /*===================================================================================*/ 26919371c9d4SSatish Balay PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) { 26924cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 2693f4a53059SBarry Smith 2694b24ad042SBarry Smith PetscFunctionBegin; 2695f4a53059SBarry Smith /* start the scatter */ 26969566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 26979566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->mult)(b->AIJ, xx, yy)); 26989566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 26999566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy)); 2700f4a53059SBarry Smith PetscFunctionReturn(0); 2701f4a53059SBarry Smith } 2702f4a53059SBarry Smith 27039371c9d4SSatish Balay PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) { 27044cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 2705b24ad042SBarry Smith 27064cb9d9b8SBarry Smith PetscFunctionBegin; 27079566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w)); 27089566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy)); 27099566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE)); 27109566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE)); 27114cb9d9b8SBarry Smith PetscFunctionReturn(0); 27124cb9d9b8SBarry Smith } 27134cb9d9b8SBarry Smith 27149371c9d4SSatish Balay PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) { 27154cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 27164cb9d9b8SBarry Smith 2717b24ad042SBarry Smith PetscFunctionBegin; 27184cb9d9b8SBarry Smith /* start the scatter */ 27199566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 27209566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz)); 27219566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 27229566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz)); 27234cb9d9b8SBarry Smith PetscFunctionReturn(0); 27244cb9d9b8SBarry Smith } 27254cb9d9b8SBarry Smith 27269371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) { 27274cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 2728b24ad042SBarry Smith 27294cb9d9b8SBarry Smith PetscFunctionBegin; 27309566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w)); 27319566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz)); 27329566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE)); 27339566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE)); 27344cb9d9b8SBarry Smith PetscFunctionReturn(0); 27354cb9d9b8SBarry Smith } 27364cb9d9b8SBarry Smith 273795ddefa2SHong Zhang /* ----------------------------------------------------------------*/ 27389371c9d4SSatish Balay PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C) { 27394222ddf1SHong Zhang Mat_Product *product = C->product; 27404222ddf1SHong Zhang 27414222ddf1SHong Zhang PetscFunctionBegin; 27424222ddf1SHong Zhang if (product->type == MATPRODUCT_PtAP) { 27434222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ; 274498921bdaSJacob Faibussowitsch } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]); 27454222ddf1SHong Zhang PetscFunctionReturn(0); 27464222ddf1SHong Zhang } 27474222ddf1SHong Zhang 27489371c9d4SSatish Balay PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C) { 27494222ddf1SHong Zhang Mat_Product *product = C->product; 27504222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 27514222ddf1SHong Zhang Mat A = product->A, P = product->B; 27524222ddf1SHong Zhang PetscInt alg = 1; /* set default algorithm */ 27534222ddf1SHong Zhang #if !defined(PETSC_HAVE_HYPRE) 27544222ddf1SHong Zhang const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"}; 27554222ddf1SHong Zhang PetscInt nalg = 4; 27564222ddf1SHong Zhang #else 27574222ddf1SHong Zhang const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"}; 27584222ddf1SHong Zhang PetscInt nalg = 5; 27594222ddf1SHong Zhang #endif 27604222ddf1SHong Zhang 27614222ddf1SHong Zhang PetscFunctionBegin; 276208401ef6SPierre Jolivet PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices", MatProductTypes[product->type]); 27634222ddf1SHong Zhang 27644222ddf1SHong Zhang /* PtAP */ 27654222ddf1SHong Zhang /* Check matrix local sizes */ 27669371c9d4SSatish Balay PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")", 27679371c9d4SSatish Balay A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend); 27689371c9d4SSatish Balay PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")", 27699371c9d4SSatish Balay A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend); 27704222ddf1SHong Zhang 27714222ddf1SHong Zhang /* Set the default algorithm */ 27729566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg, "default", &flg)); 277348a46eb9SPierre Jolivet if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 27744222ddf1SHong Zhang 27754222ddf1SHong Zhang /* Get runtime option */ 2776d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 27779566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg)); 277848a46eb9SPierre Jolivet if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2779d0609cedSBarry Smith PetscOptionsEnd(); 27804222ddf1SHong Zhang 27819566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg, "allatonce", &flg)); 27824222ddf1SHong Zhang if (flg) { 27834222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 27844222ddf1SHong Zhang PetscFunctionReturn(0); 27854222ddf1SHong Zhang } 27864222ddf1SHong Zhang 27879566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg, "allatonce_merged", &flg)); 27884222ddf1SHong Zhang if (flg) { 27894222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 27904222ddf1SHong Zhang PetscFunctionReturn(0); 27914222ddf1SHong Zhang } 27924222ddf1SHong Zhang 27934222ddf1SHong Zhang /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */ 27949566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n")); 27959566063dSJacob Faibussowitsch PetscCall(MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P)); 27969566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(C)); 27974222ddf1SHong Zhang PetscFunctionReturn(0); 27984222ddf1SHong Zhang } 27994222ddf1SHong Zhang 28004222ddf1SHong Zhang /* ----------------------------------------------------------------*/ 28019371c9d4SSatish Balay PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C) { 28020298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 28037ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data; 28047ba1a0bfSKris Buschelman Mat P = pp->AIJ; 28057ba1a0bfSKris Buschelman Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c; 2806d2840e09SBarry Smith PetscInt *pti, *ptj, *ptJ; 28077ba1a0bfSKris Buschelman PetscInt *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj; 2808d2840e09SBarry Smith const PetscInt an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof; 2809d2840e09SBarry Smith PetscInt i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn; 28107ba1a0bfSKris Buschelman MatScalar *ca; 2811d2840e09SBarry Smith const PetscInt *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj; 28127ba1a0bfSKris Buschelman 28137ba1a0bfSKris Buschelman PetscFunctionBegin; 28147ba1a0bfSKris Buschelman /* Get ij structure of P^T */ 28159566063dSJacob Faibussowitsch PetscCall(MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj)); 28167ba1a0bfSKris Buschelman 28177ba1a0bfSKris Buschelman cn = pn * ppdof; 28187ba1a0bfSKris Buschelman /* Allocate ci array, arrays for fill computation and */ 28197ba1a0bfSKris Buschelman /* free space for accumulating nonzero column info */ 28209566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(cn + 1, &ci)); 28217ba1a0bfSKris Buschelman ci[0] = 0; 28227ba1a0bfSKris Buschelman 28237ba1a0bfSKris Buschelman /* Work arrays for rows of P^T*A */ 28249566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow)); 28259566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ptadenserow, an)); 28269566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(denserow, cn)); 28277ba1a0bfSKris Buschelman 28287ba1a0bfSKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 28297ba1a0bfSKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 28307ba1a0bfSKris Buschelman /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 28319566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space)); 28327ba1a0bfSKris Buschelman current_space = free_space; 28337ba1a0bfSKris Buschelman 28347ba1a0bfSKris Buschelman /* Determine symbolic info for each row of C: */ 28357ba1a0bfSKris Buschelman for (i = 0; i < pn; i++) { 28367ba1a0bfSKris Buschelman ptnzi = pti[i + 1] - pti[i]; 28377ba1a0bfSKris Buschelman ptJ = ptj + pti[i]; 28387ba1a0bfSKris Buschelman for (dof = 0; dof < ppdof; dof++) { 28397ba1a0bfSKris Buschelman ptanzi = 0; 28407ba1a0bfSKris Buschelman /* Determine symbolic row of PtA: */ 28417ba1a0bfSKris Buschelman for (j = 0; j < ptnzi; j++) { 28427ba1a0bfSKris Buschelman /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 28437ba1a0bfSKris Buschelman arow = ptJ[j] * ppdof + dof; 28447ba1a0bfSKris Buschelman /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 28457ba1a0bfSKris Buschelman anzj = ai[arow + 1] - ai[arow]; 28467ba1a0bfSKris Buschelman ajj = aj + ai[arow]; 28477ba1a0bfSKris Buschelman for (k = 0; k < anzj; k++) { 28487ba1a0bfSKris Buschelman if (!ptadenserow[ajj[k]]) { 28497ba1a0bfSKris Buschelman ptadenserow[ajj[k]] = -1; 28507ba1a0bfSKris Buschelman ptasparserow[ptanzi++] = ajj[k]; 28517ba1a0bfSKris Buschelman } 28527ba1a0bfSKris Buschelman } 28537ba1a0bfSKris Buschelman } 28547ba1a0bfSKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 28557ba1a0bfSKris Buschelman ptaj = ptasparserow; 28567ba1a0bfSKris Buschelman cnzi = 0; 28577ba1a0bfSKris Buschelman for (j = 0; j < ptanzi; j++) { 28587ba1a0bfSKris Buschelman /* Get offset within block of P */ 28597ba1a0bfSKris Buschelman pshift = *ptaj % ppdof; 28607ba1a0bfSKris Buschelman /* Get block row of P */ 28617ba1a0bfSKris Buschelman prow = (*ptaj++) / ppdof; /* integer division */ 28627ba1a0bfSKris Buschelman /* P has same number of nonzeros per row as the compressed form */ 28637ba1a0bfSKris Buschelman pnzj = pi[prow + 1] - pi[prow]; 28647ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 28657ba1a0bfSKris Buschelman for (k = 0; k < pnzj; k++) { 28667ba1a0bfSKris Buschelman /* Locations in C are shifted by the offset within the block */ 28677ba1a0bfSKris Buschelman /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 28687ba1a0bfSKris Buschelman if (!denserow[pjj[k] * ppdof + pshift]) { 28697ba1a0bfSKris Buschelman denserow[pjj[k] * ppdof + pshift] = -1; 28707ba1a0bfSKris Buschelman sparserow[cnzi++] = pjj[k] * ppdof + pshift; 28717ba1a0bfSKris Buschelman } 28727ba1a0bfSKris Buschelman } 28737ba1a0bfSKris Buschelman } 28747ba1a0bfSKris Buschelman 28757ba1a0bfSKris Buschelman /* sort sparserow */ 28769566063dSJacob Faibussowitsch PetscCall(PetscSortInt(cnzi, sparserow)); 28777ba1a0bfSKris Buschelman 28787ba1a0bfSKris Buschelman /* If free space is not available, make more free space */ 28797ba1a0bfSKris Buschelman /* Double the amount of total space in the list */ 288048a46eb9SPierre Jolivet if (current_space->local_remaining < cnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), ¤t_space)); 28817ba1a0bfSKris Buschelman 28827ba1a0bfSKris Buschelman /* Copy data into free space, and zero out denserows */ 28839566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(current_space->array, sparserow, cnzi)); 288426fbe8dcSKarl Rupp 28857ba1a0bfSKris Buschelman current_space->array += cnzi; 28867ba1a0bfSKris Buschelman current_space->local_used += cnzi; 28877ba1a0bfSKris Buschelman current_space->local_remaining -= cnzi; 28887ba1a0bfSKris Buschelman 288926fbe8dcSKarl Rupp for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0; 289026fbe8dcSKarl Rupp for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0; 289126fbe8dcSKarl Rupp 28927ba1a0bfSKris Buschelman /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 28937ba1a0bfSKris Buschelman /* For now, we will recompute what is needed. */ 28947ba1a0bfSKris Buschelman ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi; 28957ba1a0bfSKris Buschelman } 28967ba1a0bfSKris Buschelman } 28977ba1a0bfSKris Buschelman /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 28987ba1a0bfSKris Buschelman /* Allocate space for cj, initialize cj, and */ 28997ba1a0bfSKris Buschelman /* destroy list of free space and other temporary array(s) */ 29009566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ci[cn] + 1, &cj)); 29019566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space, cj)); 29029566063dSJacob Faibussowitsch PetscCall(PetscFree4(ptadenserow, ptasparserow, denserow, sparserow)); 29037ba1a0bfSKris Buschelman 29047ba1a0bfSKris Buschelman /* Allocate space for ca */ 29059566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(ci[cn] + 1, &ca)); 29067ba1a0bfSKris Buschelman 29077ba1a0bfSKris Buschelman /* put together the new matrix */ 29089566063dSJacob Faibussowitsch PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C)); 29099566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(C, pp->dof)); 29107ba1a0bfSKris Buschelman 29117ba1a0bfSKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 29127ba1a0bfSKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 29134222ddf1SHong Zhang c = (Mat_SeqAIJ *)(C->data); 2914e6b907acSBarry Smith c->free_a = PETSC_TRUE; 2915e6b907acSBarry Smith c->free_ij = PETSC_TRUE; 29167ba1a0bfSKris Buschelman c->nonew = 0; 291726fbe8dcSKarl Rupp 29184222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 29194222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 29207ba1a0bfSKris Buschelman 29217ba1a0bfSKris Buschelman /* Clean up. */ 29229566063dSJacob Faibussowitsch PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj)); 29237ba1a0bfSKris Buschelman PetscFunctionReturn(0); 29247ba1a0bfSKris Buschelman } 29257ba1a0bfSKris Buschelman 29269371c9d4SSatish Balay PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C) { 29277ba1a0bfSKris Buschelman /* This routine requires testing -- first draft only */ 29287ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data; 29297ba1a0bfSKris Buschelman Mat P = pp->AIJ; 29307ba1a0bfSKris Buschelman Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 29317ba1a0bfSKris Buschelman Mat_SeqAIJ *p = (Mat_SeqAIJ *)P->data; 29327ba1a0bfSKris Buschelman Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 2933a2ea699eSBarry Smith const PetscInt *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj; 2934d2840e09SBarry Smith const PetscInt *ci = c->i, *cj = c->j, *cjj; 2935d2840e09SBarry Smith const PetscInt am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof; 2936d2840e09SBarry Smith PetscInt i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense; 2937a2ea699eSBarry Smith const MatScalar *aa = a->a, *pa = p->a, *pA, *paj; 2938d2840e09SBarry Smith MatScalar *ca = c->a, *caj, *apa; 29397ba1a0bfSKris Buschelman 29407ba1a0bfSKris Buschelman PetscFunctionBegin; 29417ba1a0bfSKris Buschelman /* Allocate temporary array for storage of one row of A*P */ 29429566063dSJacob Faibussowitsch PetscCall(PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense)); 29437ba1a0bfSKris Buschelman 29447ba1a0bfSKris Buschelman /* Clear old values in C */ 29459566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ca, ci[cm])); 29467ba1a0bfSKris Buschelman 29477ba1a0bfSKris Buschelman for (i = 0; i < am; i++) { 29487ba1a0bfSKris Buschelman /* Form sparse row of A*P */ 29497ba1a0bfSKris Buschelman anzi = ai[i + 1] - ai[i]; 29507ba1a0bfSKris Buschelman apnzj = 0; 29517ba1a0bfSKris Buschelman for (j = 0; j < anzi; j++) { 29527ba1a0bfSKris Buschelman /* Get offset within block of P */ 29537ba1a0bfSKris Buschelman pshift = *aj % ppdof; 29547ba1a0bfSKris Buschelman /* Get block row of P */ 29557ba1a0bfSKris Buschelman prow = *aj++ / ppdof; /* integer division */ 29567ba1a0bfSKris Buschelman pnzj = pi[prow + 1] - pi[prow]; 29577ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 29587ba1a0bfSKris Buschelman paj = pa + pi[prow]; 29597ba1a0bfSKris Buschelman for (k = 0; k < pnzj; k++) { 29607ba1a0bfSKris Buschelman poffset = pjj[k] * ppdof + pshift; 29617ba1a0bfSKris Buschelman if (!apjdense[poffset]) { 29627ba1a0bfSKris Buschelman apjdense[poffset] = -1; 29637ba1a0bfSKris Buschelman apj[apnzj++] = poffset; 29647ba1a0bfSKris Buschelman } 29657ba1a0bfSKris Buschelman apa[poffset] += (*aa) * paj[k]; 29667ba1a0bfSKris Buschelman } 29679566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * pnzj)); 29687ba1a0bfSKris Buschelman aa++; 29697ba1a0bfSKris Buschelman } 29707ba1a0bfSKris Buschelman 29717ba1a0bfSKris Buschelman /* Sort the j index array for quick sparse axpy. */ 29727ba1a0bfSKris Buschelman /* Note: a array does not need sorting as it is in dense storage locations. */ 29739566063dSJacob Faibussowitsch PetscCall(PetscSortInt(apnzj, apj)); 29747ba1a0bfSKris Buschelman 29757ba1a0bfSKris Buschelman /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 29767ba1a0bfSKris Buschelman prow = i / ppdof; /* integer division */ 29777ba1a0bfSKris Buschelman pshift = i % ppdof; 29787ba1a0bfSKris Buschelman poffset = pi[prow]; 29797ba1a0bfSKris Buschelman pnzi = pi[prow + 1] - poffset; 29807ba1a0bfSKris Buschelman /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 29817ba1a0bfSKris Buschelman pJ = pj + poffset; 29827ba1a0bfSKris Buschelman pA = pa + poffset; 29837ba1a0bfSKris Buschelman for (j = 0; j < pnzi; j++) { 29847ba1a0bfSKris Buschelman crow = (*pJ) * ppdof + pshift; 29857ba1a0bfSKris Buschelman cjj = cj + ci[crow]; 29867ba1a0bfSKris Buschelman caj = ca + ci[crow]; 29877ba1a0bfSKris Buschelman pJ++; 29887ba1a0bfSKris Buschelman /* Perform sparse axpy operation. Note cjj includes apj. */ 29897ba1a0bfSKris Buschelman for (k = 0, nextap = 0; nextap < apnzj; k++) { 299026fbe8dcSKarl Rupp if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]]; 29917ba1a0bfSKris Buschelman } 29929566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * apnzj)); 29937ba1a0bfSKris Buschelman pA++; 29947ba1a0bfSKris Buschelman } 29957ba1a0bfSKris Buschelman 29967ba1a0bfSKris Buschelman /* Zero the current row info for A*P */ 29977ba1a0bfSKris Buschelman for (j = 0; j < apnzj; j++) { 29987ba1a0bfSKris Buschelman apa[apj[j]] = 0.; 29997ba1a0bfSKris Buschelman apjdense[apj[j]] = 0; 30007ba1a0bfSKris Buschelman } 30017ba1a0bfSKris Buschelman } 30027ba1a0bfSKris Buschelman 30037ba1a0bfSKris Buschelman /* Assemble the final matrix and clean up */ 30049566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 30059566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 30069566063dSJacob Faibussowitsch PetscCall(PetscFree3(apa, apj, apjdense)); 300795ddefa2SHong Zhang PetscFunctionReturn(0); 300895ddefa2SHong Zhang } 30097ba1a0bfSKris Buschelman 30109371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C) { 30114222ddf1SHong Zhang Mat_Product *product = C->product; 30124222ddf1SHong Zhang Mat A = product->A, P = product->B; 30132121bac1SHong Zhang 30142121bac1SHong Zhang PetscFunctionBegin; 30159566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C)); 30162121bac1SHong Zhang PetscFunctionReturn(0); 30172121bac1SHong Zhang } 30182121bac1SHong Zhang 30199371c9d4SSatish Balay PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A, Mat PP, PetscReal fill, Mat *C) { 302095ddefa2SHong Zhang PetscFunctionBegin; 30213e0c911fSBarry Smith SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet"); 302295ddefa2SHong Zhang } 302395ddefa2SHong Zhang 30249371c9d4SSatish Balay PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A, Mat PP, Mat C) { 302595ddefa2SHong Zhang PetscFunctionBegin; 30263e0c911fSBarry Smith SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 30277ba1a0bfSKris Buschelman } 30285c65b9ecSFande Kong 3029bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat); 3030bc8e477aSFande Kong 30319371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C) { 3032bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 3033bc8e477aSFande Kong 3034bc8e477aSFande Kong PetscFunctionBegin; 303534bcad68SFande Kong 30369566063dSJacob Faibussowitsch PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C)); 3037bc8e477aSFande Kong PetscFunctionReturn(0); 3038bc8e477aSFande Kong } 3039bc8e477aSFande Kong 30404222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat); 3041bc8e477aSFande Kong 30429371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C) { 3043bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 3044bc8e477aSFande Kong 3045bc8e477aSFande Kong PetscFunctionBegin; 30469566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C)); 30474222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce; 3048bc8e477aSFande Kong PetscFunctionReturn(0); 3049bc8e477aSFande Kong } 3050bc8e477aSFande Kong 3051bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat); 3052bc8e477aSFande Kong 30539371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C) { 3054bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 3055bc8e477aSFande Kong 3056bc8e477aSFande Kong PetscFunctionBegin; 305734bcad68SFande Kong 30589566063dSJacob Faibussowitsch PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C)); 3059bc8e477aSFande Kong PetscFunctionReturn(0); 3060bc8e477aSFande Kong } 3061bc8e477aSFande Kong 30624222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat); 3063bc8e477aSFande Kong 30649371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C) { 3065bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 3066bc8e477aSFande Kong 3067bc8e477aSFande Kong PetscFunctionBegin; 306834bcad68SFande Kong 30699566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C)); 30704222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged; 3071bc8e477aSFande Kong PetscFunctionReturn(0); 3072bc8e477aSFande Kong } 3073bc8e477aSFande Kong 30749371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C) { 30754222ddf1SHong Zhang Mat_Product *product = C->product; 30764222ddf1SHong Zhang Mat A = product->A, P = product->B; 30774222ddf1SHong Zhang PetscBool flg; 3078bc8e477aSFande Kong 3079bc8e477aSFande Kong PetscFunctionBegin; 30809566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg, "allatonce", &flg)); 30814222ddf1SHong Zhang if (flg) { 30829566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C)); 30834222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 30844222ddf1SHong Zhang PetscFunctionReturn(0); 3085bc8e477aSFande Kong } 308634bcad68SFande Kong 30879566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg, "allatonce_merged", &flg)); 30884222ddf1SHong Zhang if (flg) { 30899566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C)); 30904222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 30914222ddf1SHong Zhang PetscFunctionReturn(0); 30924222ddf1SHong Zhang } 309334bcad68SFande Kong 30944222ddf1SHong Zhang SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported"); 3095bc8e477aSFande Kong } 3096bc8e477aSFande Kong 30979371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { 30989c4fc4c7SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 3099ceb03754SKris Buschelman Mat a = b->AIJ, B; 31009c4fc4c7SBarry Smith Mat_SeqAIJ *aij = (Mat_SeqAIJ *)a->data; 31010fd73130SBarry Smith PetscInt m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof; 3102ba8c8a56SBarry Smith PetscInt *cols; 3103ba8c8a56SBarry Smith PetscScalar *vals; 31049c4fc4c7SBarry Smith 31059c4fc4c7SBarry Smith PetscFunctionBegin; 31069566063dSJacob Faibussowitsch PetscCall(MatGetSize(a, &m, &n)); 31079566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(dof * m, &ilen)); 31089c4fc4c7SBarry Smith for (i = 0; i < m; i++) { 31099c4fc4c7SBarry Smith nmax = PetscMax(nmax, aij->ilen[i]); 311026fbe8dcSKarl Rupp for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i]; 31119c4fc4c7SBarry Smith } 31129566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 31139566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, dof * m, dof * n, dof * m, dof * n)); 31149566063dSJacob Faibussowitsch PetscCall(MatSetType(B, newtype)); 31159566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(B, 0, ilen)); 31169566063dSJacob Faibussowitsch PetscCall(PetscFree(ilen)); 31179566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nmax, &icols)); 31189c4fc4c7SBarry Smith ii = 0; 31199c4fc4c7SBarry Smith for (i = 0; i < m; i++) { 31209566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals)); 31210fd73130SBarry Smith for (j = 0; j < dof; j++) { 312226fbe8dcSKarl Rupp for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j; 31239566063dSJacob Faibussowitsch PetscCall(MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES)); 31249c4fc4c7SBarry Smith ii++; 31259c4fc4c7SBarry Smith } 31269566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals)); 31279c4fc4c7SBarry Smith } 31289566063dSJacob Faibussowitsch PetscCall(PetscFree(icols)); 31299566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 31309566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3131ceb03754SKris Buschelman 3132511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 31339566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &B)); 3134c3d102feSKris Buschelman } else { 3135ceb03754SKris Buschelman *newmat = B; 3136c3d102feSKris Buschelman } 31379c4fc4c7SBarry Smith PetscFunctionReturn(0); 31389c4fc4c7SBarry Smith } 31399c4fc4c7SBarry Smith 3140c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h> 3141be1d678aSKris Buschelman 31429371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { 31430fd73130SBarry Smith Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)A->data; 3144ceb03754SKris Buschelman Mat MatAIJ = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B; 31450fd73130SBarry Smith Mat MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ; 31460fd73130SBarry Smith Mat_SeqAIJ *AIJ = (Mat_SeqAIJ *)MatAIJ->data; 31470fd73130SBarry Smith Mat_SeqAIJ *OAIJ = (Mat_SeqAIJ *)MatOAIJ->data; 31480fd73130SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)maij->A->data; 31490298fd71SBarry Smith PetscInt dof = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0; 31500298fd71SBarry Smith PetscInt *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL; 31510fd73130SBarry Smith PetscInt rstart, cstart, *garray, ii, k; 31520fd73130SBarry Smith PetscScalar *vals, *ovals; 31530fd73130SBarry Smith 31540fd73130SBarry Smith PetscFunctionBegin; 31559566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz)); 3156d0f46423SBarry Smith for (i = 0; i < A->rmap->n / dof; i++) { 31570fd73130SBarry Smith nmax = PetscMax(nmax, AIJ->ilen[i]); 31580fd73130SBarry Smith onmax = PetscMax(onmax, OAIJ->ilen[i]); 31590fd73130SBarry Smith for (j = 0; j < dof; j++) { 31600fd73130SBarry Smith dnz[dof * i + j] = AIJ->ilen[i]; 31610fd73130SBarry Smith onz[dof * i + j] = OAIJ->ilen[i]; 31620fd73130SBarry Smith } 31630fd73130SBarry Smith } 31649566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 31659566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 31669566063dSJacob Faibussowitsch PetscCall(MatSetType(B, newtype)); 31679566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz)); 31689566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(B, dof)); 31699566063dSJacob Faibussowitsch PetscCall(PetscFree2(dnz, onz)); 31700fd73130SBarry Smith 31719566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nmax, &icols, onmax, &oicols)); 3172d0f46423SBarry Smith rstart = dof * maij->A->rmap->rstart; 3173d0f46423SBarry Smith cstart = dof * maij->A->cmap->rstart; 31740fd73130SBarry Smith garray = mpiaij->garray; 31750fd73130SBarry Smith 31760fd73130SBarry Smith ii = rstart; 3177d0f46423SBarry Smith for (i = 0; i < A->rmap->n / dof; i++) { 31789566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals)); 31799566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals)); 31800fd73130SBarry Smith for (j = 0; j < dof; j++) { 3181ad540459SPierre Jolivet for (k = 0; k < ncols; k++) icols[k] = cstart + dof * cols[k] + j; 3182ad540459SPierre Jolivet for (k = 0; k < oncols; k++) oicols[k] = dof * garray[ocols[k]] + j; 31839566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES)); 31849566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES)); 31850fd73130SBarry Smith ii++; 31860fd73130SBarry Smith } 31879566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals)); 31889566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals)); 31890fd73130SBarry Smith } 31909566063dSJacob Faibussowitsch PetscCall(PetscFree2(icols, oicols)); 31910fd73130SBarry Smith 31929566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 31939566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3194ceb03754SKris Buschelman 3195511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 31967adad957SLisandro Dalcin PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 31977adad957SLisandro Dalcin ((PetscObject)A)->refct = 1; 319826fbe8dcSKarl Rupp 31999566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &B)); 320026fbe8dcSKarl Rupp 32017adad957SLisandro Dalcin ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 3202c3d102feSKris Buschelman } else { 3203ceb03754SKris Buschelman *newmat = B; 3204c3d102feSKris Buschelman } 32050fd73130SBarry Smith PetscFunctionReturn(0); 32060fd73130SBarry Smith } 32070fd73130SBarry Smith 32089371c9d4SSatish Balay PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat) { 32099e516c8fSBarry Smith Mat A; 32109e516c8fSBarry Smith 32119e516c8fSBarry Smith PetscFunctionBegin; 32129566063dSJacob Faibussowitsch PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 32139566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat)); 32149566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 32159e516c8fSBarry Smith PetscFunctionReturn(0); 32169e516c8fSBarry Smith } 32170fd73130SBarry Smith 32189371c9d4SSatish Balay PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[]) { 3219ec626240SStefano Zampini Mat A; 3220ec626240SStefano Zampini 3221ec626240SStefano Zampini PetscFunctionBegin; 32229566063dSJacob Faibussowitsch PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 32239566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrices(A, n, irow, icol, scall, submat)); 32249566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 3225ec626240SStefano Zampini PetscFunctionReturn(0); 3226ec626240SStefano Zampini } 3227ec626240SStefano Zampini 3228bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */ 3229480dffcfSJed Brown /*@ 32300bad9183SKris Buschelman MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 32310bad9183SKris Buschelman operations for multicomponent problems. It interpolates each component the same 3232*11a5261eSBarry Smith way independently. The matrix type is based on `MATSEQAIJ` for sequential matrices, 3233*11a5261eSBarry Smith and `MATMPIAIJ` for distributed matrices. 32340bad9183SKris Buschelman 3235ff585edeSJed Brown Collective 3236ff585edeSJed Brown 3237ff585edeSJed Brown Input Parameters: 3238*11a5261eSBarry Smith + A - the `MATAIJ` matrix describing the action on blocks 3239ff585edeSJed Brown - dof - the block size (number of components per node) 3240ff585edeSJed Brown 3241ff585edeSJed Brown Output Parameter: 3242*11a5261eSBarry Smith . maij - the new `MATMAIJ` matrix 3243ff585edeSJed Brown 32440bad9183SKris Buschelman Operations provided: 324567b8a455SSatish Balay .vb 3246*11a5261eSBarry Smith MatMult() 3247*11a5261eSBarry Smith MatMultTranspose() 3248*11a5261eSBarry Smith MatMultAdd() 3249*11a5261eSBarry Smith MatMultTransposeAdd() 3250*11a5261eSBarry Smith MatView() 325167b8a455SSatish Balay .ve 32520bad9183SKris Buschelman 32530bad9183SKris Buschelman Level: advanced 32540bad9183SKris Buschelman 3255*11a5261eSBarry Smith .seealso: `MATAIJ`, `MATMAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MATMAIJ` 3256ff585edeSJed Brown @*/ 32579371c9d4SSatish Balay PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij) { 3258b24ad042SBarry Smith PetscInt n; 325982b1193eSBarry Smith Mat B; 326090f67b22SBarry Smith PetscBool flg; 3261fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 3262fdc842d1SBarry Smith /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */ 3263fdc842d1SBarry Smith PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE; 3264fdc842d1SBarry Smith #endif 326582b1193eSBarry Smith 326682b1193eSBarry Smith PetscFunctionBegin; 3267fdc842d1SBarry Smith dof = PetscAbs(dof); 32689566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)A)); 3269d72c5c08SBarry Smith 327026fbe8dcSKarl Rupp if (dof == 1) *maij = A; 327126fbe8dcSKarl Rupp else { 32729566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3273c248e2fdSStefano Zampini /* propagate vec type */ 32749566063dSJacob Faibussowitsch PetscCall(MatSetVecType(B, A->defaultvectype)); 32759566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N)); 32769566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->rmap, dof)); 32779566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->cmap, dof)); 32789566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 32799566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 328026fbe8dcSKarl Rupp 3281cd3bbe55SBarry Smith B->assembled = PETSC_TRUE; 3282d72c5c08SBarry Smith 328390f67b22SBarry Smith PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg)); 328490f67b22SBarry Smith if (flg) { 3285feb9fe23SJed Brown Mat_SeqMAIJ *b; 3286feb9fe23SJed Brown 32879566063dSJacob Faibussowitsch PetscCall(MatSetType(B, MATSEQMAIJ)); 328826fbe8dcSKarl Rupp 32890298fd71SBarry Smith B->ops->setup = NULL; 32904cb9d9b8SBarry Smith B->ops->destroy = MatDestroy_SeqMAIJ; 32910fd73130SBarry Smith B->ops->view = MatView_SeqMAIJ; 32924222ddf1SHong Zhang 3293feb9fe23SJed Brown b = (Mat_SeqMAIJ *)B->data; 3294b9b97703SBarry Smith b->dof = dof; 32954cb9d9b8SBarry Smith b->AIJ = A; 329626fbe8dcSKarl Rupp 3297d72c5c08SBarry Smith if (dof == 2) { 3298cd3bbe55SBarry Smith B->ops->mult = MatMult_SeqMAIJ_2; 3299cd3bbe55SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_2; 3300cd3bbe55SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 3301cd3bbe55SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 3302bcc973b7SBarry Smith } else if (dof == 3) { 3303bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_3; 3304bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_3; 3305bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 3306bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 3307bcc973b7SBarry Smith } else if (dof == 4) { 3308bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_4; 3309bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_4; 3310bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 3311bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 3312f9fae5adSBarry Smith } else if (dof == 5) { 3313f9fae5adSBarry Smith B->ops->mult = MatMult_SeqMAIJ_5; 3314f9fae5adSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_5; 3315f9fae5adSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 3316f9fae5adSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 33176cd98798SBarry Smith } else if (dof == 6) { 33186cd98798SBarry Smith B->ops->mult = MatMult_SeqMAIJ_6; 33196cd98798SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_6; 33206cd98798SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 33216cd98798SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 3322ed8eea03SBarry Smith } else if (dof == 7) { 3323ed8eea03SBarry Smith B->ops->mult = MatMult_SeqMAIJ_7; 3324ed8eea03SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_7; 3325ed8eea03SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 3326ed8eea03SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 332766ed3db0SBarry Smith } else if (dof == 8) { 332866ed3db0SBarry Smith B->ops->mult = MatMult_SeqMAIJ_8; 332966ed3db0SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_8; 333066ed3db0SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 333166ed3db0SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 33322b692628SMatthew Knepley } else if (dof == 9) { 33332b692628SMatthew Knepley B->ops->mult = MatMult_SeqMAIJ_9; 33342b692628SMatthew Knepley B->ops->multadd = MatMultAdd_SeqMAIJ_9; 33352b692628SMatthew Knepley B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 33362b692628SMatthew Knepley B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 3337b9cda22cSBarry Smith } else if (dof == 10) { 3338b9cda22cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_10; 3339b9cda22cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_10; 3340b9cda22cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 3341b9cda22cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 3342dbdd7285SBarry Smith } else if (dof == 11) { 3343dbdd7285SBarry Smith B->ops->mult = MatMult_SeqMAIJ_11; 3344dbdd7285SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_11; 3345dbdd7285SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 3346dbdd7285SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 33472f7816d4SBarry Smith } else if (dof == 16) { 33482f7816d4SBarry Smith B->ops->mult = MatMult_SeqMAIJ_16; 33492f7816d4SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_16; 33502f7816d4SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 33512f7816d4SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 3352ed1c418cSBarry Smith } else if (dof == 18) { 3353ed1c418cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_18; 3354ed1c418cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_18; 3355ed1c418cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 3356ed1c418cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 335782b1193eSBarry Smith } else { 33586861f193SBarry Smith B->ops->mult = MatMult_SeqMAIJ_N; 33596861f193SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_N; 33606861f193SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 33616861f193SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 336282b1193eSBarry Smith } 3363fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 33649566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ)); 3365fdc842d1SBarry Smith #endif 33669566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ)); 33679566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ)); 3368f4a53059SBarry Smith } else { 3369f4a53059SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 3370feb9fe23SJed Brown Mat_MPIMAIJ *b; 3371f4a53059SBarry Smith IS from, to; 3372f4a53059SBarry Smith Vec gvec; 3373f4a53059SBarry Smith 33749566063dSJacob Faibussowitsch PetscCall(MatSetType(B, MATMPIMAIJ)); 337526fbe8dcSKarl Rupp 33760298fd71SBarry Smith B->ops->setup = NULL; 3377d72c5c08SBarry Smith B->ops->destroy = MatDestroy_MPIMAIJ; 33780fd73130SBarry Smith B->ops->view = MatView_MPIMAIJ; 337926fbe8dcSKarl Rupp 3380b9b97703SBarry Smith b = (Mat_MPIMAIJ *)B->data; 3381b9b97703SBarry Smith b->dof = dof; 3382b9b97703SBarry Smith b->A = A; 338326fbe8dcSKarl Rupp 33849566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ)); 33859566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ)); 33864cb9d9b8SBarry Smith 33879566063dSJacob Faibussowitsch PetscCall(VecGetSize(mpiaij->lvec, &n)); 33889566063dSJacob Faibussowitsch PetscCall(VecCreate(PETSC_COMM_SELF, &b->w)); 33899566063dSJacob Faibussowitsch PetscCall(VecSetSizes(b->w, n * dof, n * dof)); 33909566063dSJacob Faibussowitsch PetscCall(VecSetBlockSize(b->w, dof)); 33919566063dSJacob Faibussowitsch PetscCall(VecSetType(b->w, VECSEQ)); 3392f4a53059SBarry Smith 3393f4a53059SBarry Smith /* create two temporary Index sets for build scatter gather */ 33949566063dSJacob Faibussowitsch PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from)); 33959566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to)); 3396f4a53059SBarry Smith 3397f4a53059SBarry Smith /* create temporary global vector to generate scatter context */ 33989566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec)); 3399f4a53059SBarry Smith 3400f4a53059SBarry Smith /* generate the scatter context */ 34019566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(gvec, from, b->w, to, &b->ctx)); 3402f4a53059SBarry Smith 34039566063dSJacob Faibussowitsch PetscCall(ISDestroy(&from)); 34049566063dSJacob Faibussowitsch PetscCall(ISDestroy(&to)); 34059566063dSJacob Faibussowitsch PetscCall(VecDestroy(&gvec)); 3406f4a53059SBarry Smith 3407f4a53059SBarry Smith B->ops->mult = MatMult_MPIMAIJ_dof; 34084cb9d9b8SBarry Smith B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 34094cb9d9b8SBarry Smith B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 34104cb9d9b8SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 341126fbe8dcSKarl Rupp 3412fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 34139566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ)); 3414fdc842d1SBarry Smith #endif 34159566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ)); 34169566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ)); 3417f4a53059SBarry Smith } 34187dae84e0SHong Zhang B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ; 3419ec626240SStefano Zampini B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ; 34209566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 3421fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 3422fdc842d1SBarry Smith /* temporary until we have CUDA implementation of MAIJ */ 3423fdc842d1SBarry Smith { 3424fdc842d1SBarry Smith PetscBool flg; 3425fdc842d1SBarry Smith if (convert) { 34269566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, "")); 342748a46eb9SPierre Jolivet if (flg) PetscCall(MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B)); 3428fdc842d1SBarry Smith } 3429fdc842d1SBarry Smith } 3430fdc842d1SBarry Smith #endif 3431cd3bbe55SBarry Smith *maij = B; 3432d72c5c08SBarry Smith } 343382b1193eSBarry Smith PetscFunctionReturn(0); 343482b1193eSBarry Smith } 3435