1be1d678aSKris Buschelman 2c6db04a5SJed Brown #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 3c6db04a5SJed Brown #include <../src/mat/utils/freespace.h> 482b1193eSBarry Smith 5b350b9acSSatish Balay /*@ 611a5261eSBarry Smith MatMAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATMAIJ` matrix 7ff585edeSJed Brown 811a5261eSBarry Smith Not Collective, but if the `MATMAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel 9ff585edeSJed Brown 10ff585edeSJed Brown Input Parameter: 1111a5261eSBarry Smith . A - the `MATMAIJ` matrix 12ff585edeSJed Brown 13ff585edeSJed Brown Output Parameter: 1411a5261eSBarry Smith . B - the `MATAIJ` matrix 15ff585edeSJed Brown 16ff585edeSJed Brown Level: advanced 17ff585edeSJed Brown 1811a5261eSBarry Smith Note: 1911a5261eSBarry Smith The reference count on the `MATAIJ` matrix is not increased so you should not destroy it. 20ff585edeSJed Brown 2111a5261eSBarry Smith .seealso: `MATMAIJ`, `MATAIJ`, `MatCreateMAIJ()` 22ff585edeSJed Brown @*/ 23*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B) 24*d71ae5a4SJacob Faibussowitsch { 25ace3abfcSBarry Smith PetscBool ismpimaij, isseqmaij; 26b9b97703SBarry Smith 27b9b97703SBarry Smith PetscFunctionBegin; 289566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij)); 299566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij)); 30b9b97703SBarry Smith if (ismpimaij) { 31b9b97703SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 32b9b97703SBarry Smith 33b9b97703SBarry Smith *B = b->A; 34b9b97703SBarry Smith } else if (isseqmaij) { 35b9b97703SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 36b9b97703SBarry Smith 37b9b97703SBarry Smith *B = b->AIJ; 38b9b97703SBarry Smith } else { 39b9b97703SBarry Smith *B = A; 40b9b97703SBarry Smith } 41b9b97703SBarry Smith PetscFunctionReturn(0); 42b9b97703SBarry Smith } 43b9b97703SBarry Smith 44480dffcfSJed Brown /*@ 4511a5261eSBarry Smith MatMAIJRedimension - Get a new `MATMAIJ` matrix with the same action, but for a different block size 46ff585edeSJed Brown 473f9fe445SBarry Smith Logically Collective 48ff585edeSJed Brown 49d8d19677SJose E. Roman Input Parameters: 5011a5261eSBarry Smith + A - the `MATMAIJ` matrix 51ff585edeSJed Brown - dof - the block size for the new matrix 52ff585edeSJed Brown 53ff585edeSJed Brown Output Parameter: 5411a5261eSBarry Smith . B - the new `MATMAIJ` matrix 55ff585edeSJed Brown 56ff585edeSJed Brown Level: advanced 57ff585edeSJed Brown 5811a5261eSBarry Smith .seealso: `MATMAIJ`, `MatCreateMAIJ()` 59ff585edeSJed Brown @*/ 60*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B) 61*d71ae5a4SJacob Faibussowitsch { 620298fd71SBarry Smith Mat Aij = NULL; 63b9b97703SBarry Smith 64b9b97703SBarry Smith PetscFunctionBegin; 65c5eb9154SBarry Smith PetscValidLogicalCollectiveInt(A, dof, 2); 669566063dSJacob Faibussowitsch PetscCall(MatMAIJGetAIJ(A, &Aij)); 679566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(Aij, dof, B)); 68b9b97703SBarry Smith PetscFunctionReturn(0); 69b9b97703SBarry Smith } 70b9b97703SBarry Smith 71*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 72*d71ae5a4SJacob Faibussowitsch { 734cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 7482b1193eSBarry Smith 7582b1193eSBarry Smith PetscFunctionBegin; 769566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->AIJ)); 779566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 782e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL)); 799566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL)); 809566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL)); 814cb9d9b8SBarry Smith PetscFunctionReturn(0); 824cb9d9b8SBarry Smith } 834cb9d9b8SBarry Smith 84*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetUp_MAIJ(Mat A) 85*d71ae5a4SJacob Faibussowitsch { 86356c569eSBarry Smith PetscFunctionBegin; 87ce94432eSBarry Smith SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices"); 88356c569eSBarry Smith } 89356c569eSBarry Smith 90*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer) 91*d71ae5a4SJacob Faibussowitsch { 920fd73130SBarry Smith Mat B; 930fd73130SBarry Smith 940fd73130SBarry Smith PetscFunctionBegin; 959566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 969566063dSJacob Faibussowitsch PetscCall(MatView(B, viewer)); 979566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 980fd73130SBarry Smith PetscFunctionReturn(0); 990fd73130SBarry Smith } 1000fd73130SBarry Smith 101*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer) 102*d71ae5a4SJacob Faibussowitsch { 1030fd73130SBarry Smith Mat B; 1040fd73130SBarry Smith 1050fd73130SBarry Smith PetscFunctionBegin; 1069566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B)); 1079566063dSJacob Faibussowitsch PetscCall(MatView(B, viewer)); 1089566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 1090fd73130SBarry Smith PetscFunctionReturn(0); 1100fd73130SBarry Smith } 1110fd73130SBarry Smith 112*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 113*d71ae5a4SJacob Faibussowitsch { 1144cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 1154cb9d9b8SBarry Smith 1164cb9d9b8SBarry Smith PetscFunctionBegin; 1179566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->AIJ)); 1189566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->OAIJ)); 1199566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->A)); 1209566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&b->ctx)); 1219566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->w)); 1229566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 1232e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL)); 1249566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL)); 1259566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL)); 1269566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); 127b9b97703SBarry Smith PetscFunctionReturn(0); 128b9b97703SBarry Smith } 129b9b97703SBarry Smith 1300bad9183SKris Buschelman /*MC 131fafad747SKris Buschelman MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 1320bad9183SKris Buschelman multicomponent problems, interpolating or restricting each component the same way independently. 13311a5261eSBarry Smith The matrix type is based on `MATSEQAIJ` for sequential matrices, and `MATMPIAIJ` for distributed matrices. 1340bad9183SKris Buschelman 1350bad9183SKris Buschelman Operations provided: 13667b8a455SSatish Balay .vb 13711a5261eSBarry Smith MatMult() 13811a5261eSBarry Smith MatMultTranspose() 13911a5261eSBarry Smith MatMultAdd() 14011a5261eSBarry Smith MatMultTransposeAdd() 14167b8a455SSatish Balay .ve 1420bad9183SKris Buschelman 1430bad9183SKris Buschelman Level: advanced 1440bad9183SKris Buschelman 14511a5261eSBarry Smith .seealso: `MATAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()` 1460bad9183SKris Buschelman M*/ 1470bad9183SKris Buschelman 148*d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) 149*d71ae5a4SJacob Faibussowitsch { 1504cb9d9b8SBarry Smith Mat_MPIMAIJ *b; 15164b52464SHong Zhang PetscMPIInt size; 15282b1193eSBarry Smith 15382b1193eSBarry Smith PetscFunctionBegin; 1544dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&b)); 155b0a32e0cSBarry Smith A->data = (void *)b; 15626fbe8dcSKarl Rupp 1579566063dSJacob Faibussowitsch PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps))); 15826fbe8dcSKarl Rupp 159356c569eSBarry Smith A->ops->setup = MatSetUp_MAIJ; 160f4a53059SBarry Smith 161f4259b30SLisandro Dalcin b->AIJ = NULL; 162cd3bbe55SBarry Smith b->dof = 0; 163f4259b30SLisandro Dalcin b->OAIJ = NULL; 164f4259b30SLisandro Dalcin b->ctx = NULL; 165f4259b30SLisandro Dalcin b->w = NULL; 1669566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 16764b52464SHong Zhang if (size == 1) { 1689566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ)); 16964b52464SHong Zhang } else { 1709566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ)); 17164b52464SHong Zhang } 17232e7c8b0SBarry Smith A->preallocated = PETSC_TRUE; 17332e7c8b0SBarry Smith A->assembled = PETSC_TRUE; 17482b1193eSBarry Smith PetscFunctionReturn(0); 17582b1193eSBarry Smith } 17682b1193eSBarry Smith 177cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 178*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqMAIJ_2(Mat A, Vec xx, Vec yy) 179*d71ae5a4SJacob Faibussowitsch { 1804cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 181bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 182d2840e09SBarry Smith const PetscScalar *x, *v; 183d2840e09SBarry Smith PetscScalar *y, sum1, sum2; 184d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 185d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 18682b1193eSBarry Smith 187bcc973b7SBarry Smith PetscFunctionBegin; 1889566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 1899566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 190bcc973b7SBarry Smith idx = a->j; 191bcc973b7SBarry Smith v = a->a; 192bcc973b7SBarry Smith ii = a->i; 193bcc973b7SBarry Smith 194bcc973b7SBarry Smith for (i = 0; i < m; i++) { 195bcc973b7SBarry Smith jrow = ii[i]; 196bcc973b7SBarry Smith n = ii[i + 1] - jrow; 197bcc973b7SBarry Smith sum1 = 0.0; 198bcc973b7SBarry Smith sum2 = 0.0; 19926fbe8dcSKarl Rupp 200b3c51c66SMatthew Knepley nonzerorow += (n > 0); 201bcc973b7SBarry Smith for (j = 0; j < n; j++) { 202bcc973b7SBarry Smith sum1 += v[jrow] * x[2 * idx[jrow]]; 203bcc973b7SBarry Smith sum2 += v[jrow] * x[2 * idx[jrow] + 1]; 204bcc973b7SBarry Smith jrow++; 205bcc973b7SBarry Smith } 206bcc973b7SBarry Smith y[2 * i] = sum1; 207bcc973b7SBarry Smith y[2 * i + 1] = sum2; 208bcc973b7SBarry Smith } 209bcc973b7SBarry Smith 2109566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(4.0 * a->nz - 2.0 * nonzerorow)); 2119566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 2129566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 21382b1193eSBarry Smith PetscFunctionReturn(0); 21482b1193eSBarry Smith } 215bcc973b7SBarry Smith 216*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A, Vec xx, Vec yy) 217*d71ae5a4SJacob Faibussowitsch { 2184cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 219bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 220d2840e09SBarry Smith const PetscScalar *x, *v; 221d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2; 222d2840e09SBarry Smith PetscInt n, i; 223d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 22482b1193eSBarry Smith 225bcc973b7SBarry Smith PetscFunctionBegin; 2269566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 2279566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 2289566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 229bfec09a0SHong Zhang 230bcc973b7SBarry Smith for (i = 0; i < m; i++) { 231bfec09a0SHong Zhang idx = a->j + a->i[i]; 232bfec09a0SHong Zhang v = a->a + a->i[i]; 233bcc973b7SBarry Smith n = a->i[i + 1] - a->i[i]; 234bcc973b7SBarry Smith alpha1 = x[2 * i]; 235bcc973b7SBarry Smith alpha2 = x[2 * i + 1]; 23626fbe8dcSKarl Rupp while (n-- > 0) { 23726fbe8dcSKarl Rupp y[2 * (*idx)] += alpha1 * (*v); 23826fbe8dcSKarl Rupp y[2 * (*idx) + 1] += alpha2 * (*v); 2399371c9d4SSatish Balay idx++; 2409371c9d4SSatish Balay v++; 24126fbe8dcSKarl Rupp } 242bcc973b7SBarry Smith } 2439566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(4.0 * a->nz)); 2449566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 2459566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 24682b1193eSBarry Smith PetscFunctionReturn(0); 24782b1193eSBarry Smith } 248bcc973b7SBarry Smith 249*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz) 250*d71ae5a4SJacob Faibussowitsch { 2514cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 252bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 253d2840e09SBarry Smith const PetscScalar *x, *v; 254d2840e09SBarry Smith PetscScalar *y, sum1, sum2; 255b24ad042SBarry Smith PetscInt n, i, jrow, j; 256d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 25782b1193eSBarry Smith 258bcc973b7SBarry Smith PetscFunctionBegin; 2599566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 2609566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 2619566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 262bcc973b7SBarry Smith idx = a->j; 263bcc973b7SBarry Smith v = a->a; 264bcc973b7SBarry Smith ii = a->i; 265bcc973b7SBarry Smith 266bcc973b7SBarry Smith for (i = 0; i < m; i++) { 267bcc973b7SBarry Smith jrow = ii[i]; 268bcc973b7SBarry Smith n = ii[i + 1] - jrow; 269bcc973b7SBarry Smith sum1 = 0.0; 270bcc973b7SBarry Smith sum2 = 0.0; 271bcc973b7SBarry Smith for (j = 0; j < n; j++) { 272bcc973b7SBarry Smith sum1 += v[jrow] * x[2 * idx[jrow]]; 273bcc973b7SBarry Smith sum2 += v[jrow] * x[2 * idx[jrow] + 1]; 274bcc973b7SBarry Smith jrow++; 275bcc973b7SBarry Smith } 276bcc973b7SBarry Smith y[2 * i] += sum1; 277bcc973b7SBarry Smith y[2 * i + 1] += sum2; 278bcc973b7SBarry Smith } 279bcc973b7SBarry Smith 2809566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(4.0 * a->nz)); 2819566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 2829566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 283bcc973b7SBarry Smith PetscFunctionReturn(0); 28482b1193eSBarry Smith } 285*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz) 286*d71ae5a4SJacob Faibussowitsch { 2874cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 288bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 289d2840e09SBarry Smith const PetscScalar *x, *v; 290d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2; 291d2840e09SBarry Smith PetscInt n, i; 292d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 29382b1193eSBarry Smith 294bcc973b7SBarry Smith PetscFunctionBegin; 2959566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 2969566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 2979566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 298bfec09a0SHong Zhang 299bcc973b7SBarry Smith for (i = 0; i < m; i++) { 300bfec09a0SHong Zhang idx = a->j + a->i[i]; 301bfec09a0SHong Zhang v = a->a + a->i[i]; 302bcc973b7SBarry Smith n = a->i[i + 1] - a->i[i]; 303bcc973b7SBarry Smith alpha1 = x[2 * i]; 304bcc973b7SBarry Smith alpha2 = x[2 * i + 1]; 30526fbe8dcSKarl Rupp while (n-- > 0) { 30626fbe8dcSKarl Rupp y[2 * (*idx)] += alpha1 * (*v); 30726fbe8dcSKarl Rupp y[2 * (*idx) + 1] += alpha2 * (*v); 3089371c9d4SSatish Balay idx++; 3099371c9d4SSatish Balay v++; 31026fbe8dcSKarl Rupp } 311bcc973b7SBarry Smith } 3129566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(4.0 * a->nz)); 3139566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 3149566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 315bcc973b7SBarry Smith PetscFunctionReturn(0); 31682b1193eSBarry Smith } 317cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 318*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqMAIJ_3(Mat A, Vec xx, Vec yy) 319*d71ae5a4SJacob Faibussowitsch { 3204cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 321bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 322d2840e09SBarry Smith const PetscScalar *x, *v; 323d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3; 324d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 325d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 32682b1193eSBarry Smith 327bcc973b7SBarry Smith PetscFunctionBegin; 3289566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 3299566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 330bcc973b7SBarry Smith idx = a->j; 331bcc973b7SBarry Smith v = a->a; 332bcc973b7SBarry Smith ii = a->i; 333bcc973b7SBarry Smith 334bcc973b7SBarry Smith for (i = 0; i < m; i++) { 335bcc973b7SBarry Smith jrow = ii[i]; 336bcc973b7SBarry Smith n = ii[i + 1] - jrow; 337bcc973b7SBarry Smith sum1 = 0.0; 338bcc973b7SBarry Smith sum2 = 0.0; 339bcc973b7SBarry Smith sum3 = 0.0; 34026fbe8dcSKarl Rupp 341b3c51c66SMatthew Knepley nonzerorow += (n > 0); 342bcc973b7SBarry Smith for (j = 0; j < n; j++) { 343bcc973b7SBarry Smith sum1 += v[jrow] * x[3 * idx[jrow]]; 344bcc973b7SBarry Smith sum2 += v[jrow] * x[3 * idx[jrow] + 1]; 345bcc973b7SBarry Smith sum3 += v[jrow] * x[3 * idx[jrow] + 2]; 346bcc973b7SBarry Smith jrow++; 347bcc973b7SBarry Smith } 348bcc973b7SBarry Smith y[3 * i] = sum1; 349bcc973b7SBarry Smith y[3 * i + 1] = sum2; 350bcc973b7SBarry Smith y[3 * i + 2] = sum3; 351bcc973b7SBarry Smith } 352bcc973b7SBarry Smith 3539566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(6.0 * a->nz - 3.0 * nonzerorow)); 3549566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 3559566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 356bcc973b7SBarry Smith PetscFunctionReturn(0); 357bcc973b7SBarry Smith } 358bcc973b7SBarry Smith 359*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A, Vec xx, Vec yy) 360*d71ae5a4SJacob Faibussowitsch { 3614cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 362bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 363d2840e09SBarry Smith const PetscScalar *x, *v; 364d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3; 365d2840e09SBarry Smith PetscInt n, i; 366d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 367bcc973b7SBarry Smith 368bcc973b7SBarry Smith PetscFunctionBegin; 3699566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 3709566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 3719566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 372bfec09a0SHong Zhang 373bcc973b7SBarry Smith for (i = 0; i < m; i++) { 374bfec09a0SHong Zhang idx = a->j + a->i[i]; 375bfec09a0SHong Zhang v = a->a + a->i[i]; 376bcc973b7SBarry Smith n = a->i[i + 1] - a->i[i]; 377bcc973b7SBarry Smith alpha1 = x[3 * i]; 378bcc973b7SBarry Smith alpha2 = x[3 * i + 1]; 379bcc973b7SBarry Smith alpha3 = x[3 * i + 2]; 380bcc973b7SBarry Smith while (n-- > 0) { 381bcc973b7SBarry Smith y[3 * (*idx)] += alpha1 * (*v); 382bcc973b7SBarry Smith y[3 * (*idx) + 1] += alpha2 * (*v); 383bcc973b7SBarry Smith y[3 * (*idx) + 2] += alpha3 * (*v); 3849371c9d4SSatish Balay idx++; 3859371c9d4SSatish Balay v++; 386bcc973b7SBarry Smith } 387bcc973b7SBarry Smith } 3889566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(6.0 * a->nz)); 3899566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 3909566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 391bcc973b7SBarry Smith PetscFunctionReturn(0); 392bcc973b7SBarry Smith } 393bcc973b7SBarry Smith 394*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz) 395*d71ae5a4SJacob Faibussowitsch { 3964cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 397bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 398d2840e09SBarry Smith const PetscScalar *x, *v; 399d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3; 400b24ad042SBarry Smith PetscInt n, i, jrow, j; 401d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 402bcc973b7SBarry Smith 403bcc973b7SBarry Smith PetscFunctionBegin; 4049566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 4059566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 4069566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 407bcc973b7SBarry Smith idx = a->j; 408bcc973b7SBarry Smith v = a->a; 409bcc973b7SBarry Smith ii = a->i; 410bcc973b7SBarry Smith 411bcc973b7SBarry Smith for (i = 0; i < m; i++) { 412bcc973b7SBarry Smith jrow = ii[i]; 413bcc973b7SBarry Smith n = ii[i + 1] - jrow; 414bcc973b7SBarry Smith sum1 = 0.0; 415bcc973b7SBarry Smith sum2 = 0.0; 416bcc973b7SBarry Smith sum3 = 0.0; 417bcc973b7SBarry Smith for (j = 0; j < n; j++) { 418bcc973b7SBarry Smith sum1 += v[jrow] * x[3 * idx[jrow]]; 419bcc973b7SBarry Smith sum2 += v[jrow] * x[3 * idx[jrow] + 1]; 420bcc973b7SBarry Smith sum3 += v[jrow] * x[3 * idx[jrow] + 2]; 421bcc973b7SBarry Smith jrow++; 422bcc973b7SBarry Smith } 423bcc973b7SBarry Smith y[3 * i] += sum1; 424bcc973b7SBarry Smith y[3 * i + 1] += sum2; 425bcc973b7SBarry Smith y[3 * i + 2] += sum3; 426bcc973b7SBarry Smith } 427bcc973b7SBarry Smith 4289566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(6.0 * a->nz)); 4299566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 4309566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 431bcc973b7SBarry Smith PetscFunctionReturn(0); 432bcc973b7SBarry Smith } 433*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz) 434*d71ae5a4SJacob Faibussowitsch { 4354cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 436bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 437d2840e09SBarry Smith const PetscScalar *x, *v; 438d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3; 439d2840e09SBarry Smith PetscInt n, i; 440d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 441bcc973b7SBarry Smith 442bcc973b7SBarry Smith PetscFunctionBegin; 4439566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 4449566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 4459566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 446bcc973b7SBarry Smith for (i = 0; i < m; i++) { 447bfec09a0SHong Zhang idx = a->j + a->i[i]; 448bfec09a0SHong Zhang v = a->a + a->i[i]; 449bcc973b7SBarry Smith n = a->i[i + 1] - a->i[i]; 450bcc973b7SBarry Smith alpha1 = x[3 * i]; 451bcc973b7SBarry Smith alpha2 = x[3 * i + 1]; 452bcc973b7SBarry Smith alpha3 = x[3 * i + 2]; 453bcc973b7SBarry Smith while (n-- > 0) { 454bcc973b7SBarry Smith y[3 * (*idx)] += alpha1 * (*v); 455bcc973b7SBarry Smith y[3 * (*idx) + 1] += alpha2 * (*v); 456bcc973b7SBarry Smith y[3 * (*idx) + 2] += alpha3 * (*v); 4579371c9d4SSatish Balay idx++; 4589371c9d4SSatish Balay v++; 459bcc973b7SBarry Smith } 460bcc973b7SBarry Smith } 4619566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(6.0 * a->nz)); 4629566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 4639566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 464bcc973b7SBarry Smith PetscFunctionReturn(0); 465bcc973b7SBarry Smith } 466bcc973b7SBarry Smith 467bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/ 468*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqMAIJ_4(Mat A, Vec xx, Vec yy) 469*d71ae5a4SJacob Faibussowitsch { 4704cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 471bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 472d2840e09SBarry Smith const PetscScalar *x, *v; 473d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4; 474d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 475d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 476bcc973b7SBarry Smith 477bcc973b7SBarry Smith PetscFunctionBegin; 4789566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 4799566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 480bcc973b7SBarry Smith idx = a->j; 481bcc973b7SBarry Smith v = a->a; 482bcc973b7SBarry Smith ii = a->i; 483bcc973b7SBarry Smith 484bcc973b7SBarry Smith for (i = 0; i < m; i++) { 485bcc973b7SBarry Smith jrow = ii[i]; 486bcc973b7SBarry Smith n = ii[i + 1] - jrow; 487bcc973b7SBarry Smith sum1 = 0.0; 488bcc973b7SBarry Smith sum2 = 0.0; 489bcc973b7SBarry Smith sum3 = 0.0; 490bcc973b7SBarry Smith sum4 = 0.0; 491b3c51c66SMatthew Knepley nonzerorow += (n > 0); 492bcc973b7SBarry Smith for (j = 0; j < n; j++) { 493bcc973b7SBarry Smith sum1 += v[jrow] * x[4 * idx[jrow]]; 494bcc973b7SBarry Smith sum2 += v[jrow] * x[4 * idx[jrow] + 1]; 495bcc973b7SBarry Smith sum3 += v[jrow] * x[4 * idx[jrow] + 2]; 496bcc973b7SBarry Smith sum4 += v[jrow] * x[4 * idx[jrow] + 3]; 497bcc973b7SBarry Smith jrow++; 498bcc973b7SBarry Smith } 499bcc973b7SBarry Smith y[4 * i] = sum1; 500bcc973b7SBarry Smith y[4 * i + 1] = sum2; 501bcc973b7SBarry Smith y[4 * i + 2] = sum3; 502bcc973b7SBarry Smith y[4 * i + 3] = sum4; 503bcc973b7SBarry Smith } 504bcc973b7SBarry Smith 5059566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(8.0 * a->nz - 4.0 * nonzerorow)); 5069566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 5079566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 508bcc973b7SBarry Smith PetscFunctionReturn(0); 509bcc973b7SBarry Smith } 510bcc973b7SBarry Smith 511*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A, Vec xx, Vec yy) 512*d71ae5a4SJacob Faibussowitsch { 5134cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 514bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 515d2840e09SBarry Smith const PetscScalar *x, *v; 516d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4; 517d2840e09SBarry Smith PetscInt n, i; 518d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 519bcc973b7SBarry Smith 520bcc973b7SBarry Smith PetscFunctionBegin; 5219566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 5229566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 5239566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 524bcc973b7SBarry Smith for (i = 0; i < m; i++) { 525bfec09a0SHong Zhang idx = a->j + a->i[i]; 526bfec09a0SHong Zhang v = a->a + a->i[i]; 527bcc973b7SBarry Smith n = a->i[i + 1] - a->i[i]; 528bcc973b7SBarry Smith alpha1 = x[4 * i]; 529bcc973b7SBarry Smith alpha2 = x[4 * i + 1]; 530bcc973b7SBarry Smith alpha3 = x[4 * i + 2]; 531bcc973b7SBarry Smith alpha4 = x[4 * i + 3]; 532bcc973b7SBarry Smith while (n-- > 0) { 533bcc973b7SBarry Smith y[4 * (*idx)] += alpha1 * (*v); 534bcc973b7SBarry Smith y[4 * (*idx) + 1] += alpha2 * (*v); 535bcc973b7SBarry Smith y[4 * (*idx) + 2] += alpha3 * (*v); 536bcc973b7SBarry Smith y[4 * (*idx) + 3] += alpha4 * (*v); 5379371c9d4SSatish Balay idx++; 5389371c9d4SSatish Balay v++; 539bcc973b7SBarry Smith } 540bcc973b7SBarry Smith } 5419566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(8.0 * a->nz)); 5429566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 5439566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 544bcc973b7SBarry Smith PetscFunctionReturn(0); 545bcc973b7SBarry Smith } 546bcc973b7SBarry Smith 547*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz) 548*d71ae5a4SJacob Faibussowitsch { 5494cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 550f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 551d2840e09SBarry Smith const PetscScalar *x, *v; 552d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4; 553b24ad042SBarry Smith PetscInt n, i, jrow, j; 554d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 555f9fae5adSBarry Smith 556f9fae5adSBarry Smith PetscFunctionBegin; 5579566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 5589566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 5599566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 560f9fae5adSBarry Smith idx = a->j; 561f9fae5adSBarry Smith v = a->a; 562f9fae5adSBarry Smith ii = a->i; 563f9fae5adSBarry Smith 564f9fae5adSBarry Smith for (i = 0; i < m; i++) { 565f9fae5adSBarry Smith jrow = ii[i]; 566f9fae5adSBarry Smith n = ii[i + 1] - jrow; 567f9fae5adSBarry Smith sum1 = 0.0; 568f9fae5adSBarry Smith sum2 = 0.0; 569f9fae5adSBarry Smith sum3 = 0.0; 570f9fae5adSBarry Smith sum4 = 0.0; 571f9fae5adSBarry Smith for (j = 0; j < n; j++) { 572f9fae5adSBarry Smith sum1 += v[jrow] * x[4 * idx[jrow]]; 573f9fae5adSBarry Smith sum2 += v[jrow] * x[4 * idx[jrow] + 1]; 574f9fae5adSBarry Smith sum3 += v[jrow] * x[4 * idx[jrow] + 2]; 575f9fae5adSBarry Smith sum4 += v[jrow] * x[4 * idx[jrow] + 3]; 576f9fae5adSBarry Smith jrow++; 577f9fae5adSBarry Smith } 578f9fae5adSBarry Smith y[4 * i] += sum1; 579f9fae5adSBarry Smith y[4 * i + 1] += sum2; 580f9fae5adSBarry Smith y[4 * i + 2] += sum3; 581f9fae5adSBarry Smith y[4 * i + 3] += sum4; 582f9fae5adSBarry Smith } 583f9fae5adSBarry Smith 5849566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(8.0 * a->nz)); 5859566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 5869566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 587f9fae5adSBarry Smith PetscFunctionReturn(0); 588bcc973b7SBarry Smith } 589*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz) 590*d71ae5a4SJacob Faibussowitsch { 5914cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 592f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 593d2840e09SBarry Smith const PetscScalar *x, *v; 594d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4; 595d2840e09SBarry Smith PetscInt n, i; 596d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 597f9fae5adSBarry Smith 598f9fae5adSBarry Smith PetscFunctionBegin; 5999566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 6009566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 6019566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 602bfec09a0SHong Zhang 603f9fae5adSBarry Smith for (i = 0; i < m; i++) { 604bfec09a0SHong Zhang idx = a->j + a->i[i]; 605bfec09a0SHong Zhang v = a->a + a->i[i]; 606f9fae5adSBarry Smith n = a->i[i + 1] - a->i[i]; 607f9fae5adSBarry Smith alpha1 = x[4 * i]; 608f9fae5adSBarry Smith alpha2 = x[4 * i + 1]; 609f9fae5adSBarry Smith alpha3 = x[4 * i + 2]; 610f9fae5adSBarry Smith alpha4 = x[4 * i + 3]; 611f9fae5adSBarry Smith while (n-- > 0) { 612f9fae5adSBarry Smith y[4 * (*idx)] += alpha1 * (*v); 613f9fae5adSBarry Smith y[4 * (*idx) + 1] += alpha2 * (*v); 614f9fae5adSBarry Smith y[4 * (*idx) + 2] += alpha3 * (*v); 615f9fae5adSBarry Smith y[4 * (*idx) + 3] += alpha4 * (*v); 6169371c9d4SSatish Balay idx++; 6179371c9d4SSatish Balay v++; 618f9fae5adSBarry Smith } 619f9fae5adSBarry Smith } 6209566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(8.0 * a->nz)); 6219566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 6229566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 623f9fae5adSBarry Smith PetscFunctionReturn(0); 624f9fae5adSBarry Smith } 625f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/ 6266cd98798SBarry Smith 627*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqMAIJ_5(Mat A, Vec xx, Vec yy) 628*d71ae5a4SJacob Faibussowitsch { 6294cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 630f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 631d2840e09SBarry Smith const PetscScalar *x, *v; 632d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5; 633d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 634d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 635f9fae5adSBarry Smith 636f9fae5adSBarry Smith PetscFunctionBegin; 6379566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 6389566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 639f9fae5adSBarry Smith idx = a->j; 640f9fae5adSBarry Smith v = a->a; 641f9fae5adSBarry Smith ii = a->i; 642f9fae5adSBarry Smith 643f9fae5adSBarry Smith for (i = 0; i < m; i++) { 644f9fae5adSBarry Smith jrow = ii[i]; 645f9fae5adSBarry Smith n = ii[i + 1] - jrow; 646f9fae5adSBarry Smith sum1 = 0.0; 647f9fae5adSBarry Smith sum2 = 0.0; 648f9fae5adSBarry Smith sum3 = 0.0; 649f9fae5adSBarry Smith sum4 = 0.0; 650f9fae5adSBarry Smith sum5 = 0.0; 65126fbe8dcSKarl Rupp 652b3c51c66SMatthew Knepley nonzerorow += (n > 0); 653f9fae5adSBarry Smith for (j = 0; j < n; j++) { 654f9fae5adSBarry Smith sum1 += v[jrow] * x[5 * idx[jrow]]; 655f9fae5adSBarry Smith sum2 += v[jrow] * x[5 * idx[jrow] + 1]; 656f9fae5adSBarry Smith sum3 += v[jrow] * x[5 * idx[jrow] + 2]; 657f9fae5adSBarry Smith sum4 += v[jrow] * x[5 * idx[jrow] + 3]; 658f9fae5adSBarry Smith sum5 += v[jrow] * x[5 * idx[jrow] + 4]; 659f9fae5adSBarry Smith jrow++; 660f9fae5adSBarry Smith } 661f9fae5adSBarry Smith y[5 * i] = sum1; 662f9fae5adSBarry Smith y[5 * i + 1] = sum2; 663f9fae5adSBarry Smith y[5 * i + 2] = sum3; 664f9fae5adSBarry Smith y[5 * i + 3] = sum4; 665f9fae5adSBarry Smith y[5 * i + 4] = sum5; 666f9fae5adSBarry Smith } 667f9fae5adSBarry Smith 6689566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(10.0 * a->nz - 5.0 * nonzerorow)); 6699566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 6709566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 671f9fae5adSBarry Smith PetscFunctionReturn(0); 672f9fae5adSBarry Smith } 673f9fae5adSBarry Smith 674*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A, Vec xx, Vec yy) 675*d71ae5a4SJacob Faibussowitsch { 6764cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 677f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 678d2840e09SBarry Smith const PetscScalar *x, *v; 679d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5; 680d2840e09SBarry Smith PetscInt n, i; 681d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 682f9fae5adSBarry Smith 683f9fae5adSBarry Smith PetscFunctionBegin; 6849566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 6859566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 6869566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 687bfec09a0SHong Zhang 688f9fae5adSBarry Smith for (i = 0; i < m; i++) { 689bfec09a0SHong Zhang idx = a->j + a->i[i]; 690bfec09a0SHong Zhang v = a->a + a->i[i]; 691f9fae5adSBarry Smith n = a->i[i + 1] - a->i[i]; 692f9fae5adSBarry Smith alpha1 = x[5 * i]; 693f9fae5adSBarry Smith alpha2 = x[5 * i + 1]; 694f9fae5adSBarry Smith alpha3 = x[5 * i + 2]; 695f9fae5adSBarry Smith alpha4 = x[5 * i + 3]; 696f9fae5adSBarry Smith alpha5 = x[5 * i + 4]; 697f9fae5adSBarry Smith while (n-- > 0) { 698f9fae5adSBarry Smith y[5 * (*idx)] += alpha1 * (*v); 699f9fae5adSBarry Smith y[5 * (*idx) + 1] += alpha2 * (*v); 700f9fae5adSBarry Smith y[5 * (*idx) + 2] += alpha3 * (*v); 701f9fae5adSBarry Smith y[5 * (*idx) + 3] += alpha4 * (*v); 702f9fae5adSBarry Smith y[5 * (*idx) + 4] += alpha5 * (*v); 7039371c9d4SSatish Balay idx++; 7049371c9d4SSatish Balay v++; 705f9fae5adSBarry Smith } 706f9fae5adSBarry Smith } 7079566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(10.0 * a->nz)); 7089566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 7099566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 710f9fae5adSBarry Smith PetscFunctionReturn(0); 711f9fae5adSBarry Smith } 712f9fae5adSBarry Smith 713*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz) 714*d71ae5a4SJacob Faibussowitsch { 7154cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 716f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 717d2840e09SBarry Smith const PetscScalar *x, *v; 718d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5; 719b24ad042SBarry Smith PetscInt n, i, jrow, j; 720d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 721f9fae5adSBarry Smith 722f9fae5adSBarry Smith PetscFunctionBegin; 7239566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 7249566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 7259566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 726f9fae5adSBarry Smith idx = a->j; 727f9fae5adSBarry Smith v = a->a; 728f9fae5adSBarry Smith ii = a->i; 729f9fae5adSBarry Smith 730f9fae5adSBarry Smith for (i = 0; i < m; i++) { 731f9fae5adSBarry Smith jrow = ii[i]; 732f9fae5adSBarry Smith n = ii[i + 1] - jrow; 733f9fae5adSBarry Smith sum1 = 0.0; 734f9fae5adSBarry Smith sum2 = 0.0; 735f9fae5adSBarry Smith sum3 = 0.0; 736f9fae5adSBarry Smith sum4 = 0.0; 737f9fae5adSBarry Smith sum5 = 0.0; 738f9fae5adSBarry Smith for (j = 0; j < n; j++) { 739f9fae5adSBarry Smith sum1 += v[jrow] * x[5 * idx[jrow]]; 740f9fae5adSBarry Smith sum2 += v[jrow] * x[5 * idx[jrow] + 1]; 741f9fae5adSBarry Smith sum3 += v[jrow] * x[5 * idx[jrow] + 2]; 742f9fae5adSBarry Smith sum4 += v[jrow] * x[5 * idx[jrow] + 3]; 743f9fae5adSBarry Smith sum5 += v[jrow] * x[5 * idx[jrow] + 4]; 744f9fae5adSBarry Smith jrow++; 745f9fae5adSBarry Smith } 746f9fae5adSBarry Smith y[5 * i] += sum1; 747f9fae5adSBarry Smith y[5 * i + 1] += sum2; 748f9fae5adSBarry Smith y[5 * i + 2] += sum3; 749f9fae5adSBarry Smith y[5 * i + 3] += sum4; 750f9fae5adSBarry Smith y[5 * i + 4] += sum5; 751f9fae5adSBarry Smith } 752f9fae5adSBarry Smith 7539566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(10.0 * a->nz)); 7549566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 7559566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 756f9fae5adSBarry Smith PetscFunctionReturn(0); 757f9fae5adSBarry Smith } 758f9fae5adSBarry Smith 759*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz) 760*d71ae5a4SJacob Faibussowitsch { 7614cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 762f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 763d2840e09SBarry Smith const PetscScalar *x, *v; 764d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5; 765d2840e09SBarry Smith PetscInt n, i; 766d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 767f9fae5adSBarry Smith 768f9fae5adSBarry Smith PetscFunctionBegin; 7699566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 7709566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 7719566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 772bfec09a0SHong Zhang 773f9fae5adSBarry Smith for (i = 0; i < m; i++) { 774bfec09a0SHong Zhang idx = a->j + a->i[i]; 775bfec09a0SHong Zhang v = a->a + a->i[i]; 776f9fae5adSBarry Smith n = a->i[i + 1] - a->i[i]; 777f9fae5adSBarry Smith alpha1 = x[5 * i]; 778f9fae5adSBarry Smith alpha2 = x[5 * i + 1]; 779f9fae5adSBarry Smith alpha3 = x[5 * i + 2]; 780f9fae5adSBarry Smith alpha4 = x[5 * i + 3]; 781f9fae5adSBarry Smith alpha5 = x[5 * i + 4]; 782f9fae5adSBarry Smith while (n-- > 0) { 783f9fae5adSBarry Smith y[5 * (*idx)] += alpha1 * (*v); 784f9fae5adSBarry Smith y[5 * (*idx) + 1] += alpha2 * (*v); 785f9fae5adSBarry Smith y[5 * (*idx) + 2] += alpha3 * (*v); 786f9fae5adSBarry Smith y[5 * (*idx) + 3] += alpha4 * (*v); 787f9fae5adSBarry Smith y[5 * (*idx) + 4] += alpha5 * (*v); 7889371c9d4SSatish Balay idx++; 7899371c9d4SSatish Balay v++; 790f9fae5adSBarry Smith } 791f9fae5adSBarry Smith } 7929566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(10.0 * a->nz)); 7939566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 7949566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 795f9fae5adSBarry Smith PetscFunctionReturn(0); 796bcc973b7SBarry Smith } 797bcc973b7SBarry Smith 7986cd98798SBarry Smith /* ------------------------------------------------------------------------------*/ 799*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqMAIJ_6(Mat A, Vec xx, Vec yy) 800*d71ae5a4SJacob Faibussowitsch { 8016cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 8026cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 803d2840e09SBarry Smith const PetscScalar *x, *v; 804d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6; 805d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 806d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 8076cd98798SBarry Smith 8086cd98798SBarry Smith PetscFunctionBegin; 8099566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 8109566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 8116cd98798SBarry Smith idx = a->j; 8126cd98798SBarry Smith v = a->a; 8136cd98798SBarry Smith ii = a->i; 8146cd98798SBarry Smith 8156cd98798SBarry Smith for (i = 0; i < m; i++) { 8166cd98798SBarry Smith jrow = ii[i]; 8176cd98798SBarry Smith n = ii[i + 1] - jrow; 8186cd98798SBarry Smith sum1 = 0.0; 8196cd98798SBarry Smith sum2 = 0.0; 8206cd98798SBarry Smith sum3 = 0.0; 8216cd98798SBarry Smith sum4 = 0.0; 8226cd98798SBarry Smith sum5 = 0.0; 8236cd98798SBarry Smith sum6 = 0.0; 82426fbe8dcSKarl Rupp 825b3c51c66SMatthew Knepley nonzerorow += (n > 0); 8266cd98798SBarry Smith for (j = 0; j < n; j++) { 8276cd98798SBarry Smith sum1 += v[jrow] * x[6 * idx[jrow]]; 8286cd98798SBarry Smith sum2 += v[jrow] * x[6 * idx[jrow] + 1]; 8296cd98798SBarry Smith sum3 += v[jrow] * x[6 * idx[jrow] + 2]; 8306cd98798SBarry Smith sum4 += v[jrow] * x[6 * idx[jrow] + 3]; 8316cd98798SBarry Smith sum5 += v[jrow] * x[6 * idx[jrow] + 4]; 8326cd98798SBarry Smith sum6 += v[jrow] * x[6 * idx[jrow] + 5]; 8336cd98798SBarry Smith jrow++; 8346cd98798SBarry Smith } 8356cd98798SBarry Smith y[6 * i] = sum1; 8366cd98798SBarry Smith y[6 * i + 1] = sum2; 8376cd98798SBarry Smith y[6 * i + 2] = sum3; 8386cd98798SBarry Smith y[6 * i + 3] = sum4; 8396cd98798SBarry Smith y[6 * i + 4] = sum5; 8406cd98798SBarry Smith y[6 * i + 5] = sum6; 8416cd98798SBarry Smith } 8426cd98798SBarry Smith 8439566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(12.0 * a->nz - 6.0 * nonzerorow)); 8449566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 8459566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 8466cd98798SBarry Smith PetscFunctionReturn(0); 8476cd98798SBarry Smith } 8486cd98798SBarry Smith 849*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A, Vec xx, Vec yy) 850*d71ae5a4SJacob Faibussowitsch { 8516cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 8526cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 853d2840e09SBarry Smith const PetscScalar *x, *v; 854d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6; 855d2840e09SBarry Smith PetscInt n, i; 856d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 8576cd98798SBarry Smith 8586cd98798SBarry Smith PetscFunctionBegin; 8599566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 8609566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 8619566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 862bfec09a0SHong Zhang 8636cd98798SBarry Smith for (i = 0; i < m; i++) { 864bfec09a0SHong Zhang idx = a->j + a->i[i]; 865bfec09a0SHong Zhang v = a->a + a->i[i]; 8666cd98798SBarry Smith n = a->i[i + 1] - a->i[i]; 8676cd98798SBarry Smith alpha1 = x[6 * i]; 8686cd98798SBarry Smith alpha2 = x[6 * i + 1]; 8696cd98798SBarry Smith alpha3 = x[6 * i + 2]; 8706cd98798SBarry Smith alpha4 = x[6 * i + 3]; 8716cd98798SBarry Smith alpha5 = x[6 * i + 4]; 8726cd98798SBarry Smith alpha6 = x[6 * i + 5]; 8736cd98798SBarry Smith while (n-- > 0) { 8746cd98798SBarry Smith y[6 * (*idx)] += alpha1 * (*v); 8756cd98798SBarry Smith y[6 * (*idx) + 1] += alpha2 * (*v); 8766cd98798SBarry Smith y[6 * (*idx) + 2] += alpha3 * (*v); 8776cd98798SBarry Smith y[6 * (*idx) + 3] += alpha4 * (*v); 8786cd98798SBarry Smith y[6 * (*idx) + 4] += alpha5 * (*v); 8796cd98798SBarry Smith y[6 * (*idx) + 5] += alpha6 * (*v); 8809371c9d4SSatish Balay idx++; 8819371c9d4SSatish Balay v++; 8826cd98798SBarry Smith } 8836cd98798SBarry Smith } 8849566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(12.0 * a->nz)); 8859566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 8869566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 8876cd98798SBarry Smith PetscFunctionReturn(0); 8886cd98798SBarry Smith } 8896cd98798SBarry Smith 890*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz) 891*d71ae5a4SJacob Faibussowitsch { 8926cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 8936cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 894d2840e09SBarry Smith const PetscScalar *x, *v; 895d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6; 896b24ad042SBarry Smith PetscInt n, i, jrow, j; 897d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 8986cd98798SBarry Smith 8996cd98798SBarry Smith PetscFunctionBegin; 9009566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 9019566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 9029566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 9036cd98798SBarry Smith idx = a->j; 9046cd98798SBarry Smith v = a->a; 9056cd98798SBarry Smith ii = a->i; 9066cd98798SBarry Smith 9076cd98798SBarry Smith for (i = 0; i < m; i++) { 9086cd98798SBarry Smith jrow = ii[i]; 9096cd98798SBarry Smith n = ii[i + 1] - jrow; 9106cd98798SBarry Smith sum1 = 0.0; 9116cd98798SBarry Smith sum2 = 0.0; 9126cd98798SBarry Smith sum3 = 0.0; 9136cd98798SBarry Smith sum4 = 0.0; 9146cd98798SBarry Smith sum5 = 0.0; 9156cd98798SBarry Smith sum6 = 0.0; 9166cd98798SBarry Smith for (j = 0; j < n; j++) { 9176cd98798SBarry Smith sum1 += v[jrow] * x[6 * idx[jrow]]; 9186cd98798SBarry Smith sum2 += v[jrow] * x[6 * idx[jrow] + 1]; 9196cd98798SBarry Smith sum3 += v[jrow] * x[6 * idx[jrow] + 2]; 9206cd98798SBarry Smith sum4 += v[jrow] * x[6 * idx[jrow] + 3]; 9216cd98798SBarry Smith sum5 += v[jrow] * x[6 * idx[jrow] + 4]; 9226cd98798SBarry Smith sum6 += v[jrow] * x[6 * idx[jrow] + 5]; 9236cd98798SBarry Smith jrow++; 9246cd98798SBarry Smith } 9256cd98798SBarry Smith y[6 * i] += sum1; 9266cd98798SBarry Smith y[6 * i + 1] += sum2; 9276cd98798SBarry Smith y[6 * i + 2] += sum3; 9286cd98798SBarry Smith y[6 * i + 3] += sum4; 9296cd98798SBarry Smith y[6 * i + 4] += sum5; 9306cd98798SBarry Smith y[6 * i + 5] += sum6; 9316cd98798SBarry Smith } 9326cd98798SBarry Smith 9339566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(12.0 * a->nz)); 9349566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 9359566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 9366cd98798SBarry Smith PetscFunctionReturn(0); 9376cd98798SBarry Smith } 9386cd98798SBarry Smith 939*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz) 940*d71ae5a4SJacob Faibussowitsch { 9416cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 9426cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 943d2840e09SBarry Smith const PetscScalar *x, *v; 944d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6; 945d2840e09SBarry Smith PetscInt n, i; 946d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 9476cd98798SBarry Smith 9486cd98798SBarry Smith PetscFunctionBegin; 9499566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 9509566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 9519566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 952bfec09a0SHong Zhang 9536cd98798SBarry Smith for (i = 0; i < m; i++) { 954bfec09a0SHong Zhang idx = a->j + a->i[i]; 955bfec09a0SHong Zhang v = a->a + a->i[i]; 9566cd98798SBarry Smith n = a->i[i + 1] - a->i[i]; 9576cd98798SBarry Smith alpha1 = x[6 * i]; 9586cd98798SBarry Smith alpha2 = x[6 * i + 1]; 9596cd98798SBarry Smith alpha3 = x[6 * i + 2]; 9606cd98798SBarry Smith alpha4 = x[6 * i + 3]; 9616cd98798SBarry Smith alpha5 = x[6 * i + 4]; 9626cd98798SBarry Smith alpha6 = x[6 * i + 5]; 9636cd98798SBarry Smith while (n-- > 0) { 9646cd98798SBarry Smith y[6 * (*idx)] += alpha1 * (*v); 9656cd98798SBarry Smith y[6 * (*idx) + 1] += alpha2 * (*v); 9666cd98798SBarry Smith y[6 * (*idx) + 2] += alpha3 * (*v); 9676cd98798SBarry Smith y[6 * (*idx) + 3] += alpha4 * (*v); 9686cd98798SBarry Smith y[6 * (*idx) + 4] += alpha5 * (*v); 9696cd98798SBarry Smith y[6 * (*idx) + 5] += alpha6 * (*v); 9709371c9d4SSatish Balay idx++; 9719371c9d4SSatish Balay v++; 9726cd98798SBarry Smith } 9736cd98798SBarry Smith } 9749566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(12.0 * a->nz)); 9759566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 9769566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 9776cd98798SBarry Smith PetscFunctionReturn(0); 9786cd98798SBarry Smith } 9796cd98798SBarry Smith 98066ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/ 981*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqMAIJ_7(Mat A, Vec xx, Vec yy) 982*d71ae5a4SJacob Faibussowitsch { 983ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 984ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 985d2840e09SBarry Smith const PetscScalar *x, *v; 986d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7; 987d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 988d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 989ed8eea03SBarry Smith 990ed8eea03SBarry Smith PetscFunctionBegin; 9919566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 9929566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 993ed8eea03SBarry Smith idx = a->j; 994ed8eea03SBarry Smith v = a->a; 995ed8eea03SBarry Smith ii = a->i; 996ed8eea03SBarry Smith 997ed8eea03SBarry Smith for (i = 0; i < m; i++) { 998ed8eea03SBarry Smith jrow = ii[i]; 999ed8eea03SBarry Smith n = ii[i + 1] - jrow; 1000ed8eea03SBarry Smith sum1 = 0.0; 1001ed8eea03SBarry Smith sum2 = 0.0; 1002ed8eea03SBarry Smith sum3 = 0.0; 1003ed8eea03SBarry Smith sum4 = 0.0; 1004ed8eea03SBarry Smith sum5 = 0.0; 1005ed8eea03SBarry Smith sum6 = 0.0; 1006ed8eea03SBarry Smith sum7 = 0.0; 100726fbe8dcSKarl Rupp 1008b3c51c66SMatthew Knepley nonzerorow += (n > 0); 1009ed8eea03SBarry Smith for (j = 0; j < n; j++) { 1010ed8eea03SBarry Smith sum1 += v[jrow] * x[7 * idx[jrow]]; 1011ed8eea03SBarry Smith sum2 += v[jrow] * x[7 * idx[jrow] + 1]; 1012ed8eea03SBarry Smith sum3 += v[jrow] * x[7 * idx[jrow] + 2]; 1013ed8eea03SBarry Smith sum4 += v[jrow] * x[7 * idx[jrow] + 3]; 1014ed8eea03SBarry Smith sum5 += v[jrow] * x[7 * idx[jrow] + 4]; 1015ed8eea03SBarry Smith sum6 += v[jrow] * x[7 * idx[jrow] + 5]; 1016ed8eea03SBarry Smith sum7 += v[jrow] * x[7 * idx[jrow] + 6]; 1017ed8eea03SBarry Smith jrow++; 1018ed8eea03SBarry Smith } 1019ed8eea03SBarry Smith y[7 * i] = sum1; 1020ed8eea03SBarry Smith y[7 * i + 1] = sum2; 1021ed8eea03SBarry Smith y[7 * i + 2] = sum3; 1022ed8eea03SBarry Smith y[7 * i + 3] = sum4; 1023ed8eea03SBarry Smith y[7 * i + 4] = sum5; 1024ed8eea03SBarry Smith y[7 * i + 5] = sum6; 1025ed8eea03SBarry Smith y[7 * i + 6] = sum7; 1026ed8eea03SBarry Smith } 1027ed8eea03SBarry Smith 10289566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(14.0 * a->nz - 7.0 * nonzerorow)); 10299566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 10309566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 1031ed8eea03SBarry Smith PetscFunctionReturn(0); 1032ed8eea03SBarry Smith } 1033ed8eea03SBarry Smith 1034*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A, Vec xx, Vec yy) 1035*d71ae5a4SJacob Faibussowitsch { 1036ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1037ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1038d2840e09SBarry Smith const PetscScalar *x, *v; 1039d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7; 1040d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 1041d2840e09SBarry Smith PetscInt n, i; 1042ed8eea03SBarry Smith 1043ed8eea03SBarry Smith PetscFunctionBegin; 10449566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 10459566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 10469566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 1047ed8eea03SBarry Smith 1048ed8eea03SBarry Smith for (i = 0; i < m; i++) { 1049ed8eea03SBarry Smith idx = a->j + a->i[i]; 1050ed8eea03SBarry Smith v = a->a + a->i[i]; 1051ed8eea03SBarry Smith n = a->i[i + 1] - a->i[i]; 1052ed8eea03SBarry Smith alpha1 = x[7 * i]; 1053ed8eea03SBarry Smith alpha2 = x[7 * i + 1]; 1054ed8eea03SBarry Smith alpha3 = x[7 * i + 2]; 1055ed8eea03SBarry Smith alpha4 = x[7 * i + 3]; 1056ed8eea03SBarry Smith alpha5 = x[7 * i + 4]; 1057ed8eea03SBarry Smith alpha6 = x[7 * i + 5]; 1058ed8eea03SBarry Smith alpha7 = x[7 * i + 6]; 1059ed8eea03SBarry Smith while (n-- > 0) { 1060ed8eea03SBarry Smith y[7 * (*idx)] += alpha1 * (*v); 1061ed8eea03SBarry Smith y[7 * (*idx) + 1] += alpha2 * (*v); 1062ed8eea03SBarry Smith y[7 * (*idx) + 2] += alpha3 * (*v); 1063ed8eea03SBarry Smith y[7 * (*idx) + 3] += alpha4 * (*v); 1064ed8eea03SBarry Smith y[7 * (*idx) + 4] += alpha5 * (*v); 1065ed8eea03SBarry Smith y[7 * (*idx) + 5] += alpha6 * (*v); 1066ed8eea03SBarry Smith y[7 * (*idx) + 6] += alpha7 * (*v); 10679371c9d4SSatish Balay idx++; 10689371c9d4SSatish Balay v++; 1069ed8eea03SBarry Smith } 1070ed8eea03SBarry Smith } 10719566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(14.0 * a->nz)); 10729566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 10739566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 1074ed8eea03SBarry Smith PetscFunctionReturn(0); 1075ed8eea03SBarry Smith } 1076ed8eea03SBarry Smith 1077*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz) 1078*d71ae5a4SJacob Faibussowitsch { 1079ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1080ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1081d2840e09SBarry Smith const PetscScalar *x, *v; 1082d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1083d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1084ed8eea03SBarry Smith PetscInt n, i, jrow, j; 1085ed8eea03SBarry Smith 1086ed8eea03SBarry Smith PetscFunctionBegin; 10879566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 10889566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 10899566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 1090ed8eea03SBarry Smith idx = a->j; 1091ed8eea03SBarry Smith v = a->a; 1092ed8eea03SBarry Smith ii = a->i; 1093ed8eea03SBarry Smith 1094ed8eea03SBarry Smith for (i = 0; i < m; i++) { 1095ed8eea03SBarry Smith jrow = ii[i]; 1096ed8eea03SBarry Smith n = ii[i + 1] - jrow; 1097ed8eea03SBarry Smith sum1 = 0.0; 1098ed8eea03SBarry Smith sum2 = 0.0; 1099ed8eea03SBarry Smith sum3 = 0.0; 1100ed8eea03SBarry Smith sum4 = 0.0; 1101ed8eea03SBarry Smith sum5 = 0.0; 1102ed8eea03SBarry Smith sum6 = 0.0; 1103ed8eea03SBarry Smith sum7 = 0.0; 1104ed8eea03SBarry Smith for (j = 0; j < n; j++) { 1105ed8eea03SBarry Smith sum1 += v[jrow] * x[7 * idx[jrow]]; 1106ed8eea03SBarry Smith sum2 += v[jrow] * x[7 * idx[jrow] + 1]; 1107ed8eea03SBarry Smith sum3 += v[jrow] * x[7 * idx[jrow] + 2]; 1108ed8eea03SBarry Smith sum4 += v[jrow] * x[7 * idx[jrow] + 3]; 1109ed8eea03SBarry Smith sum5 += v[jrow] * x[7 * idx[jrow] + 4]; 1110ed8eea03SBarry Smith sum6 += v[jrow] * x[7 * idx[jrow] + 5]; 1111ed8eea03SBarry Smith sum7 += v[jrow] * x[7 * idx[jrow] + 6]; 1112ed8eea03SBarry Smith jrow++; 1113ed8eea03SBarry Smith } 1114ed8eea03SBarry Smith y[7 * i] += sum1; 1115ed8eea03SBarry Smith y[7 * i + 1] += sum2; 1116ed8eea03SBarry Smith y[7 * i + 2] += sum3; 1117ed8eea03SBarry Smith y[7 * i + 3] += sum4; 1118ed8eea03SBarry Smith y[7 * i + 4] += sum5; 1119ed8eea03SBarry Smith y[7 * i + 5] += sum6; 1120ed8eea03SBarry Smith y[7 * i + 6] += sum7; 1121ed8eea03SBarry Smith } 1122ed8eea03SBarry Smith 11239566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(14.0 * a->nz)); 11249566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 11259566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 1126ed8eea03SBarry Smith PetscFunctionReturn(0); 1127ed8eea03SBarry Smith } 1128ed8eea03SBarry Smith 1129*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz) 1130*d71ae5a4SJacob Faibussowitsch { 1131ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1132ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1133d2840e09SBarry Smith const PetscScalar *x, *v; 1134d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7; 1135d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 1136d2840e09SBarry Smith PetscInt n, i; 1137ed8eea03SBarry Smith 1138ed8eea03SBarry Smith PetscFunctionBegin; 11399566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 11409566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 11419566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 1142ed8eea03SBarry Smith for (i = 0; i < m; i++) { 1143ed8eea03SBarry Smith idx = a->j + a->i[i]; 1144ed8eea03SBarry Smith v = a->a + a->i[i]; 1145ed8eea03SBarry Smith n = a->i[i + 1] - a->i[i]; 1146ed8eea03SBarry Smith alpha1 = x[7 * i]; 1147ed8eea03SBarry Smith alpha2 = x[7 * i + 1]; 1148ed8eea03SBarry Smith alpha3 = x[7 * i + 2]; 1149ed8eea03SBarry Smith alpha4 = x[7 * i + 3]; 1150ed8eea03SBarry Smith alpha5 = x[7 * i + 4]; 1151ed8eea03SBarry Smith alpha6 = x[7 * i + 5]; 1152ed8eea03SBarry Smith alpha7 = x[7 * i + 6]; 1153ed8eea03SBarry Smith while (n-- > 0) { 1154ed8eea03SBarry Smith y[7 * (*idx)] += alpha1 * (*v); 1155ed8eea03SBarry Smith y[7 * (*idx) + 1] += alpha2 * (*v); 1156ed8eea03SBarry Smith y[7 * (*idx) + 2] += alpha3 * (*v); 1157ed8eea03SBarry Smith y[7 * (*idx) + 3] += alpha4 * (*v); 1158ed8eea03SBarry Smith y[7 * (*idx) + 4] += alpha5 * (*v); 1159ed8eea03SBarry Smith y[7 * (*idx) + 5] += alpha6 * (*v); 1160ed8eea03SBarry Smith y[7 * (*idx) + 6] += alpha7 * (*v); 11619371c9d4SSatish Balay idx++; 11629371c9d4SSatish Balay v++; 1163ed8eea03SBarry Smith } 1164ed8eea03SBarry Smith } 11659566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(14.0 * a->nz)); 11669566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 11679566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 1168ed8eea03SBarry Smith PetscFunctionReturn(0); 1169ed8eea03SBarry Smith } 1170ed8eea03SBarry Smith 1171*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqMAIJ_8(Mat A, Vec xx, Vec yy) 1172*d71ae5a4SJacob Faibussowitsch { 117366ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 117466ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1175d2840e09SBarry Smith const PetscScalar *x, *v; 1176d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1177d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1178d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 117966ed3db0SBarry Smith 118066ed3db0SBarry Smith PetscFunctionBegin; 11819566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 11829566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 118366ed3db0SBarry Smith idx = a->j; 118466ed3db0SBarry Smith v = a->a; 118566ed3db0SBarry Smith ii = a->i; 118666ed3db0SBarry Smith 118766ed3db0SBarry Smith for (i = 0; i < m; i++) { 118866ed3db0SBarry Smith jrow = ii[i]; 118966ed3db0SBarry Smith n = ii[i + 1] - jrow; 119066ed3db0SBarry Smith sum1 = 0.0; 119166ed3db0SBarry Smith sum2 = 0.0; 119266ed3db0SBarry Smith sum3 = 0.0; 119366ed3db0SBarry Smith sum4 = 0.0; 119466ed3db0SBarry Smith sum5 = 0.0; 119566ed3db0SBarry Smith sum6 = 0.0; 119666ed3db0SBarry Smith sum7 = 0.0; 119766ed3db0SBarry Smith sum8 = 0.0; 119826fbe8dcSKarl Rupp 1199b3c51c66SMatthew Knepley nonzerorow += (n > 0); 120066ed3db0SBarry Smith for (j = 0; j < n; j++) { 120166ed3db0SBarry Smith sum1 += v[jrow] * x[8 * idx[jrow]]; 120266ed3db0SBarry Smith sum2 += v[jrow] * x[8 * idx[jrow] + 1]; 120366ed3db0SBarry Smith sum3 += v[jrow] * x[8 * idx[jrow] + 2]; 120466ed3db0SBarry Smith sum4 += v[jrow] * x[8 * idx[jrow] + 3]; 120566ed3db0SBarry Smith sum5 += v[jrow] * x[8 * idx[jrow] + 4]; 120666ed3db0SBarry Smith sum6 += v[jrow] * x[8 * idx[jrow] + 5]; 120766ed3db0SBarry Smith sum7 += v[jrow] * x[8 * idx[jrow] + 6]; 120866ed3db0SBarry Smith sum8 += v[jrow] * x[8 * idx[jrow] + 7]; 120966ed3db0SBarry Smith jrow++; 121066ed3db0SBarry Smith } 121166ed3db0SBarry Smith y[8 * i] = sum1; 121266ed3db0SBarry Smith y[8 * i + 1] = sum2; 121366ed3db0SBarry Smith y[8 * i + 2] = sum3; 121466ed3db0SBarry Smith y[8 * i + 3] = sum4; 121566ed3db0SBarry Smith y[8 * i + 4] = sum5; 121666ed3db0SBarry Smith y[8 * i + 5] = sum6; 121766ed3db0SBarry Smith y[8 * i + 6] = sum7; 121866ed3db0SBarry Smith y[8 * i + 7] = sum8; 121966ed3db0SBarry Smith } 122066ed3db0SBarry Smith 12219566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(16.0 * a->nz - 8.0 * nonzerorow)); 12229566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 12239566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 122466ed3db0SBarry Smith PetscFunctionReturn(0); 122566ed3db0SBarry Smith } 122666ed3db0SBarry Smith 1227*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A, Vec xx, Vec yy) 1228*d71ae5a4SJacob Faibussowitsch { 122966ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 123066ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1231d2840e09SBarry Smith const PetscScalar *x, *v; 1232d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 1233d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 1234d2840e09SBarry Smith PetscInt n, i; 123566ed3db0SBarry Smith 123666ed3db0SBarry Smith PetscFunctionBegin; 12379566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 12389566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 12399566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 1240bfec09a0SHong Zhang 124166ed3db0SBarry Smith for (i = 0; i < m; i++) { 1242bfec09a0SHong Zhang idx = a->j + a->i[i]; 1243bfec09a0SHong Zhang v = a->a + a->i[i]; 124466ed3db0SBarry Smith n = a->i[i + 1] - a->i[i]; 124566ed3db0SBarry Smith alpha1 = x[8 * i]; 124666ed3db0SBarry Smith alpha2 = x[8 * i + 1]; 124766ed3db0SBarry Smith alpha3 = x[8 * i + 2]; 124866ed3db0SBarry Smith alpha4 = x[8 * i + 3]; 124966ed3db0SBarry Smith alpha5 = x[8 * i + 4]; 125066ed3db0SBarry Smith alpha6 = x[8 * i + 5]; 125166ed3db0SBarry Smith alpha7 = x[8 * i + 6]; 125266ed3db0SBarry Smith alpha8 = x[8 * i + 7]; 125366ed3db0SBarry Smith while (n-- > 0) { 125466ed3db0SBarry Smith y[8 * (*idx)] += alpha1 * (*v); 125566ed3db0SBarry Smith y[8 * (*idx) + 1] += alpha2 * (*v); 125666ed3db0SBarry Smith y[8 * (*idx) + 2] += alpha3 * (*v); 125766ed3db0SBarry Smith y[8 * (*idx) + 3] += alpha4 * (*v); 125866ed3db0SBarry Smith y[8 * (*idx) + 4] += alpha5 * (*v); 125966ed3db0SBarry Smith y[8 * (*idx) + 5] += alpha6 * (*v); 126066ed3db0SBarry Smith y[8 * (*idx) + 6] += alpha7 * (*v); 126166ed3db0SBarry Smith y[8 * (*idx) + 7] += alpha8 * (*v); 12629371c9d4SSatish Balay idx++; 12639371c9d4SSatish Balay v++; 126466ed3db0SBarry Smith } 126566ed3db0SBarry Smith } 12669566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(16.0 * a->nz)); 12679566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 12689566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 126966ed3db0SBarry Smith PetscFunctionReturn(0); 127066ed3db0SBarry Smith } 127166ed3db0SBarry Smith 1272*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz) 1273*d71ae5a4SJacob Faibussowitsch { 127466ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 127566ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1276d2840e09SBarry Smith const PetscScalar *x, *v; 1277d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1278d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1279b24ad042SBarry Smith PetscInt n, i, jrow, j; 128066ed3db0SBarry Smith 128166ed3db0SBarry Smith PetscFunctionBegin; 12829566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 12839566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 12849566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 128566ed3db0SBarry Smith idx = a->j; 128666ed3db0SBarry Smith v = a->a; 128766ed3db0SBarry Smith ii = a->i; 128866ed3db0SBarry Smith 128966ed3db0SBarry Smith for (i = 0; i < m; i++) { 129066ed3db0SBarry Smith jrow = ii[i]; 129166ed3db0SBarry Smith n = ii[i + 1] - jrow; 129266ed3db0SBarry Smith sum1 = 0.0; 129366ed3db0SBarry Smith sum2 = 0.0; 129466ed3db0SBarry Smith sum3 = 0.0; 129566ed3db0SBarry Smith sum4 = 0.0; 129666ed3db0SBarry Smith sum5 = 0.0; 129766ed3db0SBarry Smith sum6 = 0.0; 129866ed3db0SBarry Smith sum7 = 0.0; 129966ed3db0SBarry Smith sum8 = 0.0; 130066ed3db0SBarry Smith for (j = 0; j < n; j++) { 130166ed3db0SBarry Smith sum1 += v[jrow] * x[8 * idx[jrow]]; 130266ed3db0SBarry Smith sum2 += v[jrow] * x[8 * idx[jrow] + 1]; 130366ed3db0SBarry Smith sum3 += v[jrow] * x[8 * idx[jrow] + 2]; 130466ed3db0SBarry Smith sum4 += v[jrow] * x[8 * idx[jrow] + 3]; 130566ed3db0SBarry Smith sum5 += v[jrow] * x[8 * idx[jrow] + 4]; 130666ed3db0SBarry Smith sum6 += v[jrow] * x[8 * idx[jrow] + 5]; 130766ed3db0SBarry Smith sum7 += v[jrow] * x[8 * idx[jrow] + 6]; 130866ed3db0SBarry Smith sum8 += v[jrow] * x[8 * idx[jrow] + 7]; 130966ed3db0SBarry Smith jrow++; 131066ed3db0SBarry Smith } 131166ed3db0SBarry Smith y[8 * i] += sum1; 131266ed3db0SBarry Smith y[8 * i + 1] += sum2; 131366ed3db0SBarry Smith y[8 * i + 2] += sum3; 131466ed3db0SBarry Smith y[8 * i + 3] += sum4; 131566ed3db0SBarry Smith y[8 * i + 4] += sum5; 131666ed3db0SBarry Smith y[8 * i + 5] += sum6; 131766ed3db0SBarry Smith y[8 * i + 6] += sum7; 131866ed3db0SBarry Smith y[8 * i + 7] += sum8; 131966ed3db0SBarry Smith } 132066ed3db0SBarry Smith 13219566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(16.0 * a->nz)); 13229566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 13239566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 132466ed3db0SBarry Smith PetscFunctionReturn(0); 132566ed3db0SBarry Smith } 132666ed3db0SBarry Smith 1327*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz) 1328*d71ae5a4SJacob Faibussowitsch { 132966ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 133066ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1331d2840e09SBarry Smith const PetscScalar *x, *v; 1332d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 1333d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 1334d2840e09SBarry Smith PetscInt n, i; 133566ed3db0SBarry Smith 133666ed3db0SBarry Smith PetscFunctionBegin; 13379566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 13389566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 13399566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 134066ed3db0SBarry Smith for (i = 0; i < m; i++) { 1341bfec09a0SHong Zhang idx = a->j + a->i[i]; 1342bfec09a0SHong Zhang v = a->a + a->i[i]; 134366ed3db0SBarry Smith n = a->i[i + 1] - a->i[i]; 134466ed3db0SBarry Smith alpha1 = x[8 * i]; 134566ed3db0SBarry Smith alpha2 = x[8 * i + 1]; 134666ed3db0SBarry Smith alpha3 = x[8 * i + 2]; 134766ed3db0SBarry Smith alpha4 = x[8 * i + 3]; 134866ed3db0SBarry Smith alpha5 = x[8 * i + 4]; 134966ed3db0SBarry Smith alpha6 = x[8 * i + 5]; 135066ed3db0SBarry Smith alpha7 = x[8 * i + 6]; 135166ed3db0SBarry Smith alpha8 = x[8 * i + 7]; 135266ed3db0SBarry Smith while (n-- > 0) { 135366ed3db0SBarry Smith y[8 * (*idx)] += alpha1 * (*v); 135466ed3db0SBarry Smith y[8 * (*idx) + 1] += alpha2 * (*v); 135566ed3db0SBarry Smith y[8 * (*idx) + 2] += alpha3 * (*v); 135666ed3db0SBarry Smith y[8 * (*idx) + 3] += alpha4 * (*v); 135766ed3db0SBarry Smith y[8 * (*idx) + 4] += alpha5 * (*v); 135866ed3db0SBarry Smith y[8 * (*idx) + 5] += alpha6 * (*v); 135966ed3db0SBarry Smith y[8 * (*idx) + 6] += alpha7 * (*v); 136066ed3db0SBarry Smith y[8 * (*idx) + 7] += alpha8 * (*v); 13619371c9d4SSatish Balay idx++; 13629371c9d4SSatish Balay v++; 136366ed3db0SBarry Smith } 136466ed3db0SBarry Smith } 13659566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(16.0 * a->nz)); 13669566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 13679566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 13682f7816d4SBarry Smith PetscFunctionReturn(0); 13692f7816d4SBarry Smith } 13702f7816d4SBarry Smith 13712b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/ 1372*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqMAIJ_9(Mat A, Vec xx, Vec yy) 1373*d71ae5a4SJacob Faibussowitsch { 13742b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 13752b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1376d2840e09SBarry Smith const PetscScalar *x, *v; 1377d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1378d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1379d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 13802b692628SMatthew Knepley 13812b692628SMatthew Knepley PetscFunctionBegin; 13829566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 13839566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 13842b692628SMatthew Knepley idx = a->j; 13852b692628SMatthew Knepley v = a->a; 13862b692628SMatthew Knepley ii = a->i; 13872b692628SMatthew Knepley 13882b692628SMatthew Knepley for (i = 0; i < m; i++) { 13892b692628SMatthew Knepley jrow = ii[i]; 13902b692628SMatthew Knepley n = ii[i + 1] - jrow; 13912b692628SMatthew Knepley sum1 = 0.0; 13922b692628SMatthew Knepley sum2 = 0.0; 13932b692628SMatthew Knepley sum3 = 0.0; 13942b692628SMatthew Knepley sum4 = 0.0; 13952b692628SMatthew Knepley sum5 = 0.0; 13962b692628SMatthew Knepley sum6 = 0.0; 13972b692628SMatthew Knepley sum7 = 0.0; 13982b692628SMatthew Knepley sum8 = 0.0; 13992b692628SMatthew Knepley sum9 = 0.0; 140026fbe8dcSKarl Rupp 1401b3c51c66SMatthew Knepley nonzerorow += (n > 0); 14022b692628SMatthew Knepley for (j = 0; j < n; j++) { 14032b692628SMatthew Knepley sum1 += v[jrow] * x[9 * idx[jrow]]; 14042b692628SMatthew Knepley sum2 += v[jrow] * x[9 * idx[jrow] + 1]; 14052b692628SMatthew Knepley sum3 += v[jrow] * x[9 * idx[jrow] + 2]; 14062b692628SMatthew Knepley sum4 += v[jrow] * x[9 * idx[jrow] + 3]; 14072b692628SMatthew Knepley sum5 += v[jrow] * x[9 * idx[jrow] + 4]; 14082b692628SMatthew Knepley sum6 += v[jrow] * x[9 * idx[jrow] + 5]; 14092b692628SMatthew Knepley sum7 += v[jrow] * x[9 * idx[jrow] + 6]; 14102b692628SMatthew Knepley sum8 += v[jrow] * x[9 * idx[jrow] + 7]; 14112b692628SMatthew Knepley sum9 += v[jrow] * x[9 * idx[jrow] + 8]; 14122b692628SMatthew Knepley jrow++; 14132b692628SMatthew Knepley } 14142b692628SMatthew Knepley y[9 * i] = sum1; 14152b692628SMatthew Knepley y[9 * i + 1] = sum2; 14162b692628SMatthew Knepley y[9 * i + 2] = sum3; 14172b692628SMatthew Knepley y[9 * i + 3] = sum4; 14182b692628SMatthew Knepley y[9 * i + 4] = sum5; 14192b692628SMatthew Knepley y[9 * i + 5] = sum6; 14202b692628SMatthew Knepley y[9 * i + 6] = sum7; 14212b692628SMatthew Knepley y[9 * i + 7] = sum8; 14222b692628SMatthew Knepley y[9 * i + 8] = sum9; 14232b692628SMatthew Knepley } 14242b692628SMatthew Knepley 14259566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(18.0 * a->nz - 9 * nonzerorow)); 14269566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 14279566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 14282b692628SMatthew Knepley PetscFunctionReturn(0); 14292b692628SMatthew Knepley } 14302b692628SMatthew Knepley 1431b9cda22cSBarry Smith /* ------------------------------------------------------------------------------*/ 1432b9cda22cSBarry Smith 1433*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A, Vec xx, Vec yy) 1434*d71ae5a4SJacob Faibussowitsch { 14352b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 14362b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1437d2840e09SBarry Smith const PetscScalar *x, *v; 1438d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9; 1439d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 1440d2840e09SBarry Smith PetscInt n, i; 14412b692628SMatthew Knepley 14422b692628SMatthew Knepley PetscFunctionBegin; 14439566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 14449566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 14459566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 14462b692628SMatthew Knepley 14472b692628SMatthew Knepley for (i = 0; i < m; i++) { 14482b692628SMatthew Knepley idx = a->j + a->i[i]; 14492b692628SMatthew Knepley v = a->a + a->i[i]; 14502b692628SMatthew Knepley n = a->i[i + 1] - a->i[i]; 14512b692628SMatthew Knepley alpha1 = x[9 * i]; 14522b692628SMatthew Knepley alpha2 = x[9 * i + 1]; 14532b692628SMatthew Knepley alpha3 = x[9 * i + 2]; 14542b692628SMatthew Knepley alpha4 = x[9 * i + 3]; 14552b692628SMatthew Knepley alpha5 = x[9 * i + 4]; 14562b692628SMatthew Knepley alpha6 = x[9 * i + 5]; 14572b692628SMatthew Knepley alpha7 = x[9 * i + 6]; 14582b692628SMatthew Knepley alpha8 = x[9 * i + 7]; 14592f6bd0c6SMatthew Knepley alpha9 = x[9 * i + 8]; 14602b692628SMatthew Knepley while (n-- > 0) { 14612b692628SMatthew Knepley y[9 * (*idx)] += alpha1 * (*v); 14622b692628SMatthew Knepley y[9 * (*idx) + 1] += alpha2 * (*v); 14632b692628SMatthew Knepley y[9 * (*idx) + 2] += alpha3 * (*v); 14642b692628SMatthew Knepley y[9 * (*idx) + 3] += alpha4 * (*v); 14652b692628SMatthew Knepley y[9 * (*idx) + 4] += alpha5 * (*v); 14662b692628SMatthew Knepley y[9 * (*idx) + 5] += alpha6 * (*v); 14672b692628SMatthew Knepley y[9 * (*idx) + 6] += alpha7 * (*v); 14682b692628SMatthew Knepley y[9 * (*idx) + 7] += alpha8 * (*v); 14692b692628SMatthew Knepley y[9 * (*idx) + 8] += alpha9 * (*v); 14709371c9d4SSatish Balay idx++; 14719371c9d4SSatish Balay v++; 14722b692628SMatthew Knepley } 14732b692628SMatthew Knepley } 14749566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(18.0 * a->nz)); 14759566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 14769566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 14772b692628SMatthew Knepley PetscFunctionReturn(0); 14782b692628SMatthew Knepley } 14792b692628SMatthew Knepley 1480*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz) 1481*d71ae5a4SJacob Faibussowitsch { 14822b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 14832b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1484d2840e09SBarry Smith const PetscScalar *x, *v; 1485d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1486d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1487b24ad042SBarry Smith PetscInt n, i, jrow, j; 14882b692628SMatthew Knepley 14892b692628SMatthew Knepley PetscFunctionBegin; 14909566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 14919566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 14929566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 14932b692628SMatthew Knepley idx = a->j; 14942b692628SMatthew Knepley v = a->a; 14952b692628SMatthew Knepley ii = a->i; 14962b692628SMatthew Knepley 14972b692628SMatthew Knepley for (i = 0; i < m; i++) { 14982b692628SMatthew Knepley jrow = ii[i]; 14992b692628SMatthew Knepley n = ii[i + 1] - jrow; 15002b692628SMatthew Knepley sum1 = 0.0; 15012b692628SMatthew Knepley sum2 = 0.0; 15022b692628SMatthew Knepley sum3 = 0.0; 15032b692628SMatthew Knepley sum4 = 0.0; 15042b692628SMatthew Knepley sum5 = 0.0; 15052b692628SMatthew Knepley sum6 = 0.0; 15062b692628SMatthew Knepley sum7 = 0.0; 15072b692628SMatthew Knepley sum8 = 0.0; 15082b692628SMatthew Knepley sum9 = 0.0; 15092b692628SMatthew Knepley for (j = 0; j < n; j++) { 15102b692628SMatthew Knepley sum1 += v[jrow] * x[9 * idx[jrow]]; 15112b692628SMatthew Knepley sum2 += v[jrow] * x[9 * idx[jrow] + 1]; 15122b692628SMatthew Knepley sum3 += v[jrow] * x[9 * idx[jrow] + 2]; 15132b692628SMatthew Knepley sum4 += v[jrow] * x[9 * idx[jrow] + 3]; 15142b692628SMatthew Knepley sum5 += v[jrow] * x[9 * idx[jrow] + 4]; 15152b692628SMatthew Knepley sum6 += v[jrow] * x[9 * idx[jrow] + 5]; 15162b692628SMatthew Knepley sum7 += v[jrow] * x[9 * idx[jrow] + 6]; 15172b692628SMatthew Knepley sum8 += v[jrow] * x[9 * idx[jrow] + 7]; 15182b692628SMatthew Knepley sum9 += v[jrow] * x[9 * idx[jrow] + 8]; 15192b692628SMatthew Knepley jrow++; 15202b692628SMatthew Knepley } 15212b692628SMatthew Knepley y[9 * i] += sum1; 15222b692628SMatthew Knepley y[9 * i + 1] += sum2; 15232b692628SMatthew Knepley y[9 * i + 2] += sum3; 15242b692628SMatthew Knepley y[9 * i + 3] += sum4; 15252b692628SMatthew Knepley y[9 * i + 4] += sum5; 15262b692628SMatthew Knepley y[9 * i + 5] += sum6; 15272b692628SMatthew Knepley y[9 * i + 6] += sum7; 15282b692628SMatthew Knepley y[9 * i + 7] += sum8; 15292b692628SMatthew Knepley y[9 * i + 8] += sum9; 15302b692628SMatthew Knepley } 15312b692628SMatthew Knepley 15329566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(18.0 * a->nz)); 15339566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 15349566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 15352b692628SMatthew Knepley PetscFunctionReturn(0); 15362b692628SMatthew Knepley } 15372b692628SMatthew Knepley 1538*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz) 1539*d71ae5a4SJacob Faibussowitsch { 15402b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 15412b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1542d2840e09SBarry Smith const PetscScalar *x, *v; 1543d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9; 1544d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 1545d2840e09SBarry Smith PetscInt n, i; 15462b692628SMatthew Knepley 15472b692628SMatthew Knepley PetscFunctionBegin; 15489566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 15499566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 15509566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 15512b692628SMatthew Knepley for (i = 0; i < m; i++) { 15522b692628SMatthew Knepley idx = a->j + a->i[i]; 15532b692628SMatthew Knepley v = a->a + a->i[i]; 15542b692628SMatthew Knepley n = a->i[i + 1] - a->i[i]; 15552b692628SMatthew Knepley alpha1 = x[9 * i]; 15562b692628SMatthew Knepley alpha2 = x[9 * i + 1]; 15572b692628SMatthew Knepley alpha3 = x[9 * i + 2]; 15582b692628SMatthew Knepley alpha4 = x[9 * i + 3]; 15592b692628SMatthew Knepley alpha5 = x[9 * i + 4]; 15602b692628SMatthew Knepley alpha6 = x[9 * i + 5]; 15612b692628SMatthew Knepley alpha7 = x[9 * i + 6]; 15622b692628SMatthew Knepley alpha8 = x[9 * i + 7]; 15632b692628SMatthew Knepley alpha9 = x[9 * i + 8]; 15642b692628SMatthew Knepley while (n-- > 0) { 15652b692628SMatthew Knepley y[9 * (*idx)] += alpha1 * (*v); 15662b692628SMatthew Knepley y[9 * (*idx) + 1] += alpha2 * (*v); 15672b692628SMatthew Knepley y[9 * (*idx) + 2] += alpha3 * (*v); 15682b692628SMatthew Knepley y[9 * (*idx) + 3] += alpha4 * (*v); 15692b692628SMatthew Knepley y[9 * (*idx) + 4] += alpha5 * (*v); 15702b692628SMatthew Knepley y[9 * (*idx) + 5] += alpha6 * (*v); 15712b692628SMatthew Knepley y[9 * (*idx) + 6] += alpha7 * (*v); 15722b692628SMatthew Knepley y[9 * (*idx) + 7] += alpha8 * (*v); 15732b692628SMatthew Knepley y[9 * (*idx) + 8] += alpha9 * (*v); 15749371c9d4SSatish Balay idx++; 15759371c9d4SSatish Balay v++; 15762b692628SMatthew Knepley } 15772b692628SMatthew Knepley } 15789566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(18.0 * a->nz)); 15799566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 15809566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 15812b692628SMatthew Knepley PetscFunctionReturn(0); 15822b692628SMatthew Knepley } 1583*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqMAIJ_10(Mat A, Vec xx, Vec yy) 1584*d71ae5a4SJacob Faibussowitsch { 1585b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1586b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1587d2840e09SBarry Smith const PetscScalar *x, *v; 1588d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1589d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1590d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 1591b9cda22cSBarry Smith 1592b9cda22cSBarry Smith PetscFunctionBegin; 15939566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 15949566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 1595b9cda22cSBarry Smith idx = a->j; 1596b9cda22cSBarry Smith v = a->a; 1597b9cda22cSBarry Smith ii = a->i; 1598b9cda22cSBarry Smith 1599b9cda22cSBarry Smith for (i = 0; i < m; i++) { 1600b9cda22cSBarry Smith jrow = ii[i]; 1601b9cda22cSBarry Smith n = ii[i + 1] - jrow; 1602b9cda22cSBarry Smith sum1 = 0.0; 1603b9cda22cSBarry Smith sum2 = 0.0; 1604b9cda22cSBarry Smith sum3 = 0.0; 1605b9cda22cSBarry Smith sum4 = 0.0; 1606b9cda22cSBarry Smith sum5 = 0.0; 1607b9cda22cSBarry Smith sum6 = 0.0; 1608b9cda22cSBarry Smith sum7 = 0.0; 1609b9cda22cSBarry Smith sum8 = 0.0; 1610b9cda22cSBarry Smith sum9 = 0.0; 1611b9cda22cSBarry Smith sum10 = 0.0; 161226fbe8dcSKarl Rupp 1613b3c51c66SMatthew Knepley nonzerorow += (n > 0); 1614b9cda22cSBarry Smith for (j = 0; j < n; j++) { 1615b9cda22cSBarry Smith sum1 += v[jrow] * x[10 * idx[jrow]]; 1616b9cda22cSBarry Smith sum2 += v[jrow] * x[10 * idx[jrow] + 1]; 1617b9cda22cSBarry Smith sum3 += v[jrow] * x[10 * idx[jrow] + 2]; 1618b9cda22cSBarry Smith sum4 += v[jrow] * x[10 * idx[jrow] + 3]; 1619b9cda22cSBarry Smith sum5 += v[jrow] * x[10 * idx[jrow] + 4]; 1620b9cda22cSBarry Smith sum6 += v[jrow] * x[10 * idx[jrow] + 5]; 1621b9cda22cSBarry Smith sum7 += v[jrow] * x[10 * idx[jrow] + 6]; 1622b9cda22cSBarry Smith sum8 += v[jrow] * x[10 * idx[jrow] + 7]; 1623b9cda22cSBarry Smith sum9 += v[jrow] * x[10 * idx[jrow] + 8]; 1624b9cda22cSBarry Smith sum10 += v[jrow] * x[10 * idx[jrow] + 9]; 1625b9cda22cSBarry Smith jrow++; 1626b9cda22cSBarry Smith } 1627b9cda22cSBarry Smith y[10 * i] = sum1; 1628b9cda22cSBarry Smith y[10 * i + 1] = sum2; 1629b9cda22cSBarry Smith y[10 * i + 2] = sum3; 1630b9cda22cSBarry Smith y[10 * i + 3] = sum4; 1631b9cda22cSBarry Smith y[10 * i + 4] = sum5; 1632b9cda22cSBarry Smith y[10 * i + 5] = sum6; 1633b9cda22cSBarry Smith y[10 * i + 6] = sum7; 1634b9cda22cSBarry Smith y[10 * i + 7] = sum8; 1635b9cda22cSBarry Smith y[10 * i + 8] = sum9; 1636b9cda22cSBarry Smith y[10 * i + 9] = sum10; 1637b9cda22cSBarry Smith } 1638b9cda22cSBarry Smith 16399566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(20.0 * a->nz - 10.0 * nonzerorow)); 16409566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 16419566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 1642b9cda22cSBarry Smith PetscFunctionReturn(0); 1643b9cda22cSBarry Smith } 1644b9cda22cSBarry Smith 1645*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz) 1646*d71ae5a4SJacob Faibussowitsch { 1647b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1648b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1649d2840e09SBarry Smith const PetscScalar *x, *v; 1650d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1651d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1652b9cda22cSBarry Smith PetscInt n, i, jrow, j; 1653b9cda22cSBarry Smith 1654b9cda22cSBarry Smith PetscFunctionBegin; 16559566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 16569566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 16579566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 1658b9cda22cSBarry Smith idx = a->j; 1659b9cda22cSBarry Smith v = a->a; 1660b9cda22cSBarry Smith ii = a->i; 1661b9cda22cSBarry Smith 1662b9cda22cSBarry Smith for (i = 0; i < m; i++) { 1663b9cda22cSBarry Smith jrow = ii[i]; 1664b9cda22cSBarry Smith n = ii[i + 1] - jrow; 1665b9cda22cSBarry Smith sum1 = 0.0; 1666b9cda22cSBarry Smith sum2 = 0.0; 1667b9cda22cSBarry Smith sum3 = 0.0; 1668b9cda22cSBarry Smith sum4 = 0.0; 1669b9cda22cSBarry Smith sum5 = 0.0; 1670b9cda22cSBarry Smith sum6 = 0.0; 1671b9cda22cSBarry Smith sum7 = 0.0; 1672b9cda22cSBarry Smith sum8 = 0.0; 1673b9cda22cSBarry Smith sum9 = 0.0; 1674b9cda22cSBarry Smith sum10 = 0.0; 1675b9cda22cSBarry Smith for (j = 0; j < n; j++) { 1676b9cda22cSBarry Smith sum1 += v[jrow] * x[10 * idx[jrow]]; 1677b9cda22cSBarry Smith sum2 += v[jrow] * x[10 * idx[jrow] + 1]; 1678b9cda22cSBarry Smith sum3 += v[jrow] * x[10 * idx[jrow] + 2]; 1679b9cda22cSBarry Smith sum4 += v[jrow] * x[10 * idx[jrow] + 3]; 1680b9cda22cSBarry Smith sum5 += v[jrow] * x[10 * idx[jrow] + 4]; 1681b9cda22cSBarry Smith sum6 += v[jrow] * x[10 * idx[jrow] + 5]; 1682b9cda22cSBarry Smith sum7 += v[jrow] * x[10 * idx[jrow] + 6]; 1683b9cda22cSBarry Smith sum8 += v[jrow] * x[10 * idx[jrow] + 7]; 1684b9cda22cSBarry Smith sum9 += v[jrow] * x[10 * idx[jrow] + 8]; 1685b9cda22cSBarry Smith sum10 += v[jrow] * x[10 * idx[jrow] + 9]; 1686b9cda22cSBarry Smith jrow++; 1687b9cda22cSBarry Smith } 1688b9cda22cSBarry Smith y[10 * i] += sum1; 1689b9cda22cSBarry Smith y[10 * i + 1] += sum2; 1690b9cda22cSBarry Smith y[10 * i + 2] += sum3; 1691b9cda22cSBarry Smith y[10 * i + 3] += sum4; 1692b9cda22cSBarry Smith y[10 * i + 4] += sum5; 1693b9cda22cSBarry Smith y[10 * i + 5] += sum6; 1694b9cda22cSBarry Smith y[10 * i + 6] += sum7; 1695b9cda22cSBarry Smith y[10 * i + 7] += sum8; 1696b9cda22cSBarry Smith y[10 * i + 8] += sum9; 1697b9cda22cSBarry Smith y[10 * i + 9] += sum10; 1698b9cda22cSBarry Smith } 1699b9cda22cSBarry Smith 17009566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(20.0 * a->nz)); 17019566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 17029566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 1703b9cda22cSBarry Smith PetscFunctionReturn(0); 1704b9cda22cSBarry Smith } 1705b9cda22cSBarry Smith 1706*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A, Vec xx, Vec yy) 1707*d71ae5a4SJacob Faibussowitsch { 1708b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1709b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1710d2840e09SBarry Smith const PetscScalar *x, *v; 1711d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10; 1712d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 1713d2840e09SBarry Smith PetscInt n, i; 1714b9cda22cSBarry Smith 1715b9cda22cSBarry Smith PetscFunctionBegin; 17169566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 17179566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 17189566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 1719b9cda22cSBarry Smith 1720b9cda22cSBarry Smith for (i = 0; i < m; i++) { 1721b9cda22cSBarry Smith idx = a->j + a->i[i]; 1722b9cda22cSBarry Smith v = a->a + a->i[i]; 1723b9cda22cSBarry Smith n = a->i[i + 1] - a->i[i]; 1724e29fdc22SBarry Smith alpha1 = x[10 * i]; 1725e29fdc22SBarry Smith alpha2 = x[10 * i + 1]; 1726e29fdc22SBarry Smith alpha3 = x[10 * i + 2]; 1727e29fdc22SBarry Smith alpha4 = x[10 * i + 3]; 1728e29fdc22SBarry Smith alpha5 = x[10 * i + 4]; 1729e29fdc22SBarry Smith alpha6 = x[10 * i + 5]; 1730e29fdc22SBarry Smith alpha7 = x[10 * i + 6]; 1731e29fdc22SBarry Smith alpha8 = x[10 * i + 7]; 1732e29fdc22SBarry Smith alpha9 = x[10 * i + 8]; 1733b9cda22cSBarry Smith alpha10 = x[10 * i + 9]; 1734b9cda22cSBarry Smith while (n-- > 0) { 1735e29fdc22SBarry Smith y[10 * (*idx)] += alpha1 * (*v); 1736e29fdc22SBarry Smith y[10 * (*idx) + 1] += alpha2 * (*v); 1737e29fdc22SBarry Smith y[10 * (*idx) + 2] += alpha3 * (*v); 1738e29fdc22SBarry Smith y[10 * (*idx) + 3] += alpha4 * (*v); 1739e29fdc22SBarry Smith y[10 * (*idx) + 4] += alpha5 * (*v); 1740e29fdc22SBarry Smith y[10 * (*idx) + 5] += alpha6 * (*v); 1741e29fdc22SBarry Smith y[10 * (*idx) + 6] += alpha7 * (*v); 1742e29fdc22SBarry Smith y[10 * (*idx) + 7] += alpha8 * (*v); 1743e29fdc22SBarry Smith y[10 * (*idx) + 8] += alpha9 * (*v); 1744b9cda22cSBarry Smith y[10 * (*idx) + 9] += alpha10 * (*v); 17459371c9d4SSatish Balay idx++; 17469371c9d4SSatish Balay v++; 1747b9cda22cSBarry Smith } 1748b9cda22cSBarry Smith } 17499566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(20.0 * a->nz)); 17509566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 17519566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 1752b9cda22cSBarry Smith PetscFunctionReturn(0); 1753b9cda22cSBarry Smith } 1754b9cda22cSBarry Smith 1755*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz) 1756*d71ae5a4SJacob Faibussowitsch { 1757b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1758b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1759d2840e09SBarry Smith const PetscScalar *x, *v; 1760d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10; 1761d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 1762d2840e09SBarry Smith PetscInt n, i; 1763b9cda22cSBarry Smith 1764b9cda22cSBarry Smith PetscFunctionBegin; 17659566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 17669566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 17679566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 1768b9cda22cSBarry Smith for (i = 0; i < m; i++) { 1769b9cda22cSBarry Smith idx = a->j + a->i[i]; 1770b9cda22cSBarry Smith v = a->a + a->i[i]; 1771b9cda22cSBarry Smith n = a->i[i + 1] - a->i[i]; 1772b9cda22cSBarry Smith alpha1 = x[10 * i]; 1773b9cda22cSBarry Smith alpha2 = x[10 * i + 1]; 1774b9cda22cSBarry Smith alpha3 = x[10 * i + 2]; 1775b9cda22cSBarry Smith alpha4 = x[10 * i + 3]; 1776b9cda22cSBarry Smith alpha5 = x[10 * i + 4]; 1777b9cda22cSBarry Smith alpha6 = x[10 * i + 5]; 1778b9cda22cSBarry Smith alpha7 = x[10 * i + 6]; 1779b9cda22cSBarry Smith alpha8 = x[10 * i + 7]; 1780b9cda22cSBarry Smith alpha9 = x[10 * i + 8]; 1781b9cda22cSBarry Smith alpha10 = x[10 * i + 9]; 1782b9cda22cSBarry Smith while (n-- > 0) { 1783b9cda22cSBarry Smith y[10 * (*idx)] += alpha1 * (*v); 1784b9cda22cSBarry Smith y[10 * (*idx) + 1] += alpha2 * (*v); 1785b9cda22cSBarry Smith y[10 * (*idx) + 2] += alpha3 * (*v); 1786b9cda22cSBarry Smith y[10 * (*idx) + 3] += alpha4 * (*v); 1787b9cda22cSBarry Smith y[10 * (*idx) + 4] += alpha5 * (*v); 1788b9cda22cSBarry Smith y[10 * (*idx) + 5] += alpha6 * (*v); 1789b9cda22cSBarry Smith y[10 * (*idx) + 6] += alpha7 * (*v); 1790b9cda22cSBarry Smith y[10 * (*idx) + 7] += alpha8 * (*v); 1791b9cda22cSBarry Smith y[10 * (*idx) + 8] += alpha9 * (*v); 1792b9cda22cSBarry Smith y[10 * (*idx) + 9] += alpha10 * (*v); 17939371c9d4SSatish Balay idx++; 17949371c9d4SSatish Balay v++; 1795b9cda22cSBarry Smith } 1796b9cda22cSBarry Smith } 17979566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(20.0 * a->nz)); 17989566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 17999566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 1800b9cda22cSBarry Smith PetscFunctionReturn(0); 1801b9cda22cSBarry Smith } 1802b9cda22cSBarry Smith 18032f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/ 1804*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqMAIJ_11(Mat A, Vec xx, Vec yy) 1805*d71ae5a4SJacob Faibussowitsch { 1806dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1807dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1808d2840e09SBarry Smith const PetscScalar *x, *v; 1809d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1810d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1811d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 1812dbdd7285SBarry Smith 1813dbdd7285SBarry Smith PetscFunctionBegin; 18149566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 18159566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 1816dbdd7285SBarry Smith idx = a->j; 1817dbdd7285SBarry Smith v = a->a; 1818dbdd7285SBarry Smith ii = a->i; 1819dbdd7285SBarry Smith 1820dbdd7285SBarry Smith for (i = 0; i < m; i++) { 1821dbdd7285SBarry Smith jrow = ii[i]; 1822dbdd7285SBarry Smith n = ii[i + 1] - jrow; 1823dbdd7285SBarry Smith sum1 = 0.0; 1824dbdd7285SBarry Smith sum2 = 0.0; 1825dbdd7285SBarry Smith sum3 = 0.0; 1826dbdd7285SBarry Smith sum4 = 0.0; 1827dbdd7285SBarry Smith sum5 = 0.0; 1828dbdd7285SBarry Smith sum6 = 0.0; 1829dbdd7285SBarry Smith sum7 = 0.0; 1830dbdd7285SBarry Smith sum8 = 0.0; 1831dbdd7285SBarry Smith sum9 = 0.0; 1832dbdd7285SBarry Smith sum10 = 0.0; 1833dbdd7285SBarry Smith sum11 = 0.0; 183426fbe8dcSKarl Rupp 1835dbdd7285SBarry Smith nonzerorow += (n > 0); 1836dbdd7285SBarry Smith for (j = 0; j < n; j++) { 1837dbdd7285SBarry Smith sum1 += v[jrow] * x[11 * idx[jrow]]; 1838dbdd7285SBarry Smith sum2 += v[jrow] * x[11 * idx[jrow] + 1]; 1839dbdd7285SBarry Smith sum3 += v[jrow] * x[11 * idx[jrow] + 2]; 1840dbdd7285SBarry Smith sum4 += v[jrow] * x[11 * idx[jrow] + 3]; 1841dbdd7285SBarry Smith sum5 += v[jrow] * x[11 * idx[jrow] + 4]; 1842dbdd7285SBarry Smith sum6 += v[jrow] * x[11 * idx[jrow] + 5]; 1843dbdd7285SBarry Smith sum7 += v[jrow] * x[11 * idx[jrow] + 6]; 1844dbdd7285SBarry Smith sum8 += v[jrow] * x[11 * idx[jrow] + 7]; 1845dbdd7285SBarry Smith sum9 += v[jrow] * x[11 * idx[jrow] + 8]; 1846dbdd7285SBarry Smith sum10 += v[jrow] * x[11 * idx[jrow] + 9]; 1847dbdd7285SBarry Smith sum11 += v[jrow] * x[11 * idx[jrow] + 10]; 1848dbdd7285SBarry Smith jrow++; 1849dbdd7285SBarry Smith } 1850dbdd7285SBarry Smith y[11 * i] = sum1; 1851dbdd7285SBarry Smith y[11 * i + 1] = sum2; 1852dbdd7285SBarry Smith y[11 * i + 2] = sum3; 1853dbdd7285SBarry Smith y[11 * i + 3] = sum4; 1854dbdd7285SBarry Smith y[11 * i + 4] = sum5; 1855dbdd7285SBarry Smith y[11 * i + 5] = sum6; 1856dbdd7285SBarry Smith y[11 * i + 6] = sum7; 1857dbdd7285SBarry Smith y[11 * i + 7] = sum8; 1858dbdd7285SBarry Smith y[11 * i + 8] = sum9; 1859dbdd7285SBarry Smith y[11 * i + 9] = sum10; 1860dbdd7285SBarry Smith y[11 * i + 10] = sum11; 1861dbdd7285SBarry Smith } 1862dbdd7285SBarry Smith 18639566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(22.0 * a->nz - 11 * nonzerorow)); 18649566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 18659566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 1866dbdd7285SBarry Smith PetscFunctionReturn(0); 1867dbdd7285SBarry Smith } 1868dbdd7285SBarry Smith 1869*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz) 1870*d71ae5a4SJacob Faibussowitsch { 1871dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1872dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1873d2840e09SBarry Smith const PetscScalar *x, *v; 1874d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1875d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1876dbdd7285SBarry Smith PetscInt n, i, jrow, j; 1877dbdd7285SBarry Smith 1878dbdd7285SBarry Smith PetscFunctionBegin; 18799566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 18809566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 18819566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 1882dbdd7285SBarry Smith idx = a->j; 1883dbdd7285SBarry Smith v = a->a; 1884dbdd7285SBarry Smith ii = a->i; 1885dbdd7285SBarry Smith 1886dbdd7285SBarry Smith for (i = 0; i < m; i++) { 1887dbdd7285SBarry Smith jrow = ii[i]; 1888dbdd7285SBarry Smith n = ii[i + 1] - jrow; 1889dbdd7285SBarry Smith sum1 = 0.0; 1890dbdd7285SBarry Smith sum2 = 0.0; 1891dbdd7285SBarry Smith sum3 = 0.0; 1892dbdd7285SBarry Smith sum4 = 0.0; 1893dbdd7285SBarry Smith sum5 = 0.0; 1894dbdd7285SBarry Smith sum6 = 0.0; 1895dbdd7285SBarry Smith sum7 = 0.0; 1896dbdd7285SBarry Smith sum8 = 0.0; 1897dbdd7285SBarry Smith sum9 = 0.0; 1898dbdd7285SBarry Smith sum10 = 0.0; 1899dbdd7285SBarry Smith sum11 = 0.0; 1900dbdd7285SBarry Smith for (j = 0; j < n; j++) { 1901dbdd7285SBarry Smith sum1 += v[jrow] * x[11 * idx[jrow]]; 1902dbdd7285SBarry Smith sum2 += v[jrow] * x[11 * idx[jrow] + 1]; 1903dbdd7285SBarry Smith sum3 += v[jrow] * x[11 * idx[jrow] + 2]; 1904dbdd7285SBarry Smith sum4 += v[jrow] * x[11 * idx[jrow] + 3]; 1905dbdd7285SBarry Smith sum5 += v[jrow] * x[11 * idx[jrow] + 4]; 1906dbdd7285SBarry Smith sum6 += v[jrow] * x[11 * idx[jrow] + 5]; 1907dbdd7285SBarry Smith sum7 += v[jrow] * x[11 * idx[jrow] + 6]; 1908dbdd7285SBarry Smith sum8 += v[jrow] * x[11 * idx[jrow] + 7]; 1909dbdd7285SBarry Smith sum9 += v[jrow] * x[11 * idx[jrow] + 8]; 1910dbdd7285SBarry Smith sum10 += v[jrow] * x[11 * idx[jrow] + 9]; 1911dbdd7285SBarry Smith sum11 += v[jrow] * x[11 * idx[jrow] + 10]; 1912dbdd7285SBarry Smith jrow++; 1913dbdd7285SBarry Smith } 1914dbdd7285SBarry Smith y[11 * i] += sum1; 1915dbdd7285SBarry Smith y[11 * i + 1] += sum2; 1916dbdd7285SBarry Smith y[11 * i + 2] += sum3; 1917dbdd7285SBarry Smith y[11 * i + 3] += sum4; 1918dbdd7285SBarry Smith y[11 * i + 4] += sum5; 1919dbdd7285SBarry Smith y[11 * i + 5] += sum6; 1920dbdd7285SBarry Smith y[11 * i + 6] += sum7; 1921dbdd7285SBarry Smith y[11 * i + 7] += sum8; 1922dbdd7285SBarry Smith y[11 * i + 8] += sum9; 1923dbdd7285SBarry Smith y[11 * i + 9] += sum10; 1924dbdd7285SBarry Smith y[11 * i + 10] += sum11; 1925dbdd7285SBarry Smith } 1926dbdd7285SBarry Smith 19279566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(22.0 * a->nz)); 19289566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 19299566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 1930dbdd7285SBarry Smith PetscFunctionReturn(0); 1931dbdd7285SBarry Smith } 1932dbdd7285SBarry Smith 1933*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A, Vec xx, Vec yy) 1934*d71ae5a4SJacob Faibussowitsch { 1935dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1936dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1937d2840e09SBarry Smith const PetscScalar *x, *v; 1938d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10, alpha11; 1939d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 1940d2840e09SBarry Smith PetscInt n, i; 1941dbdd7285SBarry Smith 1942dbdd7285SBarry Smith PetscFunctionBegin; 19439566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 19449566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 19459566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 1946dbdd7285SBarry Smith 1947dbdd7285SBarry Smith for (i = 0; i < m; i++) { 1948dbdd7285SBarry Smith idx = a->j + a->i[i]; 1949dbdd7285SBarry Smith v = a->a + a->i[i]; 1950dbdd7285SBarry Smith n = a->i[i + 1] - a->i[i]; 1951dbdd7285SBarry Smith alpha1 = x[11 * i]; 1952dbdd7285SBarry Smith alpha2 = x[11 * i + 1]; 1953dbdd7285SBarry Smith alpha3 = x[11 * i + 2]; 1954dbdd7285SBarry Smith alpha4 = x[11 * i + 3]; 1955dbdd7285SBarry Smith alpha5 = x[11 * i + 4]; 1956dbdd7285SBarry Smith alpha6 = x[11 * i + 5]; 1957dbdd7285SBarry Smith alpha7 = x[11 * i + 6]; 1958dbdd7285SBarry Smith alpha8 = x[11 * i + 7]; 1959dbdd7285SBarry Smith alpha9 = x[11 * i + 8]; 1960dbdd7285SBarry Smith alpha10 = x[11 * i + 9]; 1961dbdd7285SBarry Smith alpha11 = x[11 * i + 10]; 1962dbdd7285SBarry Smith while (n-- > 0) { 1963dbdd7285SBarry Smith y[11 * (*idx)] += alpha1 * (*v); 1964dbdd7285SBarry Smith y[11 * (*idx) + 1] += alpha2 * (*v); 1965dbdd7285SBarry Smith y[11 * (*idx) + 2] += alpha3 * (*v); 1966dbdd7285SBarry Smith y[11 * (*idx) + 3] += alpha4 * (*v); 1967dbdd7285SBarry Smith y[11 * (*idx) + 4] += alpha5 * (*v); 1968dbdd7285SBarry Smith y[11 * (*idx) + 5] += alpha6 * (*v); 1969dbdd7285SBarry Smith y[11 * (*idx) + 6] += alpha7 * (*v); 1970dbdd7285SBarry Smith y[11 * (*idx) + 7] += alpha8 * (*v); 1971dbdd7285SBarry Smith y[11 * (*idx) + 8] += alpha9 * (*v); 1972dbdd7285SBarry Smith y[11 * (*idx) + 9] += alpha10 * (*v); 1973dbdd7285SBarry Smith y[11 * (*idx) + 10] += alpha11 * (*v); 19749371c9d4SSatish Balay idx++; 19759371c9d4SSatish Balay v++; 1976dbdd7285SBarry Smith } 1977dbdd7285SBarry Smith } 19789566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(22.0 * a->nz)); 19799566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 19809566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 1981dbdd7285SBarry Smith PetscFunctionReturn(0); 1982dbdd7285SBarry Smith } 1983dbdd7285SBarry Smith 1984*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz) 1985*d71ae5a4SJacob Faibussowitsch { 1986dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1987dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1988d2840e09SBarry Smith const PetscScalar *x, *v; 1989d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10, alpha11; 1990d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 1991d2840e09SBarry Smith PetscInt n, i; 1992dbdd7285SBarry Smith 1993dbdd7285SBarry Smith PetscFunctionBegin; 19949566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 19959566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 19969566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 1997dbdd7285SBarry Smith for (i = 0; i < m; i++) { 1998dbdd7285SBarry Smith idx = a->j + a->i[i]; 1999dbdd7285SBarry Smith v = a->a + a->i[i]; 2000dbdd7285SBarry Smith n = a->i[i + 1] - a->i[i]; 2001dbdd7285SBarry Smith alpha1 = x[11 * i]; 2002dbdd7285SBarry Smith alpha2 = x[11 * i + 1]; 2003dbdd7285SBarry Smith alpha3 = x[11 * i + 2]; 2004dbdd7285SBarry Smith alpha4 = x[11 * i + 3]; 2005dbdd7285SBarry Smith alpha5 = x[11 * i + 4]; 2006dbdd7285SBarry Smith alpha6 = x[11 * i + 5]; 2007dbdd7285SBarry Smith alpha7 = x[11 * i + 6]; 2008dbdd7285SBarry Smith alpha8 = x[11 * i + 7]; 2009dbdd7285SBarry Smith alpha9 = x[11 * i + 8]; 2010dbdd7285SBarry Smith alpha10 = x[11 * i + 9]; 2011dbdd7285SBarry Smith alpha11 = x[11 * i + 10]; 2012dbdd7285SBarry Smith while (n-- > 0) { 2013dbdd7285SBarry Smith y[11 * (*idx)] += alpha1 * (*v); 2014dbdd7285SBarry Smith y[11 * (*idx) + 1] += alpha2 * (*v); 2015dbdd7285SBarry Smith y[11 * (*idx) + 2] += alpha3 * (*v); 2016dbdd7285SBarry Smith y[11 * (*idx) + 3] += alpha4 * (*v); 2017dbdd7285SBarry Smith y[11 * (*idx) + 4] += alpha5 * (*v); 2018dbdd7285SBarry Smith y[11 * (*idx) + 5] += alpha6 * (*v); 2019dbdd7285SBarry Smith y[11 * (*idx) + 6] += alpha7 * (*v); 2020dbdd7285SBarry Smith y[11 * (*idx) + 7] += alpha8 * (*v); 2021dbdd7285SBarry Smith y[11 * (*idx) + 8] += alpha9 * (*v); 2022dbdd7285SBarry Smith y[11 * (*idx) + 9] += alpha10 * (*v); 2023dbdd7285SBarry Smith y[11 * (*idx) + 10] += alpha11 * (*v); 20249371c9d4SSatish Balay idx++; 20259371c9d4SSatish Balay v++; 2026dbdd7285SBarry Smith } 2027dbdd7285SBarry Smith } 20289566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(22.0 * a->nz)); 20299566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 20309566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 2031dbdd7285SBarry Smith PetscFunctionReturn(0); 2032dbdd7285SBarry Smith } 2033dbdd7285SBarry Smith 2034dbdd7285SBarry Smith /*--------------------------------------------------------------------------------------------*/ 2035*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqMAIJ_16(Mat A, Vec xx, Vec yy) 2036*d71ae5a4SJacob Faibussowitsch { 20372f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 20382f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2039d2840e09SBarry Smith const PetscScalar *x, *v; 2040d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 20412f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2042d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 2043d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 20442f7816d4SBarry Smith 20452f7816d4SBarry Smith PetscFunctionBegin; 20469566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 20479566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 20482f7816d4SBarry Smith idx = a->j; 20492f7816d4SBarry Smith v = a->a; 20502f7816d4SBarry Smith ii = a->i; 20512f7816d4SBarry Smith 20522f7816d4SBarry Smith for (i = 0; i < m; i++) { 20532f7816d4SBarry Smith jrow = ii[i]; 20542f7816d4SBarry Smith n = ii[i + 1] - jrow; 20552f7816d4SBarry Smith sum1 = 0.0; 20562f7816d4SBarry Smith sum2 = 0.0; 20572f7816d4SBarry Smith sum3 = 0.0; 20582f7816d4SBarry Smith sum4 = 0.0; 20592f7816d4SBarry Smith sum5 = 0.0; 20602f7816d4SBarry Smith sum6 = 0.0; 20612f7816d4SBarry Smith sum7 = 0.0; 20622f7816d4SBarry Smith sum8 = 0.0; 20632f7816d4SBarry Smith sum9 = 0.0; 20642f7816d4SBarry Smith sum10 = 0.0; 20652f7816d4SBarry Smith sum11 = 0.0; 20662f7816d4SBarry Smith sum12 = 0.0; 20672f7816d4SBarry Smith sum13 = 0.0; 20682f7816d4SBarry Smith sum14 = 0.0; 20692f7816d4SBarry Smith sum15 = 0.0; 20702f7816d4SBarry Smith sum16 = 0.0; 207126fbe8dcSKarl Rupp 2072b3c51c66SMatthew Knepley nonzerorow += (n > 0); 20732f7816d4SBarry Smith for (j = 0; j < n; j++) { 20742f7816d4SBarry Smith sum1 += v[jrow] * x[16 * idx[jrow]]; 20752f7816d4SBarry Smith sum2 += v[jrow] * x[16 * idx[jrow] + 1]; 20762f7816d4SBarry Smith sum3 += v[jrow] * x[16 * idx[jrow] + 2]; 20772f7816d4SBarry Smith sum4 += v[jrow] * x[16 * idx[jrow] + 3]; 20782f7816d4SBarry Smith sum5 += v[jrow] * x[16 * idx[jrow] + 4]; 20792f7816d4SBarry Smith sum6 += v[jrow] * x[16 * idx[jrow] + 5]; 20802f7816d4SBarry Smith sum7 += v[jrow] * x[16 * idx[jrow] + 6]; 20812f7816d4SBarry Smith sum8 += v[jrow] * x[16 * idx[jrow] + 7]; 20822f7816d4SBarry Smith sum9 += v[jrow] * x[16 * idx[jrow] + 8]; 20832f7816d4SBarry Smith sum10 += v[jrow] * x[16 * idx[jrow] + 9]; 20842f7816d4SBarry Smith sum11 += v[jrow] * x[16 * idx[jrow] + 10]; 20852f7816d4SBarry Smith sum12 += v[jrow] * x[16 * idx[jrow] + 11]; 20862f7816d4SBarry Smith sum13 += v[jrow] * x[16 * idx[jrow] + 12]; 20872f7816d4SBarry Smith sum14 += v[jrow] * x[16 * idx[jrow] + 13]; 20882f7816d4SBarry Smith sum15 += v[jrow] * x[16 * idx[jrow] + 14]; 20892f7816d4SBarry Smith sum16 += v[jrow] * x[16 * idx[jrow] + 15]; 20902f7816d4SBarry Smith jrow++; 20912f7816d4SBarry Smith } 20922f7816d4SBarry Smith y[16 * i] = sum1; 20932f7816d4SBarry Smith y[16 * i + 1] = sum2; 20942f7816d4SBarry Smith y[16 * i + 2] = sum3; 20952f7816d4SBarry Smith y[16 * i + 3] = sum4; 20962f7816d4SBarry Smith y[16 * i + 4] = sum5; 20972f7816d4SBarry Smith y[16 * i + 5] = sum6; 20982f7816d4SBarry Smith y[16 * i + 6] = sum7; 20992f7816d4SBarry Smith y[16 * i + 7] = sum8; 21002f7816d4SBarry Smith y[16 * i + 8] = sum9; 21012f7816d4SBarry Smith y[16 * i + 9] = sum10; 21022f7816d4SBarry Smith y[16 * i + 10] = sum11; 21032f7816d4SBarry Smith y[16 * i + 11] = sum12; 21042f7816d4SBarry Smith y[16 * i + 12] = sum13; 21052f7816d4SBarry Smith y[16 * i + 13] = sum14; 21062f7816d4SBarry Smith y[16 * i + 14] = sum15; 21072f7816d4SBarry Smith y[16 * i + 15] = sum16; 21082f7816d4SBarry Smith } 21092f7816d4SBarry Smith 21109566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(32.0 * a->nz - 16.0 * nonzerorow)); 21119566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 21129566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 21132f7816d4SBarry Smith PetscFunctionReturn(0); 21142f7816d4SBarry Smith } 21152f7816d4SBarry Smith 2116*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A, Vec xx, Vec yy) 2117*d71ae5a4SJacob Faibussowitsch { 21182f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 21192f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2120d2840e09SBarry Smith const PetscScalar *x, *v; 2121d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 21222f7816d4SBarry Smith PetscScalar alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16; 2123d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 2124d2840e09SBarry Smith PetscInt n, i; 21252f7816d4SBarry Smith 21262f7816d4SBarry Smith PetscFunctionBegin; 21279566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 21289566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 21299566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 2130bfec09a0SHong Zhang 21312f7816d4SBarry Smith for (i = 0; i < m; i++) { 2132bfec09a0SHong Zhang idx = a->j + a->i[i]; 2133bfec09a0SHong Zhang v = a->a + a->i[i]; 21342f7816d4SBarry Smith n = a->i[i + 1] - a->i[i]; 21352f7816d4SBarry Smith alpha1 = x[16 * i]; 21362f7816d4SBarry Smith alpha2 = x[16 * i + 1]; 21372f7816d4SBarry Smith alpha3 = x[16 * i + 2]; 21382f7816d4SBarry Smith alpha4 = x[16 * i + 3]; 21392f7816d4SBarry Smith alpha5 = x[16 * i + 4]; 21402f7816d4SBarry Smith alpha6 = x[16 * i + 5]; 21412f7816d4SBarry Smith alpha7 = x[16 * i + 6]; 21422f7816d4SBarry Smith alpha8 = x[16 * i + 7]; 21432f7816d4SBarry Smith alpha9 = x[16 * i + 8]; 21442f7816d4SBarry Smith alpha10 = x[16 * i + 9]; 21452f7816d4SBarry Smith alpha11 = x[16 * i + 10]; 21462f7816d4SBarry Smith alpha12 = x[16 * i + 11]; 21472f7816d4SBarry Smith alpha13 = x[16 * i + 12]; 21482f7816d4SBarry Smith alpha14 = x[16 * i + 13]; 21492f7816d4SBarry Smith alpha15 = x[16 * i + 14]; 21502f7816d4SBarry Smith alpha16 = x[16 * i + 15]; 21512f7816d4SBarry Smith while (n-- > 0) { 21522f7816d4SBarry Smith y[16 * (*idx)] += alpha1 * (*v); 21532f7816d4SBarry Smith y[16 * (*idx) + 1] += alpha2 * (*v); 21542f7816d4SBarry Smith y[16 * (*idx) + 2] += alpha3 * (*v); 21552f7816d4SBarry Smith y[16 * (*idx) + 3] += alpha4 * (*v); 21562f7816d4SBarry Smith y[16 * (*idx) + 4] += alpha5 * (*v); 21572f7816d4SBarry Smith y[16 * (*idx) + 5] += alpha6 * (*v); 21582f7816d4SBarry Smith y[16 * (*idx) + 6] += alpha7 * (*v); 21592f7816d4SBarry Smith y[16 * (*idx) + 7] += alpha8 * (*v); 21602f7816d4SBarry Smith y[16 * (*idx) + 8] += alpha9 * (*v); 21612f7816d4SBarry Smith y[16 * (*idx) + 9] += alpha10 * (*v); 21622f7816d4SBarry Smith y[16 * (*idx) + 10] += alpha11 * (*v); 21632f7816d4SBarry Smith y[16 * (*idx) + 11] += alpha12 * (*v); 21642f7816d4SBarry Smith y[16 * (*idx) + 12] += alpha13 * (*v); 21652f7816d4SBarry Smith y[16 * (*idx) + 13] += alpha14 * (*v); 21662f7816d4SBarry Smith y[16 * (*idx) + 14] += alpha15 * (*v); 21672f7816d4SBarry Smith y[16 * (*idx) + 15] += alpha16 * (*v); 21689371c9d4SSatish Balay idx++; 21699371c9d4SSatish Balay v++; 21702f7816d4SBarry Smith } 21712f7816d4SBarry Smith } 21729566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(32.0 * a->nz)); 21739566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 21749566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 21752f7816d4SBarry Smith PetscFunctionReturn(0); 21762f7816d4SBarry Smith } 21772f7816d4SBarry Smith 2178*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz) 2179*d71ae5a4SJacob Faibussowitsch { 21802f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 21812f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2182d2840e09SBarry Smith const PetscScalar *x, *v; 2183d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 21842f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2185d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 2186b24ad042SBarry Smith PetscInt n, i, jrow, j; 21872f7816d4SBarry Smith 21882f7816d4SBarry Smith PetscFunctionBegin; 21899566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 21909566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 21919566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 21922f7816d4SBarry Smith idx = a->j; 21932f7816d4SBarry Smith v = a->a; 21942f7816d4SBarry Smith ii = a->i; 21952f7816d4SBarry Smith 21962f7816d4SBarry Smith for (i = 0; i < m; i++) { 21972f7816d4SBarry Smith jrow = ii[i]; 21982f7816d4SBarry Smith n = ii[i + 1] - jrow; 21992f7816d4SBarry Smith sum1 = 0.0; 22002f7816d4SBarry Smith sum2 = 0.0; 22012f7816d4SBarry Smith sum3 = 0.0; 22022f7816d4SBarry Smith sum4 = 0.0; 22032f7816d4SBarry Smith sum5 = 0.0; 22042f7816d4SBarry Smith sum6 = 0.0; 22052f7816d4SBarry Smith sum7 = 0.0; 22062f7816d4SBarry Smith sum8 = 0.0; 22072f7816d4SBarry Smith sum9 = 0.0; 22082f7816d4SBarry Smith sum10 = 0.0; 22092f7816d4SBarry Smith sum11 = 0.0; 22102f7816d4SBarry Smith sum12 = 0.0; 22112f7816d4SBarry Smith sum13 = 0.0; 22122f7816d4SBarry Smith sum14 = 0.0; 22132f7816d4SBarry Smith sum15 = 0.0; 22142f7816d4SBarry Smith sum16 = 0.0; 22152f7816d4SBarry Smith for (j = 0; j < n; j++) { 22162f7816d4SBarry Smith sum1 += v[jrow] * x[16 * idx[jrow]]; 22172f7816d4SBarry Smith sum2 += v[jrow] * x[16 * idx[jrow] + 1]; 22182f7816d4SBarry Smith sum3 += v[jrow] * x[16 * idx[jrow] + 2]; 22192f7816d4SBarry Smith sum4 += v[jrow] * x[16 * idx[jrow] + 3]; 22202f7816d4SBarry Smith sum5 += v[jrow] * x[16 * idx[jrow] + 4]; 22212f7816d4SBarry Smith sum6 += v[jrow] * x[16 * idx[jrow] + 5]; 22222f7816d4SBarry Smith sum7 += v[jrow] * x[16 * idx[jrow] + 6]; 22232f7816d4SBarry Smith sum8 += v[jrow] * x[16 * idx[jrow] + 7]; 22242f7816d4SBarry Smith sum9 += v[jrow] * x[16 * idx[jrow] + 8]; 22252f7816d4SBarry Smith sum10 += v[jrow] * x[16 * idx[jrow] + 9]; 22262f7816d4SBarry Smith sum11 += v[jrow] * x[16 * idx[jrow] + 10]; 22272f7816d4SBarry Smith sum12 += v[jrow] * x[16 * idx[jrow] + 11]; 22282f7816d4SBarry Smith sum13 += v[jrow] * x[16 * idx[jrow] + 12]; 22292f7816d4SBarry Smith sum14 += v[jrow] * x[16 * idx[jrow] + 13]; 22302f7816d4SBarry Smith sum15 += v[jrow] * x[16 * idx[jrow] + 14]; 22312f7816d4SBarry Smith sum16 += v[jrow] * x[16 * idx[jrow] + 15]; 22322f7816d4SBarry Smith jrow++; 22332f7816d4SBarry Smith } 22342f7816d4SBarry Smith y[16 * i] += sum1; 22352f7816d4SBarry Smith y[16 * i + 1] += sum2; 22362f7816d4SBarry Smith y[16 * i + 2] += sum3; 22372f7816d4SBarry Smith y[16 * i + 3] += sum4; 22382f7816d4SBarry Smith y[16 * i + 4] += sum5; 22392f7816d4SBarry Smith y[16 * i + 5] += sum6; 22402f7816d4SBarry Smith y[16 * i + 6] += sum7; 22412f7816d4SBarry Smith y[16 * i + 7] += sum8; 22422f7816d4SBarry Smith y[16 * i + 8] += sum9; 22432f7816d4SBarry Smith y[16 * i + 9] += sum10; 22442f7816d4SBarry Smith y[16 * i + 10] += sum11; 22452f7816d4SBarry Smith y[16 * i + 11] += sum12; 22462f7816d4SBarry Smith y[16 * i + 12] += sum13; 22472f7816d4SBarry Smith y[16 * i + 13] += sum14; 22482f7816d4SBarry Smith y[16 * i + 14] += sum15; 22492f7816d4SBarry Smith y[16 * i + 15] += sum16; 22502f7816d4SBarry Smith } 22512f7816d4SBarry Smith 22529566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(32.0 * a->nz)); 22539566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 22549566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 22552f7816d4SBarry Smith PetscFunctionReturn(0); 22562f7816d4SBarry Smith } 22572f7816d4SBarry Smith 2258*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz) 2259*d71ae5a4SJacob Faibussowitsch { 22602f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 22612f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2262d2840e09SBarry Smith const PetscScalar *x, *v; 2263d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 22642f7816d4SBarry Smith PetscScalar alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16; 2265d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 2266d2840e09SBarry Smith PetscInt n, i; 22672f7816d4SBarry Smith 22682f7816d4SBarry Smith PetscFunctionBegin; 22699566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 22709566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 22719566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 22722f7816d4SBarry Smith for (i = 0; i < m; i++) { 2273bfec09a0SHong Zhang idx = a->j + a->i[i]; 2274bfec09a0SHong Zhang v = a->a + a->i[i]; 22752f7816d4SBarry Smith n = a->i[i + 1] - a->i[i]; 22762f7816d4SBarry Smith alpha1 = x[16 * i]; 22772f7816d4SBarry Smith alpha2 = x[16 * i + 1]; 22782f7816d4SBarry Smith alpha3 = x[16 * i + 2]; 22792f7816d4SBarry Smith alpha4 = x[16 * i + 3]; 22802f7816d4SBarry Smith alpha5 = x[16 * i + 4]; 22812f7816d4SBarry Smith alpha6 = x[16 * i + 5]; 22822f7816d4SBarry Smith alpha7 = x[16 * i + 6]; 22832f7816d4SBarry Smith alpha8 = x[16 * i + 7]; 22842f7816d4SBarry Smith alpha9 = x[16 * i + 8]; 22852f7816d4SBarry Smith alpha10 = x[16 * i + 9]; 22862f7816d4SBarry Smith alpha11 = x[16 * i + 10]; 22872f7816d4SBarry Smith alpha12 = x[16 * i + 11]; 22882f7816d4SBarry Smith alpha13 = x[16 * i + 12]; 22892f7816d4SBarry Smith alpha14 = x[16 * i + 13]; 22902f7816d4SBarry Smith alpha15 = x[16 * i + 14]; 22912f7816d4SBarry Smith alpha16 = x[16 * i + 15]; 22922f7816d4SBarry Smith while (n-- > 0) { 22932f7816d4SBarry Smith y[16 * (*idx)] += alpha1 * (*v); 22942f7816d4SBarry Smith y[16 * (*idx) + 1] += alpha2 * (*v); 22952f7816d4SBarry Smith y[16 * (*idx) + 2] += alpha3 * (*v); 22962f7816d4SBarry Smith y[16 * (*idx) + 3] += alpha4 * (*v); 22972f7816d4SBarry Smith y[16 * (*idx) + 4] += alpha5 * (*v); 22982f7816d4SBarry Smith y[16 * (*idx) + 5] += alpha6 * (*v); 22992f7816d4SBarry Smith y[16 * (*idx) + 6] += alpha7 * (*v); 23002f7816d4SBarry Smith y[16 * (*idx) + 7] += alpha8 * (*v); 23012f7816d4SBarry Smith y[16 * (*idx) + 8] += alpha9 * (*v); 23022f7816d4SBarry Smith y[16 * (*idx) + 9] += alpha10 * (*v); 23032f7816d4SBarry Smith y[16 * (*idx) + 10] += alpha11 * (*v); 23042f7816d4SBarry Smith y[16 * (*idx) + 11] += alpha12 * (*v); 23052f7816d4SBarry Smith y[16 * (*idx) + 12] += alpha13 * (*v); 23062f7816d4SBarry Smith y[16 * (*idx) + 13] += alpha14 * (*v); 23072f7816d4SBarry Smith y[16 * (*idx) + 14] += alpha15 * (*v); 23082f7816d4SBarry Smith y[16 * (*idx) + 15] += alpha16 * (*v); 23099371c9d4SSatish Balay idx++; 23109371c9d4SSatish Balay v++; 23112f7816d4SBarry Smith } 23122f7816d4SBarry Smith } 23139566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(32.0 * a->nz)); 23149566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 23159566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 231666ed3db0SBarry Smith PetscFunctionReturn(0); 231766ed3db0SBarry Smith } 231866ed3db0SBarry Smith 2319ed1c418cSBarry Smith /*--------------------------------------------------------------------------------------------*/ 2320*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqMAIJ_18(Mat A, Vec xx, Vec yy) 2321*d71ae5a4SJacob Faibussowitsch { 2322ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2323ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2324d2840e09SBarry Smith const PetscScalar *x, *v; 2325d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2326ed1c418cSBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2327d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 2328d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 2329ed1c418cSBarry Smith 2330ed1c418cSBarry Smith PetscFunctionBegin; 23319566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 23329566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 2333ed1c418cSBarry Smith idx = a->j; 2334ed1c418cSBarry Smith v = a->a; 2335ed1c418cSBarry Smith ii = a->i; 2336ed1c418cSBarry Smith 2337ed1c418cSBarry Smith for (i = 0; i < m; i++) { 2338ed1c418cSBarry Smith jrow = ii[i]; 2339ed1c418cSBarry Smith n = ii[i + 1] - jrow; 2340ed1c418cSBarry Smith sum1 = 0.0; 2341ed1c418cSBarry Smith sum2 = 0.0; 2342ed1c418cSBarry Smith sum3 = 0.0; 2343ed1c418cSBarry Smith sum4 = 0.0; 2344ed1c418cSBarry Smith sum5 = 0.0; 2345ed1c418cSBarry Smith sum6 = 0.0; 2346ed1c418cSBarry Smith sum7 = 0.0; 2347ed1c418cSBarry Smith sum8 = 0.0; 2348ed1c418cSBarry Smith sum9 = 0.0; 2349ed1c418cSBarry Smith sum10 = 0.0; 2350ed1c418cSBarry Smith sum11 = 0.0; 2351ed1c418cSBarry Smith sum12 = 0.0; 2352ed1c418cSBarry Smith sum13 = 0.0; 2353ed1c418cSBarry Smith sum14 = 0.0; 2354ed1c418cSBarry Smith sum15 = 0.0; 2355ed1c418cSBarry Smith sum16 = 0.0; 2356ed1c418cSBarry Smith sum17 = 0.0; 2357ed1c418cSBarry Smith sum18 = 0.0; 235826fbe8dcSKarl Rupp 2359ed1c418cSBarry Smith nonzerorow += (n > 0); 2360ed1c418cSBarry Smith for (j = 0; j < n; j++) { 2361ed1c418cSBarry Smith sum1 += v[jrow] * x[18 * idx[jrow]]; 2362ed1c418cSBarry Smith sum2 += v[jrow] * x[18 * idx[jrow] + 1]; 2363ed1c418cSBarry Smith sum3 += v[jrow] * x[18 * idx[jrow] + 2]; 2364ed1c418cSBarry Smith sum4 += v[jrow] * x[18 * idx[jrow] + 3]; 2365ed1c418cSBarry Smith sum5 += v[jrow] * x[18 * idx[jrow] + 4]; 2366ed1c418cSBarry Smith sum6 += v[jrow] * x[18 * idx[jrow] + 5]; 2367ed1c418cSBarry Smith sum7 += v[jrow] * x[18 * idx[jrow] + 6]; 2368ed1c418cSBarry Smith sum8 += v[jrow] * x[18 * idx[jrow] + 7]; 2369ed1c418cSBarry Smith sum9 += v[jrow] * x[18 * idx[jrow] + 8]; 2370ed1c418cSBarry Smith sum10 += v[jrow] * x[18 * idx[jrow] + 9]; 2371ed1c418cSBarry Smith sum11 += v[jrow] * x[18 * idx[jrow] + 10]; 2372ed1c418cSBarry Smith sum12 += v[jrow] * x[18 * idx[jrow] + 11]; 2373ed1c418cSBarry Smith sum13 += v[jrow] * x[18 * idx[jrow] + 12]; 2374ed1c418cSBarry Smith sum14 += v[jrow] * x[18 * idx[jrow] + 13]; 2375ed1c418cSBarry Smith sum15 += v[jrow] * x[18 * idx[jrow] + 14]; 2376ed1c418cSBarry Smith sum16 += v[jrow] * x[18 * idx[jrow] + 15]; 2377ed1c418cSBarry Smith sum17 += v[jrow] * x[18 * idx[jrow] + 16]; 2378ed1c418cSBarry Smith sum18 += v[jrow] * x[18 * idx[jrow] + 17]; 2379ed1c418cSBarry Smith jrow++; 2380ed1c418cSBarry Smith } 2381ed1c418cSBarry Smith y[18 * i] = sum1; 2382ed1c418cSBarry Smith y[18 * i + 1] = sum2; 2383ed1c418cSBarry Smith y[18 * i + 2] = sum3; 2384ed1c418cSBarry Smith y[18 * i + 3] = sum4; 2385ed1c418cSBarry Smith y[18 * i + 4] = sum5; 2386ed1c418cSBarry Smith y[18 * i + 5] = sum6; 2387ed1c418cSBarry Smith y[18 * i + 6] = sum7; 2388ed1c418cSBarry Smith y[18 * i + 7] = sum8; 2389ed1c418cSBarry Smith y[18 * i + 8] = sum9; 2390ed1c418cSBarry Smith y[18 * i + 9] = sum10; 2391ed1c418cSBarry Smith y[18 * i + 10] = sum11; 2392ed1c418cSBarry Smith y[18 * i + 11] = sum12; 2393ed1c418cSBarry Smith y[18 * i + 12] = sum13; 2394ed1c418cSBarry Smith y[18 * i + 13] = sum14; 2395ed1c418cSBarry Smith y[18 * i + 14] = sum15; 2396ed1c418cSBarry Smith y[18 * i + 15] = sum16; 2397ed1c418cSBarry Smith y[18 * i + 16] = sum17; 2398ed1c418cSBarry Smith y[18 * i + 17] = sum18; 2399ed1c418cSBarry Smith } 2400ed1c418cSBarry Smith 24019566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(36.0 * a->nz - 18.0 * nonzerorow)); 24029566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 24039566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 2404ed1c418cSBarry Smith PetscFunctionReturn(0); 2405ed1c418cSBarry Smith } 2406ed1c418cSBarry Smith 2407*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A, Vec xx, Vec yy) 2408*d71ae5a4SJacob Faibussowitsch { 2409ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2410ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2411d2840e09SBarry Smith const PetscScalar *x, *v; 2412d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 2413ed1c418cSBarry Smith PetscScalar alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16, alpha17, alpha18; 2414d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 2415d2840e09SBarry Smith PetscInt n, i; 2416ed1c418cSBarry Smith 2417ed1c418cSBarry Smith PetscFunctionBegin; 24189566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 24199566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 24209566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 2421ed1c418cSBarry Smith 2422ed1c418cSBarry Smith for (i = 0; i < m; i++) { 2423ed1c418cSBarry Smith idx = a->j + a->i[i]; 2424ed1c418cSBarry Smith v = a->a + a->i[i]; 2425ed1c418cSBarry Smith n = a->i[i + 1] - a->i[i]; 2426ed1c418cSBarry Smith alpha1 = x[18 * i]; 2427ed1c418cSBarry Smith alpha2 = x[18 * i + 1]; 2428ed1c418cSBarry Smith alpha3 = x[18 * i + 2]; 2429ed1c418cSBarry Smith alpha4 = x[18 * i + 3]; 2430ed1c418cSBarry Smith alpha5 = x[18 * i + 4]; 2431ed1c418cSBarry Smith alpha6 = x[18 * i + 5]; 2432ed1c418cSBarry Smith alpha7 = x[18 * i + 6]; 2433ed1c418cSBarry Smith alpha8 = x[18 * i + 7]; 2434ed1c418cSBarry Smith alpha9 = x[18 * i + 8]; 2435ed1c418cSBarry Smith alpha10 = x[18 * i + 9]; 2436ed1c418cSBarry Smith alpha11 = x[18 * i + 10]; 2437ed1c418cSBarry Smith alpha12 = x[18 * i + 11]; 2438ed1c418cSBarry Smith alpha13 = x[18 * i + 12]; 2439ed1c418cSBarry Smith alpha14 = x[18 * i + 13]; 2440ed1c418cSBarry Smith alpha15 = x[18 * i + 14]; 2441ed1c418cSBarry Smith alpha16 = x[18 * i + 15]; 2442ed1c418cSBarry Smith alpha17 = x[18 * i + 16]; 2443ed1c418cSBarry Smith alpha18 = x[18 * i + 17]; 2444ed1c418cSBarry Smith while (n-- > 0) { 2445ed1c418cSBarry Smith y[18 * (*idx)] += alpha1 * (*v); 2446ed1c418cSBarry Smith y[18 * (*idx) + 1] += alpha2 * (*v); 2447ed1c418cSBarry Smith y[18 * (*idx) + 2] += alpha3 * (*v); 2448ed1c418cSBarry Smith y[18 * (*idx) + 3] += alpha4 * (*v); 2449ed1c418cSBarry Smith y[18 * (*idx) + 4] += alpha5 * (*v); 2450ed1c418cSBarry Smith y[18 * (*idx) + 5] += alpha6 * (*v); 2451ed1c418cSBarry Smith y[18 * (*idx) + 6] += alpha7 * (*v); 2452ed1c418cSBarry Smith y[18 * (*idx) + 7] += alpha8 * (*v); 2453ed1c418cSBarry Smith y[18 * (*idx) + 8] += alpha9 * (*v); 2454ed1c418cSBarry Smith y[18 * (*idx) + 9] += alpha10 * (*v); 2455ed1c418cSBarry Smith y[18 * (*idx) + 10] += alpha11 * (*v); 2456ed1c418cSBarry Smith y[18 * (*idx) + 11] += alpha12 * (*v); 2457ed1c418cSBarry Smith y[18 * (*idx) + 12] += alpha13 * (*v); 2458ed1c418cSBarry Smith y[18 * (*idx) + 13] += alpha14 * (*v); 2459ed1c418cSBarry Smith y[18 * (*idx) + 14] += alpha15 * (*v); 2460ed1c418cSBarry Smith y[18 * (*idx) + 15] += alpha16 * (*v); 2461ed1c418cSBarry Smith y[18 * (*idx) + 16] += alpha17 * (*v); 2462ed1c418cSBarry Smith y[18 * (*idx) + 17] += alpha18 * (*v); 24639371c9d4SSatish Balay idx++; 24649371c9d4SSatish Balay v++; 2465ed1c418cSBarry Smith } 2466ed1c418cSBarry Smith } 24679566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(36.0 * a->nz)); 24689566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 24699566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 2470ed1c418cSBarry Smith PetscFunctionReturn(0); 2471ed1c418cSBarry Smith } 2472ed1c418cSBarry Smith 2473*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz) 2474*d71ae5a4SJacob Faibussowitsch { 2475ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2476ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2477d2840e09SBarry Smith const PetscScalar *x, *v; 2478d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2479ed1c418cSBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2480d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 2481ed1c418cSBarry Smith PetscInt n, i, jrow, j; 2482ed1c418cSBarry Smith 2483ed1c418cSBarry Smith PetscFunctionBegin; 24849566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 24859566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 24869566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 2487ed1c418cSBarry Smith idx = a->j; 2488ed1c418cSBarry Smith v = a->a; 2489ed1c418cSBarry Smith ii = a->i; 2490ed1c418cSBarry Smith 2491ed1c418cSBarry Smith for (i = 0; i < m; i++) { 2492ed1c418cSBarry Smith jrow = ii[i]; 2493ed1c418cSBarry Smith n = ii[i + 1] - jrow; 2494ed1c418cSBarry Smith sum1 = 0.0; 2495ed1c418cSBarry Smith sum2 = 0.0; 2496ed1c418cSBarry Smith sum3 = 0.0; 2497ed1c418cSBarry Smith sum4 = 0.0; 2498ed1c418cSBarry Smith sum5 = 0.0; 2499ed1c418cSBarry Smith sum6 = 0.0; 2500ed1c418cSBarry Smith sum7 = 0.0; 2501ed1c418cSBarry Smith sum8 = 0.0; 2502ed1c418cSBarry Smith sum9 = 0.0; 2503ed1c418cSBarry Smith sum10 = 0.0; 2504ed1c418cSBarry Smith sum11 = 0.0; 2505ed1c418cSBarry Smith sum12 = 0.0; 2506ed1c418cSBarry Smith sum13 = 0.0; 2507ed1c418cSBarry Smith sum14 = 0.0; 2508ed1c418cSBarry Smith sum15 = 0.0; 2509ed1c418cSBarry Smith sum16 = 0.0; 2510ed1c418cSBarry Smith sum17 = 0.0; 2511ed1c418cSBarry Smith sum18 = 0.0; 2512ed1c418cSBarry Smith for (j = 0; j < n; j++) { 2513ed1c418cSBarry Smith sum1 += v[jrow] * x[18 * idx[jrow]]; 2514ed1c418cSBarry Smith sum2 += v[jrow] * x[18 * idx[jrow] + 1]; 2515ed1c418cSBarry Smith sum3 += v[jrow] * x[18 * idx[jrow] + 2]; 2516ed1c418cSBarry Smith sum4 += v[jrow] * x[18 * idx[jrow] + 3]; 2517ed1c418cSBarry Smith sum5 += v[jrow] * x[18 * idx[jrow] + 4]; 2518ed1c418cSBarry Smith sum6 += v[jrow] * x[18 * idx[jrow] + 5]; 2519ed1c418cSBarry Smith sum7 += v[jrow] * x[18 * idx[jrow] + 6]; 2520ed1c418cSBarry Smith sum8 += v[jrow] * x[18 * idx[jrow] + 7]; 2521ed1c418cSBarry Smith sum9 += v[jrow] * x[18 * idx[jrow] + 8]; 2522ed1c418cSBarry Smith sum10 += v[jrow] * x[18 * idx[jrow] + 9]; 2523ed1c418cSBarry Smith sum11 += v[jrow] * x[18 * idx[jrow] + 10]; 2524ed1c418cSBarry Smith sum12 += v[jrow] * x[18 * idx[jrow] + 11]; 2525ed1c418cSBarry Smith sum13 += v[jrow] * x[18 * idx[jrow] + 12]; 2526ed1c418cSBarry Smith sum14 += v[jrow] * x[18 * idx[jrow] + 13]; 2527ed1c418cSBarry Smith sum15 += v[jrow] * x[18 * idx[jrow] + 14]; 2528ed1c418cSBarry Smith sum16 += v[jrow] * x[18 * idx[jrow] + 15]; 2529ed1c418cSBarry Smith sum17 += v[jrow] * x[18 * idx[jrow] + 16]; 2530ed1c418cSBarry Smith sum18 += v[jrow] * x[18 * idx[jrow] + 17]; 2531ed1c418cSBarry Smith jrow++; 2532ed1c418cSBarry Smith } 2533ed1c418cSBarry Smith y[18 * i] += sum1; 2534ed1c418cSBarry Smith y[18 * i + 1] += sum2; 2535ed1c418cSBarry Smith y[18 * i + 2] += sum3; 2536ed1c418cSBarry Smith y[18 * i + 3] += sum4; 2537ed1c418cSBarry Smith y[18 * i + 4] += sum5; 2538ed1c418cSBarry Smith y[18 * i + 5] += sum6; 2539ed1c418cSBarry Smith y[18 * i + 6] += sum7; 2540ed1c418cSBarry Smith y[18 * i + 7] += sum8; 2541ed1c418cSBarry Smith y[18 * i + 8] += sum9; 2542ed1c418cSBarry Smith y[18 * i + 9] += sum10; 2543ed1c418cSBarry Smith y[18 * i + 10] += sum11; 2544ed1c418cSBarry Smith y[18 * i + 11] += sum12; 2545ed1c418cSBarry Smith y[18 * i + 12] += sum13; 2546ed1c418cSBarry Smith y[18 * i + 13] += sum14; 2547ed1c418cSBarry Smith y[18 * i + 14] += sum15; 2548ed1c418cSBarry Smith y[18 * i + 15] += sum16; 2549ed1c418cSBarry Smith y[18 * i + 16] += sum17; 2550ed1c418cSBarry Smith y[18 * i + 17] += sum18; 2551ed1c418cSBarry Smith } 2552ed1c418cSBarry Smith 25539566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(36.0 * a->nz)); 25549566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 25559566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 2556ed1c418cSBarry Smith PetscFunctionReturn(0); 2557ed1c418cSBarry Smith } 2558ed1c418cSBarry Smith 2559*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz) 2560*d71ae5a4SJacob Faibussowitsch { 2561ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2562ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2563d2840e09SBarry Smith const PetscScalar *x, *v; 2564d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 2565ed1c418cSBarry Smith PetscScalar alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16, alpha17, alpha18; 2566d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 2567d2840e09SBarry Smith PetscInt n, i; 2568ed1c418cSBarry Smith 2569ed1c418cSBarry Smith PetscFunctionBegin; 25709566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 25719566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 25729566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 2573ed1c418cSBarry Smith for (i = 0; i < m; i++) { 2574ed1c418cSBarry Smith idx = a->j + a->i[i]; 2575ed1c418cSBarry Smith v = a->a + a->i[i]; 2576ed1c418cSBarry Smith n = a->i[i + 1] - a->i[i]; 2577ed1c418cSBarry Smith alpha1 = x[18 * i]; 2578ed1c418cSBarry Smith alpha2 = x[18 * i + 1]; 2579ed1c418cSBarry Smith alpha3 = x[18 * i + 2]; 2580ed1c418cSBarry Smith alpha4 = x[18 * i + 3]; 2581ed1c418cSBarry Smith alpha5 = x[18 * i + 4]; 2582ed1c418cSBarry Smith alpha6 = x[18 * i + 5]; 2583ed1c418cSBarry Smith alpha7 = x[18 * i + 6]; 2584ed1c418cSBarry Smith alpha8 = x[18 * i + 7]; 2585ed1c418cSBarry Smith alpha9 = x[18 * i + 8]; 2586ed1c418cSBarry Smith alpha10 = x[18 * i + 9]; 2587ed1c418cSBarry Smith alpha11 = x[18 * i + 10]; 2588ed1c418cSBarry Smith alpha12 = x[18 * i + 11]; 2589ed1c418cSBarry Smith alpha13 = x[18 * i + 12]; 2590ed1c418cSBarry Smith alpha14 = x[18 * i + 13]; 2591ed1c418cSBarry Smith alpha15 = x[18 * i + 14]; 2592ed1c418cSBarry Smith alpha16 = x[18 * i + 15]; 2593ed1c418cSBarry Smith alpha17 = x[18 * i + 16]; 2594ed1c418cSBarry Smith alpha18 = x[18 * i + 17]; 2595ed1c418cSBarry Smith while (n-- > 0) { 2596ed1c418cSBarry Smith y[18 * (*idx)] += alpha1 * (*v); 2597ed1c418cSBarry Smith y[18 * (*idx) + 1] += alpha2 * (*v); 2598ed1c418cSBarry Smith y[18 * (*idx) + 2] += alpha3 * (*v); 2599ed1c418cSBarry Smith y[18 * (*idx) + 3] += alpha4 * (*v); 2600ed1c418cSBarry Smith y[18 * (*idx) + 4] += alpha5 * (*v); 2601ed1c418cSBarry Smith y[18 * (*idx) + 5] += alpha6 * (*v); 2602ed1c418cSBarry Smith y[18 * (*idx) + 6] += alpha7 * (*v); 2603ed1c418cSBarry Smith y[18 * (*idx) + 7] += alpha8 * (*v); 2604ed1c418cSBarry Smith y[18 * (*idx) + 8] += alpha9 * (*v); 2605ed1c418cSBarry Smith y[18 * (*idx) + 9] += alpha10 * (*v); 2606ed1c418cSBarry Smith y[18 * (*idx) + 10] += alpha11 * (*v); 2607ed1c418cSBarry Smith y[18 * (*idx) + 11] += alpha12 * (*v); 2608ed1c418cSBarry Smith y[18 * (*idx) + 12] += alpha13 * (*v); 2609ed1c418cSBarry Smith y[18 * (*idx) + 13] += alpha14 * (*v); 2610ed1c418cSBarry Smith y[18 * (*idx) + 14] += alpha15 * (*v); 2611ed1c418cSBarry Smith y[18 * (*idx) + 15] += alpha16 * (*v); 2612ed1c418cSBarry Smith y[18 * (*idx) + 16] += alpha17 * (*v); 2613ed1c418cSBarry Smith y[18 * (*idx) + 17] += alpha18 * (*v); 26149371c9d4SSatish Balay idx++; 26159371c9d4SSatish Balay v++; 2616ed1c418cSBarry Smith } 2617ed1c418cSBarry Smith } 26189566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(36.0 * a->nz)); 26199566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 26209566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 2621ed1c418cSBarry Smith PetscFunctionReturn(0); 2622ed1c418cSBarry Smith } 2623ed1c418cSBarry Smith 2624*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy) 2625*d71ae5a4SJacob Faibussowitsch { 26266861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 26276861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 26286861f193SBarry Smith const PetscScalar *x, *v; 26296861f193SBarry Smith PetscScalar *y, *sums; 26306861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 26316861f193SBarry Smith PetscInt n, i, jrow, j, dof = b->dof, k; 26326861f193SBarry Smith 26336861f193SBarry Smith PetscFunctionBegin; 26349566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 26359566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 26369566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 26376861f193SBarry Smith idx = a->j; 26386861f193SBarry Smith v = a->a; 26396861f193SBarry Smith ii = a->i; 26406861f193SBarry Smith 26416861f193SBarry Smith for (i = 0; i < m; i++) { 26426861f193SBarry Smith jrow = ii[i]; 26436861f193SBarry Smith n = ii[i + 1] - jrow; 26446861f193SBarry Smith sums = y + dof * i; 26456861f193SBarry Smith for (j = 0; j < n; j++) { 2646ad540459SPierre Jolivet for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k]; 26476861f193SBarry Smith jrow++; 26486861f193SBarry Smith } 26496861f193SBarry Smith } 26506861f193SBarry Smith 26519566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 26529566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 26539566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 26546861f193SBarry Smith PetscFunctionReturn(0); 26556861f193SBarry Smith } 26566861f193SBarry Smith 2657*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) 2658*d71ae5a4SJacob Faibussowitsch { 26596861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 26606861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 26616861f193SBarry Smith const PetscScalar *x, *v; 26626861f193SBarry Smith PetscScalar *y, *sums; 26636861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 26646861f193SBarry Smith PetscInt n, i, jrow, j, dof = b->dof, k; 26656861f193SBarry Smith 26666861f193SBarry Smith PetscFunctionBegin; 26679566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 26689566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 26699566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 26706861f193SBarry Smith idx = a->j; 26716861f193SBarry Smith v = a->a; 26726861f193SBarry Smith ii = a->i; 26736861f193SBarry Smith 26746861f193SBarry Smith for (i = 0; i < m; i++) { 26756861f193SBarry Smith jrow = ii[i]; 26766861f193SBarry Smith n = ii[i + 1] - jrow; 26776861f193SBarry Smith sums = y + dof * i; 26786861f193SBarry Smith for (j = 0; j < n; j++) { 2679ad540459SPierre Jolivet for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k]; 26806861f193SBarry Smith jrow++; 26816861f193SBarry Smith } 26826861f193SBarry Smith } 26836861f193SBarry Smith 26849566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 26859566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 26869566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 26876861f193SBarry Smith PetscFunctionReturn(0); 26886861f193SBarry Smith } 26896861f193SBarry Smith 2690*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy) 2691*d71ae5a4SJacob Faibussowitsch { 26926861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 26936861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 26946861f193SBarry Smith const PetscScalar *x, *v, *alpha; 26956861f193SBarry Smith PetscScalar *y; 26966861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof; 26976861f193SBarry Smith PetscInt n, i, k; 26986861f193SBarry Smith 26996861f193SBarry Smith PetscFunctionBegin; 27009566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 27019566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 27029566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 27036861f193SBarry Smith for (i = 0; i < m; i++) { 27046861f193SBarry Smith idx = a->j + a->i[i]; 27056861f193SBarry Smith v = a->a + a->i[i]; 27066861f193SBarry Smith n = a->i[i + 1] - a->i[i]; 27076861f193SBarry Smith alpha = x + dof * i; 27086861f193SBarry Smith while (n-- > 0) { 2709ad540459SPierre Jolivet for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v); 27109371c9d4SSatish Balay idx++; 27119371c9d4SSatish Balay v++; 27126861f193SBarry Smith } 27136861f193SBarry Smith } 27149566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 27159566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 27169566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 27176861f193SBarry Smith PetscFunctionReturn(0); 27186861f193SBarry Smith } 27196861f193SBarry Smith 2720*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) 2721*d71ae5a4SJacob Faibussowitsch { 27226861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 27236861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 27246861f193SBarry Smith const PetscScalar *x, *v, *alpha; 27256861f193SBarry Smith PetscScalar *y; 27266861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof; 27276861f193SBarry Smith PetscInt n, i, k; 27286861f193SBarry Smith 27296861f193SBarry Smith PetscFunctionBegin; 27309566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 27319566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 27329566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 27336861f193SBarry Smith for (i = 0; i < m; i++) { 27346861f193SBarry Smith idx = a->j + a->i[i]; 27356861f193SBarry Smith v = a->a + a->i[i]; 27366861f193SBarry Smith n = a->i[i + 1] - a->i[i]; 27376861f193SBarry Smith alpha = x + dof * i; 27386861f193SBarry Smith while (n-- > 0) { 2739ad540459SPierre Jolivet for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v); 27409371c9d4SSatish Balay idx++; 27419371c9d4SSatish Balay v++; 27426861f193SBarry Smith } 27436861f193SBarry Smith } 27449566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 27459566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 27469566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 27476861f193SBarry Smith PetscFunctionReturn(0); 27486861f193SBarry Smith } 27496861f193SBarry Smith 2750f4a53059SBarry Smith /*===================================================================================*/ 2751*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) 2752*d71ae5a4SJacob Faibussowitsch { 27534cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 2754f4a53059SBarry Smith 2755b24ad042SBarry Smith PetscFunctionBegin; 2756f4a53059SBarry Smith /* start the scatter */ 27579566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 27589566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->mult)(b->AIJ, xx, yy)); 27599566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 27609566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy)); 2761f4a53059SBarry Smith PetscFunctionReturn(0); 2762f4a53059SBarry Smith } 2763f4a53059SBarry Smith 2764*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) 2765*d71ae5a4SJacob Faibussowitsch { 27664cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 2767b24ad042SBarry Smith 27684cb9d9b8SBarry Smith PetscFunctionBegin; 27699566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w)); 27709566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy)); 27719566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE)); 27729566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE)); 27734cb9d9b8SBarry Smith PetscFunctionReturn(0); 27744cb9d9b8SBarry Smith } 27754cb9d9b8SBarry Smith 2776*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) 2777*d71ae5a4SJacob Faibussowitsch { 27784cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 27794cb9d9b8SBarry Smith 2780b24ad042SBarry Smith PetscFunctionBegin; 27814cb9d9b8SBarry Smith /* start the scatter */ 27829566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 27839566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz)); 27849566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 27859566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz)); 27864cb9d9b8SBarry Smith PetscFunctionReturn(0); 27874cb9d9b8SBarry Smith } 27884cb9d9b8SBarry Smith 2789*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) 2790*d71ae5a4SJacob Faibussowitsch { 27914cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 2792b24ad042SBarry Smith 27934cb9d9b8SBarry Smith PetscFunctionBegin; 27949566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w)); 27959566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz)); 27969566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE)); 27979566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE)); 27984cb9d9b8SBarry Smith PetscFunctionReturn(0); 27994cb9d9b8SBarry Smith } 28004cb9d9b8SBarry Smith 280195ddefa2SHong Zhang /* ----------------------------------------------------------------*/ 2802*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C) 2803*d71ae5a4SJacob Faibussowitsch { 28044222ddf1SHong Zhang Mat_Product *product = C->product; 28054222ddf1SHong Zhang 28064222ddf1SHong Zhang PetscFunctionBegin; 28074222ddf1SHong Zhang if (product->type == MATPRODUCT_PtAP) { 28084222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ; 280998921bdaSJacob Faibussowitsch } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]); 28104222ddf1SHong Zhang PetscFunctionReturn(0); 28114222ddf1SHong Zhang } 28124222ddf1SHong Zhang 2813*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C) 2814*d71ae5a4SJacob Faibussowitsch { 28154222ddf1SHong Zhang Mat_Product *product = C->product; 28164222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 28174222ddf1SHong Zhang Mat A = product->A, P = product->B; 28184222ddf1SHong Zhang PetscInt alg = 1; /* set default algorithm */ 28194222ddf1SHong Zhang #if !defined(PETSC_HAVE_HYPRE) 28204222ddf1SHong Zhang const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"}; 28214222ddf1SHong Zhang PetscInt nalg = 4; 28224222ddf1SHong Zhang #else 28234222ddf1SHong Zhang const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"}; 28244222ddf1SHong Zhang PetscInt nalg = 5; 28254222ddf1SHong Zhang #endif 28264222ddf1SHong Zhang 28274222ddf1SHong Zhang PetscFunctionBegin; 282808401ef6SPierre Jolivet PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices", MatProductTypes[product->type]); 28294222ddf1SHong Zhang 28304222ddf1SHong Zhang /* PtAP */ 28314222ddf1SHong Zhang /* Check matrix local sizes */ 28329371c9d4SSatish Balay PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")", 28339371c9d4SSatish Balay A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend); 28349371c9d4SSatish Balay PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")", 28359371c9d4SSatish Balay A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend); 28364222ddf1SHong Zhang 28374222ddf1SHong Zhang /* Set the default algorithm */ 28389566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg, "default", &flg)); 283948a46eb9SPierre Jolivet if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 28404222ddf1SHong Zhang 28414222ddf1SHong Zhang /* Get runtime option */ 2842d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 28439566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg)); 284448a46eb9SPierre Jolivet if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2845d0609cedSBarry Smith PetscOptionsEnd(); 28464222ddf1SHong Zhang 28479566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg, "allatonce", &flg)); 28484222ddf1SHong Zhang if (flg) { 28494222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 28504222ddf1SHong Zhang PetscFunctionReturn(0); 28514222ddf1SHong Zhang } 28524222ddf1SHong Zhang 28539566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg, "allatonce_merged", &flg)); 28544222ddf1SHong Zhang if (flg) { 28554222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 28564222ddf1SHong Zhang PetscFunctionReturn(0); 28574222ddf1SHong Zhang } 28584222ddf1SHong Zhang 28594222ddf1SHong Zhang /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */ 28609566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n")); 28619566063dSJacob Faibussowitsch PetscCall(MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P)); 28629566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(C)); 28634222ddf1SHong Zhang PetscFunctionReturn(0); 28644222ddf1SHong Zhang } 28654222ddf1SHong Zhang 28664222ddf1SHong Zhang /* ----------------------------------------------------------------*/ 2867*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C) 2868*d71ae5a4SJacob Faibussowitsch { 28690298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 28707ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data; 28717ba1a0bfSKris Buschelman Mat P = pp->AIJ; 28727ba1a0bfSKris Buschelman Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c; 2873d2840e09SBarry Smith PetscInt *pti, *ptj, *ptJ; 28747ba1a0bfSKris Buschelman PetscInt *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj; 2875d2840e09SBarry Smith const PetscInt an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof; 2876d2840e09SBarry Smith PetscInt i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn; 28777ba1a0bfSKris Buschelman MatScalar *ca; 2878d2840e09SBarry Smith const PetscInt *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj; 28797ba1a0bfSKris Buschelman 28807ba1a0bfSKris Buschelman PetscFunctionBegin; 28817ba1a0bfSKris Buschelman /* Get ij structure of P^T */ 28829566063dSJacob Faibussowitsch PetscCall(MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj)); 28837ba1a0bfSKris Buschelman 28847ba1a0bfSKris Buschelman cn = pn * ppdof; 28857ba1a0bfSKris Buschelman /* Allocate ci array, arrays for fill computation and */ 28867ba1a0bfSKris Buschelman /* free space for accumulating nonzero column info */ 28879566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(cn + 1, &ci)); 28887ba1a0bfSKris Buschelman ci[0] = 0; 28897ba1a0bfSKris Buschelman 28907ba1a0bfSKris Buschelman /* Work arrays for rows of P^T*A */ 28919566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow)); 28929566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ptadenserow, an)); 28939566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(denserow, cn)); 28947ba1a0bfSKris Buschelman 28957ba1a0bfSKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 28967ba1a0bfSKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 28977ba1a0bfSKris Buschelman /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 28989566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space)); 28997ba1a0bfSKris Buschelman current_space = free_space; 29007ba1a0bfSKris Buschelman 29017ba1a0bfSKris Buschelman /* Determine symbolic info for each row of C: */ 29027ba1a0bfSKris Buschelman for (i = 0; i < pn; i++) { 29037ba1a0bfSKris Buschelman ptnzi = pti[i + 1] - pti[i]; 29047ba1a0bfSKris Buschelman ptJ = ptj + pti[i]; 29057ba1a0bfSKris Buschelman for (dof = 0; dof < ppdof; dof++) { 29067ba1a0bfSKris Buschelman ptanzi = 0; 29077ba1a0bfSKris Buschelman /* Determine symbolic row of PtA: */ 29087ba1a0bfSKris Buschelman for (j = 0; j < ptnzi; j++) { 29097ba1a0bfSKris Buschelman /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 29107ba1a0bfSKris Buschelman arow = ptJ[j] * ppdof + dof; 29117ba1a0bfSKris Buschelman /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 29127ba1a0bfSKris Buschelman anzj = ai[arow + 1] - ai[arow]; 29137ba1a0bfSKris Buschelman ajj = aj + ai[arow]; 29147ba1a0bfSKris Buschelman for (k = 0; k < anzj; k++) { 29157ba1a0bfSKris Buschelman if (!ptadenserow[ajj[k]]) { 29167ba1a0bfSKris Buschelman ptadenserow[ajj[k]] = -1; 29177ba1a0bfSKris Buschelman ptasparserow[ptanzi++] = ajj[k]; 29187ba1a0bfSKris Buschelman } 29197ba1a0bfSKris Buschelman } 29207ba1a0bfSKris Buschelman } 29217ba1a0bfSKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 29227ba1a0bfSKris Buschelman ptaj = ptasparserow; 29237ba1a0bfSKris Buschelman cnzi = 0; 29247ba1a0bfSKris Buschelman for (j = 0; j < ptanzi; j++) { 29257ba1a0bfSKris Buschelman /* Get offset within block of P */ 29267ba1a0bfSKris Buschelman pshift = *ptaj % ppdof; 29277ba1a0bfSKris Buschelman /* Get block row of P */ 29287ba1a0bfSKris Buschelman prow = (*ptaj++) / ppdof; /* integer division */ 29297ba1a0bfSKris Buschelman /* P has same number of nonzeros per row as the compressed form */ 29307ba1a0bfSKris Buschelman pnzj = pi[prow + 1] - pi[prow]; 29317ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 29327ba1a0bfSKris Buschelman for (k = 0; k < pnzj; k++) { 29337ba1a0bfSKris Buschelman /* Locations in C are shifted by the offset within the block */ 29347ba1a0bfSKris Buschelman /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 29357ba1a0bfSKris Buschelman if (!denserow[pjj[k] * ppdof + pshift]) { 29367ba1a0bfSKris Buschelman denserow[pjj[k] * ppdof + pshift] = -1; 29377ba1a0bfSKris Buschelman sparserow[cnzi++] = pjj[k] * ppdof + pshift; 29387ba1a0bfSKris Buschelman } 29397ba1a0bfSKris Buschelman } 29407ba1a0bfSKris Buschelman } 29417ba1a0bfSKris Buschelman 29427ba1a0bfSKris Buschelman /* sort sparserow */ 29439566063dSJacob Faibussowitsch PetscCall(PetscSortInt(cnzi, sparserow)); 29447ba1a0bfSKris Buschelman 29457ba1a0bfSKris Buschelman /* If free space is not available, make more free space */ 29467ba1a0bfSKris Buschelman /* Double the amount of total space in the list */ 294748a46eb9SPierre Jolivet if (current_space->local_remaining < cnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), ¤t_space)); 29487ba1a0bfSKris Buschelman 29497ba1a0bfSKris Buschelman /* Copy data into free space, and zero out denserows */ 29509566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(current_space->array, sparserow, cnzi)); 295126fbe8dcSKarl Rupp 29527ba1a0bfSKris Buschelman current_space->array += cnzi; 29537ba1a0bfSKris Buschelman current_space->local_used += cnzi; 29547ba1a0bfSKris Buschelman current_space->local_remaining -= cnzi; 29557ba1a0bfSKris Buschelman 295626fbe8dcSKarl Rupp for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0; 295726fbe8dcSKarl Rupp for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0; 295826fbe8dcSKarl Rupp 29597ba1a0bfSKris Buschelman /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 29607ba1a0bfSKris Buschelman /* For now, we will recompute what is needed. */ 29617ba1a0bfSKris Buschelman ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi; 29627ba1a0bfSKris Buschelman } 29637ba1a0bfSKris Buschelman } 29647ba1a0bfSKris Buschelman /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 29657ba1a0bfSKris Buschelman /* Allocate space for cj, initialize cj, and */ 29667ba1a0bfSKris Buschelman /* destroy list of free space and other temporary array(s) */ 29679566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ci[cn] + 1, &cj)); 29689566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space, cj)); 29699566063dSJacob Faibussowitsch PetscCall(PetscFree4(ptadenserow, ptasparserow, denserow, sparserow)); 29707ba1a0bfSKris Buschelman 29717ba1a0bfSKris Buschelman /* Allocate space for ca */ 29729566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(ci[cn] + 1, &ca)); 29737ba1a0bfSKris Buschelman 29747ba1a0bfSKris Buschelman /* put together the new matrix */ 29759566063dSJacob Faibussowitsch PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C)); 29769566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(C, pp->dof)); 29777ba1a0bfSKris Buschelman 29787ba1a0bfSKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 29797ba1a0bfSKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 29804222ddf1SHong Zhang c = (Mat_SeqAIJ *)(C->data); 2981e6b907acSBarry Smith c->free_a = PETSC_TRUE; 2982e6b907acSBarry Smith c->free_ij = PETSC_TRUE; 29837ba1a0bfSKris Buschelman c->nonew = 0; 298426fbe8dcSKarl Rupp 29854222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 29864222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 29877ba1a0bfSKris Buschelman 29887ba1a0bfSKris Buschelman /* Clean up. */ 29899566063dSJacob Faibussowitsch PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj)); 29907ba1a0bfSKris Buschelman PetscFunctionReturn(0); 29917ba1a0bfSKris Buschelman } 29927ba1a0bfSKris Buschelman 2993*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C) 2994*d71ae5a4SJacob Faibussowitsch { 29957ba1a0bfSKris Buschelman /* This routine requires testing -- first draft only */ 29967ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data; 29977ba1a0bfSKris Buschelman Mat P = pp->AIJ; 29987ba1a0bfSKris Buschelman Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 29997ba1a0bfSKris Buschelman Mat_SeqAIJ *p = (Mat_SeqAIJ *)P->data; 30007ba1a0bfSKris Buschelman Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 3001a2ea699eSBarry Smith const PetscInt *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj; 3002d2840e09SBarry Smith const PetscInt *ci = c->i, *cj = c->j, *cjj; 3003d2840e09SBarry Smith const PetscInt am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof; 3004d2840e09SBarry Smith PetscInt i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense; 3005a2ea699eSBarry Smith const MatScalar *aa = a->a, *pa = p->a, *pA, *paj; 3006d2840e09SBarry Smith MatScalar *ca = c->a, *caj, *apa; 30077ba1a0bfSKris Buschelman 30087ba1a0bfSKris Buschelman PetscFunctionBegin; 30097ba1a0bfSKris Buschelman /* Allocate temporary array for storage of one row of A*P */ 30109566063dSJacob Faibussowitsch PetscCall(PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense)); 30117ba1a0bfSKris Buschelman 30127ba1a0bfSKris Buschelman /* Clear old values in C */ 30139566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ca, ci[cm])); 30147ba1a0bfSKris Buschelman 30157ba1a0bfSKris Buschelman for (i = 0; i < am; i++) { 30167ba1a0bfSKris Buschelman /* Form sparse row of A*P */ 30177ba1a0bfSKris Buschelman anzi = ai[i + 1] - ai[i]; 30187ba1a0bfSKris Buschelman apnzj = 0; 30197ba1a0bfSKris Buschelman for (j = 0; j < anzi; j++) { 30207ba1a0bfSKris Buschelman /* Get offset within block of P */ 30217ba1a0bfSKris Buschelman pshift = *aj % ppdof; 30227ba1a0bfSKris Buschelman /* Get block row of P */ 30237ba1a0bfSKris Buschelman prow = *aj++ / ppdof; /* integer division */ 30247ba1a0bfSKris Buschelman pnzj = pi[prow + 1] - pi[prow]; 30257ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 30267ba1a0bfSKris Buschelman paj = pa + pi[prow]; 30277ba1a0bfSKris Buschelman for (k = 0; k < pnzj; k++) { 30287ba1a0bfSKris Buschelman poffset = pjj[k] * ppdof + pshift; 30297ba1a0bfSKris Buschelman if (!apjdense[poffset]) { 30307ba1a0bfSKris Buschelman apjdense[poffset] = -1; 30317ba1a0bfSKris Buschelman apj[apnzj++] = poffset; 30327ba1a0bfSKris Buschelman } 30337ba1a0bfSKris Buschelman apa[poffset] += (*aa) * paj[k]; 30347ba1a0bfSKris Buschelman } 30359566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * pnzj)); 30367ba1a0bfSKris Buschelman aa++; 30377ba1a0bfSKris Buschelman } 30387ba1a0bfSKris Buschelman 30397ba1a0bfSKris Buschelman /* Sort the j index array for quick sparse axpy. */ 30407ba1a0bfSKris Buschelman /* Note: a array does not need sorting as it is in dense storage locations. */ 30419566063dSJacob Faibussowitsch PetscCall(PetscSortInt(apnzj, apj)); 30427ba1a0bfSKris Buschelman 30437ba1a0bfSKris Buschelman /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 30447ba1a0bfSKris Buschelman prow = i / ppdof; /* integer division */ 30457ba1a0bfSKris Buschelman pshift = i % ppdof; 30467ba1a0bfSKris Buschelman poffset = pi[prow]; 30477ba1a0bfSKris Buschelman pnzi = pi[prow + 1] - poffset; 30487ba1a0bfSKris Buschelman /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 30497ba1a0bfSKris Buschelman pJ = pj + poffset; 30507ba1a0bfSKris Buschelman pA = pa + poffset; 30517ba1a0bfSKris Buschelman for (j = 0; j < pnzi; j++) { 30527ba1a0bfSKris Buschelman crow = (*pJ) * ppdof + pshift; 30537ba1a0bfSKris Buschelman cjj = cj + ci[crow]; 30547ba1a0bfSKris Buschelman caj = ca + ci[crow]; 30557ba1a0bfSKris Buschelman pJ++; 30567ba1a0bfSKris Buschelman /* Perform sparse axpy operation. Note cjj includes apj. */ 30577ba1a0bfSKris Buschelman for (k = 0, nextap = 0; nextap < apnzj; k++) { 305826fbe8dcSKarl Rupp if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]]; 30597ba1a0bfSKris Buschelman } 30609566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * apnzj)); 30617ba1a0bfSKris Buschelman pA++; 30627ba1a0bfSKris Buschelman } 30637ba1a0bfSKris Buschelman 30647ba1a0bfSKris Buschelman /* Zero the current row info for A*P */ 30657ba1a0bfSKris Buschelman for (j = 0; j < apnzj; j++) { 30667ba1a0bfSKris Buschelman apa[apj[j]] = 0.; 30677ba1a0bfSKris Buschelman apjdense[apj[j]] = 0; 30687ba1a0bfSKris Buschelman } 30697ba1a0bfSKris Buschelman } 30707ba1a0bfSKris Buschelman 30717ba1a0bfSKris Buschelman /* Assemble the final matrix and clean up */ 30729566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 30739566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 30749566063dSJacob Faibussowitsch PetscCall(PetscFree3(apa, apj, apjdense)); 307595ddefa2SHong Zhang PetscFunctionReturn(0); 307695ddefa2SHong Zhang } 30777ba1a0bfSKris Buschelman 3078*d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C) 3079*d71ae5a4SJacob Faibussowitsch { 30804222ddf1SHong Zhang Mat_Product *product = C->product; 30814222ddf1SHong Zhang Mat A = product->A, P = product->B; 30822121bac1SHong Zhang 30832121bac1SHong Zhang PetscFunctionBegin; 30849566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C)); 30852121bac1SHong Zhang PetscFunctionReturn(0); 30862121bac1SHong Zhang } 30872121bac1SHong Zhang 3088*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A, Mat PP, PetscReal fill, Mat *C) 3089*d71ae5a4SJacob Faibussowitsch { 309095ddefa2SHong Zhang PetscFunctionBegin; 30913e0c911fSBarry Smith SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet"); 309295ddefa2SHong Zhang } 309395ddefa2SHong Zhang 3094*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A, Mat PP, Mat C) 3095*d71ae5a4SJacob Faibussowitsch { 309695ddefa2SHong Zhang PetscFunctionBegin; 30973e0c911fSBarry Smith SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 30987ba1a0bfSKris Buschelman } 30995c65b9ecSFande Kong 3100bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat); 3101bc8e477aSFande Kong 3102*d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C) 3103*d71ae5a4SJacob Faibussowitsch { 3104bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 3105bc8e477aSFande Kong 3106bc8e477aSFande Kong PetscFunctionBegin; 310734bcad68SFande Kong 31089566063dSJacob Faibussowitsch PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C)); 3109bc8e477aSFande Kong PetscFunctionReturn(0); 3110bc8e477aSFande Kong } 3111bc8e477aSFande Kong 31124222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat); 3113bc8e477aSFande Kong 3114*d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C) 3115*d71ae5a4SJacob Faibussowitsch { 3116bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 3117bc8e477aSFande Kong 3118bc8e477aSFande Kong PetscFunctionBegin; 31199566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C)); 31204222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce; 3121bc8e477aSFande Kong PetscFunctionReturn(0); 3122bc8e477aSFande Kong } 3123bc8e477aSFande Kong 3124bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat); 3125bc8e477aSFande Kong 3126*d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C) 3127*d71ae5a4SJacob Faibussowitsch { 3128bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 3129bc8e477aSFande Kong 3130bc8e477aSFande Kong PetscFunctionBegin; 313134bcad68SFande Kong 31329566063dSJacob Faibussowitsch PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C)); 3133bc8e477aSFande Kong PetscFunctionReturn(0); 3134bc8e477aSFande Kong } 3135bc8e477aSFande Kong 31364222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat); 3137bc8e477aSFande Kong 3138*d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C) 3139*d71ae5a4SJacob Faibussowitsch { 3140bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 3141bc8e477aSFande Kong 3142bc8e477aSFande Kong PetscFunctionBegin; 314334bcad68SFande Kong 31449566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C)); 31454222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged; 3146bc8e477aSFande Kong PetscFunctionReturn(0); 3147bc8e477aSFande Kong } 3148bc8e477aSFande Kong 3149*d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C) 3150*d71ae5a4SJacob Faibussowitsch { 31514222ddf1SHong Zhang Mat_Product *product = C->product; 31524222ddf1SHong Zhang Mat A = product->A, P = product->B; 31534222ddf1SHong Zhang PetscBool flg; 3154bc8e477aSFande Kong 3155bc8e477aSFande Kong PetscFunctionBegin; 31569566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg, "allatonce", &flg)); 31574222ddf1SHong Zhang if (flg) { 31589566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C)); 31594222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 31604222ddf1SHong Zhang PetscFunctionReturn(0); 3161bc8e477aSFande Kong } 316234bcad68SFande Kong 31639566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg, "allatonce_merged", &flg)); 31644222ddf1SHong Zhang if (flg) { 31659566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C)); 31664222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 31674222ddf1SHong Zhang PetscFunctionReturn(0); 31684222ddf1SHong Zhang } 316934bcad68SFande Kong 31704222ddf1SHong Zhang SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported"); 3171bc8e477aSFande Kong } 3172bc8e477aSFande Kong 3173*d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 3174*d71ae5a4SJacob Faibussowitsch { 31759c4fc4c7SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 3176ceb03754SKris Buschelman Mat a = b->AIJ, B; 31779c4fc4c7SBarry Smith Mat_SeqAIJ *aij = (Mat_SeqAIJ *)a->data; 31780fd73130SBarry Smith PetscInt m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof; 3179ba8c8a56SBarry Smith PetscInt *cols; 3180ba8c8a56SBarry Smith PetscScalar *vals; 31819c4fc4c7SBarry Smith 31829c4fc4c7SBarry Smith PetscFunctionBegin; 31839566063dSJacob Faibussowitsch PetscCall(MatGetSize(a, &m, &n)); 31849566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(dof * m, &ilen)); 31859c4fc4c7SBarry Smith for (i = 0; i < m; i++) { 31869c4fc4c7SBarry Smith nmax = PetscMax(nmax, aij->ilen[i]); 318726fbe8dcSKarl Rupp for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i]; 31889c4fc4c7SBarry Smith } 31899566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 31909566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, dof * m, dof * n, dof * m, dof * n)); 31919566063dSJacob Faibussowitsch PetscCall(MatSetType(B, newtype)); 31929566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(B, 0, ilen)); 31939566063dSJacob Faibussowitsch PetscCall(PetscFree(ilen)); 31949566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nmax, &icols)); 31959c4fc4c7SBarry Smith ii = 0; 31969c4fc4c7SBarry Smith for (i = 0; i < m; i++) { 31979566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals)); 31980fd73130SBarry Smith for (j = 0; j < dof; j++) { 319926fbe8dcSKarl Rupp for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j; 32009566063dSJacob Faibussowitsch PetscCall(MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES)); 32019c4fc4c7SBarry Smith ii++; 32029c4fc4c7SBarry Smith } 32039566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals)); 32049c4fc4c7SBarry Smith } 32059566063dSJacob Faibussowitsch PetscCall(PetscFree(icols)); 32069566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 32079566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3208ceb03754SKris Buschelman 3209511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 32109566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &B)); 3211c3d102feSKris Buschelman } else { 3212ceb03754SKris Buschelman *newmat = B; 3213c3d102feSKris Buschelman } 32149c4fc4c7SBarry Smith PetscFunctionReturn(0); 32159c4fc4c7SBarry Smith } 32169c4fc4c7SBarry Smith 3217c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h> 3218be1d678aSKris Buschelman 3219*d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 3220*d71ae5a4SJacob Faibussowitsch { 32210fd73130SBarry Smith Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)A->data; 3222ceb03754SKris Buschelman Mat MatAIJ = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B; 32230fd73130SBarry Smith Mat MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ; 32240fd73130SBarry Smith Mat_SeqAIJ *AIJ = (Mat_SeqAIJ *)MatAIJ->data; 32250fd73130SBarry Smith Mat_SeqAIJ *OAIJ = (Mat_SeqAIJ *)MatOAIJ->data; 32260fd73130SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)maij->A->data; 32270298fd71SBarry Smith PetscInt dof = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0; 32280298fd71SBarry Smith PetscInt *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL; 32290fd73130SBarry Smith PetscInt rstart, cstart, *garray, ii, k; 32300fd73130SBarry Smith PetscScalar *vals, *ovals; 32310fd73130SBarry Smith 32320fd73130SBarry Smith PetscFunctionBegin; 32339566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz)); 3234d0f46423SBarry Smith for (i = 0; i < A->rmap->n / dof; i++) { 32350fd73130SBarry Smith nmax = PetscMax(nmax, AIJ->ilen[i]); 32360fd73130SBarry Smith onmax = PetscMax(onmax, OAIJ->ilen[i]); 32370fd73130SBarry Smith for (j = 0; j < dof; j++) { 32380fd73130SBarry Smith dnz[dof * i + j] = AIJ->ilen[i]; 32390fd73130SBarry Smith onz[dof * i + j] = OAIJ->ilen[i]; 32400fd73130SBarry Smith } 32410fd73130SBarry Smith } 32429566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 32439566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 32449566063dSJacob Faibussowitsch PetscCall(MatSetType(B, newtype)); 32459566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz)); 32469566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(B, dof)); 32479566063dSJacob Faibussowitsch PetscCall(PetscFree2(dnz, onz)); 32480fd73130SBarry Smith 32499566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nmax, &icols, onmax, &oicols)); 3250d0f46423SBarry Smith rstart = dof * maij->A->rmap->rstart; 3251d0f46423SBarry Smith cstart = dof * maij->A->cmap->rstart; 32520fd73130SBarry Smith garray = mpiaij->garray; 32530fd73130SBarry Smith 32540fd73130SBarry Smith ii = rstart; 3255d0f46423SBarry Smith for (i = 0; i < A->rmap->n / dof; i++) { 32569566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals)); 32579566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals)); 32580fd73130SBarry Smith for (j = 0; j < dof; j++) { 3259ad540459SPierre Jolivet for (k = 0; k < ncols; k++) icols[k] = cstart + dof * cols[k] + j; 3260ad540459SPierre Jolivet for (k = 0; k < oncols; k++) oicols[k] = dof * garray[ocols[k]] + j; 32619566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES)); 32629566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES)); 32630fd73130SBarry Smith ii++; 32640fd73130SBarry Smith } 32659566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals)); 32669566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals)); 32670fd73130SBarry Smith } 32689566063dSJacob Faibussowitsch PetscCall(PetscFree2(icols, oicols)); 32690fd73130SBarry Smith 32709566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 32719566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3272ceb03754SKris Buschelman 3273511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 32747adad957SLisandro Dalcin PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 32757adad957SLisandro Dalcin ((PetscObject)A)->refct = 1; 327626fbe8dcSKarl Rupp 32779566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &B)); 327826fbe8dcSKarl Rupp 32797adad957SLisandro Dalcin ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 3280c3d102feSKris Buschelman } else { 3281ceb03754SKris Buschelman *newmat = B; 3282c3d102feSKris Buschelman } 32830fd73130SBarry Smith PetscFunctionReturn(0); 32840fd73130SBarry Smith } 32850fd73130SBarry Smith 3286*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat) 3287*d71ae5a4SJacob Faibussowitsch { 32889e516c8fSBarry Smith Mat A; 32899e516c8fSBarry Smith 32909e516c8fSBarry Smith PetscFunctionBegin; 32919566063dSJacob Faibussowitsch PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 32929566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat)); 32939566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 32949e516c8fSBarry Smith PetscFunctionReturn(0); 32959e516c8fSBarry Smith } 32960fd73130SBarry Smith 3297*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[]) 3298*d71ae5a4SJacob Faibussowitsch { 3299ec626240SStefano Zampini Mat A; 3300ec626240SStefano Zampini 3301ec626240SStefano Zampini PetscFunctionBegin; 33029566063dSJacob Faibussowitsch PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 33039566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrices(A, n, irow, icol, scall, submat)); 33049566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 3305ec626240SStefano Zampini PetscFunctionReturn(0); 3306ec626240SStefano Zampini } 3307ec626240SStefano Zampini 3308bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */ 3309480dffcfSJed Brown /*@ 33100bad9183SKris Buschelman MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 33110bad9183SKris Buschelman operations for multicomponent problems. It interpolates each component the same 331211a5261eSBarry Smith way independently. The matrix type is based on `MATSEQAIJ` for sequential matrices, 331311a5261eSBarry Smith and `MATMPIAIJ` for distributed matrices. 33140bad9183SKris Buschelman 3315ff585edeSJed Brown Collective 3316ff585edeSJed Brown 3317ff585edeSJed Brown Input Parameters: 331811a5261eSBarry Smith + A - the `MATAIJ` matrix describing the action on blocks 3319ff585edeSJed Brown - dof - the block size (number of components per node) 3320ff585edeSJed Brown 3321ff585edeSJed Brown Output Parameter: 332211a5261eSBarry Smith . maij - the new `MATMAIJ` matrix 3323ff585edeSJed Brown 33240bad9183SKris Buschelman Operations provided: 332567b8a455SSatish Balay .vb 332611a5261eSBarry Smith MatMult() 332711a5261eSBarry Smith MatMultTranspose() 332811a5261eSBarry Smith MatMultAdd() 332911a5261eSBarry Smith MatMultTransposeAdd() 333011a5261eSBarry Smith MatView() 333167b8a455SSatish Balay .ve 33320bad9183SKris Buschelman 33330bad9183SKris Buschelman Level: advanced 33340bad9183SKris Buschelman 333511a5261eSBarry Smith .seealso: `MATAIJ`, `MATMAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MATMAIJ` 3336ff585edeSJed Brown @*/ 3337*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij) 3338*d71ae5a4SJacob Faibussowitsch { 3339b24ad042SBarry Smith PetscInt n; 334082b1193eSBarry Smith Mat B; 334190f67b22SBarry Smith PetscBool flg; 3342fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 3343fdc842d1SBarry Smith /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */ 3344fdc842d1SBarry Smith PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE; 3345fdc842d1SBarry Smith #endif 334682b1193eSBarry Smith 334782b1193eSBarry Smith PetscFunctionBegin; 3348fdc842d1SBarry Smith dof = PetscAbs(dof); 33499566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)A)); 3350d72c5c08SBarry Smith 335126fbe8dcSKarl Rupp if (dof == 1) *maij = A; 335226fbe8dcSKarl Rupp else { 33539566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3354c248e2fdSStefano Zampini /* propagate vec type */ 33559566063dSJacob Faibussowitsch PetscCall(MatSetVecType(B, A->defaultvectype)); 33569566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N)); 33579566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->rmap, dof)); 33589566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->cmap, dof)); 33599566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 33609566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 336126fbe8dcSKarl Rupp 3362cd3bbe55SBarry Smith B->assembled = PETSC_TRUE; 3363d72c5c08SBarry Smith 336490f67b22SBarry Smith PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg)); 336590f67b22SBarry Smith if (flg) { 3366feb9fe23SJed Brown Mat_SeqMAIJ *b; 3367feb9fe23SJed Brown 33689566063dSJacob Faibussowitsch PetscCall(MatSetType(B, MATSEQMAIJ)); 336926fbe8dcSKarl Rupp 33700298fd71SBarry Smith B->ops->setup = NULL; 33714cb9d9b8SBarry Smith B->ops->destroy = MatDestroy_SeqMAIJ; 33720fd73130SBarry Smith B->ops->view = MatView_SeqMAIJ; 33734222ddf1SHong Zhang 3374feb9fe23SJed Brown b = (Mat_SeqMAIJ *)B->data; 3375b9b97703SBarry Smith b->dof = dof; 33764cb9d9b8SBarry Smith b->AIJ = A; 337726fbe8dcSKarl Rupp 3378d72c5c08SBarry Smith if (dof == 2) { 3379cd3bbe55SBarry Smith B->ops->mult = MatMult_SeqMAIJ_2; 3380cd3bbe55SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_2; 3381cd3bbe55SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 3382cd3bbe55SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 3383bcc973b7SBarry Smith } else if (dof == 3) { 3384bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_3; 3385bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_3; 3386bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 3387bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 3388bcc973b7SBarry Smith } else if (dof == 4) { 3389bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_4; 3390bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_4; 3391bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 3392bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 3393f9fae5adSBarry Smith } else if (dof == 5) { 3394f9fae5adSBarry Smith B->ops->mult = MatMult_SeqMAIJ_5; 3395f9fae5adSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_5; 3396f9fae5adSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 3397f9fae5adSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 33986cd98798SBarry Smith } else if (dof == 6) { 33996cd98798SBarry Smith B->ops->mult = MatMult_SeqMAIJ_6; 34006cd98798SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_6; 34016cd98798SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 34026cd98798SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 3403ed8eea03SBarry Smith } else if (dof == 7) { 3404ed8eea03SBarry Smith B->ops->mult = MatMult_SeqMAIJ_7; 3405ed8eea03SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_7; 3406ed8eea03SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 3407ed8eea03SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 340866ed3db0SBarry Smith } else if (dof == 8) { 340966ed3db0SBarry Smith B->ops->mult = MatMult_SeqMAIJ_8; 341066ed3db0SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_8; 341166ed3db0SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 341266ed3db0SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 34132b692628SMatthew Knepley } else if (dof == 9) { 34142b692628SMatthew Knepley B->ops->mult = MatMult_SeqMAIJ_9; 34152b692628SMatthew Knepley B->ops->multadd = MatMultAdd_SeqMAIJ_9; 34162b692628SMatthew Knepley B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 34172b692628SMatthew Knepley B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 3418b9cda22cSBarry Smith } else if (dof == 10) { 3419b9cda22cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_10; 3420b9cda22cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_10; 3421b9cda22cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 3422b9cda22cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 3423dbdd7285SBarry Smith } else if (dof == 11) { 3424dbdd7285SBarry Smith B->ops->mult = MatMult_SeqMAIJ_11; 3425dbdd7285SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_11; 3426dbdd7285SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 3427dbdd7285SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 34282f7816d4SBarry Smith } else if (dof == 16) { 34292f7816d4SBarry Smith B->ops->mult = MatMult_SeqMAIJ_16; 34302f7816d4SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_16; 34312f7816d4SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 34322f7816d4SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 3433ed1c418cSBarry Smith } else if (dof == 18) { 3434ed1c418cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_18; 3435ed1c418cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_18; 3436ed1c418cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 3437ed1c418cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 343882b1193eSBarry Smith } else { 34396861f193SBarry Smith B->ops->mult = MatMult_SeqMAIJ_N; 34406861f193SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_N; 34416861f193SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 34426861f193SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 344382b1193eSBarry Smith } 3444fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 34459566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ)); 3446fdc842d1SBarry Smith #endif 34479566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ)); 34489566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ)); 3449f4a53059SBarry Smith } else { 3450f4a53059SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 3451feb9fe23SJed Brown Mat_MPIMAIJ *b; 3452f4a53059SBarry Smith IS from, to; 3453f4a53059SBarry Smith Vec gvec; 3454f4a53059SBarry Smith 34559566063dSJacob Faibussowitsch PetscCall(MatSetType(B, MATMPIMAIJ)); 345626fbe8dcSKarl Rupp 34570298fd71SBarry Smith B->ops->setup = NULL; 3458d72c5c08SBarry Smith B->ops->destroy = MatDestroy_MPIMAIJ; 34590fd73130SBarry Smith B->ops->view = MatView_MPIMAIJ; 346026fbe8dcSKarl Rupp 3461b9b97703SBarry Smith b = (Mat_MPIMAIJ *)B->data; 3462b9b97703SBarry Smith b->dof = dof; 3463b9b97703SBarry Smith b->A = A; 346426fbe8dcSKarl Rupp 34659566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ)); 34669566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ)); 34674cb9d9b8SBarry Smith 34689566063dSJacob Faibussowitsch PetscCall(VecGetSize(mpiaij->lvec, &n)); 34699566063dSJacob Faibussowitsch PetscCall(VecCreate(PETSC_COMM_SELF, &b->w)); 34709566063dSJacob Faibussowitsch PetscCall(VecSetSizes(b->w, n * dof, n * dof)); 34719566063dSJacob Faibussowitsch PetscCall(VecSetBlockSize(b->w, dof)); 34729566063dSJacob Faibussowitsch PetscCall(VecSetType(b->w, VECSEQ)); 3473f4a53059SBarry Smith 3474f4a53059SBarry Smith /* create two temporary Index sets for build scatter gather */ 34759566063dSJacob Faibussowitsch PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from)); 34769566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to)); 3477f4a53059SBarry Smith 3478f4a53059SBarry Smith /* create temporary global vector to generate scatter context */ 34799566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec)); 3480f4a53059SBarry Smith 3481f4a53059SBarry Smith /* generate the scatter context */ 34829566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(gvec, from, b->w, to, &b->ctx)); 3483f4a53059SBarry Smith 34849566063dSJacob Faibussowitsch PetscCall(ISDestroy(&from)); 34859566063dSJacob Faibussowitsch PetscCall(ISDestroy(&to)); 34869566063dSJacob Faibussowitsch PetscCall(VecDestroy(&gvec)); 3487f4a53059SBarry Smith 3488f4a53059SBarry Smith B->ops->mult = MatMult_MPIMAIJ_dof; 34894cb9d9b8SBarry Smith B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 34904cb9d9b8SBarry Smith B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 34914cb9d9b8SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 349226fbe8dcSKarl Rupp 3493fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 34949566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ)); 3495fdc842d1SBarry Smith #endif 34969566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ)); 34979566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ)); 3498f4a53059SBarry Smith } 34997dae84e0SHong Zhang B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ; 3500ec626240SStefano Zampini B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ; 35019566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 3502fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 3503fdc842d1SBarry Smith /* temporary until we have CUDA implementation of MAIJ */ 3504fdc842d1SBarry Smith { 3505fdc842d1SBarry Smith PetscBool flg; 3506fdc842d1SBarry Smith if (convert) { 35079566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, "")); 350848a46eb9SPierre Jolivet if (flg) PetscCall(MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B)); 3509fdc842d1SBarry Smith } 3510fdc842d1SBarry Smith } 3511fdc842d1SBarry Smith #endif 3512cd3bbe55SBarry Smith *maij = B; 3513d72c5c08SBarry Smith } 351482b1193eSBarry Smith PetscFunctionReturn(0); 351582b1193eSBarry Smith } 3516