1be1d678aSKris Buschelman 282b1193eSBarry Smith /* 3cd3bbe55SBarry Smith Defines the basic matrix operations for the MAIJ matrix storage format. 4cd3bbe55SBarry Smith This format is used for restriction and interpolation operations for 5cd3bbe55SBarry Smith multicomponent problems. It interpolates each component the same way 6cd3bbe55SBarry Smith independently. 7cd3bbe55SBarry Smith 8cd3bbe55SBarry Smith We provide: 9cd3bbe55SBarry Smith MatMult() 10cd3bbe55SBarry Smith MatMultTranspose() 11cd3bbe55SBarry Smith MatMultTransposeAdd() 12cd3bbe55SBarry Smith MatMultAdd() 13cd3bbe55SBarry Smith and 14cd3bbe55SBarry Smith MatCreateMAIJ(Mat,dof,Mat*) 15f4a53059SBarry Smith 16f4a53059SBarry Smith This single directory handles both the sequential and parallel codes 1782b1193eSBarry Smith */ 1882b1193eSBarry Smith 19c6db04a5SJed Brown #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 20c6db04a5SJed Brown #include <../src/mat/utils/freespace.h> 2182b1193eSBarry Smith 22b350b9acSSatish Balay /*@ 23ff585edeSJed Brown MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix 24ff585edeSJed Brown 25ff585edeSJed Brown Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel 26ff585edeSJed Brown 27ff585edeSJed Brown Input Parameter: 28ff585edeSJed Brown . A - the MAIJ matrix 29ff585edeSJed Brown 30ff585edeSJed Brown Output Parameter: 31ff585edeSJed Brown . B - the AIJ matrix 32ff585edeSJed Brown 33ff585edeSJed Brown Level: advanced 34ff585edeSJed Brown 3595452b02SPatrick Sanan Notes: 3695452b02SPatrick Sanan The reference count on the AIJ matrix is not increased so you should not destroy it. 37ff585edeSJed Brown 38db781477SPatrick Sanan .seealso: `MatCreateMAIJ()` 39ff585edeSJed Brown @*/ 409371c9d4SSatish Balay PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B) { 41ace3abfcSBarry Smith PetscBool ismpimaij, isseqmaij; 42b9b97703SBarry Smith 43b9b97703SBarry Smith PetscFunctionBegin; 449566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij)); 459566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij)); 46b9b97703SBarry Smith if (ismpimaij) { 47b9b97703SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 48b9b97703SBarry Smith 49b9b97703SBarry Smith *B = b->A; 50b9b97703SBarry Smith } else if (isseqmaij) { 51b9b97703SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 52b9b97703SBarry Smith 53b9b97703SBarry Smith *B = b->AIJ; 54b9b97703SBarry Smith } else { 55b9b97703SBarry Smith *B = A; 56b9b97703SBarry Smith } 57b9b97703SBarry Smith PetscFunctionReturn(0); 58b9b97703SBarry Smith } 59b9b97703SBarry Smith 60480dffcfSJed Brown /*@ 61ff585edeSJed Brown MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size 62ff585edeSJed Brown 633f9fe445SBarry Smith Logically Collective 64ff585edeSJed Brown 65d8d19677SJose E. Roman Input Parameters: 66ff585edeSJed Brown + A - the MAIJ matrix 67ff585edeSJed Brown - dof - the block size for the new matrix 68ff585edeSJed Brown 69ff585edeSJed Brown Output Parameter: 70ff585edeSJed Brown . B - the new MAIJ matrix 71ff585edeSJed Brown 72ff585edeSJed Brown Level: advanced 73ff585edeSJed Brown 74db781477SPatrick Sanan .seealso: `MatCreateMAIJ()` 75ff585edeSJed Brown @*/ 769371c9d4SSatish Balay PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B) { 770298fd71SBarry Smith Mat Aij = NULL; 78b9b97703SBarry Smith 79b9b97703SBarry Smith PetscFunctionBegin; 80c5eb9154SBarry Smith PetscValidLogicalCollectiveInt(A, dof, 2); 819566063dSJacob Faibussowitsch PetscCall(MatMAIJGetAIJ(A, &Aij)); 829566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(Aij, dof, B)); 83b9b97703SBarry Smith PetscFunctionReturn(0); 84b9b97703SBarry Smith } 85b9b97703SBarry Smith 869371c9d4SSatish Balay PetscErrorCode MatDestroy_SeqMAIJ(Mat A) { 874cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 8882b1193eSBarry Smith 8982b1193eSBarry Smith PetscFunctionBegin; 909566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->AIJ)); 919566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 922e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL)); 939566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL)); 949566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL)); 954cb9d9b8SBarry Smith PetscFunctionReturn(0); 964cb9d9b8SBarry Smith } 974cb9d9b8SBarry Smith 989371c9d4SSatish Balay PetscErrorCode MatSetUp_MAIJ(Mat A) { 99356c569eSBarry Smith PetscFunctionBegin; 100ce94432eSBarry Smith SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices"); 101356c569eSBarry Smith } 102356c569eSBarry Smith 1039371c9d4SSatish Balay PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer) { 1040fd73130SBarry Smith Mat B; 1050fd73130SBarry Smith 1060fd73130SBarry Smith PetscFunctionBegin; 1079566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 1089566063dSJacob Faibussowitsch PetscCall(MatView(B, viewer)); 1099566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 1100fd73130SBarry Smith PetscFunctionReturn(0); 1110fd73130SBarry Smith } 1120fd73130SBarry Smith 1139371c9d4SSatish Balay PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer) { 1140fd73130SBarry Smith Mat B; 1150fd73130SBarry Smith 1160fd73130SBarry Smith PetscFunctionBegin; 1179566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B)); 1189566063dSJacob Faibussowitsch PetscCall(MatView(B, viewer)); 1199566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 1200fd73130SBarry Smith PetscFunctionReturn(0); 1210fd73130SBarry Smith } 1220fd73130SBarry Smith 1239371c9d4SSatish Balay PetscErrorCode MatDestroy_MPIMAIJ(Mat A) { 1244cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 1254cb9d9b8SBarry Smith 1264cb9d9b8SBarry Smith PetscFunctionBegin; 1279566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->AIJ)); 1289566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->OAIJ)); 1299566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->A)); 1309566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&b->ctx)); 1319566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->w)); 1329566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 1332e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL)); 1349566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL)); 1359566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL)); 1369566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); 137b9b97703SBarry Smith PetscFunctionReturn(0); 138b9b97703SBarry Smith } 139b9b97703SBarry Smith 1400bad9183SKris Buschelman /*MC 141fafad747SKris Buschelman MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 1420bad9183SKris Buschelman multicomponent problems, interpolating or restricting each component the same way independently. 1430bad9183SKris Buschelman The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 1440bad9183SKris Buschelman 1450bad9183SKris Buschelman Operations provided: 14667b8a455SSatish Balay .vb 14767b8a455SSatish Balay MatMult 14867b8a455SSatish Balay MatMultTranspose 14967b8a455SSatish Balay MatMultAdd 15067b8a455SSatish Balay MatMultTransposeAdd 15167b8a455SSatish Balay .ve 1520bad9183SKris Buschelman 1530bad9183SKris Buschelman Level: advanced 1540bad9183SKris Buschelman 155db781477SPatrick Sanan .seealso: `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()` 1560bad9183SKris Buschelman M*/ 1570bad9183SKris Buschelman 1589371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) { 1594cb9d9b8SBarry Smith Mat_MPIMAIJ *b; 16064b52464SHong Zhang PetscMPIInt size; 16182b1193eSBarry Smith 16282b1193eSBarry Smith PetscFunctionBegin; 1639566063dSJacob Faibussowitsch PetscCall(PetscNewLog(A, &b)); 164b0a32e0cSBarry Smith A->data = (void *)b; 16526fbe8dcSKarl Rupp 1669566063dSJacob Faibussowitsch PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps))); 16726fbe8dcSKarl Rupp 168356c569eSBarry Smith A->ops->setup = MatSetUp_MAIJ; 169f4a53059SBarry Smith 170f4259b30SLisandro Dalcin b->AIJ = NULL; 171cd3bbe55SBarry Smith b->dof = 0; 172f4259b30SLisandro Dalcin b->OAIJ = NULL; 173f4259b30SLisandro Dalcin b->ctx = NULL; 174f4259b30SLisandro Dalcin b->w = NULL; 1759566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 17664b52464SHong Zhang if (size == 1) { 1779566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ)); 17864b52464SHong Zhang } else { 1799566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ)); 18064b52464SHong Zhang } 18132e7c8b0SBarry Smith A->preallocated = PETSC_TRUE; 18232e7c8b0SBarry Smith A->assembled = PETSC_TRUE; 18382b1193eSBarry Smith PetscFunctionReturn(0); 18482b1193eSBarry Smith } 18582b1193eSBarry Smith 186cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 1879371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_2(Mat A, Vec xx, Vec yy) { 1884cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 189bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 190d2840e09SBarry Smith const PetscScalar *x, *v; 191d2840e09SBarry Smith PetscScalar *y, sum1, sum2; 192d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 193d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 19482b1193eSBarry Smith 195bcc973b7SBarry Smith PetscFunctionBegin; 1969566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 1979566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 198bcc973b7SBarry Smith idx = a->j; 199bcc973b7SBarry Smith v = a->a; 200bcc973b7SBarry Smith ii = a->i; 201bcc973b7SBarry Smith 202bcc973b7SBarry Smith for (i = 0; i < m; i++) { 203bcc973b7SBarry Smith jrow = ii[i]; 204bcc973b7SBarry Smith n = ii[i + 1] - jrow; 205bcc973b7SBarry Smith sum1 = 0.0; 206bcc973b7SBarry Smith sum2 = 0.0; 20726fbe8dcSKarl Rupp 208b3c51c66SMatthew Knepley nonzerorow += (n > 0); 209bcc973b7SBarry Smith for (j = 0; j < n; j++) { 210bcc973b7SBarry Smith sum1 += v[jrow] * x[2 * idx[jrow]]; 211bcc973b7SBarry Smith sum2 += v[jrow] * x[2 * idx[jrow] + 1]; 212bcc973b7SBarry Smith jrow++; 213bcc973b7SBarry Smith } 214bcc973b7SBarry Smith y[2 * i] = sum1; 215bcc973b7SBarry Smith y[2 * i + 1] = sum2; 216bcc973b7SBarry Smith } 217bcc973b7SBarry Smith 2189566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(4.0 * a->nz - 2.0 * nonzerorow)); 2199566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 2209566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 22182b1193eSBarry Smith PetscFunctionReturn(0); 22282b1193eSBarry Smith } 223bcc973b7SBarry Smith 2249371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A, Vec xx, Vec yy) { 2254cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 226bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 227d2840e09SBarry Smith const PetscScalar *x, *v; 228d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2; 229d2840e09SBarry Smith PetscInt n, i; 230d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 23182b1193eSBarry Smith 232bcc973b7SBarry Smith PetscFunctionBegin; 2339566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 2349566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 2359566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 236bfec09a0SHong Zhang 237bcc973b7SBarry Smith for (i = 0; i < m; i++) { 238bfec09a0SHong Zhang idx = a->j + a->i[i]; 239bfec09a0SHong Zhang v = a->a + a->i[i]; 240bcc973b7SBarry Smith n = a->i[i + 1] - a->i[i]; 241bcc973b7SBarry Smith alpha1 = x[2 * i]; 242bcc973b7SBarry Smith alpha2 = x[2 * i + 1]; 24326fbe8dcSKarl Rupp while (n-- > 0) { 24426fbe8dcSKarl Rupp y[2 * (*idx)] += alpha1 * (*v); 24526fbe8dcSKarl Rupp y[2 * (*idx) + 1] += alpha2 * (*v); 2469371c9d4SSatish Balay idx++; 2479371c9d4SSatish Balay v++; 24826fbe8dcSKarl Rupp } 249bcc973b7SBarry Smith } 2509566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(4.0 * a->nz)); 2519566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 2529566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 25382b1193eSBarry Smith PetscFunctionReturn(0); 25482b1193eSBarry Smith } 255bcc973b7SBarry Smith 2569371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz) { 2574cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 258bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 259d2840e09SBarry Smith const PetscScalar *x, *v; 260d2840e09SBarry Smith PetscScalar *y, sum1, sum2; 261b24ad042SBarry Smith PetscInt n, i, jrow, j; 262d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 26382b1193eSBarry Smith 264bcc973b7SBarry Smith PetscFunctionBegin; 2659566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 2669566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 2679566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 268bcc973b7SBarry Smith idx = a->j; 269bcc973b7SBarry Smith v = a->a; 270bcc973b7SBarry Smith ii = a->i; 271bcc973b7SBarry Smith 272bcc973b7SBarry Smith for (i = 0; i < m; i++) { 273bcc973b7SBarry Smith jrow = ii[i]; 274bcc973b7SBarry Smith n = ii[i + 1] - jrow; 275bcc973b7SBarry Smith sum1 = 0.0; 276bcc973b7SBarry Smith sum2 = 0.0; 277bcc973b7SBarry Smith for (j = 0; j < n; j++) { 278bcc973b7SBarry Smith sum1 += v[jrow] * x[2 * idx[jrow]]; 279bcc973b7SBarry Smith sum2 += v[jrow] * x[2 * idx[jrow] + 1]; 280bcc973b7SBarry Smith jrow++; 281bcc973b7SBarry Smith } 282bcc973b7SBarry Smith y[2 * i] += sum1; 283bcc973b7SBarry Smith y[2 * i + 1] += sum2; 284bcc973b7SBarry Smith } 285bcc973b7SBarry Smith 2869566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(4.0 * a->nz)); 2879566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 2889566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 289bcc973b7SBarry Smith PetscFunctionReturn(0); 29082b1193eSBarry Smith } 2919371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz) { 2924cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 293bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 294d2840e09SBarry Smith const PetscScalar *x, *v; 295d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2; 296d2840e09SBarry Smith PetscInt n, i; 297d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 29882b1193eSBarry Smith 299bcc973b7SBarry Smith PetscFunctionBegin; 3009566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 3019566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 3029566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 303bfec09a0SHong Zhang 304bcc973b7SBarry Smith for (i = 0; i < m; i++) { 305bfec09a0SHong Zhang idx = a->j + a->i[i]; 306bfec09a0SHong Zhang v = a->a + a->i[i]; 307bcc973b7SBarry Smith n = a->i[i + 1] - a->i[i]; 308bcc973b7SBarry Smith alpha1 = x[2 * i]; 309bcc973b7SBarry Smith alpha2 = x[2 * i + 1]; 31026fbe8dcSKarl Rupp while (n-- > 0) { 31126fbe8dcSKarl Rupp y[2 * (*idx)] += alpha1 * (*v); 31226fbe8dcSKarl Rupp y[2 * (*idx) + 1] += alpha2 * (*v); 3139371c9d4SSatish Balay idx++; 3149371c9d4SSatish Balay v++; 31526fbe8dcSKarl Rupp } 316bcc973b7SBarry Smith } 3179566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(4.0 * a->nz)); 3189566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 3199566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 320bcc973b7SBarry Smith PetscFunctionReturn(0); 32182b1193eSBarry Smith } 322cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 3239371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_3(Mat A, Vec xx, Vec yy) { 3244cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 325bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 326d2840e09SBarry Smith const PetscScalar *x, *v; 327d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3; 328d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 329d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 33082b1193eSBarry Smith 331bcc973b7SBarry Smith PetscFunctionBegin; 3329566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 3339566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 334bcc973b7SBarry Smith idx = a->j; 335bcc973b7SBarry Smith v = a->a; 336bcc973b7SBarry Smith ii = a->i; 337bcc973b7SBarry Smith 338bcc973b7SBarry Smith for (i = 0; i < m; i++) { 339bcc973b7SBarry Smith jrow = ii[i]; 340bcc973b7SBarry Smith n = ii[i + 1] - jrow; 341bcc973b7SBarry Smith sum1 = 0.0; 342bcc973b7SBarry Smith sum2 = 0.0; 343bcc973b7SBarry Smith sum3 = 0.0; 34426fbe8dcSKarl Rupp 345b3c51c66SMatthew Knepley nonzerorow += (n > 0); 346bcc973b7SBarry Smith for (j = 0; j < n; j++) { 347bcc973b7SBarry Smith sum1 += v[jrow] * x[3 * idx[jrow]]; 348bcc973b7SBarry Smith sum2 += v[jrow] * x[3 * idx[jrow] + 1]; 349bcc973b7SBarry Smith sum3 += v[jrow] * x[3 * idx[jrow] + 2]; 350bcc973b7SBarry Smith jrow++; 351bcc973b7SBarry Smith } 352bcc973b7SBarry Smith y[3 * i] = sum1; 353bcc973b7SBarry Smith y[3 * i + 1] = sum2; 354bcc973b7SBarry Smith y[3 * i + 2] = sum3; 355bcc973b7SBarry Smith } 356bcc973b7SBarry Smith 3579566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(6.0 * a->nz - 3.0 * nonzerorow)); 3589566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 3599566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 360bcc973b7SBarry Smith PetscFunctionReturn(0); 361bcc973b7SBarry Smith } 362bcc973b7SBarry Smith 3639371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A, Vec xx, Vec yy) { 3644cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 365bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 366d2840e09SBarry Smith const PetscScalar *x, *v; 367d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3; 368d2840e09SBarry Smith PetscInt n, i; 369d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 370bcc973b7SBarry Smith 371bcc973b7SBarry Smith PetscFunctionBegin; 3729566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 3739566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 3749566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 375bfec09a0SHong Zhang 376bcc973b7SBarry Smith for (i = 0; i < m; i++) { 377bfec09a0SHong Zhang idx = a->j + a->i[i]; 378bfec09a0SHong Zhang v = a->a + a->i[i]; 379bcc973b7SBarry Smith n = a->i[i + 1] - a->i[i]; 380bcc973b7SBarry Smith alpha1 = x[3 * i]; 381bcc973b7SBarry Smith alpha2 = x[3 * i + 1]; 382bcc973b7SBarry Smith alpha3 = x[3 * i + 2]; 383bcc973b7SBarry Smith while (n-- > 0) { 384bcc973b7SBarry Smith y[3 * (*idx)] += alpha1 * (*v); 385bcc973b7SBarry Smith y[3 * (*idx) + 1] += alpha2 * (*v); 386bcc973b7SBarry Smith y[3 * (*idx) + 2] += alpha3 * (*v); 3879371c9d4SSatish Balay idx++; 3889371c9d4SSatish Balay v++; 389bcc973b7SBarry Smith } 390bcc973b7SBarry Smith } 3919566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(6.0 * a->nz)); 3929566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 3939566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 394bcc973b7SBarry Smith PetscFunctionReturn(0); 395bcc973b7SBarry Smith } 396bcc973b7SBarry Smith 3979371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz) { 3984cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 399bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 400d2840e09SBarry Smith const PetscScalar *x, *v; 401d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3; 402b24ad042SBarry Smith PetscInt n, i, jrow, j; 403d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 404bcc973b7SBarry Smith 405bcc973b7SBarry Smith PetscFunctionBegin; 4069566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 4079566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 4089566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 409bcc973b7SBarry Smith idx = a->j; 410bcc973b7SBarry Smith v = a->a; 411bcc973b7SBarry Smith ii = a->i; 412bcc973b7SBarry Smith 413bcc973b7SBarry Smith for (i = 0; i < m; i++) { 414bcc973b7SBarry Smith jrow = ii[i]; 415bcc973b7SBarry Smith n = ii[i + 1] - jrow; 416bcc973b7SBarry Smith sum1 = 0.0; 417bcc973b7SBarry Smith sum2 = 0.0; 418bcc973b7SBarry Smith sum3 = 0.0; 419bcc973b7SBarry Smith for (j = 0; j < n; j++) { 420bcc973b7SBarry Smith sum1 += v[jrow] * x[3 * idx[jrow]]; 421bcc973b7SBarry Smith sum2 += v[jrow] * x[3 * idx[jrow] + 1]; 422bcc973b7SBarry Smith sum3 += v[jrow] * x[3 * idx[jrow] + 2]; 423bcc973b7SBarry Smith jrow++; 424bcc973b7SBarry Smith } 425bcc973b7SBarry Smith y[3 * i] += sum1; 426bcc973b7SBarry Smith y[3 * i + 1] += sum2; 427bcc973b7SBarry Smith y[3 * i + 2] += sum3; 428bcc973b7SBarry Smith } 429bcc973b7SBarry Smith 4309566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(6.0 * a->nz)); 4319566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 4329566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 433bcc973b7SBarry Smith PetscFunctionReturn(0); 434bcc973b7SBarry Smith } 4359371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz) { 4364cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 437bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 438d2840e09SBarry Smith const PetscScalar *x, *v; 439d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3; 440d2840e09SBarry Smith PetscInt n, i; 441d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 442bcc973b7SBarry Smith 443bcc973b7SBarry Smith PetscFunctionBegin; 4449566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 4459566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 4469566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 447bcc973b7SBarry Smith for (i = 0; i < m; i++) { 448bfec09a0SHong Zhang idx = a->j + a->i[i]; 449bfec09a0SHong Zhang v = a->a + a->i[i]; 450bcc973b7SBarry Smith n = a->i[i + 1] - a->i[i]; 451bcc973b7SBarry Smith alpha1 = x[3 * i]; 452bcc973b7SBarry Smith alpha2 = x[3 * i + 1]; 453bcc973b7SBarry Smith alpha3 = x[3 * i + 2]; 454bcc973b7SBarry Smith while (n-- > 0) { 455bcc973b7SBarry Smith y[3 * (*idx)] += alpha1 * (*v); 456bcc973b7SBarry Smith y[3 * (*idx) + 1] += alpha2 * (*v); 457bcc973b7SBarry Smith y[3 * (*idx) + 2] += alpha3 * (*v); 4589371c9d4SSatish Balay idx++; 4599371c9d4SSatish Balay v++; 460bcc973b7SBarry Smith } 461bcc973b7SBarry Smith } 4629566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(6.0 * a->nz)); 4639566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 4649566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 465bcc973b7SBarry Smith PetscFunctionReturn(0); 466bcc973b7SBarry Smith } 467bcc973b7SBarry Smith 468bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/ 4699371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_4(Mat A, Vec xx, Vec yy) { 4704cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 471bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 472d2840e09SBarry Smith const PetscScalar *x, *v; 473d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4; 474d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 475d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 476bcc973b7SBarry Smith 477bcc973b7SBarry Smith PetscFunctionBegin; 4789566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 4799566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 480bcc973b7SBarry Smith idx = a->j; 481bcc973b7SBarry Smith v = a->a; 482bcc973b7SBarry Smith ii = a->i; 483bcc973b7SBarry Smith 484bcc973b7SBarry Smith for (i = 0; i < m; i++) { 485bcc973b7SBarry Smith jrow = ii[i]; 486bcc973b7SBarry Smith n = ii[i + 1] - jrow; 487bcc973b7SBarry Smith sum1 = 0.0; 488bcc973b7SBarry Smith sum2 = 0.0; 489bcc973b7SBarry Smith sum3 = 0.0; 490bcc973b7SBarry Smith sum4 = 0.0; 491b3c51c66SMatthew Knepley nonzerorow += (n > 0); 492bcc973b7SBarry Smith for (j = 0; j < n; j++) { 493bcc973b7SBarry Smith sum1 += v[jrow] * x[4 * idx[jrow]]; 494bcc973b7SBarry Smith sum2 += v[jrow] * x[4 * idx[jrow] + 1]; 495bcc973b7SBarry Smith sum3 += v[jrow] * x[4 * idx[jrow] + 2]; 496bcc973b7SBarry Smith sum4 += v[jrow] * x[4 * idx[jrow] + 3]; 497bcc973b7SBarry Smith jrow++; 498bcc973b7SBarry Smith } 499bcc973b7SBarry Smith y[4 * i] = sum1; 500bcc973b7SBarry Smith y[4 * i + 1] = sum2; 501bcc973b7SBarry Smith y[4 * i + 2] = sum3; 502bcc973b7SBarry Smith y[4 * i + 3] = sum4; 503bcc973b7SBarry Smith } 504bcc973b7SBarry Smith 5059566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(8.0 * a->nz - 4.0 * nonzerorow)); 5069566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 5079566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 508bcc973b7SBarry Smith PetscFunctionReturn(0); 509bcc973b7SBarry Smith } 510bcc973b7SBarry Smith 5119371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A, Vec xx, Vec yy) { 5124cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 513bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 514d2840e09SBarry Smith const PetscScalar *x, *v; 515d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4; 516d2840e09SBarry Smith PetscInt n, i; 517d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 518bcc973b7SBarry Smith 519bcc973b7SBarry Smith PetscFunctionBegin; 5209566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 5219566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 5229566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 523bcc973b7SBarry Smith for (i = 0; i < m; i++) { 524bfec09a0SHong Zhang idx = a->j + a->i[i]; 525bfec09a0SHong Zhang v = a->a + a->i[i]; 526bcc973b7SBarry Smith n = a->i[i + 1] - a->i[i]; 527bcc973b7SBarry Smith alpha1 = x[4 * i]; 528bcc973b7SBarry Smith alpha2 = x[4 * i + 1]; 529bcc973b7SBarry Smith alpha3 = x[4 * i + 2]; 530bcc973b7SBarry Smith alpha4 = x[4 * i + 3]; 531bcc973b7SBarry Smith while (n-- > 0) { 532bcc973b7SBarry Smith y[4 * (*idx)] += alpha1 * (*v); 533bcc973b7SBarry Smith y[4 * (*idx) + 1] += alpha2 * (*v); 534bcc973b7SBarry Smith y[4 * (*idx) + 2] += alpha3 * (*v); 535bcc973b7SBarry Smith y[4 * (*idx) + 3] += alpha4 * (*v); 5369371c9d4SSatish Balay idx++; 5379371c9d4SSatish Balay v++; 538bcc973b7SBarry Smith } 539bcc973b7SBarry Smith } 5409566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(8.0 * a->nz)); 5419566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 5429566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 543bcc973b7SBarry Smith PetscFunctionReturn(0); 544bcc973b7SBarry Smith } 545bcc973b7SBarry Smith 5469371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz) { 5474cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 548f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 549d2840e09SBarry Smith const PetscScalar *x, *v; 550d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4; 551b24ad042SBarry Smith PetscInt n, i, jrow, j; 552d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 553f9fae5adSBarry Smith 554f9fae5adSBarry Smith PetscFunctionBegin; 5559566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 5569566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 5579566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 558f9fae5adSBarry Smith idx = a->j; 559f9fae5adSBarry Smith v = a->a; 560f9fae5adSBarry Smith ii = a->i; 561f9fae5adSBarry Smith 562f9fae5adSBarry Smith for (i = 0; i < m; i++) { 563f9fae5adSBarry Smith jrow = ii[i]; 564f9fae5adSBarry Smith n = ii[i + 1] - jrow; 565f9fae5adSBarry Smith sum1 = 0.0; 566f9fae5adSBarry Smith sum2 = 0.0; 567f9fae5adSBarry Smith sum3 = 0.0; 568f9fae5adSBarry Smith sum4 = 0.0; 569f9fae5adSBarry Smith for (j = 0; j < n; j++) { 570f9fae5adSBarry Smith sum1 += v[jrow] * x[4 * idx[jrow]]; 571f9fae5adSBarry Smith sum2 += v[jrow] * x[4 * idx[jrow] + 1]; 572f9fae5adSBarry Smith sum3 += v[jrow] * x[4 * idx[jrow] + 2]; 573f9fae5adSBarry Smith sum4 += v[jrow] * x[4 * idx[jrow] + 3]; 574f9fae5adSBarry Smith jrow++; 575f9fae5adSBarry Smith } 576f9fae5adSBarry Smith y[4 * i] += sum1; 577f9fae5adSBarry Smith y[4 * i + 1] += sum2; 578f9fae5adSBarry Smith y[4 * i + 2] += sum3; 579f9fae5adSBarry Smith y[4 * i + 3] += sum4; 580f9fae5adSBarry Smith } 581f9fae5adSBarry Smith 5829566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(8.0 * a->nz)); 5839566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 5849566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 585f9fae5adSBarry Smith PetscFunctionReturn(0); 586bcc973b7SBarry Smith } 5879371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz) { 5884cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 589f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 590d2840e09SBarry Smith const PetscScalar *x, *v; 591d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4; 592d2840e09SBarry Smith PetscInt n, i; 593d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 594f9fae5adSBarry Smith 595f9fae5adSBarry Smith PetscFunctionBegin; 5969566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 5979566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 5989566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 599bfec09a0SHong Zhang 600f9fae5adSBarry Smith for (i = 0; i < m; i++) { 601bfec09a0SHong Zhang idx = a->j + a->i[i]; 602bfec09a0SHong Zhang v = a->a + a->i[i]; 603f9fae5adSBarry Smith n = a->i[i + 1] - a->i[i]; 604f9fae5adSBarry Smith alpha1 = x[4 * i]; 605f9fae5adSBarry Smith alpha2 = x[4 * i + 1]; 606f9fae5adSBarry Smith alpha3 = x[4 * i + 2]; 607f9fae5adSBarry Smith alpha4 = x[4 * i + 3]; 608f9fae5adSBarry Smith while (n-- > 0) { 609f9fae5adSBarry Smith y[4 * (*idx)] += alpha1 * (*v); 610f9fae5adSBarry Smith y[4 * (*idx) + 1] += alpha2 * (*v); 611f9fae5adSBarry Smith y[4 * (*idx) + 2] += alpha3 * (*v); 612f9fae5adSBarry Smith y[4 * (*idx) + 3] += alpha4 * (*v); 6139371c9d4SSatish Balay idx++; 6149371c9d4SSatish Balay v++; 615f9fae5adSBarry Smith } 616f9fae5adSBarry Smith } 6179566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(8.0 * a->nz)); 6189566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 6199566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 620f9fae5adSBarry Smith PetscFunctionReturn(0); 621f9fae5adSBarry Smith } 622f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/ 6236cd98798SBarry Smith 6249371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_5(Mat A, Vec xx, Vec yy) { 6254cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 626f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 627d2840e09SBarry Smith const PetscScalar *x, *v; 628d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5; 629d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 630d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 631f9fae5adSBarry Smith 632f9fae5adSBarry Smith PetscFunctionBegin; 6339566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 6349566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 635f9fae5adSBarry Smith idx = a->j; 636f9fae5adSBarry Smith v = a->a; 637f9fae5adSBarry Smith ii = a->i; 638f9fae5adSBarry Smith 639f9fae5adSBarry Smith for (i = 0; i < m; i++) { 640f9fae5adSBarry Smith jrow = ii[i]; 641f9fae5adSBarry Smith n = ii[i + 1] - jrow; 642f9fae5adSBarry Smith sum1 = 0.0; 643f9fae5adSBarry Smith sum2 = 0.0; 644f9fae5adSBarry Smith sum3 = 0.0; 645f9fae5adSBarry Smith sum4 = 0.0; 646f9fae5adSBarry Smith sum5 = 0.0; 64726fbe8dcSKarl Rupp 648b3c51c66SMatthew Knepley nonzerorow += (n > 0); 649f9fae5adSBarry Smith for (j = 0; j < n; j++) { 650f9fae5adSBarry Smith sum1 += v[jrow] * x[5 * idx[jrow]]; 651f9fae5adSBarry Smith sum2 += v[jrow] * x[5 * idx[jrow] + 1]; 652f9fae5adSBarry Smith sum3 += v[jrow] * x[5 * idx[jrow] + 2]; 653f9fae5adSBarry Smith sum4 += v[jrow] * x[5 * idx[jrow] + 3]; 654f9fae5adSBarry Smith sum5 += v[jrow] * x[5 * idx[jrow] + 4]; 655f9fae5adSBarry Smith jrow++; 656f9fae5adSBarry Smith } 657f9fae5adSBarry Smith y[5 * i] = sum1; 658f9fae5adSBarry Smith y[5 * i + 1] = sum2; 659f9fae5adSBarry Smith y[5 * i + 2] = sum3; 660f9fae5adSBarry Smith y[5 * i + 3] = sum4; 661f9fae5adSBarry Smith y[5 * i + 4] = sum5; 662f9fae5adSBarry Smith } 663f9fae5adSBarry Smith 6649566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(10.0 * a->nz - 5.0 * nonzerorow)); 6659566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 6669566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 667f9fae5adSBarry Smith PetscFunctionReturn(0); 668f9fae5adSBarry Smith } 669f9fae5adSBarry Smith 6709371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A, Vec xx, Vec yy) { 6714cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 672f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 673d2840e09SBarry Smith const PetscScalar *x, *v; 674d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5; 675d2840e09SBarry Smith PetscInt n, i; 676d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 677f9fae5adSBarry Smith 678f9fae5adSBarry Smith PetscFunctionBegin; 6799566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 6809566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 6819566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 682bfec09a0SHong Zhang 683f9fae5adSBarry Smith for (i = 0; i < m; i++) { 684bfec09a0SHong Zhang idx = a->j + a->i[i]; 685bfec09a0SHong Zhang v = a->a + a->i[i]; 686f9fae5adSBarry Smith n = a->i[i + 1] - a->i[i]; 687f9fae5adSBarry Smith alpha1 = x[5 * i]; 688f9fae5adSBarry Smith alpha2 = x[5 * i + 1]; 689f9fae5adSBarry Smith alpha3 = x[5 * i + 2]; 690f9fae5adSBarry Smith alpha4 = x[5 * i + 3]; 691f9fae5adSBarry Smith alpha5 = x[5 * i + 4]; 692f9fae5adSBarry Smith while (n-- > 0) { 693f9fae5adSBarry Smith y[5 * (*idx)] += alpha1 * (*v); 694f9fae5adSBarry Smith y[5 * (*idx) + 1] += alpha2 * (*v); 695f9fae5adSBarry Smith y[5 * (*idx) + 2] += alpha3 * (*v); 696f9fae5adSBarry Smith y[5 * (*idx) + 3] += alpha4 * (*v); 697f9fae5adSBarry Smith y[5 * (*idx) + 4] += alpha5 * (*v); 6989371c9d4SSatish Balay idx++; 6999371c9d4SSatish Balay v++; 700f9fae5adSBarry Smith } 701f9fae5adSBarry Smith } 7029566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(10.0 * a->nz)); 7039566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 7049566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 705f9fae5adSBarry Smith PetscFunctionReturn(0); 706f9fae5adSBarry Smith } 707f9fae5adSBarry Smith 7089371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz) { 7094cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 710f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 711d2840e09SBarry Smith const PetscScalar *x, *v; 712d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5; 713b24ad042SBarry Smith PetscInt n, i, jrow, j; 714d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 715f9fae5adSBarry Smith 716f9fae5adSBarry Smith PetscFunctionBegin; 7179566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 7189566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 7199566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 720f9fae5adSBarry Smith idx = a->j; 721f9fae5adSBarry Smith v = a->a; 722f9fae5adSBarry Smith ii = a->i; 723f9fae5adSBarry Smith 724f9fae5adSBarry Smith for (i = 0; i < m; i++) { 725f9fae5adSBarry Smith jrow = ii[i]; 726f9fae5adSBarry Smith n = ii[i + 1] - jrow; 727f9fae5adSBarry Smith sum1 = 0.0; 728f9fae5adSBarry Smith sum2 = 0.0; 729f9fae5adSBarry Smith sum3 = 0.0; 730f9fae5adSBarry Smith sum4 = 0.0; 731f9fae5adSBarry Smith sum5 = 0.0; 732f9fae5adSBarry Smith for (j = 0; j < n; j++) { 733f9fae5adSBarry Smith sum1 += v[jrow] * x[5 * idx[jrow]]; 734f9fae5adSBarry Smith sum2 += v[jrow] * x[5 * idx[jrow] + 1]; 735f9fae5adSBarry Smith sum3 += v[jrow] * x[5 * idx[jrow] + 2]; 736f9fae5adSBarry Smith sum4 += v[jrow] * x[5 * idx[jrow] + 3]; 737f9fae5adSBarry Smith sum5 += v[jrow] * x[5 * idx[jrow] + 4]; 738f9fae5adSBarry Smith jrow++; 739f9fae5adSBarry Smith } 740f9fae5adSBarry Smith y[5 * i] += sum1; 741f9fae5adSBarry Smith y[5 * i + 1] += sum2; 742f9fae5adSBarry Smith y[5 * i + 2] += sum3; 743f9fae5adSBarry Smith y[5 * i + 3] += sum4; 744f9fae5adSBarry Smith y[5 * i + 4] += sum5; 745f9fae5adSBarry Smith } 746f9fae5adSBarry Smith 7479566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(10.0 * a->nz)); 7489566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 7499566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 750f9fae5adSBarry Smith PetscFunctionReturn(0); 751f9fae5adSBarry Smith } 752f9fae5adSBarry Smith 7539371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz) { 7544cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 755f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 756d2840e09SBarry Smith const PetscScalar *x, *v; 757d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5; 758d2840e09SBarry Smith PetscInt n, i; 759d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 760f9fae5adSBarry Smith 761f9fae5adSBarry Smith PetscFunctionBegin; 7629566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 7639566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 7649566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 765bfec09a0SHong Zhang 766f9fae5adSBarry Smith for (i = 0; i < m; i++) { 767bfec09a0SHong Zhang idx = a->j + a->i[i]; 768bfec09a0SHong Zhang v = a->a + a->i[i]; 769f9fae5adSBarry Smith n = a->i[i + 1] - a->i[i]; 770f9fae5adSBarry Smith alpha1 = x[5 * i]; 771f9fae5adSBarry Smith alpha2 = x[5 * i + 1]; 772f9fae5adSBarry Smith alpha3 = x[5 * i + 2]; 773f9fae5adSBarry Smith alpha4 = x[5 * i + 3]; 774f9fae5adSBarry Smith alpha5 = x[5 * i + 4]; 775f9fae5adSBarry Smith while (n-- > 0) { 776f9fae5adSBarry Smith y[5 * (*idx)] += alpha1 * (*v); 777f9fae5adSBarry Smith y[5 * (*idx) + 1] += alpha2 * (*v); 778f9fae5adSBarry Smith y[5 * (*idx) + 2] += alpha3 * (*v); 779f9fae5adSBarry Smith y[5 * (*idx) + 3] += alpha4 * (*v); 780f9fae5adSBarry Smith y[5 * (*idx) + 4] += alpha5 * (*v); 7819371c9d4SSatish Balay idx++; 7829371c9d4SSatish Balay v++; 783f9fae5adSBarry Smith } 784f9fae5adSBarry Smith } 7859566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(10.0 * a->nz)); 7869566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 7879566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 788f9fae5adSBarry Smith PetscFunctionReturn(0); 789bcc973b7SBarry Smith } 790bcc973b7SBarry Smith 7916cd98798SBarry Smith /* ------------------------------------------------------------------------------*/ 7929371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_6(Mat A, Vec xx, Vec yy) { 7936cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 7946cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 795d2840e09SBarry Smith const PetscScalar *x, *v; 796d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6; 797d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 798d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 7996cd98798SBarry Smith 8006cd98798SBarry Smith PetscFunctionBegin; 8019566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 8029566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 8036cd98798SBarry Smith idx = a->j; 8046cd98798SBarry Smith v = a->a; 8056cd98798SBarry Smith ii = a->i; 8066cd98798SBarry Smith 8076cd98798SBarry Smith for (i = 0; i < m; i++) { 8086cd98798SBarry Smith jrow = ii[i]; 8096cd98798SBarry Smith n = ii[i + 1] - jrow; 8106cd98798SBarry Smith sum1 = 0.0; 8116cd98798SBarry Smith sum2 = 0.0; 8126cd98798SBarry Smith sum3 = 0.0; 8136cd98798SBarry Smith sum4 = 0.0; 8146cd98798SBarry Smith sum5 = 0.0; 8156cd98798SBarry Smith sum6 = 0.0; 81626fbe8dcSKarl Rupp 817b3c51c66SMatthew Knepley nonzerorow += (n > 0); 8186cd98798SBarry Smith for (j = 0; j < n; j++) { 8196cd98798SBarry Smith sum1 += v[jrow] * x[6 * idx[jrow]]; 8206cd98798SBarry Smith sum2 += v[jrow] * x[6 * idx[jrow] + 1]; 8216cd98798SBarry Smith sum3 += v[jrow] * x[6 * idx[jrow] + 2]; 8226cd98798SBarry Smith sum4 += v[jrow] * x[6 * idx[jrow] + 3]; 8236cd98798SBarry Smith sum5 += v[jrow] * x[6 * idx[jrow] + 4]; 8246cd98798SBarry Smith sum6 += v[jrow] * x[6 * idx[jrow] + 5]; 8256cd98798SBarry Smith jrow++; 8266cd98798SBarry Smith } 8276cd98798SBarry Smith y[6 * i] = sum1; 8286cd98798SBarry Smith y[6 * i + 1] = sum2; 8296cd98798SBarry Smith y[6 * i + 2] = sum3; 8306cd98798SBarry Smith y[6 * i + 3] = sum4; 8316cd98798SBarry Smith y[6 * i + 4] = sum5; 8326cd98798SBarry Smith y[6 * i + 5] = sum6; 8336cd98798SBarry Smith } 8346cd98798SBarry Smith 8359566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(12.0 * a->nz - 6.0 * nonzerorow)); 8369566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 8379566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 8386cd98798SBarry Smith PetscFunctionReturn(0); 8396cd98798SBarry Smith } 8406cd98798SBarry Smith 8419371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A, Vec xx, Vec yy) { 8426cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 8436cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 844d2840e09SBarry Smith const PetscScalar *x, *v; 845d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6; 846d2840e09SBarry Smith PetscInt n, i; 847d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 8486cd98798SBarry Smith 8496cd98798SBarry Smith PetscFunctionBegin; 8509566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 8519566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 8529566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 853bfec09a0SHong Zhang 8546cd98798SBarry Smith for (i = 0; i < m; i++) { 855bfec09a0SHong Zhang idx = a->j + a->i[i]; 856bfec09a0SHong Zhang v = a->a + a->i[i]; 8576cd98798SBarry Smith n = a->i[i + 1] - a->i[i]; 8586cd98798SBarry Smith alpha1 = x[6 * i]; 8596cd98798SBarry Smith alpha2 = x[6 * i + 1]; 8606cd98798SBarry Smith alpha3 = x[6 * i + 2]; 8616cd98798SBarry Smith alpha4 = x[6 * i + 3]; 8626cd98798SBarry Smith alpha5 = x[6 * i + 4]; 8636cd98798SBarry Smith alpha6 = x[6 * i + 5]; 8646cd98798SBarry Smith while (n-- > 0) { 8656cd98798SBarry Smith y[6 * (*idx)] += alpha1 * (*v); 8666cd98798SBarry Smith y[6 * (*idx) + 1] += alpha2 * (*v); 8676cd98798SBarry Smith y[6 * (*idx) + 2] += alpha3 * (*v); 8686cd98798SBarry Smith y[6 * (*idx) + 3] += alpha4 * (*v); 8696cd98798SBarry Smith y[6 * (*idx) + 4] += alpha5 * (*v); 8706cd98798SBarry Smith y[6 * (*idx) + 5] += alpha6 * (*v); 8719371c9d4SSatish Balay idx++; 8729371c9d4SSatish Balay v++; 8736cd98798SBarry Smith } 8746cd98798SBarry Smith } 8759566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(12.0 * a->nz)); 8769566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 8779566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 8786cd98798SBarry Smith PetscFunctionReturn(0); 8796cd98798SBarry Smith } 8806cd98798SBarry Smith 8819371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz) { 8826cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 8836cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 884d2840e09SBarry Smith const PetscScalar *x, *v; 885d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6; 886b24ad042SBarry Smith PetscInt n, i, jrow, j; 887d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 8886cd98798SBarry Smith 8896cd98798SBarry Smith PetscFunctionBegin; 8909566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 8919566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 8929566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 8936cd98798SBarry Smith idx = a->j; 8946cd98798SBarry Smith v = a->a; 8956cd98798SBarry Smith ii = a->i; 8966cd98798SBarry Smith 8976cd98798SBarry Smith for (i = 0; i < m; i++) { 8986cd98798SBarry Smith jrow = ii[i]; 8996cd98798SBarry Smith n = ii[i + 1] - jrow; 9006cd98798SBarry Smith sum1 = 0.0; 9016cd98798SBarry Smith sum2 = 0.0; 9026cd98798SBarry Smith sum3 = 0.0; 9036cd98798SBarry Smith sum4 = 0.0; 9046cd98798SBarry Smith sum5 = 0.0; 9056cd98798SBarry Smith sum6 = 0.0; 9066cd98798SBarry Smith for (j = 0; j < n; j++) { 9076cd98798SBarry Smith sum1 += v[jrow] * x[6 * idx[jrow]]; 9086cd98798SBarry Smith sum2 += v[jrow] * x[6 * idx[jrow] + 1]; 9096cd98798SBarry Smith sum3 += v[jrow] * x[6 * idx[jrow] + 2]; 9106cd98798SBarry Smith sum4 += v[jrow] * x[6 * idx[jrow] + 3]; 9116cd98798SBarry Smith sum5 += v[jrow] * x[6 * idx[jrow] + 4]; 9126cd98798SBarry Smith sum6 += v[jrow] * x[6 * idx[jrow] + 5]; 9136cd98798SBarry Smith jrow++; 9146cd98798SBarry Smith } 9156cd98798SBarry Smith y[6 * i] += sum1; 9166cd98798SBarry Smith y[6 * i + 1] += sum2; 9176cd98798SBarry Smith y[6 * i + 2] += sum3; 9186cd98798SBarry Smith y[6 * i + 3] += sum4; 9196cd98798SBarry Smith y[6 * i + 4] += sum5; 9206cd98798SBarry Smith y[6 * i + 5] += sum6; 9216cd98798SBarry Smith } 9226cd98798SBarry Smith 9239566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(12.0 * a->nz)); 9249566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 9259566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 9266cd98798SBarry Smith PetscFunctionReturn(0); 9276cd98798SBarry Smith } 9286cd98798SBarry Smith 9299371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz) { 9306cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 9316cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 932d2840e09SBarry Smith const PetscScalar *x, *v; 933d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6; 934d2840e09SBarry Smith PetscInt n, i; 935d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 9366cd98798SBarry Smith 9376cd98798SBarry Smith PetscFunctionBegin; 9389566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 9399566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 9409566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 941bfec09a0SHong Zhang 9426cd98798SBarry Smith for (i = 0; i < m; i++) { 943bfec09a0SHong Zhang idx = a->j + a->i[i]; 944bfec09a0SHong Zhang v = a->a + a->i[i]; 9456cd98798SBarry Smith n = a->i[i + 1] - a->i[i]; 9466cd98798SBarry Smith alpha1 = x[6 * i]; 9476cd98798SBarry Smith alpha2 = x[6 * i + 1]; 9486cd98798SBarry Smith alpha3 = x[6 * i + 2]; 9496cd98798SBarry Smith alpha4 = x[6 * i + 3]; 9506cd98798SBarry Smith alpha5 = x[6 * i + 4]; 9516cd98798SBarry Smith alpha6 = x[6 * i + 5]; 9526cd98798SBarry Smith while (n-- > 0) { 9536cd98798SBarry Smith y[6 * (*idx)] += alpha1 * (*v); 9546cd98798SBarry Smith y[6 * (*idx) + 1] += alpha2 * (*v); 9556cd98798SBarry Smith y[6 * (*idx) + 2] += alpha3 * (*v); 9566cd98798SBarry Smith y[6 * (*idx) + 3] += alpha4 * (*v); 9576cd98798SBarry Smith y[6 * (*idx) + 4] += alpha5 * (*v); 9586cd98798SBarry Smith y[6 * (*idx) + 5] += alpha6 * (*v); 9599371c9d4SSatish Balay idx++; 9609371c9d4SSatish Balay v++; 9616cd98798SBarry Smith } 9626cd98798SBarry Smith } 9639566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(12.0 * a->nz)); 9649566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 9659566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 9666cd98798SBarry Smith PetscFunctionReturn(0); 9676cd98798SBarry Smith } 9686cd98798SBarry Smith 96966ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/ 9709371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_7(Mat A, Vec xx, Vec yy) { 971ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 972ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 973d2840e09SBarry Smith const PetscScalar *x, *v; 974d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7; 975d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 976d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 977ed8eea03SBarry Smith 978ed8eea03SBarry Smith PetscFunctionBegin; 9799566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 9809566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 981ed8eea03SBarry Smith idx = a->j; 982ed8eea03SBarry Smith v = a->a; 983ed8eea03SBarry Smith ii = a->i; 984ed8eea03SBarry Smith 985ed8eea03SBarry Smith for (i = 0; i < m; i++) { 986ed8eea03SBarry Smith jrow = ii[i]; 987ed8eea03SBarry Smith n = ii[i + 1] - jrow; 988ed8eea03SBarry Smith sum1 = 0.0; 989ed8eea03SBarry Smith sum2 = 0.0; 990ed8eea03SBarry Smith sum3 = 0.0; 991ed8eea03SBarry Smith sum4 = 0.0; 992ed8eea03SBarry Smith sum5 = 0.0; 993ed8eea03SBarry Smith sum6 = 0.0; 994ed8eea03SBarry Smith sum7 = 0.0; 99526fbe8dcSKarl Rupp 996b3c51c66SMatthew Knepley nonzerorow += (n > 0); 997ed8eea03SBarry Smith for (j = 0; j < n; j++) { 998ed8eea03SBarry Smith sum1 += v[jrow] * x[7 * idx[jrow]]; 999ed8eea03SBarry Smith sum2 += v[jrow] * x[7 * idx[jrow] + 1]; 1000ed8eea03SBarry Smith sum3 += v[jrow] * x[7 * idx[jrow] + 2]; 1001ed8eea03SBarry Smith sum4 += v[jrow] * x[7 * idx[jrow] + 3]; 1002ed8eea03SBarry Smith sum5 += v[jrow] * x[7 * idx[jrow] + 4]; 1003ed8eea03SBarry Smith sum6 += v[jrow] * x[7 * idx[jrow] + 5]; 1004ed8eea03SBarry Smith sum7 += v[jrow] * x[7 * idx[jrow] + 6]; 1005ed8eea03SBarry Smith jrow++; 1006ed8eea03SBarry Smith } 1007ed8eea03SBarry Smith y[7 * i] = sum1; 1008ed8eea03SBarry Smith y[7 * i + 1] = sum2; 1009ed8eea03SBarry Smith y[7 * i + 2] = sum3; 1010ed8eea03SBarry Smith y[7 * i + 3] = sum4; 1011ed8eea03SBarry Smith y[7 * i + 4] = sum5; 1012ed8eea03SBarry Smith y[7 * i + 5] = sum6; 1013ed8eea03SBarry Smith y[7 * i + 6] = sum7; 1014ed8eea03SBarry Smith } 1015ed8eea03SBarry Smith 10169566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(14.0 * a->nz - 7.0 * nonzerorow)); 10179566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 10189566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 1019ed8eea03SBarry Smith PetscFunctionReturn(0); 1020ed8eea03SBarry Smith } 1021ed8eea03SBarry Smith 10229371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A, Vec xx, Vec yy) { 1023ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1024ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1025d2840e09SBarry Smith const PetscScalar *x, *v; 1026d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7; 1027d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 1028d2840e09SBarry Smith PetscInt n, i; 1029ed8eea03SBarry Smith 1030ed8eea03SBarry Smith PetscFunctionBegin; 10319566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 10329566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 10339566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 1034ed8eea03SBarry Smith 1035ed8eea03SBarry Smith for (i = 0; i < m; i++) { 1036ed8eea03SBarry Smith idx = a->j + a->i[i]; 1037ed8eea03SBarry Smith v = a->a + a->i[i]; 1038ed8eea03SBarry Smith n = a->i[i + 1] - a->i[i]; 1039ed8eea03SBarry Smith alpha1 = x[7 * i]; 1040ed8eea03SBarry Smith alpha2 = x[7 * i + 1]; 1041ed8eea03SBarry Smith alpha3 = x[7 * i + 2]; 1042ed8eea03SBarry Smith alpha4 = x[7 * i + 3]; 1043ed8eea03SBarry Smith alpha5 = x[7 * i + 4]; 1044ed8eea03SBarry Smith alpha6 = x[7 * i + 5]; 1045ed8eea03SBarry Smith alpha7 = x[7 * i + 6]; 1046ed8eea03SBarry Smith while (n-- > 0) { 1047ed8eea03SBarry Smith y[7 * (*idx)] += alpha1 * (*v); 1048ed8eea03SBarry Smith y[7 * (*idx) + 1] += alpha2 * (*v); 1049ed8eea03SBarry Smith y[7 * (*idx) + 2] += alpha3 * (*v); 1050ed8eea03SBarry Smith y[7 * (*idx) + 3] += alpha4 * (*v); 1051ed8eea03SBarry Smith y[7 * (*idx) + 4] += alpha5 * (*v); 1052ed8eea03SBarry Smith y[7 * (*idx) + 5] += alpha6 * (*v); 1053ed8eea03SBarry Smith y[7 * (*idx) + 6] += alpha7 * (*v); 10549371c9d4SSatish Balay idx++; 10559371c9d4SSatish Balay v++; 1056ed8eea03SBarry Smith } 1057ed8eea03SBarry Smith } 10589566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(14.0 * a->nz)); 10599566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 10609566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 1061ed8eea03SBarry Smith PetscFunctionReturn(0); 1062ed8eea03SBarry Smith } 1063ed8eea03SBarry Smith 10649371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz) { 1065ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1066ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1067d2840e09SBarry Smith const PetscScalar *x, *v; 1068d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1069d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1070ed8eea03SBarry Smith PetscInt n, i, jrow, j; 1071ed8eea03SBarry Smith 1072ed8eea03SBarry Smith PetscFunctionBegin; 10739566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 10749566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 10759566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 1076ed8eea03SBarry Smith idx = a->j; 1077ed8eea03SBarry Smith v = a->a; 1078ed8eea03SBarry Smith ii = a->i; 1079ed8eea03SBarry Smith 1080ed8eea03SBarry Smith for (i = 0; i < m; i++) { 1081ed8eea03SBarry Smith jrow = ii[i]; 1082ed8eea03SBarry Smith n = ii[i + 1] - jrow; 1083ed8eea03SBarry Smith sum1 = 0.0; 1084ed8eea03SBarry Smith sum2 = 0.0; 1085ed8eea03SBarry Smith sum3 = 0.0; 1086ed8eea03SBarry Smith sum4 = 0.0; 1087ed8eea03SBarry Smith sum5 = 0.0; 1088ed8eea03SBarry Smith sum6 = 0.0; 1089ed8eea03SBarry Smith sum7 = 0.0; 1090ed8eea03SBarry Smith for (j = 0; j < n; j++) { 1091ed8eea03SBarry Smith sum1 += v[jrow] * x[7 * idx[jrow]]; 1092ed8eea03SBarry Smith sum2 += v[jrow] * x[7 * idx[jrow] + 1]; 1093ed8eea03SBarry Smith sum3 += v[jrow] * x[7 * idx[jrow] + 2]; 1094ed8eea03SBarry Smith sum4 += v[jrow] * x[7 * idx[jrow] + 3]; 1095ed8eea03SBarry Smith sum5 += v[jrow] * x[7 * idx[jrow] + 4]; 1096ed8eea03SBarry Smith sum6 += v[jrow] * x[7 * idx[jrow] + 5]; 1097ed8eea03SBarry Smith sum7 += v[jrow] * x[7 * idx[jrow] + 6]; 1098ed8eea03SBarry Smith jrow++; 1099ed8eea03SBarry Smith } 1100ed8eea03SBarry Smith y[7 * i] += sum1; 1101ed8eea03SBarry Smith y[7 * i + 1] += sum2; 1102ed8eea03SBarry Smith y[7 * i + 2] += sum3; 1103ed8eea03SBarry Smith y[7 * i + 3] += sum4; 1104ed8eea03SBarry Smith y[7 * i + 4] += sum5; 1105ed8eea03SBarry Smith y[7 * i + 5] += sum6; 1106ed8eea03SBarry Smith y[7 * i + 6] += sum7; 1107ed8eea03SBarry Smith } 1108ed8eea03SBarry Smith 11099566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(14.0 * a->nz)); 11109566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 11119566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 1112ed8eea03SBarry Smith PetscFunctionReturn(0); 1113ed8eea03SBarry Smith } 1114ed8eea03SBarry Smith 11159371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz) { 1116ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1117ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1118d2840e09SBarry Smith const PetscScalar *x, *v; 1119d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7; 1120d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 1121d2840e09SBarry Smith PetscInt n, i; 1122ed8eea03SBarry Smith 1123ed8eea03SBarry Smith PetscFunctionBegin; 11249566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 11259566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 11269566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 1127ed8eea03SBarry Smith for (i = 0; i < m; i++) { 1128ed8eea03SBarry Smith idx = a->j + a->i[i]; 1129ed8eea03SBarry Smith v = a->a + a->i[i]; 1130ed8eea03SBarry Smith n = a->i[i + 1] - a->i[i]; 1131ed8eea03SBarry Smith alpha1 = x[7 * i]; 1132ed8eea03SBarry Smith alpha2 = x[7 * i + 1]; 1133ed8eea03SBarry Smith alpha3 = x[7 * i + 2]; 1134ed8eea03SBarry Smith alpha4 = x[7 * i + 3]; 1135ed8eea03SBarry Smith alpha5 = x[7 * i + 4]; 1136ed8eea03SBarry Smith alpha6 = x[7 * i + 5]; 1137ed8eea03SBarry Smith alpha7 = x[7 * i + 6]; 1138ed8eea03SBarry Smith while (n-- > 0) { 1139ed8eea03SBarry Smith y[7 * (*idx)] += alpha1 * (*v); 1140ed8eea03SBarry Smith y[7 * (*idx) + 1] += alpha2 * (*v); 1141ed8eea03SBarry Smith y[7 * (*idx) + 2] += alpha3 * (*v); 1142ed8eea03SBarry Smith y[7 * (*idx) + 3] += alpha4 * (*v); 1143ed8eea03SBarry Smith y[7 * (*idx) + 4] += alpha5 * (*v); 1144ed8eea03SBarry Smith y[7 * (*idx) + 5] += alpha6 * (*v); 1145ed8eea03SBarry Smith y[7 * (*idx) + 6] += alpha7 * (*v); 11469371c9d4SSatish Balay idx++; 11479371c9d4SSatish Balay v++; 1148ed8eea03SBarry Smith } 1149ed8eea03SBarry Smith } 11509566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(14.0 * a->nz)); 11519566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 11529566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 1153ed8eea03SBarry Smith PetscFunctionReturn(0); 1154ed8eea03SBarry Smith } 1155ed8eea03SBarry Smith 11569371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_8(Mat A, Vec xx, Vec yy) { 115766ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 115866ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1159d2840e09SBarry Smith const PetscScalar *x, *v; 1160d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1161d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1162d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 116366ed3db0SBarry Smith 116466ed3db0SBarry Smith PetscFunctionBegin; 11659566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 11669566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 116766ed3db0SBarry Smith idx = a->j; 116866ed3db0SBarry Smith v = a->a; 116966ed3db0SBarry Smith ii = a->i; 117066ed3db0SBarry Smith 117166ed3db0SBarry Smith for (i = 0; i < m; i++) { 117266ed3db0SBarry Smith jrow = ii[i]; 117366ed3db0SBarry Smith n = ii[i + 1] - jrow; 117466ed3db0SBarry Smith sum1 = 0.0; 117566ed3db0SBarry Smith sum2 = 0.0; 117666ed3db0SBarry Smith sum3 = 0.0; 117766ed3db0SBarry Smith sum4 = 0.0; 117866ed3db0SBarry Smith sum5 = 0.0; 117966ed3db0SBarry Smith sum6 = 0.0; 118066ed3db0SBarry Smith sum7 = 0.0; 118166ed3db0SBarry Smith sum8 = 0.0; 118226fbe8dcSKarl Rupp 1183b3c51c66SMatthew Knepley nonzerorow += (n > 0); 118466ed3db0SBarry Smith for (j = 0; j < n; j++) { 118566ed3db0SBarry Smith sum1 += v[jrow] * x[8 * idx[jrow]]; 118666ed3db0SBarry Smith sum2 += v[jrow] * x[8 * idx[jrow] + 1]; 118766ed3db0SBarry Smith sum3 += v[jrow] * x[8 * idx[jrow] + 2]; 118866ed3db0SBarry Smith sum4 += v[jrow] * x[8 * idx[jrow] + 3]; 118966ed3db0SBarry Smith sum5 += v[jrow] * x[8 * idx[jrow] + 4]; 119066ed3db0SBarry Smith sum6 += v[jrow] * x[8 * idx[jrow] + 5]; 119166ed3db0SBarry Smith sum7 += v[jrow] * x[8 * idx[jrow] + 6]; 119266ed3db0SBarry Smith sum8 += v[jrow] * x[8 * idx[jrow] + 7]; 119366ed3db0SBarry Smith jrow++; 119466ed3db0SBarry Smith } 119566ed3db0SBarry Smith y[8 * i] = sum1; 119666ed3db0SBarry Smith y[8 * i + 1] = sum2; 119766ed3db0SBarry Smith y[8 * i + 2] = sum3; 119866ed3db0SBarry Smith y[8 * i + 3] = sum4; 119966ed3db0SBarry Smith y[8 * i + 4] = sum5; 120066ed3db0SBarry Smith y[8 * i + 5] = sum6; 120166ed3db0SBarry Smith y[8 * i + 6] = sum7; 120266ed3db0SBarry Smith y[8 * i + 7] = sum8; 120366ed3db0SBarry Smith } 120466ed3db0SBarry Smith 12059566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(16.0 * a->nz - 8.0 * nonzerorow)); 12069566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 12079566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 120866ed3db0SBarry Smith PetscFunctionReturn(0); 120966ed3db0SBarry Smith } 121066ed3db0SBarry Smith 12119371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A, Vec xx, Vec yy) { 121266ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 121366ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1214d2840e09SBarry Smith const PetscScalar *x, *v; 1215d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 1216d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 1217d2840e09SBarry Smith PetscInt n, i; 121866ed3db0SBarry Smith 121966ed3db0SBarry Smith PetscFunctionBegin; 12209566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 12219566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 12229566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 1223bfec09a0SHong Zhang 122466ed3db0SBarry Smith for (i = 0; i < m; i++) { 1225bfec09a0SHong Zhang idx = a->j + a->i[i]; 1226bfec09a0SHong Zhang v = a->a + a->i[i]; 122766ed3db0SBarry Smith n = a->i[i + 1] - a->i[i]; 122866ed3db0SBarry Smith alpha1 = x[8 * i]; 122966ed3db0SBarry Smith alpha2 = x[8 * i + 1]; 123066ed3db0SBarry Smith alpha3 = x[8 * i + 2]; 123166ed3db0SBarry Smith alpha4 = x[8 * i + 3]; 123266ed3db0SBarry Smith alpha5 = x[8 * i + 4]; 123366ed3db0SBarry Smith alpha6 = x[8 * i + 5]; 123466ed3db0SBarry Smith alpha7 = x[8 * i + 6]; 123566ed3db0SBarry Smith alpha8 = x[8 * i + 7]; 123666ed3db0SBarry Smith while (n-- > 0) { 123766ed3db0SBarry Smith y[8 * (*idx)] += alpha1 * (*v); 123866ed3db0SBarry Smith y[8 * (*idx) + 1] += alpha2 * (*v); 123966ed3db0SBarry Smith y[8 * (*idx) + 2] += alpha3 * (*v); 124066ed3db0SBarry Smith y[8 * (*idx) + 3] += alpha4 * (*v); 124166ed3db0SBarry Smith y[8 * (*idx) + 4] += alpha5 * (*v); 124266ed3db0SBarry Smith y[8 * (*idx) + 5] += alpha6 * (*v); 124366ed3db0SBarry Smith y[8 * (*idx) + 6] += alpha7 * (*v); 124466ed3db0SBarry Smith y[8 * (*idx) + 7] += alpha8 * (*v); 12459371c9d4SSatish Balay idx++; 12469371c9d4SSatish Balay v++; 124766ed3db0SBarry Smith } 124866ed3db0SBarry Smith } 12499566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(16.0 * a->nz)); 12509566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 12519566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 125266ed3db0SBarry Smith PetscFunctionReturn(0); 125366ed3db0SBarry Smith } 125466ed3db0SBarry Smith 12559371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz) { 125666ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 125766ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1258d2840e09SBarry Smith const PetscScalar *x, *v; 1259d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1260d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1261b24ad042SBarry Smith PetscInt n, i, jrow, j; 126266ed3db0SBarry Smith 126366ed3db0SBarry Smith PetscFunctionBegin; 12649566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 12659566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 12669566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 126766ed3db0SBarry Smith idx = a->j; 126866ed3db0SBarry Smith v = a->a; 126966ed3db0SBarry Smith ii = a->i; 127066ed3db0SBarry Smith 127166ed3db0SBarry Smith for (i = 0; i < m; i++) { 127266ed3db0SBarry Smith jrow = ii[i]; 127366ed3db0SBarry Smith n = ii[i + 1] - jrow; 127466ed3db0SBarry Smith sum1 = 0.0; 127566ed3db0SBarry Smith sum2 = 0.0; 127666ed3db0SBarry Smith sum3 = 0.0; 127766ed3db0SBarry Smith sum4 = 0.0; 127866ed3db0SBarry Smith sum5 = 0.0; 127966ed3db0SBarry Smith sum6 = 0.0; 128066ed3db0SBarry Smith sum7 = 0.0; 128166ed3db0SBarry Smith sum8 = 0.0; 128266ed3db0SBarry Smith for (j = 0; j < n; j++) { 128366ed3db0SBarry Smith sum1 += v[jrow] * x[8 * idx[jrow]]; 128466ed3db0SBarry Smith sum2 += v[jrow] * x[8 * idx[jrow] + 1]; 128566ed3db0SBarry Smith sum3 += v[jrow] * x[8 * idx[jrow] + 2]; 128666ed3db0SBarry Smith sum4 += v[jrow] * x[8 * idx[jrow] + 3]; 128766ed3db0SBarry Smith sum5 += v[jrow] * x[8 * idx[jrow] + 4]; 128866ed3db0SBarry Smith sum6 += v[jrow] * x[8 * idx[jrow] + 5]; 128966ed3db0SBarry Smith sum7 += v[jrow] * x[8 * idx[jrow] + 6]; 129066ed3db0SBarry Smith sum8 += v[jrow] * x[8 * idx[jrow] + 7]; 129166ed3db0SBarry Smith jrow++; 129266ed3db0SBarry Smith } 129366ed3db0SBarry Smith y[8 * i] += sum1; 129466ed3db0SBarry Smith y[8 * i + 1] += sum2; 129566ed3db0SBarry Smith y[8 * i + 2] += sum3; 129666ed3db0SBarry Smith y[8 * i + 3] += sum4; 129766ed3db0SBarry Smith y[8 * i + 4] += sum5; 129866ed3db0SBarry Smith y[8 * i + 5] += sum6; 129966ed3db0SBarry Smith y[8 * i + 6] += sum7; 130066ed3db0SBarry Smith y[8 * i + 7] += sum8; 130166ed3db0SBarry Smith } 130266ed3db0SBarry Smith 13039566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(16.0 * a->nz)); 13049566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 13059566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 130666ed3db0SBarry Smith PetscFunctionReturn(0); 130766ed3db0SBarry Smith } 130866ed3db0SBarry Smith 13099371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz) { 131066ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 131166ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1312d2840e09SBarry Smith const PetscScalar *x, *v; 1313d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 1314d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 1315d2840e09SBarry Smith PetscInt n, i; 131666ed3db0SBarry Smith 131766ed3db0SBarry Smith PetscFunctionBegin; 13189566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 13199566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 13209566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 132166ed3db0SBarry Smith for (i = 0; i < m; i++) { 1322bfec09a0SHong Zhang idx = a->j + a->i[i]; 1323bfec09a0SHong Zhang v = a->a + a->i[i]; 132466ed3db0SBarry Smith n = a->i[i + 1] - a->i[i]; 132566ed3db0SBarry Smith alpha1 = x[8 * i]; 132666ed3db0SBarry Smith alpha2 = x[8 * i + 1]; 132766ed3db0SBarry Smith alpha3 = x[8 * i + 2]; 132866ed3db0SBarry Smith alpha4 = x[8 * i + 3]; 132966ed3db0SBarry Smith alpha5 = x[8 * i + 4]; 133066ed3db0SBarry Smith alpha6 = x[8 * i + 5]; 133166ed3db0SBarry Smith alpha7 = x[8 * i + 6]; 133266ed3db0SBarry Smith alpha8 = x[8 * i + 7]; 133366ed3db0SBarry Smith while (n-- > 0) { 133466ed3db0SBarry Smith y[8 * (*idx)] += alpha1 * (*v); 133566ed3db0SBarry Smith y[8 * (*idx) + 1] += alpha2 * (*v); 133666ed3db0SBarry Smith y[8 * (*idx) + 2] += alpha3 * (*v); 133766ed3db0SBarry Smith y[8 * (*idx) + 3] += alpha4 * (*v); 133866ed3db0SBarry Smith y[8 * (*idx) + 4] += alpha5 * (*v); 133966ed3db0SBarry Smith y[8 * (*idx) + 5] += alpha6 * (*v); 134066ed3db0SBarry Smith y[8 * (*idx) + 6] += alpha7 * (*v); 134166ed3db0SBarry Smith y[8 * (*idx) + 7] += alpha8 * (*v); 13429371c9d4SSatish Balay idx++; 13439371c9d4SSatish Balay v++; 134466ed3db0SBarry Smith } 134566ed3db0SBarry Smith } 13469566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(16.0 * a->nz)); 13479566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 13489566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 13492f7816d4SBarry Smith PetscFunctionReturn(0); 13502f7816d4SBarry Smith } 13512f7816d4SBarry Smith 13522b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/ 13539371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_9(Mat A, Vec xx, Vec yy) { 13542b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 13552b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1356d2840e09SBarry Smith const PetscScalar *x, *v; 1357d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1358d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1359d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 13602b692628SMatthew Knepley 13612b692628SMatthew Knepley PetscFunctionBegin; 13629566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 13639566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 13642b692628SMatthew Knepley idx = a->j; 13652b692628SMatthew Knepley v = a->a; 13662b692628SMatthew Knepley ii = a->i; 13672b692628SMatthew Knepley 13682b692628SMatthew Knepley for (i = 0; i < m; i++) { 13692b692628SMatthew Knepley jrow = ii[i]; 13702b692628SMatthew Knepley n = ii[i + 1] - jrow; 13712b692628SMatthew Knepley sum1 = 0.0; 13722b692628SMatthew Knepley sum2 = 0.0; 13732b692628SMatthew Knepley sum3 = 0.0; 13742b692628SMatthew Knepley sum4 = 0.0; 13752b692628SMatthew Knepley sum5 = 0.0; 13762b692628SMatthew Knepley sum6 = 0.0; 13772b692628SMatthew Knepley sum7 = 0.0; 13782b692628SMatthew Knepley sum8 = 0.0; 13792b692628SMatthew Knepley sum9 = 0.0; 138026fbe8dcSKarl Rupp 1381b3c51c66SMatthew Knepley nonzerorow += (n > 0); 13822b692628SMatthew Knepley for (j = 0; j < n; j++) { 13832b692628SMatthew Knepley sum1 += v[jrow] * x[9 * idx[jrow]]; 13842b692628SMatthew Knepley sum2 += v[jrow] * x[9 * idx[jrow] + 1]; 13852b692628SMatthew Knepley sum3 += v[jrow] * x[9 * idx[jrow] + 2]; 13862b692628SMatthew Knepley sum4 += v[jrow] * x[9 * idx[jrow] + 3]; 13872b692628SMatthew Knepley sum5 += v[jrow] * x[9 * idx[jrow] + 4]; 13882b692628SMatthew Knepley sum6 += v[jrow] * x[9 * idx[jrow] + 5]; 13892b692628SMatthew Knepley sum7 += v[jrow] * x[9 * idx[jrow] + 6]; 13902b692628SMatthew Knepley sum8 += v[jrow] * x[9 * idx[jrow] + 7]; 13912b692628SMatthew Knepley sum9 += v[jrow] * x[9 * idx[jrow] + 8]; 13922b692628SMatthew Knepley jrow++; 13932b692628SMatthew Knepley } 13942b692628SMatthew Knepley y[9 * i] = sum1; 13952b692628SMatthew Knepley y[9 * i + 1] = sum2; 13962b692628SMatthew Knepley y[9 * i + 2] = sum3; 13972b692628SMatthew Knepley y[9 * i + 3] = sum4; 13982b692628SMatthew Knepley y[9 * i + 4] = sum5; 13992b692628SMatthew Knepley y[9 * i + 5] = sum6; 14002b692628SMatthew Knepley y[9 * i + 6] = sum7; 14012b692628SMatthew Knepley y[9 * i + 7] = sum8; 14022b692628SMatthew Knepley y[9 * i + 8] = sum9; 14032b692628SMatthew Knepley } 14042b692628SMatthew Knepley 14059566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(18.0 * a->nz - 9 * nonzerorow)); 14069566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 14079566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 14082b692628SMatthew Knepley PetscFunctionReturn(0); 14092b692628SMatthew Knepley } 14102b692628SMatthew Knepley 1411b9cda22cSBarry Smith /* ------------------------------------------------------------------------------*/ 1412b9cda22cSBarry Smith 14139371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A, Vec xx, Vec yy) { 14142b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 14152b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1416d2840e09SBarry Smith const PetscScalar *x, *v; 1417d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9; 1418d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 1419d2840e09SBarry Smith PetscInt n, i; 14202b692628SMatthew Knepley 14212b692628SMatthew Knepley PetscFunctionBegin; 14229566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 14239566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 14249566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 14252b692628SMatthew Knepley 14262b692628SMatthew Knepley for (i = 0; i < m; i++) { 14272b692628SMatthew Knepley idx = a->j + a->i[i]; 14282b692628SMatthew Knepley v = a->a + a->i[i]; 14292b692628SMatthew Knepley n = a->i[i + 1] - a->i[i]; 14302b692628SMatthew Knepley alpha1 = x[9 * i]; 14312b692628SMatthew Knepley alpha2 = x[9 * i + 1]; 14322b692628SMatthew Knepley alpha3 = x[9 * i + 2]; 14332b692628SMatthew Knepley alpha4 = x[9 * i + 3]; 14342b692628SMatthew Knepley alpha5 = x[9 * i + 4]; 14352b692628SMatthew Knepley alpha6 = x[9 * i + 5]; 14362b692628SMatthew Knepley alpha7 = x[9 * i + 6]; 14372b692628SMatthew Knepley alpha8 = x[9 * i + 7]; 14382f6bd0c6SMatthew Knepley alpha9 = x[9 * i + 8]; 14392b692628SMatthew Knepley while (n-- > 0) { 14402b692628SMatthew Knepley y[9 * (*idx)] += alpha1 * (*v); 14412b692628SMatthew Knepley y[9 * (*idx) + 1] += alpha2 * (*v); 14422b692628SMatthew Knepley y[9 * (*idx) + 2] += alpha3 * (*v); 14432b692628SMatthew Knepley y[9 * (*idx) + 3] += alpha4 * (*v); 14442b692628SMatthew Knepley y[9 * (*idx) + 4] += alpha5 * (*v); 14452b692628SMatthew Knepley y[9 * (*idx) + 5] += alpha6 * (*v); 14462b692628SMatthew Knepley y[9 * (*idx) + 6] += alpha7 * (*v); 14472b692628SMatthew Knepley y[9 * (*idx) + 7] += alpha8 * (*v); 14482b692628SMatthew Knepley y[9 * (*idx) + 8] += alpha9 * (*v); 14499371c9d4SSatish Balay idx++; 14509371c9d4SSatish Balay v++; 14512b692628SMatthew Knepley } 14522b692628SMatthew Knepley } 14539566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(18.0 * a->nz)); 14549566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 14559566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 14562b692628SMatthew Knepley PetscFunctionReturn(0); 14572b692628SMatthew Knepley } 14582b692628SMatthew Knepley 14599371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz) { 14602b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 14612b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1462d2840e09SBarry Smith const PetscScalar *x, *v; 1463d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1464d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1465b24ad042SBarry Smith PetscInt n, i, jrow, j; 14662b692628SMatthew Knepley 14672b692628SMatthew Knepley PetscFunctionBegin; 14689566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 14699566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 14709566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 14712b692628SMatthew Knepley idx = a->j; 14722b692628SMatthew Knepley v = a->a; 14732b692628SMatthew Knepley ii = a->i; 14742b692628SMatthew Knepley 14752b692628SMatthew Knepley for (i = 0; i < m; i++) { 14762b692628SMatthew Knepley jrow = ii[i]; 14772b692628SMatthew Knepley n = ii[i + 1] - jrow; 14782b692628SMatthew Knepley sum1 = 0.0; 14792b692628SMatthew Knepley sum2 = 0.0; 14802b692628SMatthew Knepley sum3 = 0.0; 14812b692628SMatthew Knepley sum4 = 0.0; 14822b692628SMatthew Knepley sum5 = 0.0; 14832b692628SMatthew Knepley sum6 = 0.0; 14842b692628SMatthew Knepley sum7 = 0.0; 14852b692628SMatthew Knepley sum8 = 0.0; 14862b692628SMatthew Knepley sum9 = 0.0; 14872b692628SMatthew Knepley for (j = 0; j < n; j++) { 14882b692628SMatthew Knepley sum1 += v[jrow] * x[9 * idx[jrow]]; 14892b692628SMatthew Knepley sum2 += v[jrow] * x[9 * idx[jrow] + 1]; 14902b692628SMatthew Knepley sum3 += v[jrow] * x[9 * idx[jrow] + 2]; 14912b692628SMatthew Knepley sum4 += v[jrow] * x[9 * idx[jrow] + 3]; 14922b692628SMatthew Knepley sum5 += v[jrow] * x[9 * idx[jrow] + 4]; 14932b692628SMatthew Knepley sum6 += v[jrow] * x[9 * idx[jrow] + 5]; 14942b692628SMatthew Knepley sum7 += v[jrow] * x[9 * idx[jrow] + 6]; 14952b692628SMatthew Knepley sum8 += v[jrow] * x[9 * idx[jrow] + 7]; 14962b692628SMatthew Knepley sum9 += v[jrow] * x[9 * idx[jrow] + 8]; 14972b692628SMatthew Knepley jrow++; 14982b692628SMatthew Knepley } 14992b692628SMatthew Knepley y[9 * i] += sum1; 15002b692628SMatthew Knepley y[9 * i + 1] += sum2; 15012b692628SMatthew Knepley y[9 * i + 2] += sum3; 15022b692628SMatthew Knepley y[9 * i + 3] += sum4; 15032b692628SMatthew Knepley y[9 * i + 4] += sum5; 15042b692628SMatthew Knepley y[9 * i + 5] += sum6; 15052b692628SMatthew Knepley y[9 * i + 6] += sum7; 15062b692628SMatthew Knepley y[9 * i + 7] += sum8; 15072b692628SMatthew Knepley y[9 * i + 8] += sum9; 15082b692628SMatthew Knepley } 15092b692628SMatthew Knepley 15109566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(18.0 * a->nz)); 15119566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 15129566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 15132b692628SMatthew Knepley PetscFunctionReturn(0); 15142b692628SMatthew Knepley } 15152b692628SMatthew Knepley 15169371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz) { 15172b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 15182b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1519d2840e09SBarry Smith const PetscScalar *x, *v; 1520d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9; 1521d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 1522d2840e09SBarry Smith PetscInt n, i; 15232b692628SMatthew Knepley 15242b692628SMatthew Knepley PetscFunctionBegin; 15259566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 15269566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 15279566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 15282b692628SMatthew Knepley for (i = 0; i < m; i++) { 15292b692628SMatthew Knepley idx = a->j + a->i[i]; 15302b692628SMatthew Knepley v = a->a + a->i[i]; 15312b692628SMatthew Knepley n = a->i[i + 1] - a->i[i]; 15322b692628SMatthew Knepley alpha1 = x[9 * i]; 15332b692628SMatthew Knepley alpha2 = x[9 * i + 1]; 15342b692628SMatthew Knepley alpha3 = x[9 * i + 2]; 15352b692628SMatthew Knepley alpha4 = x[9 * i + 3]; 15362b692628SMatthew Knepley alpha5 = x[9 * i + 4]; 15372b692628SMatthew Knepley alpha6 = x[9 * i + 5]; 15382b692628SMatthew Knepley alpha7 = x[9 * i + 6]; 15392b692628SMatthew Knepley alpha8 = x[9 * i + 7]; 15402b692628SMatthew Knepley alpha9 = x[9 * i + 8]; 15412b692628SMatthew Knepley while (n-- > 0) { 15422b692628SMatthew Knepley y[9 * (*idx)] += alpha1 * (*v); 15432b692628SMatthew Knepley y[9 * (*idx) + 1] += alpha2 * (*v); 15442b692628SMatthew Knepley y[9 * (*idx) + 2] += alpha3 * (*v); 15452b692628SMatthew Knepley y[9 * (*idx) + 3] += alpha4 * (*v); 15462b692628SMatthew Knepley y[9 * (*idx) + 4] += alpha5 * (*v); 15472b692628SMatthew Knepley y[9 * (*idx) + 5] += alpha6 * (*v); 15482b692628SMatthew Knepley y[9 * (*idx) + 6] += alpha7 * (*v); 15492b692628SMatthew Knepley y[9 * (*idx) + 7] += alpha8 * (*v); 15502b692628SMatthew Knepley y[9 * (*idx) + 8] += alpha9 * (*v); 15519371c9d4SSatish Balay idx++; 15529371c9d4SSatish Balay v++; 15532b692628SMatthew Knepley } 15542b692628SMatthew Knepley } 15559566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(18.0 * a->nz)); 15569566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 15579566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 15582b692628SMatthew Knepley PetscFunctionReturn(0); 15592b692628SMatthew Knepley } 15609371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_10(Mat A, Vec xx, Vec yy) { 1561b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1562b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1563d2840e09SBarry Smith const PetscScalar *x, *v; 1564d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1565d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1566d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 1567b9cda22cSBarry Smith 1568b9cda22cSBarry Smith PetscFunctionBegin; 15699566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 15709566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 1571b9cda22cSBarry Smith idx = a->j; 1572b9cda22cSBarry Smith v = a->a; 1573b9cda22cSBarry Smith ii = a->i; 1574b9cda22cSBarry Smith 1575b9cda22cSBarry Smith for (i = 0; i < m; i++) { 1576b9cda22cSBarry Smith jrow = ii[i]; 1577b9cda22cSBarry Smith n = ii[i + 1] - jrow; 1578b9cda22cSBarry Smith sum1 = 0.0; 1579b9cda22cSBarry Smith sum2 = 0.0; 1580b9cda22cSBarry Smith sum3 = 0.0; 1581b9cda22cSBarry Smith sum4 = 0.0; 1582b9cda22cSBarry Smith sum5 = 0.0; 1583b9cda22cSBarry Smith sum6 = 0.0; 1584b9cda22cSBarry Smith sum7 = 0.0; 1585b9cda22cSBarry Smith sum8 = 0.0; 1586b9cda22cSBarry Smith sum9 = 0.0; 1587b9cda22cSBarry Smith sum10 = 0.0; 158826fbe8dcSKarl Rupp 1589b3c51c66SMatthew Knepley nonzerorow += (n > 0); 1590b9cda22cSBarry Smith for (j = 0; j < n; j++) { 1591b9cda22cSBarry Smith sum1 += v[jrow] * x[10 * idx[jrow]]; 1592b9cda22cSBarry Smith sum2 += v[jrow] * x[10 * idx[jrow] + 1]; 1593b9cda22cSBarry Smith sum3 += v[jrow] * x[10 * idx[jrow] + 2]; 1594b9cda22cSBarry Smith sum4 += v[jrow] * x[10 * idx[jrow] + 3]; 1595b9cda22cSBarry Smith sum5 += v[jrow] * x[10 * idx[jrow] + 4]; 1596b9cda22cSBarry Smith sum6 += v[jrow] * x[10 * idx[jrow] + 5]; 1597b9cda22cSBarry Smith sum7 += v[jrow] * x[10 * idx[jrow] + 6]; 1598b9cda22cSBarry Smith sum8 += v[jrow] * x[10 * idx[jrow] + 7]; 1599b9cda22cSBarry Smith sum9 += v[jrow] * x[10 * idx[jrow] + 8]; 1600b9cda22cSBarry Smith sum10 += v[jrow] * x[10 * idx[jrow] + 9]; 1601b9cda22cSBarry Smith jrow++; 1602b9cda22cSBarry Smith } 1603b9cda22cSBarry Smith y[10 * i] = sum1; 1604b9cda22cSBarry Smith y[10 * i + 1] = sum2; 1605b9cda22cSBarry Smith y[10 * i + 2] = sum3; 1606b9cda22cSBarry Smith y[10 * i + 3] = sum4; 1607b9cda22cSBarry Smith y[10 * i + 4] = sum5; 1608b9cda22cSBarry Smith y[10 * i + 5] = sum6; 1609b9cda22cSBarry Smith y[10 * i + 6] = sum7; 1610b9cda22cSBarry Smith y[10 * i + 7] = sum8; 1611b9cda22cSBarry Smith y[10 * i + 8] = sum9; 1612b9cda22cSBarry Smith y[10 * i + 9] = sum10; 1613b9cda22cSBarry Smith } 1614b9cda22cSBarry Smith 16159566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(20.0 * a->nz - 10.0 * nonzerorow)); 16169566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 16179566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 1618b9cda22cSBarry Smith PetscFunctionReturn(0); 1619b9cda22cSBarry Smith } 1620b9cda22cSBarry Smith 16219371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz) { 1622b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1623b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1624d2840e09SBarry Smith const PetscScalar *x, *v; 1625d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1626d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1627b9cda22cSBarry Smith PetscInt n, i, jrow, j; 1628b9cda22cSBarry Smith 1629b9cda22cSBarry Smith PetscFunctionBegin; 16309566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 16319566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 16329566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 1633b9cda22cSBarry Smith idx = a->j; 1634b9cda22cSBarry Smith v = a->a; 1635b9cda22cSBarry Smith ii = a->i; 1636b9cda22cSBarry Smith 1637b9cda22cSBarry Smith for (i = 0; i < m; i++) { 1638b9cda22cSBarry Smith jrow = ii[i]; 1639b9cda22cSBarry Smith n = ii[i + 1] - jrow; 1640b9cda22cSBarry Smith sum1 = 0.0; 1641b9cda22cSBarry Smith sum2 = 0.0; 1642b9cda22cSBarry Smith sum3 = 0.0; 1643b9cda22cSBarry Smith sum4 = 0.0; 1644b9cda22cSBarry Smith sum5 = 0.0; 1645b9cda22cSBarry Smith sum6 = 0.0; 1646b9cda22cSBarry Smith sum7 = 0.0; 1647b9cda22cSBarry Smith sum8 = 0.0; 1648b9cda22cSBarry Smith sum9 = 0.0; 1649b9cda22cSBarry Smith sum10 = 0.0; 1650b9cda22cSBarry Smith for (j = 0; j < n; j++) { 1651b9cda22cSBarry Smith sum1 += v[jrow] * x[10 * idx[jrow]]; 1652b9cda22cSBarry Smith sum2 += v[jrow] * x[10 * idx[jrow] + 1]; 1653b9cda22cSBarry Smith sum3 += v[jrow] * x[10 * idx[jrow] + 2]; 1654b9cda22cSBarry Smith sum4 += v[jrow] * x[10 * idx[jrow] + 3]; 1655b9cda22cSBarry Smith sum5 += v[jrow] * x[10 * idx[jrow] + 4]; 1656b9cda22cSBarry Smith sum6 += v[jrow] * x[10 * idx[jrow] + 5]; 1657b9cda22cSBarry Smith sum7 += v[jrow] * x[10 * idx[jrow] + 6]; 1658b9cda22cSBarry Smith sum8 += v[jrow] * x[10 * idx[jrow] + 7]; 1659b9cda22cSBarry Smith sum9 += v[jrow] * x[10 * idx[jrow] + 8]; 1660b9cda22cSBarry Smith sum10 += v[jrow] * x[10 * idx[jrow] + 9]; 1661b9cda22cSBarry Smith jrow++; 1662b9cda22cSBarry Smith } 1663b9cda22cSBarry Smith y[10 * i] += sum1; 1664b9cda22cSBarry Smith y[10 * i + 1] += sum2; 1665b9cda22cSBarry Smith y[10 * i + 2] += sum3; 1666b9cda22cSBarry Smith y[10 * i + 3] += sum4; 1667b9cda22cSBarry Smith y[10 * i + 4] += sum5; 1668b9cda22cSBarry Smith y[10 * i + 5] += sum6; 1669b9cda22cSBarry Smith y[10 * i + 6] += sum7; 1670b9cda22cSBarry Smith y[10 * i + 7] += sum8; 1671b9cda22cSBarry Smith y[10 * i + 8] += sum9; 1672b9cda22cSBarry Smith y[10 * i + 9] += sum10; 1673b9cda22cSBarry Smith } 1674b9cda22cSBarry Smith 16759566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(20.0 * a->nz)); 16769566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 16779566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 1678b9cda22cSBarry Smith PetscFunctionReturn(0); 1679b9cda22cSBarry Smith } 1680b9cda22cSBarry Smith 16819371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A, Vec xx, Vec yy) { 1682b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1683b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1684d2840e09SBarry Smith const PetscScalar *x, *v; 1685d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10; 1686d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 1687d2840e09SBarry Smith PetscInt n, i; 1688b9cda22cSBarry Smith 1689b9cda22cSBarry Smith PetscFunctionBegin; 16909566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 16919566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 16929566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 1693b9cda22cSBarry Smith 1694b9cda22cSBarry Smith for (i = 0; i < m; i++) { 1695b9cda22cSBarry Smith idx = a->j + a->i[i]; 1696b9cda22cSBarry Smith v = a->a + a->i[i]; 1697b9cda22cSBarry Smith n = a->i[i + 1] - a->i[i]; 1698e29fdc22SBarry Smith alpha1 = x[10 * i]; 1699e29fdc22SBarry Smith alpha2 = x[10 * i + 1]; 1700e29fdc22SBarry Smith alpha3 = x[10 * i + 2]; 1701e29fdc22SBarry Smith alpha4 = x[10 * i + 3]; 1702e29fdc22SBarry Smith alpha5 = x[10 * i + 4]; 1703e29fdc22SBarry Smith alpha6 = x[10 * i + 5]; 1704e29fdc22SBarry Smith alpha7 = x[10 * i + 6]; 1705e29fdc22SBarry Smith alpha8 = x[10 * i + 7]; 1706e29fdc22SBarry Smith alpha9 = x[10 * i + 8]; 1707b9cda22cSBarry Smith alpha10 = x[10 * i + 9]; 1708b9cda22cSBarry Smith while (n-- > 0) { 1709e29fdc22SBarry Smith y[10 * (*idx)] += alpha1 * (*v); 1710e29fdc22SBarry Smith y[10 * (*idx) + 1] += alpha2 * (*v); 1711e29fdc22SBarry Smith y[10 * (*idx) + 2] += alpha3 * (*v); 1712e29fdc22SBarry Smith y[10 * (*idx) + 3] += alpha4 * (*v); 1713e29fdc22SBarry Smith y[10 * (*idx) + 4] += alpha5 * (*v); 1714e29fdc22SBarry Smith y[10 * (*idx) + 5] += alpha6 * (*v); 1715e29fdc22SBarry Smith y[10 * (*idx) + 6] += alpha7 * (*v); 1716e29fdc22SBarry Smith y[10 * (*idx) + 7] += alpha8 * (*v); 1717e29fdc22SBarry Smith y[10 * (*idx) + 8] += alpha9 * (*v); 1718b9cda22cSBarry Smith y[10 * (*idx) + 9] += alpha10 * (*v); 17199371c9d4SSatish Balay idx++; 17209371c9d4SSatish Balay v++; 1721b9cda22cSBarry Smith } 1722b9cda22cSBarry Smith } 17239566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(20.0 * a->nz)); 17249566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 17259566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 1726b9cda22cSBarry Smith PetscFunctionReturn(0); 1727b9cda22cSBarry Smith } 1728b9cda22cSBarry Smith 17299371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz) { 1730b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1731b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1732d2840e09SBarry Smith const PetscScalar *x, *v; 1733d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10; 1734d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 1735d2840e09SBarry Smith PetscInt n, i; 1736b9cda22cSBarry Smith 1737b9cda22cSBarry Smith PetscFunctionBegin; 17389566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 17399566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 17409566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 1741b9cda22cSBarry Smith for (i = 0; i < m; i++) { 1742b9cda22cSBarry Smith idx = a->j + a->i[i]; 1743b9cda22cSBarry Smith v = a->a + a->i[i]; 1744b9cda22cSBarry Smith n = a->i[i + 1] - a->i[i]; 1745b9cda22cSBarry Smith alpha1 = x[10 * i]; 1746b9cda22cSBarry Smith alpha2 = x[10 * i + 1]; 1747b9cda22cSBarry Smith alpha3 = x[10 * i + 2]; 1748b9cda22cSBarry Smith alpha4 = x[10 * i + 3]; 1749b9cda22cSBarry Smith alpha5 = x[10 * i + 4]; 1750b9cda22cSBarry Smith alpha6 = x[10 * i + 5]; 1751b9cda22cSBarry Smith alpha7 = x[10 * i + 6]; 1752b9cda22cSBarry Smith alpha8 = x[10 * i + 7]; 1753b9cda22cSBarry Smith alpha9 = x[10 * i + 8]; 1754b9cda22cSBarry Smith alpha10 = x[10 * i + 9]; 1755b9cda22cSBarry Smith while (n-- > 0) { 1756b9cda22cSBarry Smith y[10 * (*idx)] += alpha1 * (*v); 1757b9cda22cSBarry Smith y[10 * (*idx) + 1] += alpha2 * (*v); 1758b9cda22cSBarry Smith y[10 * (*idx) + 2] += alpha3 * (*v); 1759b9cda22cSBarry Smith y[10 * (*idx) + 3] += alpha4 * (*v); 1760b9cda22cSBarry Smith y[10 * (*idx) + 4] += alpha5 * (*v); 1761b9cda22cSBarry Smith y[10 * (*idx) + 5] += alpha6 * (*v); 1762b9cda22cSBarry Smith y[10 * (*idx) + 6] += alpha7 * (*v); 1763b9cda22cSBarry Smith y[10 * (*idx) + 7] += alpha8 * (*v); 1764b9cda22cSBarry Smith y[10 * (*idx) + 8] += alpha9 * (*v); 1765b9cda22cSBarry Smith y[10 * (*idx) + 9] += alpha10 * (*v); 17669371c9d4SSatish Balay idx++; 17679371c9d4SSatish Balay v++; 1768b9cda22cSBarry Smith } 1769b9cda22cSBarry Smith } 17709566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(20.0 * a->nz)); 17719566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 17729566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 1773b9cda22cSBarry Smith PetscFunctionReturn(0); 1774b9cda22cSBarry Smith } 1775b9cda22cSBarry Smith 17762f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/ 17779371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_11(Mat A, Vec xx, Vec yy) { 1778dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1779dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1780d2840e09SBarry Smith const PetscScalar *x, *v; 1781d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1782d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1783d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 1784dbdd7285SBarry Smith 1785dbdd7285SBarry Smith PetscFunctionBegin; 17869566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 17879566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 1788dbdd7285SBarry Smith idx = a->j; 1789dbdd7285SBarry Smith v = a->a; 1790dbdd7285SBarry Smith ii = a->i; 1791dbdd7285SBarry Smith 1792dbdd7285SBarry Smith for (i = 0; i < m; i++) { 1793dbdd7285SBarry Smith jrow = ii[i]; 1794dbdd7285SBarry Smith n = ii[i + 1] - jrow; 1795dbdd7285SBarry Smith sum1 = 0.0; 1796dbdd7285SBarry Smith sum2 = 0.0; 1797dbdd7285SBarry Smith sum3 = 0.0; 1798dbdd7285SBarry Smith sum4 = 0.0; 1799dbdd7285SBarry Smith sum5 = 0.0; 1800dbdd7285SBarry Smith sum6 = 0.0; 1801dbdd7285SBarry Smith sum7 = 0.0; 1802dbdd7285SBarry Smith sum8 = 0.0; 1803dbdd7285SBarry Smith sum9 = 0.0; 1804dbdd7285SBarry Smith sum10 = 0.0; 1805dbdd7285SBarry Smith sum11 = 0.0; 180626fbe8dcSKarl Rupp 1807dbdd7285SBarry Smith nonzerorow += (n > 0); 1808dbdd7285SBarry Smith for (j = 0; j < n; j++) { 1809dbdd7285SBarry Smith sum1 += v[jrow] * x[11 * idx[jrow]]; 1810dbdd7285SBarry Smith sum2 += v[jrow] * x[11 * idx[jrow] + 1]; 1811dbdd7285SBarry Smith sum3 += v[jrow] * x[11 * idx[jrow] + 2]; 1812dbdd7285SBarry Smith sum4 += v[jrow] * x[11 * idx[jrow] + 3]; 1813dbdd7285SBarry Smith sum5 += v[jrow] * x[11 * idx[jrow] + 4]; 1814dbdd7285SBarry Smith sum6 += v[jrow] * x[11 * idx[jrow] + 5]; 1815dbdd7285SBarry Smith sum7 += v[jrow] * x[11 * idx[jrow] + 6]; 1816dbdd7285SBarry Smith sum8 += v[jrow] * x[11 * idx[jrow] + 7]; 1817dbdd7285SBarry Smith sum9 += v[jrow] * x[11 * idx[jrow] + 8]; 1818dbdd7285SBarry Smith sum10 += v[jrow] * x[11 * idx[jrow] + 9]; 1819dbdd7285SBarry Smith sum11 += v[jrow] * x[11 * idx[jrow] + 10]; 1820dbdd7285SBarry Smith jrow++; 1821dbdd7285SBarry Smith } 1822dbdd7285SBarry Smith y[11 * i] = sum1; 1823dbdd7285SBarry Smith y[11 * i + 1] = sum2; 1824dbdd7285SBarry Smith y[11 * i + 2] = sum3; 1825dbdd7285SBarry Smith y[11 * i + 3] = sum4; 1826dbdd7285SBarry Smith y[11 * i + 4] = sum5; 1827dbdd7285SBarry Smith y[11 * i + 5] = sum6; 1828dbdd7285SBarry Smith y[11 * i + 6] = sum7; 1829dbdd7285SBarry Smith y[11 * i + 7] = sum8; 1830dbdd7285SBarry Smith y[11 * i + 8] = sum9; 1831dbdd7285SBarry Smith y[11 * i + 9] = sum10; 1832dbdd7285SBarry Smith y[11 * i + 10] = sum11; 1833dbdd7285SBarry Smith } 1834dbdd7285SBarry Smith 18359566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(22.0 * a->nz - 11 * nonzerorow)); 18369566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 18379566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 1838dbdd7285SBarry Smith PetscFunctionReturn(0); 1839dbdd7285SBarry Smith } 1840dbdd7285SBarry Smith 18419371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz) { 1842dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1843dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1844d2840e09SBarry Smith const PetscScalar *x, *v; 1845d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1846d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1847dbdd7285SBarry Smith PetscInt n, i, jrow, j; 1848dbdd7285SBarry Smith 1849dbdd7285SBarry Smith PetscFunctionBegin; 18509566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 18519566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 18529566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 1853dbdd7285SBarry Smith idx = a->j; 1854dbdd7285SBarry Smith v = a->a; 1855dbdd7285SBarry Smith ii = a->i; 1856dbdd7285SBarry Smith 1857dbdd7285SBarry Smith for (i = 0; i < m; i++) { 1858dbdd7285SBarry Smith jrow = ii[i]; 1859dbdd7285SBarry Smith n = ii[i + 1] - jrow; 1860dbdd7285SBarry Smith sum1 = 0.0; 1861dbdd7285SBarry Smith sum2 = 0.0; 1862dbdd7285SBarry Smith sum3 = 0.0; 1863dbdd7285SBarry Smith sum4 = 0.0; 1864dbdd7285SBarry Smith sum5 = 0.0; 1865dbdd7285SBarry Smith sum6 = 0.0; 1866dbdd7285SBarry Smith sum7 = 0.0; 1867dbdd7285SBarry Smith sum8 = 0.0; 1868dbdd7285SBarry Smith sum9 = 0.0; 1869dbdd7285SBarry Smith sum10 = 0.0; 1870dbdd7285SBarry Smith sum11 = 0.0; 1871dbdd7285SBarry Smith for (j = 0; j < n; j++) { 1872dbdd7285SBarry Smith sum1 += v[jrow] * x[11 * idx[jrow]]; 1873dbdd7285SBarry Smith sum2 += v[jrow] * x[11 * idx[jrow] + 1]; 1874dbdd7285SBarry Smith sum3 += v[jrow] * x[11 * idx[jrow] + 2]; 1875dbdd7285SBarry Smith sum4 += v[jrow] * x[11 * idx[jrow] + 3]; 1876dbdd7285SBarry Smith sum5 += v[jrow] * x[11 * idx[jrow] + 4]; 1877dbdd7285SBarry Smith sum6 += v[jrow] * x[11 * idx[jrow] + 5]; 1878dbdd7285SBarry Smith sum7 += v[jrow] * x[11 * idx[jrow] + 6]; 1879dbdd7285SBarry Smith sum8 += v[jrow] * x[11 * idx[jrow] + 7]; 1880dbdd7285SBarry Smith sum9 += v[jrow] * x[11 * idx[jrow] + 8]; 1881dbdd7285SBarry Smith sum10 += v[jrow] * x[11 * idx[jrow] + 9]; 1882dbdd7285SBarry Smith sum11 += v[jrow] * x[11 * idx[jrow] + 10]; 1883dbdd7285SBarry Smith jrow++; 1884dbdd7285SBarry Smith } 1885dbdd7285SBarry Smith y[11 * i] += sum1; 1886dbdd7285SBarry Smith y[11 * i + 1] += sum2; 1887dbdd7285SBarry Smith y[11 * i + 2] += sum3; 1888dbdd7285SBarry Smith y[11 * i + 3] += sum4; 1889dbdd7285SBarry Smith y[11 * i + 4] += sum5; 1890dbdd7285SBarry Smith y[11 * i + 5] += sum6; 1891dbdd7285SBarry Smith y[11 * i + 6] += sum7; 1892dbdd7285SBarry Smith y[11 * i + 7] += sum8; 1893dbdd7285SBarry Smith y[11 * i + 8] += sum9; 1894dbdd7285SBarry Smith y[11 * i + 9] += sum10; 1895dbdd7285SBarry Smith y[11 * i + 10] += sum11; 1896dbdd7285SBarry Smith } 1897dbdd7285SBarry Smith 18989566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(22.0 * a->nz)); 18999566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 19009566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 1901dbdd7285SBarry Smith PetscFunctionReturn(0); 1902dbdd7285SBarry Smith } 1903dbdd7285SBarry Smith 19049371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A, Vec xx, Vec yy) { 1905dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1906dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1907d2840e09SBarry Smith const PetscScalar *x, *v; 1908d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10, alpha11; 1909d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 1910d2840e09SBarry Smith PetscInt n, i; 1911dbdd7285SBarry Smith 1912dbdd7285SBarry Smith PetscFunctionBegin; 19139566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 19149566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 19159566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 1916dbdd7285SBarry Smith 1917dbdd7285SBarry Smith for (i = 0; i < m; i++) { 1918dbdd7285SBarry Smith idx = a->j + a->i[i]; 1919dbdd7285SBarry Smith v = a->a + a->i[i]; 1920dbdd7285SBarry Smith n = a->i[i + 1] - a->i[i]; 1921dbdd7285SBarry Smith alpha1 = x[11 * i]; 1922dbdd7285SBarry Smith alpha2 = x[11 * i + 1]; 1923dbdd7285SBarry Smith alpha3 = x[11 * i + 2]; 1924dbdd7285SBarry Smith alpha4 = x[11 * i + 3]; 1925dbdd7285SBarry Smith alpha5 = x[11 * i + 4]; 1926dbdd7285SBarry Smith alpha6 = x[11 * i + 5]; 1927dbdd7285SBarry Smith alpha7 = x[11 * i + 6]; 1928dbdd7285SBarry Smith alpha8 = x[11 * i + 7]; 1929dbdd7285SBarry Smith alpha9 = x[11 * i + 8]; 1930dbdd7285SBarry Smith alpha10 = x[11 * i + 9]; 1931dbdd7285SBarry Smith alpha11 = x[11 * i + 10]; 1932dbdd7285SBarry Smith while (n-- > 0) { 1933dbdd7285SBarry Smith y[11 * (*idx)] += alpha1 * (*v); 1934dbdd7285SBarry Smith y[11 * (*idx) + 1] += alpha2 * (*v); 1935dbdd7285SBarry Smith y[11 * (*idx) + 2] += alpha3 * (*v); 1936dbdd7285SBarry Smith y[11 * (*idx) + 3] += alpha4 * (*v); 1937dbdd7285SBarry Smith y[11 * (*idx) + 4] += alpha5 * (*v); 1938dbdd7285SBarry Smith y[11 * (*idx) + 5] += alpha6 * (*v); 1939dbdd7285SBarry Smith y[11 * (*idx) + 6] += alpha7 * (*v); 1940dbdd7285SBarry Smith y[11 * (*idx) + 7] += alpha8 * (*v); 1941dbdd7285SBarry Smith y[11 * (*idx) + 8] += alpha9 * (*v); 1942dbdd7285SBarry Smith y[11 * (*idx) + 9] += alpha10 * (*v); 1943dbdd7285SBarry Smith y[11 * (*idx) + 10] += alpha11 * (*v); 19449371c9d4SSatish Balay idx++; 19459371c9d4SSatish Balay v++; 1946dbdd7285SBarry Smith } 1947dbdd7285SBarry Smith } 19489566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(22.0 * a->nz)); 19499566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 19509566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 1951dbdd7285SBarry Smith PetscFunctionReturn(0); 1952dbdd7285SBarry Smith } 1953dbdd7285SBarry Smith 19549371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz) { 1955dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1956dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1957d2840e09SBarry Smith const PetscScalar *x, *v; 1958d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10, alpha11; 1959d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 1960d2840e09SBarry Smith PetscInt n, i; 1961dbdd7285SBarry Smith 1962dbdd7285SBarry Smith PetscFunctionBegin; 19639566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 19649566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 19659566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 1966dbdd7285SBarry Smith for (i = 0; i < m; i++) { 1967dbdd7285SBarry Smith idx = a->j + a->i[i]; 1968dbdd7285SBarry Smith v = a->a + a->i[i]; 1969dbdd7285SBarry Smith n = a->i[i + 1] - a->i[i]; 1970dbdd7285SBarry Smith alpha1 = x[11 * i]; 1971dbdd7285SBarry Smith alpha2 = x[11 * i + 1]; 1972dbdd7285SBarry Smith alpha3 = x[11 * i + 2]; 1973dbdd7285SBarry Smith alpha4 = x[11 * i + 3]; 1974dbdd7285SBarry Smith alpha5 = x[11 * i + 4]; 1975dbdd7285SBarry Smith alpha6 = x[11 * i + 5]; 1976dbdd7285SBarry Smith alpha7 = x[11 * i + 6]; 1977dbdd7285SBarry Smith alpha8 = x[11 * i + 7]; 1978dbdd7285SBarry Smith alpha9 = x[11 * i + 8]; 1979dbdd7285SBarry Smith alpha10 = x[11 * i + 9]; 1980dbdd7285SBarry Smith alpha11 = x[11 * i + 10]; 1981dbdd7285SBarry Smith while (n-- > 0) { 1982dbdd7285SBarry Smith y[11 * (*idx)] += alpha1 * (*v); 1983dbdd7285SBarry Smith y[11 * (*idx) + 1] += alpha2 * (*v); 1984dbdd7285SBarry Smith y[11 * (*idx) + 2] += alpha3 * (*v); 1985dbdd7285SBarry Smith y[11 * (*idx) + 3] += alpha4 * (*v); 1986dbdd7285SBarry Smith y[11 * (*idx) + 4] += alpha5 * (*v); 1987dbdd7285SBarry Smith y[11 * (*idx) + 5] += alpha6 * (*v); 1988dbdd7285SBarry Smith y[11 * (*idx) + 6] += alpha7 * (*v); 1989dbdd7285SBarry Smith y[11 * (*idx) + 7] += alpha8 * (*v); 1990dbdd7285SBarry Smith y[11 * (*idx) + 8] += alpha9 * (*v); 1991dbdd7285SBarry Smith y[11 * (*idx) + 9] += alpha10 * (*v); 1992dbdd7285SBarry Smith y[11 * (*idx) + 10] += alpha11 * (*v); 19939371c9d4SSatish Balay idx++; 19949371c9d4SSatish Balay v++; 1995dbdd7285SBarry Smith } 1996dbdd7285SBarry Smith } 19979566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(22.0 * a->nz)); 19989566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 19999566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 2000dbdd7285SBarry Smith PetscFunctionReturn(0); 2001dbdd7285SBarry Smith } 2002dbdd7285SBarry Smith 2003dbdd7285SBarry Smith /*--------------------------------------------------------------------------------------------*/ 20049371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_16(Mat A, Vec xx, Vec yy) { 20052f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 20062f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2007d2840e09SBarry Smith const PetscScalar *x, *v; 2008d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 20092f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2010d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 2011d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 20122f7816d4SBarry Smith 20132f7816d4SBarry Smith PetscFunctionBegin; 20149566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 20159566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 20162f7816d4SBarry Smith idx = a->j; 20172f7816d4SBarry Smith v = a->a; 20182f7816d4SBarry Smith ii = a->i; 20192f7816d4SBarry Smith 20202f7816d4SBarry Smith for (i = 0; i < m; i++) { 20212f7816d4SBarry Smith jrow = ii[i]; 20222f7816d4SBarry Smith n = ii[i + 1] - jrow; 20232f7816d4SBarry Smith sum1 = 0.0; 20242f7816d4SBarry Smith sum2 = 0.0; 20252f7816d4SBarry Smith sum3 = 0.0; 20262f7816d4SBarry Smith sum4 = 0.0; 20272f7816d4SBarry Smith sum5 = 0.0; 20282f7816d4SBarry Smith sum6 = 0.0; 20292f7816d4SBarry Smith sum7 = 0.0; 20302f7816d4SBarry Smith sum8 = 0.0; 20312f7816d4SBarry Smith sum9 = 0.0; 20322f7816d4SBarry Smith sum10 = 0.0; 20332f7816d4SBarry Smith sum11 = 0.0; 20342f7816d4SBarry Smith sum12 = 0.0; 20352f7816d4SBarry Smith sum13 = 0.0; 20362f7816d4SBarry Smith sum14 = 0.0; 20372f7816d4SBarry Smith sum15 = 0.0; 20382f7816d4SBarry Smith sum16 = 0.0; 203926fbe8dcSKarl Rupp 2040b3c51c66SMatthew Knepley nonzerorow += (n > 0); 20412f7816d4SBarry Smith for (j = 0; j < n; j++) { 20422f7816d4SBarry Smith sum1 += v[jrow] * x[16 * idx[jrow]]; 20432f7816d4SBarry Smith sum2 += v[jrow] * x[16 * idx[jrow] + 1]; 20442f7816d4SBarry Smith sum3 += v[jrow] * x[16 * idx[jrow] + 2]; 20452f7816d4SBarry Smith sum4 += v[jrow] * x[16 * idx[jrow] + 3]; 20462f7816d4SBarry Smith sum5 += v[jrow] * x[16 * idx[jrow] + 4]; 20472f7816d4SBarry Smith sum6 += v[jrow] * x[16 * idx[jrow] + 5]; 20482f7816d4SBarry Smith sum7 += v[jrow] * x[16 * idx[jrow] + 6]; 20492f7816d4SBarry Smith sum8 += v[jrow] * x[16 * idx[jrow] + 7]; 20502f7816d4SBarry Smith sum9 += v[jrow] * x[16 * idx[jrow] + 8]; 20512f7816d4SBarry Smith sum10 += v[jrow] * x[16 * idx[jrow] + 9]; 20522f7816d4SBarry Smith sum11 += v[jrow] * x[16 * idx[jrow] + 10]; 20532f7816d4SBarry Smith sum12 += v[jrow] * x[16 * idx[jrow] + 11]; 20542f7816d4SBarry Smith sum13 += v[jrow] * x[16 * idx[jrow] + 12]; 20552f7816d4SBarry Smith sum14 += v[jrow] * x[16 * idx[jrow] + 13]; 20562f7816d4SBarry Smith sum15 += v[jrow] * x[16 * idx[jrow] + 14]; 20572f7816d4SBarry Smith sum16 += v[jrow] * x[16 * idx[jrow] + 15]; 20582f7816d4SBarry Smith jrow++; 20592f7816d4SBarry Smith } 20602f7816d4SBarry Smith y[16 * i] = sum1; 20612f7816d4SBarry Smith y[16 * i + 1] = sum2; 20622f7816d4SBarry Smith y[16 * i + 2] = sum3; 20632f7816d4SBarry Smith y[16 * i + 3] = sum4; 20642f7816d4SBarry Smith y[16 * i + 4] = sum5; 20652f7816d4SBarry Smith y[16 * i + 5] = sum6; 20662f7816d4SBarry Smith y[16 * i + 6] = sum7; 20672f7816d4SBarry Smith y[16 * i + 7] = sum8; 20682f7816d4SBarry Smith y[16 * i + 8] = sum9; 20692f7816d4SBarry Smith y[16 * i + 9] = sum10; 20702f7816d4SBarry Smith y[16 * i + 10] = sum11; 20712f7816d4SBarry Smith y[16 * i + 11] = sum12; 20722f7816d4SBarry Smith y[16 * i + 12] = sum13; 20732f7816d4SBarry Smith y[16 * i + 13] = sum14; 20742f7816d4SBarry Smith y[16 * i + 14] = sum15; 20752f7816d4SBarry Smith y[16 * i + 15] = sum16; 20762f7816d4SBarry Smith } 20772f7816d4SBarry Smith 20789566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(32.0 * a->nz - 16.0 * nonzerorow)); 20799566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 20809566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 20812f7816d4SBarry Smith PetscFunctionReturn(0); 20822f7816d4SBarry Smith } 20832f7816d4SBarry Smith 20849371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A, Vec xx, Vec yy) { 20852f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 20862f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2087d2840e09SBarry Smith const PetscScalar *x, *v; 2088d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 20892f7816d4SBarry Smith PetscScalar alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16; 2090d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 2091d2840e09SBarry Smith PetscInt n, i; 20922f7816d4SBarry Smith 20932f7816d4SBarry Smith PetscFunctionBegin; 20949566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 20959566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 20969566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 2097bfec09a0SHong Zhang 20982f7816d4SBarry Smith for (i = 0; i < m; i++) { 2099bfec09a0SHong Zhang idx = a->j + a->i[i]; 2100bfec09a0SHong Zhang v = a->a + a->i[i]; 21012f7816d4SBarry Smith n = a->i[i + 1] - a->i[i]; 21022f7816d4SBarry Smith alpha1 = x[16 * i]; 21032f7816d4SBarry Smith alpha2 = x[16 * i + 1]; 21042f7816d4SBarry Smith alpha3 = x[16 * i + 2]; 21052f7816d4SBarry Smith alpha4 = x[16 * i + 3]; 21062f7816d4SBarry Smith alpha5 = x[16 * i + 4]; 21072f7816d4SBarry Smith alpha6 = x[16 * i + 5]; 21082f7816d4SBarry Smith alpha7 = x[16 * i + 6]; 21092f7816d4SBarry Smith alpha8 = x[16 * i + 7]; 21102f7816d4SBarry Smith alpha9 = x[16 * i + 8]; 21112f7816d4SBarry Smith alpha10 = x[16 * i + 9]; 21122f7816d4SBarry Smith alpha11 = x[16 * i + 10]; 21132f7816d4SBarry Smith alpha12 = x[16 * i + 11]; 21142f7816d4SBarry Smith alpha13 = x[16 * i + 12]; 21152f7816d4SBarry Smith alpha14 = x[16 * i + 13]; 21162f7816d4SBarry Smith alpha15 = x[16 * i + 14]; 21172f7816d4SBarry Smith alpha16 = x[16 * i + 15]; 21182f7816d4SBarry Smith while (n-- > 0) { 21192f7816d4SBarry Smith y[16 * (*idx)] += alpha1 * (*v); 21202f7816d4SBarry Smith y[16 * (*idx) + 1] += alpha2 * (*v); 21212f7816d4SBarry Smith y[16 * (*idx) + 2] += alpha3 * (*v); 21222f7816d4SBarry Smith y[16 * (*idx) + 3] += alpha4 * (*v); 21232f7816d4SBarry Smith y[16 * (*idx) + 4] += alpha5 * (*v); 21242f7816d4SBarry Smith y[16 * (*idx) + 5] += alpha6 * (*v); 21252f7816d4SBarry Smith y[16 * (*idx) + 6] += alpha7 * (*v); 21262f7816d4SBarry Smith y[16 * (*idx) + 7] += alpha8 * (*v); 21272f7816d4SBarry Smith y[16 * (*idx) + 8] += alpha9 * (*v); 21282f7816d4SBarry Smith y[16 * (*idx) + 9] += alpha10 * (*v); 21292f7816d4SBarry Smith y[16 * (*idx) + 10] += alpha11 * (*v); 21302f7816d4SBarry Smith y[16 * (*idx) + 11] += alpha12 * (*v); 21312f7816d4SBarry Smith y[16 * (*idx) + 12] += alpha13 * (*v); 21322f7816d4SBarry Smith y[16 * (*idx) + 13] += alpha14 * (*v); 21332f7816d4SBarry Smith y[16 * (*idx) + 14] += alpha15 * (*v); 21342f7816d4SBarry Smith y[16 * (*idx) + 15] += alpha16 * (*v); 21359371c9d4SSatish Balay idx++; 21369371c9d4SSatish Balay v++; 21372f7816d4SBarry Smith } 21382f7816d4SBarry Smith } 21399566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(32.0 * a->nz)); 21409566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 21419566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 21422f7816d4SBarry Smith PetscFunctionReturn(0); 21432f7816d4SBarry Smith } 21442f7816d4SBarry Smith 21459371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz) { 21462f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 21472f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2148d2840e09SBarry Smith const PetscScalar *x, *v; 2149d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 21502f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2151d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 2152b24ad042SBarry Smith PetscInt n, i, jrow, j; 21532f7816d4SBarry Smith 21542f7816d4SBarry Smith PetscFunctionBegin; 21559566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 21569566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 21579566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 21582f7816d4SBarry Smith idx = a->j; 21592f7816d4SBarry Smith v = a->a; 21602f7816d4SBarry Smith ii = a->i; 21612f7816d4SBarry Smith 21622f7816d4SBarry Smith for (i = 0; i < m; i++) { 21632f7816d4SBarry Smith jrow = ii[i]; 21642f7816d4SBarry Smith n = ii[i + 1] - jrow; 21652f7816d4SBarry Smith sum1 = 0.0; 21662f7816d4SBarry Smith sum2 = 0.0; 21672f7816d4SBarry Smith sum3 = 0.0; 21682f7816d4SBarry Smith sum4 = 0.0; 21692f7816d4SBarry Smith sum5 = 0.0; 21702f7816d4SBarry Smith sum6 = 0.0; 21712f7816d4SBarry Smith sum7 = 0.0; 21722f7816d4SBarry Smith sum8 = 0.0; 21732f7816d4SBarry Smith sum9 = 0.0; 21742f7816d4SBarry Smith sum10 = 0.0; 21752f7816d4SBarry Smith sum11 = 0.0; 21762f7816d4SBarry Smith sum12 = 0.0; 21772f7816d4SBarry Smith sum13 = 0.0; 21782f7816d4SBarry Smith sum14 = 0.0; 21792f7816d4SBarry Smith sum15 = 0.0; 21802f7816d4SBarry Smith sum16 = 0.0; 21812f7816d4SBarry Smith for (j = 0; j < n; j++) { 21822f7816d4SBarry Smith sum1 += v[jrow] * x[16 * idx[jrow]]; 21832f7816d4SBarry Smith sum2 += v[jrow] * x[16 * idx[jrow] + 1]; 21842f7816d4SBarry Smith sum3 += v[jrow] * x[16 * idx[jrow] + 2]; 21852f7816d4SBarry Smith sum4 += v[jrow] * x[16 * idx[jrow] + 3]; 21862f7816d4SBarry Smith sum5 += v[jrow] * x[16 * idx[jrow] + 4]; 21872f7816d4SBarry Smith sum6 += v[jrow] * x[16 * idx[jrow] + 5]; 21882f7816d4SBarry Smith sum7 += v[jrow] * x[16 * idx[jrow] + 6]; 21892f7816d4SBarry Smith sum8 += v[jrow] * x[16 * idx[jrow] + 7]; 21902f7816d4SBarry Smith sum9 += v[jrow] * x[16 * idx[jrow] + 8]; 21912f7816d4SBarry Smith sum10 += v[jrow] * x[16 * idx[jrow] + 9]; 21922f7816d4SBarry Smith sum11 += v[jrow] * x[16 * idx[jrow] + 10]; 21932f7816d4SBarry Smith sum12 += v[jrow] * x[16 * idx[jrow] + 11]; 21942f7816d4SBarry Smith sum13 += v[jrow] * x[16 * idx[jrow] + 12]; 21952f7816d4SBarry Smith sum14 += v[jrow] * x[16 * idx[jrow] + 13]; 21962f7816d4SBarry Smith sum15 += v[jrow] * x[16 * idx[jrow] + 14]; 21972f7816d4SBarry Smith sum16 += v[jrow] * x[16 * idx[jrow] + 15]; 21982f7816d4SBarry Smith jrow++; 21992f7816d4SBarry Smith } 22002f7816d4SBarry Smith y[16 * i] += sum1; 22012f7816d4SBarry Smith y[16 * i + 1] += sum2; 22022f7816d4SBarry Smith y[16 * i + 2] += sum3; 22032f7816d4SBarry Smith y[16 * i + 3] += sum4; 22042f7816d4SBarry Smith y[16 * i + 4] += sum5; 22052f7816d4SBarry Smith y[16 * i + 5] += sum6; 22062f7816d4SBarry Smith y[16 * i + 6] += sum7; 22072f7816d4SBarry Smith y[16 * i + 7] += sum8; 22082f7816d4SBarry Smith y[16 * i + 8] += sum9; 22092f7816d4SBarry Smith y[16 * i + 9] += sum10; 22102f7816d4SBarry Smith y[16 * i + 10] += sum11; 22112f7816d4SBarry Smith y[16 * i + 11] += sum12; 22122f7816d4SBarry Smith y[16 * i + 12] += sum13; 22132f7816d4SBarry Smith y[16 * i + 13] += sum14; 22142f7816d4SBarry Smith y[16 * i + 14] += sum15; 22152f7816d4SBarry Smith y[16 * i + 15] += sum16; 22162f7816d4SBarry Smith } 22172f7816d4SBarry Smith 22189566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(32.0 * a->nz)); 22199566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 22209566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 22212f7816d4SBarry Smith PetscFunctionReturn(0); 22222f7816d4SBarry Smith } 22232f7816d4SBarry Smith 22249371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz) { 22252f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 22262f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2227d2840e09SBarry Smith const PetscScalar *x, *v; 2228d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 22292f7816d4SBarry Smith PetscScalar alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16; 2230d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 2231d2840e09SBarry Smith PetscInt n, i; 22322f7816d4SBarry Smith 22332f7816d4SBarry Smith PetscFunctionBegin; 22349566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 22359566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 22369566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 22372f7816d4SBarry Smith for (i = 0; i < m; i++) { 2238bfec09a0SHong Zhang idx = a->j + a->i[i]; 2239bfec09a0SHong Zhang v = a->a + a->i[i]; 22402f7816d4SBarry Smith n = a->i[i + 1] - a->i[i]; 22412f7816d4SBarry Smith alpha1 = x[16 * i]; 22422f7816d4SBarry Smith alpha2 = x[16 * i + 1]; 22432f7816d4SBarry Smith alpha3 = x[16 * i + 2]; 22442f7816d4SBarry Smith alpha4 = x[16 * i + 3]; 22452f7816d4SBarry Smith alpha5 = x[16 * i + 4]; 22462f7816d4SBarry Smith alpha6 = x[16 * i + 5]; 22472f7816d4SBarry Smith alpha7 = x[16 * i + 6]; 22482f7816d4SBarry Smith alpha8 = x[16 * i + 7]; 22492f7816d4SBarry Smith alpha9 = x[16 * i + 8]; 22502f7816d4SBarry Smith alpha10 = x[16 * i + 9]; 22512f7816d4SBarry Smith alpha11 = x[16 * i + 10]; 22522f7816d4SBarry Smith alpha12 = x[16 * i + 11]; 22532f7816d4SBarry Smith alpha13 = x[16 * i + 12]; 22542f7816d4SBarry Smith alpha14 = x[16 * i + 13]; 22552f7816d4SBarry Smith alpha15 = x[16 * i + 14]; 22562f7816d4SBarry Smith alpha16 = x[16 * i + 15]; 22572f7816d4SBarry Smith while (n-- > 0) { 22582f7816d4SBarry Smith y[16 * (*idx)] += alpha1 * (*v); 22592f7816d4SBarry Smith y[16 * (*idx) + 1] += alpha2 * (*v); 22602f7816d4SBarry Smith y[16 * (*idx) + 2] += alpha3 * (*v); 22612f7816d4SBarry Smith y[16 * (*idx) + 3] += alpha4 * (*v); 22622f7816d4SBarry Smith y[16 * (*idx) + 4] += alpha5 * (*v); 22632f7816d4SBarry Smith y[16 * (*idx) + 5] += alpha6 * (*v); 22642f7816d4SBarry Smith y[16 * (*idx) + 6] += alpha7 * (*v); 22652f7816d4SBarry Smith y[16 * (*idx) + 7] += alpha8 * (*v); 22662f7816d4SBarry Smith y[16 * (*idx) + 8] += alpha9 * (*v); 22672f7816d4SBarry Smith y[16 * (*idx) + 9] += alpha10 * (*v); 22682f7816d4SBarry Smith y[16 * (*idx) + 10] += alpha11 * (*v); 22692f7816d4SBarry Smith y[16 * (*idx) + 11] += alpha12 * (*v); 22702f7816d4SBarry Smith y[16 * (*idx) + 12] += alpha13 * (*v); 22712f7816d4SBarry Smith y[16 * (*idx) + 13] += alpha14 * (*v); 22722f7816d4SBarry Smith y[16 * (*idx) + 14] += alpha15 * (*v); 22732f7816d4SBarry Smith y[16 * (*idx) + 15] += alpha16 * (*v); 22749371c9d4SSatish Balay idx++; 22759371c9d4SSatish Balay v++; 22762f7816d4SBarry Smith } 22772f7816d4SBarry Smith } 22789566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(32.0 * a->nz)); 22799566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 22809566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 228166ed3db0SBarry Smith PetscFunctionReturn(0); 228266ed3db0SBarry Smith } 228366ed3db0SBarry Smith 2284ed1c418cSBarry Smith /*--------------------------------------------------------------------------------------------*/ 22859371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_18(Mat A, Vec xx, Vec yy) { 2286ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2287ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2288d2840e09SBarry Smith const PetscScalar *x, *v; 2289d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2290ed1c418cSBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2291d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 2292d2840e09SBarry Smith PetscInt nonzerorow = 0, n, i, jrow, j; 2293ed1c418cSBarry Smith 2294ed1c418cSBarry Smith PetscFunctionBegin; 22959566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 22969566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 2297ed1c418cSBarry Smith idx = a->j; 2298ed1c418cSBarry Smith v = a->a; 2299ed1c418cSBarry Smith ii = a->i; 2300ed1c418cSBarry Smith 2301ed1c418cSBarry Smith for (i = 0; i < m; i++) { 2302ed1c418cSBarry Smith jrow = ii[i]; 2303ed1c418cSBarry Smith n = ii[i + 1] - jrow; 2304ed1c418cSBarry Smith sum1 = 0.0; 2305ed1c418cSBarry Smith sum2 = 0.0; 2306ed1c418cSBarry Smith sum3 = 0.0; 2307ed1c418cSBarry Smith sum4 = 0.0; 2308ed1c418cSBarry Smith sum5 = 0.0; 2309ed1c418cSBarry Smith sum6 = 0.0; 2310ed1c418cSBarry Smith sum7 = 0.0; 2311ed1c418cSBarry Smith sum8 = 0.0; 2312ed1c418cSBarry Smith sum9 = 0.0; 2313ed1c418cSBarry Smith sum10 = 0.0; 2314ed1c418cSBarry Smith sum11 = 0.0; 2315ed1c418cSBarry Smith sum12 = 0.0; 2316ed1c418cSBarry Smith sum13 = 0.0; 2317ed1c418cSBarry Smith sum14 = 0.0; 2318ed1c418cSBarry Smith sum15 = 0.0; 2319ed1c418cSBarry Smith sum16 = 0.0; 2320ed1c418cSBarry Smith sum17 = 0.0; 2321ed1c418cSBarry Smith sum18 = 0.0; 232226fbe8dcSKarl Rupp 2323ed1c418cSBarry Smith nonzerorow += (n > 0); 2324ed1c418cSBarry Smith for (j = 0; j < n; j++) { 2325ed1c418cSBarry Smith sum1 += v[jrow] * x[18 * idx[jrow]]; 2326ed1c418cSBarry Smith sum2 += v[jrow] * x[18 * idx[jrow] + 1]; 2327ed1c418cSBarry Smith sum3 += v[jrow] * x[18 * idx[jrow] + 2]; 2328ed1c418cSBarry Smith sum4 += v[jrow] * x[18 * idx[jrow] + 3]; 2329ed1c418cSBarry Smith sum5 += v[jrow] * x[18 * idx[jrow] + 4]; 2330ed1c418cSBarry Smith sum6 += v[jrow] * x[18 * idx[jrow] + 5]; 2331ed1c418cSBarry Smith sum7 += v[jrow] * x[18 * idx[jrow] + 6]; 2332ed1c418cSBarry Smith sum8 += v[jrow] * x[18 * idx[jrow] + 7]; 2333ed1c418cSBarry Smith sum9 += v[jrow] * x[18 * idx[jrow] + 8]; 2334ed1c418cSBarry Smith sum10 += v[jrow] * x[18 * idx[jrow] + 9]; 2335ed1c418cSBarry Smith sum11 += v[jrow] * x[18 * idx[jrow] + 10]; 2336ed1c418cSBarry Smith sum12 += v[jrow] * x[18 * idx[jrow] + 11]; 2337ed1c418cSBarry Smith sum13 += v[jrow] * x[18 * idx[jrow] + 12]; 2338ed1c418cSBarry Smith sum14 += v[jrow] * x[18 * idx[jrow] + 13]; 2339ed1c418cSBarry Smith sum15 += v[jrow] * x[18 * idx[jrow] + 14]; 2340ed1c418cSBarry Smith sum16 += v[jrow] * x[18 * idx[jrow] + 15]; 2341ed1c418cSBarry Smith sum17 += v[jrow] * x[18 * idx[jrow] + 16]; 2342ed1c418cSBarry Smith sum18 += v[jrow] * x[18 * idx[jrow] + 17]; 2343ed1c418cSBarry Smith jrow++; 2344ed1c418cSBarry Smith } 2345ed1c418cSBarry Smith y[18 * i] = sum1; 2346ed1c418cSBarry Smith y[18 * i + 1] = sum2; 2347ed1c418cSBarry Smith y[18 * i + 2] = sum3; 2348ed1c418cSBarry Smith y[18 * i + 3] = sum4; 2349ed1c418cSBarry Smith y[18 * i + 4] = sum5; 2350ed1c418cSBarry Smith y[18 * i + 5] = sum6; 2351ed1c418cSBarry Smith y[18 * i + 6] = sum7; 2352ed1c418cSBarry Smith y[18 * i + 7] = sum8; 2353ed1c418cSBarry Smith y[18 * i + 8] = sum9; 2354ed1c418cSBarry Smith y[18 * i + 9] = sum10; 2355ed1c418cSBarry Smith y[18 * i + 10] = sum11; 2356ed1c418cSBarry Smith y[18 * i + 11] = sum12; 2357ed1c418cSBarry Smith y[18 * i + 12] = sum13; 2358ed1c418cSBarry Smith y[18 * i + 13] = sum14; 2359ed1c418cSBarry Smith y[18 * i + 14] = sum15; 2360ed1c418cSBarry Smith y[18 * i + 15] = sum16; 2361ed1c418cSBarry Smith y[18 * i + 16] = sum17; 2362ed1c418cSBarry Smith y[18 * i + 17] = sum18; 2363ed1c418cSBarry Smith } 2364ed1c418cSBarry Smith 23659566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(36.0 * a->nz - 18.0 * nonzerorow)); 23669566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 23679566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 2368ed1c418cSBarry Smith PetscFunctionReturn(0); 2369ed1c418cSBarry Smith } 2370ed1c418cSBarry Smith 23719371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A, Vec xx, Vec yy) { 2372ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2373ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2374d2840e09SBarry Smith const PetscScalar *x, *v; 2375d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 2376ed1c418cSBarry Smith PetscScalar alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16, alpha17, alpha18; 2377d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 2378d2840e09SBarry Smith PetscInt n, i; 2379ed1c418cSBarry Smith 2380ed1c418cSBarry Smith PetscFunctionBegin; 23819566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 23829566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 23839566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 2384ed1c418cSBarry Smith 2385ed1c418cSBarry Smith for (i = 0; i < m; i++) { 2386ed1c418cSBarry Smith idx = a->j + a->i[i]; 2387ed1c418cSBarry Smith v = a->a + a->i[i]; 2388ed1c418cSBarry Smith n = a->i[i + 1] - a->i[i]; 2389ed1c418cSBarry Smith alpha1 = x[18 * i]; 2390ed1c418cSBarry Smith alpha2 = x[18 * i + 1]; 2391ed1c418cSBarry Smith alpha3 = x[18 * i + 2]; 2392ed1c418cSBarry Smith alpha4 = x[18 * i + 3]; 2393ed1c418cSBarry Smith alpha5 = x[18 * i + 4]; 2394ed1c418cSBarry Smith alpha6 = x[18 * i + 5]; 2395ed1c418cSBarry Smith alpha7 = x[18 * i + 6]; 2396ed1c418cSBarry Smith alpha8 = x[18 * i + 7]; 2397ed1c418cSBarry Smith alpha9 = x[18 * i + 8]; 2398ed1c418cSBarry Smith alpha10 = x[18 * i + 9]; 2399ed1c418cSBarry Smith alpha11 = x[18 * i + 10]; 2400ed1c418cSBarry Smith alpha12 = x[18 * i + 11]; 2401ed1c418cSBarry Smith alpha13 = x[18 * i + 12]; 2402ed1c418cSBarry Smith alpha14 = x[18 * i + 13]; 2403ed1c418cSBarry Smith alpha15 = x[18 * i + 14]; 2404ed1c418cSBarry Smith alpha16 = x[18 * i + 15]; 2405ed1c418cSBarry Smith alpha17 = x[18 * i + 16]; 2406ed1c418cSBarry Smith alpha18 = x[18 * i + 17]; 2407ed1c418cSBarry Smith while (n-- > 0) { 2408ed1c418cSBarry Smith y[18 * (*idx)] += alpha1 * (*v); 2409ed1c418cSBarry Smith y[18 * (*idx) + 1] += alpha2 * (*v); 2410ed1c418cSBarry Smith y[18 * (*idx) + 2] += alpha3 * (*v); 2411ed1c418cSBarry Smith y[18 * (*idx) + 3] += alpha4 * (*v); 2412ed1c418cSBarry Smith y[18 * (*idx) + 4] += alpha5 * (*v); 2413ed1c418cSBarry Smith y[18 * (*idx) + 5] += alpha6 * (*v); 2414ed1c418cSBarry Smith y[18 * (*idx) + 6] += alpha7 * (*v); 2415ed1c418cSBarry Smith y[18 * (*idx) + 7] += alpha8 * (*v); 2416ed1c418cSBarry Smith y[18 * (*idx) + 8] += alpha9 * (*v); 2417ed1c418cSBarry Smith y[18 * (*idx) + 9] += alpha10 * (*v); 2418ed1c418cSBarry Smith y[18 * (*idx) + 10] += alpha11 * (*v); 2419ed1c418cSBarry Smith y[18 * (*idx) + 11] += alpha12 * (*v); 2420ed1c418cSBarry Smith y[18 * (*idx) + 12] += alpha13 * (*v); 2421ed1c418cSBarry Smith y[18 * (*idx) + 13] += alpha14 * (*v); 2422ed1c418cSBarry Smith y[18 * (*idx) + 14] += alpha15 * (*v); 2423ed1c418cSBarry Smith y[18 * (*idx) + 15] += alpha16 * (*v); 2424ed1c418cSBarry Smith y[18 * (*idx) + 16] += alpha17 * (*v); 2425ed1c418cSBarry Smith y[18 * (*idx) + 17] += alpha18 * (*v); 24269371c9d4SSatish Balay idx++; 24279371c9d4SSatish Balay v++; 2428ed1c418cSBarry Smith } 2429ed1c418cSBarry Smith } 24309566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(36.0 * a->nz)); 24319566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 24329566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 2433ed1c418cSBarry Smith PetscFunctionReturn(0); 2434ed1c418cSBarry Smith } 2435ed1c418cSBarry Smith 24369371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz) { 2437ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2438ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2439d2840e09SBarry Smith const PetscScalar *x, *v; 2440d2840e09SBarry Smith PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2441ed1c418cSBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2442d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 2443ed1c418cSBarry Smith PetscInt n, i, jrow, j; 2444ed1c418cSBarry Smith 2445ed1c418cSBarry Smith PetscFunctionBegin; 24469566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 24479566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 24489566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 2449ed1c418cSBarry Smith idx = a->j; 2450ed1c418cSBarry Smith v = a->a; 2451ed1c418cSBarry Smith ii = a->i; 2452ed1c418cSBarry Smith 2453ed1c418cSBarry Smith for (i = 0; i < m; i++) { 2454ed1c418cSBarry Smith jrow = ii[i]; 2455ed1c418cSBarry Smith n = ii[i + 1] - jrow; 2456ed1c418cSBarry Smith sum1 = 0.0; 2457ed1c418cSBarry Smith sum2 = 0.0; 2458ed1c418cSBarry Smith sum3 = 0.0; 2459ed1c418cSBarry Smith sum4 = 0.0; 2460ed1c418cSBarry Smith sum5 = 0.0; 2461ed1c418cSBarry Smith sum6 = 0.0; 2462ed1c418cSBarry Smith sum7 = 0.0; 2463ed1c418cSBarry Smith sum8 = 0.0; 2464ed1c418cSBarry Smith sum9 = 0.0; 2465ed1c418cSBarry Smith sum10 = 0.0; 2466ed1c418cSBarry Smith sum11 = 0.0; 2467ed1c418cSBarry Smith sum12 = 0.0; 2468ed1c418cSBarry Smith sum13 = 0.0; 2469ed1c418cSBarry Smith sum14 = 0.0; 2470ed1c418cSBarry Smith sum15 = 0.0; 2471ed1c418cSBarry Smith sum16 = 0.0; 2472ed1c418cSBarry Smith sum17 = 0.0; 2473ed1c418cSBarry Smith sum18 = 0.0; 2474ed1c418cSBarry Smith for (j = 0; j < n; j++) { 2475ed1c418cSBarry Smith sum1 += v[jrow] * x[18 * idx[jrow]]; 2476ed1c418cSBarry Smith sum2 += v[jrow] * x[18 * idx[jrow] + 1]; 2477ed1c418cSBarry Smith sum3 += v[jrow] * x[18 * idx[jrow] + 2]; 2478ed1c418cSBarry Smith sum4 += v[jrow] * x[18 * idx[jrow] + 3]; 2479ed1c418cSBarry Smith sum5 += v[jrow] * x[18 * idx[jrow] + 4]; 2480ed1c418cSBarry Smith sum6 += v[jrow] * x[18 * idx[jrow] + 5]; 2481ed1c418cSBarry Smith sum7 += v[jrow] * x[18 * idx[jrow] + 6]; 2482ed1c418cSBarry Smith sum8 += v[jrow] * x[18 * idx[jrow] + 7]; 2483ed1c418cSBarry Smith sum9 += v[jrow] * x[18 * idx[jrow] + 8]; 2484ed1c418cSBarry Smith sum10 += v[jrow] * x[18 * idx[jrow] + 9]; 2485ed1c418cSBarry Smith sum11 += v[jrow] * x[18 * idx[jrow] + 10]; 2486ed1c418cSBarry Smith sum12 += v[jrow] * x[18 * idx[jrow] + 11]; 2487ed1c418cSBarry Smith sum13 += v[jrow] * x[18 * idx[jrow] + 12]; 2488ed1c418cSBarry Smith sum14 += v[jrow] * x[18 * idx[jrow] + 13]; 2489ed1c418cSBarry Smith sum15 += v[jrow] * x[18 * idx[jrow] + 14]; 2490ed1c418cSBarry Smith sum16 += v[jrow] * x[18 * idx[jrow] + 15]; 2491ed1c418cSBarry Smith sum17 += v[jrow] * x[18 * idx[jrow] + 16]; 2492ed1c418cSBarry Smith sum18 += v[jrow] * x[18 * idx[jrow] + 17]; 2493ed1c418cSBarry Smith jrow++; 2494ed1c418cSBarry Smith } 2495ed1c418cSBarry Smith y[18 * i] += sum1; 2496ed1c418cSBarry Smith y[18 * i + 1] += sum2; 2497ed1c418cSBarry Smith y[18 * i + 2] += sum3; 2498ed1c418cSBarry Smith y[18 * i + 3] += sum4; 2499ed1c418cSBarry Smith y[18 * i + 4] += sum5; 2500ed1c418cSBarry Smith y[18 * i + 5] += sum6; 2501ed1c418cSBarry Smith y[18 * i + 6] += sum7; 2502ed1c418cSBarry Smith y[18 * i + 7] += sum8; 2503ed1c418cSBarry Smith y[18 * i + 8] += sum9; 2504ed1c418cSBarry Smith y[18 * i + 9] += sum10; 2505ed1c418cSBarry Smith y[18 * i + 10] += sum11; 2506ed1c418cSBarry Smith y[18 * i + 11] += sum12; 2507ed1c418cSBarry Smith y[18 * i + 12] += sum13; 2508ed1c418cSBarry Smith y[18 * i + 13] += sum14; 2509ed1c418cSBarry Smith y[18 * i + 14] += sum15; 2510ed1c418cSBarry Smith y[18 * i + 15] += sum16; 2511ed1c418cSBarry Smith y[18 * i + 16] += sum17; 2512ed1c418cSBarry Smith y[18 * i + 17] += sum18; 2513ed1c418cSBarry Smith } 2514ed1c418cSBarry Smith 25159566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(36.0 * a->nz)); 25169566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 25179566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 2518ed1c418cSBarry Smith PetscFunctionReturn(0); 2519ed1c418cSBarry Smith } 2520ed1c418cSBarry Smith 25219371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz) { 2522ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2523ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2524d2840e09SBarry Smith const PetscScalar *x, *v; 2525d2840e09SBarry Smith PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 2526ed1c418cSBarry Smith PetscScalar alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16, alpha17, alpha18; 2527d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx; 2528d2840e09SBarry Smith PetscInt n, i; 2529ed1c418cSBarry Smith 2530ed1c418cSBarry Smith PetscFunctionBegin; 25319566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 25329566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 25339566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 2534ed1c418cSBarry Smith for (i = 0; i < m; i++) { 2535ed1c418cSBarry Smith idx = a->j + a->i[i]; 2536ed1c418cSBarry Smith v = a->a + a->i[i]; 2537ed1c418cSBarry Smith n = a->i[i + 1] - a->i[i]; 2538ed1c418cSBarry Smith alpha1 = x[18 * i]; 2539ed1c418cSBarry Smith alpha2 = x[18 * i + 1]; 2540ed1c418cSBarry Smith alpha3 = x[18 * i + 2]; 2541ed1c418cSBarry Smith alpha4 = x[18 * i + 3]; 2542ed1c418cSBarry Smith alpha5 = x[18 * i + 4]; 2543ed1c418cSBarry Smith alpha6 = x[18 * i + 5]; 2544ed1c418cSBarry Smith alpha7 = x[18 * i + 6]; 2545ed1c418cSBarry Smith alpha8 = x[18 * i + 7]; 2546ed1c418cSBarry Smith alpha9 = x[18 * i + 8]; 2547ed1c418cSBarry Smith alpha10 = x[18 * i + 9]; 2548ed1c418cSBarry Smith alpha11 = x[18 * i + 10]; 2549ed1c418cSBarry Smith alpha12 = x[18 * i + 11]; 2550ed1c418cSBarry Smith alpha13 = x[18 * i + 12]; 2551ed1c418cSBarry Smith alpha14 = x[18 * i + 13]; 2552ed1c418cSBarry Smith alpha15 = x[18 * i + 14]; 2553ed1c418cSBarry Smith alpha16 = x[18 * i + 15]; 2554ed1c418cSBarry Smith alpha17 = x[18 * i + 16]; 2555ed1c418cSBarry Smith alpha18 = x[18 * i + 17]; 2556ed1c418cSBarry Smith while (n-- > 0) { 2557ed1c418cSBarry Smith y[18 * (*idx)] += alpha1 * (*v); 2558ed1c418cSBarry Smith y[18 * (*idx) + 1] += alpha2 * (*v); 2559ed1c418cSBarry Smith y[18 * (*idx) + 2] += alpha3 * (*v); 2560ed1c418cSBarry Smith y[18 * (*idx) + 3] += alpha4 * (*v); 2561ed1c418cSBarry Smith y[18 * (*idx) + 4] += alpha5 * (*v); 2562ed1c418cSBarry Smith y[18 * (*idx) + 5] += alpha6 * (*v); 2563ed1c418cSBarry Smith y[18 * (*idx) + 6] += alpha7 * (*v); 2564ed1c418cSBarry Smith y[18 * (*idx) + 7] += alpha8 * (*v); 2565ed1c418cSBarry Smith y[18 * (*idx) + 8] += alpha9 * (*v); 2566ed1c418cSBarry Smith y[18 * (*idx) + 9] += alpha10 * (*v); 2567ed1c418cSBarry Smith y[18 * (*idx) + 10] += alpha11 * (*v); 2568ed1c418cSBarry Smith y[18 * (*idx) + 11] += alpha12 * (*v); 2569ed1c418cSBarry Smith y[18 * (*idx) + 12] += alpha13 * (*v); 2570ed1c418cSBarry Smith y[18 * (*idx) + 13] += alpha14 * (*v); 2571ed1c418cSBarry Smith y[18 * (*idx) + 14] += alpha15 * (*v); 2572ed1c418cSBarry Smith y[18 * (*idx) + 15] += alpha16 * (*v); 2573ed1c418cSBarry Smith y[18 * (*idx) + 16] += alpha17 * (*v); 2574ed1c418cSBarry Smith y[18 * (*idx) + 17] += alpha18 * (*v); 25759371c9d4SSatish Balay idx++; 25769371c9d4SSatish Balay v++; 2577ed1c418cSBarry Smith } 2578ed1c418cSBarry Smith } 25799566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(36.0 * a->nz)); 25809566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 25819566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 2582ed1c418cSBarry Smith PetscFunctionReturn(0); 2583ed1c418cSBarry Smith } 2584ed1c418cSBarry Smith 25859371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy) { 25866861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 25876861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 25886861f193SBarry Smith const PetscScalar *x, *v; 25896861f193SBarry Smith PetscScalar *y, *sums; 25906861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 25916861f193SBarry Smith PetscInt n, i, jrow, j, dof = b->dof, k; 25926861f193SBarry Smith 25936861f193SBarry Smith PetscFunctionBegin; 25949566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 25959566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 25969566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 25976861f193SBarry Smith idx = a->j; 25986861f193SBarry Smith v = a->a; 25996861f193SBarry Smith ii = a->i; 26006861f193SBarry Smith 26016861f193SBarry Smith for (i = 0; i < m; i++) { 26026861f193SBarry Smith jrow = ii[i]; 26036861f193SBarry Smith n = ii[i + 1] - jrow; 26046861f193SBarry Smith sums = y + dof * i; 26056861f193SBarry Smith for (j = 0; j < n; j++) { 26069371c9d4SSatish Balay for (k = 0; k < dof; k++) { sums[k] += v[jrow] * x[dof * idx[jrow] + k]; } 26076861f193SBarry Smith jrow++; 26086861f193SBarry Smith } 26096861f193SBarry Smith } 26106861f193SBarry Smith 26119566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 26129566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 26139566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 26146861f193SBarry Smith PetscFunctionReturn(0); 26156861f193SBarry Smith } 26166861f193SBarry Smith 26179371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) { 26186861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 26196861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 26206861f193SBarry Smith const PetscScalar *x, *v; 26216861f193SBarry Smith PetscScalar *y, *sums; 26226861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 26236861f193SBarry Smith PetscInt n, i, jrow, j, dof = b->dof, k; 26246861f193SBarry Smith 26256861f193SBarry Smith PetscFunctionBegin; 26269566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 26279566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 26289566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 26296861f193SBarry Smith idx = a->j; 26306861f193SBarry Smith v = a->a; 26316861f193SBarry Smith ii = a->i; 26326861f193SBarry Smith 26336861f193SBarry Smith for (i = 0; i < m; i++) { 26346861f193SBarry Smith jrow = ii[i]; 26356861f193SBarry Smith n = ii[i + 1] - jrow; 26366861f193SBarry Smith sums = y + dof * i; 26376861f193SBarry Smith for (j = 0; j < n; j++) { 26389371c9d4SSatish Balay for (k = 0; k < dof; k++) { sums[k] += v[jrow] * x[dof * idx[jrow] + k]; } 26396861f193SBarry Smith jrow++; 26406861f193SBarry Smith } 26416861f193SBarry Smith } 26426861f193SBarry Smith 26439566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 26449566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 26459566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 26466861f193SBarry Smith PetscFunctionReturn(0); 26476861f193SBarry Smith } 26486861f193SBarry Smith 26499371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy) { 26506861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 26516861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 26526861f193SBarry Smith const PetscScalar *x, *v, *alpha; 26536861f193SBarry Smith PetscScalar *y; 26546861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof; 26556861f193SBarry Smith PetscInt n, i, k; 26566861f193SBarry Smith 26576861f193SBarry Smith PetscFunctionBegin; 26589566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 26599566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 26609566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 26616861f193SBarry Smith for (i = 0; i < m; i++) { 26626861f193SBarry Smith idx = a->j + a->i[i]; 26636861f193SBarry Smith v = a->a + a->i[i]; 26646861f193SBarry Smith n = a->i[i + 1] - a->i[i]; 26656861f193SBarry Smith alpha = x + dof * i; 26666861f193SBarry Smith while (n-- > 0) { 26679371c9d4SSatish Balay for (k = 0; k < dof; k++) { y[dof * (*idx) + k] += alpha[k] * (*v); } 26689371c9d4SSatish Balay idx++; 26699371c9d4SSatish Balay v++; 26706861f193SBarry Smith } 26716861f193SBarry Smith } 26729566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 26739566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 26749566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 26756861f193SBarry Smith PetscFunctionReturn(0); 26766861f193SBarry Smith } 26776861f193SBarry Smith 26789371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) { 26796861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 26806861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 26816861f193SBarry Smith const PetscScalar *x, *v, *alpha; 26826861f193SBarry Smith PetscScalar *y; 26836861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof; 26846861f193SBarry Smith PetscInt n, i, k; 26856861f193SBarry Smith 26866861f193SBarry Smith PetscFunctionBegin; 26879566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 26889566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 26899566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 26906861f193SBarry Smith for (i = 0; i < m; i++) { 26916861f193SBarry Smith idx = a->j + a->i[i]; 26926861f193SBarry Smith v = a->a + a->i[i]; 26936861f193SBarry Smith n = a->i[i + 1] - a->i[i]; 26946861f193SBarry Smith alpha = x + dof * i; 26956861f193SBarry Smith while (n-- > 0) { 26969371c9d4SSatish Balay for (k = 0; k < dof; k++) { y[dof * (*idx) + k] += alpha[k] * (*v); } 26979371c9d4SSatish Balay idx++; 26989371c9d4SSatish Balay v++; 26996861f193SBarry Smith } 27006861f193SBarry Smith } 27019566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 27029566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 27039566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 27046861f193SBarry Smith PetscFunctionReturn(0); 27056861f193SBarry Smith } 27066861f193SBarry Smith 2707f4a53059SBarry Smith /*===================================================================================*/ 27089371c9d4SSatish Balay PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) { 27094cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 2710f4a53059SBarry Smith 2711b24ad042SBarry Smith PetscFunctionBegin; 2712f4a53059SBarry Smith /* start the scatter */ 27139566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 27149566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->mult)(b->AIJ, xx, yy)); 27159566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 27169566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy)); 2717f4a53059SBarry Smith PetscFunctionReturn(0); 2718f4a53059SBarry Smith } 2719f4a53059SBarry Smith 27209371c9d4SSatish Balay PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) { 27214cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 2722b24ad042SBarry Smith 27234cb9d9b8SBarry Smith PetscFunctionBegin; 27249566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w)); 27259566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy)); 27269566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE)); 27279566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE)); 27284cb9d9b8SBarry Smith PetscFunctionReturn(0); 27294cb9d9b8SBarry Smith } 27304cb9d9b8SBarry Smith 27319371c9d4SSatish Balay PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) { 27324cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 27334cb9d9b8SBarry Smith 2734b24ad042SBarry Smith PetscFunctionBegin; 27354cb9d9b8SBarry Smith /* start the scatter */ 27369566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 27379566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz)); 27389566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 27399566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz)); 27404cb9d9b8SBarry Smith PetscFunctionReturn(0); 27414cb9d9b8SBarry Smith } 27424cb9d9b8SBarry Smith 27439371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) { 27444cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 2745b24ad042SBarry Smith 27464cb9d9b8SBarry Smith PetscFunctionBegin; 27479566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w)); 27489566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz)); 27499566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE)); 27509566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE)); 27514cb9d9b8SBarry Smith PetscFunctionReturn(0); 27524cb9d9b8SBarry Smith } 27534cb9d9b8SBarry Smith 275495ddefa2SHong Zhang /* ----------------------------------------------------------------*/ 27559371c9d4SSatish Balay PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C) { 27564222ddf1SHong Zhang Mat_Product *product = C->product; 27574222ddf1SHong Zhang 27584222ddf1SHong Zhang PetscFunctionBegin; 27594222ddf1SHong Zhang if (product->type == MATPRODUCT_PtAP) { 27604222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ; 276198921bdaSJacob Faibussowitsch } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]); 27624222ddf1SHong Zhang PetscFunctionReturn(0); 27634222ddf1SHong Zhang } 27644222ddf1SHong Zhang 27659371c9d4SSatish Balay PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C) { 27664222ddf1SHong Zhang Mat_Product *product = C->product; 27674222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 27684222ddf1SHong Zhang Mat A = product->A, P = product->B; 27694222ddf1SHong Zhang PetscInt alg = 1; /* set default algorithm */ 27704222ddf1SHong Zhang #if !defined(PETSC_HAVE_HYPRE) 27714222ddf1SHong Zhang const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"}; 27724222ddf1SHong Zhang PetscInt nalg = 4; 27734222ddf1SHong Zhang #else 27744222ddf1SHong Zhang const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"}; 27754222ddf1SHong Zhang PetscInt nalg = 5; 27764222ddf1SHong Zhang #endif 27774222ddf1SHong Zhang 27784222ddf1SHong Zhang PetscFunctionBegin; 277908401ef6SPierre Jolivet PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices", MatProductTypes[product->type]); 27804222ddf1SHong Zhang 27814222ddf1SHong Zhang /* PtAP */ 27824222ddf1SHong Zhang /* Check matrix local sizes */ 27839371c9d4SSatish Balay PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")", 27849371c9d4SSatish Balay A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend); 27859371c9d4SSatish Balay PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")", 27869371c9d4SSatish Balay A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend); 27874222ddf1SHong Zhang 27884222ddf1SHong Zhang /* Set the default algorithm */ 27899566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg, "default", &flg)); 2790*48a46eb9SPierre Jolivet if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 27914222ddf1SHong Zhang 27924222ddf1SHong Zhang /* Get runtime option */ 2793d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 27949566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg)); 2795*48a46eb9SPierre Jolivet if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2796d0609cedSBarry Smith PetscOptionsEnd(); 27974222ddf1SHong Zhang 27989566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg, "allatonce", &flg)); 27994222ddf1SHong Zhang if (flg) { 28004222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 28014222ddf1SHong Zhang PetscFunctionReturn(0); 28024222ddf1SHong Zhang } 28034222ddf1SHong Zhang 28049566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg, "allatonce_merged", &flg)); 28054222ddf1SHong Zhang if (flg) { 28064222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 28074222ddf1SHong Zhang PetscFunctionReturn(0); 28084222ddf1SHong Zhang } 28094222ddf1SHong Zhang 28104222ddf1SHong Zhang /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */ 28119566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n")); 28129566063dSJacob Faibussowitsch PetscCall(MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P)); 28139566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(C)); 28144222ddf1SHong Zhang PetscFunctionReturn(0); 28154222ddf1SHong Zhang } 28164222ddf1SHong Zhang 28174222ddf1SHong Zhang /* ----------------------------------------------------------------*/ 28189371c9d4SSatish Balay PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C) { 28190298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 28207ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data; 28217ba1a0bfSKris Buschelman Mat P = pp->AIJ; 28227ba1a0bfSKris Buschelman Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c; 2823d2840e09SBarry Smith PetscInt *pti, *ptj, *ptJ; 28247ba1a0bfSKris Buschelman PetscInt *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj; 2825d2840e09SBarry Smith const PetscInt an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof; 2826d2840e09SBarry Smith PetscInt i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn; 28277ba1a0bfSKris Buschelman MatScalar *ca; 2828d2840e09SBarry Smith const PetscInt *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj; 28297ba1a0bfSKris Buschelman 28307ba1a0bfSKris Buschelman PetscFunctionBegin; 28317ba1a0bfSKris Buschelman /* Get ij structure of P^T */ 28329566063dSJacob Faibussowitsch PetscCall(MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj)); 28337ba1a0bfSKris Buschelman 28347ba1a0bfSKris Buschelman cn = pn * ppdof; 28357ba1a0bfSKris Buschelman /* Allocate ci array, arrays for fill computation and */ 28367ba1a0bfSKris Buschelman /* free space for accumulating nonzero column info */ 28379566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(cn + 1, &ci)); 28387ba1a0bfSKris Buschelman ci[0] = 0; 28397ba1a0bfSKris Buschelman 28407ba1a0bfSKris Buschelman /* Work arrays for rows of P^T*A */ 28419566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow)); 28429566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ptadenserow, an)); 28439566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(denserow, cn)); 28447ba1a0bfSKris Buschelman 28457ba1a0bfSKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 28467ba1a0bfSKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 28477ba1a0bfSKris Buschelman /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 28489566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space)); 28497ba1a0bfSKris Buschelman current_space = free_space; 28507ba1a0bfSKris Buschelman 28517ba1a0bfSKris Buschelman /* Determine symbolic info for each row of C: */ 28527ba1a0bfSKris Buschelman for (i = 0; i < pn; i++) { 28537ba1a0bfSKris Buschelman ptnzi = pti[i + 1] - pti[i]; 28547ba1a0bfSKris Buschelman ptJ = ptj + pti[i]; 28557ba1a0bfSKris Buschelman for (dof = 0; dof < ppdof; dof++) { 28567ba1a0bfSKris Buschelman ptanzi = 0; 28577ba1a0bfSKris Buschelman /* Determine symbolic row of PtA: */ 28587ba1a0bfSKris Buschelman for (j = 0; j < ptnzi; j++) { 28597ba1a0bfSKris Buschelman /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 28607ba1a0bfSKris Buschelman arow = ptJ[j] * ppdof + dof; 28617ba1a0bfSKris Buschelman /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 28627ba1a0bfSKris Buschelman anzj = ai[arow + 1] - ai[arow]; 28637ba1a0bfSKris Buschelman ajj = aj + ai[arow]; 28647ba1a0bfSKris Buschelman for (k = 0; k < anzj; k++) { 28657ba1a0bfSKris Buschelman if (!ptadenserow[ajj[k]]) { 28667ba1a0bfSKris Buschelman ptadenserow[ajj[k]] = -1; 28677ba1a0bfSKris Buschelman ptasparserow[ptanzi++] = ajj[k]; 28687ba1a0bfSKris Buschelman } 28697ba1a0bfSKris Buschelman } 28707ba1a0bfSKris Buschelman } 28717ba1a0bfSKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 28727ba1a0bfSKris Buschelman ptaj = ptasparserow; 28737ba1a0bfSKris Buschelman cnzi = 0; 28747ba1a0bfSKris Buschelman for (j = 0; j < ptanzi; j++) { 28757ba1a0bfSKris Buschelman /* Get offset within block of P */ 28767ba1a0bfSKris Buschelman pshift = *ptaj % ppdof; 28777ba1a0bfSKris Buschelman /* Get block row of P */ 28787ba1a0bfSKris Buschelman prow = (*ptaj++) / ppdof; /* integer division */ 28797ba1a0bfSKris Buschelman /* P has same number of nonzeros per row as the compressed form */ 28807ba1a0bfSKris Buschelman pnzj = pi[prow + 1] - pi[prow]; 28817ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 28827ba1a0bfSKris Buschelman for (k = 0; k < pnzj; k++) { 28837ba1a0bfSKris Buschelman /* Locations in C are shifted by the offset within the block */ 28847ba1a0bfSKris Buschelman /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 28857ba1a0bfSKris Buschelman if (!denserow[pjj[k] * ppdof + pshift]) { 28867ba1a0bfSKris Buschelman denserow[pjj[k] * ppdof + pshift] = -1; 28877ba1a0bfSKris Buschelman sparserow[cnzi++] = pjj[k] * ppdof + pshift; 28887ba1a0bfSKris Buschelman } 28897ba1a0bfSKris Buschelman } 28907ba1a0bfSKris Buschelman } 28917ba1a0bfSKris Buschelman 28927ba1a0bfSKris Buschelman /* sort sparserow */ 28939566063dSJacob Faibussowitsch PetscCall(PetscSortInt(cnzi, sparserow)); 28947ba1a0bfSKris Buschelman 28957ba1a0bfSKris Buschelman /* If free space is not available, make more free space */ 28967ba1a0bfSKris Buschelman /* Double the amount of total space in the list */ 2897*48a46eb9SPierre Jolivet if (current_space->local_remaining < cnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), ¤t_space)); 28987ba1a0bfSKris Buschelman 28997ba1a0bfSKris Buschelman /* Copy data into free space, and zero out denserows */ 29009566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(current_space->array, sparserow, cnzi)); 290126fbe8dcSKarl Rupp 29027ba1a0bfSKris Buschelman current_space->array += cnzi; 29037ba1a0bfSKris Buschelman current_space->local_used += cnzi; 29047ba1a0bfSKris Buschelman current_space->local_remaining -= cnzi; 29057ba1a0bfSKris Buschelman 290626fbe8dcSKarl Rupp for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0; 290726fbe8dcSKarl Rupp for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0; 290826fbe8dcSKarl Rupp 29097ba1a0bfSKris Buschelman /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 29107ba1a0bfSKris Buschelman /* For now, we will recompute what is needed. */ 29117ba1a0bfSKris Buschelman ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi; 29127ba1a0bfSKris Buschelman } 29137ba1a0bfSKris Buschelman } 29147ba1a0bfSKris Buschelman /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 29157ba1a0bfSKris Buschelman /* Allocate space for cj, initialize cj, and */ 29167ba1a0bfSKris Buschelman /* destroy list of free space and other temporary array(s) */ 29179566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ci[cn] + 1, &cj)); 29189566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space, cj)); 29199566063dSJacob Faibussowitsch PetscCall(PetscFree4(ptadenserow, ptasparserow, denserow, sparserow)); 29207ba1a0bfSKris Buschelman 29217ba1a0bfSKris Buschelman /* Allocate space for ca */ 29229566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(ci[cn] + 1, &ca)); 29237ba1a0bfSKris Buschelman 29247ba1a0bfSKris Buschelman /* put together the new matrix */ 29259566063dSJacob Faibussowitsch PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C)); 29269566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(C, pp->dof)); 29277ba1a0bfSKris Buschelman 29287ba1a0bfSKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 29297ba1a0bfSKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 29304222ddf1SHong Zhang c = (Mat_SeqAIJ *)(C->data); 2931e6b907acSBarry Smith c->free_a = PETSC_TRUE; 2932e6b907acSBarry Smith c->free_ij = PETSC_TRUE; 29337ba1a0bfSKris Buschelman c->nonew = 0; 293426fbe8dcSKarl Rupp 29354222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 29364222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 29377ba1a0bfSKris Buschelman 29387ba1a0bfSKris Buschelman /* Clean up. */ 29399566063dSJacob Faibussowitsch PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj)); 29407ba1a0bfSKris Buschelman PetscFunctionReturn(0); 29417ba1a0bfSKris Buschelman } 29427ba1a0bfSKris Buschelman 29439371c9d4SSatish Balay PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C) { 29447ba1a0bfSKris Buschelman /* This routine requires testing -- first draft only */ 29457ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data; 29467ba1a0bfSKris Buschelman Mat P = pp->AIJ; 29477ba1a0bfSKris Buschelman Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 29487ba1a0bfSKris Buschelman Mat_SeqAIJ *p = (Mat_SeqAIJ *)P->data; 29497ba1a0bfSKris Buschelman Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 2950a2ea699eSBarry Smith const PetscInt *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj; 2951d2840e09SBarry Smith const PetscInt *ci = c->i, *cj = c->j, *cjj; 2952d2840e09SBarry Smith const PetscInt am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof; 2953d2840e09SBarry Smith PetscInt i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense; 2954a2ea699eSBarry Smith const MatScalar *aa = a->a, *pa = p->a, *pA, *paj; 2955d2840e09SBarry Smith MatScalar *ca = c->a, *caj, *apa; 29567ba1a0bfSKris Buschelman 29577ba1a0bfSKris Buschelman PetscFunctionBegin; 29587ba1a0bfSKris Buschelman /* Allocate temporary array for storage of one row of A*P */ 29599566063dSJacob Faibussowitsch PetscCall(PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense)); 29607ba1a0bfSKris Buschelman 29617ba1a0bfSKris Buschelman /* Clear old values in C */ 29629566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ca, ci[cm])); 29637ba1a0bfSKris Buschelman 29647ba1a0bfSKris Buschelman for (i = 0; i < am; i++) { 29657ba1a0bfSKris Buschelman /* Form sparse row of A*P */ 29667ba1a0bfSKris Buschelman anzi = ai[i + 1] - ai[i]; 29677ba1a0bfSKris Buschelman apnzj = 0; 29687ba1a0bfSKris Buschelman for (j = 0; j < anzi; j++) { 29697ba1a0bfSKris Buschelman /* Get offset within block of P */ 29707ba1a0bfSKris Buschelman pshift = *aj % ppdof; 29717ba1a0bfSKris Buschelman /* Get block row of P */ 29727ba1a0bfSKris Buschelman prow = *aj++ / ppdof; /* integer division */ 29737ba1a0bfSKris Buschelman pnzj = pi[prow + 1] - pi[prow]; 29747ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 29757ba1a0bfSKris Buschelman paj = pa + pi[prow]; 29767ba1a0bfSKris Buschelman for (k = 0; k < pnzj; k++) { 29777ba1a0bfSKris Buschelman poffset = pjj[k] * ppdof + pshift; 29787ba1a0bfSKris Buschelman if (!apjdense[poffset]) { 29797ba1a0bfSKris Buschelman apjdense[poffset] = -1; 29807ba1a0bfSKris Buschelman apj[apnzj++] = poffset; 29817ba1a0bfSKris Buschelman } 29827ba1a0bfSKris Buschelman apa[poffset] += (*aa) * paj[k]; 29837ba1a0bfSKris Buschelman } 29849566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * pnzj)); 29857ba1a0bfSKris Buschelman aa++; 29867ba1a0bfSKris Buschelman } 29877ba1a0bfSKris Buschelman 29887ba1a0bfSKris Buschelman /* Sort the j index array for quick sparse axpy. */ 29897ba1a0bfSKris Buschelman /* Note: a array does not need sorting as it is in dense storage locations. */ 29909566063dSJacob Faibussowitsch PetscCall(PetscSortInt(apnzj, apj)); 29917ba1a0bfSKris Buschelman 29927ba1a0bfSKris Buschelman /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 29937ba1a0bfSKris Buschelman prow = i / ppdof; /* integer division */ 29947ba1a0bfSKris Buschelman pshift = i % ppdof; 29957ba1a0bfSKris Buschelman poffset = pi[prow]; 29967ba1a0bfSKris Buschelman pnzi = pi[prow + 1] - poffset; 29977ba1a0bfSKris Buschelman /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 29987ba1a0bfSKris Buschelman pJ = pj + poffset; 29997ba1a0bfSKris Buschelman pA = pa + poffset; 30007ba1a0bfSKris Buschelman for (j = 0; j < pnzi; j++) { 30017ba1a0bfSKris Buschelman crow = (*pJ) * ppdof + pshift; 30027ba1a0bfSKris Buschelman cjj = cj + ci[crow]; 30037ba1a0bfSKris Buschelman caj = ca + ci[crow]; 30047ba1a0bfSKris Buschelman pJ++; 30057ba1a0bfSKris Buschelman /* Perform sparse axpy operation. Note cjj includes apj. */ 30067ba1a0bfSKris Buschelman for (k = 0, nextap = 0; nextap < apnzj; k++) { 300726fbe8dcSKarl Rupp if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]]; 30087ba1a0bfSKris Buschelman } 30099566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * apnzj)); 30107ba1a0bfSKris Buschelman pA++; 30117ba1a0bfSKris Buschelman } 30127ba1a0bfSKris Buschelman 30137ba1a0bfSKris Buschelman /* Zero the current row info for A*P */ 30147ba1a0bfSKris Buschelman for (j = 0; j < apnzj; j++) { 30157ba1a0bfSKris Buschelman apa[apj[j]] = 0.; 30167ba1a0bfSKris Buschelman apjdense[apj[j]] = 0; 30177ba1a0bfSKris Buschelman } 30187ba1a0bfSKris Buschelman } 30197ba1a0bfSKris Buschelman 30207ba1a0bfSKris Buschelman /* Assemble the final matrix and clean up */ 30219566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 30229566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 30239566063dSJacob Faibussowitsch PetscCall(PetscFree3(apa, apj, apjdense)); 302495ddefa2SHong Zhang PetscFunctionReturn(0); 302595ddefa2SHong Zhang } 30267ba1a0bfSKris Buschelman 30279371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C) { 30284222ddf1SHong Zhang Mat_Product *product = C->product; 30294222ddf1SHong Zhang Mat A = product->A, P = product->B; 30302121bac1SHong Zhang 30312121bac1SHong Zhang PetscFunctionBegin; 30329566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C)); 30332121bac1SHong Zhang PetscFunctionReturn(0); 30342121bac1SHong Zhang } 30352121bac1SHong Zhang 30369371c9d4SSatish Balay PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A, Mat PP, PetscReal fill, Mat *C) { 303795ddefa2SHong Zhang PetscFunctionBegin; 30383e0c911fSBarry Smith SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet"); 303995ddefa2SHong Zhang } 304095ddefa2SHong Zhang 30419371c9d4SSatish Balay PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A, Mat PP, Mat C) { 304295ddefa2SHong Zhang PetscFunctionBegin; 30433e0c911fSBarry Smith SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 30447ba1a0bfSKris Buschelman } 30455c65b9ecSFande Kong 3046bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat); 3047bc8e477aSFande Kong 30489371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C) { 3049bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 3050bc8e477aSFande Kong 3051bc8e477aSFande Kong PetscFunctionBegin; 305234bcad68SFande Kong 30539566063dSJacob Faibussowitsch PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C)); 3054bc8e477aSFande Kong PetscFunctionReturn(0); 3055bc8e477aSFande Kong } 3056bc8e477aSFande Kong 30574222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat); 3058bc8e477aSFande Kong 30599371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C) { 3060bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 3061bc8e477aSFande Kong 3062bc8e477aSFande Kong PetscFunctionBegin; 30639566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C)); 30644222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce; 3065bc8e477aSFande Kong PetscFunctionReturn(0); 3066bc8e477aSFande Kong } 3067bc8e477aSFande Kong 3068bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat); 3069bc8e477aSFande Kong 30709371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C) { 3071bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 3072bc8e477aSFande Kong 3073bc8e477aSFande Kong PetscFunctionBegin; 307434bcad68SFande Kong 30759566063dSJacob Faibussowitsch PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C)); 3076bc8e477aSFande Kong PetscFunctionReturn(0); 3077bc8e477aSFande Kong } 3078bc8e477aSFande Kong 30794222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat); 3080bc8e477aSFande Kong 30819371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C) { 3082bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 3083bc8e477aSFande Kong 3084bc8e477aSFande Kong PetscFunctionBegin; 308534bcad68SFande Kong 30869566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C)); 30874222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged; 3088bc8e477aSFande Kong PetscFunctionReturn(0); 3089bc8e477aSFande Kong } 3090bc8e477aSFande Kong 30919371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C) { 30924222ddf1SHong Zhang Mat_Product *product = C->product; 30934222ddf1SHong Zhang Mat A = product->A, P = product->B; 30944222ddf1SHong Zhang PetscBool flg; 3095bc8e477aSFande Kong 3096bc8e477aSFande Kong PetscFunctionBegin; 30979566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg, "allatonce", &flg)); 30984222ddf1SHong Zhang if (flg) { 30999566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C)); 31004222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 31014222ddf1SHong Zhang PetscFunctionReturn(0); 3102bc8e477aSFande Kong } 310334bcad68SFande Kong 31049566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg, "allatonce_merged", &flg)); 31054222ddf1SHong Zhang if (flg) { 31069566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C)); 31074222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 31084222ddf1SHong Zhang PetscFunctionReturn(0); 31094222ddf1SHong Zhang } 311034bcad68SFande Kong 31114222ddf1SHong Zhang SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported"); 3112bc8e477aSFande Kong } 3113bc8e477aSFande Kong 31149371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { 31159c4fc4c7SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 3116ceb03754SKris Buschelman Mat a = b->AIJ, B; 31179c4fc4c7SBarry Smith Mat_SeqAIJ *aij = (Mat_SeqAIJ *)a->data; 31180fd73130SBarry Smith PetscInt m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof; 3119ba8c8a56SBarry Smith PetscInt *cols; 3120ba8c8a56SBarry Smith PetscScalar *vals; 31219c4fc4c7SBarry Smith 31229c4fc4c7SBarry Smith PetscFunctionBegin; 31239566063dSJacob Faibussowitsch PetscCall(MatGetSize(a, &m, &n)); 31249566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(dof * m, &ilen)); 31259c4fc4c7SBarry Smith for (i = 0; i < m; i++) { 31269c4fc4c7SBarry Smith nmax = PetscMax(nmax, aij->ilen[i]); 312726fbe8dcSKarl Rupp for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i]; 31289c4fc4c7SBarry Smith } 31299566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 31309566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, dof * m, dof * n, dof * m, dof * n)); 31319566063dSJacob Faibussowitsch PetscCall(MatSetType(B, newtype)); 31329566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(B, 0, ilen)); 31339566063dSJacob Faibussowitsch PetscCall(PetscFree(ilen)); 31349566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nmax, &icols)); 31359c4fc4c7SBarry Smith ii = 0; 31369c4fc4c7SBarry Smith for (i = 0; i < m; i++) { 31379566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals)); 31380fd73130SBarry Smith for (j = 0; j < dof; j++) { 313926fbe8dcSKarl Rupp for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j; 31409566063dSJacob Faibussowitsch PetscCall(MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES)); 31419c4fc4c7SBarry Smith ii++; 31429c4fc4c7SBarry Smith } 31439566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals)); 31449c4fc4c7SBarry Smith } 31459566063dSJacob Faibussowitsch PetscCall(PetscFree(icols)); 31469566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 31479566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3148ceb03754SKris Buschelman 3149511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 31509566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &B)); 3151c3d102feSKris Buschelman } else { 3152ceb03754SKris Buschelman *newmat = B; 3153c3d102feSKris Buschelman } 31549c4fc4c7SBarry Smith PetscFunctionReturn(0); 31559c4fc4c7SBarry Smith } 31569c4fc4c7SBarry Smith 3157c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h> 3158be1d678aSKris Buschelman 31599371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { 31600fd73130SBarry Smith Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)A->data; 3161ceb03754SKris Buschelman Mat MatAIJ = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B; 31620fd73130SBarry Smith Mat MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ; 31630fd73130SBarry Smith Mat_SeqAIJ *AIJ = (Mat_SeqAIJ *)MatAIJ->data; 31640fd73130SBarry Smith Mat_SeqAIJ *OAIJ = (Mat_SeqAIJ *)MatOAIJ->data; 31650fd73130SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)maij->A->data; 31660298fd71SBarry Smith PetscInt dof = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0; 31670298fd71SBarry Smith PetscInt *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL; 31680fd73130SBarry Smith PetscInt rstart, cstart, *garray, ii, k; 31690fd73130SBarry Smith PetscScalar *vals, *ovals; 31700fd73130SBarry Smith 31710fd73130SBarry Smith PetscFunctionBegin; 31729566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz)); 3173d0f46423SBarry Smith for (i = 0; i < A->rmap->n / dof; i++) { 31740fd73130SBarry Smith nmax = PetscMax(nmax, AIJ->ilen[i]); 31750fd73130SBarry Smith onmax = PetscMax(onmax, OAIJ->ilen[i]); 31760fd73130SBarry Smith for (j = 0; j < dof; j++) { 31770fd73130SBarry Smith dnz[dof * i + j] = AIJ->ilen[i]; 31780fd73130SBarry Smith onz[dof * i + j] = OAIJ->ilen[i]; 31790fd73130SBarry Smith } 31800fd73130SBarry Smith } 31819566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 31829566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 31839566063dSJacob Faibussowitsch PetscCall(MatSetType(B, newtype)); 31849566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz)); 31859566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(B, dof)); 31869566063dSJacob Faibussowitsch PetscCall(PetscFree2(dnz, onz)); 31870fd73130SBarry Smith 31889566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nmax, &icols, onmax, &oicols)); 3189d0f46423SBarry Smith rstart = dof * maij->A->rmap->rstart; 3190d0f46423SBarry Smith cstart = dof * maij->A->cmap->rstart; 31910fd73130SBarry Smith garray = mpiaij->garray; 31920fd73130SBarry Smith 31930fd73130SBarry Smith ii = rstart; 3194d0f46423SBarry Smith for (i = 0; i < A->rmap->n / dof; i++) { 31959566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals)); 31969566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals)); 31970fd73130SBarry Smith for (j = 0; j < dof; j++) { 31989371c9d4SSatish Balay for (k = 0; k < ncols; k++) { icols[k] = cstart + dof * cols[k] + j; } 31999371c9d4SSatish Balay for (k = 0; k < oncols; k++) { oicols[k] = dof * garray[ocols[k]] + j; } 32009566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES)); 32019566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES)); 32020fd73130SBarry Smith ii++; 32030fd73130SBarry Smith } 32049566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals)); 32059566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals)); 32060fd73130SBarry Smith } 32079566063dSJacob Faibussowitsch PetscCall(PetscFree2(icols, oicols)); 32080fd73130SBarry Smith 32099566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 32109566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3211ceb03754SKris Buschelman 3212511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 32137adad957SLisandro Dalcin PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 32147adad957SLisandro Dalcin ((PetscObject)A)->refct = 1; 321526fbe8dcSKarl Rupp 32169566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &B)); 321726fbe8dcSKarl Rupp 32187adad957SLisandro Dalcin ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 3219c3d102feSKris Buschelman } else { 3220ceb03754SKris Buschelman *newmat = B; 3221c3d102feSKris Buschelman } 32220fd73130SBarry Smith PetscFunctionReturn(0); 32230fd73130SBarry Smith } 32240fd73130SBarry Smith 32259371c9d4SSatish Balay PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat) { 32269e516c8fSBarry Smith Mat A; 32279e516c8fSBarry Smith 32289e516c8fSBarry Smith PetscFunctionBegin; 32299566063dSJacob Faibussowitsch PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 32309566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat)); 32319566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 32329e516c8fSBarry Smith PetscFunctionReturn(0); 32339e516c8fSBarry Smith } 32340fd73130SBarry Smith 32359371c9d4SSatish Balay PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[]) { 3236ec626240SStefano Zampini Mat A; 3237ec626240SStefano Zampini 3238ec626240SStefano Zampini PetscFunctionBegin; 32399566063dSJacob Faibussowitsch PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 32409566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrices(A, n, irow, icol, scall, submat)); 32419566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 3242ec626240SStefano Zampini PetscFunctionReturn(0); 3243ec626240SStefano Zampini } 3244ec626240SStefano Zampini 3245bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */ 3246480dffcfSJed Brown /*@ 32470bad9183SKris Buschelman MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 32480bad9183SKris Buschelman operations for multicomponent problems. It interpolates each component the same 32490bad9183SKris Buschelman way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 32500bad9183SKris Buschelman and MATMPIAIJ for distributed matrices. 32510bad9183SKris Buschelman 3252ff585edeSJed Brown Collective 3253ff585edeSJed Brown 3254ff585edeSJed Brown Input Parameters: 3255ff585edeSJed Brown + A - the AIJ matrix describing the action on blocks 3256ff585edeSJed Brown - dof - the block size (number of components per node) 3257ff585edeSJed Brown 3258ff585edeSJed Brown Output Parameter: 3259ff585edeSJed Brown . maij - the new MAIJ matrix 3260ff585edeSJed Brown 32610bad9183SKris Buschelman Operations provided: 326267b8a455SSatish Balay .vb 326367b8a455SSatish Balay MatMult 326467b8a455SSatish Balay MatMultTranspose 326567b8a455SSatish Balay MatMultAdd 326667b8a455SSatish Balay MatMultTransposeAdd 326767b8a455SSatish Balay MatView 326867b8a455SSatish Balay .ve 32690bad9183SKris Buschelman 32700bad9183SKris Buschelman Level: advanced 32710bad9183SKris Buschelman 3272db781477SPatrick Sanan .seealso: `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MATMAIJ` 3273ff585edeSJed Brown @*/ 32749371c9d4SSatish Balay PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij) { 3275b24ad042SBarry Smith PetscInt n; 327682b1193eSBarry Smith Mat B; 327790f67b22SBarry Smith PetscBool flg; 3278fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 3279fdc842d1SBarry Smith /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */ 3280fdc842d1SBarry Smith PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE; 3281fdc842d1SBarry Smith #endif 328282b1193eSBarry Smith 328382b1193eSBarry Smith PetscFunctionBegin; 3284fdc842d1SBarry Smith dof = PetscAbs(dof); 32859566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)A)); 3286d72c5c08SBarry Smith 328726fbe8dcSKarl Rupp if (dof == 1) *maij = A; 328826fbe8dcSKarl Rupp else { 32899566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3290c248e2fdSStefano Zampini /* propagate vec type */ 32919566063dSJacob Faibussowitsch PetscCall(MatSetVecType(B, A->defaultvectype)); 32929566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N)); 32939566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->rmap, dof)); 32949566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->cmap, dof)); 32959566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 32969566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 329726fbe8dcSKarl Rupp 3298cd3bbe55SBarry Smith B->assembled = PETSC_TRUE; 3299d72c5c08SBarry Smith 330090f67b22SBarry Smith PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg)); 330190f67b22SBarry Smith if (flg) { 3302feb9fe23SJed Brown Mat_SeqMAIJ *b; 3303feb9fe23SJed Brown 33049566063dSJacob Faibussowitsch PetscCall(MatSetType(B, MATSEQMAIJ)); 330526fbe8dcSKarl Rupp 33060298fd71SBarry Smith B->ops->setup = NULL; 33074cb9d9b8SBarry Smith B->ops->destroy = MatDestroy_SeqMAIJ; 33080fd73130SBarry Smith B->ops->view = MatView_SeqMAIJ; 33094222ddf1SHong Zhang 3310feb9fe23SJed Brown b = (Mat_SeqMAIJ *)B->data; 3311b9b97703SBarry Smith b->dof = dof; 33124cb9d9b8SBarry Smith b->AIJ = A; 331326fbe8dcSKarl Rupp 3314d72c5c08SBarry Smith if (dof == 2) { 3315cd3bbe55SBarry Smith B->ops->mult = MatMult_SeqMAIJ_2; 3316cd3bbe55SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_2; 3317cd3bbe55SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 3318cd3bbe55SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 3319bcc973b7SBarry Smith } else if (dof == 3) { 3320bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_3; 3321bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_3; 3322bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 3323bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 3324bcc973b7SBarry Smith } else if (dof == 4) { 3325bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_4; 3326bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_4; 3327bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 3328bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 3329f9fae5adSBarry Smith } else if (dof == 5) { 3330f9fae5adSBarry Smith B->ops->mult = MatMult_SeqMAIJ_5; 3331f9fae5adSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_5; 3332f9fae5adSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 3333f9fae5adSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 33346cd98798SBarry Smith } else if (dof == 6) { 33356cd98798SBarry Smith B->ops->mult = MatMult_SeqMAIJ_6; 33366cd98798SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_6; 33376cd98798SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 33386cd98798SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 3339ed8eea03SBarry Smith } else if (dof == 7) { 3340ed8eea03SBarry Smith B->ops->mult = MatMult_SeqMAIJ_7; 3341ed8eea03SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_7; 3342ed8eea03SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 3343ed8eea03SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 334466ed3db0SBarry Smith } else if (dof == 8) { 334566ed3db0SBarry Smith B->ops->mult = MatMult_SeqMAIJ_8; 334666ed3db0SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_8; 334766ed3db0SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 334866ed3db0SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 33492b692628SMatthew Knepley } else if (dof == 9) { 33502b692628SMatthew Knepley B->ops->mult = MatMult_SeqMAIJ_9; 33512b692628SMatthew Knepley B->ops->multadd = MatMultAdd_SeqMAIJ_9; 33522b692628SMatthew Knepley B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 33532b692628SMatthew Knepley B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 3354b9cda22cSBarry Smith } else if (dof == 10) { 3355b9cda22cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_10; 3356b9cda22cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_10; 3357b9cda22cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 3358b9cda22cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 3359dbdd7285SBarry Smith } else if (dof == 11) { 3360dbdd7285SBarry Smith B->ops->mult = MatMult_SeqMAIJ_11; 3361dbdd7285SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_11; 3362dbdd7285SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 3363dbdd7285SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 33642f7816d4SBarry Smith } else if (dof == 16) { 33652f7816d4SBarry Smith B->ops->mult = MatMult_SeqMAIJ_16; 33662f7816d4SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_16; 33672f7816d4SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 33682f7816d4SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 3369ed1c418cSBarry Smith } else if (dof == 18) { 3370ed1c418cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_18; 3371ed1c418cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_18; 3372ed1c418cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 3373ed1c418cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 337482b1193eSBarry Smith } else { 33756861f193SBarry Smith B->ops->mult = MatMult_SeqMAIJ_N; 33766861f193SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_N; 33776861f193SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 33786861f193SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 337982b1193eSBarry Smith } 3380fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 33819566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ)); 3382fdc842d1SBarry Smith #endif 33839566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ)); 33849566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ)); 3385f4a53059SBarry Smith } else { 3386f4a53059SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 3387feb9fe23SJed Brown Mat_MPIMAIJ *b; 3388f4a53059SBarry Smith IS from, to; 3389f4a53059SBarry Smith Vec gvec; 3390f4a53059SBarry Smith 33919566063dSJacob Faibussowitsch PetscCall(MatSetType(B, MATMPIMAIJ)); 339226fbe8dcSKarl Rupp 33930298fd71SBarry Smith B->ops->setup = NULL; 3394d72c5c08SBarry Smith B->ops->destroy = MatDestroy_MPIMAIJ; 33950fd73130SBarry Smith B->ops->view = MatView_MPIMAIJ; 339626fbe8dcSKarl Rupp 3397b9b97703SBarry Smith b = (Mat_MPIMAIJ *)B->data; 3398b9b97703SBarry Smith b->dof = dof; 3399b9b97703SBarry Smith b->A = A; 340026fbe8dcSKarl Rupp 34019566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ)); 34029566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ)); 34034cb9d9b8SBarry Smith 34049566063dSJacob Faibussowitsch PetscCall(VecGetSize(mpiaij->lvec, &n)); 34059566063dSJacob Faibussowitsch PetscCall(VecCreate(PETSC_COMM_SELF, &b->w)); 34069566063dSJacob Faibussowitsch PetscCall(VecSetSizes(b->w, n * dof, n * dof)); 34079566063dSJacob Faibussowitsch PetscCall(VecSetBlockSize(b->w, dof)); 34089566063dSJacob Faibussowitsch PetscCall(VecSetType(b->w, VECSEQ)); 3409f4a53059SBarry Smith 3410f4a53059SBarry Smith /* create two temporary Index sets for build scatter gather */ 34119566063dSJacob Faibussowitsch PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from)); 34129566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to)); 3413f4a53059SBarry Smith 3414f4a53059SBarry Smith /* create temporary global vector to generate scatter context */ 34159566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec)); 3416f4a53059SBarry Smith 3417f4a53059SBarry Smith /* generate the scatter context */ 34189566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(gvec, from, b->w, to, &b->ctx)); 3419f4a53059SBarry Smith 34209566063dSJacob Faibussowitsch PetscCall(ISDestroy(&from)); 34219566063dSJacob Faibussowitsch PetscCall(ISDestroy(&to)); 34229566063dSJacob Faibussowitsch PetscCall(VecDestroy(&gvec)); 3423f4a53059SBarry Smith 3424f4a53059SBarry Smith B->ops->mult = MatMult_MPIMAIJ_dof; 34254cb9d9b8SBarry Smith B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 34264cb9d9b8SBarry Smith B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 34274cb9d9b8SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 342826fbe8dcSKarl Rupp 3429fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 34309566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ)); 3431fdc842d1SBarry Smith #endif 34329566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ)); 34339566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ)); 3434f4a53059SBarry Smith } 34357dae84e0SHong Zhang B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ; 3436ec626240SStefano Zampini B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ; 34379566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 3438fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 3439fdc842d1SBarry Smith /* temporary until we have CUDA implementation of MAIJ */ 3440fdc842d1SBarry Smith { 3441fdc842d1SBarry Smith PetscBool flg; 3442fdc842d1SBarry Smith if (convert) { 34439566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, "")); 3444*48a46eb9SPierre Jolivet if (flg) PetscCall(MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B)); 3445fdc842d1SBarry Smith } 3446fdc842d1SBarry Smith } 3447fdc842d1SBarry Smith #endif 3448cd3bbe55SBarry Smith *maij = B; 3449d72c5c08SBarry Smith } 345082b1193eSBarry Smith PetscFunctionReturn(0); 345182b1193eSBarry Smith } 3452