1be1d678aSKris Buschelman 282b1193eSBarry Smith /* 3cd3bbe55SBarry Smith Defines the basic matrix operations for the MAIJ matrix storage format. 4cd3bbe55SBarry Smith This format is used for restriction and interpolation operations for 5cd3bbe55SBarry Smith multicomponent problems. It interpolates each component the same way 6cd3bbe55SBarry Smith independently. 7cd3bbe55SBarry Smith 8cd3bbe55SBarry Smith We provide: 9cd3bbe55SBarry Smith MatMult() 10cd3bbe55SBarry Smith MatMultTranspose() 11cd3bbe55SBarry Smith MatMultTransposeAdd() 12cd3bbe55SBarry Smith MatMultAdd() 13cd3bbe55SBarry Smith and 14cd3bbe55SBarry Smith MatCreateMAIJ(Mat,dof,Mat*) 15f4a53059SBarry Smith 16f4a53059SBarry Smith This single directory handles both the sequential and parallel codes 1782b1193eSBarry Smith */ 1882b1193eSBarry Smith 19c6db04a5SJed Brown #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 20c6db04a5SJed Brown #include <../src/mat/utils/freespace.h> 2182b1193eSBarry Smith 22b350b9acSSatish Balay /*@ 23ff585edeSJed Brown MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix 24ff585edeSJed Brown 25ff585edeSJed Brown Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel 26ff585edeSJed Brown 27ff585edeSJed Brown Input Parameter: 28ff585edeSJed Brown . A - the MAIJ matrix 29ff585edeSJed Brown 30ff585edeSJed Brown Output Parameter: 31ff585edeSJed Brown . B - the AIJ matrix 32ff585edeSJed Brown 33ff585edeSJed Brown Level: advanced 34ff585edeSJed Brown 3595452b02SPatrick Sanan Notes: 3695452b02SPatrick Sanan The reference count on the AIJ matrix is not increased so you should not destroy it. 37ff585edeSJed Brown 38ff585edeSJed Brown .seealso: MatCreateMAIJ() 39ff585edeSJed Brown @*/ 407087cfbeSBarry Smith PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B) 41b9b97703SBarry Smith { 42ace3abfcSBarry Smith PetscBool ismpimaij,isseqmaij; 43b9b97703SBarry Smith 44b9b97703SBarry Smith PetscFunctionBegin; 459566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij)); 469566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij)); 47b9b97703SBarry Smith if (ismpimaij) { 48b9b97703SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 49b9b97703SBarry Smith 50b9b97703SBarry Smith *B = b->A; 51b9b97703SBarry Smith } else if (isseqmaij) { 52b9b97703SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 53b9b97703SBarry Smith 54b9b97703SBarry Smith *B = b->AIJ; 55b9b97703SBarry Smith } else { 56b9b97703SBarry Smith *B = A; 57b9b97703SBarry Smith } 58b9b97703SBarry Smith PetscFunctionReturn(0); 59b9b97703SBarry Smith } 60b9b97703SBarry Smith 61480dffcfSJed Brown /*@ 62ff585edeSJed Brown MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size 63ff585edeSJed Brown 643f9fe445SBarry Smith Logically Collective 65ff585edeSJed Brown 66d8d19677SJose E. Roman Input Parameters: 67ff585edeSJed Brown + A - the MAIJ matrix 68ff585edeSJed Brown - dof - the block size for the new matrix 69ff585edeSJed Brown 70ff585edeSJed Brown Output Parameter: 71ff585edeSJed Brown . B - the new MAIJ matrix 72ff585edeSJed Brown 73ff585edeSJed Brown Level: advanced 74ff585edeSJed Brown 75ff585edeSJed Brown .seealso: MatCreateMAIJ() 76ff585edeSJed Brown @*/ 777087cfbeSBarry Smith PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 78b9b97703SBarry Smith { 790298fd71SBarry Smith Mat Aij = NULL; 80b9b97703SBarry Smith 81b9b97703SBarry Smith PetscFunctionBegin; 82c5eb9154SBarry Smith PetscValidLogicalCollectiveInt(A,dof,2); 839566063dSJacob Faibussowitsch PetscCall(MatMAIJGetAIJ(A,&Aij)); 849566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(Aij,dof,B)); 85b9b97703SBarry Smith PetscFunctionReturn(0); 86b9b97703SBarry Smith } 87b9b97703SBarry Smith 88dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 8982b1193eSBarry Smith { 904cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 9182b1193eSBarry Smith 9282b1193eSBarry Smith PetscFunctionBegin; 939566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->AIJ)); 949566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 959566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL)); 969566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaij_seqmaij_C",NULL)); 974cb9d9b8SBarry Smith PetscFunctionReturn(0); 984cb9d9b8SBarry Smith } 994cb9d9b8SBarry Smith 100356c569eSBarry Smith PetscErrorCode MatSetUp_MAIJ(Mat A) 101356c569eSBarry Smith { 102356c569eSBarry Smith PetscFunctionBegin; 103ce94432eSBarry Smith SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices"); 104356c569eSBarry Smith } 105356c569eSBarry Smith 1060fd73130SBarry Smith PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 1070fd73130SBarry Smith { 1080fd73130SBarry Smith Mat B; 1090fd73130SBarry Smith 1100fd73130SBarry Smith PetscFunctionBegin; 1119566063dSJacob Faibussowitsch PetscCall(MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B)); 1129566063dSJacob Faibussowitsch PetscCall(MatView(B,viewer)); 1139566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 1140fd73130SBarry Smith PetscFunctionReturn(0); 1150fd73130SBarry Smith } 1160fd73130SBarry Smith 1170fd73130SBarry Smith PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 1180fd73130SBarry Smith { 1190fd73130SBarry Smith Mat B; 1200fd73130SBarry Smith 1210fd73130SBarry Smith PetscFunctionBegin; 1229566063dSJacob Faibussowitsch PetscCall(MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B)); 1239566063dSJacob Faibussowitsch PetscCall(MatView(B,viewer)); 1249566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 1250fd73130SBarry Smith PetscFunctionReturn(0); 1260fd73130SBarry Smith } 1270fd73130SBarry Smith 128dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 1294cb9d9b8SBarry Smith { 1304cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1314cb9d9b8SBarry Smith 1324cb9d9b8SBarry Smith PetscFunctionBegin; 1339566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->AIJ)); 1349566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->OAIJ)); 1359566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->A)); 1369566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&b->ctx)); 1379566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->w)); 1389566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 1399566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL)); 1409566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_mpiaij_mpimaij_C",NULL)); 1419566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A,NULL)); 142b9b97703SBarry Smith PetscFunctionReturn(0); 143b9b97703SBarry Smith } 144b9b97703SBarry Smith 1450bad9183SKris Buschelman /*MC 146fafad747SKris Buschelman MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 1470bad9183SKris Buschelman multicomponent problems, interpolating or restricting each component the same way independently. 1480bad9183SKris Buschelman The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 1490bad9183SKris Buschelman 1500bad9183SKris Buschelman Operations provided: 15167b8a455SSatish Balay .vb 15267b8a455SSatish Balay MatMult 15367b8a455SSatish Balay MatMultTranspose 15467b8a455SSatish Balay MatMultAdd 15567b8a455SSatish Balay MatMultTransposeAdd 15667b8a455SSatish Balay .ve 1570bad9183SKris Buschelman 1580bad9183SKris Buschelman Level: advanced 1590bad9183SKris Buschelman 160b409243cSBarry Smith .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ() 1610bad9183SKris Buschelman M*/ 1620bad9183SKris Buschelman 1638cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) 16482b1193eSBarry Smith { 1654cb9d9b8SBarry Smith Mat_MPIMAIJ *b; 16664b52464SHong Zhang PetscMPIInt size; 16782b1193eSBarry Smith 16882b1193eSBarry Smith PetscFunctionBegin; 1699566063dSJacob Faibussowitsch PetscCall(PetscNewLog(A,&b)); 170b0a32e0cSBarry Smith A->data = (void*)b; 17126fbe8dcSKarl Rupp 1729566063dSJacob Faibussowitsch PetscCall(PetscMemzero(A->ops,sizeof(struct _MatOps))); 17326fbe8dcSKarl Rupp 174356c569eSBarry Smith A->ops->setup = MatSetUp_MAIJ; 175f4a53059SBarry Smith 176f4259b30SLisandro Dalcin b->AIJ = NULL; 177cd3bbe55SBarry Smith b->dof = 0; 178f4259b30SLisandro Dalcin b->OAIJ = NULL; 179f4259b30SLisandro Dalcin b->ctx = NULL; 180f4259b30SLisandro Dalcin b->w = NULL; 1819566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 18264b52464SHong Zhang if (size == 1) { 1839566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ)); 18464b52464SHong Zhang } else { 1859566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ)); 18664b52464SHong Zhang } 18732e7c8b0SBarry Smith A->preallocated = PETSC_TRUE; 18832e7c8b0SBarry Smith A->assembled = PETSC_TRUE; 18982b1193eSBarry Smith PetscFunctionReturn(0); 19082b1193eSBarry Smith } 19182b1193eSBarry Smith 192cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 193dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 19482b1193eSBarry Smith { 1954cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 196bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 197d2840e09SBarry Smith const PetscScalar *x,*v; 198d2840e09SBarry Smith PetscScalar *y, sum1, sum2; 199d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 200d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 20182b1193eSBarry Smith 202bcc973b7SBarry Smith PetscFunctionBegin; 2039566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 2049566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 205bcc973b7SBarry Smith idx = a->j; 206bcc973b7SBarry Smith v = a->a; 207bcc973b7SBarry Smith ii = a->i; 208bcc973b7SBarry Smith 209bcc973b7SBarry Smith for (i=0; i<m; i++) { 210bcc973b7SBarry Smith jrow = ii[i]; 211bcc973b7SBarry Smith n = ii[i+1] - jrow; 212bcc973b7SBarry Smith sum1 = 0.0; 213bcc973b7SBarry Smith sum2 = 0.0; 21426fbe8dcSKarl Rupp 215b3c51c66SMatthew Knepley nonzerorow += (n>0); 216bcc973b7SBarry Smith for (j=0; j<n; j++) { 217bcc973b7SBarry Smith sum1 += v[jrow]*x[2*idx[jrow]]; 218bcc973b7SBarry Smith sum2 += v[jrow]*x[2*idx[jrow]+1]; 219bcc973b7SBarry Smith jrow++; 220bcc973b7SBarry Smith } 221bcc973b7SBarry Smith y[2*i] = sum1; 222bcc973b7SBarry Smith y[2*i+1] = sum2; 223bcc973b7SBarry Smith } 224bcc973b7SBarry Smith 2259566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(4.0*a->nz - 2.0*nonzerorow)); 2269566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 2279566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 22882b1193eSBarry Smith PetscFunctionReturn(0); 22982b1193eSBarry Smith } 230bcc973b7SBarry Smith 231dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 23282b1193eSBarry Smith { 2334cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 234bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 235d2840e09SBarry Smith const PetscScalar *x,*v; 236d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2; 237d2840e09SBarry Smith PetscInt n,i; 238d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 23982b1193eSBarry Smith 240bcc973b7SBarry Smith PetscFunctionBegin; 2419566063dSJacob Faibussowitsch PetscCall(VecSet(yy,0.0)); 2429566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 2439566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 244bfec09a0SHong Zhang 245bcc973b7SBarry Smith for (i=0; i<m; i++) { 246bfec09a0SHong Zhang idx = a->j + a->i[i]; 247bfec09a0SHong Zhang v = a->a + a->i[i]; 248bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 249bcc973b7SBarry Smith alpha1 = x[2*i]; 250bcc973b7SBarry Smith alpha2 = x[2*i+1]; 25126fbe8dcSKarl Rupp while (n-->0) { 25226fbe8dcSKarl Rupp y[2*(*idx)] += alpha1*(*v); 25326fbe8dcSKarl Rupp y[2*(*idx)+1] += alpha2*(*v); 25426fbe8dcSKarl Rupp idx++; v++; 25526fbe8dcSKarl Rupp } 256bcc973b7SBarry Smith } 2579566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(4.0*a->nz)); 2589566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 2599566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 26082b1193eSBarry Smith PetscFunctionReturn(0); 26182b1193eSBarry Smith } 262bcc973b7SBarry Smith 263dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 26482b1193eSBarry Smith { 2654cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 266bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 267d2840e09SBarry Smith const PetscScalar *x,*v; 268d2840e09SBarry Smith PetscScalar *y,sum1, sum2; 269b24ad042SBarry Smith PetscInt n,i,jrow,j; 270d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 27182b1193eSBarry Smith 272bcc973b7SBarry Smith PetscFunctionBegin; 2739566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 2749566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 2759566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 276bcc973b7SBarry Smith idx = a->j; 277bcc973b7SBarry Smith v = a->a; 278bcc973b7SBarry Smith ii = a->i; 279bcc973b7SBarry Smith 280bcc973b7SBarry Smith for (i=0; i<m; i++) { 281bcc973b7SBarry Smith jrow = ii[i]; 282bcc973b7SBarry Smith n = ii[i+1] - jrow; 283bcc973b7SBarry Smith sum1 = 0.0; 284bcc973b7SBarry Smith sum2 = 0.0; 285bcc973b7SBarry Smith for (j=0; j<n; j++) { 286bcc973b7SBarry Smith sum1 += v[jrow]*x[2*idx[jrow]]; 287bcc973b7SBarry Smith sum2 += v[jrow]*x[2*idx[jrow]+1]; 288bcc973b7SBarry Smith jrow++; 289bcc973b7SBarry Smith } 290bcc973b7SBarry Smith y[2*i] += sum1; 291bcc973b7SBarry Smith y[2*i+1] += sum2; 292bcc973b7SBarry Smith } 293bcc973b7SBarry Smith 2949566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(4.0*a->nz)); 2959566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 2969566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 297bcc973b7SBarry Smith PetscFunctionReturn(0); 29882b1193eSBarry Smith } 299dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 30082b1193eSBarry Smith { 3014cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 302bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 303d2840e09SBarry Smith const PetscScalar *x,*v; 304d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2; 305d2840e09SBarry Smith PetscInt n,i; 306d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 30782b1193eSBarry Smith 308bcc973b7SBarry Smith PetscFunctionBegin; 3099566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 3109566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 3119566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 312bfec09a0SHong Zhang 313bcc973b7SBarry Smith for (i=0; i<m; i++) { 314bfec09a0SHong Zhang idx = a->j + a->i[i]; 315bfec09a0SHong Zhang v = a->a + a->i[i]; 316bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 317bcc973b7SBarry Smith alpha1 = x[2*i]; 318bcc973b7SBarry Smith alpha2 = x[2*i+1]; 31926fbe8dcSKarl Rupp while (n-->0) { 32026fbe8dcSKarl Rupp y[2*(*idx)] += alpha1*(*v); 32126fbe8dcSKarl Rupp y[2*(*idx)+1] += alpha2*(*v); 32226fbe8dcSKarl Rupp idx++; v++; 32326fbe8dcSKarl Rupp } 324bcc973b7SBarry Smith } 3259566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(4.0*a->nz)); 3269566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 3279566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 328bcc973b7SBarry Smith PetscFunctionReturn(0); 32982b1193eSBarry Smith } 330cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 331dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 332bcc973b7SBarry Smith { 3334cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 334bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 335d2840e09SBarry Smith const PetscScalar *x,*v; 336d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3; 337d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 338d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 33982b1193eSBarry Smith 340bcc973b7SBarry Smith PetscFunctionBegin; 3419566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 3429566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 343bcc973b7SBarry Smith idx = a->j; 344bcc973b7SBarry Smith v = a->a; 345bcc973b7SBarry Smith ii = a->i; 346bcc973b7SBarry Smith 347bcc973b7SBarry Smith for (i=0; i<m; i++) { 348bcc973b7SBarry Smith jrow = ii[i]; 349bcc973b7SBarry Smith n = ii[i+1] - jrow; 350bcc973b7SBarry Smith sum1 = 0.0; 351bcc973b7SBarry Smith sum2 = 0.0; 352bcc973b7SBarry Smith sum3 = 0.0; 35326fbe8dcSKarl Rupp 354b3c51c66SMatthew Knepley nonzerorow += (n>0); 355bcc973b7SBarry Smith for (j=0; j<n; j++) { 356bcc973b7SBarry Smith sum1 += v[jrow]*x[3*idx[jrow]]; 357bcc973b7SBarry Smith sum2 += v[jrow]*x[3*idx[jrow]+1]; 358bcc973b7SBarry Smith sum3 += v[jrow]*x[3*idx[jrow]+2]; 359bcc973b7SBarry Smith jrow++; 360bcc973b7SBarry Smith } 361bcc973b7SBarry Smith y[3*i] = sum1; 362bcc973b7SBarry Smith y[3*i+1] = sum2; 363bcc973b7SBarry Smith y[3*i+2] = sum3; 364bcc973b7SBarry Smith } 365bcc973b7SBarry Smith 3669566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(6.0*a->nz - 3.0*nonzerorow)); 3679566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 3689566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 369bcc973b7SBarry Smith PetscFunctionReturn(0); 370bcc973b7SBarry Smith } 371bcc973b7SBarry Smith 372dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 373bcc973b7SBarry Smith { 3744cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 375bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 376d2840e09SBarry Smith const PetscScalar *x,*v; 377d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3; 378d2840e09SBarry Smith PetscInt n,i; 379d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 380bcc973b7SBarry Smith 381bcc973b7SBarry Smith PetscFunctionBegin; 3829566063dSJacob Faibussowitsch PetscCall(VecSet(yy,0.0)); 3839566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 3849566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 385bfec09a0SHong Zhang 386bcc973b7SBarry Smith for (i=0; i<m; i++) { 387bfec09a0SHong Zhang idx = a->j + a->i[i]; 388bfec09a0SHong Zhang v = a->a + a->i[i]; 389bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 390bcc973b7SBarry Smith alpha1 = x[3*i]; 391bcc973b7SBarry Smith alpha2 = x[3*i+1]; 392bcc973b7SBarry Smith alpha3 = x[3*i+2]; 393bcc973b7SBarry Smith while (n-->0) { 394bcc973b7SBarry Smith y[3*(*idx)] += alpha1*(*v); 395bcc973b7SBarry Smith y[3*(*idx)+1] += alpha2*(*v); 396bcc973b7SBarry Smith y[3*(*idx)+2] += alpha3*(*v); 397bcc973b7SBarry Smith idx++; v++; 398bcc973b7SBarry Smith } 399bcc973b7SBarry Smith } 4009566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(6.0*a->nz)); 4019566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 4029566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 403bcc973b7SBarry Smith PetscFunctionReturn(0); 404bcc973b7SBarry Smith } 405bcc973b7SBarry Smith 406dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 407bcc973b7SBarry Smith { 4084cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 409bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 410d2840e09SBarry Smith const PetscScalar *x,*v; 411d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3; 412b24ad042SBarry Smith PetscInt n,i,jrow,j; 413d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 414bcc973b7SBarry Smith 415bcc973b7SBarry Smith PetscFunctionBegin; 4169566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 4179566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 4189566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 419bcc973b7SBarry Smith idx = a->j; 420bcc973b7SBarry Smith v = a->a; 421bcc973b7SBarry Smith ii = a->i; 422bcc973b7SBarry Smith 423bcc973b7SBarry Smith for (i=0; i<m; i++) { 424bcc973b7SBarry Smith jrow = ii[i]; 425bcc973b7SBarry Smith n = ii[i+1] - jrow; 426bcc973b7SBarry Smith sum1 = 0.0; 427bcc973b7SBarry Smith sum2 = 0.0; 428bcc973b7SBarry Smith sum3 = 0.0; 429bcc973b7SBarry Smith for (j=0; j<n; j++) { 430bcc973b7SBarry Smith sum1 += v[jrow]*x[3*idx[jrow]]; 431bcc973b7SBarry Smith sum2 += v[jrow]*x[3*idx[jrow]+1]; 432bcc973b7SBarry Smith sum3 += v[jrow]*x[3*idx[jrow]+2]; 433bcc973b7SBarry Smith jrow++; 434bcc973b7SBarry Smith } 435bcc973b7SBarry Smith y[3*i] += sum1; 436bcc973b7SBarry Smith y[3*i+1] += sum2; 437bcc973b7SBarry Smith y[3*i+2] += sum3; 438bcc973b7SBarry Smith } 439bcc973b7SBarry Smith 4409566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(6.0*a->nz)); 4419566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 4429566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 443bcc973b7SBarry Smith PetscFunctionReturn(0); 444bcc973b7SBarry Smith } 445dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 446bcc973b7SBarry Smith { 4474cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 448bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 449d2840e09SBarry Smith const PetscScalar *x,*v; 450d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3; 451d2840e09SBarry Smith PetscInt n,i; 452d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 453bcc973b7SBarry Smith 454bcc973b7SBarry Smith PetscFunctionBegin; 4559566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 4569566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 4579566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 458bcc973b7SBarry Smith for (i=0; i<m; i++) { 459bfec09a0SHong Zhang idx = a->j + a->i[i]; 460bfec09a0SHong Zhang v = a->a + a->i[i]; 461bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 462bcc973b7SBarry Smith alpha1 = x[3*i]; 463bcc973b7SBarry Smith alpha2 = x[3*i+1]; 464bcc973b7SBarry Smith alpha3 = x[3*i+2]; 465bcc973b7SBarry Smith while (n-->0) { 466bcc973b7SBarry Smith y[3*(*idx)] += alpha1*(*v); 467bcc973b7SBarry Smith y[3*(*idx)+1] += alpha2*(*v); 468bcc973b7SBarry Smith y[3*(*idx)+2] += alpha3*(*v); 469bcc973b7SBarry Smith idx++; v++; 470bcc973b7SBarry Smith } 471bcc973b7SBarry Smith } 4729566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(6.0*a->nz)); 4739566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 4749566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 475bcc973b7SBarry Smith PetscFunctionReturn(0); 476bcc973b7SBarry Smith } 477bcc973b7SBarry Smith 478bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/ 479dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 480bcc973b7SBarry Smith { 4814cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 482bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 483d2840e09SBarry Smith const PetscScalar *x,*v; 484d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4; 485d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 486d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 487bcc973b7SBarry Smith 488bcc973b7SBarry Smith PetscFunctionBegin; 4899566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 4909566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 491bcc973b7SBarry Smith idx = a->j; 492bcc973b7SBarry Smith v = a->a; 493bcc973b7SBarry Smith ii = a->i; 494bcc973b7SBarry Smith 495bcc973b7SBarry Smith for (i=0; i<m; i++) { 496bcc973b7SBarry Smith jrow = ii[i]; 497bcc973b7SBarry Smith n = ii[i+1] - jrow; 498bcc973b7SBarry Smith sum1 = 0.0; 499bcc973b7SBarry Smith sum2 = 0.0; 500bcc973b7SBarry Smith sum3 = 0.0; 501bcc973b7SBarry Smith sum4 = 0.0; 502b3c51c66SMatthew Knepley nonzerorow += (n>0); 503bcc973b7SBarry Smith for (j=0; j<n; j++) { 504bcc973b7SBarry Smith sum1 += v[jrow]*x[4*idx[jrow]]; 505bcc973b7SBarry Smith sum2 += v[jrow]*x[4*idx[jrow]+1]; 506bcc973b7SBarry Smith sum3 += v[jrow]*x[4*idx[jrow]+2]; 507bcc973b7SBarry Smith sum4 += v[jrow]*x[4*idx[jrow]+3]; 508bcc973b7SBarry Smith jrow++; 509bcc973b7SBarry Smith } 510bcc973b7SBarry Smith y[4*i] = sum1; 511bcc973b7SBarry Smith y[4*i+1] = sum2; 512bcc973b7SBarry Smith y[4*i+2] = sum3; 513bcc973b7SBarry Smith y[4*i+3] = sum4; 514bcc973b7SBarry Smith } 515bcc973b7SBarry Smith 5169566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(8.0*a->nz - 4.0*nonzerorow)); 5179566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 5189566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 519bcc973b7SBarry Smith PetscFunctionReturn(0); 520bcc973b7SBarry Smith } 521bcc973b7SBarry Smith 522dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 523bcc973b7SBarry Smith { 5244cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 525bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 526d2840e09SBarry Smith const PetscScalar *x,*v; 527d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 528d2840e09SBarry Smith PetscInt n,i; 529d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 530bcc973b7SBarry Smith 531bcc973b7SBarry Smith PetscFunctionBegin; 5329566063dSJacob Faibussowitsch PetscCall(VecSet(yy,0.0)); 5339566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 5349566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 535bcc973b7SBarry Smith for (i=0; i<m; i++) { 536bfec09a0SHong Zhang idx = a->j + a->i[i]; 537bfec09a0SHong Zhang v = a->a + a->i[i]; 538bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 539bcc973b7SBarry Smith alpha1 = x[4*i]; 540bcc973b7SBarry Smith alpha2 = x[4*i+1]; 541bcc973b7SBarry Smith alpha3 = x[4*i+2]; 542bcc973b7SBarry Smith alpha4 = x[4*i+3]; 543bcc973b7SBarry Smith while (n-->0) { 544bcc973b7SBarry Smith y[4*(*idx)] += alpha1*(*v); 545bcc973b7SBarry Smith y[4*(*idx)+1] += alpha2*(*v); 546bcc973b7SBarry Smith y[4*(*idx)+2] += alpha3*(*v); 547bcc973b7SBarry Smith y[4*(*idx)+3] += alpha4*(*v); 548bcc973b7SBarry Smith idx++; v++; 549bcc973b7SBarry Smith } 550bcc973b7SBarry Smith } 5519566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(8.0*a->nz)); 5529566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 5539566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 554bcc973b7SBarry Smith PetscFunctionReturn(0); 555bcc973b7SBarry Smith } 556bcc973b7SBarry Smith 557dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 558bcc973b7SBarry Smith { 5594cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 560f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 561d2840e09SBarry Smith const PetscScalar *x,*v; 562d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4; 563b24ad042SBarry Smith PetscInt n,i,jrow,j; 564d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 565f9fae5adSBarry Smith 566f9fae5adSBarry Smith PetscFunctionBegin; 5679566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 5689566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 5699566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 570f9fae5adSBarry Smith idx = a->j; 571f9fae5adSBarry Smith v = a->a; 572f9fae5adSBarry Smith ii = a->i; 573f9fae5adSBarry Smith 574f9fae5adSBarry Smith for (i=0; i<m; i++) { 575f9fae5adSBarry Smith jrow = ii[i]; 576f9fae5adSBarry Smith n = ii[i+1] - jrow; 577f9fae5adSBarry Smith sum1 = 0.0; 578f9fae5adSBarry Smith sum2 = 0.0; 579f9fae5adSBarry Smith sum3 = 0.0; 580f9fae5adSBarry Smith sum4 = 0.0; 581f9fae5adSBarry Smith for (j=0; j<n; j++) { 582f9fae5adSBarry Smith sum1 += v[jrow]*x[4*idx[jrow]]; 583f9fae5adSBarry Smith sum2 += v[jrow]*x[4*idx[jrow]+1]; 584f9fae5adSBarry Smith sum3 += v[jrow]*x[4*idx[jrow]+2]; 585f9fae5adSBarry Smith sum4 += v[jrow]*x[4*idx[jrow]+3]; 586f9fae5adSBarry Smith jrow++; 587f9fae5adSBarry Smith } 588f9fae5adSBarry Smith y[4*i] += sum1; 589f9fae5adSBarry Smith y[4*i+1] += sum2; 590f9fae5adSBarry Smith y[4*i+2] += sum3; 591f9fae5adSBarry Smith y[4*i+3] += sum4; 592f9fae5adSBarry Smith } 593f9fae5adSBarry Smith 5949566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(8.0*a->nz)); 5959566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 5969566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 597f9fae5adSBarry Smith PetscFunctionReturn(0); 598bcc973b7SBarry Smith } 599dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 600bcc973b7SBarry Smith { 6014cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 602f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 603d2840e09SBarry Smith const PetscScalar *x,*v; 604d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 605d2840e09SBarry Smith PetscInt n,i; 606d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 607f9fae5adSBarry Smith 608f9fae5adSBarry Smith PetscFunctionBegin; 6099566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 6109566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 6119566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 612bfec09a0SHong Zhang 613f9fae5adSBarry Smith for (i=0; i<m; i++) { 614bfec09a0SHong Zhang idx = a->j + a->i[i]; 615bfec09a0SHong Zhang v = a->a + a->i[i]; 616f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 617f9fae5adSBarry Smith alpha1 = x[4*i]; 618f9fae5adSBarry Smith alpha2 = x[4*i+1]; 619f9fae5adSBarry Smith alpha3 = x[4*i+2]; 620f9fae5adSBarry Smith alpha4 = x[4*i+3]; 621f9fae5adSBarry Smith while (n-->0) { 622f9fae5adSBarry Smith y[4*(*idx)] += alpha1*(*v); 623f9fae5adSBarry Smith y[4*(*idx)+1] += alpha2*(*v); 624f9fae5adSBarry Smith y[4*(*idx)+2] += alpha3*(*v); 625f9fae5adSBarry Smith y[4*(*idx)+3] += alpha4*(*v); 626f9fae5adSBarry Smith idx++; v++; 627f9fae5adSBarry Smith } 628f9fae5adSBarry Smith } 6299566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(8.0*a->nz)); 6309566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 6319566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 632f9fae5adSBarry Smith PetscFunctionReturn(0); 633f9fae5adSBarry Smith } 634f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/ 6356cd98798SBarry Smith 636dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 637f9fae5adSBarry Smith { 6384cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 639f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 640d2840e09SBarry Smith const PetscScalar *x,*v; 641d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 642d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 643d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 644f9fae5adSBarry Smith 645f9fae5adSBarry Smith PetscFunctionBegin; 6469566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 6479566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 648f9fae5adSBarry Smith idx = a->j; 649f9fae5adSBarry Smith v = a->a; 650f9fae5adSBarry Smith ii = a->i; 651f9fae5adSBarry Smith 652f9fae5adSBarry Smith for (i=0; i<m; i++) { 653f9fae5adSBarry Smith jrow = ii[i]; 654f9fae5adSBarry Smith n = ii[i+1] - jrow; 655f9fae5adSBarry Smith sum1 = 0.0; 656f9fae5adSBarry Smith sum2 = 0.0; 657f9fae5adSBarry Smith sum3 = 0.0; 658f9fae5adSBarry Smith sum4 = 0.0; 659f9fae5adSBarry Smith sum5 = 0.0; 66026fbe8dcSKarl Rupp 661b3c51c66SMatthew Knepley nonzerorow += (n>0); 662f9fae5adSBarry Smith for (j=0; j<n; j++) { 663f9fae5adSBarry Smith sum1 += v[jrow]*x[5*idx[jrow]]; 664f9fae5adSBarry Smith sum2 += v[jrow]*x[5*idx[jrow]+1]; 665f9fae5adSBarry Smith sum3 += v[jrow]*x[5*idx[jrow]+2]; 666f9fae5adSBarry Smith sum4 += v[jrow]*x[5*idx[jrow]+3]; 667f9fae5adSBarry Smith sum5 += v[jrow]*x[5*idx[jrow]+4]; 668f9fae5adSBarry Smith jrow++; 669f9fae5adSBarry Smith } 670f9fae5adSBarry Smith y[5*i] = sum1; 671f9fae5adSBarry Smith y[5*i+1] = sum2; 672f9fae5adSBarry Smith y[5*i+2] = sum3; 673f9fae5adSBarry Smith y[5*i+3] = sum4; 674f9fae5adSBarry Smith y[5*i+4] = sum5; 675f9fae5adSBarry Smith } 676f9fae5adSBarry Smith 6779566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(10.0*a->nz - 5.0*nonzerorow)); 6789566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 6799566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 680f9fae5adSBarry Smith PetscFunctionReturn(0); 681f9fae5adSBarry Smith } 682f9fae5adSBarry Smith 683dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 684f9fae5adSBarry Smith { 6854cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 686f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 687d2840e09SBarry Smith const PetscScalar *x,*v; 688d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 689d2840e09SBarry Smith PetscInt n,i; 690d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 691f9fae5adSBarry Smith 692f9fae5adSBarry Smith PetscFunctionBegin; 6939566063dSJacob Faibussowitsch PetscCall(VecSet(yy,0.0)); 6949566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 6959566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 696bfec09a0SHong Zhang 697f9fae5adSBarry Smith for (i=0; i<m; i++) { 698bfec09a0SHong Zhang idx = a->j + a->i[i]; 699bfec09a0SHong Zhang v = a->a + a->i[i]; 700f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 701f9fae5adSBarry Smith alpha1 = x[5*i]; 702f9fae5adSBarry Smith alpha2 = x[5*i+1]; 703f9fae5adSBarry Smith alpha3 = x[5*i+2]; 704f9fae5adSBarry Smith alpha4 = x[5*i+3]; 705f9fae5adSBarry Smith alpha5 = x[5*i+4]; 706f9fae5adSBarry Smith while (n-->0) { 707f9fae5adSBarry Smith y[5*(*idx)] += alpha1*(*v); 708f9fae5adSBarry Smith y[5*(*idx)+1] += alpha2*(*v); 709f9fae5adSBarry Smith y[5*(*idx)+2] += alpha3*(*v); 710f9fae5adSBarry Smith y[5*(*idx)+3] += alpha4*(*v); 711f9fae5adSBarry Smith y[5*(*idx)+4] += alpha5*(*v); 712f9fae5adSBarry Smith idx++; v++; 713f9fae5adSBarry Smith } 714f9fae5adSBarry Smith } 7159566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(10.0*a->nz)); 7169566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 7179566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 718f9fae5adSBarry Smith PetscFunctionReturn(0); 719f9fae5adSBarry Smith } 720f9fae5adSBarry Smith 721dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 722f9fae5adSBarry Smith { 7234cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 724f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 725d2840e09SBarry Smith const PetscScalar *x,*v; 726d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 727b24ad042SBarry Smith PetscInt n,i,jrow,j; 728d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 729f9fae5adSBarry Smith 730f9fae5adSBarry Smith PetscFunctionBegin; 7319566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 7329566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 7339566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 734f9fae5adSBarry Smith idx = a->j; 735f9fae5adSBarry Smith v = a->a; 736f9fae5adSBarry Smith ii = a->i; 737f9fae5adSBarry Smith 738f9fae5adSBarry Smith for (i=0; i<m; i++) { 739f9fae5adSBarry Smith jrow = ii[i]; 740f9fae5adSBarry Smith n = ii[i+1] - jrow; 741f9fae5adSBarry Smith sum1 = 0.0; 742f9fae5adSBarry Smith sum2 = 0.0; 743f9fae5adSBarry Smith sum3 = 0.0; 744f9fae5adSBarry Smith sum4 = 0.0; 745f9fae5adSBarry Smith sum5 = 0.0; 746f9fae5adSBarry Smith for (j=0; j<n; j++) { 747f9fae5adSBarry Smith sum1 += v[jrow]*x[5*idx[jrow]]; 748f9fae5adSBarry Smith sum2 += v[jrow]*x[5*idx[jrow]+1]; 749f9fae5adSBarry Smith sum3 += v[jrow]*x[5*idx[jrow]+2]; 750f9fae5adSBarry Smith sum4 += v[jrow]*x[5*idx[jrow]+3]; 751f9fae5adSBarry Smith sum5 += v[jrow]*x[5*idx[jrow]+4]; 752f9fae5adSBarry Smith jrow++; 753f9fae5adSBarry Smith } 754f9fae5adSBarry Smith y[5*i] += sum1; 755f9fae5adSBarry Smith y[5*i+1] += sum2; 756f9fae5adSBarry Smith y[5*i+2] += sum3; 757f9fae5adSBarry Smith y[5*i+3] += sum4; 758f9fae5adSBarry Smith y[5*i+4] += sum5; 759f9fae5adSBarry Smith } 760f9fae5adSBarry Smith 7619566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(10.0*a->nz)); 7629566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 7639566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 764f9fae5adSBarry Smith PetscFunctionReturn(0); 765f9fae5adSBarry Smith } 766f9fae5adSBarry Smith 767dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 768f9fae5adSBarry Smith { 7694cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 770f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 771d2840e09SBarry Smith const PetscScalar *x,*v; 772d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 773d2840e09SBarry Smith PetscInt n,i; 774d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 775f9fae5adSBarry Smith 776f9fae5adSBarry Smith PetscFunctionBegin; 7779566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 7789566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 7799566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 780bfec09a0SHong Zhang 781f9fae5adSBarry Smith for (i=0; i<m; i++) { 782bfec09a0SHong Zhang idx = a->j + a->i[i]; 783bfec09a0SHong Zhang v = a->a + a->i[i]; 784f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 785f9fae5adSBarry Smith alpha1 = x[5*i]; 786f9fae5adSBarry Smith alpha2 = x[5*i+1]; 787f9fae5adSBarry Smith alpha3 = x[5*i+2]; 788f9fae5adSBarry Smith alpha4 = x[5*i+3]; 789f9fae5adSBarry Smith alpha5 = x[5*i+4]; 790f9fae5adSBarry Smith while (n-->0) { 791f9fae5adSBarry Smith y[5*(*idx)] += alpha1*(*v); 792f9fae5adSBarry Smith y[5*(*idx)+1] += alpha2*(*v); 793f9fae5adSBarry Smith y[5*(*idx)+2] += alpha3*(*v); 794f9fae5adSBarry Smith y[5*(*idx)+3] += alpha4*(*v); 795f9fae5adSBarry Smith y[5*(*idx)+4] += alpha5*(*v); 796f9fae5adSBarry Smith idx++; v++; 797f9fae5adSBarry Smith } 798f9fae5adSBarry Smith } 7999566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(10.0*a->nz)); 8009566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 8019566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 802f9fae5adSBarry Smith PetscFunctionReturn(0); 803bcc973b7SBarry Smith } 804bcc973b7SBarry Smith 8056cd98798SBarry Smith /* ------------------------------------------------------------------------------*/ 806dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 8076cd98798SBarry Smith { 8086cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 8096cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 810d2840e09SBarry Smith const PetscScalar *x,*v; 811d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 812d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 813d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 8146cd98798SBarry Smith 8156cd98798SBarry Smith PetscFunctionBegin; 8169566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 8179566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 8186cd98798SBarry Smith idx = a->j; 8196cd98798SBarry Smith v = a->a; 8206cd98798SBarry Smith ii = a->i; 8216cd98798SBarry Smith 8226cd98798SBarry Smith for (i=0; i<m; i++) { 8236cd98798SBarry Smith jrow = ii[i]; 8246cd98798SBarry Smith n = ii[i+1] - jrow; 8256cd98798SBarry Smith sum1 = 0.0; 8266cd98798SBarry Smith sum2 = 0.0; 8276cd98798SBarry Smith sum3 = 0.0; 8286cd98798SBarry Smith sum4 = 0.0; 8296cd98798SBarry Smith sum5 = 0.0; 8306cd98798SBarry Smith sum6 = 0.0; 83126fbe8dcSKarl Rupp 832b3c51c66SMatthew Knepley nonzerorow += (n>0); 8336cd98798SBarry Smith for (j=0; j<n; j++) { 8346cd98798SBarry Smith sum1 += v[jrow]*x[6*idx[jrow]]; 8356cd98798SBarry Smith sum2 += v[jrow]*x[6*idx[jrow]+1]; 8366cd98798SBarry Smith sum3 += v[jrow]*x[6*idx[jrow]+2]; 8376cd98798SBarry Smith sum4 += v[jrow]*x[6*idx[jrow]+3]; 8386cd98798SBarry Smith sum5 += v[jrow]*x[6*idx[jrow]+4]; 8396cd98798SBarry Smith sum6 += v[jrow]*x[6*idx[jrow]+5]; 8406cd98798SBarry Smith jrow++; 8416cd98798SBarry Smith } 8426cd98798SBarry Smith y[6*i] = sum1; 8436cd98798SBarry Smith y[6*i+1] = sum2; 8446cd98798SBarry Smith y[6*i+2] = sum3; 8456cd98798SBarry Smith y[6*i+3] = sum4; 8466cd98798SBarry Smith y[6*i+4] = sum5; 8476cd98798SBarry Smith y[6*i+5] = sum6; 8486cd98798SBarry Smith } 8496cd98798SBarry Smith 8509566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(12.0*a->nz - 6.0*nonzerorow)); 8519566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 8529566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 8536cd98798SBarry Smith PetscFunctionReturn(0); 8546cd98798SBarry Smith } 8556cd98798SBarry Smith 856dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 8576cd98798SBarry Smith { 8586cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 8596cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 860d2840e09SBarry Smith const PetscScalar *x,*v; 861d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 862d2840e09SBarry Smith PetscInt n,i; 863d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 8646cd98798SBarry Smith 8656cd98798SBarry Smith PetscFunctionBegin; 8669566063dSJacob Faibussowitsch PetscCall(VecSet(yy,0.0)); 8679566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 8689566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 869bfec09a0SHong Zhang 8706cd98798SBarry Smith for (i=0; i<m; i++) { 871bfec09a0SHong Zhang idx = a->j + a->i[i]; 872bfec09a0SHong Zhang v = a->a + a->i[i]; 8736cd98798SBarry Smith n = a->i[i+1] - a->i[i]; 8746cd98798SBarry Smith alpha1 = x[6*i]; 8756cd98798SBarry Smith alpha2 = x[6*i+1]; 8766cd98798SBarry Smith alpha3 = x[6*i+2]; 8776cd98798SBarry Smith alpha4 = x[6*i+3]; 8786cd98798SBarry Smith alpha5 = x[6*i+4]; 8796cd98798SBarry Smith alpha6 = x[6*i+5]; 8806cd98798SBarry Smith while (n-->0) { 8816cd98798SBarry Smith y[6*(*idx)] += alpha1*(*v); 8826cd98798SBarry Smith y[6*(*idx)+1] += alpha2*(*v); 8836cd98798SBarry Smith y[6*(*idx)+2] += alpha3*(*v); 8846cd98798SBarry Smith y[6*(*idx)+3] += alpha4*(*v); 8856cd98798SBarry Smith y[6*(*idx)+4] += alpha5*(*v); 8866cd98798SBarry Smith y[6*(*idx)+5] += alpha6*(*v); 8876cd98798SBarry Smith idx++; v++; 8886cd98798SBarry Smith } 8896cd98798SBarry Smith } 8909566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(12.0*a->nz)); 8919566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 8929566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 8936cd98798SBarry Smith PetscFunctionReturn(0); 8946cd98798SBarry Smith } 8956cd98798SBarry Smith 896dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 8976cd98798SBarry Smith { 8986cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 8996cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 900d2840e09SBarry Smith const PetscScalar *x,*v; 901d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 902b24ad042SBarry Smith PetscInt n,i,jrow,j; 903d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 9046cd98798SBarry Smith 9056cd98798SBarry Smith PetscFunctionBegin; 9069566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 9079566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 9089566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 9096cd98798SBarry Smith idx = a->j; 9106cd98798SBarry Smith v = a->a; 9116cd98798SBarry Smith ii = a->i; 9126cd98798SBarry Smith 9136cd98798SBarry Smith for (i=0; i<m; i++) { 9146cd98798SBarry Smith jrow = ii[i]; 9156cd98798SBarry Smith n = ii[i+1] - jrow; 9166cd98798SBarry Smith sum1 = 0.0; 9176cd98798SBarry Smith sum2 = 0.0; 9186cd98798SBarry Smith sum3 = 0.0; 9196cd98798SBarry Smith sum4 = 0.0; 9206cd98798SBarry Smith sum5 = 0.0; 9216cd98798SBarry Smith sum6 = 0.0; 9226cd98798SBarry Smith for (j=0; j<n; j++) { 9236cd98798SBarry Smith sum1 += v[jrow]*x[6*idx[jrow]]; 9246cd98798SBarry Smith sum2 += v[jrow]*x[6*idx[jrow]+1]; 9256cd98798SBarry Smith sum3 += v[jrow]*x[6*idx[jrow]+2]; 9266cd98798SBarry Smith sum4 += v[jrow]*x[6*idx[jrow]+3]; 9276cd98798SBarry Smith sum5 += v[jrow]*x[6*idx[jrow]+4]; 9286cd98798SBarry Smith sum6 += v[jrow]*x[6*idx[jrow]+5]; 9296cd98798SBarry Smith jrow++; 9306cd98798SBarry Smith } 9316cd98798SBarry Smith y[6*i] += sum1; 9326cd98798SBarry Smith y[6*i+1] += sum2; 9336cd98798SBarry Smith y[6*i+2] += sum3; 9346cd98798SBarry Smith y[6*i+3] += sum4; 9356cd98798SBarry Smith y[6*i+4] += sum5; 9366cd98798SBarry Smith y[6*i+5] += sum6; 9376cd98798SBarry Smith } 9386cd98798SBarry Smith 9399566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(12.0*a->nz)); 9409566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 9419566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 9426cd98798SBarry Smith PetscFunctionReturn(0); 9436cd98798SBarry Smith } 9446cd98798SBarry Smith 945dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 9466cd98798SBarry Smith { 9476cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 9486cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 949d2840e09SBarry Smith const PetscScalar *x,*v; 950d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 951d2840e09SBarry Smith PetscInt n,i; 952d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 9536cd98798SBarry Smith 9546cd98798SBarry Smith PetscFunctionBegin; 9559566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 9569566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 9579566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 958bfec09a0SHong Zhang 9596cd98798SBarry Smith for (i=0; i<m; i++) { 960bfec09a0SHong Zhang idx = a->j + a->i[i]; 961bfec09a0SHong Zhang v = a->a + a->i[i]; 9626cd98798SBarry Smith n = a->i[i+1] - a->i[i]; 9636cd98798SBarry Smith alpha1 = x[6*i]; 9646cd98798SBarry Smith alpha2 = x[6*i+1]; 9656cd98798SBarry Smith alpha3 = x[6*i+2]; 9666cd98798SBarry Smith alpha4 = x[6*i+3]; 9676cd98798SBarry Smith alpha5 = x[6*i+4]; 9686cd98798SBarry Smith alpha6 = x[6*i+5]; 9696cd98798SBarry Smith while (n-->0) { 9706cd98798SBarry Smith y[6*(*idx)] += alpha1*(*v); 9716cd98798SBarry Smith y[6*(*idx)+1] += alpha2*(*v); 9726cd98798SBarry Smith y[6*(*idx)+2] += alpha3*(*v); 9736cd98798SBarry Smith y[6*(*idx)+3] += alpha4*(*v); 9746cd98798SBarry Smith y[6*(*idx)+4] += alpha5*(*v); 9756cd98798SBarry Smith y[6*(*idx)+5] += alpha6*(*v); 9766cd98798SBarry Smith idx++; v++; 9776cd98798SBarry Smith } 9786cd98798SBarry Smith } 9799566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(12.0*a->nz)); 9809566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 9819566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 9826cd98798SBarry Smith PetscFunctionReturn(0); 9836cd98798SBarry Smith } 9846cd98798SBarry Smith 98566ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/ 986ed8eea03SBarry Smith PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 987ed8eea03SBarry Smith { 988ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 989ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 990d2840e09SBarry Smith const PetscScalar *x,*v; 991d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 992d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 993d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 994ed8eea03SBarry Smith 995ed8eea03SBarry Smith PetscFunctionBegin; 9969566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 9979566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 998ed8eea03SBarry Smith idx = a->j; 999ed8eea03SBarry Smith v = a->a; 1000ed8eea03SBarry Smith ii = a->i; 1001ed8eea03SBarry Smith 1002ed8eea03SBarry Smith for (i=0; i<m; i++) { 1003ed8eea03SBarry Smith jrow = ii[i]; 1004ed8eea03SBarry Smith n = ii[i+1] - jrow; 1005ed8eea03SBarry Smith sum1 = 0.0; 1006ed8eea03SBarry Smith sum2 = 0.0; 1007ed8eea03SBarry Smith sum3 = 0.0; 1008ed8eea03SBarry Smith sum4 = 0.0; 1009ed8eea03SBarry Smith sum5 = 0.0; 1010ed8eea03SBarry Smith sum6 = 0.0; 1011ed8eea03SBarry Smith sum7 = 0.0; 101226fbe8dcSKarl Rupp 1013b3c51c66SMatthew Knepley nonzerorow += (n>0); 1014ed8eea03SBarry Smith for (j=0; j<n; j++) { 1015ed8eea03SBarry Smith sum1 += v[jrow]*x[7*idx[jrow]]; 1016ed8eea03SBarry Smith sum2 += v[jrow]*x[7*idx[jrow]+1]; 1017ed8eea03SBarry Smith sum3 += v[jrow]*x[7*idx[jrow]+2]; 1018ed8eea03SBarry Smith sum4 += v[jrow]*x[7*idx[jrow]+3]; 1019ed8eea03SBarry Smith sum5 += v[jrow]*x[7*idx[jrow]+4]; 1020ed8eea03SBarry Smith sum6 += v[jrow]*x[7*idx[jrow]+5]; 1021ed8eea03SBarry Smith sum7 += v[jrow]*x[7*idx[jrow]+6]; 1022ed8eea03SBarry Smith jrow++; 1023ed8eea03SBarry Smith } 1024ed8eea03SBarry Smith y[7*i] = sum1; 1025ed8eea03SBarry Smith y[7*i+1] = sum2; 1026ed8eea03SBarry Smith y[7*i+2] = sum3; 1027ed8eea03SBarry Smith y[7*i+3] = sum4; 1028ed8eea03SBarry Smith y[7*i+4] = sum5; 1029ed8eea03SBarry Smith y[7*i+5] = sum6; 1030ed8eea03SBarry Smith y[7*i+6] = sum7; 1031ed8eea03SBarry Smith } 1032ed8eea03SBarry Smith 10339566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(14.0*a->nz - 7.0*nonzerorow)); 10349566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 10359566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 1036ed8eea03SBarry Smith PetscFunctionReturn(0); 1037ed8eea03SBarry Smith } 1038ed8eea03SBarry Smith 1039ed8eea03SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1040ed8eea03SBarry Smith { 1041ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1042ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1043d2840e09SBarry Smith const PetscScalar *x,*v; 1044d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1045d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1046d2840e09SBarry Smith PetscInt n,i; 1047ed8eea03SBarry Smith 1048ed8eea03SBarry Smith PetscFunctionBegin; 10499566063dSJacob Faibussowitsch PetscCall(VecSet(yy,0.0)); 10509566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 10519566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 1052ed8eea03SBarry Smith 1053ed8eea03SBarry Smith for (i=0; i<m; i++) { 1054ed8eea03SBarry Smith idx = a->j + a->i[i]; 1055ed8eea03SBarry Smith v = a->a + a->i[i]; 1056ed8eea03SBarry Smith n = a->i[i+1] - a->i[i]; 1057ed8eea03SBarry Smith alpha1 = x[7*i]; 1058ed8eea03SBarry Smith alpha2 = x[7*i+1]; 1059ed8eea03SBarry Smith alpha3 = x[7*i+2]; 1060ed8eea03SBarry Smith alpha4 = x[7*i+3]; 1061ed8eea03SBarry Smith alpha5 = x[7*i+4]; 1062ed8eea03SBarry Smith alpha6 = x[7*i+5]; 1063ed8eea03SBarry Smith alpha7 = x[7*i+6]; 1064ed8eea03SBarry Smith while (n-->0) { 1065ed8eea03SBarry Smith y[7*(*idx)] += alpha1*(*v); 1066ed8eea03SBarry Smith y[7*(*idx)+1] += alpha2*(*v); 1067ed8eea03SBarry Smith y[7*(*idx)+2] += alpha3*(*v); 1068ed8eea03SBarry Smith y[7*(*idx)+3] += alpha4*(*v); 1069ed8eea03SBarry Smith y[7*(*idx)+4] += alpha5*(*v); 1070ed8eea03SBarry Smith y[7*(*idx)+5] += alpha6*(*v); 1071ed8eea03SBarry Smith y[7*(*idx)+6] += alpha7*(*v); 1072ed8eea03SBarry Smith idx++; v++; 1073ed8eea03SBarry Smith } 1074ed8eea03SBarry Smith } 10759566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(14.0*a->nz)); 10769566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 10779566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 1078ed8eea03SBarry Smith PetscFunctionReturn(0); 1079ed8eea03SBarry Smith } 1080ed8eea03SBarry Smith 1081ed8eea03SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1082ed8eea03SBarry Smith { 1083ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1084ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1085d2840e09SBarry Smith const PetscScalar *x,*v; 1086d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1087d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1088ed8eea03SBarry Smith PetscInt n,i,jrow,j; 1089ed8eea03SBarry Smith 1090ed8eea03SBarry Smith PetscFunctionBegin; 10919566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 10929566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 10939566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 1094ed8eea03SBarry Smith idx = a->j; 1095ed8eea03SBarry Smith v = a->a; 1096ed8eea03SBarry Smith ii = a->i; 1097ed8eea03SBarry Smith 1098ed8eea03SBarry Smith for (i=0; i<m; i++) { 1099ed8eea03SBarry Smith jrow = ii[i]; 1100ed8eea03SBarry Smith n = ii[i+1] - jrow; 1101ed8eea03SBarry Smith sum1 = 0.0; 1102ed8eea03SBarry Smith sum2 = 0.0; 1103ed8eea03SBarry Smith sum3 = 0.0; 1104ed8eea03SBarry Smith sum4 = 0.0; 1105ed8eea03SBarry Smith sum5 = 0.0; 1106ed8eea03SBarry Smith sum6 = 0.0; 1107ed8eea03SBarry Smith sum7 = 0.0; 1108ed8eea03SBarry Smith for (j=0; j<n; j++) { 1109ed8eea03SBarry Smith sum1 += v[jrow]*x[7*idx[jrow]]; 1110ed8eea03SBarry Smith sum2 += v[jrow]*x[7*idx[jrow]+1]; 1111ed8eea03SBarry Smith sum3 += v[jrow]*x[7*idx[jrow]+2]; 1112ed8eea03SBarry Smith sum4 += v[jrow]*x[7*idx[jrow]+3]; 1113ed8eea03SBarry Smith sum5 += v[jrow]*x[7*idx[jrow]+4]; 1114ed8eea03SBarry Smith sum6 += v[jrow]*x[7*idx[jrow]+5]; 1115ed8eea03SBarry Smith sum7 += v[jrow]*x[7*idx[jrow]+6]; 1116ed8eea03SBarry Smith jrow++; 1117ed8eea03SBarry Smith } 1118ed8eea03SBarry Smith y[7*i] += sum1; 1119ed8eea03SBarry Smith y[7*i+1] += sum2; 1120ed8eea03SBarry Smith y[7*i+2] += sum3; 1121ed8eea03SBarry Smith y[7*i+3] += sum4; 1122ed8eea03SBarry Smith y[7*i+4] += sum5; 1123ed8eea03SBarry Smith y[7*i+5] += sum6; 1124ed8eea03SBarry Smith y[7*i+6] += sum7; 1125ed8eea03SBarry Smith } 1126ed8eea03SBarry Smith 11279566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(14.0*a->nz)); 11289566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 11299566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 1130ed8eea03SBarry Smith PetscFunctionReturn(0); 1131ed8eea03SBarry Smith } 1132ed8eea03SBarry Smith 1133ed8eea03SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1134ed8eea03SBarry Smith { 1135ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1136ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1137d2840e09SBarry Smith const PetscScalar *x,*v; 1138d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1139d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1140d2840e09SBarry Smith PetscInt n,i; 1141ed8eea03SBarry Smith 1142ed8eea03SBarry Smith PetscFunctionBegin; 11439566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 11449566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 11459566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 1146ed8eea03SBarry Smith for (i=0; i<m; i++) { 1147ed8eea03SBarry Smith idx = a->j + a->i[i]; 1148ed8eea03SBarry Smith v = a->a + a->i[i]; 1149ed8eea03SBarry Smith n = a->i[i+1] - a->i[i]; 1150ed8eea03SBarry Smith alpha1 = x[7*i]; 1151ed8eea03SBarry Smith alpha2 = x[7*i+1]; 1152ed8eea03SBarry Smith alpha3 = x[7*i+2]; 1153ed8eea03SBarry Smith alpha4 = x[7*i+3]; 1154ed8eea03SBarry Smith alpha5 = x[7*i+4]; 1155ed8eea03SBarry Smith alpha6 = x[7*i+5]; 1156ed8eea03SBarry Smith alpha7 = x[7*i+6]; 1157ed8eea03SBarry Smith while (n-->0) { 1158ed8eea03SBarry Smith y[7*(*idx)] += alpha1*(*v); 1159ed8eea03SBarry Smith y[7*(*idx)+1] += alpha2*(*v); 1160ed8eea03SBarry Smith y[7*(*idx)+2] += alpha3*(*v); 1161ed8eea03SBarry Smith y[7*(*idx)+3] += alpha4*(*v); 1162ed8eea03SBarry Smith y[7*(*idx)+4] += alpha5*(*v); 1163ed8eea03SBarry Smith y[7*(*idx)+5] += alpha6*(*v); 1164ed8eea03SBarry Smith y[7*(*idx)+6] += alpha7*(*v); 1165ed8eea03SBarry Smith idx++; v++; 1166ed8eea03SBarry Smith } 1167ed8eea03SBarry Smith } 11689566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(14.0*a->nz)); 11699566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 11709566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 1171ed8eea03SBarry Smith PetscFunctionReturn(0); 1172ed8eea03SBarry Smith } 1173ed8eea03SBarry Smith 1174dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 117566ed3db0SBarry Smith { 117666ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 117766ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1178d2840e09SBarry Smith const PetscScalar *x,*v; 1179d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1180d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1181d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 118266ed3db0SBarry Smith 118366ed3db0SBarry Smith PetscFunctionBegin; 11849566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 11859566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 118666ed3db0SBarry Smith idx = a->j; 118766ed3db0SBarry Smith v = a->a; 118866ed3db0SBarry Smith ii = a->i; 118966ed3db0SBarry Smith 119066ed3db0SBarry Smith for (i=0; i<m; i++) { 119166ed3db0SBarry Smith jrow = ii[i]; 119266ed3db0SBarry Smith n = ii[i+1] - jrow; 119366ed3db0SBarry Smith sum1 = 0.0; 119466ed3db0SBarry Smith sum2 = 0.0; 119566ed3db0SBarry Smith sum3 = 0.0; 119666ed3db0SBarry Smith sum4 = 0.0; 119766ed3db0SBarry Smith sum5 = 0.0; 119866ed3db0SBarry Smith sum6 = 0.0; 119966ed3db0SBarry Smith sum7 = 0.0; 120066ed3db0SBarry Smith sum8 = 0.0; 120126fbe8dcSKarl Rupp 1202b3c51c66SMatthew Knepley nonzerorow += (n>0); 120366ed3db0SBarry Smith for (j=0; j<n; j++) { 120466ed3db0SBarry Smith sum1 += v[jrow]*x[8*idx[jrow]]; 120566ed3db0SBarry Smith sum2 += v[jrow]*x[8*idx[jrow]+1]; 120666ed3db0SBarry Smith sum3 += v[jrow]*x[8*idx[jrow]+2]; 120766ed3db0SBarry Smith sum4 += v[jrow]*x[8*idx[jrow]+3]; 120866ed3db0SBarry Smith sum5 += v[jrow]*x[8*idx[jrow]+4]; 120966ed3db0SBarry Smith sum6 += v[jrow]*x[8*idx[jrow]+5]; 121066ed3db0SBarry Smith sum7 += v[jrow]*x[8*idx[jrow]+6]; 121166ed3db0SBarry Smith sum8 += v[jrow]*x[8*idx[jrow]+7]; 121266ed3db0SBarry Smith jrow++; 121366ed3db0SBarry Smith } 121466ed3db0SBarry Smith y[8*i] = sum1; 121566ed3db0SBarry Smith y[8*i+1] = sum2; 121666ed3db0SBarry Smith y[8*i+2] = sum3; 121766ed3db0SBarry Smith y[8*i+3] = sum4; 121866ed3db0SBarry Smith y[8*i+4] = sum5; 121966ed3db0SBarry Smith y[8*i+5] = sum6; 122066ed3db0SBarry Smith y[8*i+6] = sum7; 122166ed3db0SBarry Smith y[8*i+7] = sum8; 122266ed3db0SBarry Smith } 122366ed3db0SBarry Smith 12249566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(16.0*a->nz - 8.0*nonzerorow)); 12259566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 12269566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 122766ed3db0SBarry Smith PetscFunctionReturn(0); 122866ed3db0SBarry Smith } 122966ed3db0SBarry Smith 1230dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 123166ed3db0SBarry Smith { 123266ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 123366ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1234d2840e09SBarry Smith const PetscScalar *x,*v; 1235d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1236d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1237d2840e09SBarry Smith PetscInt n,i; 123866ed3db0SBarry Smith 123966ed3db0SBarry Smith PetscFunctionBegin; 12409566063dSJacob Faibussowitsch PetscCall(VecSet(yy,0.0)); 12419566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 12429566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 1243bfec09a0SHong Zhang 124466ed3db0SBarry Smith for (i=0; i<m; i++) { 1245bfec09a0SHong Zhang idx = a->j + a->i[i]; 1246bfec09a0SHong Zhang v = a->a + a->i[i]; 124766ed3db0SBarry Smith n = a->i[i+1] - a->i[i]; 124866ed3db0SBarry Smith alpha1 = x[8*i]; 124966ed3db0SBarry Smith alpha2 = x[8*i+1]; 125066ed3db0SBarry Smith alpha3 = x[8*i+2]; 125166ed3db0SBarry Smith alpha4 = x[8*i+3]; 125266ed3db0SBarry Smith alpha5 = x[8*i+4]; 125366ed3db0SBarry Smith alpha6 = x[8*i+5]; 125466ed3db0SBarry Smith alpha7 = x[8*i+6]; 125566ed3db0SBarry Smith alpha8 = x[8*i+7]; 125666ed3db0SBarry Smith while (n-->0) { 125766ed3db0SBarry Smith y[8*(*idx)] += alpha1*(*v); 125866ed3db0SBarry Smith y[8*(*idx)+1] += alpha2*(*v); 125966ed3db0SBarry Smith y[8*(*idx)+2] += alpha3*(*v); 126066ed3db0SBarry Smith y[8*(*idx)+3] += alpha4*(*v); 126166ed3db0SBarry Smith y[8*(*idx)+4] += alpha5*(*v); 126266ed3db0SBarry Smith y[8*(*idx)+5] += alpha6*(*v); 126366ed3db0SBarry Smith y[8*(*idx)+6] += alpha7*(*v); 126466ed3db0SBarry Smith y[8*(*idx)+7] += alpha8*(*v); 126566ed3db0SBarry Smith idx++; v++; 126666ed3db0SBarry Smith } 126766ed3db0SBarry Smith } 12689566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(16.0*a->nz)); 12699566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 12709566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 127166ed3db0SBarry Smith PetscFunctionReturn(0); 127266ed3db0SBarry Smith } 127366ed3db0SBarry Smith 1274dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 127566ed3db0SBarry Smith { 127666ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 127766ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1278d2840e09SBarry Smith const PetscScalar *x,*v; 1279d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1280d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1281b24ad042SBarry Smith PetscInt n,i,jrow,j; 128266ed3db0SBarry Smith 128366ed3db0SBarry Smith PetscFunctionBegin; 12849566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 12859566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 12869566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 128766ed3db0SBarry Smith idx = a->j; 128866ed3db0SBarry Smith v = a->a; 128966ed3db0SBarry Smith ii = a->i; 129066ed3db0SBarry Smith 129166ed3db0SBarry Smith for (i=0; i<m; i++) { 129266ed3db0SBarry Smith jrow = ii[i]; 129366ed3db0SBarry Smith n = ii[i+1] - jrow; 129466ed3db0SBarry Smith sum1 = 0.0; 129566ed3db0SBarry Smith sum2 = 0.0; 129666ed3db0SBarry Smith sum3 = 0.0; 129766ed3db0SBarry Smith sum4 = 0.0; 129866ed3db0SBarry Smith sum5 = 0.0; 129966ed3db0SBarry Smith sum6 = 0.0; 130066ed3db0SBarry Smith sum7 = 0.0; 130166ed3db0SBarry Smith sum8 = 0.0; 130266ed3db0SBarry Smith for (j=0; j<n; j++) { 130366ed3db0SBarry Smith sum1 += v[jrow]*x[8*idx[jrow]]; 130466ed3db0SBarry Smith sum2 += v[jrow]*x[8*idx[jrow]+1]; 130566ed3db0SBarry Smith sum3 += v[jrow]*x[8*idx[jrow]+2]; 130666ed3db0SBarry Smith sum4 += v[jrow]*x[8*idx[jrow]+3]; 130766ed3db0SBarry Smith sum5 += v[jrow]*x[8*idx[jrow]+4]; 130866ed3db0SBarry Smith sum6 += v[jrow]*x[8*idx[jrow]+5]; 130966ed3db0SBarry Smith sum7 += v[jrow]*x[8*idx[jrow]+6]; 131066ed3db0SBarry Smith sum8 += v[jrow]*x[8*idx[jrow]+7]; 131166ed3db0SBarry Smith jrow++; 131266ed3db0SBarry Smith } 131366ed3db0SBarry Smith y[8*i] += sum1; 131466ed3db0SBarry Smith y[8*i+1] += sum2; 131566ed3db0SBarry Smith y[8*i+2] += sum3; 131666ed3db0SBarry Smith y[8*i+3] += sum4; 131766ed3db0SBarry Smith y[8*i+4] += sum5; 131866ed3db0SBarry Smith y[8*i+5] += sum6; 131966ed3db0SBarry Smith y[8*i+6] += sum7; 132066ed3db0SBarry Smith y[8*i+7] += sum8; 132166ed3db0SBarry Smith } 132266ed3db0SBarry Smith 13239566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(16.0*a->nz)); 13249566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 13259566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 132666ed3db0SBarry Smith PetscFunctionReturn(0); 132766ed3db0SBarry Smith } 132866ed3db0SBarry Smith 1329dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 133066ed3db0SBarry Smith { 133166ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 133266ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1333d2840e09SBarry Smith const PetscScalar *x,*v; 1334d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1335d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1336d2840e09SBarry Smith PetscInt n,i; 133766ed3db0SBarry Smith 133866ed3db0SBarry Smith PetscFunctionBegin; 13399566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 13409566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 13419566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 134266ed3db0SBarry Smith for (i=0; i<m; i++) { 1343bfec09a0SHong Zhang idx = a->j + a->i[i]; 1344bfec09a0SHong Zhang v = a->a + a->i[i]; 134566ed3db0SBarry Smith n = a->i[i+1] - a->i[i]; 134666ed3db0SBarry Smith alpha1 = x[8*i]; 134766ed3db0SBarry Smith alpha2 = x[8*i+1]; 134866ed3db0SBarry Smith alpha3 = x[8*i+2]; 134966ed3db0SBarry Smith alpha4 = x[8*i+3]; 135066ed3db0SBarry Smith alpha5 = x[8*i+4]; 135166ed3db0SBarry Smith alpha6 = x[8*i+5]; 135266ed3db0SBarry Smith alpha7 = x[8*i+6]; 135366ed3db0SBarry Smith alpha8 = x[8*i+7]; 135466ed3db0SBarry Smith while (n-->0) { 135566ed3db0SBarry Smith y[8*(*idx)] += alpha1*(*v); 135666ed3db0SBarry Smith y[8*(*idx)+1] += alpha2*(*v); 135766ed3db0SBarry Smith y[8*(*idx)+2] += alpha3*(*v); 135866ed3db0SBarry Smith y[8*(*idx)+3] += alpha4*(*v); 135966ed3db0SBarry Smith y[8*(*idx)+4] += alpha5*(*v); 136066ed3db0SBarry Smith y[8*(*idx)+5] += alpha6*(*v); 136166ed3db0SBarry Smith y[8*(*idx)+6] += alpha7*(*v); 136266ed3db0SBarry Smith y[8*(*idx)+7] += alpha8*(*v); 136366ed3db0SBarry Smith idx++; v++; 136466ed3db0SBarry Smith } 136566ed3db0SBarry Smith } 13669566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(16.0*a->nz)); 13679566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 13689566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 13692f7816d4SBarry Smith PetscFunctionReturn(0); 13702f7816d4SBarry Smith } 13712f7816d4SBarry Smith 13722b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/ 1373dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 13742b692628SMatthew Knepley { 13752b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 13762b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1377d2840e09SBarry Smith const PetscScalar *x,*v; 1378d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1379d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1380d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 13812b692628SMatthew Knepley 13822b692628SMatthew Knepley PetscFunctionBegin; 13839566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 13849566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 13852b692628SMatthew Knepley idx = a->j; 13862b692628SMatthew Knepley v = a->a; 13872b692628SMatthew Knepley ii = a->i; 13882b692628SMatthew Knepley 13892b692628SMatthew Knepley for (i=0; i<m; i++) { 13902b692628SMatthew Knepley jrow = ii[i]; 13912b692628SMatthew Knepley n = ii[i+1] - jrow; 13922b692628SMatthew Knepley sum1 = 0.0; 13932b692628SMatthew Knepley sum2 = 0.0; 13942b692628SMatthew Knepley sum3 = 0.0; 13952b692628SMatthew Knepley sum4 = 0.0; 13962b692628SMatthew Knepley sum5 = 0.0; 13972b692628SMatthew Knepley sum6 = 0.0; 13982b692628SMatthew Knepley sum7 = 0.0; 13992b692628SMatthew Knepley sum8 = 0.0; 14002b692628SMatthew Knepley sum9 = 0.0; 140126fbe8dcSKarl Rupp 1402b3c51c66SMatthew Knepley nonzerorow += (n>0); 14032b692628SMatthew Knepley for (j=0; j<n; j++) { 14042b692628SMatthew Knepley sum1 += v[jrow]*x[9*idx[jrow]]; 14052b692628SMatthew Knepley sum2 += v[jrow]*x[9*idx[jrow]+1]; 14062b692628SMatthew Knepley sum3 += v[jrow]*x[9*idx[jrow]+2]; 14072b692628SMatthew Knepley sum4 += v[jrow]*x[9*idx[jrow]+3]; 14082b692628SMatthew Knepley sum5 += v[jrow]*x[9*idx[jrow]+4]; 14092b692628SMatthew Knepley sum6 += v[jrow]*x[9*idx[jrow]+5]; 14102b692628SMatthew Knepley sum7 += v[jrow]*x[9*idx[jrow]+6]; 14112b692628SMatthew Knepley sum8 += v[jrow]*x[9*idx[jrow]+7]; 14122b692628SMatthew Knepley sum9 += v[jrow]*x[9*idx[jrow]+8]; 14132b692628SMatthew Knepley jrow++; 14142b692628SMatthew Knepley } 14152b692628SMatthew Knepley y[9*i] = sum1; 14162b692628SMatthew Knepley y[9*i+1] = sum2; 14172b692628SMatthew Knepley y[9*i+2] = sum3; 14182b692628SMatthew Knepley y[9*i+3] = sum4; 14192b692628SMatthew Knepley y[9*i+4] = sum5; 14202b692628SMatthew Knepley y[9*i+5] = sum6; 14212b692628SMatthew Knepley y[9*i+6] = sum7; 14222b692628SMatthew Knepley y[9*i+7] = sum8; 14232b692628SMatthew Knepley y[9*i+8] = sum9; 14242b692628SMatthew Knepley } 14252b692628SMatthew Knepley 14269566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(18.0*a->nz - 9*nonzerorow)); 14279566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 14289566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 14292b692628SMatthew Knepley PetscFunctionReturn(0); 14302b692628SMatthew Knepley } 14312b692628SMatthew Knepley 1432b9cda22cSBarry Smith /* ------------------------------------------------------------------------------*/ 1433b9cda22cSBarry Smith 1434dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 14352b692628SMatthew Knepley { 14362b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 14372b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1438d2840e09SBarry Smith const PetscScalar *x,*v; 1439d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1440d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1441d2840e09SBarry Smith PetscInt n,i; 14422b692628SMatthew Knepley 14432b692628SMatthew Knepley PetscFunctionBegin; 14449566063dSJacob Faibussowitsch PetscCall(VecSet(yy,0.0)); 14459566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 14469566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 14472b692628SMatthew Knepley 14482b692628SMatthew Knepley for (i=0; i<m; i++) { 14492b692628SMatthew Knepley idx = a->j + a->i[i]; 14502b692628SMatthew Knepley v = a->a + a->i[i]; 14512b692628SMatthew Knepley n = a->i[i+1] - a->i[i]; 14522b692628SMatthew Knepley alpha1 = x[9*i]; 14532b692628SMatthew Knepley alpha2 = x[9*i+1]; 14542b692628SMatthew Knepley alpha3 = x[9*i+2]; 14552b692628SMatthew Knepley alpha4 = x[9*i+3]; 14562b692628SMatthew Knepley alpha5 = x[9*i+4]; 14572b692628SMatthew Knepley alpha6 = x[9*i+5]; 14582b692628SMatthew Knepley alpha7 = x[9*i+6]; 14592b692628SMatthew Knepley alpha8 = x[9*i+7]; 14602f6bd0c6SMatthew Knepley alpha9 = x[9*i+8]; 14612b692628SMatthew Knepley while (n-->0) { 14622b692628SMatthew Knepley y[9*(*idx)] += alpha1*(*v); 14632b692628SMatthew Knepley y[9*(*idx)+1] += alpha2*(*v); 14642b692628SMatthew Knepley y[9*(*idx)+2] += alpha3*(*v); 14652b692628SMatthew Knepley y[9*(*idx)+3] += alpha4*(*v); 14662b692628SMatthew Knepley y[9*(*idx)+4] += alpha5*(*v); 14672b692628SMatthew Knepley y[9*(*idx)+5] += alpha6*(*v); 14682b692628SMatthew Knepley y[9*(*idx)+6] += alpha7*(*v); 14692b692628SMatthew Knepley y[9*(*idx)+7] += alpha8*(*v); 14702b692628SMatthew Knepley y[9*(*idx)+8] += alpha9*(*v); 14712b692628SMatthew Knepley idx++; v++; 14722b692628SMatthew Knepley } 14732b692628SMatthew Knepley } 14749566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(18.0*a->nz)); 14759566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 14769566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 14772b692628SMatthew Knepley PetscFunctionReturn(0); 14782b692628SMatthew Knepley } 14792b692628SMatthew Knepley 1480dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 14812b692628SMatthew Knepley { 14822b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 14832b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1484d2840e09SBarry Smith const PetscScalar *x,*v; 1485d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1486d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1487b24ad042SBarry Smith PetscInt n,i,jrow,j; 14882b692628SMatthew Knepley 14892b692628SMatthew Knepley PetscFunctionBegin; 14909566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 14919566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 14929566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 14932b692628SMatthew Knepley idx = a->j; 14942b692628SMatthew Knepley v = a->a; 14952b692628SMatthew Knepley ii = a->i; 14962b692628SMatthew Knepley 14972b692628SMatthew Knepley for (i=0; i<m; i++) { 14982b692628SMatthew Knepley jrow = ii[i]; 14992b692628SMatthew Knepley n = ii[i+1] - jrow; 15002b692628SMatthew Knepley sum1 = 0.0; 15012b692628SMatthew Knepley sum2 = 0.0; 15022b692628SMatthew Knepley sum3 = 0.0; 15032b692628SMatthew Knepley sum4 = 0.0; 15042b692628SMatthew Knepley sum5 = 0.0; 15052b692628SMatthew Knepley sum6 = 0.0; 15062b692628SMatthew Knepley sum7 = 0.0; 15072b692628SMatthew Knepley sum8 = 0.0; 15082b692628SMatthew Knepley sum9 = 0.0; 15092b692628SMatthew Knepley for (j=0; j<n; j++) { 15102b692628SMatthew Knepley sum1 += v[jrow]*x[9*idx[jrow]]; 15112b692628SMatthew Knepley sum2 += v[jrow]*x[9*idx[jrow]+1]; 15122b692628SMatthew Knepley sum3 += v[jrow]*x[9*idx[jrow]+2]; 15132b692628SMatthew Knepley sum4 += v[jrow]*x[9*idx[jrow]+3]; 15142b692628SMatthew Knepley sum5 += v[jrow]*x[9*idx[jrow]+4]; 15152b692628SMatthew Knepley sum6 += v[jrow]*x[9*idx[jrow]+5]; 15162b692628SMatthew Knepley sum7 += v[jrow]*x[9*idx[jrow]+6]; 15172b692628SMatthew Knepley sum8 += v[jrow]*x[9*idx[jrow]+7]; 15182b692628SMatthew Knepley sum9 += v[jrow]*x[9*idx[jrow]+8]; 15192b692628SMatthew Knepley jrow++; 15202b692628SMatthew Knepley } 15212b692628SMatthew Knepley y[9*i] += sum1; 15222b692628SMatthew Knepley y[9*i+1] += sum2; 15232b692628SMatthew Knepley y[9*i+2] += sum3; 15242b692628SMatthew Knepley y[9*i+3] += sum4; 15252b692628SMatthew Knepley y[9*i+4] += sum5; 15262b692628SMatthew Knepley y[9*i+5] += sum6; 15272b692628SMatthew Knepley y[9*i+6] += sum7; 15282b692628SMatthew Knepley y[9*i+7] += sum8; 15292b692628SMatthew Knepley y[9*i+8] += sum9; 15302b692628SMatthew Knepley } 15312b692628SMatthew Knepley 15329566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(18.0*a->nz)); 15339566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 15349566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 15352b692628SMatthew Knepley PetscFunctionReturn(0); 15362b692628SMatthew Knepley } 15372b692628SMatthew Knepley 1538dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 15392b692628SMatthew Knepley { 15402b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 15412b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1542d2840e09SBarry Smith const PetscScalar *x,*v; 1543d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1544d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1545d2840e09SBarry Smith PetscInt n,i; 15462b692628SMatthew Knepley 15472b692628SMatthew Knepley PetscFunctionBegin; 15489566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 15499566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 15509566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 15512b692628SMatthew Knepley for (i=0; i<m; i++) { 15522b692628SMatthew Knepley idx = a->j + a->i[i]; 15532b692628SMatthew Knepley v = a->a + a->i[i]; 15542b692628SMatthew Knepley n = a->i[i+1] - a->i[i]; 15552b692628SMatthew Knepley alpha1 = x[9*i]; 15562b692628SMatthew Knepley alpha2 = x[9*i+1]; 15572b692628SMatthew Knepley alpha3 = x[9*i+2]; 15582b692628SMatthew Knepley alpha4 = x[9*i+3]; 15592b692628SMatthew Knepley alpha5 = x[9*i+4]; 15602b692628SMatthew Knepley alpha6 = x[9*i+5]; 15612b692628SMatthew Knepley alpha7 = x[9*i+6]; 15622b692628SMatthew Knepley alpha8 = x[9*i+7]; 15632b692628SMatthew Knepley alpha9 = x[9*i+8]; 15642b692628SMatthew Knepley while (n-->0) { 15652b692628SMatthew Knepley y[9*(*idx)] += alpha1*(*v); 15662b692628SMatthew Knepley y[9*(*idx)+1] += alpha2*(*v); 15672b692628SMatthew Knepley y[9*(*idx)+2] += alpha3*(*v); 15682b692628SMatthew Knepley y[9*(*idx)+3] += alpha4*(*v); 15692b692628SMatthew Knepley y[9*(*idx)+4] += alpha5*(*v); 15702b692628SMatthew Knepley y[9*(*idx)+5] += alpha6*(*v); 15712b692628SMatthew Knepley y[9*(*idx)+6] += alpha7*(*v); 15722b692628SMatthew Knepley y[9*(*idx)+7] += alpha8*(*v); 15732b692628SMatthew Knepley y[9*(*idx)+8] += alpha9*(*v); 15742b692628SMatthew Knepley idx++; v++; 15752b692628SMatthew Knepley } 15762b692628SMatthew Knepley } 15779566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(18.0*a->nz)); 15789566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 15799566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 15802b692628SMatthew Knepley PetscFunctionReturn(0); 15812b692628SMatthew Knepley } 1582b9cda22cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1583b9cda22cSBarry Smith { 1584b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1585b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1586d2840e09SBarry Smith const PetscScalar *x,*v; 1587d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1588d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1589d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 1590b9cda22cSBarry Smith 1591b9cda22cSBarry Smith PetscFunctionBegin; 15929566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 15939566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 1594b9cda22cSBarry Smith idx = a->j; 1595b9cda22cSBarry Smith v = a->a; 1596b9cda22cSBarry Smith ii = a->i; 1597b9cda22cSBarry Smith 1598b9cda22cSBarry Smith for (i=0; i<m; i++) { 1599b9cda22cSBarry Smith jrow = ii[i]; 1600b9cda22cSBarry Smith n = ii[i+1] - jrow; 1601b9cda22cSBarry Smith sum1 = 0.0; 1602b9cda22cSBarry Smith sum2 = 0.0; 1603b9cda22cSBarry Smith sum3 = 0.0; 1604b9cda22cSBarry Smith sum4 = 0.0; 1605b9cda22cSBarry Smith sum5 = 0.0; 1606b9cda22cSBarry Smith sum6 = 0.0; 1607b9cda22cSBarry Smith sum7 = 0.0; 1608b9cda22cSBarry Smith sum8 = 0.0; 1609b9cda22cSBarry Smith sum9 = 0.0; 1610b9cda22cSBarry Smith sum10 = 0.0; 161126fbe8dcSKarl Rupp 1612b3c51c66SMatthew Knepley nonzerorow += (n>0); 1613b9cda22cSBarry Smith for (j=0; j<n; j++) { 1614b9cda22cSBarry Smith sum1 += v[jrow]*x[10*idx[jrow]]; 1615b9cda22cSBarry Smith sum2 += v[jrow]*x[10*idx[jrow]+1]; 1616b9cda22cSBarry Smith sum3 += v[jrow]*x[10*idx[jrow]+2]; 1617b9cda22cSBarry Smith sum4 += v[jrow]*x[10*idx[jrow]+3]; 1618b9cda22cSBarry Smith sum5 += v[jrow]*x[10*idx[jrow]+4]; 1619b9cda22cSBarry Smith sum6 += v[jrow]*x[10*idx[jrow]+5]; 1620b9cda22cSBarry Smith sum7 += v[jrow]*x[10*idx[jrow]+6]; 1621b9cda22cSBarry Smith sum8 += v[jrow]*x[10*idx[jrow]+7]; 1622b9cda22cSBarry Smith sum9 += v[jrow]*x[10*idx[jrow]+8]; 1623b9cda22cSBarry Smith sum10 += v[jrow]*x[10*idx[jrow]+9]; 1624b9cda22cSBarry Smith jrow++; 1625b9cda22cSBarry Smith } 1626b9cda22cSBarry Smith y[10*i] = sum1; 1627b9cda22cSBarry Smith y[10*i+1] = sum2; 1628b9cda22cSBarry Smith y[10*i+2] = sum3; 1629b9cda22cSBarry Smith y[10*i+3] = sum4; 1630b9cda22cSBarry Smith y[10*i+4] = sum5; 1631b9cda22cSBarry Smith y[10*i+5] = sum6; 1632b9cda22cSBarry Smith y[10*i+6] = sum7; 1633b9cda22cSBarry Smith y[10*i+7] = sum8; 1634b9cda22cSBarry Smith y[10*i+8] = sum9; 1635b9cda22cSBarry Smith y[10*i+9] = sum10; 1636b9cda22cSBarry Smith } 1637b9cda22cSBarry Smith 16389566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(20.0*a->nz - 10.0*nonzerorow)); 16399566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 16409566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 1641b9cda22cSBarry Smith PetscFunctionReturn(0); 1642b9cda22cSBarry Smith } 1643b9cda22cSBarry Smith 1644b9cda22cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1645b9cda22cSBarry Smith { 1646b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1647b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1648d2840e09SBarry Smith const PetscScalar *x,*v; 1649d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1650d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1651b9cda22cSBarry Smith PetscInt n,i,jrow,j; 1652b9cda22cSBarry Smith 1653b9cda22cSBarry Smith PetscFunctionBegin; 16549566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 16559566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 16569566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 1657b9cda22cSBarry Smith idx = a->j; 1658b9cda22cSBarry Smith v = a->a; 1659b9cda22cSBarry Smith ii = a->i; 1660b9cda22cSBarry Smith 1661b9cda22cSBarry Smith for (i=0; i<m; i++) { 1662b9cda22cSBarry Smith jrow = ii[i]; 1663b9cda22cSBarry Smith n = ii[i+1] - jrow; 1664b9cda22cSBarry Smith sum1 = 0.0; 1665b9cda22cSBarry Smith sum2 = 0.0; 1666b9cda22cSBarry Smith sum3 = 0.0; 1667b9cda22cSBarry Smith sum4 = 0.0; 1668b9cda22cSBarry Smith sum5 = 0.0; 1669b9cda22cSBarry Smith sum6 = 0.0; 1670b9cda22cSBarry Smith sum7 = 0.0; 1671b9cda22cSBarry Smith sum8 = 0.0; 1672b9cda22cSBarry Smith sum9 = 0.0; 1673b9cda22cSBarry Smith sum10 = 0.0; 1674b9cda22cSBarry Smith for (j=0; j<n; j++) { 1675b9cda22cSBarry Smith sum1 += v[jrow]*x[10*idx[jrow]]; 1676b9cda22cSBarry Smith sum2 += v[jrow]*x[10*idx[jrow]+1]; 1677b9cda22cSBarry Smith sum3 += v[jrow]*x[10*idx[jrow]+2]; 1678b9cda22cSBarry Smith sum4 += v[jrow]*x[10*idx[jrow]+3]; 1679b9cda22cSBarry Smith sum5 += v[jrow]*x[10*idx[jrow]+4]; 1680b9cda22cSBarry Smith sum6 += v[jrow]*x[10*idx[jrow]+5]; 1681b9cda22cSBarry Smith sum7 += v[jrow]*x[10*idx[jrow]+6]; 1682b9cda22cSBarry Smith sum8 += v[jrow]*x[10*idx[jrow]+7]; 1683b9cda22cSBarry Smith sum9 += v[jrow]*x[10*idx[jrow]+8]; 1684b9cda22cSBarry Smith sum10 += v[jrow]*x[10*idx[jrow]+9]; 1685b9cda22cSBarry Smith jrow++; 1686b9cda22cSBarry Smith } 1687b9cda22cSBarry Smith y[10*i] += sum1; 1688b9cda22cSBarry Smith y[10*i+1] += sum2; 1689b9cda22cSBarry Smith y[10*i+2] += sum3; 1690b9cda22cSBarry Smith y[10*i+3] += sum4; 1691b9cda22cSBarry Smith y[10*i+4] += sum5; 1692b9cda22cSBarry Smith y[10*i+5] += sum6; 1693b9cda22cSBarry Smith y[10*i+6] += sum7; 1694b9cda22cSBarry Smith y[10*i+7] += sum8; 1695b9cda22cSBarry Smith y[10*i+8] += sum9; 1696b9cda22cSBarry Smith y[10*i+9] += sum10; 1697b9cda22cSBarry Smith } 1698b9cda22cSBarry Smith 16999566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(20.0*a->nz)); 17009566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 17019566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 1702b9cda22cSBarry Smith PetscFunctionReturn(0); 1703b9cda22cSBarry Smith } 1704b9cda22cSBarry Smith 1705b9cda22cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1706b9cda22cSBarry Smith { 1707b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1708b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1709d2840e09SBarry Smith const PetscScalar *x,*v; 1710d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1711d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1712d2840e09SBarry Smith PetscInt n,i; 1713b9cda22cSBarry Smith 1714b9cda22cSBarry Smith PetscFunctionBegin; 17159566063dSJacob Faibussowitsch PetscCall(VecSet(yy,0.0)); 17169566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 17179566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 1718b9cda22cSBarry Smith 1719b9cda22cSBarry Smith for (i=0; i<m; i++) { 1720b9cda22cSBarry Smith idx = a->j + a->i[i]; 1721b9cda22cSBarry Smith v = a->a + a->i[i]; 1722b9cda22cSBarry Smith n = a->i[i+1] - a->i[i]; 1723e29fdc22SBarry Smith alpha1 = x[10*i]; 1724e29fdc22SBarry Smith alpha2 = x[10*i+1]; 1725e29fdc22SBarry Smith alpha3 = x[10*i+2]; 1726e29fdc22SBarry Smith alpha4 = x[10*i+3]; 1727e29fdc22SBarry Smith alpha5 = x[10*i+4]; 1728e29fdc22SBarry Smith alpha6 = x[10*i+5]; 1729e29fdc22SBarry Smith alpha7 = x[10*i+6]; 1730e29fdc22SBarry Smith alpha8 = x[10*i+7]; 1731e29fdc22SBarry Smith alpha9 = x[10*i+8]; 1732b9cda22cSBarry Smith alpha10 = x[10*i+9]; 1733b9cda22cSBarry Smith while (n-->0) { 1734e29fdc22SBarry Smith y[10*(*idx)] += alpha1*(*v); 1735e29fdc22SBarry Smith y[10*(*idx)+1] += alpha2*(*v); 1736e29fdc22SBarry Smith y[10*(*idx)+2] += alpha3*(*v); 1737e29fdc22SBarry Smith y[10*(*idx)+3] += alpha4*(*v); 1738e29fdc22SBarry Smith y[10*(*idx)+4] += alpha5*(*v); 1739e29fdc22SBarry Smith y[10*(*idx)+5] += alpha6*(*v); 1740e29fdc22SBarry Smith y[10*(*idx)+6] += alpha7*(*v); 1741e29fdc22SBarry Smith y[10*(*idx)+7] += alpha8*(*v); 1742e29fdc22SBarry Smith y[10*(*idx)+8] += alpha9*(*v); 1743b9cda22cSBarry Smith y[10*(*idx)+9] += alpha10*(*v); 1744b9cda22cSBarry Smith idx++; v++; 1745b9cda22cSBarry Smith } 1746b9cda22cSBarry Smith } 17479566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(20.0*a->nz)); 17489566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 17499566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 1750b9cda22cSBarry Smith PetscFunctionReturn(0); 1751b9cda22cSBarry Smith } 1752b9cda22cSBarry Smith 1753b9cda22cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1754b9cda22cSBarry Smith { 1755b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1756b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1757d2840e09SBarry Smith const PetscScalar *x,*v; 1758d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1759d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1760d2840e09SBarry Smith PetscInt n,i; 1761b9cda22cSBarry Smith 1762b9cda22cSBarry Smith PetscFunctionBegin; 17639566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 17649566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 17659566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 1766b9cda22cSBarry Smith for (i=0; i<m; i++) { 1767b9cda22cSBarry Smith idx = a->j + a->i[i]; 1768b9cda22cSBarry Smith v = a->a + a->i[i]; 1769b9cda22cSBarry Smith n = a->i[i+1] - a->i[i]; 1770b9cda22cSBarry Smith alpha1 = x[10*i]; 1771b9cda22cSBarry Smith alpha2 = x[10*i+1]; 1772b9cda22cSBarry Smith alpha3 = x[10*i+2]; 1773b9cda22cSBarry Smith alpha4 = x[10*i+3]; 1774b9cda22cSBarry Smith alpha5 = x[10*i+4]; 1775b9cda22cSBarry Smith alpha6 = x[10*i+5]; 1776b9cda22cSBarry Smith alpha7 = x[10*i+6]; 1777b9cda22cSBarry Smith alpha8 = x[10*i+7]; 1778b9cda22cSBarry Smith alpha9 = x[10*i+8]; 1779b9cda22cSBarry Smith alpha10 = x[10*i+9]; 1780b9cda22cSBarry Smith while (n-->0) { 1781b9cda22cSBarry Smith y[10*(*idx)] += alpha1*(*v); 1782b9cda22cSBarry Smith y[10*(*idx)+1] += alpha2*(*v); 1783b9cda22cSBarry Smith y[10*(*idx)+2] += alpha3*(*v); 1784b9cda22cSBarry Smith y[10*(*idx)+3] += alpha4*(*v); 1785b9cda22cSBarry Smith y[10*(*idx)+4] += alpha5*(*v); 1786b9cda22cSBarry Smith y[10*(*idx)+5] += alpha6*(*v); 1787b9cda22cSBarry Smith y[10*(*idx)+6] += alpha7*(*v); 1788b9cda22cSBarry Smith y[10*(*idx)+7] += alpha8*(*v); 1789b9cda22cSBarry Smith y[10*(*idx)+8] += alpha9*(*v); 1790b9cda22cSBarry Smith y[10*(*idx)+9] += alpha10*(*v); 1791b9cda22cSBarry Smith idx++; v++; 1792b9cda22cSBarry Smith } 1793b9cda22cSBarry Smith } 17949566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(20.0*a->nz)); 17959566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 17969566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 1797b9cda22cSBarry Smith PetscFunctionReturn(0); 1798b9cda22cSBarry Smith } 1799b9cda22cSBarry Smith 18002f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/ 1801dbdd7285SBarry Smith PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1802dbdd7285SBarry Smith { 1803dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1804dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1805d2840e09SBarry Smith const PetscScalar *x,*v; 1806d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1807d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1808d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 1809dbdd7285SBarry Smith 1810dbdd7285SBarry Smith PetscFunctionBegin; 18119566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 18129566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 1813dbdd7285SBarry Smith idx = a->j; 1814dbdd7285SBarry Smith v = a->a; 1815dbdd7285SBarry Smith ii = a->i; 1816dbdd7285SBarry Smith 1817dbdd7285SBarry Smith for (i=0; i<m; i++) { 1818dbdd7285SBarry Smith jrow = ii[i]; 1819dbdd7285SBarry Smith n = ii[i+1] - jrow; 1820dbdd7285SBarry Smith sum1 = 0.0; 1821dbdd7285SBarry Smith sum2 = 0.0; 1822dbdd7285SBarry Smith sum3 = 0.0; 1823dbdd7285SBarry Smith sum4 = 0.0; 1824dbdd7285SBarry Smith sum5 = 0.0; 1825dbdd7285SBarry Smith sum6 = 0.0; 1826dbdd7285SBarry Smith sum7 = 0.0; 1827dbdd7285SBarry Smith sum8 = 0.0; 1828dbdd7285SBarry Smith sum9 = 0.0; 1829dbdd7285SBarry Smith sum10 = 0.0; 1830dbdd7285SBarry Smith sum11 = 0.0; 183126fbe8dcSKarl Rupp 1832dbdd7285SBarry Smith nonzerorow += (n>0); 1833dbdd7285SBarry Smith for (j=0; j<n; j++) { 1834dbdd7285SBarry Smith sum1 += v[jrow]*x[11*idx[jrow]]; 1835dbdd7285SBarry Smith sum2 += v[jrow]*x[11*idx[jrow]+1]; 1836dbdd7285SBarry Smith sum3 += v[jrow]*x[11*idx[jrow]+2]; 1837dbdd7285SBarry Smith sum4 += v[jrow]*x[11*idx[jrow]+3]; 1838dbdd7285SBarry Smith sum5 += v[jrow]*x[11*idx[jrow]+4]; 1839dbdd7285SBarry Smith sum6 += v[jrow]*x[11*idx[jrow]+5]; 1840dbdd7285SBarry Smith sum7 += v[jrow]*x[11*idx[jrow]+6]; 1841dbdd7285SBarry Smith sum8 += v[jrow]*x[11*idx[jrow]+7]; 1842dbdd7285SBarry Smith sum9 += v[jrow]*x[11*idx[jrow]+8]; 1843dbdd7285SBarry Smith sum10 += v[jrow]*x[11*idx[jrow]+9]; 1844dbdd7285SBarry Smith sum11 += v[jrow]*x[11*idx[jrow]+10]; 1845dbdd7285SBarry Smith jrow++; 1846dbdd7285SBarry Smith } 1847dbdd7285SBarry Smith y[11*i] = sum1; 1848dbdd7285SBarry Smith y[11*i+1] = sum2; 1849dbdd7285SBarry Smith y[11*i+2] = sum3; 1850dbdd7285SBarry Smith y[11*i+3] = sum4; 1851dbdd7285SBarry Smith y[11*i+4] = sum5; 1852dbdd7285SBarry Smith y[11*i+5] = sum6; 1853dbdd7285SBarry Smith y[11*i+6] = sum7; 1854dbdd7285SBarry Smith y[11*i+7] = sum8; 1855dbdd7285SBarry Smith y[11*i+8] = sum9; 1856dbdd7285SBarry Smith y[11*i+9] = sum10; 1857dbdd7285SBarry Smith y[11*i+10] = sum11; 1858dbdd7285SBarry Smith } 1859dbdd7285SBarry Smith 18609566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(22.0*a->nz - 11*nonzerorow)); 18619566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 18629566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 1863dbdd7285SBarry Smith PetscFunctionReturn(0); 1864dbdd7285SBarry Smith } 1865dbdd7285SBarry Smith 1866dbdd7285SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 1867dbdd7285SBarry Smith { 1868dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1869dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1870d2840e09SBarry Smith const PetscScalar *x,*v; 1871d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1872d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1873dbdd7285SBarry Smith PetscInt n,i,jrow,j; 1874dbdd7285SBarry Smith 1875dbdd7285SBarry Smith PetscFunctionBegin; 18769566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 18779566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 18789566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 1879dbdd7285SBarry Smith idx = a->j; 1880dbdd7285SBarry Smith v = a->a; 1881dbdd7285SBarry Smith ii = a->i; 1882dbdd7285SBarry Smith 1883dbdd7285SBarry Smith for (i=0; i<m; i++) { 1884dbdd7285SBarry Smith jrow = ii[i]; 1885dbdd7285SBarry Smith n = ii[i+1] - jrow; 1886dbdd7285SBarry Smith sum1 = 0.0; 1887dbdd7285SBarry Smith sum2 = 0.0; 1888dbdd7285SBarry Smith sum3 = 0.0; 1889dbdd7285SBarry Smith sum4 = 0.0; 1890dbdd7285SBarry Smith sum5 = 0.0; 1891dbdd7285SBarry Smith sum6 = 0.0; 1892dbdd7285SBarry Smith sum7 = 0.0; 1893dbdd7285SBarry Smith sum8 = 0.0; 1894dbdd7285SBarry Smith sum9 = 0.0; 1895dbdd7285SBarry Smith sum10 = 0.0; 1896dbdd7285SBarry Smith sum11 = 0.0; 1897dbdd7285SBarry Smith for (j=0; j<n; j++) { 1898dbdd7285SBarry Smith sum1 += v[jrow]*x[11*idx[jrow]]; 1899dbdd7285SBarry Smith sum2 += v[jrow]*x[11*idx[jrow]+1]; 1900dbdd7285SBarry Smith sum3 += v[jrow]*x[11*idx[jrow]+2]; 1901dbdd7285SBarry Smith sum4 += v[jrow]*x[11*idx[jrow]+3]; 1902dbdd7285SBarry Smith sum5 += v[jrow]*x[11*idx[jrow]+4]; 1903dbdd7285SBarry Smith sum6 += v[jrow]*x[11*idx[jrow]+5]; 1904dbdd7285SBarry Smith sum7 += v[jrow]*x[11*idx[jrow]+6]; 1905dbdd7285SBarry Smith sum8 += v[jrow]*x[11*idx[jrow]+7]; 1906dbdd7285SBarry Smith sum9 += v[jrow]*x[11*idx[jrow]+8]; 1907dbdd7285SBarry Smith sum10 += v[jrow]*x[11*idx[jrow]+9]; 1908dbdd7285SBarry Smith sum11 += v[jrow]*x[11*idx[jrow]+10]; 1909dbdd7285SBarry Smith jrow++; 1910dbdd7285SBarry Smith } 1911dbdd7285SBarry Smith y[11*i] += sum1; 1912dbdd7285SBarry Smith y[11*i+1] += sum2; 1913dbdd7285SBarry Smith y[11*i+2] += sum3; 1914dbdd7285SBarry Smith y[11*i+3] += sum4; 1915dbdd7285SBarry Smith y[11*i+4] += sum5; 1916dbdd7285SBarry Smith y[11*i+5] += sum6; 1917dbdd7285SBarry Smith y[11*i+6] += sum7; 1918dbdd7285SBarry Smith y[11*i+7] += sum8; 1919dbdd7285SBarry Smith y[11*i+8] += sum9; 1920dbdd7285SBarry Smith y[11*i+9] += sum10; 1921dbdd7285SBarry Smith y[11*i+10] += sum11; 1922dbdd7285SBarry Smith } 1923dbdd7285SBarry Smith 19249566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(22.0*a->nz)); 19259566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 19269566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 1927dbdd7285SBarry Smith PetscFunctionReturn(0); 1928dbdd7285SBarry Smith } 1929dbdd7285SBarry Smith 1930dbdd7285SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1931dbdd7285SBarry Smith { 1932dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1933dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1934d2840e09SBarry Smith const PetscScalar *x,*v; 1935d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 1936d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1937d2840e09SBarry Smith PetscInt n,i; 1938dbdd7285SBarry Smith 1939dbdd7285SBarry Smith PetscFunctionBegin; 19409566063dSJacob Faibussowitsch PetscCall(VecSet(yy,0.0)); 19419566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 19429566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 1943dbdd7285SBarry Smith 1944dbdd7285SBarry Smith for (i=0; i<m; i++) { 1945dbdd7285SBarry Smith idx = a->j + a->i[i]; 1946dbdd7285SBarry Smith v = a->a + a->i[i]; 1947dbdd7285SBarry Smith n = a->i[i+1] - a->i[i]; 1948dbdd7285SBarry Smith alpha1 = x[11*i]; 1949dbdd7285SBarry Smith alpha2 = x[11*i+1]; 1950dbdd7285SBarry Smith alpha3 = x[11*i+2]; 1951dbdd7285SBarry Smith alpha4 = x[11*i+3]; 1952dbdd7285SBarry Smith alpha5 = x[11*i+4]; 1953dbdd7285SBarry Smith alpha6 = x[11*i+5]; 1954dbdd7285SBarry Smith alpha7 = x[11*i+6]; 1955dbdd7285SBarry Smith alpha8 = x[11*i+7]; 1956dbdd7285SBarry Smith alpha9 = x[11*i+8]; 1957dbdd7285SBarry Smith alpha10 = x[11*i+9]; 1958dbdd7285SBarry Smith alpha11 = x[11*i+10]; 1959dbdd7285SBarry Smith while (n-->0) { 1960dbdd7285SBarry Smith y[11*(*idx)] += alpha1*(*v); 1961dbdd7285SBarry Smith y[11*(*idx)+1] += alpha2*(*v); 1962dbdd7285SBarry Smith y[11*(*idx)+2] += alpha3*(*v); 1963dbdd7285SBarry Smith y[11*(*idx)+3] += alpha4*(*v); 1964dbdd7285SBarry Smith y[11*(*idx)+4] += alpha5*(*v); 1965dbdd7285SBarry Smith y[11*(*idx)+5] += alpha6*(*v); 1966dbdd7285SBarry Smith y[11*(*idx)+6] += alpha7*(*v); 1967dbdd7285SBarry Smith y[11*(*idx)+7] += alpha8*(*v); 1968dbdd7285SBarry Smith y[11*(*idx)+8] += alpha9*(*v); 1969dbdd7285SBarry Smith y[11*(*idx)+9] += alpha10*(*v); 1970dbdd7285SBarry Smith y[11*(*idx)+10] += alpha11*(*v); 1971dbdd7285SBarry Smith idx++; v++; 1972dbdd7285SBarry Smith } 1973dbdd7285SBarry Smith } 19749566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(22.0*a->nz)); 19759566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 19769566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 1977dbdd7285SBarry Smith PetscFunctionReturn(0); 1978dbdd7285SBarry Smith } 1979dbdd7285SBarry Smith 1980dbdd7285SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 1981dbdd7285SBarry Smith { 1982dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1983dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1984d2840e09SBarry Smith const PetscScalar *x,*v; 1985d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 1986d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1987d2840e09SBarry Smith PetscInt n,i; 1988dbdd7285SBarry Smith 1989dbdd7285SBarry Smith PetscFunctionBegin; 19909566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 19919566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 19929566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 1993dbdd7285SBarry Smith for (i=0; i<m; i++) { 1994dbdd7285SBarry Smith idx = a->j + a->i[i]; 1995dbdd7285SBarry Smith v = a->a + a->i[i]; 1996dbdd7285SBarry Smith n = a->i[i+1] - a->i[i]; 1997dbdd7285SBarry Smith alpha1 = x[11*i]; 1998dbdd7285SBarry Smith alpha2 = x[11*i+1]; 1999dbdd7285SBarry Smith alpha3 = x[11*i+2]; 2000dbdd7285SBarry Smith alpha4 = x[11*i+3]; 2001dbdd7285SBarry Smith alpha5 = x[11*i+4]; 2002dbdd7285SBarry Smith alpha6 = x[11*i+5]; 2003dbdd7285SBarry Smith alpha7 = x[11*i+6]; 2004dbdd7285SBarry Smith alpha8 = x[11*i+7]; 2005dbdd7285SBarry Smith alpha9 = x[11*i+8]; 2006dbdd7285SBarry Smith alpha10 = x[11*i+9]; 2007dbdd7285SBarry Smith alpha11 = x[11*i+10]; 2008dbdd7285SBarry Smith while (n-->0) { 2009dbdd7285SBarry Smith y[11*(*idx)] += alpha1*(*v); 2010dbdd7285SBarry Smith y[11*(*idx)+1] += alpha2*(*v); 2011dbdd7285SBarry Smith y[11*(*idx)+2] += alpha3*(*v); 2012dbdd7285SBarry Smith y[11*(*idx)+3] += alpha4*(*v); 2013dbdd7285SBarry Smith y[11*(*idx)+4] += alpha5*(*v); 2014dbdd7285SBarry Smith y[11*(*idx)+5] += alpha6*(*v); 2015dbdd7285SBarry Smith y[11*(*idx)+6] += alpha7*(*v); 2016dbdd7285SBarry Smith y[11*(*idx)+7] += alpha8*(*v); 2017dbdd7285SBarry Smith y[11*(*idx)+8] += alpha9*(*v); 2018dbdd7285SBarry Smith y[11*(*idx)+9] += alpha10*(*v); 2019dbdd7285SBarry Smith y[11*(*idx)+10] += alpha11*(*v); 2020dbdd7285SBarry Smith idx++; v++; 2021dbdd7285SBarry Smith } 2022dbdd7285SBarry Smith } 20239566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(22.0*a->nz)); 20249566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 20259566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 2026dbdd7285SBarry Smith PetscFunctionReturn(0); 2027dbdd7285SBarry Smith } 2028dbdd7285SBarry Smith 2029dbdd7285SBarry Smith /*--------------------------------------------------------------------------------------------*/ 2030dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 20312f7816d4SBarry Smith { 20322f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 20332f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2034d2840e09SBarry Smith const PetscScalar *x,*v; 2035d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 20362f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2037d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2038d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 20392f7816d4SBarry Smith 20402f7816d4SBarry Smith PetscFunctionBegin; 20419566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 20429566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 20432f7816d4SBarry Smith idx = a->j; 20442f7816d4SBarry Smith v = a->a; 20452f7816d4SBarry Smith ii = a->i; 20462f7816d4SBarry Smith 20472f7816d4SBarry Smith for (i=0; i<m; i++) { 20482f7816d4SBarry Smith jrow = ii[i]; 20492f7816d4SBarry Smith n = ii[i+1] - jrow; 20502f7816d4SBarry Smith sum1 = 0.0; 20512f7816d4SBarry Smith sum2 = 0.0; 20522f7816d4SBarry Smith sum3 = 0.0; 20532f7816d4SBarry Smith sum4 = 0.0; 20542f7816d4SBarry Smith sum5 = 0.0; 20552f7816d4SBarry Smith sum6 = 0.0; 20562f7816d4SBarry Smith sum7 = 0.0; 20572f7816d4SBarry Smith sum8 = 0.0; 20582f7816d4SBarry Smith sum9 = 0.0; 20592f7816d4SBarry Smith sum10 = 0.0; 20602f7816d4SBarry Smith sum11 = 0.0; 20612f7816d4SBarry Smith sum12 = 0.0; 20622f7816d4SBarry Smith sum13 = 0.0; 20632f7816d4SBarry Smith sum14 = 0.0; 20642f7816d4SBarry Smith sum15 = 0.0; 20652f7816d4SBarry Smith sum16 = 0.0; 206626fbe8dcSKarl Rupp 2067b3c51c66SMatthew Knepley nonzerorow += (n>0); 20682f7816d4SBarry Smith for (j=0; j<n; j++) { 20692f7816d4SBarry Smith sum1 += v[jrow]*x[16*idx[jrow]]; 20702f7816d4SBarry Smith sum2 += v[jrow]*x[16*idx[jrow]+1]; 20712f7816d4SBarry Smith sum3 += v[jrow]*x[16*idx[jrow]+2]; 20722f7816d4SBarry Smith sum4 += v[jrow]*x[16*idx[jrow]+3]; 20732f7816d4SBarry Smith sum5 += v[jrow]*x[16*idx[jrow]+4]; 20742f7816d4SBarry Smith sum6 += v[jrow]*x[16*idx[jrow]+5]; 20752f7816d4SBarry Smith sum7 += v[jrow]*x[16*idx[jrow]+6]; 20762f7816d4SBarry Smith sum8 += v[jrow]*x[16*idx[jrow]+7]; 20772f7816d4SBarry Smith sum9 += v[jrow]*x[16*idx[jrow]+8]; 20782f7816d4SBarry Smith sum10 += v[jrow]*x[16*idx[jrow]+9]; 20792f7816d4SBarry Smith sum11 += v[jrow]*x[16*idx[jrow]+10]; 20802f7816d4SBarry Smith sum12 += v[jrow]*x[16*idx[jrow]+11]; 20812f7816d4SBarry Smith sum13 += v[jrow]*x[16*idx[jrow]+12]; 20822f7816d4SBarry Smith sum14 += v[jrow]*x[16*idx[jrow]+13]; 20832f7816d4SBarry Smith sum15 += v[jrow]*x[16*idx[jrow]+14]; 20842f7816d4SBarry Smith sum16 += v[jrow]*x[16*idx[jrow]+15]; 20852f7816d4SBarry Smith jrow++; 20862f7816d4SBarry Smith } 20872f7816d4SBarry Smith y[16*i] = sum1; 20882f7816d4SBarry Smith y[16*i+1] = sum2; 20892f7816d4SBarry Smith y[16*i+2] = sum3; 20902f7816d4SBarry Smith y[16*i+3] = sum4; 20912f7816d4SBarry Smith y[16*i+4] = sum5; 20922f7816d4SBarry Smith y[16*i+5] = sum6; 20932f7816d4SBarry Smith y[16*i+6] = sum7; 20942f7816d4SBarry Smith y[16*i+7] = sum8; 20952f7816d4SBarry Smith y[16*i+8] = sum9; 20962f7816d4SBarry Smith y[16*i+9] = sum10; 20972f7816d4SBarry Smith y[16*i+10] = sum11; 20982f7816d4SBarry Smith y[16*i+11] = sum12; 20992f7816d4SBarry Smith y[16*i+12] = sum13; 21002f7816d4SBarry Smith y[16*i+13] = sum14; 21012f7816d4SBarry Smith y[16*i+14] = sum15; 21022f7816d4SBarry Smith y[16*i+15] = sum16; 21032f7816d4SBarry Smith } 21042f7816d4SBarry Smith 21059566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(32.0*a->nz - 16.0*nonzerorow)); 21069566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 21079566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 21082f7816d4SBarry Smith PetscFunctionReturn(0); 21092f7816d4SBarry Smith } 21102f7816d4SBarry Smith 2111dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 21122f7816d4SBarry Smith { 21132f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 21142f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2115d2840e09SBarry Smith const PetscScalar *x,*v; 2116d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 21172f7816d4SBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2118d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2119d2840e09SBarry Smith PetscInt n,i; 21202f7816d4SBarry Smith 21212f7816d4SBarry Smith PetscFunctionBegin; 21229566063dSJacob Faibussowitsch PetscCall(VecSet(yy,0.0)); 21239566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 21249566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 2125bfec09a0SHong Zhang 21262f7816d4SBarry Smith for (i=0; i<m; i++) { 2127bfec09a0SHong Zhang idx = a->j + a->i[i]; 2128bfec09a0SHong Zhang v = a->a + a->i[i]; 21292f7816d4SBarry Smith n = a->i[i+1] - a->i[i]; 21302f7816d4SBarry Smith alpha1 = x[16*i]; 21312f7816d4SBarry Smith alpha2 = x[16*i+1]; 21322f7816d4SBarry Smith alpha3 = x[16*i+2]; 21332f7816d4SBarry Smith alpha4 = x[16*i+3]; 21342f7816d4SBarry Smith alpha5 = x[16*i+4]; 21352f7816d4SBarry Smith alpha6 = x[16*i+5]; 21362f7816d4SBarry Smith alpha7 = x[16*i+6]; 21372f7816d4SBarry Smith alpha8 = x[16*i+7]; 21382f7816d4SBarry Smith alpha9 = x[16*i+8]; 21392f7816d4SBarry Smith alpha10 = x[16*i+9]; 21402f7816d4SBarry Smith alpha11 = x[16*i+10]; 21412f7816d4SBarry Smith alpha12 = x[16*i+11]; 21422f7816d4SBarry Smith alpha13 = x[16*i+12]; 21432f7816d4SBarry Smith alpha14 = x[16*i+13]; 21442f7816d4SBarry Smith alpha15 = x[16*i+14]; 21452f7816d4SBarry Smith alpha16 = x[16*i+15]; 21462f7816d4SBarry Smith while (n-->0) { 21472f7816d4SBarry Smith y[16*(*idx)] += alpha1*(*v); 21482f7816d4SBarry Smith y[16*(*idx)+1] += alpha2*(*v); 21492f7816d4SBarry Smith y[16*(*idx)+2] += alpha3*(*v); 21502f7816d4SBarry Smith y[16*(*idx)+3] += alpha4*(*v); 21512f7816d4SBarry Smith y[16*(*idx)+4] += alpha5*(*v); 21522f7816d4SBarry Smith y[16*(*idx)+5] += alpha6*(*v); 21532f7816d4SBarry Smith y[16*(*idx)+6] += alpha7*(*v); 21542f7816d4SBarry Smith y[16*(*idx)+7] += alpha8*(*v); 21552f7816d4SBarry Smith y[16*(*idx)+8] += alpha9*(*v); 21562f7816d4SBarry Smith y[16*(*idx)+9] += alpha10*(*v); 21572f7816d4SBarry Smith y[16*(*idx)+10] += alpha11*(*v); 21582f7816d4SBarry Smith y[16*(*idx)+11] += alpha12*(*v); 21592f7816d4SBarry Smith y[16*(*idx)+12] += alpha13*(*v); 21602f7816d4SBarry Smith y[16*(*idx)+13] += alpha14*(*v); 21612f7816d4SBarry Smith y[16*(*idx)+14] += alpha15*(*v); 21622f7816d4SBarry Smith y[16*(*idx)+15] += alpha16*(*v); 21632f7816d4SBarry Smith idx++; v++; 21642f7816d4SBarry Smith } 21652f7816d4SBarry Smith } 21669566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(32.0*a->nz)); 21679566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 21689566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 21692f7816d4SBarry Smith PetscFunctionReturn(0); 21702f7816d4SBarry Smith } 21712f7816d4SBarry Smith 2172dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 21732f7816d4SBarry Smith { 21742f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 21752f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2176d2840e09SBarry Smith const PetscScalar *x,*v; 2177d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 21782f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2179d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2180b24ad042SBarry Smith PetscInt n,i,jrow,j; 21812f7816d4SBarry Smith 21822f7816d4SBarry Smith PetscFunctionBegin; 21839566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 21849566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 21859566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 21862f7816d4SBarry Smith idx = a->j; 21872f7816d4SBarry Smith v = a->a; 21882f7816d4SBarry Smith ii = a->i; 21892f7816d4SBarry Smith 21902f7816d4SBarry Smith for (i=0; i<m; i++) { 21912f7816d4SBarry Smith jrow = ii[i]; 21922f7816d4SBarry Smith n = ii[i+1] - jrow; 21932f7816d4SBarry Smith sum1 = 0.0; 21942f7816d4SBarry Smith sum2 = 0.0; 21952f7816d4SBarry Smith sum3 = 0.0; 21962f7816d4SBarry Smith sum4 = 0.0; 21972f7816d4SBarry Smith sum5 = 0.0; 21982f7816d4SBarry Smith sum6 = 0.0; 21992f7816d4SBarry Smith sum7 = 0.0; 22002f7816d4SBarry Smith sum8 = 0.0; 22012f7816d4SBarry Smith sum9 = 0.0; 22022f7816d4SBarry Smith sum10 = 0.0; 22032f7816d4SBarry Smith sum11 = 0.0; 22042f7816d4SBarry Smith sum12 = 0.0; 22052f7816d4SBarry Smith sum13 = 0.0; 22062f7816d4SBarry Smith sum14 = 0.0; 22072f7816d4SBarry Smith sum15 = 0.0; 22082f7816d4SBarry Smith sum16 = 0.0; 22092f7816d4SBarry Smith for (j=0; j<n; j++) { 22102f7816d4SBarry Smith sum1 += v[jrow]*x[16*idx[jrow]]; 22112f7816d4SBarry Smith sum2 += v[jrow]*x[16*idx[jrow]+1]; 22122f7816d4SBarry Smith sum3 += v[jrow]*x[16*idx[jrow]+2]; 22132f7816d4SBarry Smith sum4 += v[jrow]*x[16*idx[jrow]+3]; 22142f7816d4SBarry Smith sum5 += v[jrow]*x[16*idx[jrow]+4]; 22152f7816d4SBarry Smith sum6 += v[jrow]*x[16*idx[jrow]+5]; 22162f7816d4SBarry Smith sum7 += v[jrow]*x[16*idx[jrow]+6]; 22172f7816d4SBarry Smith sum8 += v[jrow]*x[16*idx[jrow]+7]; 22182f7816d4SBarry Smith sum9 += v[jrow]*x[16*idx[jrow]+8]; 22192f7816d4SBarry Smith sum10 += v[jrow]*x[16*idx[jrow]+9]; 22202f7816d4SBarry Smith sum11 += v[jrow]*x[16*idx[jrow]+10]; 22212f7816d4SBarry Smith sum12 += v[jrow]*x[16*idx[jrow]+11]; 22222f7816d4SBarry Smith sum13 += v[jrow]*x[16*idx[jrow]+12]; 22232f7816d4SBarry Smith sum14 += v[jrow]*x[16*idx[jrow]+13]; 22242f7816d4SBarry Smith sum15 += v[jrow]*x[16*idx[jrow]+14]; 22252f7816d4SBarry Smith sum16 += v[jrow]*x[16*idx[jrow]+15]; 22262f7816d4SBarry Smith jrow++; 22272f7816d4SBarry Smith } 22282f7816d4SBarry Smith y[16*i] += sum1; 22292f7816d4SBarry Smith y[16*i+1] += sum2; 22302f7816d4SBarry Smith y[16*i+2] += sum3; 22312f7816d4SBarry Smith y[16*i+3] += sum4; 22322f7816d4SBarry Smith y[16*i+4] += sum5; 22332f7816d4SBarry Smith y[16*i+5] += sum6; 22342f7816d4SBarry Smith y[16*i+6] += sum7; 22352f7816d4SBarry Smith y[16*i+7] += sum8; 22362f7816d4SBarry Smith y[16*i+8] += sum9; 22372f7816d4SBarry Smith y[16*i+9] += sum10; 22382f7816d4SBarry Smith y[16*i+10] += sum11; 22392f7816d4SBarry Smith y[16*i+11] += sum12; 22402f7816d4SBarry Smith y[16*i+12] += sum13; 22412f7816d4SBarry Smith y[16*i+13] += sum14; 22422f7816d4SBarry Smith y[16*i+14] += sum15; 22432f7816d4SBarry Smith y[16*i+15] += sum16; 22442f7816d4SBarry Smith } 22452f7816d4SBarry Smith 22469566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(32.0*a->nz)); 22479566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 22489566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 22492f7816d4SBarry Smith PetscFunctionReturn(0); 22502f7816d4SBarry Smith } 22512f7816d4SBarry Smith 2252dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 22532f7816d4SBarry Smith { 22542f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 22552f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2256d2840e09SBarry Smith const PetscScalar *x,*v; 2257d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 22582f7816d4SBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2259d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2260d2840e09SBarry Smith PetscInt n,i; 22612f7816d4SBarry Smith 22622f7816d4SBarry Smith PetscFunctionBegin; 22639566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 22649566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 22659566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 22662f7816d4SBarry Smith for (i=0; i<m; i++) { 2267bfec09a0SHong Zhang idx = a->j + a->i[i]; 2268bfec09a0SHong Zhang v = a->a + a->i[i]; 22692f7816d4SBarry Smith n = a->i[i+1] - a->i[i]; 22702f7816d4SBarry Smith alpha1 = x[16*i]; 22712f7816d4SBarry Smith alpha2 = x[16*i+1]; 22722f7816d4SBarry Smith alpha3 = x[16*i+2]; 22732f7816d4SBarry Smith alpha4 = x[16*i+3]; 22742f7816d4SBarry Smith alpha5 = x[16*i+4]; 22752f7816d4SBarry Smith alpha6 = x[16*i+5]; 22762f7816d4SBarry Smith alpha7 = x[16*i+6]; 22772f7816d4SBarry Smith alpha8 = x[16*i+7]; 22782f7816d4SBarry Smith alpha9 = x[16*i+8]; 22792f7816d4SBarry Smith alpha10 = x[16*i+9]; 22802f7816d4SBarry Smith alpha11 = x[16*i+10]; 22812f7816d4SBarry Smith alpha12 = x[16*i+11]; 22822f7816d4SBarry Smith alpha13 = x[16*i+12]; 22832f7816d4SBarry Smith alpha14 = x[16*i+13]; 22842f7816d4SBarry Smith alpha15 = x[16*i+14]; 22852f7816d4SBarry Smith alpha16 = x[16*i+15]; 22862f7816d4SBarry Smith while (n-->0) { 22872f7816d4SBarry Smith y[16*(*idx)] += alpha1*(*v); 22882f7816d4SBarry Smith y[16*(*idx)+1] += alpha2*(*v); 22892f7816d4SBarry Smith y[16*(*idx)+2] += alpha3*(*v); 22902f7816d4SBarry Smith y[16*(*idx)+3] += alpha4*(*v); 22912f7816d4SBarry Smith y[16*(*idx)+4] += alpha5*(*v); 22922f7816d4SBarry Smith y[16*(*idx)+5] += alpha6*(*v); 22932f7816d4SBarry Smith y[16*(*idx)+6] += alpha7*(*v); 22942f7816d4SBarry Smith y[16*(*idx)+7] += alpha8*(*v); 22952f7816d4SBarry Smith y[16*(*idx)+8] += alpha9*(*v); 22962f7816d4SBarry Smith y[16*(*idx)+9] += alpha10*(*v); 22972f7816d4SBarry Smith y[16*(*idx)+10] += alpha11*(*v); 22982f7816d4SBarry Smith y[16*(*idx)+11] += alpha12*(*v); 22992f7816d4SBarry Smith y[16*(*idx)+12] += alpha13*(*v); 23002f7816d4SBarry Smith y[16*(*idx)+13] += alpha14*(*v); 23012f7816d4SBarry Smith y[16*(*idx)+14] += alpha15*(*v); 23022f7816d4SBarry Smith y[16*(*idx)+15] += alpha16*(*v); 23032f7816d4SBarry Smith idx++; v++; 23042f7816d4SBarry Smith } 23052f7816d4SBarry Smith } 23069566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(32.0*a->nz)); 23079566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 23089566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 230966ed3db0SBarry Smith PetscFunctionReturn(0); 231066ed3db0SBarry Smith } 231166ed3db0SBarry Smith 2312ed1c418cSBarry Smith /*--------------------------------------------------------------------------------------------*/ 2313ed1c418cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2314ed1c418cSBarry Smith { 2315ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2316ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2317d2840e09SBarry Smith const PetscScalar *x,*v; 2318d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2319ed1c418cSBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2320d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2321d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 2322ed1c418cSBarry Smith 2323ed1c418cSBarry Smith PetscFunctionBegin; 23249566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 23259566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 2326ed1c418cSBarry Smith idx = a->j; 2327ed1c418cSBarry Smith v = a->a; 2328ed1c418cSBarry Smith ii = a->i; 2329ed1c418cSBarry Smith 2330ed1c418cSBarry Smith for (i=0; i<m; i++) { 2331ed1c418cSBarry Smith jrow = ii[i]; 2332ed1c418cSBarry Smith n = ii[i+1] - jrow; 2333ed1c418cSBarry Smith sum1 = 0.0; 2334ed1c418cSBarry Smith sum2 = 0.0; 2335ed1c418cSBarry Smith sum3 = 0.0; 2336ed1c418cSBarry Smith sum4 = 0.0; 2337ed1c418cSBarry Smith sum5 = 0.0; 2338ed1c418cSBarry Smith sum6 = 0.0; 2339ed1c418cSBarry Smith sum7 = 0.0; 2340ed1c418cSBarry Smith sum8 = 0.0; 2341ed1c418cSBarry Smith sum9 = 0.0; 2342ed1c418cSBarry Smith sum10 = 0.0; 2343ed1c418cSBarry Smith sum11 = 0.0; 2344ed1c418cSBarry Smith sum12 = 0.0; 2345ed1c418cSBarry Smith sum13 = 0.0; 2346ed1c418cSBarry Smith sum14 = 0.0; 2347ed1c418cSBarry Smith sum15 = 0.0; 2348ed1c418cSBarry Smith sum16 = 0.0; 2349ed1c418cSBarry Smith sum17 = 0.0; 2350ed1c418cSBarry Smith sum18 = 0.0; 235126fbe8dcSKarl Rupp 2352ed1c418cSBarry Smith nonzerorow += (n>0); 2353ed1c418cSBarry Smith for (j=0; j<n; j++) { 2354ed1c418cSBarry Smith sum1 += v[jrow]*x[18*idx[jrow]]; 2355ed1c418cSBarry Smith sum2 += v[jrow]*x[18*idx[jrow]+1]; 2356ed1c418cSBarry Smith sum3 += v[jrow]*x[18*idx[jrow]+2]; 2357ed1c418cSBarry Smith sum4 += v[jrow]*x[18*idx[jrow]+3]; 2358ed1c418cSBarry Smith sum5 += v[jrow]*x[18*idx[jrow]+4]; 2359ed1c418cSBarry Smith sum6 += v[jrow]*x[18*idx[jrow]+5]; 2360ed1c418cSBarry Smith sum7 += v[jrow]*x[18*idx[jrow]+6]; 2361ed1c418cSBarry Smith sum8 += v[jrow]*x[18*idx[jrow]+7]; 2362ed1c418cSBarry Smith sum9 += v[jrow]*x[18*idx[jrow]+8]; 2363ed1c418cSBarry Smith sum10 += v[jrow]*x[18*idx[jrow]+9]; 2364ed1c418cSBarry Smith sum11 += v[jrow]*x[18*idx[jrow]+10]; 2365ed1c418cSBarry Smith sum12 += v[jrow]*x[18*idx[jrow]+11]; 2366ed1c418cSBarry Smith sum13 += v[jrow]*x[18*idx[jrow]+12]; 2367ed1c418cSBarry Smith sum14 += v[jrow]*x[18*idx[jrow]+13]; 2368ed1c418cSBarry Smith sum15 += v[jrow]*x[18*idx[jrow]+14]; 2369ed1c418cSBarry Smith sum16 += v[jrow]*x[18*idx[jrow]+15]; 2370ed1c418cSBarry Smith sum17 += v[jrow]*x[18*idx[jrow]+16]; 2371ed1c418cSBarry Smith sum18 += v[jrow]*x[18*idx[jrow]+17]; 2372ed1c418cSBarry Smith jrow++; 2373ed1c418cSBarry Smith } 2374ed1c418cSBarry Smith y[18*i] = sum1; 2375ed1c418cSBarry Smith y[18*i+1] = sum2; 2376ed1c418cSBarry Smith y[18*i+2] = sum3; 2377ed1c418cSBarry Smith y[18*i+3] = sum4; 2378ed1c418cSBarry Smith y[18*i+4] = sum5; 2379ed1c418cSBarry Smith y[18*i+5] = sum6; 2380ed1c418cSBarry Smith y[18*i+6] = sum7; 2381ed1c418cSBarry Smith y[18*i+7] = sum8; 2382ed1c418cSBarry Smith y[18*i+8] = sum9; 2383ed1c418cSBarry Smith y[18*i+9] = sum10; 2384ed1c418cSBarry Smith y[18*i+10] = sum11; 2385ed1c418cSBarry Smith y[18*i+11] = sum12; 2386ed1c418cSBarry Smith y[18*i+12] = sum13; 2387ed1c418cSBarry Smith y[18*i+13] = sum14; 2388ed1c418cSBarry Smith y[18*i+14] = sum15; 2389ed1c418cSBarry Smith y[18*i+15] = sum16; 2390ed1c418cSBarry Smith y[18*i+16] = sum17; 2391ed1c418cSBarry Smith y[18*i+17] = sum18; 2392ed1c418cSBarry Smith } 2393ed1c418cSBarry Smith 23949566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(36.0*a->nz - 18.0*nonzerorow)); 23959566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 23969566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 2397ed1c418cSBarry Smith PetscFunctionReturn(0); 2398ed1c418cSBarry Smith } 2399ed1c418cSBarry Smith 2400ed1c418cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2401ed1c418cSBarry Smith { 2402ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2403ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2404d2840e09SBarry Smith const PetscScalar *x,*v; 2405d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2406ed1c418cSBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2407d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2408d2840e09SBarry Smith PetscInt n,i; 2409ed1c418cSBarry Smith 2410ed1c418cSBarry Smith PetscFunctionBegin; 24119566063dSJacob Faibussowitsch PetscCall(VecSet(yy,0.0)); 24129566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 24139566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 2414ed1c418cSBarry Smith 2415ed1c418cSBarry Smith for (i=0; i<m; i++) { 2416ed1c418cSBarry Smith idx = a->j + a->i[i]; 2417ed1c418cSBarry Smith v = a->a + a->i[i]; 2418ed1c418cSBarry Smith n = a->i[i+1] - a->i[i]; 2419ed1c418cSBarry Smith alpha1 = x[18*i]; 2420ed1c418cSBarry Smith alpha2 = x[18*i+1]; 2421ed1c418cSBarry Smith alpha3 = x[18*i+2]; 2422ed1c418cSBarry Smith alpha4 = x[18*i+3]; 2423ed1c418cSBarry Smith alpha5 = x[18*i+4]; 2424ed1c418cSBarry Smith alpha6 = x[18*i+5]; 2425ed1c418cSBarry Smith alpha7 = x[18*i+6]; 2426ed1c418cSBarry Smith alpha8 = x[18*i+7]; 2427ed1c418cSBarry Smith alpha9 = x[18*i+8]; 2428ed1c418cSBarry Smith alpha10 = x[18*i+9]; 2429ed1c418cSBarry Smith alpha11 = x[18*i+10]; 2430ed1c418cSBarry Smith alpha12 = x[18*i+11]; 2431ed1c418cSBarry Smith alpha13 = x[18*i+12]; 2432ed1c418cSBarry Smith alpha14 = x[18*i+13]; 2433ed1c418cSBarry Smith alpha15 = x[18*i+14]; 2434ed1c418cSBarry Smith alpha16 = x[18*i+15]; 2435ed1c418cSBarry Smith alpha17 = x[18*i+16]; 2436ed1c418cSBarry Smith alpha18 = x[18*i+17]; 2437ed1c418cSBarry Smith while (n-->0) { 2438ed1c418cSBarry Smith y[18*(*idx)] += alpha1*(*v); 2439ed1c418cSBarry Smith y[18*(*idx)+1] += alpha2*(*v); 2440ed1c418cSBarry Smith y[18*(*idx)+2] += alpha3*(*v); 2441ed1c418cSBarry Smith y[18*(*idx)+3] += alpha4*(*v); 2442ed1c418cSBarry Smith y[18*(*idx)+4] += alpha5*(*v); 2443ed1c418cSBarry Smith y[18*(*idx)+5] += alpha6*(*v); 2444ed1c418cSBarry Smith y[18*(*idx)+6] += alpha7*(*v); 2445ed1c418cSBarry Smith y[18*(*idx)+7] += alpha8*(*v); 2446ed1c418cSBarry Smith y[18*(*idx)+8] += alpha9*(*v); 2447ed1c418cSBarry Smith y[18*(*idx)+9] += alpha10*(*v); 2448ed1c418cSBarry Smith y[18*(*idx)+10] += alpha11*(*v); 2449ed1c418cSBarry Smith y[18*(*idx)+11] += alpha12*(*v); 2450ed1c418cSBarry Smith y[18*(*idx)+12] += alpha13*(*v); 2451ed1c418cSBarry Smith y[18*(*idx)+13] += alpha14*(*v); 2452ed1c418cSBarry Smith y[18*(*idx)+14] += alpha15*(*v); 2453ed1c418cSBarry Smith y[18*(*idx)+15] += alpha16*(*v); 2454ed1c418cSBarry Smith y[18*(*idx)+16] += alpha17*(*v); 2455ed1c418cSBarry Smith y[18*(*idx)+17] += alpha18*(*v); 2456ed1c418cSBarry Smith idx++; v++; 2457ed1c418cSBarry Smith } 2458ed1c418cSBarry Smith } 24599566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(36.0*a->nz)); 24609566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 24619566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 2462ed1c418cSBarry Smith PetscFunctionReturn(0); 2463ed1c418cSBarry Smith } 2464ed1c418cSBarry Smith 2465ed1c418cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2466ed1c418cSBarry Smith { 2467ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2468ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2469d2840e09SBarry Smith const PetscScalar *x,*v; 2470d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2471ed1c418cSBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2472d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2473ed1c418cSBarry Smith PetscInt n,i,jrow,j; 2474ed1c418cSBarry Smith 2475ed1c418cSBarry Smith PetscFunctionBegin; 24769566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 24779566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 24789566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 2479ed1c418cSBarry Smith idx = a->j; 2480ed1c418cSBarry Smith v = a->a; 2481ed1c418cSBarry Smith ii = a->i; 2482ed1c418cSBarry Smith 2483ed1c418cSBarry Smith for (i=0; i<m; i++) { 2484ed1c418cSBarry Smith jrow = ii[i]; 2485ed1c418cSBarry Smith n = ii[i+1] - jrow; 2486ed1c418cSBarry Smith sum1 = 0.0; 2487ed1c418cSBarry Smith sum2 = 0.0; 2488ed1c418cSBarry Smith sum3 = 0.0; 2489ed1c418cSBarry Smith sum4 = 0.0; 2490ed1c418cSBarry Smith sum5 = 0.0; 2491ed1c418cSBarry Smith sum6 = 0.0; 2492ed1c418cSBarry Smith sum7 = 0.0; 2493ed1c418cSBarry Smith sum8 = 0.0; 2494ed1c418cSBarry Smith sum9 = 0.0; 2495ed1c418cSBarry Smith sum10 = 0.0; 2496ed1c418cSBarry Smith sum11 = 0.0; 2497ed1c418cSBarry Smith sum12 = 0.0; 2498ed1c418cSBarry Smith sum13 = 0.0; 2499ed1c418cSBarry Smith sum14 = 0.0; 2500ed1c418cSBarry Smith sum15 = 0.0; 2501ed1c418cSBarry Smith sum16 = 0.0; 2502ed1c418cSBarry Smith sum17 = 0.0; 2503ed1c418cSBarry Smith sum18 = 0.0; 2504ed1c418cSBarry Smith for (j=0; j<n; j++) { 2505ed1c418cSBarry Smith sum1 += v[jrow]*x[18*idx[jrow]]; 2506ed1c418cSBarry Smith sum2 += v[jrow]*x[18*idx[jrow]+1]; 2507ed1c418cSBarry Smith sum3 += v[jrow]*x[18*idx[jrow]+2]; 2508ed1c418cSBarry Smith sum4 += v[jrow]*x[18*idx[jrow]+3]; 2509ed1c418cSBarry Smith sum5 += v[jrow]*x[18*idx[jrow]+4]; 2510ed1c418cSBarry Smith sum6 += v[jrow]*x[18*idx[jrow]+5]; 2511ed1c418cSBarry Smith sum7 += v[jrow]*x[18*idx[jrow]+6]; 2512ed1c418cSBarry Smith sum8 += v[jrow]*x[18*idx[jrow]+7]; 2513ed1c418cSBarry Smith sum9 += v[jrow]*x[18*idx[jrow]+8]; 2514ed1c418cSBarry Smith sum10 += v[jrow]*x[18*idx[jrow]+9]; 2515ed1c418cSBarry Smith sum11 += v[jrow]*x[18*idx[jrow]+10]; 2516ed1c418cSBarry Smith sum12 += v[jrow]*x[18*idx[jrow]+11]; 2517ed1c418cSBarry Smith sum13 += v[jrow]*x[18*idx[jrow]+12]; 2518ed1c418cSBarry Smith sum14 += v[jrow]*x[18*idx[jrow]+13]; 2519ed1c418cSBarry Smith sum15 += v[jrow]*x[18*idx[jrow]+14]; 2520ed1c418cSBarry Smith sum16 += v[jrow]*x[18*idx[jrow]+15]; 2521ed1c418cSBarry Smith sum17 += v[jrow]*x[18*idx[jrow]+16]; 2522ed1c418cSBarry Smith sum18 += v[jrow]*x[18*idx[jrow]+17]; 2523ed1c418cSBarry Smith jrow++; 2524ed1c418cSBarry Smith } 2525ed1c418cSBarry Smith y[18*i] += sum1; 2526ed1c418cSBarry Smith y[18*i+1] += sum2; 2527ed1c418cSBarry Smith y[18*i+2] += sum3; 2528ed1c418cSBarry Smith y[18*i+3] += sum4; 2529ed1c418cSBarry Smith y[18*i+4] += sum5; 2530ed1c418cSBarry Smith y[18*i+5] += sum6; 2531ed1c418cSBarry Smith y[18*i+6] += sum7; 2532ed1c418cSBarry Smith y[18*i+7] += sum8; 2533ed1c418cSBarry Smith y[18*i+8] += sum9; 2534ed1c418cSBarry Smith y[18*i+9] += sum10; 2535ed1c418cSBarry Smith y[18*i+10] += sum11; 2536ed1c418cSBarry Smith y[18*i+11] += sum12; 2537ed1c418cSBarry Smith y[18*i+12] += sum13; 2538ed1c418cSBarry Smith y[18*i+13] += sum14; 2539ed1c418cSBarry Smith y[18*i+14] += sum15; 2540ed1c418cSBarry Smith y[18*i+15] += sum16; 2541ed1c418cSBarry Smith y[18*i+16] += sum17; 2542ed1c418cSBarry Smith y[18*i+17] += sum18; 2543ed1c418cSBarry Smith } 2544ed1c418cSBarry Smith 25459566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(36.0*a->nz)); 25469566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 25479566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 2548ed1c418cSBarry Smith PetscFunctionReturn(0); 2549ed1c418cSBarry Smith } 2550ed1c418cSBarry Smith 2551ed1c418cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2552ed1c418cSBarry Smith { 2553ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2554ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2555d2840e09SBarry Smith const PetscScalar *x,*v; 2556d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2557ed1c418cSBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2558d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2559d2840e09SBarry Smith PetscInt n,i; 2560ed1c418cSBarry Smith 2561ed1c418cSBarry Smith PetscFunctionBegin; 25629566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 25639566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 25649566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 2565ed1c418cSBarry Smith for (i=0; i<m; i++) { 2566ed1c418cSBarry Smith idx = a->j + a->i[i]; 2567ed1c418cSBarry Smith v = a->a + a->i[i]; 2568ed1c418cSBarry Smith n = a->i[i+1] - a->i[i]; 2569ed1c418cSBarry Smith alpha1 = x[18*i]; 2570ed1c418cSBarry Smith alpha2 = x[18*i+1]; 2571ed1c418cSBarry Smith alpha3 = x[18*i+2]; 2572ed1c418cSBarry Smith alpha4 = x[18*i+3]; 2573ed1c418cSBarry Smith alpha5 = x[18*i+4]; 2574ed1c418cSBarry Smith alpha6 = x[18*i+5]; 2575ed1c418cSBarry Smith alpha7 = x[18*i+6]; 2576ed1c418cSBarry Smith alpha8 = x[18*i+7]; 2577ed1c418cSBarry Smith alpha9 = x[18*i+8]; 2578ed1c418cSBarry Smith alpha10 = x[18*i+9]; 2579ed1c418cSBarry Smith alpha11 = x[18*i+10]; 2580ed1c418cSBarry Smith alpha12 = x[18*i+11]; 2581ed1c418cSBarry Smith alpha13 = x[18*i+12]; 2582ed1c418cSBarry Smith alpha14 = x[18*i+13]; 2583ed1c418cSBarry Smith alpha15 = x[18*i+14]; 2584ed1c418cSBarry Smith alpha16 = x[18*i+15]; 2585ed1c418cSBarry Smith alpha17 = x[18*i+16]; 2586ed1c418cSBarry Smith alpha18 = x[18*i+17]; 2587ed1c418cSBarry Smith while (n-->0) { 2588ed1c418cSBarry Smith y[18*(*idx)] += alpha1*(*v); 2589ed1c418cSBarry Smith y[18*(*idx)+1] += alpha2*(*v); 2590ed1c418cSBarry Smith y[18*(*idx)+2] += alpha3*(*v); 2591ed1c418cSBarry Smith y[18*(*idx)+3] += alpha4*(*v); 2592ed1c418cSBarry Smith y[18*(*idx)+4] += alpha5*(*v); 2593ed1c418cSBarry Smith y[18*(*idx)+5] += alpha6*(*v); 2594ed1c418cSBarry Smith y[18*(*idx)+6] += alpha7*(*v); 2595ed1c418cSBarry Smith y[18*(*idx)+7] += alpha8*(*v); 2596ed1c418cSBarry Smith y[18*(*idx)+8] += alpha9*(*v); 2597ed1c418cSBarry Smith y[18*(*idx)+9] += alpha10*(*v); 2598ed1c418cSBarry Smith y[18*(*idx)+10] += alpha11*(*v); 2599ed1c418cSBarry Smith y[18*(*idx)+11] += alpha12*(*v); 2600ed1c418cSBarry Smith y[18*(*idx)+12] += alpha13*(*v); 2601ed1c418cSBarry Smith y[18*(*idx)+13] += alpha14*(*v); 2602ed1c418cSBarry Smith y[18*(*idx)+14] += alpha15*(*v); 2603ed1c418cSBarry Smith y[18*(*idx)+15] += alpha16*(*v); 2604ed1c418cSBarry Smith y[18*(*idx)+16] += alpha17*(*v); 2605ed1c418cSBarry Smith y[18*(*idx)+17] += alpha18*(*v); 2606ed1c418cSBarry Smith idx++; v++; 2607ed1c418cSBarry Smith } 2608ed1c418cSBarry Smith } 26099566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(36.0*a->nz)); 26109566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 26119566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 2612ed1c418cSBarry Smith PetscFunctionReturn(0); 2613ed1c418cSBarry Smith } 2614ed1c418cSBarry Smith 26156861f193SBarry Smith PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 26166861f193SBarry Smith { 26176861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 26186861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 26196861f193SBarry Smith const PetscScalar *x,*v; 26206861f193SBarry Smith PetscScalar *y,*sums; 26216861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 26226861f193SBarry Smith PetscInt n,i,jrow,j,dof = b->dof,k; 26236861f193SBarry Smith 26246861f193SBarry Smith PetscFunctionBegin; 26259566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 26269566063dSJacob Faibussowitsch PetscCall(VecSet(yy,0.0)); 26279566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 26286861f193SBarry Smith idx = a->j; 26296861f193SBarry Smith v = a->a; 26306861f193SBarry Smith ii = a->i; 26316861f193SBarry Smith 26326861f193SBarry Smith for (i=0; i<m; i++) { 26336861f193SBarry Smith jrow = ii[i]; 26346861f193SBarry Smith n = ii[i+1] - jrow; 26356861f193SBarry Smith sums = y + dof*i; 26366861f193SBarry Smith for (j=0; j<n; j++) { 26376861f193SBarry Smith for (k=0; k<dof; k++) { 26386861f193SBarry Smith sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 26396861f193SBarry Smith } 26406861f193SBarry Smith jrow++; 26416861f193SBarry Smith } 26426861f193SBarry Smith } 26436861f193SBarry Smith 26449566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0*dof*a->nz)); 26459566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 26469566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 26476861f193SBarry Smith PetscFunctionReturn(0); 26486861f193SBarry Smith } 26496861f193SBarry Smith 26506861f193SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 26516861f193SBarry Smith { 26526861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 26536861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 26546861f193SBarry Smith const PetscScalar *x,*v; 26556861f193SBarry Smith PetscScalar *y,*sums; 26566861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 26576861f193SBarry Smith PetscInt n,i,jrow,j,dof = b->dof,k; 26586861f193SBarry Smith 26596861f193SBarry Smith PetscFunctionBegin; 26609566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 26619566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 26629566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 26636861f193SBarry Smith idx = a->j; 26646861f193SBarry Smith v = a->a; 26656861f193SBarry Smith ii = a->i; 26666861f193SBarry Smith 26676861f193SBarry Smith for (i=0; i<m; i++) { 26686861f193SBarry Smith jrow = ii[i]; 26696861f193SBarry Smith n = ii[i+1] - jrow; 26706861f193SBarry Smith sums = y + dof*i; 26716861f193SBarry Smith for (j=0; j<n; j++) { 26726861f193SBarry Smith for (k=0; k<dof; k++) { 26736861f193SBarry Smith sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 26746861f193SBarry Smith } 26756861f193SBarry Smith jrow++; 26766861f193SBarry Smith } 26776861f193SBarry Smith } 26786861f193SBarry Smith 26799566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0*dof*a->nz)); 26809566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 26819566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 26826861f193SBarry Smith PetscFunctionReturn(0); 26836861f193SBarry Smith } 26846861f193SBarry Smith 26856861f193SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 26866861f193SBarry Smith { 26876861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 26886861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 26896861f193SBarry Smith const PetscScalar *x,*v,*alpha; 26906861f193SBarry Smith PetscScalar *y; 26916861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 26926861f193SBarry Smith PetscInt n,i,k; 26936861f193SBarry Smith 26946861f193SBarry Smith PetscFunctionBegin; 26959566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 26969566063dSJacob Faibussowitsch PetscCall(VecSet(yy,0.0)); 26979566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 26986861f193SBarry Smith for (i=0; i<m; i++) { 26996861f193SBarry Smith idx = a->j + a->i[i]; 27006861f193SBarry Smith v = a->a + a->i[i]; 27016861f193SBarry Smith n = a->i[i+1] - a->i[i]; 27026861f193SBarry Smith alpha = x + dof*i; 27036861f193SBarry Smith while (n-->0) { 27046861f193SBarry Smith for (k=0; k<dof; k++) { 27056861f193SBarry Smith y[dof*(*idx)+k] += alpha[k]*(*v); 27066861f193SBarry Smith } 270783ce7366SBarry Smith idx++; v++; 27086861f193SBarry Smith } 27096861f193SBarry Smith } 27109566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0*dof*a->nz)); 27119566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 27129566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 27136861f193SBarry Smith PetscFunctionReturn(0); 27146861f193SBarry Smith } 27156861f193SBarry Smith 27166861f193SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 27176861f193SBarry Smith { 27186861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 27196861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 27206861f193SBarry Smith const PetscScalar *x,*v,*alpha; 27216861f193SBarry Smith PetscScalar *y; 27226861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 27236861f193SBarry Smith PetscInt n,i,k; 27246861f193SBarry Smith 27256861f193SBarry Smith PetscFunctionBegin; 27269566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 27279566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 27289566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 27296861f193SBarry Smith for (i=0; i<m; i++) { 27306861f193SBarry Smith idx = a->j + a->i[i]; 27316861f193SBarry Smith v = a->a + a->i[i]; 27326861f193SBarry Smith n = a->i[i+1] - a->i[i]; 27336861f193SBarry Smith alpha = x + dof*i; 27346861f193SBarry Smith while (n-->0) { 27356861f193SBarry Smith for (k=0; k<dof; k++) { 27366861f193SBarry Smith y[dof*(*idx)+k] += alpha[k]*(*v); 27376861f193SBarry Smith } 273883ce7366SBarry Smith idx++; v++; 27396861f193SBarry Smith } 27406861f193SBarry Smith } 27419566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0*dof*a->nz)); 27429566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 27439566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 27446861f193SBarry Smith PetscFunctionReturn(0); 27456861f193SBarry Smith } 27466861f193SBarry Smith 2747f4a53059SBarry Smith /*===================================================================================*/ 2748dfbe8321SBarry Smith PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2749f4a53059SBarry Smith { 27504cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2751f4a53059SBarry Smith 2752b24ad042SBarry Smith PetscFunctionBegin; 2753f4a53059SBarry Smith /* start the scatter */ 27549566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD)); 27559566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->mult)(b->AIJ,xx,yy)); 27569566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD)); 27579566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy)); 2758f4a53059SBarry Smith PetscFunctionReturn(0); 2759f4a53059SBarry Smith } 2760f4a53059SBarry Smith 2761dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 27624cb9d9b8SBarry Smith { 27634cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2764b24ad042SBarry Smith 27654cb9d9b8SBarry Smith PetscFunctionBegin; 27669566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w)); 27679566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy)); 27689566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE)); 27699566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE)); 27704cb9d9b8SBarry Smith PetscFunctionReturn(0); 27714cb9d9b8SBarry Smith } 27724cb9d9b8SBarry Smith 2773dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 27744cb9d9b8SBarry Smith { 27754cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 27764cb9d9b8SBarry Smith 2777b24ad042SBarry Smith PetscFunctionBegin; 27784cb9d9b8SBarry Smith /* start the scatter */ 27799566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD)); 27809566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz)); 27819566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD)); 27829566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz)); 27834cb9d9b8SBarry Smith PetscFunctionReturn(0); 27844cb9d9b8SBarry Smith } 27854cb9d9b8SBarry Smith 2786dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 27874cb9d9b8SBarry Smith { 27884cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2789b24ad042SBarry Smith 27904cb9d9b8SBarry Smith PetscFunctionBegin; 27919566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w)); 27929566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz)); 27939566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE)); 27949566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE)); 27954cb9d9b8SBarry Smith PetscFunctionReturn(0); 27964cb9d9b8SBarry Smith } 27974cb9d9b8SBarry Smith 279895ddefa2SHong Zhang /* ----------------------------------------------------------------*/ 27994222ddf1SHong Zhang PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C) 28004222ddf1SHong Zhang { 28014222ddf1SHong Zhang Mat_Product *product = C->product; 28024222ddf1SHong Zhang 28034222ddf1SHong Zhang PetscFunctionBegin; 28044222ddf1SHong Zhang if (product->type == MATPRODUCT_PtAP) { 28054222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ; 280698921bdaSJacob Faibussowitsch } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices",MatProductTypes[product->type]); 28074222ddf1SHong Zhang PetscFunctionReturn(0); 28084222ddf1SHong Zhang } 28094222ddf1SHong Zhang 28104222ddf1SHong Zhang PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C) 28114222ddf1SHong Zhang { 28124222ddf1SHong Zhang PetscErrorCode ierr; 28134222ddf1SHong Zhang Mat_Product *product = C->product; 28144222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 28154222ddf1SHong Zhang Mat A=product->A,P=product->B; 28164222ddf1SHong Zhang PetscInt alg=1; /* set default algorithm */ 28174222ddf1SHong Zhang #if !defined(PETSC_HAVE_HYPRE) 28184222ddf1SHong Zhang const char *algTypes[4] = {"scalable","nonscalable","allatonce","allatonce_merged"}; 28194222ddf1SHong Zhang PetscInt nalg=4; 28204222ddf1SHong Zhang #else 28214222ddf1SHong Zhang const char *algTypes[5] = {"scalable","nonscalable","allatonce","allatonce_merged","hypre"}; 28224222ddf1SHong Zhang PetscInt nalg=5; 28234222ddf1SHong Zhang #endif 28244222ddf1SHong Zhang 28254222ddf1SHong Zhang PetscFunctionBegin; 2826*08401ef6SPierre Jolivet PetscCheck(product->type == MATPRODUCT_PtAP,PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices",MatProductTypes[product->type]); 28274222ddf1SHong Zhang 28284222ddf1SHong Zhang /* PtAP */ 28294222ddf1SHong Zhang /* Check matrix local sizes */ 28302c71b3e2SJacob Faibussowitsch PetscCheckFalse(A->rmap->rstart != P->rmap->rstart || A->rmap->rend != P->rmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,P->rmap->rstart,P->rmap->rend); 28312c71b3e2SJacob Faibussowitsch PetscCheckFalse(A->cmap->rstart != P->rmap->rstart || A->cmap->rend != P->rmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,P->rmap->rstart,P->rmap->rend); 28324222ddf1SHong Zhang 28334222ddf1SHong Zhang /* Set the default algorithm */ 28349566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg,"default",&flg)); 28354222ddf1SHong Zhang if (flg) { 28369566063dSJacob Faibussowitsch PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 28374222ddf1SHong Zhang } 28384222ddf1SHong Zhang 28394222ddf1SHong Zhang /* Get runtime option */ 28409566063dSJacob Faibussowitsch ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");PetscCall(ierr); 28419566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[alg],&alg,&flg)); 28424222ddf1SHong Zhang if (flg) { 28439566063dSJacob Faibussowitsch PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 28444222ddf1SHong Zhang } 28459566063dSJacob Faibussowitsch ierr = PetscOptionsEnd();PetscCall(ierr); 28464222ddf1SHong Zhang 28479566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg,"allatonce",&flg)); 28484222ddf1SHong Zhang if (flg) { 28494222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 28504222ddf1SHong Zhang PetscFunctionReturn(0); 28514222ddf1SHong Zhang } 28524222ddf1SHong Zhang 28539566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg,"allatonce_merged",&flg)); 28544222ddf1SHong Zhang if (flg) { 28554222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 28564222ddf1SHong Zhang PetscFunctionReturn(0); 28574222ddf1SHong Zhang } 28584222ddf1SHong Zhang 28594222ddf1SHong Zhang /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */ 28609566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)A,"Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n")); 28619566063dSJacob Faibussowitsch PetscCall(MatConvert(P,MATMPIAIJ,MAT_INPLACE_MATRIX,&P)); 28629566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(C)); 28634222ddf1SHong Zhang PetscFunctionReturn(0); 28644222ddf1SHong Zhang } 28654222ddf1SHong Zhang 28664222ddf1SHong Zhang /* ----------------------------------------------------------------*/ 28674222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat C) 28687ba1a0bfSKris Buschelman { 28690298fd71SBarry Smith PetscFreeSpaceList free_space=NULL,current_space=NULL; 28707ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp =(Mat_SeqMAIJ*)PP->data; 28717ba1a0bfSKris Buschelman Mat P =pp->AIJ; 28727ba1a0bfSKris Buschelman Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 2873d2840e09SBarry Smith PetscInt *pti,*ptj,*ptJ; 28747ba1a0bfSKris Buschelman PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 2875d2840e09SBarry Smith const PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof; 2876d2840e09SBarry Smith PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn; 28777ba1a0bfSKris Buschelman MatScalar *ca; 2878d2840e09SBarry Smith const PetscInt *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj; 28797ba1a0bfSKris Buschelman 28807ba1a0bfSKris Buschelman PetscFunctionBegin; 28817ba1a0bfSKris Buschelman /* Get ij structure of P^T */ 28829566063dSJacob Faibussowitsch PetscCall(MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj)); 28837ba1a0bfSKris Buschelman 28847ba1a0bfSKris Buschelman cn = pn*ppdof; 28857ba1a0bfSKris Buschelman /* Allocate ci array, arrays for fill computation and */ 28867ba1a0bfSKris Buschelman /* free space for accumulating nonzero column info */ 28879566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(cn+1,&ci)); 28887ba1a0bfSKris Buschelman ci[0] = 0; 28897ba1a0bfSKris Buschelman 28907ba1a0bfSKris Buschelman /* Work arrays for rows of P^T*A */ 28919566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow)); 28929566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ptadenserow,an)); 28939566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(denserow,cn)); 28947ba1a0bfSKris Buschelman 28957ba1a0bfSKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 28967ba1a0bfSKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 28977ba1a0bfSKris Buschelman /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 28989566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space)); 28997ba1a0bfSKris Buschelman current_space = free_space; 29007ba1a0bfSKris Buschelman 29017ba1a0bfSKris Buschelman /* Determine symbolic info for each row of C: */ 29027ba1a0bfSKris Buschelman for (i=0; i<pn; i++) { 29037ba1a0bfSKris Buschelman ptnzi = pti[i+1] - pti[i]; 29047ba1a0bfSKris Buschelman ptJ = ptj + pti[i]; 29057ba1a0bfSKris Buschelman for (dof=0; dof<ppdof; dof++) { 29067ba1a0bfSKris Buschelman ptanzi = 0; 29077ba1a0bfSKris Buschelman /* Determine symbolic row of PtA: */ 29087ba1a0bfSKris Buschelman for (j=0; j<ptnzi; j++) { 29097ba1a0bfSKris Buschelman /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 29107ba1a0bfSKris Buschelman arow = ptJ[j]*ppdof + dof; 29117ba1a0bfSKris Buschelman /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 29127ba1a0bfSKris Buschelman anzj = ai[arow+1] - ai[arow]; 29137ba1a0bfSKris Buschelman ajj = aj + ai[arow]; 29147ba1a0bfSKris Buschelman for (k=0; k<anzj; k++) { 29157ba1a0bfSKris Buschelman if (!ptadenserow[ajj[k]]) { 29167ba1a0bfSKris Buschelman ptadenserow[ajj[k]] = -1; 29177ba1a0bfSKris Buschelman ptasparserow[ptanzi++] = ajj[k]; 29187ba1a0bfSKris Buschelman } 29197ba1a0bfSKris Buschelman } 29207ba1a0bfSKris Buschelman } 29217ba1a0bfSKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 29227ba1a0bfSKris Buschelman ptaj = ptasparserow; 29237ba1a0bfSKris Buschelman cnzi = 0; 29247ba1a0bfSKris Buschelman for (j=0; j<ptanzi; j++) { 29257ba1a0bfSKris Buschelman /* Get offset within block of P */ 29267ba1a0bfSKris Buschelman pshift = *ptaj%ppdof; 29277ba1a0bfSKris Buschelman /* Get block row of P */ 29287ba1a0bfSKris Buschelman prow = (*ptaj++)/ppdof; /* integer division */ 29297ba1a0bfSKris Buschelman /* P has same number of nonzeros per row as the compressed form */ 29307ba1a0bfSKris Buschelman pnzj = pi[prow+1] - pi[prow]; 29317ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 29327ba1a0bfSKris Buschelman for (k=0;k<pnzj;k++) { 29337ba1a0bfSKris Buschelman /* Locations in C are shifted by the offset within the block */ 29347ba1a0bfSKris Buschelman /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 29357ba1a0bfSKris Buschelman if (!denserow[pjj[k]*ppdof+pshift]) { 29367ba1a0bfSKris Buschelman denserow[pjj[k]*ppdof+pshift] = -1; 29377ba1a0bfSKris Buschelman sparserow[cnzi++] = pjj[k]*ppdof+pshift; 29387ba1a0bfSKris Buschelman } 29397ba1a0bfSKris Buschelman } 29407ba1a0bfSKris Buschelman } 29417ba1a0bfSKris Buschelman 29427ba1a0bfSKris Buschelman /* sort sparserow */ 29439566063dSJacob Faibussowitsch PetscCall(PetscSortInt(cnzi,sparserow)); 29447ba1a0bfSKris Buschelman 29457ba1a0bfSKris Buschelman /* If free space is not available, make more free space */ 29467ba1a0bfSKris Buschelman /* Double the amount of total space in the list */ 29477ba1a0bfSKris Buschelman if (current_space->local_remaining<cnzi) { 29489566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space)); 29497ba1a0bfSKris Buschelman } 29507ba1a0bfSKris Buschelman 29517ba1a0bfSKris Buschelman /* Copy data into free space, and zero out denserows */ 29529566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(current_space->array,sparserow,cnzi)); 295326fbe8dcSKarl Rupp 29547ba1a0bfSKris Buschelman current_space->array += cnzi; 29557ba1a0bfSKris Buschelman current_space->local_used += cnzi; 29567ba1a0bfSKris Buschelman current_space->local_remaining -= cnzi; 29577ba1a0bfSKris Buschelman 295826fbe8dcSKarl Rupp for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0; 295926fbe8dcSKarl Rupp for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0; 296026fbe8dcSKarl Rupp 29617ba1a0bfSKris Buschelman /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 29627ba1a0bfSKris Buschelman /* For now, we will recompute what is needed. */ 29637ba1a0bfSKris Buschelman ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 29647ba1a0bfSKris Buschelman } 29657ba1a0bfSKris Buschelman } 29667ba1a0bfSKris Buschelman /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 29677ba1a0bfSKris Buschelman /* Allocate space for cj, initialize cj, and */ 29687ba1a0bfSKris Buschelman /* destroy list of free space and other temporary array(s) */ 29699566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ci[cn]+1,&cj)); 29709566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space,cj)); 29719566063dSJacob Faibussowitsch PetscCall(PetscFree4(ptadenserow,ptasparserow,denserow,sparserow)); 29727ba1a0bfSKris Buschelman 29737ba1a0bfSKris Buschelman /* Allocate space for ca */ 29749566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(ci[cn]+1,&ca)); 29757ba1a0bfSKris Buschelman 29767ba1a0bfSKris Buschelman /* put together the new matrix */ 29779566063dSJacob Faibussowitsch PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,NULL,C)); 29789566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(C,pp->dof)); 29797ba1a0bfSKris Buschelman 29807ba1a0bfSKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 29817ba1a0bfSKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 29824222ddf1SHong Zhang c = (Mat_SeqAIJ*)(C->data); 2983e6b907acSBarry Smith c->free_a = PETSC_TRUE; 2984e6b907acSBarry Smith c->free_ij = PETSC_TRUE; 29857ba1a0bfSKris Buschelman c->nonew = 0; 298626fbe8dcSKarl Rupp 29874222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 29884222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 29897ba1a0bfSKris Buschelman 29907ba1a0bfSKris Buschelman /* Clean up. */ 29919566063dSJacob Faibussowitsch PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj)); 29927ba1a0bfSKris Buschelman PetscFunctionReturn(0); 29937ba1a0bfSKris Buschelman } 29947ba1a0bfSKris Buschelman 29957ba1a0bfSKris Buschelman PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 29967ba1a0bfSKris Buschelman { 29977ba1a0bfSKris Buschelman /* This routine requires testing -- first draft only */ 29987ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 29997ba1a0bfSKris Buschelman Mat P =pp->AIJ; 30007ba1a0bfSKris Buschelman Mat_SeqAIJ *a = (Mat_SeqAIJ*) A->data; 30017ba1a0bfSKris Buschelman Mat_SeqAIJ *p = (Mat_SeqAIJ*) P->data; 30027ba1a0bfSKris Buschelman Mat_SeqAIJ *c = (Mat_SeqAIJ*) C->data; 3003a2ea699eSBarry Smith const PetscInt *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj; 3004d2840e09SBarry Smith const PetscInt *ci=c->i,*cj=c->j,*cjj; 3005d2840e09SBarry Smith const PetscInt am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof; 3006d2840e09SBarry Smith PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense; 3007a2ea699eSBarry Smith const MatScalar *aa=a->a,*pa=p->a,*pA,*paj; 3008d2840e09SBarry Smith MatScalar *ca=c->a,*caj,*apa; 30097ba1a0bfSKris Buschelman 30107ba1a0bfSKris Buschelman PetscFunctionBegin; 30117ba1a0bfSKris Buschelman /* Allocate temporary array for storage of one row of A*P */ 30129566063dSJacob Faibussowitsch PetscCall(PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense)); 30137ba1a0bfSKris Buschelman 30147ba1a0bfSKris Buschelman /* Clear old values in C */ 30159566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ca,ci[cm])); 30167ba1a0bfSKris Buschelman 30177ba1a0bfSKris Buschelman for (i=0; i<am; i++) { 30187ba1a0bfSKris Buschelman /* Form sparse row of A*P */ 30197ba1a0bfSKris Buschelman anzi = ai[i+1] - ai[i]; 30207ba1a0bfSKris Buschelman apnzj = 0; 30217ba1a0bfSKris Buschelman for (j=0; j<anzi; j++) { 30227ba1a0bfSKris Buschelman /* Get offset within block of P */ 30237ba1a0bfSKris Buschelman pshift = *aj%ppdof; 30247ba1a0bfSKris Buschelman /* Get block row of P */ 30257ba1a0bfSKris Buschelman prow = *aj++/ppdof; /* integer division */ 30267ba1a0bfSKris Buschelman pnzj = pi[prow+1] - pi[prow]; 30277ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 30287ba1a0bfSKris Buschelman paj = pa + pi[prow]; 30297ba1a0bfSKris Buschelman for (k=0; k<pnzj; k++) { 30307ba1a0bfSKris Buschelman poffset = pjj[k]*ppdof+pshift; 30317ba1a0bfSKris Buschelman if (!apjdense[poffset]) { 30327ba1a0bfSKris Buschelman apjdense[poffset] = -1; 30337ba1a0bfSKris Buschelman apj[apnzj++] = poffset; 30347ba1a0bfSKris Buschelman } 30357ba1a0bfSKris Buschelman apa[poffset] += (*aa)*paj[k]; 30367ba1a0bfSKris Buschelman } 30379566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0*pnzj)); 30387ba1a0bfSKris Buschelman aa++; 30397ba1a0bfSKris Buschelman } 30407ba1a0bfSKris Buschelman 30417ba1a0bfSKris Buschelman /* Sort the j index array for quick sparse axpy. */ 30427ba1a0bfSKris Buschelman /* Note: a array does not need sorting as it is in dense storage locations. */ 30439566063dSJacob Faibussowitsch PetscCall(PetscSortInt(apnzj,apj)); 30447ba1a0bfSKris Buschelman 30457ba1a0bfSKris Buschelman /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 30467ba1a0bfSKris Buschelman prow = i/ppdof; /* integer division */ 30477ba1a0bfSKris Buschelman pshift = i%ppdof; 30487ba1a0bfSKris Buschelman poffset = pi[prow]; 30497ba1a0bfSKris Buschelman pnzi = pi[prow+1] - poffset; 30507ba1a0bfSKris Buschelman /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 30517ba1a0bfSKris Buschelman pJ = pj+poffset; 30527ba1a0bfSKris Buschelman pA = pa+poffset; 30537ba1a0bfSKris Buschelman for (j=0; j<pnzi; j++) { 30547ba1a0bfSKris Buschelman crow = (*pJ)*ppdof+pshift; 30557ba1a0bfSKris Buschelman cjj = cj + ci[crow]; 30567ba1a0bfSKris Buschelman caj = ca + ci[crow]; 30577ba1a0bfSKris Buschelman pJ++; 30587ba1a0bfSKris Buschelman /* Perform sparse axpy operation. Note cjj includes apj. */ 30597ba1a0bfSKris Buschelman for (k=0,nextap=0; nextap<apnzj; k++) { 306026fbe8dcSKarl Rupp if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]]; 30617ba1a0bfSKris Buschelman } 30629566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0*apnzj)); 30637ba1a0bfSKris Buschelman pA++; 30647ba1a0bfSKris Buschelman } 30657ba1a0bfSKris Buschelman 30667ba1a0bfSKris Buschelman /* Zero the current row info for A*P */ 30677ba1a0bfSKris Buschelman for (j=0; j<apnzj; j++) { 30687ba1a0bfSKris Buschelman apa[apj[j]] = 0.; 30697ba1a0bfSKris Buschelman apjdense[apj[j]] = 0; 30707ba1a0bfSKris Buschelman } 30717ba1a0bfSKris Buschelman } 30727ba1a0bfSKris Buschelman 30737ba1a0bfSKris Buschelman /* Assemble the final matrix and clean up */ 30749566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 30759566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 30769566063dSJacob Faibussowitsch PetscCall(PetscFree3(apa,apj,apjdense)); 307795ddefa2SHong Zhang PetscFunctionReturn(0); 307895ddefa2SHong Zhang } 30797ba1a0bfSKris Buschelman 30804222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C) 30812121bac1SHong Zhang { 30824222ddf1SHong Zhang Mat_Product *product = C->product; 30834222ddf1SHong Zhang Mat A=product->A,P=product->B; 30842121bac1SHong Zhang 30852121bac1SHong Zhang PetscFunctionBegin; 30869566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,product->fill,C)); 30872121bac1SHong Zhang PetscFunctionReturn(0); 30882121bac1SHong Zhang } 30892121bac1SHong Zhang 3090f7a08781SBarry Smith PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 309195ddefa2SHong Zhang { 309295ddefa2SHong Zhang PetscFunctionBegin; 30933e0c911fSBarry Smith SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet"); 309495ddefa2SHong Zhang } 309595ddefa2SHong Zhang 3096f7a08781SBarry Smith PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C) 309795ddefa2SHong Zhang { 309895ddefa2SHong Zhang PetscFunctionBegin; 30993e0c911fSBarry Smith SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 31007ba1a0bfSKris Buschelman } 31015c65b9ecSFande Kong 3102bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,Mat); 3103bc8e477aSFande Kong 3104bc8e477aSFande Kong PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,Mat C) 3105bc8e477aSFande Kong { 3106bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data; 3107bc8e477aSFande Kong 3108bc8e477aSFande Kong PetscFunctionBegin; 310934bcad68SFande Kong 31109566063dSJacob Faibussowitsch PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,C)); 3111bc8e477aSFande Kong PetscFunctionReturn(0); 3112bc8e477aSFande Kong } 3113bc8e477aSFande Kong 31144222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,PetscReal,Mat); 3115bc8e477aSFande Kong 31164222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,PetscReal fill,Mat C) 3117bc8e477aSFande Kong { 3118bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data; 3119bc8e477aSFande Kong 3120bc8e477aSFande Kong PetscFunctionBegin; 31219566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,fill,C)); 31224222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce; 3123bc8e477aSFande Kong PetscFunctionReturn(0); 3124bc8e477aSFande Kong } 3125bc8e477aSFande Kong 3126bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,Mat); 3127bc8e477aSFande Kong 3128bc8e477aSFande Kong PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,Mat C) 3129bc8e477aSFande Kong { 3130bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data; 3131bc8e477aSFande Kong 3132bc8e477aSFande Kong PetscFunctionBegin; 313334bcad68SFande Kong 31349566063dSJacob Faibussowitsch PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,C)); 3135bc8e477aSFande Kong PetscFunctionReturn(0); 3136bc8e477aSFande Kong } 3137bc8e477aSFande Kong 31384222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,PetscReal,Mat); 3139bc8e477aSFande Kong 31404222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,PetscReal fill,Mat C) 3141bc8e477aSFande Kong { 3142bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data; 3143bc8e477aSFande Kong 3144bc8e477aSFande Kong PetscFunctionBegin; 314534bcad68SFande Kong 31469566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,fill,C)); 31474222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged; 3148bc8e477aSFande Kong PetscFunctionReturn(0); 3149bc8e477aSFande Kong } 3150bc8e477aSFande Kong 31514222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C) 3152bc8e477aSFande Kong { 31534222ddf1SHong Zhang Mat_Product *product = C->product; 31544222ddf1SHong Zhang Mat A=product->A,P=product->B; 31554222ddf1SHong Zhang PetscBool flg; 3156bc8e477aSFande Kong 3157bc8e477aSFande Kong PetscFunctionBegin; 31589566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg,"allatonce",&flg)); 31594222ddf1SHong Zhang if (flg) { 31609566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A,P,product->fill,C)); 31614222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 31624222ddf1SHong Zhang PetscFunctionReturn(0); 3163bc8e477aSFande Kong } 316434bcad68SFande Kong 31659566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg,"allatonce_merged",&flg)); 31664222ddf1SHong Zhang if (flg) { 31679566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A,P,product->fill,C)); 31684222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 31694222ddf1SHong Zhang PetscFunctionReturn(0); 31704222ddf1SHong Zhang } 317134bcad68SFande Kong 31724222ddf1SHong Zhang SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"Mat Product Algorithm is not supported"); 3173bc8e477aSFande Kong } 3174bc8e477aSFande Kong 3175cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 31769c4fc4c7SBarry Smith { 31779c4fc4c7SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 3178ceb03754SKris Buschelman Mat a = b->AIJ,B; 31799c4fc4c7SBarry Smith Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 31800fd73130SBarry Smith PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 3181ba8c8a56SBarry Smith PetscInt *cols; 3182ba8c8a56SBarry Smith PetscScalar *vals; 31839c4fc4c7SBarry Smith 31849c4fc4c7SBarry Smith PetscFunctionBegin; 31859566063dSJacob Faibussowitsch PetscCall(MatGetSize(a,&m,&n)); 31869566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(dof*m,&ilen)); 31879c4fc4c7SBarry Smith for (i=0; i<m; i++) { 31889c4fc4c7SBarry Smith nmax = PetscMax(nmax,aij->ilen[i]); 318926fbe8dcSKarl Rupp for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i]; 31909c4fc4c7SBarry Smith } 31919566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF,&B)); 31929566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B,dof*m,dof*n,dof*m,dof*n)); 31939566063dSJacob Faibussowitsch PetscCall(MatSetType(B,newtype)); 31949566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(B,0,ilen)); 31959566063dSJacob Faibussowitsch PetscCall(PetscFree(ilen)); 31969566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nmax,&icols)); 31979c4fc4c7SBarry Smith ii = 0; 31989c4fc4c7SBarry Smith for (i=0; i<m; i++) { 31999566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals)); 32000fd73130SBarry Smith for (j=0; j<dof; j++) { 320126fbe8dcSKarl Rupp for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j; 32029566063dSJacob Faibussowitsch PetscCall(MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES)); 32039c4fc4c7SBarry Smith ii++; 32049c4fc4c7SBarry Smith } 32059566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals)); 32069c4fc4c7SBarry Smith } 32079566063dSJacob Faibussowitsch PetscCall(PetscFree(icols)); 32089566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 32099566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 3210ceb03754SKris Buschelman 3211511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 32129566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A,&B)); 3213c3d102feSKris Buschelman } else { 3214ceb03754SKris Buschelman *newmat = B; 3215c3d102feSKris Buschelman } 32169c4fc4c7SBarry Smith PetscFunctionReturn(0); 32179c4fc4c7SBarry Smith } 32189c4fc4c7SBarry Smith 3219c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h> 3220be1d678aSKris Buschelman 3221cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 32220fd73130SBarry Smith { 32230fd73130SBarry Smith Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 3224ceb03754SKris Buschelman Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 32250fd73130SBarry Smith Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 32260fd73130SBarry Smith Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 32270fd73130SBarry Smith Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 32280fd73130SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 32290298fd71SBarry Smith PetscInt dof = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0; 32300298fd71SBarry Smith PetscInt *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL; 32310fd73130SBarry Smith PetscInt rstart,cstart,*garray,ii,k; 32320fd73130SBarry Smith PetscScalar *vals,*ovals; 32330fd73130SBarry Smith 32340fd73130SBarry Smith PetscFunctionBegin; 32359566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz)); 3236d0f46423SBarry Smith for (i=0; i<A->rmap->n/dof; i++) { 32370fd73130SBarry Smith nmax = PetscMax(nmax,AIJ->ilen[i]); 32380fd73130SBarry Smith onmax = PetscMax(onmax,OAIJ->ilen[i]); 32390fd73130SBarry Smith for (j=0; j<dof; j++) { 32400fd73130SBarry Smith dnz[dof*i+j] = AIJ->ilen[i]; 32410fd73130SBarry Smith onz[dof*i+j] = OAIJ->ilen[i]; 32420fd73130SBarry Smith } 32430fd73130SBarry Smith } 32449566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 32459566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 32469566063dSJacob Faibussowitsch PetscCall(MatSetType(B,newtype)); 32479566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(B,0,dnz,0,onz)); 32489566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(B,dof)); 32499566063dSJacob Faibussowitsch PetscCall(PetscFree2(dnz,onz)); 32500fd73130SBarry Smith 32519566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nmax,&icols,onmax,&oicols)); 3252d0f46423SBarry Smith rstart = dof*maij->A->rmap->rstart; 3253d0f46423SBarry Smith cstart = dof*maij->A->cmap->rstart; 32540fd73130SBarry Smith garray = mpiaij->garray; 32550fd73130SBarry Smith 32560fd73130SBarry Smith ii = rstart; 3257d0f46423SBarry Smith for (i=0; i<A->rmap->n/dof; i++) { 32589566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals)); 32599566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals)); 32600fd73130SBarry Smith for (j=0; j<dof; j++) { 32610fd73130SBarry Smith for (k=0; k<ncols; k++) { 32620fd73130SBarry Smith icols[k] = cstart + dof*cols[k]+j; 32630fd73130SBarry Smith } 32640fd73130SBarry Smith for (k=0; k<oncols; k++) { 32650fd73130SBarry Smith oicols[k] = dof*garray[ocols[k]]+j; 32660fd73130SBarry Smith } 32679566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES)); 32689566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES)); 32690fd73130SBarry Smith ii++; 32700fd73130SBarry Smith } 32719566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals)); 32729566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals)); 32730fd73130SBarry Smith } 32749566063dSJacob Faibussowitsch PetscCall(PetscFree2(icols,oicols)); 32750fd73130SBarry Smith 32769566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 32779566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 3278ceb03754SKris Buschelman 3279511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 32807adad957SLisandro Dalcin PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 32817adad957SLisandro Dalcin ((PetscObject)A)->refct = 1; 328226fbe8dcSKarl Rupp 32839566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A,&B)); 328426fbe8dcSKarl Rupp 32857adad957SLisandro Dalcin ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 3286c3d102feSKris Buschelman } else { 3287ceb03754SKris Buschelman *newmat = B; 3288c3d102feSKris Buschelman } 32890fd73130SBarry Smith PetscFunctionReturn(0); 32900fd73130SBarry Smith } 32910fd73130SBarry Smith 32927dae84e0SHong Zhang PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 32939e516c8fSBarry Smith { 32949e516c8fSBarry Smith Mat A; 32959e516c8fSBarry Smith 32969e516c8fSBarry Smith PetscFunctionBegin; 32979566063dSJacob Faibussowitsch PetscCall(MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A)); 32989566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(A,isrow,iscol,cll,newmat)); 32999566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 33009e516c8fSBarry Smith PetscFunctionReturn(0); 33019e516c8fSBarry Smith } 33020fd73130SBarry Smith 3303ec626240SStefano Zampini PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *submat[]) 3304ec626240SStefano Zampini { 3305ec626240SStefano Zampini Mat A; 3306ec626240SStefano Zampini 3307ec626240SStefano Zampini PetscFunctionBegin; 33089566063dSJacob Faibussowitsch PetscCall(MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A)); 33099566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrices(A,n,irow,icol,scall,submat)); 33109566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 3311ec626240SStefano Zampini PetscFunctionReturn(0); 3312ec626240SStefano Zampini } 3313ec626240SStefano Zampini 3314bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */ 3315480dffcfSJed Brown /*@ 33160bad9183SKris Buschelman MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 33170bad9183SKris Buschelman operations for multicomponent problems. It interpolates each component the same 33180bad9183SKris Buschelman way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 33190bad9183SKris Buschelman and MATMPIAIJ for distributed matrices. 33200bad9183SKris Buschelman 3321ff585edeSJed Brown Collective 3322ff585edeSJed Brown 3323ff585edeSJed Brown Input Parameters: 3324ff585edeSJed Brown + A - the AIJ matrix describing the action on blocks 3325ff585edeSJed Brown - dof - the block size (number of components per node) 3326ff585edeSJed Brown 3327ff585edeSJed Brown Output Parameter: 3328ff585edeSJed Brown . maij - the new MAIJ matrix 3329ff585edeSJed Brown 33300bad9183SKris Buschelman Operations provided: 333167b8a455SSatish Balay .vb 333267b8a455SSatish Balay MatMult 333367b8a455SSatish Balay MatMultTranspose 333467b8a455SSatish Balay MatMultAdd 333567b8a455SSatish Balay MatMultTransposeAdd 333667b8a455SSatish Balay MatView 333767b8a455SSatish Balay .ve 33380bad9183SKris Buschelman 33390bad9183SKris Buschelman Level: advanced 33400bad9183SKris Buschelman 3341b409243cSBarry Smith .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ 3342ff585edeSJed Brown @*/ 33437087cfbeSBarry Smith PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 334482b1193eSBarry Smith { 3345b24ad042SBarry Smith PetscMPIInt size; 3346b24ad042SBarry Smith PetscInt n; 334782b1193eSBarry Smith Mat B; 3348fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 3349fdc842d1SBarry Smith /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */ 3350fdc842d1SBarry Smith PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE; 3351fdc842d1SBarry Smith #endif 335282b1193eSBarry Smith 335382b1193eSBarry Smith PetscFunctionBegin; 3354fdc842d1SBarry Smith dof = PetscAbs(dof); 33559566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)A)); 3356d72c5c08SBarry Smith 335726fbe8dcSKarl Rupp if (dof == 1) *maij = A; 335826fbe8dcSKarl Rupp else { 33599566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 3360c248e2fdSStefano Zampini /* propagate vec type */ 33619566063dSJacob Faibussowitsch PetscCall(MatSetVecType(B,A->defaultvectype)); 33629566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N)); 33639566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->rmap,dof)); 33649566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->cmap,dof)); 33659566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 33669566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 336726fbe8dcSKarl Rupp 3368cd3bbe55SBarry Smith B->assembled = PETSC_TRUE; 3369d72c5c08SBarry Smith 33709566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 3371f4a53059SBarry Smith if (size == 1) { 3372feb9fe23SJed Brown Mat_SeqMAIJ *b; 3373feb9fe23SJed Brown 33749566063dSJacob Faibussowitsch PetscCall(MatSetType(B,MATSEQMAIJ)); 337526fbe8dcSKarl Rupp 33760298fd71SBarry Smith B->ops->setup = NULL; 33774cb9d9b8SBarry Smith B->ops->destroy = MatDestroy_SeqMAIJ; 33780fd73130SBarry Smith B->ops->view = MatView_SeqMAIJ; 33794222ddf1SHong Zhang 3380feb9fe23SJed Brown b = (Mat_SeqMAIJ*)B->data; 3381b9b97703SBarry Smith b->dof = dof; 33824cb9d9b8SBarry Smith b->AIJ = A; 338326fbe8dcSKarl Rupp 3384d72c5c08SBarry Smith if (dof == 2) { 3385cd3bbe55SBarry Smith B->ops->mult = MatMult_SeqMAIJ_2; 3386cd3bbe55SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_2; 3387cd3bbe55SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 3388cd3bbe55SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 3389bcc973b7SBarry Smith } else if (dof == 3) { 3390bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_3; 3391bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_3; 3392bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 3393bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 3394bcc973b7SBarry Smith } else if (dof == 4) { 3395bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_4; 3396bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_4; 3397bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 3398bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 3399f9fae5adSBarry Smith } else if (dof == 5) { 3400f9fae5adSBarry Smith B->ops->mult = MatMult_SeqMAIJ_5; 3401f9fae5adSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_5; 3402f9fae5adSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 3403f9fae5adSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 34046cd98798SBarry Smith } else if (dof == 6) { 34056cd98798SBarry Smith B->ops->mult = MatMult_SeqMAIJ_6; 34066cd98798SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_6; 34076cd98798SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 34086cd98798SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 3409ed8eea03SBarry Smith } else if (dof == 7) { 3410ed8eea03SBarry Smith B->ops->mult = MatMult_SeqMAIJ_7; 3411ed8eea03SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_7; 3412ed8eea03SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 3413ed8eea03SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 341466ed3db0SBarry Smith } else if (dof == 8) { 341566ed3db0SBarry Smith B->ops->mult = MatMult_SeqMAIJ_8; 341666ed3db0SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_8; 341766ed3db0SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 341866ed3db0SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 34192b692628SMatthew Knepley } else if (dof == 9) { 34202b692628SMatthew Knepley B->ops->mult = MatMult_SeqMAIJ_9; 34212b692628SMatthew Knepley B->ops->multadd = MatMultAdd_SeqMAIJ_9; 34222b692628SMatthew Knepley B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 34232b692628SMatthew Knepley B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 3424b9cda22cSBarry Smith } else if (dof == 10) { 3425b9cda22cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_10; 3426b9cda22cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_10; 3427b9cda22cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 3428b9cda22cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 3429dbdd7285SBarry Smith } else if (dof == 11) { 3430dbdd7285SBarry Smith B->ops->mult = MatMult_SeqMAIJ_11; 3431dbdd7285SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_11; 3432dbdd7285SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 3433dbdd7285SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 34342f7816d4SBarry Smith } else if (dof == 16) { 34352f7816d4SBarry Smith B->ops->mult = MatMult_SeqMAIJ_16; 34362f7816d4SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_16; 34372f7816d4SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 34382f7816d4SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 3439ed1c418cSBarry Smith } else if (dof == 18) { 3440ed1c418cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_18; 3441ed1c418cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_18; 3442ed1c418cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 3443ed1c418cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 344482b1193eSBarry Smith } else { 34456861f193SBarry Smith B->ops->mult = MatMult_SeqMAIJ_N; 34466861f193SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_N; 34476861f193SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 34486861f193SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 344982b1193eSBarry Smith } 3450fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 34519566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaijcusparse_C",MatConvert_SeqMAIJ_SeqAIJ)); 3452fdc842d1SBarry Smith #endif 34539566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ)); 34549566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaij_seqmaij_C",MatProductSetFromOptions_SeqAIJ_SeqMAIJ)); 3455f4a53059SBarry Smith } else { 3456f4a53059SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)A->data; 3457feb9fe23SJed Brown Mat_MPIMAIJ *b; 3458f4a53059SBarry Smith IS from,to; 3459f4a53059SBarry Smith Vec gvec; 3460f4a53059SBarry Smith 34619566063dSJacob Faibussowitsch PetscCall(MatSetType(B,MATMPIMAIJ)); 346226fbe8dcSKarl Rupp 34630298fd71SBarry Smith B->ops->setup = NULL; 3464d72c5c08SBarry Smith B->ops->destroy = MatDestroy_MPIMAIJ; 34650fd73130SBarry Smith B->ops->view = MatView_MPIMAIJ; 346626fbe8dcSKarl Rupp 3467b9b97703SBarry Smith b = (Mat_MPIMAIJ*)B->data; 3468b9b97703SBarry Smith b->dof = dof; 3469b9b97703SBarry Smith b->A = A; 347026fbe8dcSKarl Rupp 34719566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(mpiaij->A,-dof,&b->AIJ)); 34729566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(mpiaij->B,-dof,&b->OAIJ)); 34734cb9d9b8SBarry Smith 34749566063dSJacob Faibussowitsch PetscCall(VecGetSize(mpiaij->lvec,&n)); 34759566063dSJacob Faibussowitsch PetscCall(VecCreate(PETSC_COMM_SELF,&b->w)); 34769566063dSJacob Faibussowitsch PetscCall(VecSetSizes(b->w,n*dof,n*dof)); 34779566063dSJacob Faibussowitsch PetscCall(VecSetBlockSize(b->w,dof)); 34789566063dSJacob Faibussowitsch PetscCall(VecSetType(b->w,VECSEQ)); 3479f4a53059SBarry Smith 3480f4a53059SBarry Smith /* create two temporary Index sets for build scatter gather */ 34819566063dSJacob Faibussowitsch PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from)); 34829566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to)); 3483f4a53059SBarry Smith 3484f4a53059SBarry Smith /* create temporary global vector to generate scatter context */ 34859566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec)); 3486f4a53059SBarry Smith 3487f4a53059SBarry Smith /* generate the scatter context */ 34889566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(gvec,from,b->w,to,&b->ctx)); 3489f4a53059SBarry Smith 34909566063dSJacob Faibussowitsch PetscCall(ISDestroy(&from)); 34919566063dSJacob Faibussowitsch PetscCall(ISDestroy(&to)); 34929566063dSJacob Faibussowitsch PetscCall(VecDestroy(&gvec)); 3493f4a53059SBarry Smith 3494f4a53059SBarry Smith B->ops->mult = MatMult_MPIMAIJ_dof; 34954cb9d9b8SBarry Smith B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 34964cb9d9b8SBarry Smith B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 34974cb9d9b8SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 349826fbe8dcSKarl Rupp 3499fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 35009566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaijcusparse_C",MatConvert_MPIMAIJ_MPIAIJ)); 3501fdc842d1SBarry Smith #endif 35029566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ)); 35039566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpimaij_C",MatProductSetFromOptions_MPIAIJ_MPIMAIJ)); 3504f4a53059SBarry Smith } 35057dae84e0SHong Zhang B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ; 3506ec626240SStefano Zampini B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ; 35079566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 3508fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 3509fdc842d1SBarry Smith /* temporary until we have CUDA implementation of MAIJ */ 3510fdc842d1SBarry Smith { 3511fdc842d1SBarry Smith PetscBool flg; 3512fdc842d1SBarry Smith if (convert) { 35139566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)A,&flg,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,MATAIJCUSPARSE,"")); 3514fdc842d1SBarry Smith if (flg) { 35159566063dSJacob Faibussowitsch PetscCall(MatConvert(B,((PetscObject)A)->type_name,MAT_INPLACE_MATRIX,&B)); 3516fdc842d1SBarry Smith } 3517fdc842d1SBarry Smith } 3518fdc842d1SBarry Smith } 3519fdc842d1SBarry Smith #endif 3520cd3bbe55SBarry Smith *maij = B; 3521d72c5c08SBarry Smith } 352282b1193eSBarry Smith PetscFunctionReturn(0); 352382b1193eSBarry Smith } 3524