1be1d678aSKris Buschelman 282b1193eSBarry Smith /* 3cd3bbe55SBarry Smith Defines the basic matrix operations for the MAIJ matrix storage format. 4cd3bbe55SBarry Smith This format is used for restriction and interpolation operations for 5cd3bbe55SBarry Smith multicomponent problems. It interpolates each component the same way 6cd3bbe55SBarry Smith independently. 7cd3bbe55SBarry Smith 8cd3bbe55SBarry Smith We provide: 9cd3bbe55SBarry Smith MatMult() 10cd3bbe55SBarry Smith MatMultTranspose() 11cd3bbe55SBarry Smith MatMultTransposeAdd() 12cd3bbe55SBarry Smith MatMultAdd() 13cd3bbe55SBarry Smith and 14cd3bbe55SBarry Smith MatCreateMAIJ(Mat,dof,Mat*) 15f4a53059SBarry Smith 16f4a53059SBarry Smith This single directory handles both the sequential and parallel codes 1782b1193eSBarry Smith */ 1882b1193eSBarry Smith 19c6db04a5SJed Brown #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 20c6db04a5SJed Brown #include <../src/mat/utils/freespace.h> 2182b1193eSBarry Smith 22b350b9acSSatish Balay /*@ 23ff585edeSJed Brown MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix 24ff585edeSJed Brown 25ff585edeSJed Brown Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel 26ff585edeSJed Brown 27ff585edeSJed Brown Input Parameter: 28ff585edeSJed Brown . A - the MAIJ matrix 29ff585edeSJed Brown 30ff585edeSJed Brown Output Parameter: 31ff585edeSJed Brown . B - the AIJ matrix 32ff585edeSJed Brown 33ff585edeSJed Brown Level: advanced 34ff585edeSJed Brown 3595452b02SPatrick Sanan Notes: 3695452b02SPatrick Sanan The reference count on the AIJ matrix is not increased so you should not destroy it. 37ff585edeSJed Brown 38db781477SPatrick Sanan .seealso: `MatCreateMAIJ()` 39ff585edeSJed Brown @*/ 407087cfbeSBarry Smith PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B) 41b9b97703SBarry Smith { 42ace3abfcSBarry Smith PetscBool ismpimaij,isseqmaij; 43b9b97703SBarry Smith 44b9b97703SBarry Smith PetscFunctionBegin; 459566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij)); 469566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij)); 47b9b97703SBarry Smith if (ismpimaij) { 48b9b97703SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 49b9b97703SBarry Smith 50b9b97703SBarry Smith *B = b->A; 51b9b97703SBarry Smith } else if (isseqmaij) { 52b9b97703SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 53b9b97703SBarry Smith 54b9b97703SBarry Smith *B = b->AIJ; 55b9b97703SBarry Smith } else { 56b9b97703SBarry Smith *B = A; 57b9b97703SBarry Smith } 58b9b97703SBarry Smith PetscFunctionReturn(0); 59b9b97703SBarry Smith } 60b9b97703SBarry Smith 61480dffcfSJed Brown /*@ 62ff585edeSJed Brown MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size 63ff585edeSJed Brown 643f9fe445SBarry Smith Logically Collective 65ff585edeSJed Brown 66d8d19677SJose E. Roman Input Parameters: 67ff585edeSJed Brown + A - the MAIJ matrix 68ff585edeSJed Brown - dof - the block size for the new matrix 69ff585edeSJed Brown 70ff585edeSJed Brown Output Parameter: 71ff585edeSJed Brown . B - the new MAIJ matrix 72ff585edeSJed Brown 73ff585edeSJed Brown Level: advanced 74ff585edeSJed Brown 75db781477SPatrick Sanan .seealso: `MatCreateMAIJ()` 76ff585edeSJed Brown @*/ 777087cfbeSBarry Smith PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 78b9b97703SBarry Smith { 790298fd71SBarry Smith Mat Aij = NULL; 80b9b97703SBarry Smith 81b9b97703SBarry Smith PetscFunctionBegin; 82c5eb9154SBarry Smith PetscValidLogicalCollectiveInt(A,dof,2); 839566063dSJacob Faibussowitsch PetscCall(MatMAIJGetAIJ(A,&Aij)); 849566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(Aij,dof,B)); 85b9b97703SBarry Smith PetscFunctionReturn(0); 86b9b97703SBarry Smith } 87b9b97703SBarry Smith 88dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 8982b1193eSBarry Smith { 904cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 9182b1193eSBarry Smith 9282b1193eSBarry Smith PetscFunctionBegin; 939566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->AIJ)); 949566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 95*2e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaijcusparse_C",NULL)); 969566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL)); 979566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaij_seqmaij_C",NULL)); 984cb9d9b8SBarry Smith PetscFunctionReturn(0); 994cb9d9b8SBarry Smith } 1004cb9d9b8SBarry Smith 101356c569eSBarry Smith PetscErrorCode MatSetUp_MAIJ(Mat A) 102356c569eSBarry Smith { 103356c569eSBarry Smith PetscFunctionBegin; 104ce94432eSBarry Smith SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices"); 105356c569eSBarry Smith } 106356c569eSBarry Smith 1070fd73130SBarry Smith PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 1080fd73130SBarry Smith { 1090fd73130SBarry Smith Mat B; 1100fd73130SBarry Smith 1110fd73130SBarry Smith PetscFunctionBegin; 1129566063dSJacob Faibussowitsch PetscCall(MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B)); 1139566063dSJacob Faibussowitsch PetscCall(MatView(B,viewer)); 1149566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 1150fd73130SBarry Smith PetscFunctionReturn(0); 1160fd73130SBarry Smith } 1170fd73130SBarry Smith 1180fd73130SBarry Smith PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 1190fd73130SBarry Smith { 1200fd73130SBarry Smith Mat B; 1210fd73130SBarry Smith 1220fd73130SBarry Smith PetscFunctionBegin; 1239566063dSJacob Faibussowitsch PetscCall(MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B)); 1249566063dSJacob Faibussowitsch PetscCall(MatView(B,viewer)); 1259566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 1260fd73130SBarry Smith PetscFunctionReturn(0); 1270fd73130SBarry Smith } 1280fd73130SBarry Smith 129dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 1304cb9d9b8SBarry Smith { 1314cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1324cb9d9b8SBarry Smith 1334cb9d9b8SBarry Smith PetscFunctionBegin; 1349566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->AIJ)); 1359566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->OAIJ)); 1369566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->A)); 1379566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&b->ctx)); 1389566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->w)); 1399566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 140*2e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaijcusparse_C",NULL)); 1419566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL)); 1429566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_mpiaij_mpimaij_C",NULL)); 1439566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A,NULL)); 144b9b97703SBarry Smith PetscFunctionReturn(0); 145b9b97703SBarry Smith } 146b9b97703SBarry Smith 1470bad9183SKris Buschelman /*MC 148fafad747SKris Buschelman MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 1490bad9183SKris Buschelman multicomponent problems, interpolating or restricting each component the same way independently. 1500bad9183SKris Buschelman The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 1510bad9183SKris Buschelman 1520bad9183SKris Buschelman Operations provided: 15367b8a455SSatish Balay .vb 15467b8a455SSatish Balay MatMult 15567b8a455SSatish Balay MatMultTranspose 15667b8a455SSatish Balay MatMultAdd 15767b8a455SSatish Balay MatMultTransposeAdd 15867b8a455SSatish Balay .ve 1590bad9183SKris Buschelman 1600bad9183SKris Buschelman Level: advanced 1610bad9183SKris Buschelman 162db781477SPatrick Sanan .seealso: `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()` 1630bad9183SKris Buschelman M*/ 1640bad9183SKris Buschelman 1658cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) 16682b1193eSBarry Smith { 1674cb9d9b8SBarry Smith Mat_MPIMAIJ *b; 16864b52464SHong Zhang PetscMPIInt size; 16982b1193eSBarry Smith 17082b1193eSBarry Smith PetscFunctionBegin; 1719566063dSJacob Faibussowitsch PetscCall(PetscNewLog(A,&b)); 172b0a32e0cSBarry Smith A->data = (void*)b; 17326fbe8dcSKarl Rupp 1749566063dSJacob Faibussowitsch PetscCall(PetscMemzero(A->ops,sizeof(struct _MatOps))); 17526fbe8dcSKarl Rupp 176356c569eSBarry Smith A->ops->setup = MatSetUp_MAIJ; 177f4a53059SBarry Smith 178f4259b30SLisandro Dalcin b->AIJ = NULL; 179cd3bbe55SBarry Smith b->dof = 0; 180f4259b30SLisandro Dalcin b->OAIJ = NULL; 181f4259b30SLisandro Dalcin b->ctx = NULL; 182f4259b30SLisandro Dalcin b->w = NULL; 1839566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 18464b52464SHong Zhang if (size == 1) { 1859566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ)); 18664b52464SHong Zhang } else { 1879566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ)); 18864b52464SHong Zhang } 18932e7c8b0SBarry Smith A->preallocated = PETSC_TRUE; 19032e7c8b0SBarry Smith A->assembled = PETSC_TRUE; 19182b1193eSBarry Smith PetscFunctionReturn(0); 19282b1193eSBarry Smith } 19382b1193eSBarry Smith 194cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 195dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 19682b1193eSBarry Smith { 1974cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 198bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 199d2840e09SBarry Smith const PetscScalar *x,*v; 200d2840e09SBarry Smith PetscScalar *y, sum1, sum2; 201d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 202d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 20382b1193eSBarry Smith 204bcc973b7SBarry Smith PetscFunctionBegin; 2059566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 2069566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 207bcc973b7SBarry Smith idx = a->j; 208bcc973b7SBarry Smith v = a->a; 209bcc973b7SBarry Smith ii = a->i; 210bcc973b7SBarry Smith 211bcc973b7SBarry Smith for (i=0; i<m; i++) { 212bcc973b7SBarry Smith jrow = ii[i]; 213bcc973b7SBarry Smith n = ii[i+1] - jrow; 214bcc973b7SBarry Smith sum1 = 0.0; 215bcc973b7SBarry Smith sum2 = 0.0; 21626fbe8dcSKarl Rupp 217b3c51c66SMatthew Knepley nonzerorow += (n>0); 218bcc973b7SBarry Smith for (j=0; j<n; j++) { 219bcc973b7SBarry Smith sum1 += v[jrow]*x[2*idx[jrow]]; 220bcc973b7SBarry Smith sum2 += v[jrow]*x[2*idx[jrow]+1]; 221bcc973b7SBarry Smith jrow++; 222bcc973b7SBarry Smith } 223bcc973b7SBarry Smith y[2*i] = sum1; 224bcc973b7SBarry Smith y[2*i+1] = sum2; 225bcc973b7SBarry Smith } 226bcc973b7SBarry Smith 2279566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(4.0*a->nz - 2.0*nonzerorow)); 2289566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 2299566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 23082b1193eSBarry Smith PetscFunctionReturn(0); 23182b1193eSBarry Smith } 232bcc973b7SBarry Smith 233dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 23482b1193eSBarry Smith { 2354cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 236bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 237d2840e09SBarry Smith const PetscScalar *x,*v; 238d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2; 239d2840e09SBarry Smith PetscInt n,i; 240d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 24182b1193eSBarry Smith 242bcc973b7SBarry Smith PetscFunctionBegin; 2439566063dSJacob Faibussowitsch PetscCall(VecSet(yy,0.0)); 2449566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 2459566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 246bfec09a0SHong Zhang 247bcc973b7SBarry Smith for (i=0; i<m; i++) { 248bfec09a0SHong Zhang idx = a->j + a->i[i]; 249bfec09a0SHong Zhang v = a->a + a->i[i]; 250bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 251bcc973b7SBarry Smith alpha1 = x[2*i]; 252bcc973b7SBarry Smith alpha2 = x[2*i+1]; 25326fbe8dcSKarl Rupp while (n-->0) { 25426fbe8dcSKarl Rupp y[2*(*idx)] += alpha1*(*v); 25526fbe8dcSKarl Rupp y[2*(*idx)+1] += alpha2*(*v); 25626fbe8dcSKarl Rupp idx++; v++; 25726fbe8dcSKarl Rupp } 258bcc973b7SBarry Smith } 2599566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(4.0*a->nz)); 2609566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 2619566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 26282b1193eSBarry Smith PetscFunctionReturn(0); 26382b1193eSBarry Smith } 264bcc973b7SBarry Smith 265dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 26682b1193eSBarry Smith { 2674cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 268bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 269d2840e09SBarry Smith const PetscScalar *x,*v; 270d2840e09SBarry Smith PetscScalar *y,sum1, sum2; 271b24ad042SBarry Smith PetscInt n,i,jrow,j; 272d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 27382b1193eSBarry Smith 274bcc973b7SBarry Smith PetscFunctionBegin; 2759566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 2769566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 2779566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 278bcc973b7SBarry Smith idx = a->j; 279bcc973b7SBarry Smith v = a->a; 280bcc973b7SBarry Smith ii = a->i; 281bcc973b7SBarry Smith 282bcc973b7SBarry Smith for (i=0; i<m; i++) { 283bcc973b7SBarry Smith jrow = ii[i]; 284bcc973b7SBarry Smith n = ii[i+1] - jrow; 285bcc973b7SBarry Smith sum1 = 0.0; 286bcc973b7SBarry Smith sum2 = 0.0; 287bcc973b7SBarry Smith for (j=0; j<n; j++) { 288bcc973b7SBarry Smith sum1 += v[jrow]*x[2*idx[jrow]]; 289bcc973b7SBarry Smith sum2 += v[jrow]*x[2*idx[jrow]+1]; 290bcc973b7SBarry Smith jrow++; 291bcc973b7SBarry Smith } 292bcc973b7SBarry Smith y[2*i] += sum1; 293bcc973b7SBarry Smith y[2*i+1] += sum2; 294bcc973b7SBarry Smith } 295bcc973b7SBarry Smith 2969566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(4.0*a->nz)); 2979566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 2989566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 299bcc973b7SBarry Smith PetscFunctionReturn(0); 30082b1193eSBarry Smith } 301dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 30282b1193eSBarry Smith { 3034cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 304bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 305d2840e09SBarry Smith const PetscScalar *x,*v; 306d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2; 307d2840e09SBarry Smith PetscInt n,i; 308d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 30982b1193eSBarry Smith 310bcc973b7SBarry Smith PetscFunctionBegin; 3119566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 3129566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 3139566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 314bfec09a0SHong Zhang 315bcc973b7SBarry Smith for (i=0; i<m; i++) { 316bfec09a0SHong Zhang idx = a->j + a->i[i]; 317bfec09a0SHong Zhang v = a->a + a->i[i]; 318bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 319bcc973b7SBarry Smith alpha1 = x[2*i]; 320bcc973b7SBarry Smith alpha2 = x[2*i+1]; 32126fbe8dcSKarl Rupp while (n-->0) { 32226fbe8dcSKarl Rupp y[2*(*idx)] += alpha1*(*v); 32326fbe8dcSKarl Rupp y[2*(*idx)+1] += alpha2*(*v); 32426fbe8dcSKarl Rupp idx++; v++; 32526fbe8dcSKarl Rupp } 326bcc973b7SBarry Smith } 3279566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(4.0*a->nz)); 3289566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 3299566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 330bcc973b7SBarry Smith PetscFunctionReturn(0); 33182b1193eSBarry Smith } 332cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 333dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 334bcc973b7SBarry Smith { 3354cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 336bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 337d2840e09SBarry Smith const PetscScalar *x,*v; 338d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3; 339d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 340d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 34182b1193eSBarry Smith 342bcc973b7SBarry Smith PetscFunctionBegin; 3439566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 3449566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 345bcc973b7SBarry Smith idx = a->j; 346bcc973b7SBarry Smith v = a->a; 347bcc973b7SBarry Smith ii = a->i; 348bcc973b7SBarry Smith 349bcc973b7SBarry Smith for (i=0; i<m; i++) { 350bcc973b7SBarry Smith jrow = ii[i]; 351bcc973b7SBarry Smith n = ii[i+1] - jrow; 352bcc973b7SBarry Smith sum1 = 0.0; 353bcc973b7SBarry Smith sum2 = 0.0; 354bcc973b7SBarry Smith sum3 = 0.0; 35526fbe8dcSKarl Rupp 356b3c51c66SMatthew Knepley nonzerorow += (n>0); 357bcc973b7SBarry Smith for (j=0; j<n; j++) { 358bcc973b7SBarry Smith sum1 += v[jrow]*x[3*idx[jrow]]; 359bcc973b7SBarry Smith sum2 += v[jrow]*x[3*idx[jrow]+1]; 360bcc973b7SBarry Smith sum3 += v[jrow]*x[3*idx[jrow]+2]; 361bcc973b7SBarry Smith jrow++; 362bcc973b7SBarry Smith } 363bcc973b7SBarry Smith y[3*i] = sum1; 364bcc973b7SBarry Smith y[3*i+1] = sum2; 365bcc973b7SBarry Smith y[3*i+2] = sum3; 366bcc973b7SBarry Smith } 367bcc973b7SBarry Smith 3689566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(6.0*a->nz - 3.0*nonzerorow)); 3699566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 3709566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 371bcc973b7SBarry Smith PetscFunctionReturn(0); 372bcc973b7SBarry Smith } 373bcc973b7SBarry Smith 374dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 375bcc973b7SBarry Smith { 3764cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 377bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 378d2840e09SBarry Smith const PetscScalar *x,*v; 379d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3; 380d2840e09SBarry Smith PetscInt n,i; 381d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 382bcc973b7SBarry Smith 383bcc973b7SBarry Smith PetscFunctionBegin; 3849566063dSJacob Faibussowitsch PetscCall(VecSet(yy,0.0)); 3859566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 3869566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 387bfec09a0SHong Zhang 388bcc973b7SBarry Smith for (i=0; i<m; i++) { 389bfec09a0SHong Zhang idx = a->j + a->i[i]; 390bfec09a0SHong Zhang v = a->a + a->i[i]; 391bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 392bcc973b7SBarry Smith alpha1 = x[3*i]; 393bcc973b7SBarry Smith alpha2 = x[3*i+1]; 394bcc973b7SBarry Smith alpha3 = x[3*i+2]; 395bcc973b7SBarry Smith while (n-->0) { 396bcc973b7SBarry Smith y[3*(*idx)] += alpha1*(*v); 397bcc973b7SBarry Smith y[3*(*idx)+1] += alpha2*(*v); 398bcc973b7SBarry Smith y[3*(*idx)+2] += alpha3*(*v); 399bcc973b7SBarry Smith idx++; v++; 400bcc973b7SBarry Smith } 401bcc973b7SBarry Smith } 4029566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(6.0*a->nz)); 4039566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 4049566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 405bcc973b7SBarry Smith PetscFunctionReturn(0); 406bcc973b7SBarry Smith } 407bcc973b7SBarry Smith 408dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 409bcc973b7SBarry Smith { 4104cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 411bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 412d2840e09SBarry Smith const PetscScalar *x,*v; 413d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3; 414b24ad042SBarry Smith PetscInt n,i,jrow,j; 415d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 416bcc973b7SBarry Smith 417bcc973b7SBarry Smith PetscFunctionBegin; 4189566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 4199566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 4209566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 421bcc973b7SBarry Smith idx = a->j; 422bcc973b7SBarry Smith v = a->a; 423bcc973b7SBarry Smith ii = a->i; 424bcc973b7SBarry Smith 425bcc973b7SBarry Smith for (i=0; i<m; i++) { 426bcc973b7SBarry Smith jrow = ii[i]; 427bcc973b7SBarry Smith n = ii[i+1] - jrow; 428bcc973b7SBarry Smith sum1 = 0.0; 429bcc973b7SBarry Smith sum2 = 0.0; 430bcc973b7SBarry Smith sum3 = 0.0; 431bcc973b7SBarry Smith for (j=0; j<n; j++) { 432bcc973b7SBarry Smith sum1 += v[jrow]*x[3*idx[jrow]]; 433bcc973b7SBarry Smith sum2 += v[jrow]*x[3*idx[jrow]+1]; 434bcc973b7SBarry Smith sum3 += v[jrow]*x[3*idx[jrow]+2]; 435bcc973b7SBarry Smith jrow++; 436bcc973b7SBarry Smith } 437bcc973b7SBarry Smith y[3*i] += sum1; 438bcc973b7SBarry Smith y[3*i+1] += sum2; 439bcc973b7SBarry Smith y[3*i+2] += sum3; 440bcc973b7SBarry Smith } 441bcc973b7SBarry Smith 4429566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(6.0*a->nz)); 4439566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 4449566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 445bcc973b7SBarry Smith PetscFunctionReturn(0); 446bcc973b7SBarry Smith } 447dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 448bcc973b7SBarry Smith { 4494cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 450bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 451d2840e09SBarry Smith const PetscScalar *x,*v; 452d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3; 453d2840e09SBarry Smith PetscInt n,i; 454d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 455bcc973b7SBarry Smith 456bcc973b7SBarry Smith PetscFunctionBegin; 4579566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 4589566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 4599566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 460bcc973b7SBarry Smith for (i=0; i<m; i++) { 461bfec09a0SHong Zhang idx = a->j + a->i[i]; 462bfec09a0SHong Zhang v = a->a + a->i[i]; 463bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 464bcc973b7SBarry Smith alpha1 = x[3*i]; 465bcc973b7SBarry Smith alpha2 = x[3*i+1]; 466bcc973b7SBarry Smith alpha3 = x[3*i+2]; 467bcc973b7SBarry Smith while (n-->0) { 468bcc973b7SBarry Smith y[3*(*idx)] += alpha1*(*v); 469bcc973b7SBarry Smith y[3*(*idx)+1] += alpha2*(*v); 470bcc973b7SBarry Smith y[3*(*idx)+2] += alpha3*(*v); 471bcc973b7SBarry Smith idx++; v++; 472bcc973b7SBarry Smith } 473bcc973b7SBarry Smith } 4749566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(6.0*a->nz)); 4759566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 4769566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 477bcc973b7SBarry Smith PetscFunctionReturn(0); 478bcc973b7SBarry Smith } 479bcc973b7SBarry Smith 480bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/ 481dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 482bcc973b7SBarry Smith { 4834cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 484bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 485d2840e09SBarry Smith const PetscScalar *x,*v; 486d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4; 487d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 488d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 489bcc973b7SBarry Smith 490bcc973b7SBarry Smith PetscFunctionBegin; 4919566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 4929566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 493bcc973b7SBarry Smith idx = a->j; 494bcc973b7SBarry Smith v = a->a; 495bcc973b7SBarry Smith ii = a->i; 496bcc973b7SBarry Smith 497bcc973b7SBarry Smith for (i=0; i<m; i++) { 498bcc973b7SBarry Smith jrow = ii[i]; 499bcc973b7SBarry Smith n = ii[i+1] - jrow; 500bcc973b7SBarry Smith sum1 = 0.0; 501bcc973b7SBarry Smith sum2 = 0.0; 502bcc973b7SBarry Smith sum3 = 0.0; 503bcc973b7SBarry Smith sum4 = 0.0; 504b3c51c66SMatthew Knepley nonzerorow += (n>0); 505bcc973b7SBarry Smith for (j=0; j<n; j++) { 506bcc973b7SBarry Smith sum1 += v[jrow]*x[4*idx[jrow]]; 507bcc973b7SBarry Smith sum2 += v[jrow]*x[4*idx[jrow]+1]; 508bcc973b7SBarry Smith sum3 += v[jrow]*x[4*idx[jrow]+2]; 509bcc973b7SBarry Smith sum4 += v[jrow]*x[4*idx[jrow]+3]; 510bcc973b7SBarry Smith jrow++; 511bcc973b7SBarry Smith } 512bcc973b7SBarry Smith y[4*i] = sum1; 513bcc973b7SBarry Smith y[4*i+1] = sum2; 514bcc973b7SBarry Smith y[4*i+2] = sum3; 515bcc973b7SBarry Smith y[4*i+3] = sum4; 516bcc973b7SBarry Smith } 517bcc973b7SBarry Smith 5189566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(8.0*a->nz - 4.0*nonzerorow)); 5199566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 5209566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 521bcc973b7SBarry Smith PetscFunctionReturn(0); 522bcc973b7SBarry Smith } 523bcc973b7SBarry Smith 524dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 525bcc973b7SBarry Smith { 5264cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 527bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 528d2840e09SBarry Smith const PetscScalar *x,*v; 529d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 530d2840e09SBarry Smith PetscInt n,i; 531d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 532bcc973b7SBarry Smith 533bcc973b7SBarry Smith PetscFunctionBegin; 5349566063dSJacob Faibussowitsch PetscCall(VecSet(yy,0.0)); 5359566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 5369566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 537bcc973b7SBarry Smith for (i=0; i<m; i++) { 538bfec09a0SHong Zhang idx = a->j + a->i[i]; 539bfec09a0SHong Zhang v = a->a + a->i[i]; 540bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 541bcc973b7SBarry Smith alpha1 = x[4*i]; 542bcc973b7SBarry Smith alpha2 = x[4*i+1]; 543bcc973b7SBarry Smith alpha3 = x[4*i+2]; 544bcc973b7SBarry Smith alpha4 = x[4*i+3]; 545bcc973b7SBarry Smith while (n-->0) { 546bcc973b7SBarry Smith y[4*(*idx)] += alpha1*(*v); 547bcc973b7SBarry Smith y[4*(*idx)+1] += alpha2*(*v); 548bcc973b7SBarry Smith y[4*(*idx)+2] += alpha3*(*v); 549bcc973b7SBarry Smith y[4*(*idx)+3] += alpha4*(*v); 550bcc973b7SBarry Smith idx++; v++; 551bcc973b7SBarry Smith } 552bcc973b7SBarry Smith } 5539566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(8.0*a->nz)); 5549566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 5559566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 556bcc973b7SBarry Smith PetscFunctionReturn(0); 557bcc973b7SBarry Smith } 558bcc973b7SBarry Smith 559dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 560bcc973b7SBarry Smith { 5614cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 562f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 563d2840e09SBarry Smith const PetscScalar *x,*v; 564d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4; 565b24ad042SBarry Smith PetscInt n,i,jrow,j; 566d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 567f9fae5adSBarry Smith 568f9fae5adSBarry Smith PetscFunctionBegin; 5699566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 5709566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 5719566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 572f9fae5adSBarry Smith idx = a->j; 573f9fae5adSBarry Smith v = a->a; 574f9fae5adSBarry Smith ii = a->i; 575f9fae5adSBarry Smith 576f9fae5adSBarry Smith for (i=0; i<m; i++) { 577f9fae5adSBarry Smith jrow = ii[i]; 578f9fae5adSBarry Smith n = ii[i+1] - jrow; 579f9fae5adSBarry Smith sum1 = 0.0; 580f9fae5adSBarry Smith sum2 = 0.0; 581f9fae5adSBarry Smith sum3 = 0.0; 582f9fae5adSBarry Smith sum4 = 0.0; 583f9fae5adSBarry Smith for (j=0; j<n; j++) { 584f9fae5adSBarry Smith sum1 += v[jrow]*x[4*idx[jrow]]; 585f9fae5adSBarry Smith sum2 += v[jrow]*x[4*idx[jrow]+1]; 586f9fae5adSBarry Smith sum3 += v[jrow]*x[4*idx[jrow]+2]; 587f9fae5adSBarry Smith sum4 += v[jrow]*x[4*idx[jrow]+3]; 588f9fae5adSBarry Smith jrow++; 589f9fae5adSBarry Smith } 590f9fae5adSBarry Smith y[4*i] += sum1; 591f9fae5adSBarry Smith y[4*i+1] += sum2; 592f9fae5adSBarry Smith y[4*i+2] += sum3; 593f9fae5adSBarry Smith y[4*i+3] += sum4; 594f9fae5adSBarry Smith } 595f9fae5adSBarry Smith 5969566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(8.0*a->nz)); 5979566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 5989566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 599f9fae5adSBarry Smith PetscFunctionReturn(0); 600bcc973b7SBarry Smith } 601dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 602bcc973b7SBarry Smith { 6034cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 604f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 605d2840e09SBarry Smith const PetscScalar *x,*v; 606d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 607d2840e09SBarry Smith PetscInt n,i; 608d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 609f9fae5adSBarry Smith 610f9fae5adSBarry Smith PetscFunctionBegin; 6119566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 6129566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 6139566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 614bfec09a0SHong Zhang 615f9fae5adSBarry Smith for (i=0; i<m; i++) { 616bfec09a0SHong Zhang idx = a->j + a->i[i]; 617bfec09a0SHong Zhang v = a->a + a->i[i]; 618f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 619f9fae5adSBarry Smith alpha1 = x[4*i]; 620f9fae5adSBarry Smith alpha2 = x[4*i+1]; 621f9fae5adSBarry Smith alpha3 = x[4*i+2]; 622f9fae5adSBarry Smith alpha4 = x[4*i+3]; 623f9fae5adSBarry Smith while (n-->0) { 624f9fae5adSBarry Smith y[4*(*idx)] += alpha1*(*v); 625f9fae5adSBarry Smith y[4*(*idx)+1] += alpha2*(*v); 626f9fae5adSBarry Smith y[4*(*idx)+2] += alpha3*(*v); 627f9fae5adSBarry Smith y[4*(*idx)+3] += alpha4*(*v); 628f9fae5adSBarry Smith idx++; v++; 629f9fae5adSBarry Smith } 630f9fae5adSBarry Smith } 6319566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(8.0*a->nz)); 6329566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 6339566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 634f9fae5adSBarry Smith PetscFunctionReturn(0); 635f9fae5adSBarry Smith } 636f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/ 6376cd98798SBarry Smith 638dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 639f9fae5adSBarry Smith { 6404cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 641f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 642d2840e09SBarry Smith const PetscScalar *x,*v; 643d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 644d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 645d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 646f9fae5adSBarry Smith 647f9fae5adSBarry Smith PetscFunctionBegin; 6489566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 6499566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 650f9fae5adSBarry Smith idx = a->j; 651f9fae5adSBarry Smith v = a->a; 652f9fae5adSBarry Smith ii = a->i; 653f9fae5adSBarry Smith 654f9fae5adSBarry Smith for (i=0; i<m; i++) { 655f9fae5adSBarry Smith jrow = ii[i]; 656f9fae5adSBarry Smith n = ii[i+1] - jrow; 657f9fae5adSBarry Smith sum1 = 0.0; 658f9fae5adSBarry Smith sum2 = 0.0; 659f9fae5adSBarry Smith sum3 = 0.0; 660f9fae5adSBarry Smith sum4 = 0.0; 661f9fae5adSBarry Smith sum5 = 0.0; 66226fbe8dcSKarl Rupp 663b3c51c66SMatthew Knepley nonzerorow += (n>0); 664f9fae5adSBarry Smith for (j=0; j<n; j++) { 665f9fae5adSBarry Smith sum1 += v[jrow]*x[5*idx[jrow]]; 666f9fae5adSBarry Smith sum2 += v[jrow]*x[5*idx[jrow]+1]; 667f9fae5adSBarry Smith sum3 += v[jrow]*x[5*idx[jrow]+2]; 668f9fae5adSBarry Smith sum4 += v[jrow]*x[5*idx[jrow]+3]; 669f9fae5adSBarry Smith sum5 += v[jrow]*x[5*idx[jrow]+4]; 670f9fae5adSBarry Smith jrow++; 671f9fae5adSBarry Smith } 672f9fae5adSBarry Smith y[5*i] = sum1; 673f9fae5adSBarry Smith y[5*i+1] = sum2; 674f9fae5adSBarry Smith y[5*i+2] = sum3; 675f9fae5adSBarry Smith y[5*i+3] = sum4; 676f9fae5adSBarry Smith y[5*i+4] = sum5; 677f9fae5adSBarry Smith } 678f9fae5adSBarry Smith 6799566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(10.0*a->nz - 5.0*nonzerorow)); 6809566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 6819566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 682f9fae5adSBarry Smith PetscFunctionReturn(0); 683f9fae5adSBarry Smith } 684f9fae5adSBarry Smith 685dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 686f9fae5adSBarry Smith { 6874cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 688f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 689d2840e09SBarry Smith const PetscScalar *x,*v; 690d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 691d2840e09SBarry Smith PetscInt n,i; 692d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 693f9fae5adSBarry Smith 694f9fae5adSBarry Smith PetscFunctionBegin; 6959566063dSJacob Faibussowitsch PetscCall(VecSet(yy,0.0)); 6969566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 6979566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 698bfec09a0SHong Zhang 699f9fae5adSBarry Smith for (i=0; i<m; i++) { 700bfec09a0SHong Zhang idx = a->j + a->i[i]; 701bfec09a0SHong Zhang v = a->a + a->i[i]; 702f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 703f9fae5adSBarry Smith alpha1 = x[5*i]; 704f9fae5adSBarry Smith alpha2 = x[5*i+1]; 705f9fae5adSBarry Smith alpha3 = x[5*i+2]; 706f9fae5adSBarry Smith alpha4 = x[5*i+3]; 707f9fae5adSBarry Smith alpha5 = x[5*i+4]; 708f9fae5adSBarry Smith while (n-->0) { 709f9fae5adSBarry Smith y[5*(*idx)] += alpha1*(*v); 710f9fae5adSBarry Smith y[5*(*idx)+1] += alpha2*(*v); 711f9fae5adSBarry Smith y[5*(*idx)+2] += alpha3*(*v); 712f9fae5adSBarry Smith y[5*(*idx)+3] += alpha4*(*v); 713f9fae5adSBarry Smith y[5*(*idx)+4] += alpha5*(*v); 714f9fae5adSBarry Smith idx++; v++; 715f9fae5adSBarry Smith } 716f9fae5adSBarry Smith } 7179566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(10.0*a->nz)); 7189566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 7199566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 720f9fae5adSBarry Smith PetscFunctionReturn(0); 721f9fae5adSBarry Smith } 722f9fae5adSBarry Smith 723dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 724f9fae5adSBarry Smith { 7254cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 726f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 727d2840e09SBarry Smith const PetscScalar *x,*v; 728d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 729b24ad042SBarry Smith PetscInt n,i,jrow,j; 730d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 731f9fae5adSBarry Smith 732f9fae5adSBarry Smith PetscFunctionBegin; 7339566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 7349566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 7359566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 736f9fae5adSBarry Smith idx = a->j; 737f9fae5adSBarry Smith v = a->a; 738f9fae5adSBarry Smith ii = a->i; 739f9fae5adSBarry Smith 740f9fae5adSBarry Smith for (i=0; i<m; i++) { 741f9fae5adSBarry Smith jrow = ii[i]; 742f9fae5adSBarry Smith n = ii[i+1] - jrow; 743f9fae5adSBarry Smith sum1 = 0.0; 744f9fae5adSBarry Smith sum2 = 0.0; 745f9fae5adSBarry Smith sum3 = 0.0; 746f9fae5adSBarry Smith sum4 = 0.0; 747f9fae5adSBarry Smith sum5 = 0.0; 748f9fae5adSBarry Smith for (j=0; j<n; j++) { 749f9fae5adSBarry Smith sum1 += v[jrow]*x[5*idx[jrow]]; 750f9fae5adSBarry Smith sum2 += v[jrow]*x[5*idx[jrow]+1]; 751f9fae5adSBarry Smith sum3 += v[jrow]*x[5*idx[jrow]+2]; 752f9fae5adSBarry Smith sum4 += v[jrow]*x[5*idx[jrow]+3]; 753f9fae5adSBarry Smith sum5 += v[jrow]*x[5*idx[jrow]+4]; 754f9fae5adSBarry Smith jrow++; 755f9fae5adSBarry Smith } 756f9fae5adSBarry Smith y[5*i] += sum1; 757f9fae5adSBarry Smith y[5*i+1] += sum2; 758f9fae5adSBarry Smith y[5*i+2] += sum3; 759f9fae5adSBarry Smith y[5*i+3] += sum4; 760f9fae5adSBarry Smith y[5*i+4] += sum5; 761f9fae5adSBarry Smith } 762f9fae5adSBarry Smith 7639566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(10.0*a->nz)); 7649566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 7659566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 766f9fae5adSBarry Smith PetscFunctionReturn(0); 767f9fae5adSBarry Smith } 768f9fae5adSBarry Smith 769dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 770f9fae5adSBarry Smith { 7714cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 772f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 773d2840e09SBarry Smith const PetscScalar *x,*v; 774d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 775d2840e09SBarry Smith PetscInt n,i; 776d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 777f9fae5adSBarry Smith 778f9fae5adSBarry Smith PetscFunctionBegin; 7799566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 7809566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 7819566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 782bfec09a0SHong Zhang 783f9fae5adSBarry Smith for (i=0; i<m; i++) { 784bfec09a0SHong Zhang idx = a->j + a->i[i]; 785bfec09a0SHong Zhang v = a->a + a->i[i]; 786f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 787f9fae5adSBarry Smith alpha1 = x[5*i]; 788f9fae5adSBarry Smith alpha2 = x[5*i+1]; 789f9fae5adSBarry Smith alpha3 = x[5*i+2]; 790f9fae5adSBarry Smith alpha4 = x[5*i+3]; 791f9fae5adSBarry Smith alpha5 = x[5*i+4]; 792f9fae5adSBarry Smith while (n-->0) { 793f9fae5adSBarry Smith y[5*(*idx)] += alpha1*(*v); 794f9fae5adSBarry Smith y[5*(*idx)+1] += alpha2*(*v); 795f9fae5adSBarry Smith y[5*(*idx)+2] += alpha3*(*v); 796f9fae5adSBarry Smith y[5*(*idx)+3] += alpha4*(*v); 797f9fae5adSBarry Smith y[5*(*idx)+4] += alpha5*(*v); 798f9fae5adSBarry Smith idx++; v++; 799f9fae5adSBarry Smith } 800f9fae5adSBarry Smith } 8019566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(10.0*a->nz)); 8029566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 8039566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 804f9fae5adSBarry Smith PetscFunctionReturn(0); 805bcc973b7SBarry Smith } 806bcc973b7SBarry Smith 8076cd98798SBarry Smith /* ------------------------------------------------------------------------------*/ 808dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 8096cd98798SBarry Smith { 8106cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 8116cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 812d2840e09SBarry Smith const PetscScalar *x,*v; 813d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 814d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 815d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 8166cd98798SBarry Smith 8176cd98798SBarry Smith PetscFunctionBegin; 8189566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 8199566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 8206cd98798SBarry Smith idx = a->j; 8216cd98798SBarry Smith v = a->a; 8226cd98798SBarry Smith ii = a->i; 8236cd98798SBarry Smith 8246cd98798SBarry Smith for (i=0; i<m; i++) { 8256cd98798SBarry Smith jrow = ii[i]; 8266cd98798SBarry Smith n = ii[i+1] - jrow; 8276cd98798SBarry Smith sum1 = 0.0; 8286cd98798SBarry Smith sum2 = 0.0; 8296cd98798SBarry Smith sum3 = 0.0; 8306cd98798SBarry Smith sum4 = 0.0; 8316cd98798SBarry Smith sum5 = 0.0; 8326cd98798SBarry Smith sum6 = 0.0; 83326fbe8dcSKarl Rupp 834b3c51c66SMatthew Knepley nonzerorow += (n>0); 8356cd98798SBarry Smith for (j=0; j<n; j++) { 8366cd98798SBarry Smith sum1 += v[jrow]*x[6*idx[jrow]]; 8376cd98798SBarry Smith sum2 += v[jrow]*x[6*idx[jrow]+1]; 8386cd98798SBarry Smith sum3 += v[jrow]*x[6*idx[jrow]+2]; 8396cd98798SBarry Smith sum4 += v[jrow]*x[6*idx[jrow]+3]; 8406cd98798SBarry Smith sum5 += v[jrow]*x[6*idx[jrow]+4]; 8416cd98798SBarry Smith sum6 += v[jrow]*x[6*idx[jrow]+5]; 8426cd98798SBarry Smith jrow++; 8436cd98798SBarry Smith } 8446cd98798SBarry Smith y[6*i] = sum1; 8456cd98798SBarry Smith y[6*i+1] = sum2; 8466cd98798SBarry Smith y[6*i+2] = sum3; 8476cd98798SBarry Smith y[6*i+3] = sum4; 8486cd98798SBarry Smith y[6*i+4] = sum5; 8496cd98798SBarry Smith y[6*i+5] = sum6; 8506cd98798SBarry Smith } 8516cd98798SBarry Smith 8529566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(12.0*a->nz - 6.0*nonzerorow)); 8539566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 8549566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 8556cd98798SBarry Smith PetscFunctionReturn(0); 8566cd98798SBarry Smith } 8576cd98798SBarry Smith 858dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 8596cd98798SBarry Smith { 8606cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 8616cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 862d2840e09SBarry Smith const PetscScalar *x,*v; 863d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 864d2840e09SBarry Smith PetscInt n,i; 865d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 8666cd98798SBarry Smith 8676cd98798SBarry Smith PetscFunctionBegin; 8689566063dSJacob Faibussowitsch PetscCall(VecSet(yy,0.0)); 8699566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 8709566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 871bfec09a0SHong Zhang 8726cd98798SBarry Smith for (i=0; i<m; i++) { 873bfec09a0SHong Zhang idx = a->j + a->i[i]; 874bfec09a0SHong Zhang v = a->a + a->i[i]; 8756cd98798SBarry Smith n = a->i[i+1] - a->i[i]; 8766cd98798SBarry Smith alpha1 = x[6*i]; 8776cd98798SBarry Smith alpha2 = x[6*i+1]; 8786cd98798SBarry Smith alpha3 = x[6*i+2]; 8796cd98798SBarry Smith alpha4 = x[6*i+3]; 8806cd98798SBarry Smith alpha5 = x[6*i+4]; 8816cd98798SBarry Smith alpha6 = x[6*i+5]; 8826cd98798SBarry Smith while (n-->0) { 8836cd98798SBarry Smith y[6*(*idx)] += alpha1*(*v); 8846cd98798SBarry Smith y[6*(*idx)+1] += alpha2*(*v); 8856cd98798SBarry Smith y[6*(*idx)+2] += alpha3*(*v); 8866cd98798SBarry Smith y[6*(*idx)+3] += alpha4*(*v); 8876cd98798SBarry Smith y[6*(*idx)+4] += alpha5*(*v); 8886cd98798SBarry Smith y[6*(*idx)+5] += alpha6*(*v); 8896cd98798SBarry Smith idx++; v++; 8906cd98798SBarry Smith } 8916cd98798SBarry Smith } 8929566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(12.0*a->nz)); 8939566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 8949566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 8956cd98798SBarry Smith PetscFunctionReturn(0); 8966cd98798SBarry Smith } 8976cd98798SBarry Smith 898dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 8996cd98798SBarry Smith { 9006cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 9016cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 902d2840e09SBarry Smith const PetscScalar *x,*v; 903d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 904b24ad042SBarry Smith PetscInt n,i,jrow,j; 905d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 9066cd98798SBarry Smith 9076cd98798SBarry Smith PetscFunctionBegin; 9089566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 9099566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 9109566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 9116cd98798SBarry Smith idx = a->j; 9126cd98798SBarry Smith v = a->a; 9136cd98798SBarry Smith ii = a->i; 9146cd98798SBarry Smith 9156cd98798SBarry Smith for (i=0; i<m; i++) { 9166cd98798SBarry Smith jrow = ii[i]; 9176cd98798SBarry Smith n = ii[i+1] - jrow; 9186cd98798SBarry Smith sum1 = 0.0; 9196cd98798SBarry Smith sum2 = 0.0; 9206cd98798SBarry Smith sum3 = 0.0; 9216cd98798SBarry Smith sum4 = 0.0; 9226cd98798SBarry Smith sum5 = 0.0; 9236cd98798SBarry Smith sum6 = 0.0; 9246cd98798SBarry Smith for (j=0; j<n; j++) { 9256cd98798SBarry Smith sum1 += v[jrow]*x[6*idx[jrow]]; 9266cd98798SBarry Smith sum2 += v[jrow]*x[6*idx[jrow]+1]; 9276cd98798SBarry Smith sum3 += v[jrow]*x[6*idx[jrow]+2]; 9286cd98798SBarry Smith sum4 += v[jrow]*x[6*idx[jrow]+3]; 9296cd98798SBarry Smith sum5 += v[jrow]*x[6*idx[jrow]+4]; 9306cd98798SBarry Smith sum6 += v[jrow]*x[6*idx[jrow]+5]; 9316cd98798SBarry Smith jrow++; 9326cd98798SBarry Smith } 9336cd98798SBarry Smith y[6*i] += sum1; 9346cd98798SBarry Smith y[6*i+1] += sum2; 9356cd98798SBarry Smith y[6*i+2] += sum3; 9366cd98798SBarry Smith y[6*i+3] += sum4; 9376cd98798SBarry Smith y[6*i+4] += sum5; 9386cd98798SBarry Smith y[6*i+5] += sum6; 9396cd98798SBarry Smith } 9406cd98798SBarry Smith 9419566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(12.0*a->nz)); 9429566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 9439566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 9446cd98798SBarry Smith PetscFunctionReturn(0); 9456cd98798SBarry Smith } 9466cd98798SBarry Smith 947dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 9486cd98798SBarry Smith { 9496cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 9506cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 951d2840e09SBarry Smith const PetscScalar *x,*v; 952d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 953d2840e09SBarry Smith PetscInt n,i; 954d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 9556cd98798SBarry Smith 9566cd98798SBarry Smith PetscFunctionBegin; 9579566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 9589566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 9599566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 960bfec09a0SHong Zhang 9616cd98798SBarry Smith for (i=0; i<m; i++) { 962bfec09a0SHong Zhang idx = a->j + a->i[i]; 963bfec09a0SHong Zhang v = a->a + a->i[i]; 9646cd98798SBarry Smith n = a->i[i+1] - a->i[i]; 9656cd98798SBarry Smith alpha1 = x[6*i]; 9666cd98798SBarry Smith alpha2 = x[6*i+1]; 9676cd98798SBarry Smith alpha3 = x[6*i+2]; 9686cd98798SBarry Smith alpha4 = x[6*i+3]; 9696cd98798SBarry Smith alpha5 = x[6*i+4]; 9706cd98798SBarry Smith alpha6 = x[6*i+5]; 9716cd98798SBarry Smith while (n-->0) { 9726cd98798SBarry Smith y[6*(*idx)] += alpha1*(*v); 9736cd98798SBarry Smith y[6*(*idx)+1] += alpha2*(*v); 9746cd98798SBarry Smith y[6*(*idx)+2] += alpha3*(*v); 9756cd98798SBarry Smith y[6*(*idx)+3] += alpha4*(*v); 9766cd98798SBarry Smith y[6*(*idx)+4] += alpha5*(*v); 9776cd98798SBarry Smith y[6*(*idx)+5] += alpha6*(*v); 9786cd98798SBarry Smith idx++; v++; 9796cd98798SBarry Smith } 9806cd98798SBarry Smith } 9819566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(12.0*a->nz)); 9829566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 9839566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 9846cd98798SBarry Smith PetscFunctionReturn(0); 9856cd98798SBarry Smith } 9866cd98798SBarry Smith 98766ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/ 988ed8eea03SBarry Smith PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 989ed8eea03SBarry Smith { 990ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 991ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 992d2840e09SBarry Smith const PetscScalar *x,*v; 993d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 994d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 995d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 996ed8eea03SBarry Smith 997ed8eea03SBarry Smith PetscFunctionBegin; 9989566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 9999566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 1000ed8eea03SBarry Smith idx = a->j; 1001ed8eea03SBarry Smith v = a->a; 1002ed8eea03SBarry Smith ii = a->i; 1003ed8eea03SBarry Smith 1004ed8eea03SBarry Smith for (i=0; i<m; i++) { 1005ed8eea03SBarry Smith jrow = ii[i]; 1006ed8eea03SBarry Smith n = ii[i+1] - jrow; 1007ed8eea03SBarry Smith sum1 = 0.0; 1008ed8eea03SBarry Smith sum2 = 0.0; 1009ed8eea03SBarry Smith sum3 = 0.0; 1010ed8eea03SBarry Smith sum4 = 0.0; 1011ed8eea03SBarry Smith sum5 = 0.0; 1012ed8eea03SBarry Smith sum6 = 0.0; 1013ed8eea03SBarry Smith sum7 = 0.0; 101426fbe8dcSKarl Rupp 1015b3c51c66SMatthew Knepley nonzerorow += (n>0); 1016ed8eea03SBarry Smith for (j=0; j<n; j++) { 1017ed8eea03SBarry Smith sum1 += v[jrow]*x[7*idx[jrow]]; 1018ed8eea03SBarry Smith sum2 += v[jrow]*x[7*idx[jrow]+1]; 1019ed8eea03SBarry Smith sum3 += v[jrow]*x[7*idx[jrow]+2]; 1020ed8eea03SBarry Smith sum4 += v[jrow]*x[7*idx[jrow]+3]; 1021ed8eea03SBarry Smith sum5 += v[jrow]*x[7*idx[jrow]+4]; 1022ed8eea03SBarry Smith sum6 += v[jrow]*x[7*idx[jrow]+5]; 1023ed8eea03SBarry Smith sum7 += v[jrow]*x[7*idx[jrow]+6]; 1024ed8eea03SBarry Smith jrow++; 1025ed8eea03SBarry Smith } 1026ed8eea03SBarry Smith y[7*i] = sum1; 1027ed8eea03SBarry Smith y[7*i+1] = sum2; 1028ed8eea03SBarry Smith y[7*i+2] = sum3; 1029ed8eea03SBarry Smith y[7*i+3] = sum4; 1030ed8eea03SBarry Smith y[7*i+4] = sum5; 1031ed8eea03SBarry Smith y[7*i+5] = sum6; 1032ed8eea03SBarry Smith y[7*i+6] = sum7; 1033ed8eea03SBarry Smith } 1034ed8eea03SBarry Smith 10359566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(14.0*a->nz - 7.0*nonzerorow)); 10369566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 10379566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 1038ed8eea03SBarry Smith PetscFunctionReturn(0); 1039ed8eea03SBarry Smith } 1040ed8eea03SBarry Smith 1041ed8eea03SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1042ed8eea03SBarry Smith { 1043ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1044ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1045d2840e09SBarry Smith const PetscScalar *x,*v; 1046d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1047d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1048d2840e09SBarry Smith PetscInt n,i; 1049ed8eea03SBarry Smith 1050ed8eea03SBarry Smith PetscFunctionBegin; 10519566063dSJacob Faibussowitsch PetscCall(VecSet(yy,0.0)); 10529566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 10539566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 1054ed8eea03SBarry Smith 1055ed8eea03SBarry Smith for (i=0; i<m; i++) { 1056ed8eea03SBarry Smith idx = a->j + a->i[i]; 1057ed8eea03SBarry Smith v = a->a + a->i[i]; 1058ed8eea03SBarry Smith n = a->i[i+1] - a->i[i]; 1059ed8eea03SBarry Smith alpha1 = x[7*i]; 1060ed8eea03SBarry Smith alpha2 = x[7*i+1]; 1061ed8eea03SBarry Smith alpha3 = x[7*i+2]; 1062ed8eea03SBarry Smith alpha4 = x[7*i+3]; 1063ed8eea03SBarry Smith alpha5 = x[7*i+4]; 1064ed8eea03SBarry Smith alpha6 = x[7*i+5]; 1065ed8eea03SBarry Smith alpha7 = x[7*i+6]; 1066ed8eea03SBarry Smith while (n-->0) { 1067ed8eea03SBarry Smith y[7*(*idx)] += alpha1*(*v); 1068ed8eea03SBarry Smith y[7*(*idx)+1] += alpha2*(*v); 1069ed8eea03SBarry Smith y[7*(*idx)+2] += alpha3*(*v); 1070ed8eea03SBarry Smith y[7*(*idx)+3] += alpha4*(*v); 1071ed8eea03SBarry Smith y[7*(*idx)+4] += alpha5*(*v); 1072ed8eea03SBarry Smith y[7*(*idx)+5] += alpha6*(*v); 1073ed8eea03SBarry Smith y[7*(*idx)+6] += alpha7*(*v); 1074ed8eea03SBarry Smith idx++; v++; 1075ed8eea03SBarry Smith } 1076ed8eea03SBarry Smith } 10779566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(14.0*a->nz)); 10789566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 10799566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 1080ed8eea03SBarry Smith PetscFunctionReturn(0); 1081ed8eea03SBarry Smith } 1082ed8eea03SBarry Smith 1083ed8eea03SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1084ed8eea03SBarry Smith { 1085ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1086ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1087d2840e09SBarry Smith const PetscScalar *x,*v; 1088d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1089d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1090ed8eea03SBarry Smith PetscInt n,i,jrow,j; 1091ed8eea03SBarry Smith 1092ed8eea03SBarry Smith PetscFunctionBegin; 10939566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 10949566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 10959566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 1096ed8eea03SBarry Smith idx = a->j; 1097ed8eea03SBarry Smith v = a->a; 1098ed8eea03SBarry Smith ii = a->i; 1099ed8eea03SBarry Smith 1100ed8eea03SBarry Smith for (i=0; i<m; i++) { 1101ed8eea03SBarry Smith jrow = ii[i]; 1102ed8eea03SBarry Smith n = ii[i+1] - jrow; 1103ed8eea03SBarry Smith sum1 = 0.0; 1104ed8eea03SBarry Smith sum2 = 0.0; 1105ed8eea03SBarry Smith sum3 = 0.0; 1106ed8eea03SBarry Smith sum4 = 0.0; 1107ed8eea03SBarry Smith sum5 = 0.0; 1108ed8eea03SBarry Smith sum6 = 0.0; 1109ed8eea03SBarry Smith sum7 = 0.0; 1110ed8eea03SBarry Smith for (j=0; j<n; j++) { 1111ed8eea03SBarry Smith sum1 += v[jrow]*x[7*idx[jrow]]; 1112ed8eea03SBarry Smith sum2 += v[jrow]*x[7*idx[jrow]+1]; 1113ed8eea03SBarry Smith sum3 += v[jrow]*x[7*idx[jrow]+2]; 1114ed8eea03SBarry Smith sum4 += v[jrow]*x[7*idx[jrow]+3]; 1115ed8eea03SBarry Smith sum5 += v[jrow]*x[7*idx[jrow]+4]; 1116ed8eea03SBarry Smith sum6 += v[jrow]*x[7*idx[jrow]+5]; 1117ed8eea03SBarry Smith sum7 += v[jrow]*x[7*idx[jrow]+6]; 1118ed8eea03SBarry Smith jrow++; 1119ed8eea03SBarry Smith } 1120ed8eea03SBarry Smith y[7*i] += sum1; 1121ed8eea03SBarry Smith y[7*i+1] += sum2; 1122ed8eea03SBarry Smith y[7*i+2] += sum3; 1123ed8eea03SBarry Smith y[7*i+3] += sum4; 1124ed8eea03SBarry Smith y[7*i+4] += sum5; 1125ed8eea03SBarry Smith y[7*i+5] += sum6; 1126ed8eea03SBarry Smith y[7*i+6] += sum7; 1127ed8eea03SBarry Smith } 1128ed8eea03SBarry Smith 11299566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(14.0*a->nz)); 11309566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 11319566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 1132ed8eea03SBarry Smith PetscFunctionReturn(0); 1133ed8eea03SBarry Smith } 1134ed8eea03SBarry Smith 1135ed8eea03SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1136ed8eea03SBarry Smith { 1137ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1138ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1139d2840e09SBarry Smith const PetscScalar *x,*v; 1140d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1141d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1142d2840e09SBarry Smith PetscInt n,i; 1143ed8eea03SBarry Smith 1144ed8eea03SBarry Smith PetscFunctionBegin; 11459566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 11469566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 11479566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 1148ed8eea03SBarry Smith for (i=0; i<m; i++) { 1149ed8eea03SBarry Smith idx = a->j + a->i[i]; 1150ed8eea03SBarry Smith v = a->a + a->i[i]; 1151ed8eea03SBarry Smith n = a->i[i+1] - a->i[i]; 1152ed8eea03SBarry Smith alpha1 = x[7*i]; 1153ed8eea03SBarry Smith alpha2 = x[7*i+1]; 1154ed8eea03SBarry Smith alpha3 = x[7*i+2]; 1155ed8eea03SBarry Smith alpha4 = x[7*i+3]; 1156ed8eea03SBarry Smith alpha5 = x[7*i+4]; 1157ed8eea03SBarry Smith alpha6 = x[7*i+5]; 1158ed8eea03SBarry Smith alpha7 = x[7*i+6]; 1159ed8eea03SBarry Smith while (n-->0) { 1160ed8eea03SBarry Smith y[7*(*idx)] += alpha1*(*v); 1161ed8eea03SBarry Smith y[7*(*idx)+1] += alpha2*(*v); 1162ed8eea03SBarry Smith y[7*(*idx)+2] += alpha3*(*v); 1163ed8eea03SBarry Smith y[7*(*idx)+3] += alpha4*(*v); 1164ed8eea03SBarry Smith y[7*(*idx)+4] += alpha5*(*v); 1165ed8eea03SBarry Smith y[7*(*idx)+5] += alpha6*(*v); 1166ed8eea03SBarry Smith y[7*(*idx)+6] += alpha7*(*v); 1167ed8eea03SBarry Smith idx++; v++; 1168ed8eea03SBarry Smith } 1169ed8eea03SBarry Smith } 11709566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(14.0*a->nz)); 11719566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 11729566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 1173ed8eea03SBarry Smith PetscFunctionReturn(0); 1174ed8eea03SBarry Smith } 1175ed8eea03SBarry Smith 1176dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 117766ed3db0SBarry Smith { 117866ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 117966ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1180d2840e09SBarry Smith const PetscScalar *x,*v; 1181d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1182d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1183d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 118466ed3db0SBarry Smith 118566ed3db0SBarry Smith PetscFunctionBegin; 11869566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 11879566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 118866ed3db0SBarry Smith idx = a->j; 118966ed3db0SBarry Smith v = a->a; 119066ed3db0SBarry Smith ii = a->i; 119166ed3db0SBarry Smith 119266ed3db0SBarry Smith for (i=0; i<m; i++) { 119366ed3db0SBarry Smith jrow = ii[i]; 119466ed3db0SBarry Smith n = ii[i+1] - jrow; 119566ed3db0SBarry Smith sum1 = 0.0; 119666ed3db0SBarry Smith sum2 = 0.0; 119766ed3db0SBarry Smith sum3 = 0.0; 119866ed3db0SBarry Smith sum4 = 0.0; 119966ed3db0SBarry Smith sum5 = 0.0; 120066ed3db0SBarry Smith sum6 = 0.0; 120166ed3db0SBarry Smith sum7 = 0.0; 120266ed3db0SBarry Smith sum8 = 0.0; 120326fbe8dcSKarl Rupp 1204b3c51c66SMatthew Knepley nonzerorow += (n>0); 120566ed3db0SBarry Smith for (j=0; j<n; j++) { 120666ed3db0SBarry Smith sum1 += v[jrow]*x[8*idx[jrow]]; 120766ed3db0SBarry Smith sum2 += v[jrow]*x[8*idx[jrow]+1]; 120866ed3db0SBarry Smith sum3 += v[jrow]*x[8*idx[jrow]+2]; 120966ed3db0SBarry Smith sum4 += v[jrow]*x[8*idx[jrow]+3]; 121066ed3db0SBarry Smith sum5 += v[jrow]*x[8*idx[jrow]+4]; 121166ed3db0SBarry Smith sum6 += v[jrow]*x[8*idx[jrow]+5]; 121266ed3db0SBarry Smith sum7 += v[jrow]*x[8*idx[jrow]+6]; 121366ed3db0SBarry Smith sum8 += v[jrow]*x[8*idx[jrow]+7]; 121466ed3db0SBarry Smith jrow++; 121566ed3db0SBarry Smith } 121666ed3db0SBarry Smith y[8*i] = sum1; 121766ed3db0SBarry Smith y[8*i+1] = sum2; 121866ed3db0SBarry Smith y[8*i+2] = sum3; 121966ed3db0SBarry Smith y[8*i+3] = sum4; 122066ed3db0SBarry Smith y[8*i+4] = sum5; 122166ed3db0SBarry Smith y[8*i+5] = sum6; 122266ed3db0SBarry Smith y[8*i+6] = sum7; 122366ed3db0SBarry Smith y[8*i+7] = sum8; 122466ed3db0SBarry Smith } 122566ed3db0SBarry Smith 12269566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(16.0*a->nz - 8.0*nonzerorow)); 12279566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 12289566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 122966ed3db0SBarry Smith PetscFunctionReturn(0); 123066ed3db0SBarry Smith } 123166ed3db0SBarry Smith 1232dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 123366ed3db0SBarry Smith { 123466ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 123566ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1236d2840e09SBarry Smith const PetscScalar *x,*v; 1237d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1238d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1239d2840e09SBarry Smith PetscInt n,i; 124066ed3db0SBarry Smith 124166ed3db0SBarry Smith PetscFunctionBegin; 12429566063dSJacob Faibussowitsch PetscCall(VecSet(yy,0.0)); 12439566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 12449566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 1245bfec09a0SHong Zhang 124666ed3db0SBarry Smith for (i=0; i<m; i++) { 1247bfec09a0SHong Zhang idx = a->j + a->i[i]; 1248bfec09a0SHong Zhang v = a->a + a->i[i]; 124966ed3db0SBarry Smith n = a->i[i+1] - a->i[i]; 125066ed3db0SBarry Smith alpha1 = x[8*i]; 125166ed3db0SBarry Smith alpha2 = x[8*i+1]; 125266ed3db0SBarry Smith alpha3 = x[8*i+2]; 125366ed3db0SBarry Smith alpha4 = x[8*i+3]; 125466ed3db0SBarry Smith alpha5 = x[8*i+4]; 125566ed3db0SBarry Smith alpha6 = x[8*i+5]; 125666ed3db0SBarry Smith alpha7 = x[8*i+6]; 125766ed3db0SBarry Smith alpha8 = x[8*i+7]; 125866ed3db0SBarry Smith while (n-->0) { 125966ed3db0SBarry Smith y[8*(*idx)] += alpha1*(*v); 126066ed3db0SBarry Smith y[8*(*idx)+1] += alpha2*(*v); 126166ed3db0SBarry Smith y[8*(*idx)+2] += alpha3*(*v); 126266ed3db0SBarry Smith y[8*(*idx)+3] += alpha4*(*v); 126366ed3db0SBarry Smith y[8*(*idx)+4] += alpha5*(*v); 126466ed3db0SBarry Smith y[8*(*idx)+5] += alpha6*(*v); 126566ed3db0SBarry Smith y[8*(*idx)+6] += alpha7*(*v); 126666ed3db0SBarry Smith y[8*(*idx)+7] += alpha8*(*v); 126766ed3db0SBarry Smith idx++; v++; 126866ed3db0SBarry Smith } 126966ed3db0SBarry Smith } 12709566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(16.0*a->nz)); 12719566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 12729566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 127366ed3db0SBarry Smith PetscFunctionReturn(0); 127466ed3db0SBarry Smith } 127566ed3db0SBarry Smith 1276dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 127766ed3db0SBarry Smith { 127866ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 127966ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1280d2840e09SBarry Smith const PetscScalar *x,*v; 1281d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1282d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1283b24ad042SBarry Smith PetscInt n,i,jrow,j; 128466ed3db0SBarry Smith 128566ed3db0SBarry Smith PetscFunctionBegin; 12869566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 12879566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 12889566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 128966ed3db0SBarry Smith idx = a->j; 129066ed3db0SBarry Smith v = a->a; 129166ed3db0SBarry Smith ii = a->i; 129266ed3db0SBarry Smith 129366ed3db0SBarry Smith for (i=0; i<m; i++) { 129466ed3db0SBarry Smith jrow = ii[i]; 129566ed3db0SBarry Smith n = ii[i+1] - jrow; 129666ed3db0SBarry Smith sum1 = 0.0; 129766ed3db0SBarry Smith sum2 = 0.0; 129866ed3db0SBarry Smith sum3 = 0.0; 129966ed3db0SBarry Smith sum4 = 0.0; 130066ed3db0SBarry Smith sum5 = 0.0; 130166ed3db0SBarry Smith sum6 = 0.0; 130266ed3db0SBarry Smith sum7 = 0.0; 130366ed3db0SBarry Smith sum8 = 0.0; 130466ed3db0SBarry Smith for (j=0; j<n; j++) { 130566ed3db0SBarry Smith sum1 += v[jrow]*x[8*idx[jrow]]; 130666ed3db0SBarry Smith sum2 += v[jrow]*x[8*idx[jrow]+1]; 130766ed3db0SBarry Smith sum3 += v[jrow]*x[8*idx[jrow]+2]; 130866ed3db0SBarry Smith sum4 += v[jrow]*x[8*idx[jrow]+3]; 130966ed3db0SBarry Smith sum5 += v[jrow]*x[8*idx[jrow]+4]; 131066ed3db0SBarry Smith sum6 += v[jrow]*x[8*idx[jrow]+5]; 131166ed3db0SBarry Smith sum7 += v[jrow]*x[8*idx[jrow]+6]; 131266ed3db0SBarry Smith sum8 += v[jrow]*x[8*idx[jrow]+7]; 131366ed3db0SBarry Smith jrow++; 131466ed3db0SBarry Smith } 131566ed3db0SBarry Smith y[8*i] += sum1; 131666ed3db0SBarry Smith y[8*i+1] += sum2; 131766ed3db0SBarry Smith y[8*i+2] += sum3; 131866ed3db0SBarry Smith y[8*i+3] += sum4; 131966ed3db0SBarry Smith y[8*i+4] += sum5; 132066ed3db0SBarry Smith y[8*i+5] += sum6; 132166ed3db0SBarry Smith y[8*i+6] += sum7; 132266ed3db0SBarry Smith y[8*i+7] += sum8; 132366ed3db0SBarry Smith } 132466ed3db0SBarry Smith 13259566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(16.0*a->nz)); 13269566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 13279566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 132866ed3db0SBarry Smith PetscFunctionReturn(0); 132966ed3db0SBarry Smith } 133066ed3db0SBarry Smith 1331dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 133266ed3db0SBarry Smith { 133366ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 133466ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1335d2840e09SBarry Smith const PetscScalar *x,*v; 1336d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1337d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1338d2840e09SBarry Smith PetscInt n,i; 133966ed3db0SBarry Smith 134066ed3db0SBarry Smith PetscFunctionBegin; 13419566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 13429566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 13439566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 134466ed3db0SBarry Smith for (i=0; i<m; i++) { 1345bfec09a0SHong Zhang idx = a->j + a->i[i]; 1346bfec09a0SHong Zhang v = a->a + a->i[i]; 134766ed3db0SBarry Smith n = a->i[i+1] - a->i[i]; 134866ed3db0SBarry Smith alpha1 = x[8*i]; 134966ed3db0SBarry Smith alpha2 = x[8*i+1]; 135066ed3db0SBarry Smith alpha3 = x[8*i+2]; 135166ed3db0SBarry Smith alpha4 = x[8*i+3]; 135266ed3db0SBarry Smith alpha5 = x[8*i+4]; 135366ed3db0SBarry Smith alpha6 = x[8*i+5]; 135466ed3db0SBarry Smith alpha7 = x[8*i+6]; 135566ed3db0SBarry Smith alpha8 = x[8*i+7]; 135666ed3db0SBarry Smith while (n-->0) { 135766ed3db0SBarry Smith y[8*(*idx)] += alpha1*(*v); 135866ed3db0SBarry Smith y[8*(*idx)+1] += alpha2*(*v); 135966ed3db0SBarry Smith y[8*(*idx)+2] += alpha3*(*v); 136066ed3db0SBarry Smith y[8*(*idx)+3] += alpha4*(*v); 136166ed3db0SBarry Smith y[8*(*idx)+4] += alpha5*(*v); 136266ed3db0SBarry Smith y[8*(*idx)+5] += alpha6*(*v); 136366ed3db0SBarry Smith y[8*(*idx)+6] += alpha7*(*v); 136466ed3db0SBarry Smith y[8*(*idx)+7] += alpha8*(*v); 136566ed3db0SBarry Smith idx++; v++; 136666ed3db0SBarry Smith } 136766ed3db0SBarry Smith } 13689566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(16.0*a->nz)); 13699566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 13709566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 13712f7816d4SBarry Smith PetscFunctionReturn(0); 13722f7816d4SBarry Smith } 13732f7816d4SBarry Smith 13742b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/ 1375dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 13762b692628SMatthew Knepley { 13772b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 13782b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1379d2840e09SBarry Smith const PetscScalar *x,*v; 1380d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1381d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1382d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 13832b692628SMatthew Knepley 13842b692628SMatthew Knepley PetscFunctionBegin; 13859566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 13869566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 13872b692628SMatthew Knepley idx = a->j; 13882b692628SMatthew Knepley v = a->a; 13892b692628SMatthew Knepley ii = a->i; 13902b692628SMatthew Knepley 13912b692628SMatthew Knepley for (i=0; i<m; i++) { 13922b692628SMatthew Knepley jrow = ii[i]; 13932b692628SMatthew Knepley n = ii[i+1] - jrow; 13942b692628SMatthew Knepley sum1 = 0.0; 13952b692628SMatthew Knepley sum2 = 0.0; 13962b692628SMatthew Knepley sum3 = 0.0; 13972b692628SMatthew Knepley sum4 = 0.0; 13982b692628SMatthew Knepley sum5 = 0.0; 13992b692628SMatthew Knepley sum6 = 0.0; 14002b692628SMatthew Knepley sum7 = 0.0; 14012b692628SMatthew Knepley sum8 = 0.0; 14022b692628SMatthew Knepley sum9 = 0.0; 140326fbe8dcSKarl Rupp 1404b3c51c66SMatthew Knepley nonzerorow += (n>0); 14052b692628SMatthew Knepley for (j=0; j<n; j++) { 14062b692628SMatthew Knepley sum1 += v[jrow]*x[9*idx[jrow]]; 14072b692628SMatthew Knepley sum2 += v[jrow]*x[9*idx[jrow]+1]; 14082b692628SMatthew Knepley sum3 += v[jrow]*x[9*idx[jrow]+2]; 14092b692628SMatthew Knepley sum4 += v[jrow]*x[9*idx[jrow]+3]; 14102b692628SMatthew Knepley sum5 += v[jrow]*x[9*idx[jrow]+4]; 14112b692628SMatthew Knepley sum6 += v[jrow]*x[9*idx[jrow]+5]; 14122b692628SMatthew Knepley sum7 += v[jrow]*x[9*idx[jrow]+6]; 14132b692628SMatthew Knepley sum8 += v[jrow]*x[9*idx[jrow]+7]; 14142b692628SMatthew Knepley sum9 += v[jrow]*x[9*idx[jrow]+8]; 14152b692628SMatthew Knepley jrow++; 14162b692628SMatthew Knepley } 14172b692628SMatthew Knepley y[9*i] = sum1; 14182b692628SMatthew Knepley y[9*i+1] = sum2; 14192b692628SMatthew Knepley y[9*i+2] = sum3; 14202b692628SMatthew Knepley y[9*i+3] = sum4; 14212b692628SMatthew Knepley y[9*i+4] = sum5; 14222b692628SMatthew Knepley y[9*i+5] = sum6; 14232b692628SMatthew Knepley y[9*i+6] = sum7; 14242b692628SMatthew Knepley y[9*i+7] = sum8; 14252b692628SMatthew Knepley y[9*i+8] = sum9; 14262b692628SMatthew Knepley } 14272b692628SMatthew Knepley 14289566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(18.0*a->nz - 9*nonzerorow)); 14299566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 14309566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 14312b692628SMatthew Knepley PetscFunctionReturn(0); 14322b692628SMatthew Knepley } 14332b692628SMatthew Knepley 1434b9cda22cSBarry Smith /* ------------------------------------------------------------------------------*/ 1435b9cda22cSBarry Smith 1436dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 14372b692628SMatthew Knepley { 14382b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 14392b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1440d2840e09SBarry Smith const PetscScalar *x,*v; 1441d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1442d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1443d2840e09SBarry Smith PetscInt n,i; 14442b692628SMatthew Knepley 14452b692628SMatthew Knepley PetscFunctionBegin; 14469566063dSJacob Faibussowitsch PetscCall(VecSet(yy,0.0)); 14479566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 14489566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 14492b692628SMatthew Knepley 14502b692628SMatthew Knepley for (i=0; i<m; i++) { 14512b692628SMatthew Knepley idx = a->j + a->i[i]; 14522b692628SMatthew Knepley v = a->a + a->i[i]; 14532b692628SMatthew Knepley n = a->i[i+1] - a->i[i]; 14542b692628SMatthew Knepley alpha1 = x[9*i]; 14552b692628SMatthew Knepley alpha2 = x[9*i+1]; 14562b692628SMatthew Knepley alpha3 = x[9*i+2]; 14572b692628SMatthew Knepley alpha4 = x[9*i+3]; 14582b692628SMatthew Knepley alpha5 = x[9*i+4]; 14592b692628SMatthew Knepley alpha6 = x[9*i+5]; 14602b692628SMatthew Knepley alpha7 = x[9*i+6]; 14612b692628SMatthew Knepley alpha8 = x[9*i+7]; 14622f6bd0c6SMatthew Knepley alpha9 = x[9*i+8]; 14632b692628SMatthew Knepley while (n-->0) { 14642b692628SMatthew Knepley y[9*(*idx)] += alpha1*(*v); 14652b692628SMatthew Knepley y[9*(*idx)+1] += alpha2*(*v); 14662b692628SMatthew Knepley y[9*(*idx)+2] += alpha3*(*v); 14672b692628SMatthew Knepley y[9*(*idx)+3] += alpha4*(*v); 14682b692628SMatthew Knepley y[9*(*idx)+4] += alpha5*(*v); 14692b692628SMatthew Knepley y[9*(*idx)+5] += alpha6*(*v); 14702b692628SMatthew Knepley y[9*(*idx)+6] += alpha7*(*v); 14712b692628SMatthew Knepley y[9*(*idx)+7] += alpha8*(*v); 14722b692628SMatthew Knepley y[9*(*idx)+8] += alpha9*(*v); 14732b692628SMatthew Knepley idx++; v++; 14742b692628SMatthew Knepley } 14752b692628SMatthew Knepley } 14769566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(18.0*a->nz)); 14779566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 14789566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 14792b692628SMatthew Knepley PetscFunctionReturn(0); 14802b692628SMatthew Knepley } 14812b692628SMatthew Knepley 1482dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 14832b692628SMatthew Knepley { 14842b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 14852b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1486d2840e09SBarry Smith const PetscScalar *x,*v; 1487d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1488d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1489b24ad042SBarry Smith PetscInt n,i,jrow,j; 14902b692628SMatthew Knepley 14912b692628SMatthew Knepley PetscFunctionBegin; 14929566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 14939566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 14949566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 14952b692628SMatthew Knepley idx = a->j; 14962b692628SMatthew Knepley v = a->a; 14972b692628SMatthew Knepley ii = a->i; 14982b692628SMatthew Knepley 14992b692628SMatthew Knepley for (i=0; i<m; i++) { 15002b692628SMatthew Knepley jrow = ii[i]; 15012b692628SMatthew Knepley n = ii[i+1] - jrow; 15022b692628SMatthew Knepley sum1 = 0.0; 15032b692628SMatthew Knepley sum2 = 0.0; 15042b692628SMatthew Knepley sum3 = 0.0; 15052b692628SMatthew Knepley sum4 = 0.0; 15062b692628SMatthew Knepley sum5 = 0.0; 15072b692628SMatthew Knepley sum6 = 0.0; 15082b692628SMatthew Knepley sum7 = 0.0; 15092b692628SMatthew Knepley sum8 = 0.0; 15102b692628SMatthew Knepley sum9 = 0.0; 15112b692628SMatthew Knepley for (j=0; j<n; j++) { 15122b692628SMatthew Knepley sum1 += v[jrow]*x[9*idx[jrow]]; 15132b692628SMatthew Knepley sum2 += v[jrow]*x[9*idx[jrow]+1]; 15142b692628SMatthew Knepley sum3 += v[jrow]*x[9*idx[jrow]+2]; 15152b692628SMatthew Knepley sum4 += v[jrow]*x[9*idx[jrow]+3]; 15162b692628SMatthew Knepley sum5 += v[jrow]*x[9*idx[jrow]+4]; 15172b692628SMatthew Knepley sum6 += v[jrow]*x[9*idx[jrow]+5]; 15182b692628SMatthew Knepley sum7 += v[jrow]*x[9*idx[jrow]+6]; 15192b692628SMatthew Knepley sum8 += v[jrow]*x[9*idx[jrow]+7]; 15202b692628SMatthew Knepley sum9 += v[jrow]*x[9*idx[jrow]+8]; 15212b692628SMatthew Knepley jrow++; 15222b692628SMatthew Knepley } 15232b692628SMatthew Knepley y[9*i] += sum1; 15242b692628SMatthew Knepley y[9*i+1] += sum2; 15252b692628SMatthew Knepley y[9*i+2] += sum3; 15262b692628SMatthew Knepley y[9*i+3] += sum4; 15272b692628SMatthew Knepley y[9*i+4] += sum5; 15282b692628SMatthew Knepley y[9*i+5] += sum6; 15292b692628SMatthew Knepley y[9*i+6] += sum7; 15302b692628SMatthew Knepley y[9*i+7] += sum8; 15312b692628SMatthew Knepley y[9*i+8] += sum9; 15322b692628SMatthew Knepley } 15332b692628SMatthew Knepley 15349566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(18.0*a->nz)); 15359566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 15369566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 15372b692628SMatthew Knepley PetscFunctionReturn(0); 15382b692628SMatthew Knepley } 15392b692628SMatthew Knepley 1540dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 15412b692628SMatthew Knepley { 15422b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 15432b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1544d2840e09SBarry Smith const PetscScalar *x,*v; 1545d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1546d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1547d2840e09SBarry Smith PetscInt n,i; 15482b692628SMatthew Knepley 15492b692628SMatthew Knepley PetscFunctionBegin; 15509566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 15519566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 15529566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 15532b692628SMatthew Knepley for (i=0; i<m; i++) { 15542b692628SMatthew Knepley idx = a->j + a->i[i]; 15552b692628SMatthew Knepley v = a->a + a->i[i]; 15562b692628SMatthew Knepley n = a->i[i+1] - a->i[i]; 15572b692628SMatthew Knepley alpha1 = x[9*i]; 15582b692628SMatthew Knepley alpha2 = x[9*i+1]; 15592b692628SMatthew Knepley alpha3 = x[9*i+2]; 15602b692628SMatthew Knepley alpha4 = x[9*i+3]; 15612b692628SMatthew Knepley alpha5 = x[9*i+4]; 15622b692628SMatthew Knepley alpha6 = x[9*i+5]; 15632b692628SMatthew Knepley alpha7 = x[9*i+6]; 15642b692628SMatthew Knepley alpha8 = x[9*i+7]; 15652b692628SMatthew Knepley alpha9 = x[9*i+8]; 15662b692628SMatthew Knepley while (n-->0) { 15672b692628SMatthew Knepley y[9*(*idx)] += alpha1*(*v); 15682b692628SMatthew Knepley y[9*(*idx)+1] += alpha2*(*v); 15692b692628SMatthew Knepley y[9*(*idx)+2] += alpha3*(*v); 15702b692628SMatthew Knepley y[9*(*idx)+3] += alpha4*(*v); 15712b692628SMatthew Knepley y[9*(*idx)+4] += alpha5*(*v); 15722b692628SMatthew Knepley y[9*(*idx)+5] += alpha6*(*v); 15732b692628SMatthew Knepley y[9*(*idx)+6] += alpha7*(*v); 15742b692628SMatthew Knepley y[9*(*idx)+7] += alpha8*(*v); 15752b692628SMatthew Knepley y[9*(*idx)+8] += alpha9*(*v); 15762b692628SMatthew Knepley idx++; v++; 15772b692628SMatthew Knepley } 15782b692628SMatthew Knepley } 15799566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(18.0*a->nz)); 15809566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 15819566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 15822b692628SMatthew Knepley PetscFunctionReturn(0); 15832b692628SMatthew Knepley } 1584b9cda22cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1585b9cda22cSBarry Smith { 1586b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1587b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1588d2840e09SBarry Smith const PetscScalar *x,*v; 1589d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1590d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1591d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 1592b9cda22cSBarry Smith 1593b9cda22cSBarry Smith PetscFunctionBegin; 15949566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 15959566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 1596b9cda22cSBarry Smith idx = a->j; 1597b9cda22cSBarry Smith v = a->a; 1598b9cda22cSBarry Smith ii = a->i; 1599b9cda22cSBarry Smith 1600b9cda22cSBarry Smith for (i=0; i<m; i++) { 1601b9cda22cSBarry Smith jrow = ii[i]; 1602b9cda22cSBarry Smith n = ii[i+1] - jrow; 1603b9cda22cSBarry Smith sum1 = 0.0; 1604b9cda22cSBarry Smith sum2 = 0.0; 1605b9cda22cSBarry Smith sum3 = 0.0; 1606b9cda22cSBarry Smith sum4 = 0.0; 1607b9cda22cSBarry Smith sum5 = 0.0; 1608b9cda22cSBarry Smith sum6 = 0.0; 1609b9cda22cSBarry Smith sum7 = 0.0; 1610b9cda22cSBarry Smith sum8 = 0.0; 1611b9cda22cSBarry Smith sum9 = 0.0; 1612b9cda22cSBarry Smith sum10 = 0.0; 161326fbe8dcSKarl Rupp 1614b3c51c66SMatthew Knepley nonzerorow += (n>0); 1615b9cda22cSBarry Smith for (j=0; j<n; j++) { 1616b9cda22cSBarry Smith sum1 += v[jrow]*x[10*idx[jrow]]; 1617b9cda22cSBarry Smith sum2 += v[jrow]*x[10*idx[jrow]+1]; 1618b9cda22cSBarry Smith sum3 += v[jrow]*x[10*idx[jrow]+2]; 1619b9cda22cSBarry Smith sum4 += v[jrow]*x[10*idx[jrow]+3]; 1620b9cda22cSBarry Smith sum5 += v[jrow]*x[10*idx[jrow]+4]; 1621b9cda22cSBarry Smith sum6 += v[jrow]*x[10*idx[jrow]+5]; 1622b9cda22cSBarry Smith sum7 += v[jrow]*x[10*idx[jrow]+6]; 1623b9cda22cSBarry Smith sum8 += v[jrow]*x[10*idx[jrow]+7]; 1624b9cda22cSBarry Smith sum9 += v[jrow]*x[10*idx[jrow]+8]; 1625b9cda22cSBarry Smith sum10 += v[jrow]*x[10*idx[jrow]+9]; 1626b9cda22cSBarry Smith jrow++; 1627b9cda22cSBarry Smith } 1628b9cda22cSBarry Smith y[10*i] = sum1; 1629b9cda22cSBarry Smith y[10*i+1] = sum2; 1630b9cda22cSBarry Smith y[10*i+2] = sum3; 1631b9cda22cSBarry Smith y[10*i+3] = sum4; 1632b9cda22cSBarry Smith y[10*i+4] = sum5; 1633b9cda22cSBarry Smith y[10*i+5] = sum6; 1634b9cda22cSBarry Smith y[10*i+6] = sum7; 1635b9cda22cSBarry Smith y[10*i+7] = sum8; 1636b9cda22cSBarry Smith y[10*i+8] = sum9; 1637b9cda22cSBarry Smith y[10*i+9] = sum10; 1638b9cda22cSBarry Smith } 1639b9cda22cSBarry Smith 16409566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(20.0*a->nz - 10.0*nonzerorow)); 16419566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 16429566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 1643b9cda22cSBarry Smith PetscFunctionReturn(0); 1644b9cda22cSBarry Smith } 1645b9cda22cSBarry Smith 1646b9cda22cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1647b9cda22cSBarry Smith { 1648b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1649b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1650d2840e09SBarry Smith const PetscScalar *x,*v; 1651d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1652d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1653b9cda22cSBarry Smith PetscInt n,i,jrow,j; 1654b9cda22cSBarry Smith 1655b9cda22cSBarry Smith PetscFunctionBegin; 16569566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 16579566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 16589566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 1659b9cda22cSBarry Smith idx = a->j; 1660b9cda22cSBarry Smith v = a->a; 1661b9cda22cSBarry Smith ii = a->i; 1662b9cda22cSBarry Smith 1663b9cda22cSBarry Smith for (i=0; i<m; i++) { 1664b9cda22cSBarry Smith jrow = ii[i]; 1665b9cda22cSBarry Smith n = ii[i+1] - jrow; 1666b9cda22cSBarry Smith sum1 = 0.0; 1667b9cda22cSBarry Smith sum2 = 0.0; 1668b9cda22cSBarry Smith sum3 = 0.0; 1669b9cda22cSBarry Smith sum4 = 0.0; 1670b9cda22cSBarry Smith sum5 = 0.0; 1671b9cda22cSBarry Smith sum6 = 0.0; 1672b9cda22cSBarry Smith sum7 = 0.0; 1673b9cda22cSBarry Smith sum8 = 0.0; 1674b9cda22cSBarry Smith sum9 = 0.0; 1675b9cda22cSBarry Smith sum10 = 0.0; 1676b9cda22cSBarry Smith for (j=0; j<n; j++) { 1677b9cda22cSBarry Smith sum1 += v[jrow]*x[10*idx[jrow]]; 1678b9cda22cSBarry Smith sum2 += v[jrow]*x[10*idx[jrow]+1]; 1679b9cda22cSBarry Smith sum3 += v[jrow]*x[10*idx[jrow]+2]; 1680b9cda22cSBarry Smith sum4 += v[jrow]*x[10*idx[jrow]+3]; 1681b9cda22cSBarry Smith sum5 += v[jrow]*x[10*idx[jrow]+4]; 1682b9cda22cSBarry Smith sum6 += v[jrow]*x[10*idx[jrow]+5]; 1683b9cda22cSBarry Smith sum7 += v[jrow]*x[10*idx[jrow]+6]; 1684b9cda22cSBarry Smith sum8 += v[jrow]*x[10*idx[jrow]+7]; 1685b9cda22cSBarry Smith sum9 += v[jrow]*x[10*idx[jrow]+8]; 1686b9cda22cSBarry Smith sum10 += v[jrow]*x[10*idx[jrow]+9]; 1687b9cda22cSBarry Smith jrow++; 1688b9cda22cSBarry Smith } 1689b9cda22cSBarry Smith y[10*i] += sum1; 1690b9cda22cSBarry Smith y[10*i+1] += sum2; 1691b9cda22cSBarry Smith y[10*i+2] += sum3; 1692b9cda22cSBarry Smith y[10*i+3] += sum4; 1693b9cda22cSBarry Smith y[10*i+4] += sum5; 1694b9cda22cSBarry Smith y[10*i+5] += sum6; 1695b9cda22cSBarry Smith y[10*i+6] += sum7; 1696b9cda22cSBarry Smith y[10*i+7] += sum8; 1697b9cda22cSBarry Smith y[10*i+8] += sum9; 1698b9cda22cSBarry Smith y[10*i+9] += sum10; 1699b9cda22cSBarry Smith } 1700b9cda22cSBarry Smith 17019566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(20.0*a->nz)); 17029566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 17039566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 1704b9cda22cSBarry Smith PetscFunctionReturn(0); 1705b9cda22cSBarry Smith } 1706b9cda22cSBarry Smith 1707b9cda22cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1708b9cda22cSBarry Smith { 1709b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1710b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1711d2840e09SBarry Smith const PetscScalar *x,*v; 1712d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1713d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1714d2840e09SBarry Smith PetscInt n,i; 1715b9cda22cSBarry Smith 1716b9cda22cSBarry Smith PetscFunctionBegin; 17179566063dSJacob Faibussowitsch PetscCall(VecSet(yy,0.0)); 17189566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 17199566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 1720b9cda22cSBarry Smith 1721b9cda22cSBarry Smith for (i=0; i<m; i++) { 1722b9cda22cSBarry Smith idx = a->j + a->i[i]; 1723b9cda22cSBarry Smith v = a->a + a->i[i]; 1724b9cda22cSBarry Smith n = a->i[i+1] - a->i[i]; 1725e29fdc22SBarry Smith alpha1 = x[10*i]; 1726e29fdc22SBarry Smith alpha2 = x[10*i+1]; 1727e29fdc22SBarry Smith alpha3 = x[10*i+2]; 1728e29fdc22SBarry Smith alpha4 = x[10*i+3]; 1729e29fdc22SBarry Smith alpha5 = x[10*i+4]; 1730e29fdc22SBarry Smith alpha6 = x[10*i+5]; 1731e29fdc22SBarry Smith alpha7 = x[10*i+6]; 1732e29fdc22SBarry Smith alpha8 = x[10*i+7]; 1733e29fdc22SBarry Smith alpha9 = x[10*i+8]; 1734b9cda22cSBarry Smith alpha10 = x[10*i+9]; 1735b9cda22cSBarry Smith while (n-->0) { 1736e29fdc22SBarry Smith y[10*(*idx)] += alpha1*(*v); 1737e29fdc22SBarry Smith y[10*(*idx)+1] += alpha2*(*v); 1738e29fdc22SBarry Smith y[10*(*idx)+2] += alpha3*(*v); 1739e29fdc22SBarry Smith y[10*(*idx)+3] += alpha4*(*v); 1740e29fdc22SBarry Smith y[10*(*idx)+4] += alpha5*(*v); 1741e29fdc22SBarry Smith y[10*(*idx)+5] += alpha6*(*v); 1742e29fdc22SBarry Smith y[10*(*idx)+6] += alpha7*(*v); 1743e29fdc22SBarry Smith y[10*(*idx)+7] += alpha8*(*v); 1744e29fdc22SBarry Smith y[10*(*idx)+8] += alpha9*(*v); 1745b9cda22cSBarry Smith y[10*(*idx)+9] += alpha10*(*v); 1746b9cda22cSBarry Smith idx++; v++; 1747b9cda22cSBarry Smith } 1748b9cda22cSBarry Smith } 17499566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(20.0*a->nz)); 17509566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 17519566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 1752b9cda22cSBarry Smith PetscFunctionReturn(0); 1753b9cda22cSBarry Smith } 1754b9cda22cSBarry Smith 1755b9cda22cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1756b9cda22cSBarry Smith { 1757b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1758b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1759d2840e09SBarry Smith const PetscScalar *x,*v; 1760d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1761d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1762d2840e09SBarry Smith PetscInt n,i; 1763b9cda22cSBarry Smith 1764b9cda22cSBarry Smith PetscFunctionBegin; 17659566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 17669566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 17679566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 1768b9cda22cSBarry Smith for (i=0; i<m; i++) { 1769b9cda22cSBarry Smith idx = a->j + a->i[i]; 1770b9cda22cSBarry Smith v = a->a + a->i[i]; 1771b9cda22cSBarry Smith n = a->i[i+1] - a->i[i]; 1772b9cda22cSBarry Smith alpha1 = x[10*i]; 1773b9cda22cSBarry Smith alpha2 = x[10*i+1]; 1774b9cda22cSBarry Smith alpha3 = x[10*i+2]; 1775b9cda22cSBarry Smith alpha4 = x[10*i+3]; 1776b9cda22cSBarry Smith alpha5 = x[10*i+4]; 1777b9cda22cSBarry Smith alpha6 = x[10*i+5]; 1778b9cda22cSBarry Smith alpha7 = x[10*i+6]; 1779b9cda22cSBarry Smith alpha8 = x[10*i+7]; 1780b9cda22cSBarry Smith alpha9 = x[10*i+8]; 1781b9cda22cSBarry Smith alpha10 = x[10*i+9]; 1782b9cda22cSBarry Smith while (n-->0) { 1783b9cda22cSBarry Smith y[10*(*idx)] += alpha1*(*v); 1784b9cda22cSBarry Smith y[10*(*idx)+1] += alpha2*(*v); 1785b9cda22cSBarry Smith y[10*(*idx)+2] += alpha3*(*v); 1786b9cda22cSBarry Smith y[10*(*idx)+3] += alpha4*(*v); 1787b9cda22cSBarry Smith y[10*(*idx)+4] += alpha5*(*v); 1788b9cda22cSBarry Smith y[10*(*idx)+5] += alpha6*(*v); 1789b9cda22cSBarry Smith y[10*(*idx)+6] += alpha7*(*v); 1790b9cda22cSBarry Smith y[10*(*idx)+7] += alpha8*(*v); 1791b9cda22cSBarry Smith y[10*(*idx)+8] += alpha9*(*v); 1792b9cda22cSBarry Smith y[10*(*idx)+9] += alpha10*(*v); 1793b9cda22cSBarry Smith idx++; v++; 1794b9cda22cSBarry Smith } 1795b9cda22cSBarry Smith } 17969566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(20.0*a->nz)); 17979566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 17989566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 1799b9cda22cSBarry Smith PetscFunctionReturn(0); 1800b9cda22cSBarry Smith } 1801b9cda22cSBarry Smith 18022f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/ 1803dbdd7285SBarry Smith PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1804dbdd7285SBarry Smith { 1805dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1806dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1807d2840e09SBarry Smith const PetscScalar *x,*v; 1808d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1809d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1810d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 1811dbdd7285SBarry Smith 1812dbdd7285SBarry Smith PetscFunctionBegin; 18139566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 18149566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 1815dbdd7285SBarry Smith idx = a->j; 1816dbdd7285SBarry Smith v = a->a; 1817dbdd7285SBarry Smith ii = a->i; 1818dbdd7285SBarry Smith 1819dbdd7285SBarry Smith for (i=0; i<m; i++) { 1820dbdd7285SBarry Smith jrow = ii[i]; 1821dbdd7285SBarry Smith n = ii[i+1] - jrow; 1822dbdd7285SBarry Smith sum1 = 0.0; 1823dbdd7285SBarry Smith sum2 = 0.0; 1824dbdd7285SBarry Smith sum3 = 0.0; 1825dbdd7285SBarry Smith sum4 = 0.0; 1826dbdd7285SBarry Smith sum5 = 0.0; 1827dbdd7285SBarry Smith sum6 = 0.0; 1828dbdd7285SBarry Smith sum7 = 0.0; 1829dbdd7285SBarry Smith sum8 = 0.0; 1830dbdd7285SBarry Smith sum9 = 0.0; 1831dbdd7285SBarry Smith sum10 = 0.0; 1832dbdd7285SBarry Smith sum11 = 0.0; 183326fbe8dcSKarl Rupp 1834dbdd7285SBarry Smith nonzerorow += (n>0); 1835dbdd7285SBarry Smith for (j=0; j<n; j++) { 1836dbdd7285SBarry Smith sum1 += v[jrow]*x[11*idx[jrow]]; 1837dbdd7285SBarry Smith sum2 += v[jrow]*x[11*idx[jrow]+1]; 1838dbdd7285SBarry Smith sum3 += v[jrow]*x[11*idx[jrow]+2]; 1839dbdd7285SBarry Smith sum4 += v[jrow]*x[11*idx[jrow]+3]; 1840dbdd7285SBarry Smith sum5 += v[jrow]*x[11*idx[jrow]+4]; 1841dbdd7285SBarry Smith sum6 += v[jrow]*x[11*idx[jrow]+5]; 1842dbdd7285SBarry Smith sum7 += v[jrow]*x[11*idx[jrow]+6]; 1843dbdd7285SBarry Smith sum8 += v[jrow]*x[11*idx[jrow]+7]; 1844dbdd7285SBarry Smith sum9 += v[jrow]*x[11*idx[jrow]+8]; 1845dbdd7285SBarry Smith sum10 += v[jrow]*x[11*idx[jrow]+9]; 1846dbdd7285SBarry Smith sum11 += v[jrow]*x[11*idx[jrow]+10]; 1847dbdd7285SBarry Smith jrow++; 1848dbdd7285SBarry Smith } 1849dbdd7285SBarry Smith y[11*i] = sum1; 1850dbdd7285SBarry Smith y[11*i+1] = sum2; 1851dbdd7285SBarry Smith y[11*i+2] = sum3; 1852dbdd7285SBarry Smith y[11*i+3] = sum4; 1853dbdd7285SBarry Smith y[11*i+4] = sum5; 1854dbdd7285SBarry Smith y[11*i+5] = sum6; 1855dbdd7285SBarry Smith y[11*i+6] = sum7; 1856dbdd7285SBarry Smith y[11*i+7] = sum8; 1857dbdd7285SBarry Smith y[11*i+8] = sum9; 1858dbdd7285SBarry Smith y[11*i+9] = sum10; 1859dbdd7285SBarry Smith y[11*i+10] = sum11; 1860dbdd7285SBarry Smith } 1861dbdd7285SBarry Smith 18629566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(22.0*a->nz - 11*nonzerorow)); 18639566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 18649566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 1865dbdd7285SBarry Smith PetscFunctionReturn(0); 1866dbdd7285SBarry Smith } 1867dbdd7285SBarry Smith 1868dbdd7285SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 1869dbdd7285SBarry Smith { 1870dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1871dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1872d2840e09SBarry Smith const PetscScalar *x,*v; 1873d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1874d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1875dbdd7285SBarry Smith PetscInt n,i,jrow,j; 1876dbdd7285SBarry Smith 1877dbdd7285SBarry Smith PetscFunctionBegin; 18789566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 18799566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 18809566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 1881dbdd7285SBarry Smith idx = a->j; 1882dbdd7285SBarry Smith v = a->a; 1883dbdd7285SBarry Smith ii = a->i; 1884dbdd7285SBarry Smith 1885dbdd7285SBarry Smith for (i=0; i<m; i++) { 1886dbdd7285SBarry Smith jrow = ii[i]; 1887dbdd7285SBarry Smith n = ii[i+1] - jrow; 1888dbdd7285SBarry Smith sum1 = 0.0; 1889dbdd7285SBarry Smith sum2 = 0.0; 1890dbdd7285SBarry Smith sum3 = 0.0; 1891dbdd7285SBarry Smith sum4 = 0.0; 1892dbdd7285SBarry Smith sum5 = 0.0; 1893dbdd7285SBarry Smith sum6 = 0.0; 1894dbdd7285SBarry Smith sum7 = 0.0; 1895dbdd7285SBarry Smith sum8 = 0.0; 1896dbdd7285SBarry Smith sum9 = 0.0; 1897dbdd7285SBarry Smith sum10 = 0.0; 1898dbdd7285SBarry Smith sum11 = 0.0; 1899dbdd7285SBarry Smith for (j=0; j<n; j++) { 1900dbdd7285SBarry Smith sum1 += v[jrow]*x[11*idx[jrow]]; 1901dbdd7285SBarry Smith sum2 += v[jrow]*x[11*idx[jrow]+1]; 1902dbdd7285SBarry Smith sum3 += v[jrow]*x[11*idx[jrow]+2]; 1903dbdd7285SBarry Smith sum4 += v[jrow]*x[11*idx[jrow]+3]; 1904dbdd7285SBarry Smith sum5 += v[jrow]*x[11*idx[jrow]+4]; 1905dbdd7285SBarry Smith sum6 += v[jrow]*x[11*idx[jrow]+5]; 1906dbdd7285SBarry Smith sum7 += v[jrow]*x[11*idx[jrow]+6]; 1907dbdd7285SBarry Smith sum8 += v[jrow]*x[11*idx[jrow]+7]; 1908dbdd7285SBarry Smith sum9 += v[jrow]*x[11*idx[jrow]+8]; 1909dbdd7285SBarry Smith sum10 += v[jrow]*x[11*idx[jrow]+9]; 1910dbdd7285SBarry Smith sum11 += v[jrow]*x[11*idx[jrow]+10]; 1911dbdd7285SBarry Smith jrow++; 1912dbdd7285SBarry Smith } 1913dbdd7285SBarry Smith y[11*i] += sum1; 1914dbdd7285SBarry Smith y[11*i+1] += sum2; 1915dbdd7285SBarry Smith y[11*i+2] += sum3; 1916dbdd7285SBarry Smith y[11*i+3] += sum4; 1917dbdd7285SBarry Smith y[11*i+4] += sum5; 1918dbdd7285SBarry Smith y[11*i+5] += sum6; 1919dbdd7285SBarry Smith y[11*i+6] += sum7; 1920dbdd7285SBarry Smith y[11*i+7] += sum8; 1921dbdd7285SBarry Smith y[11*i+8] += sum9; 1922dbdd7285SBarry Smith y[11*i+9] += sum10; 1923dbdd7285SBarry Smith y[11*i+10] += sum11; 1924dbdd7285SBarry Smith } 1925dbdd7285SBarry Smith 19269566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(22.0*a->nz)); 19279566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 19289566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 1929dbdd7285SBarry Smith PetscFunctionReturn(0); 1930dbdd7285SBarry Smith } 1931dbdd7285SBarry Smith 1932dbdd7285SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1933dbdd7285SBarry Smith { 1934dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1935dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1936d2840e09SBarry Smith const PetscScalar *x,*v; 1937d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 1938d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1939d2840e09SBarry Smith PetscInt n,i; 1940dbdd7285SBarry Smith 1941dbdd7285SBarry Smith PetscFunctionBegin; 19429566063dSJacob Faibussowitsch PetscCall(VecSet(yy,0.0)); 19439566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 19449566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 1945dbdd7285SBarry Smith 1946dbdd7285SBarry Smith for (i=0; i<m; i++) { 1947dbdd7285SBarry Smith idx = a->j + a->i[i]; 1948dbdd7285SBarry Smith v = a->a + a->i[i]; 1949dbdd7285SBarry Smith n = a->i[i+1] - a->i[i]; 1950dbdd7285SBarry Smith alpha1 = x[11*i]; 1951dbdd7285SBarry Smith alpha2 = x[11*i+1]; 1952dbdd7285SBarry Smith alpha3 = x[11*i+2]; 1953dbdd7285SBarry Smith alpha4 = x[11*i+3]; 1954dbdd7285SBarry Smith alpha5 = x[11*i+4]; 1955dbdd7285SBarry Smith alpha6 = x[11*i+5]; 1956dbdd7285SBarry Smith alpha7 = x[11*i+6]; 1957dbdd7285SBarry Smith alpha8 = x[11*i+7]; 1958dbdd7285SBarry Smith alpha9 = x[11*i+8]; 1959dbdd7285SBarry Smith alpha10 = x[11*i+9]; 1960dbdd7285SBarry Smith alpha11 = x[11*i+10]; 1961dbdd7285SBarry Smith while (n-->0) { 1962dbdd7285SBarry Smith y[11*(*idx)] += alpha1*(*v); 1963dbdd7285SBarry Smith y[11*(*idx)+1] += alpha2*(*v); 1964dbdd7285SBarry Smith y[11*(*idx)+2] += alpha3*(*v); 1965dbdd7285SBarry Smith y[11*(*idx)+3] += alpha4*(*v); 1966dbdd7285SBarry Smith y[11*(*idx)+4] += alpha5*(*v); 1967dbdd7285SBarry Smith y[11*(*idx)+5] += alpha6*(*v); 1968dbdd7285SBarry Smith y[11*(*idx)+6] += alpha7*(*v); 1969dbdd7285SBarry Smith y[11*(*idx)+7] += alpha8*(*v); 1970dbdd7285SBarry Smith y[11*(*idx)+8] += alpha9*(*v); 1971dbdd7285SBarry Smith y[11*(*idx)+9] += alpha10*(*v); 1972dbdd7285SBarry Smith y[11*(*idx)+10] += alpha11*(*v); 1973dbdd7285SBarry Smith idx++; v++; 1974dbdd7285SBarry Smith } 1975dbdd7285SBarry Smith } 19769566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(22.0*a->nz)); 19779566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 19789566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 1979dbdd7285SBarry Smith PetscFunctionReturn(0); 1980dbdd7285SBarry Smith } 1981dbdd7285SBarry Smith 1982dbdd7285SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 1983dbdd7285SBarry Smith { 1984dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1985dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1986d2840e09SBarry Smith const PetscScalar *x,*v; 1987d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 1988d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1989d2840e09SBarry Smith PetscInt n,i; 1990dbdd7285SBarry Smith 1991dbdd7285SBarry Smith PetscFunctionBegin; 19929566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 19939566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 19949566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 1995dbdd7285SBarry Smith for (i=0; i<m; i++) { 1996dbdd7285SBarry Smith idx = a->j + a->i[i]; 1997dbdd7285SBarry Smith v = a->a + a->i[i]; 1998dbdd7285SBarry Smith n = a->i[i+1] - a->i[i]; 1999dbdd7285SBarry Smith alpha1 = x[11*i]; 2000dbdd7285SBarry Smith alpha2 = x[11*i+1]; 2001dbdd7285SBarry Smith alpha3 = x[11*i+2]; 2002dbdd7285SBarry Smith alpha4 = x[11*i+3]; 2003dbdd7285SBarry Smith alpha5 = x[11*i+4]; 2004dbdd7285SBarry Smith alpha6 = x[11*i+5]; 2005dbdd7285SBarry Smith alpha7 = x[11*i+6]; 2006dbdd7285SBarry Smith alpha8 = x[11*i+7]; 2007dbdd7285SBarry Smith alpha9 = x[11*i+8]; 2008dbdd7285SBarry Smith alpha10 = x[11*i+9]; 2009dbdd7285SBarry Smith alpha11 = x[11*i+10]; 2010dbdd7285SBarry Smith while (n-->0) { 2011dbdd7285SBarry Smith y[11*(*idx)] += alpha1*(*v); 2012dbdd7285SBarry Smith y[11*(*idx)+1] += alpha2*(*v); 2013dbdd7285SBarry Smith y[11*(*idx)+2] += alpha3*(*v); 2014dbdd7285SBarry Smith y[11*(*idx)+3] += alpha4*(*v); 2015dbdd7285SBarry Smith y[11*(*idx)+4] += alpha5*(*v); 2016dbdd7285SBarry Smith y[11*(*idx)+5] += alpha6*(*v); 2017dbdd7285SBarry Smith y[11*(*idx)+6] += alpha7*(*v); 2018dbdd7285SBarry Smith y[11*(*idx)+7] += alpha8*(*v); 2019dbdd7285SBarry Smith y[11*(*idx)+8] += alpha9*(*v); 2020dbdd7285SBarry Smith y[11*(*idx)+9] += alpha10*(*v); 2021dbdd7285SBarry Smith y[11*(*idx)+10] += alpha11*(*v); 2022dbdd7285SBarry Smith idx++; v++; 2023dbdd7285SBarry Smith } 2024dbdd7285SBarry Smith } 20259566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(22.0*a->nz)); 20269566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 20279566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 2028dbdd7285SBarry Smith PetscFunctionReturn(0); 2029dbdd7285SBarry Smith } 2030dbdd7285SBarry Smith 2031dbdd7285SBarry Smith /*--------------------------------------------------------------------------------------------*/ 2032dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 20332f7816d4SBarry Smith { 20342f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 20352f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2036d2840e09SBarry Smith const PetscScalar *x,*v; 2037d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 20382f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2039d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2040d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 20412f7816d4SBarry Smith 20422f7816d4SBarry Smith PetscFunctionBegin; 20439566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 20449566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 20452f7816d4SBarry Smith idx = a->j; 20462f7816d4SBarry Smith v = a->a; 20472f7816d4SBarry Smith ii = a->i; 20482f7816d4SBarry Smith 20492f7816d4SBarry Smith for (i=0; i<m; i++) { 20502f7816d4SBarry Smith jrow = ii[i]; 20512f7816d4SBarry Smith n = ii[i+1] - jrow; 20522f7816d4SBarry Smith sum1 = 0.0; 20532f7816d4SBarry Smith sum2 = 0.0; 20542f7816d4SBarry Smith sum3 = 0.0; 20552f7816d4SBarry Smith sum4 = 0.0; 20562f7816d4SBarry Smith sum5 = 0.0; 20572f7816d4SBarry Smith sum6 = 0.0; 20582f7816d4SBarry Smith sum7 = 0.0; 20592f7816d4SBarry Smith sum8 = 0.0; 20602f7816d4SBarry Smith sum9 = 0.0; 20612f7816d4SBarry Smith sum10 = 0.0; 20622f7816d4SBarry Smith sum11 = 0.0; 20632f7816d4SBarry Smith sum12 = 0.0; 20642f7816d4SBarry Smith sum13 = 0.0; 20652f7816d4SBarry Smith sum14 = 0.0; 20662f7816d4SBarry Smith sum15 = 0.0; 20672f7816d4SBarry Smith sum16 = 0.0; 206826fbe8dcSKarl Rupp 2069b3c51c66SMatthew Knepley nonzerorow += (n>0); 20702f7816d4SBarry Smith for (j=0; j<n; j++) { 20712f7816d4SBarry Smith sum1 += v[jrow]*x[16*idx[jrow]]; 20722f7816d4SBarry Smith sum2 += v[jrow]*x[16*idx[jrow]+1]; 20732f7816d4SBarry Smith sum3 += v[jrow]*x[16*idx[jrow]+2]; 20742f7816d4SBarry Smith sum4 += v[jrow]*x[16*idx[jrow]+3]; 20752f7816d4SBarry Smith sum5 += v[jrow]*x[16*idx[jrow]+4]; 20762f7816d4SBarry Smith sum6 += v[jrow]*x[16*idx[jrow]+5]; 20772f7816d4SBarry Smith sum7 += v[jrow]*x[16*idx[jrow]+6]; 20782f7816d4SBarry Smith sum8 += v[jrow]*x[16*idx[jrow]+7]; 20792f7816d4SBarry Smith sum9 += v[jrow]*x[16*idx[jrow]+8]; 20802f7816d4SBarry Smith sum10 += v[jrow]*x[16*idx[jrow]+9]; 20812f7816d4SBarry Smith sum11 += v[jrow]*x[16*idx[jrow]+10]; 20822f7816d4SBarry Smith sum12 += v[jrow]*x[16*idx[jrow]+11]; 20832f7816d4SBarry Smith sum13 += v[jrow]*x[16*idx[jrow]+12]; 20842f7816d4SBarry Smith sum14 += v[jrow]*x[16*idx[jrow]+13]; 20852f7816d4SBarry Smith sum15 += v[jrow]*x[16*idx[jrow]+14]; 20862f7816d4SBarry Smith sum16 += v[jrow]*x[16*idx[jrow]+15]; 20872f7816d4SBarry Smith jrow++; 20882f7816d4SBarry Smith } 20892f7816d4SBarry Smith y[16*i] = sum1; 20902f7816d4SBarry Smith y[16*i+1] = sum2; 20912f7816d4SBarry Smith y[16*i+2] = sum3; 20922f7816d4SBarry Smith y[16*i+3] = sum4; 20932f7816d4SBarry Smith y[16*i+4] = sum5; 20942f7816d4SBarry Smith y[16*i+5] = sum6; 20952f7816d4SBarry Smith y[16*i+6] = sum7; 20962f7816d4SBarry Smith y[16*i+7] = sum8; 20972f7816d4SBarry Smith y[16*i+8] = sum9; 20982f7816d4SBarry Smith y[16*i+9] = sum10; 20992f7816d4SBarry Smith y[16*i+10] = sum11; 21002f7816d4SBarry Smith y[16*i+11] = sum12; 21012f7816d4SBarry Smith y[16*i+12] = sum13; 21022f7816d4SBarry Smith y[16*i+13] = sum14; 21032f7816d4SBarry Smith y[16*i+14] = sum15; 21042f7816d4SBarry Smith y[16*i+15] = sum16; 21052f7816d4SBarry Smith } 21062f7816d4SBarry Smith 21079566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(32.0*a->nz - 16.0*nonzerorow)); 21089566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 21099566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 21102f7816d4SBarry Smith PetscFunctionReturn(0); 21112f7816d4SBarry Smith } 21122f7816d4SBarry Smith 2113dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 21142f7816d4SBarry Smith { 21152f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 21162f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2117d2840e09SBarry Smith const PetscScalar *x,*v; 2118d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 21192f7816d4SBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2120d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2121d2840e09SBarry Smith PetscInt n,i; 21222f7816d4SBarry Smith 21232f7816d4SBarry Smith PetscFunctionBegin; 21249566063dSJacob Faibussowitsch PetscCall(VecSet(yy,0.0)); 21259566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 21269566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 2127bfec09a0SHong Zhang 21282f7816d4SBarry Smith for (i=0; i<m; i++) { 2129bfec09a0SHong Zhang idx = a->j + a->i[i]; 2130bfec09a0SHong Zhang v = a->a + a->i[i]; 21312f7816d4SBarry Smith n = a->i[i+1] - a->i[i]; 21322f7816d4SBarry Smith alpha1 = x[16*i]; 21332f7816d4SBarry Smith alpha2 = x[16*i+1]; 21342f7816d4SBarry Smith alpha3 = x[16*i+2]; 21352f7816d4SBarry Smith alpha4 = x[16*i+3]; 21362f7816d4SBarry Smith alpha5 = x[16*i+4]; 21372f7816d4SBarry Smith alpha6 = x[16*i+5]; 21382f7816d4SBarry Smith alpha7 = x[16*i+6]; 21392f7816d4SBarry Smith alpha8 = x[16*i+7]; 21402f7816d4SBarry Smith alpha9 = x[16*i+8]; 21412f7816d4SBarry Smith alpha10 = x[16*i+9]; 21422f7816d4SBarry Smith alpha11 = x[16*i+10]; 21432f7816d4SBarry Smith alpha12 = x[16*i+11]; 21442f7816d4SBarry Smith alpha13 = x[16*i+12]; 21452f7816d4SBarry Smith alpha14 = x[16*i+13]; 21462f7816d4SBarry Smith alpha15 = x[16*i+14]; 21472f7816d4SBarry Smith alpha16 = x[16*i+15]; 21482f7816d4SBarry Smith while (n-->0) { 21492f7816d4SBarry Smith y[16*(*idx)] += alpha1*(*v); 21502f7816d4SBarry Smith y[16*(*idx)+1] += alpha2*(*v); 21512f7816d4SBarry Smith y[16*(*idx)+2] += alpha3*(*v); 21522f7816d4SBarry Smith y[16*(*idx)+3] += alpha4*(*v); 21532f7816d4SBarry Smith y[16*(*idx)+4] += alpha5*(*v); 21542f7816d4SBarry Smith y[16*(*idx)+5] += alpha6*(*v); 21552f7816d4SBarry Smith y[16*(*idx)+6] += alpha7*(*v); 21562f7816d4SBarry Smith y[16*(*idx)+7] += alpha8*(*v); 21572f7816d4SBarry Smith y[16*(*idx)+8] += alpha9*(*v); 21582f7816d4SBarry Smith y[16*(*idx)+9] += alpha10*(*v); 21592f7816d4SBarry Smith y[16*(*idx)+10] += alpha11*(*v); 21602f7816d4SBarry Smith y[16*(*idx)+11] += alpha12*(*v); 21612f7816d4SBarry Smith y[16*(*idx)+12] += alpha13*(*v); 21622f7816d4SBarry Smith y[16*(*idx)+13] += alpha14*(*v); 21632f7816d4SBarry Smith y[16*(*idx)+14] += alpha15*(*v); 21642f7816d4SBarry Smith y[16*(*idx)+15] += alpha16*(*v); 21652f7816d4SBarry Smith idx++; v++; 21662f7816d4SBarry Smith } 21672f7816d4SBarry Smith } 21689566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(32.0*a->nz)); 21699566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 21709566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 21712f7816d4SBarry Smith PetscFunctionReturn(0); 21722f7816d4SBarry Smith } 21732f7816d4SBarry Smith 2174dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 21752f7816d4SBarry Smith { 21762f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 21772f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2178d2840e09SBarry Smith const PetscScalar *x,*v; 2179d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 21802f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2181d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2182b24ad042SBarry Smith PetscInt n,i,jrow,j; 21832f7816d4SBarry Smith 21842f7816d4SBarry Smith PetscFunctionBegin; 21859566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 21869566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 21879566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 21882f7816d4SBarry Smith idx = a->j; 21892f7816d4SBarry Smith v = a->a; 21902f7816d4SBarry Smith ii = a->i; 21912f7816d4SBarry Smith 21922f7816d4SBarry Smith for (i=0; i<m; i++) { 21932f7816d4SBarry Smith jrow = ii[i]; 21942f7816d4SBarry Smith n = ii[i+1] - jrow; 21952f7816d4SBarry Smith sum1 = 0.0; 21962f7816d4SBarry Smith sum2 = 0.0; 21972f7816d4SBarry Smith sum3 = 0.0; 21982f7816d4SBarry Smith sum4 = 0.0; 21992f7816d4SBarry Smith sum5 = 0.0; 22002f7816d4SBarry Smith sum6 = 0.0; 22012f7816d4SBarry Smith sum7 = 0.0; 22022f7816d4SBarry Smith sum8 = 0.0; 22032f7816d4SBarry Smith sum9 = 0.0; 22042f7816d4SBarry Smith sum10 = 0.0; 22052f7816d4SBarry Smith sum11 = 0.0; 22062f7816d4SBarry Smith sum12 = 0.0; 22072f7816d4SBarry Smith sum13 = 0.0; 22082f7816d4SBarry Smith sum14 = 0.0; 22092f7816d4SBarry Smith sum15 = 0.0; 22102f7816d4SBarry Smith sum16 = 0.0; 22112f7816d4SBarry Smith for (j=0; j<n; j++) { 22122f7816d4SBarry Smith sum1 += v[jrow]*x[16*idx[jrow]]; 22132f7816d4SBarry Smith sum2 += v[jrow]*x[16*idx[jrow]+1]; 22142f7816d4SBarry Smith sum3 += v[jrow]*x[16*idx[jrow]+2]; 22152f7816d4SBarry Smith sum4 += v[jrow]*x[16*idx[jrow]+3]; 22162f7816d4SBarry Smith sum5 += v[jrow]*x[16*idx[jrow]+4]; 22172f7816d4SBarry Smith sum6 += v[jrow]*x[16*idx[jrow]+5]; 22182f7816d4SBarry Smith sum7 += v[jrow]*x[16*idx[jrow]+6]; 22192f7816d4SBarry Smith sum8 += v[jrow]*x[16*idx[jrow]+7]; 22202f7816d4SBarry Smith sum9 += v[jrow]*x[16*idx[jrow]+8]; 22212f7816d4SBarry Smith sum10 += v[jrow]*x[16*idx[jrow]+9]; 22222f7816d4SBarry Smith sum11 += v[jrow]*x[16*idx[jrow]+10]; 22232f7816d4SBarry Smith sum12 += v[jrow]*x[16*idx[jrow]+11]; 22242f7816d4SBarry Smith sum13 += v[jrow]*x[16*idx[jrow]+12]; 22252f7816d4SBarry Smith sum14 += v[jrow]*x[16*idx[jrow]+13]; 22262f7816d4SBarry Smith sum15 += v[jrow]*x[16*idx[jrow]+14]; 22272f7816d4SBarry Smith sum16 += v[jrow]*x[16*idx[jrow]+15]; 22282f7816d4SBarry Smith jrow++; 22292f7816d4SBarry Smith } 22302f7816d4SBarry Smith y[16*i] += sum1; 22312f7816d4SBarry Smith y[16*i+1] += sum2; 22322f7816d4SBarry Smith y[16*i+2] += sum3; 22332f7816d4SBarry Smith y[16*i+3] += sum4; 22342f7816d4SBarry Smith y[16*i+4] += sum5; 22352f7816d4SBarry Smith y[16*i+5] += sum6; 22362f7816d4SBarry Smith y[16*i+6] += sum7; 22372f7816d4SBarry Smith y[16*i+7] += sum8; 22382f7816d4SBarry Smith y[16*i+8] += sum9; 22392f7816d4SBarry Smith y[16*i+9] += sum10; 22402f7816d4SBarry Smith y[16*i+10] += sum11; 22412f7816d4SBarry Smith y[16*i+11] += sum12; 22422f7816d4SBarry Smith y[16*i+12] += sum13; 22432f7816d4SBarry Smith y[16*i+13] += sum14; 22442f7816d4SBarry Smith y[16*i+14] += sum15; 22452f7816d4SBarry Smith y[16*i+15] += sum16; 22462f7816d4SBarry Smith } 22472f7816d4SBarry Smith 22489566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(32.0*a->nz)); 22499566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 22509566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 22512f7816d4SBarry Smith PetscFunctionReturn(0); 22522f7816d4SBarry Smith } 22532f7816d4SBarry Smith 2254dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 22552f7816d4SBarry Smith { 22562f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 22572f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2258d2840e09SBarry Smith const PetscScalar *x,*v; 2259d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 22602f7816d4SBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2261d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2262d2840e09SBarry Smith PetscInt n,i; 22632f7816d4SBarry Smith 22642f7816d4SBarry Smith PetscFunctionBegin; 22659566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 22669566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 22679566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 22682f7816d4SBarry Smith for (i=0; i<m; i++) { 2269bfec09a0SHong Zhang idx = a->j + a->i[i]; 2270bfec09a0SHong Zhang v = a->a + a->i[i]; 22712f7816d4SBarry Smith n = a->i[i+1] - a->i[i]; 22722f7816d4SBarry Smith alpha1 = x[16*i]; 22732f7816d4SBarry Smith alpha2 = x[16*i+1]; 22742f7816d4SBarry Smith alpha3 = x[16*i+2]; 22752f7816d4SBarry Smith alpha4 = x[16*i+3]; 22762f7816d4SBarry Smith alpha5 = x[16*i+4]; 22772f7816d4SBarry Smith alpha6 = x[16*i+5]; 22782f7816d4SBarry Smith alpha7 = x[16*i+6]; 22792f7816d4SBarry Smith alpha8 = x[16*i+7]; 22802f7816d4SBarry Smith alpha9 = x[16*i+8]; 22812f7816d4SBarry Smith alpha10 = x[16*i+9]; 22822f7816d4SBarry Smith alpha11 = x[16*i+10]; 22832f7816d4SBarry Smith alpha12 = x[16*i+11]; 22842f7816d4SBarry Smith alpha13 = x[16*i+12]; 22852f7816d4SBarry Smith alpha14 = x[16*i+13]; 22862f7816d4SBarry Smith alpha15 = x[16*i+14]; 22872f7816d4SBarry Smith alpha16 = x[16*i+15]; 22882f7816d4SBarry Smith while (n-->0) { 22892f7816d4SBarry Smith y[16*(*idx)] += alpha1*(*v); 22902f7816d4SBarry Smith y[16*(*idx)+1] += alpha2*(*v); 22912f7816d4SBarry Smith y[16*(*idx)+2] += alpha3*(*v); 22922f7816d4SBarry Smith y[16*(*idx)+3] += alpha4*(*v); 22932f7816d4SBarry Smith y[16*(*idx)+4] += alpha5*(*v); 22942f7816d4SBarry Smith y[16*(*idx)+5] += alpha6*(*v); 22952f7816d4SBarry Smith y[16*(*idx)+6] += alpha7*(*v); 22962f7816d4SBarry Smith y[16*(*idx)+7] += alpha8*(*v); 22972f7816d4SBarry Smith y[16*(*idx)+8] += alpha9*(*v); 22982f7816d4SBarry Smith y[16*(*idx)+9] += alpha10*(*v); 22992f7816d4SBarry Smith y[16*(*idx)+10] += alpha11*(*v); 23002f7816d4SBarry Smith y[16*(*idx)+11] += alpha12*(*v); 23012f7816d4SBarry Smith y[16*(*idx)+12] += alpha13*(*v); 23022f7816d4SBarry Smith y[16*(*idx)+13] += alpha14*(*v); 23032f7816d4SBarry Smith y[16*(*idx)+14] += alpha15*(*v); 23042f7816d4SBarry Smith y[16*(*idx)+15] += alpha16*(*v); 23052f7816d4SBarry Smith idx++; v++; 23062f7816d4SBarry Smith } 23072f7816d4SBarry Smith } 23089566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(32.0*a->nz)); 23099566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 23109566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 231166ed3db0SBarry Smith PetscFunctionReturn(0); 231266ed3db0SBarry Smith } 231366ed3db0SBarry Smith 2314ed1c418cSBarry Smith /*--------------------------------------------------------------------------------------------*/ 2315ed1c418cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2316ed1c418cSBarry Smith { 2317ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2318ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2319d2840e09SBarry Smith const PetscScalar *x,*v; 2320d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2321ed1c418cSBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2322d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2323d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 2324ed1c418cSBarry Smith 2325ed1c418cSBarry Smith PetscFunctionBegin; 23269566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 23279566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 2328ed1c418cSBarry Smith idx = a->j; 2329ed1c418cSBarry Smith v = a->a; 2330ed1c418cSBarry Smith ii = a->i; 2331ed1c418cSBarry Smith 2332ed1c418cSBarry Smith for (i=0; i<m; i++) { 2333ed1c418cSBarry Smith jrow = ii[i]; 2334ed1c418cSBarry Smith n = ii[i+1] - jrow; 2335ed1c418cSBarry Smith sum1 = 0.0; 2336ed1c418cSBarry Smith sum2 = 0.0; 2337ed1c418cSBarry Smith sum3 = 0.0; 2338ed1c418cSBarry Smith sum4 = 0.0; 2339ed1c418cSBarry Smith sum5 = 0.0; 2340ed1c418cSBarry Smith sum6 = 0.0; 2341ed1c418cSBarry Smith sum7 = 0.0; 2342ed1c418cSBarry Smith sum8 = 0.0; 2343ed1c418cSBarry Smith sum9 = 0.0; 2344ed1c418cSBarry Smith sum10 = 0.0; 2345ed1c418cSBarry Smith sum11 = 0.0; 2346ed1c418cSBarry Smith sum12 = 0.0; 2347ed1c418cSBarry Smith sum13 = 0.0; 2348ed1c418cSBarry Smith sum14 = 0.0; 2349ed1c418cSBarry Smith sum15 = 0.0; 2350ed1c418cSBarry Smith sum16 = 0.0; 2351ed1c418cSBarry Smith sum17 = 0.0; 2352ed1c418cSBarry Smith sum18 = 0.0; 235326fbe8dcSKarl Rupp 2354ed1c418cSBarry Smith nonzerorow += (n>0); 2355ed1c418cSBarry Smith for (j=0; j<n; j++) { 2356ed1c418cSBarry Smith sum1 += v[jrow]*x[18*idx[jrow]]; 2357ed1c418cSBarry Smith sum2 += v[jrow]*x[18*idx[jrow]+1]; 2358ed1c418cSBarry Smith sum3 += v[jrow]*x[18*idx[jrow]+2]; 2359ed1c418cSBarry Smith sum4 += v[jrow]*x[18*idx[jrow]+3]; 2360ed1c418cSBarry Smith sum5 += v[jrow]*x[18*idx[jrow]+4]; 2361ed1c418cSBarry Smith sum6 += v[jrow]*x[18*idx[jrow]+5]; 2362ed1c418cSBarry Smith sum7 += v[jrow]*x[18*idx[jrow]+6]; 2363ed1c418cSBarry Smith sum8 += v[jrow]*x[18*idx[jrow]+7]; 2364ed1c418cSBarry Smith sum9 += v[jrow]*x[18*idx[jrow]+8]; 2365ed1c418cSBarry Smith sum10 += v[jrow]*x[18*idx[jrow]+9]; 2366ed1c418cSBarry Smith sum11 += v[jrow]*x[18*idx[jrow]+10]; 2367ed1c418cSBarry Smith sum12 += v[jrow]*x[18*idx[jrow]+11]; 2368ed1c418cSBarry Smith sum13 += v[jrow]*x[18*idx[jrow]+12]; 2369ed1c418cSBarry Smith sum14 += v[jrow]*x[18*idx[jrow]+13]; 2370ed1c418cSBarry Smith sum15 += v[jrow]*x[18*idx[jrow]+14]; 2371ed1c418cSBarry Smith sum16 += v[jrow]*x[18*idx[jrow]+15]; 2372ed1c418cSBarry Smith sum17 += v[jrow]*x[18*idx[jrow]+16]; 2373ed1c418cSBarry Smith sum18 += v[jrow]*x[18*idx[jrow]+17]; 2374ed1c418cSBarry Smith jrow++; 2375ed1c418cSBarry Smith } 2376ed1c418cSBarry Smith y[18*i] = sum1; 2377ed1c418cSBarry Smith y[18*i+1] = sum2; 2378ed1c418cSBarry Smith y[18*i+2] = sum3; 2379ed1c418cSBarry Smith y[18*i+3] = sum4; 2380ed1c418cSBarry Smith y[18*i+4] = sum5; 2381ed1c418cSBarry Smith y[18*i+5] = sum6; 2382ed1c418cSBarry Smith y[18*i+6] = sum7; 2383ed1c418cSBarry Smith y[18*i+7] = sum8; 2384ed1c418cSBarry Smith y[18*i+8] = sum9; 2385ed1c418cSBarry Smith y[18*i+9] = sum10; 2386ed1c418cSBarry Smith y[18*i+10] = sum11; 2387ed1c418cSBarry Smith y[18*i+11] = sum12; 2388ed1c418cSBarry Smith y[18*i+12] = sum13; 2389ed1c418cSBarry Smith y[18*i+13] = sum14; 2390ed1c418cSBarry Smith y[18*i+14] = sum15; 2391ed1c418cSBarry Smith y[18*i+15] = sum16; 2392ed1c418cSBarry Smith y[18*i+16] = sum17; 2393ed1c418cSBarry Smith y[18*i+17] = sum18; 2394ed1c418cSBarry Smith } 2395ed1c418cSBarry Smith 23969566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(36.0*a->nz - 18.0*nonzerorow)); 23979566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 23989566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 2399ed1c418cSBarry Smith PetscFunctionReturn(0); 2400ed1c418cSBarry Smith } 2401ed1c418cSBarry Smith 2402ed1c418cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2403ed1c418cSBarry Smith { 2404ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2405ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2406d2840e09SBarry Smith const PetscScalar *x,*v; 2407d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2408ed1c418cSBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2409d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2410d2840e09SBarry Smith PetscInt n,i; 2411ed1c418cSBarry Smith 2412ed1c418cSBarry Smith PetscFunctionBegin; 24139566063dSJacob Faibussowitsch PetscCall(VecSet(yy,0.0)); 24149566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 24159566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 2416ed1c418cSBarry Smith 2417ed1c418cSBarry Smith for (i=0; i<m; i++) { 2418ed1c418cSBarry Smith idx = a->j + a->i[i]; 2419ed1c418cSBarry Smith v = a->a + a->i[i]; 2420ed1c418cSBarry Smith n = a->i[i+1] - a->i[i]; 2421ed1c418cSBarry Smith alpha1 = x[18*i]; 2422ed1c418cSBarry Smith alpha2 = x[18*i+1]; 2423ed1c418cSBarry Smith alpha3 = x[18*i+2]; 2424ed1c418cSBarry Smith alpha4 = x[18*i+3]; 2425ed1c418cSBarry Smith alpha5 = x[18*i+4]; 2426ed1c418cSBarry Smith alpha6 = x[18*i+5]; 2427ed1c418cSBarry Smith alpha7 = x[18*i+6]; 2428ed1c418cSBarry Smith alpha8 = x[18*i+7]; 2429ed1c418cSBarry Smith alpha9 = x[18*i+8]; 2430ed1c418cSBarry Smith alpha10 = x[18*i+9]; 2431ed1c418cSBarry Smith alpha11 = x[18*i+10]; 2432ed1c418cSBarry Smith alpha12 = x[18*i+11]; 2433ed1c418cSBarry Smith alpha13 = x[18*i+12]; 2434ed1c418cSBarry Smith alpha14 = x[18*i+13]; 2435ed1c418cSBarry Smith alpha15 = x[18*i+14]; 2436ed1c418cSBarry Smith alpha16 = x[18*i+15]; 2437ed1c418cSBarry Smith alpha17 = x[18*i+16]; 2438ed1c418cSBarry Smith alpha18 = x[18*i+17]; 2439ed1c418cSBarry Smith while (n-->0) { 2440ed1c418cSBarry Smith y[18*(*idx)] += alpha1*(*v); 2441ed1c418cSBarry Smith y[18*(*idx)+1] += alpha2*(*v); 2442ed1c418cSBarry Smith y[18*(*idx)+2] += alpha3*(*v); 2443ed1c418cSBarry Smith y[18*(*idx)+3] += alpha4*(*v); 2444ed1c418cSBarry Smith y[18*(*idx)+4] += alpha5*(*v); 2445ed1c418cSBarry Smith y[18*(*idx)+5] += alpha6*(*v); 2446ed1c418cSBarry Smith y[18*(*idx)+6] += alpha7*(*v); 2447ed1c418cSBarry Smith y[18*(*idx)+7] += alpha8*(*v); 2448ed1c418cSBarry Smith y[18*(*idx)+8] += alpha9*(*v); 2449ed1c418cSBarry Smith y[18*(*idx)+9] += alpha10*(*v); 2450ed1c418cSBarry Smith y[18*(*idx)+10] += alpha11*(*v); 2451ed1c418cSBarry Smith y[18*(*idx)+11] += alpha12*(*v); 2452ed1c418cSBarry Smith y[18*(*idx)+12] += alpha13*(*v); 2453ed1c418cSBarry Smith y[18*(*idx)+13] += alpha14*(*v); 2454ed1c418cSBarry Smith y[18*(*idx)+14] += alpha15*(*v); 2455ed1c418cSBarry Smith y[18*(*idx)+15] += alpha16*(*v); 2456ed1c418cSBarry Smith y[18*(*idx)+16] += alpha17*(*v); 2457ed1c418cSBarry Smith y[18*(*idx)+17] += alpha18*(*v); 2458ed1c418cSBarry Smith idx++; v++; 2459ed1c418cSBarry Smith } 2460ed1c418cSBarry Smith } 24619566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(36.0*a->nz)); 24629566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 24639566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 2464ed1c418cSBarry Smith PetscFunctionReturn(0); 2465ed1c418cSBarry Smith } 2466ed1c418cSBarry Smith 2467ed1c418cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2468ed1c418cSBarry Smith { 2469ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2470ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2471d2840e09SBarry Smith const PetscScalar *x,*v; 2472d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2473ed1c418cSBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2474d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2475ed1c418cSBarry Smith PetscInt n,i,jrow,j; 2476ed1c418cSBarry Smith 2477ed1c418cSBarry Smith PetscFunctionBegin; 24789566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 24799566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 24809566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 2481ed1c418cSBarry Smith idx = a->j; 2482ed1c418cSBarry Smith v = a->a; 2483ed1c418cSBarry Smith ii = a->i; 2484ed1c418cSBarry Smith 2485ed1c418cSBarry Smith for (i=0; i<m; i++) { 2486ed1c418cSBarry Smith jrow = ii[i]; 2487ed1c418cSBarry Smith n = ii[i+1] - jrow; 2488ed1c418cSBarry Smith sum1 = 0.0; 2489ed1c418cSBarry Smith sum2 = 0.0; 2490ed1c418cSBarry Smith sum3 = 0.0; 2491ed1c418cSBarry Smith sum4 = 0.0; 2492ed1c418cSBarry Smith sum5 = 0.0; 2493ed1c418cSBarry Smith sum6 = 0.0; 2494ed1c418cSBarry Smith sum7 = 0.0; 2495ed1c418cSBarry Smith sum8 = 0.0; 2496ed1c418cSBarry Smith sum9 = 0.0; 2497ed1c418cSBarry Smith sum10 = 0.0; 2498ed1c418cSBarry Smith sum11 = 0.0; 2499ed1c418cSBarry Smith sum12 = 0.0; 2500ed1c418cSBarry Smith sum13 = 0.0; 2501ed1c418cSBarry Smith sum14 = 0.0; 2502ed1c418cSBarry Smith sum15 = 0.0; 2503ed1c418cSBarry Smith sum16 = 0.0; 2504ed1c418cSBarry Smith sum17 = 0.0; 2505ed1c418cSBarry Smith sum18 = 0.0; 2506ed1c418cSBarry Smith for (j=0; j<n; j++) { 2507ed1c418cSBarry Smith sum1 += v[jrow]*x[18*idx[jrow]]; 2508ed1c418cSBarry Smith sum2 += v[jrow]*x[18*idx[jrow]+1]; 2509ed1c418cSBarry Smith sum3 += v[jrow]*x[18*idx[jrow]+2]; 2510ed1c418cSBarry Smith sum4 += v[jrow]*x[18*idx[jrow]+3]; 2511ed1c418cSBarry Smith sum5 += v[jrow]*x[18*idx[jrow]+4]; 2512ed1c418cSBarry Smith sum6 += v[jrow]*x[18*idx[jrow]+5]; 2513ed1c418cSBarry Smith sum7 += v[jrow]*x[18*idx[jrow]+6]; 2514ed1c418cSBarry Smith sum8 += v[jrow]*x[18*idx[jrow]+7]; 2515ed1c418cSBarry Smith sum9 += v[jrow]*x[18*idx[jrow]+8]; 2516ed1c418cSBarry Smith sum10 += v[jrow]*x[18*idx[jrow]+9]; 2517ed1c418cSBarry Smith sum11 += v[jrow]*x[18*idx[jrow]+10]; 2518ed1c418cSBarry Smith sum12 += v[jrow]*x[18*idx[jrow]+11]; 2519ed1c418cSBarry Smith sum13 += v[jrow]*x[18*idx[jrow]+12]; 2520ed1c418cSBarry Smith sum14 += v[jrow]*x[18*idx[jrow]+13]; 2521ed1c418cSBarry Smith sum15 += v[jrow]*x[18*idx[jrow]+14]; 2522ed1c418cSBarry Smith sum16 += v[jrow]*x[18*idx[jrow]+15]; 2523ed1c418cSBarry Smith sum17 += v[jrow]*x[18*idx[jrow]+16]; 2524ed1c418cSBarry Smith sum18 += v[jrow]*x[18*idx[jrow]+17]; 2525ed1c418cSBarry Smith jrow++; 2526ed1c418cSBarry Smith } 2527ed1c418cSBarry Smith y[18*i] += sum1; 2528ed1c418cSBarry Smith y[18*i+1] += sum2; 2529ed1c418cSBarry Smith y[18*i+2] += sum3; 2530ed1c418cSBarry Smith y[18*i+3] += sum4; 2531ed1c418cSBarry Smith y[18*i+4] += sum5; 2532ed1c418cSBarry Smith y[18*i+5] += sum6; 2533ed1c418cSBarry Smith y[18*i+6] += sum7; 2534ed1c418cSBarry Smith y[18*i+7] += sum8; 2535ed1c418cSBarry Smith y[18*i+8] += sum9; 2536ed1c418cSBarry Smith y[18*i+9] += sum10; 2537ed1c418cSBarry Smith y[18*i+10] += sum11; 2538ed1c418cSBarry Smith y[18*i+11] += sum12; 2539ed1c418cSBarry Smith y[18*i+12] += sum13; 2540ed1c418cSBarry Smith y[18*i+13] += sum14; 2541ed1c418cSBarry Smith y[18*i+14] += sum15; 2542ed1c418cSBarry Smith y[18*i+15] += sum16; 2543ed1c418cSBarry Smith y[18*i+16] += sum17; 2544ed1c418cSBarry Smith y[18*i+17] += sum18; 2545ed1c418cSBarry Smith } 2546ed1c418cSBarry Smith 25479566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(36.0*a->nz)); 25489566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 25499566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 2550ed1c418cSBarry Smith PetscFunctionReturn(0); 2551ed1c418cSBarry Smith } 2552ed1c418cSBarry Smith 2553ed1c418cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2554ed1c418cSBarry Smith { 2555ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2556ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2557d2840e09SBarry Smith const PetscScalar *x,*v; 2558d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2559ed1c418cSBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2560d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2561d2840e09SBarry Smith PetscInt n,i; 2562ed1c418cSBarry Smith 2563ed1c418cSBarry Smith PetscFunctionBegin; 25649566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 25659566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 25669566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 2567ed1c418cSBarry Smith for (i=0; i<m; i++) { 2568ed1c418cSBarry Smith idx = a->j + a->i[i]; 2569ed1c418cSBarry Smith v = a->a + a->i[i]; 2570ed1c418cSBarry Smith n = a->i[i+1] - a->i[i]; 2571ed1c418cSBarry Smith alpha1 = x[18*i]; 2572ed1c418cSBarry Smith alpha2 = x[18*i+1]; 2573ed1c418cSBarry Smith alpha3 = x[18*i+2]; 2574ed1c418cSBarry Smith alpha4 = x[18*i+3]; 2575ed1c418cSBarry Smith alpha5 = x[18*i+4]; 2576ed1c418cSBarry Smith alpha6 = x[18*i+5]; 2577ed1c418cSBarry Smith alpha7 = x[18*i+6]; 2578ed1c418cSBarry Smith alpha8 = x[18*i+7]; 2579ed1c418cSBarry Smith alpha9 = x[18*i+8]; 2580ed1c418cSBarry Smith alpha10 = x[18*i+9]; 2581ed1c418cSBarry Smith alpha11 = x[18*i+10]; 2582ed1c418cSBarry Smith alpha12 = x[18*i+11]; 2583ed1c418cSBarry Smith alpha13 = x[18*i+12]; 2584ed1c418cSBarry Smith alpha14 = x[18*i+13]; 2585ed1c418cSBarry Smith alpha15 = x[18*i+14]; 2586ed1c418cSBarry Smith alpha16 = x[18*i+15]; 2587ed1c418cSBarry Smith alpha17 = x[18*i+16]; 2588ed1c418cSBarry Smith alpha18 = x[18*i+17]; 2589ed1c418cSBarry Smith while (n-->0) { 2590ed1c418cSBarry Smith y[18*(*idx)] += alpha1*(*v); 2591ed1c418cSBarry Smith y[18*(*idx)+1] += alpha2*(*v); 2592ed1c418cSBarry Smith y[18*(*idx)+2] += alpha3*(*v); 2593ed1c418cSBarry Smith y[18*(*idx)+3] += alpha4*(*v); 2594ed1c418cSBarry Smith y[18*(*idx)+4] += alpha5*(*v); 2595ed1c418cSBarry Smith y[18*(*idx)+5] += alpha6*(*v); 2596ed1c418cSBarry Smith y[18*(*idx)+6] += alpha7*(*v); 2597ed1c418cSBarry Smith y[18*(*idx)+7] += alpha8*(*v); 2598ed1c418cSBarry Smith y[18*(*idx)+8] += alpha9*(*v); 2599ed1c418cSBarry Smith y[18*(*idx)+9] += alpha10*(*v); 2600ed1c418cSBarry Smith y[18*(*idx)+10] += alpha11*(*v); 2601ed1c418cSBarry Smith y[18*(*idx)+11] += alpha12*(*v); 2602ed1c418cSBarry Smith y[18*(*idx)+12] += alpha13*(*v); 2603ed1c418cSBarry Smith y[18*(*idx)+13] += alpha14*(*v); 2604ed1c418cSBarry Smith y[18*(*idx)+14] += alpha15*(*v); 2605ed1c418cSBarry Smith y[18*(*idx)+15] += alpha16*(*v); 2606ed1c418cSBarry Smith y[18*(*idx)+16] += alpha17*(*v); 2607ed1c418cSBarry Smith y[18*(*idx)+17] += alpha18*(*v); 2608ed1c418cSBarry Smith idx++; v++; 2609ed1c418cSBarry Smith } 2610ed1c418cSBarry Smith } 26119566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(36.0*a->nz)); 26129566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 26139566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 2614ed1c418cSBarry Smith PetscFunctionReturn(0); 2615ed1c418cSBarry Smith } 2616ed1c418cSBarry Smith 26176861f193SBarry Smith PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 26186861f193SBarry Smith { 26196861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 26206861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 26216861f193SBarry Smith const PetscScalar *x,*v; 26226861f193SBarry Smith PetscScalar *y,*sums; 26236861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 26246861f193SBarry Smith PetscInt n,i,jrow,j,dof = b->dof,k; 26256861f193SBarry Smith 26266861f193SBarry Smith PetscFunctionBegin; 26279566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 26289566063dSJacob Faibussowitsch PetscCall(VecSet(yy,0.0)); 26299566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 26306861f193SBarry Smith idx = a->j; 26316861f193SBarry Smith v = a->a; 26326861f193SBarry Smith ii = a->i; 26336861f193SBarry Smith 26346861f193SBarry Smith for (i=0; i<m; i++) { 26356861f193SBarry Smith jrow = ii[i]; 26366861f193SBarry Smith n = ii[i+1] - jrow; 26376861f193SBarry Smith sums = y + dof*i; 26386861f193SBarry Smith for (j=0; j<n; j++) { 26396861f193SBarry Smith for (k=0; k<dof; k++) { 26406861f193SBarry Smith sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 26416861f193SBarry Smith } 26426861f193SBarry Smith jrow++; 26436861f193SBarry Smith } 26446861f193SBarry Smith } 26456861f193SBarry Smith 26469566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0*dof*a->nz)); 26479566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 26489566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 26496861f193SBarry Smith PetscFunctionReturn(0); 26506861f193SBarry Smith } 26516861f193SBarry Smith 26526861f193SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 26536861f193SBarry Smith { 26546861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 26556861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 26566861f193SBarry Smith const PetscScalar *x,*v; 26576861f193SBarry Smith PetscScalar *y,*sums; 26586861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 26596861f193SBarry Smith PetscInt n,i,jrow,j,dof = b->dof,k; 26606861f193SBarry Smith 26616861f193SBarry Smith PetscFunctionBegin; 26629566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 26639566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 26649566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 26656861f193SBarry Smith idx = a->j; 26666861f193SBarry Smith v = a->a; 26676861f193SBarry Smith ii = a->i; 26686861f193SBarry Smith 26696861f193SBarry Smith for (i=0; i<m; i++) { 26706861f193SBarry Smith jrow = ii[i]; 26716861f193SBarry Smith n = ii[i+1] - jrow; 26726861f193SBarry Smith sums = y + dof*i; 26736861f193SBarry Smith for (j=0; j<n; j++) { 26746861f193SBarry Smith for (k=0; k<dof; k++) { 26756861f193SBarry Smith sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 26766861f193SBarry Smith } 26776861f193SBarry Smith jrow++; 26786861f193SBarry Smith } 26796861f193SBarry Smith } 26806861f193SBarry Smith 26819566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0*dof*a->nz)); 26829566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 26839566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 26846861f193SBarry Smith PetscFunctionReturn(0); 26856861f193SBarry Smith } 26866861f193SBarry Smith 26876861f193SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 26886861f193SBarry Smith { 26896861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 26906861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 26916861f193SBarry Smith const PetscScalar *x,*v,*alpha; 26926861f193SBarry Smith PetscScalar *y; 26936861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 26946861f193SBarry Smith PetscInt n,i,k; 26956861f193SBarry Smith 26966861f193SBarry Smith PetscFunctionBegin; 26979566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 26989566063dSJacob Faibussowitsch PetscCall(VecSet(yy,0.0)); 26999566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 27006861f193SBarry Smith for (i=0; i<m; i++) { 27016861f193SBarry Smith idx = a->j + a->i[i]; 27026861f193SBarry Smith v = a->a + a->i[i]; 27036861f193SBarry Smith n = a->i[i+1] - a->i[i]; 27046861f193SBarry Smith alpha = x + dof*i; 27056861f193SBarry Smith while (n-->0) { 27066861f193SBarry Smith for (k=0; k<dof; k++) { 27076861f193SBarry Smith y[dof*(*idx)+k] += alpha[k]*(*v); 27086861f193SBarry Smith } 270983ce7366SBarry Smith idx++; v++; 27106861f193SBarry Smith } 27116861f193SBarry Smith } 27129566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0*dof*a->nz)); 27139566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 27149566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 27156861f193SBarry Smith PetscFunctionReturn(0); 27166861f193SBarry Smith } 27176861f193SBarry Smith 27186861f193SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 27196861f193SBarry Smith { 27206861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 27216861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 27226861f193SBarry Smith const PetscScalar *x,*v,*alpha; 27236861f193SBarry Smith PetscScalar *y; 27246861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 27256861f193SBarry Smith PetscInt n,i,k; 27266861f193SBarry Smith 27276861f193SBarry Smith PetscFunctionBegin; 27289566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy,zz)); 27299566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 27309566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz,&y)); 27316861f193SBarry Smith for (i=0; i<m; i++) { 27326861f193SBarry Smith idx = a->j + a->i[i]; 27336861f193SBarry Smith v = a->a + a->i[i]; 27346861f193SBarry Smith n = a->i[i+1] - a->i[i]; 27356861f193SBarry Smith alpha = x + dof*i; 27366861f193SBarry Smith while (n-->0) { 27376861f193SBarry Smith for (k=0; k<dof; k++) { 27386861f193SBarry Smith y[dof*(*idx)+k] += alpha[k]*(*v); 27396861f193SBarry Smith } 274083ce7366SBarry Smith idx++; v++; 27416861f193SBarry Smith } 27426861f193SBarry Smith } 27439566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0*dof*a->nz)); 27449566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 27459566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz,&y)); 27466861f193SBarry Smith PetscFunctionReturn(0); 27476861f193SBarry Smith } 27486861f193SBarry Smith 2749f4a53059SBarry Smith /*===================================================================================*/ 2750dfbe8321SBarry Smith PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2751f4a53059SBarry Smith { 27524cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2753f4a53059SBarry Smith 2754b24ad042SBarry Smith PetscFunctionBegin; 2755f4a53059SBarry Smith /* start the scatter */ 27569566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD)); 27579566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->mult)(b->AIJ,xx,yy)); 27589566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD)); 27599566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy)); 2760f4a53059SBarry Smith PetscFunctionReturn(0); 2761f4a53059SBarry Smith } 2762f4a53059SBarry Smith 2763dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 27644cb9d9b8SBarry Smith { 27654cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2766b24ad042SBarry Smith 27674cb9d9b8SBarry Smith PetscFunctionBegin; 27689566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w)); 27699566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy)); 27709566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE)); 27719566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE)); 27724cb9d9b8SBarry Smith PetscFunctionReturn(0); 27734cb9d9b8SBarry Smith } 27744cb9d9b8SBarry Smith 2775dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 27764cb9d9b8SBarry Smith { 27774cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 27784cb9d9b8SBarry Smith 2779b24ad042SBarry Smith PetscFunctionBegin; 27804cb9d9b8SBarry Smith /* start the scatter */ 27819566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD)); 27829566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz)); 27839566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD)); 27849566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz)); 27854cb9d9b8SBarry Smith PetscFunctionReturn(0); 27864cb9d9b8SBarry Smith } 27874cb9d9b8SBarry Smith 2788dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 27894cb9d9b8SBarry Smith { 27904cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2791b24ad042SBarry Smith 27924cb9d9b8SBarry Smith PetscFunctionBegin; 27939566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w)); 27949566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz)); 27959566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE)); 27969566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE)); 27974cb9d9b8SBarry Smith PetscFunctionReturn(0); 27984cb9d9b8SBarry Smith } 27994cb9d9b8SBarry Smith 280095ddefa2SHong Zhang /* ----------------------------------------------------------------*/ 28014222ddf1SHong Zhang PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C) 28024222ddf1SHong Zhang { 28034222ddf1SHong Zhang Mat_Product *product = C->product; 28044222ddf1SHong Zhang 28054222ddf1SHong Zhang PetscFunctionBegin; 28064222ddf1SHong Zhang if (product->type == MATPRODUCT_PtAP) { 28074222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ; 280898921bdaSJacob Faibussowitsch } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices",MatProductTypes[product->type]); 28094222ddf1SHong Zhang PetscFunctionReturn(0); 28104222ddf1SHong Zhang } 28114222ddf1SHong Zhang 28124222ddf1SHong Zhang PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C) 28134222ddf1SHong Zhang { 28144222ddf1SHong Zhang Mat_Product *product = C->product; 28154222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 28164222ddf1SHong Zhang Mat A=product->A,P=product->B; 28174222ddf1SHong Zhang PetscInt alg=1; /* set default algorithm */ 28184222ddf1SHong Zhang #if !defined(PETSC_HAVE_HYPRE) 28194222ddf1SHong Zhang const char *algTypes[4] = {"scalable","nonscalable","allatonce","allatonce_merged"}; 28204222ddf1SHong Zhang PetscInt nalg=4; 28214222ddf1SHong Zhang #else 28224222ddf1SHong Zhang const char *algTypes[5] = {"scalable","nonscalable","allatonce","allatonce_merged","hypre"}; 28234222ddf1SHong Zhang PetscInt nalg=5; 28244222ddf1SHong Zhang #endif 28254222ddf1SHong Zhang 28264222ddf1SHong Zhang PetscFunctionBegin; 282708401ef6SPierre Jolivet PetscCheck(product->type == MATPRODUCT_PtAP,PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices",MatProductTypes[product->type]); 28284222ddf1SHong Zhang 28294222ddf1SHong Zhang /* PtAP */ 28304222ddf1SHong Zhang /* Check matrix local sizes */ 2831aed4548fSBarry Smith PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,P->rmap->rstart,P->rmap->rend); 2832aed4548fSBarry Smith PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,P->rmap->rstart,P->rmap->rend); 28334222ddf1SHong Zhang 28344222ddf1SHong Zhang /* Set the default algorithm */ 28359566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg,"default",&flg)); 28364222ddf1SHong Zhang if (flg) { 28379566063dSJacob Faibussowitsch PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 28384222ddf1SHong Zhang } 28394222ddf1SHong Zhang 28404222ddf1SHong Zhang /* Get runtime option */ 2841d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat"); 28429566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[alg],&alg,&flg)); 28434222ddf1SHong Zhang if (flg) { 28449566063dSJacob Faibussowitsch PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 28454222ddf1SHong Zhang } 2846d0609cedSBarry Smith PetscOptionsEnd(); 28474222ddf1SHong Zhang 28489566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg,"allatonce",&flg)); 28494222ddf1SHong Zhang if (flg) { 28504222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 28514222ddf1SHong Zhang PetscFunctionReturn(0); 28524222ddf1SHong Zhang } 28534222ddf1SHong Zhang 28549566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg,"allatonce_merged",&flg)); 28554222ddf1SHong Zhang if (flg) { 28564222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 28574222ddf1SHong Zhang PetscFunctionReturn(0); 28584222ddf1SHong Zhang } 28594222ddf1SHong Zhang 28604222ddf1SHong Zhang /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */ 28619566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)A,"Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n")); 28629566063dSJacob Faibussowitsch PetscCall(MatConvert(P,MATMPIAIJ,MAT_INPLACE_MATRIX,&P)); 28639566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(C)); 28644222ddf1SHong Zhang PetscFunctionReturn(0); 28654222ddf1SHong Zhang } 28664222ddf1SHong Zhang 28674222ddf1SHong Zhang /* ----------------------------------------------------------------*/ 28684222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat C) 28697ba1a0bfSKris Buschelman { 28700298fd71SBarry Smith PetscFreeSpaceList free_space=NULL,current_space=NULL; 28717ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp =(Mat_SeqMAIJ*)PP->data; 28727ba1a0bfSKris Buschelman Mat P =pp->AIJ; 28737ba1a0bfSKris Buschelman Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 2874d2840e09SBarry Smith PetscInt *pti,*ptj,*ptJ; 28757ba1a0bfSKris Buschelman PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 2876d2840e09SBarry Smith const PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof; 2877d2840e09SBarry Smith PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn; 28787ba1a0bfSKris Buschelman MatScalar *ca; 2879d2840e09SBarry Smith const PetscInt *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj; 28807ba1a0bfSKris Buschelman 28817ba1a0bfSKris Buschelman PetscFunctionBegin; 28827ba1a0bfSKris Buschelman /* Get ij structure of P^T */ 28839566063dSJacob Faibussowitsch PetscCall(MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj)); 28847ba1a0bfSKris Buschelman 28857ba1a0bfSKris Buschelman cn = pn*ppdof; 28867ba1a0bfSKris Buschelman /* Allocate ci array, arrays for fill computation and */ 28877ba1a0bfSKris Buschelman /* free space for accumulating nonzero column info */ 28889566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(cn+1,&ci)); 28897ba1a0bfSKris Buschelman ci[0] = 0; 28907ba1a0bfSKris Buschelman 28917ba1a0bfSKris Buschelman /* Work arrays for rows of P^T*A */ 28929566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow)); 28939566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ptadenserow,an)); 28949566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(denserow,cn)); 28957ba1a0bfSKris Buschelman 28967ba1a0bfSKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 28977ba1a0bfSKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 28987ba1a0bfSKris Buschelman /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 28999566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space)); 29007ba1a0bfSKris Buschelman current_space = free_space; 29017ba1a0bfSKris Buschelman 29027ba1a0bfSKris Buschelman /* Determine symbolic info for each row of C: */ 29037ba1a0bfSKris Buschelman for (i=0; i<pn; i++) { 29047ba1a0bfSKris Buschelman ptnzi = pti[i+1] - pti[i]; 29057ba1a0bfSKris Buschelman ptJ = ptj + pti[i]; 29067ba1a0bfSKris Buschelman for (dof=0; dof<ppdof; dof++) { 29077ba1a0bfSKris Buschelman ptanzi = 0; 29087ba1a0bfSKris Buschelman /* Determine symbolic row of PtA: */ 29097ba1a0bfSKris Buschelman for (j=0; j<ptnzi; j++) { 29107ba1a0bfSKris Buschelman /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 29117ba1a0bfSKris Buschelman arow = ptJ[j]*ppdof + dof; 29127ba1a0bfSKris Buschelman /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 29137ba1a0bfSKris Buschelman anzj = ai[arow+1] - ai[arow]; 29147ba1a0bfSKris Buschelman ajj = aj + ai[arow]; 29157ba1a0bfSKris Buschelman for (k=0; k<anzj; k++) { 29167ba1a0bfSKris Buschelman if (!ptadenserow[ajj[k]]) { 29177ba1a0bfSKris Buschelman ptadenserow[ajj[k]] = -1; 29187ba1a0bfSKris Buschelman ptasparserow[ptanzi++] = ajj[k]; 29197ba1a0bfSKris Buschelman } 29207ba1a0bfSKris Buschelman } 29217ba1a0bfSKris Buschelman } 29227ba1a0bfSKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 29237ba1a0bfSKris Buschelman ptaj = ptasparserow; 29247ba1a0bfSKris Buschelman cnzi = 0; 29257ba1a0bfSKris Buschelman for (j=0; j<ptanzi; j++) { 29267ba1a0bfSKris Buschelman /* Get offset within block of P */ 29277ba1a0bfSKris Buschelman pshift = *ptaj%ppdof; 29287ba1a0bfSKris Buschelman /* Get block row of P */ 29297ba1a0bfSKris Buschelman prow = (*ptaj++)/ppdof; /* integer division */ 29307ba1a0bfSKris Buschelman /* P has same number of nonzeros per row as the compressed form */ 29317ba1a0bfSKris Buschelman pnzj = pi[prow+1] - pi[prow]; 29327ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 29337ba1a0bfSKris Buschelman for (k=0;k<pnzj;k++) { 29347ba1a0bfSKris Buschelman /* Locations in C are shifted by the offset within the block */ 29357ba1a0bfSKris Buschelman /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 29367ba1a0bfSKris Buschelman if (!denserow[pjj[k]*ppdof+pshift]) { 29377ba1a0bfSKris Buschelman denserow[pjj[k]*ppdof+pshift] = -1; 29387ba1a0bfSKris Buschelman sparserow[cnzi++] = pjj[k]*ppdof+pshift; 29397ba1a0bfSKris Buschelman } 29407ba1a0bfSKris Buschelman } 29417ba1a0bfSKris Buschelman } 29427ba1a0bfSKris Buschelman 29437ba1a0bfSKris Buschelman /* sort sparserow */ 29449566063dSJacob Faibussowitsch PetscCall(PetscSortInt(cnzi,sparserow)); 29457ba1a0bfSKris Buschelman 29467ba1a0bfSKris Buschelman /* If free space is not available, make more free space */ 29477ba1a0bfSKris Buschelman /* Double the amount of total space in the list */ 29487ba1a0bfSKris Buschelman if (current_space->local_remaining<cnzi) { 29499566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space)); 29507ba1a0bfSKris Buschelman } 29517ba1a0bfSKris Buschelman 29527ba1a0bfSKris Buschelman /* Copy data into free space, and zero out denserows */ 29539566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(current_space->array,sparserow,cnzi)); 295426fbe8dcSKarl Rupp 29557ba1a0bfSKris Buschelman current_space->array += cnzi; 29567ba1a0bfSKris Buschelman current_space->local_used += cnzi; 29577ba1a0bfSKris Buschelman current_space->local_remaining -= cnzi; 29587ba1a0bfSKris Buschelman 295926fbe8dcSKarl Rupp for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0; 296026fbe8dcSKarl Rupp for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0; 296126fbe8dcSKarl Rupp 29627ba1a0bfSKris Buschelman /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 29637ba1a0bfSKris Buschelman /* For now, we will recompute what is needed. */ 29647ba1a0bfSKris Buschelman ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 29657ba1a0bfSKris Buschelman } 29667ba1a0bfSKris Buschelman } 29677ba1a0bfSKris Buschelman /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 29687ba1a0bfSKris Buschelman /* Allocate space for cj, initialize cj, and */ 29697ba1a0bfSKris Buschelman /* destroy list of free space and other temporary array(s) */ 29709566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ci[cn]+1,&cj)); 29719566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space,cj)); 29729566063dSJacob Faibussowitsch PetscCall(PetscFree4(ptadenserow,ptasparserow,denserow,sparserow)); 29737ba1a0bfSKris Buschelman 29747ba1a0bfSKris Buschelman /* Allocate space for ca */ 29759566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(ci[cn]+1,&ca)); 29767ba1a0bfSKris Buschelman 29777ba1a0bfSKris Buschelman /* put together the new matrix */ 29789566063dSJacob Faibussowitsch PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,NULL,C)); 29799566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(C,pp->dof)); 29807ba1a0bfSKris Buschelman 29817ba1a0bfSKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 29827ba1a0bfSKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 29834222ddf1SHong Zhang c = (Mat_SeqAIJ*)(C->data); 2984e6b907acSBarry Smith c->free_a = PETSC_TRUE; 2985e6b907acSBarry Smith c->free_ij = PETSC_TRUE; 29867ba1a0bfSKris Buschelman c->nonew = 0; 298726fbe8dcSKarl Rupp 29884222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 29894222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 29907ba1a0bfSKris Buschelman 29917ba1a0bfSKris Buschelman /* Clean up. */ 29929566063dSJacob Faibussowitsch PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj)); 29937ba1a0bfSKris Buschelman PetscFunctionReturn(0); 29947ba1a0bfSKris Buschelman } 29957ba1a0bfSKris Buschelman 29967ba1a0bfSKris Buschelman PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 29977ba1a0bfSKris Buschelman { 29987ba1a0bfSKris Buschelman /* This routine requires testing -- first draft only */ 29997ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 30007ba1a0bfSKris Buschelman Mat P =pp->AIJ; 30017ba1a0bfSKris Buschelman Mat_SeqAIJ *a = (Mat_SeqAIJ*) A->data; 30027ba1a0bfSKris Buschelman Mat_SeqAIJ *p = (Mat_SeqAIJ*) P->data; 30037ba1a0bfSKris Buschelman Mat_SeqAIJ *c = (Mat_SeqAIJ*) C->data; 3004a2ea699eSBarry Smith const PetscInt *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj; 3005d2840e09SBarry Smith const PetscInt *ci=c->i,*cj=c->j,*cjj; 3006d2840e09SBarry Smith const PetscInt am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof; 3007d2840e09SBarry Smith PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense; 3008a2ea699eSBarry Smith const MatScalar *aa=a->a,*pa=p->a,*pA,*paj; 3009d2840e09SBarry Smith MatScalar *ca=c->a,*caj,*apa; 30107ba1a0bfSKris Buschelman 30117ba1a0bfSKris Buschelman PetscFunctionBegin; 30127ba1a0bfSKris Buschelman /* Allocate temporary array for storage of one row of A*P */ 30139566063dSJacob Faibussowitsch PetscCall(PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense)); 30147ba1a0bfSKris Buschelman 30157ba1a0bfSKris Buschelman /* Clear old values in C */ 30169566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ca,ci[cm])); 30177ba1a0bfSKris Buschelman 30187ba1a0bfSKris Buschelman for (i=0; i<am; i++) { 30197ba1a0bfSKris Buschelman /* Form sparse row of A*P */ 30207ba1a0bfSKris Buschelman anzi = ai[i+1] - ai[i]; 30217ba1a0bfSKris Buschelman apnzj = 0; 30227ba1a0bfSKris Buschelman for (j=0; j<anzi; j++) { 30237ba1a0bfSKris Buschelman /* Get offset within block of P */ 30247ba1a0bfSKris Buschelman pshift = *aj%ppdof; 30257ba1a0bfSKris Buschelman /* Get block row of P */ 30267ba1a0bfSKris Buschelman prow = *aj++/ppdof; /* integer division */ 30277ba1a0bfSKris Buschelman pnzj = pi[prow+1] - pi[prow]; 30287ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 30297ba1a0bfSKris Buschelman paj = pa + pi[prow]; 30307ba1a0bfSKris Buschelman for (k=0; k<pnzj; k++) { 30317ba1a0bfSKris Buschelman poffset = pjj[k]*ppdof+pshift; 30327ba1a0bfSKris Buschelman if (!apjdense[poffset]) { 30337ba1a0bfSKris Buschelman apjdense[poffset] = -1; 30347ba1a0bfSKris Buschelman apj[apnzj++] = poffset; 30357ba1a0bfSKris Buschelman } 30367ba1a0bfSKris Buschelman apa[poffset] += (*aa)*paj[k]; 30377ba1a0bfSKris Buschelman } 30389566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0*pnzj)); 30397ba1a0bfSKris Buschelman aa++; 30407ba1a0bfSKris Buschelman } 30417ba1a0bfSKris Buschelman 30427ba1a0bfSKris Buschelman /* Sort the j index array for quick sparse axpy. */ 30437ba1a0bfSKris Buschelman /* Note: a array does not need sorting as it is in dense storage locations. */ 30449566063dSJacob Faibussowitsch PetscCall(PetscSortInt(apnzj,apj)); 30457ba1a0bfSKris Buschelman 30467ba1a0bfSKris Buschelman /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 30477ba1a0bfSKris Buschelman prow = i/ppdof; /* integer division */ 30487ba1a0bfSKris Buschelman pshift = i%ppdof; 30497ba1a0bfSKris Buschelman poffset = pi[prow]; 30507ba1a0bfSKris Buschelman pnzi = pi[prow+1] - poffset; 30517ba1a0bfSKris Buschelman /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 30527ba1a0bfSKris Buschelman pJ = pj+poffset; 30537ba1a0bfSKris Buschelman pA = pa+poffset; 30547ba1a0bfSKris Buschelman for (j=0; j<pnzi; j++) { 30557ba1a0bfSKris Buschelman crow = (*pJ)*ppdof+pshift; 30567ba1a0bfSKris Buschelman cjj = cj + ci[crow]; 30577ba1a0bfSKris Buschelman caj = ca + ci[crow]; 30587ba1a0bfSKris Buschelman pJ++; 30597ba1a0bfSKris Buschelman /* Perform sparse axpy operation. Note cjj includes apj. */ 30607ba1a0bfSKris Buschelman for (k=0,nextap=0; nextap<apnzj; k++) { 306126fbe8dcSKarl Rupp if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]]; 30627ba1a0bfSKris Buschelman } 30639566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0*apnzj)); 30647ba1a0bfSKris Buschelman pA++; 30657ba1a0bfSKris Buschelman } 30667ba1a0bfSKris Buschelman 30677ba1a0bfSKris Buschelman /* Zero the current row info for A*P */ 30687ba1a0bfSKris Buschelman for (j=0; j<apnzj; j++) { 30697ba1a0bfSKris Buschelman apa[apj[j]] = 0.; 30707ba1a0bfSKris Buschelman apjdense[apj[j]] = 0; 30717ba1a0bfSKris Buschelman } 30727ba1a0bfSKris Buschelman } 30737ba1a0bfSKris Buschelman 30747ba1a0bfSKris Buschelman /* Assemble the final matrix and clean up */ 30759566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 30769566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 30779566063dSJacob Faibussowitsch PetscCall(PetscFree3(apa,apj,apjdense)); 307895ddefa2SHong Zhang PetscFunctionReturn(0); 307995ddefa2SHong Zhang } 30807ba1a0bfSKris Buschelman 30814222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C) 30822121bac1SHong Zhang { 30834222ddf1SHong Zhang Mat_Product *product = C->product; 30844222ddf1SHong Zhang Mat A=product->A,P=product->B; 30852121bac1SHong Zhang 30862121bac1SHong Zhang PetscFunctionBegin; 30879566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,product->fill,C)); 30882121bac1SHong Zhang PetscFunctionReturn(0); 30892121bac1SHong Zhang } 30902121bac1SHong Zhang 3091f7a08781SBarry Smith PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 309295ddefa2SHong Zhang { 309395ddefa2SHong Zhang PetscFunctionBegin; 30943e0c911fSBarry Smith SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet"); 309595ddefa2SHong Zhang } 309695ddefa2SHong Zhang 3097f7a08781SBarry Smith PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C) 309895ddefa2SHong Zhang { 309995ddefa2SHong Zhang PetscFunctionBegin; 31003e0c911fSBarry Smith SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 31017ba1a0bfSKris Buschelman } 31025c65b9ecSFande Kong 3103bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,Mat); 3104bc8e477aSFande Kong 3105bc8e477aSFande Kong PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,Mat C) 3106bc8e477aSFande Kong { 3107bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data; 3108bc8e477aSFande Kong 3109bc8e477aSFande Kong PetscFunctionBegin; 311034bcad68SFande Kong 31119566063dSJacob Faibussowitsch PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,C)); 3112bc8e477aSFande Kong PetscFunctionReturn(0); 3113bc8e477aSFande Kong } 3114bc8e477aSFande Kong 31154222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,PetscReal,Mat); 3116bc8e477aSFande Kong 31174222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,PetscReal fill,Mat C) 3118bc8e477aSFande Kong { 3119bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data; 3120bc8e477aSFande Kong 3121bc8e477aSFande Kong PetscFunctionBegin; 31229566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,fill,C)); 31234222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce; 3124bc8e477aSFande Kong PetscFunctionReturn(0); 3125bc8e477aSFande Kong } 3126bc8e477aSFande Kong 3127bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,Mat); 3128bc8e477aSFande Kong 3129bc8e477aSFande Kong PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,Mat C) 3130bc8e477aSFande Kong { 3131bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data; 3132bc8e477aSFande Kong 3133bc8e477aSFande Kong PetscFunctionBegin; 313434bcad68SFande Kong 31359566063dSJacob Faibussowitsch PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,C)); 3136bc8e477aSFande Kong PetscFunctionReturn(0); 3137bc8e477aSFande Kong } 3138bc8e477aSFande Kong 31394222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,PetscReal,Mat); 3140bc8e477aSFande Kong 31414222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,PetscReal fill,Mat C) 3142bc8e477aSFande Kong { 3143bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data; 3144bc8e477aSFande Kong 3145bc8e477aSFande Kong PetscFunctionBegin; 314634bcad68SFande Kong 31479566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,fill,C)); 31484222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged; 3149bc8e477aSFande Kong PetscFunctionReturn(0); 3150bc8e477aSFande Kong } 3151bc8e477aSFande Kong 31524222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C) 3153bc8e477aSFande Kong { 31544222ddf1SHong Zhang Mat_Product *product = C->product; 31554222ddf1SHong Zhang Mat A=product->A,P=product->B; 31564222ddf1SHong Zhang PetscBool flg; 3157bc8e477aSFande Kong 3158bc8e477aSFande Kong PetscFunctionBegin; 31599566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg,"allatonce",&flg)); 31604222ddf1SHong Zhang if (flg) { 31619566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A,P,product->fill,C)); 31624222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 31634222ddf1SHong Zhang PetscFunctionReturn(0); 3164bc8e477aSFande Kong } 316534bcad68SFande Kong 31669566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg,"allatonce_merged",&flg)); 31674222ddf1SHong Zhang if (flg) { 31689566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A,P,product->fill,C)); 31694222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 31704222ddf1SHong Zhang PetscFunctionReturn(0); 31714222ddf1SHong Zhang } 317234bcad68SFande Kong 31734222ddf1SHong Zhang SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"Mat Product Algorithm is not supported"); 3174bc8e477aSFande Kong } 3175bc8e477aSFande Kong 3176cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 31779c4fc4c7SBarry Smith { 31789c4fc4c7SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 3179ceb03754SKris Buschelman Mat a = b->AIJ,B; 31809c4fc4c7SBarry Smith Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 31810fd73130SBarry Smith PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 3182ba8c8a56SBarry Smith PetscInt *cols; 3183ba8c8a56SBarry Smith PetscScalar *vals; 31849c4fc4c7SBarry Smith 31859c4fc4c7SBarry Smith PetscFunctionBegin; 31869566063dSJacob Faibussowitsch PetscCall(MatGetSize(a,&m,&n)); 31879566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(dof*m,&ilen)); 31889c4fc4c7SBarry Smith for (i=0; i<m; i++) { 31899c4fc4c7SBarry Smith nmax = PetscMax(nmax,aij->ilen[i]); 319026fbe8dcSKarl Rupp for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i]; 31919c4fc4c7SBarry Smith } 31929566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF,&B)); 31939566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B,dof*m,dof*n,dof*m,dof*n)); 31949566063dSJacob Faibussowitsch PetscCall(MatSetType(B,newtype)); 31959566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(B,0,ilen)); 31969566063dSJacob Faibussowitsch PetscCall(PetscFree(ilen)); 31979566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nmax,&icols)); 31989c4fc4c7SBarry Smith ii = 0; 31999c4fc4c7SBarry Smith for (i=0; i<m; i++) { 32009566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals)); 32010fd73130SBarry Smith for (j=0; j<dof; j++) { 320226fbe8dcSKarl Rupp for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j; 32039566063dSJacob Faibussowitsch PetscCall(MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES)); 32049c4fc4c7SBarry Smith ii++; 32059c4fc4c7SBarry Smith } 32069566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals)); 32079c4fc4c7SBarry Smith } 32089566063dSJacob Faibussowitsch PetscCall(PetscFree(icols)); 32099566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 32109566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 3211ceb03754SKris Buschelman 3212511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 32139566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A,&B)); 3214c3d102feSKris Buschelman } else { 3215ceb03754SKris Buschelman *newmat = B; 3216c3d102feSKris Buschelman } 32179c4fc4c7SBarry Smith PetscFunctionReturn(0); 32189c4fc4c7SBarry Smith } 32199c4fc4c7SBarry Smith 3220c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h> 3221be1d678aSKris Buschelman 3222cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 32230fd73130SBarry Smith { 32240fd73130SBarry Smith Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 3225ceb03754SKris Buschelman Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 32260fd73130SBarry Smith Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 32270fd73130SBarry Smith Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 32280fd73130SBarry Smith Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 32290fd73130SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 32300298fd71SBarry Smith PetscInt dof = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0; 32310298fd71SBarry Smith PetscInt *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL; 32320fd73130SBarry Smith PetscInt rstart,cstart,*garray,ii,k; 32330fd73130SBarry Smith PetscScalar *vals,*ovals; 32340fd73130SBarry Smith 32350fd73130SBarry Smith PetscFunctionBegin; 32369566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz)); 3237d0f46423SBarry Smith for (i=0; i<A->rmap->n/dof; i++) { 32380fd73130SBarry Smith nmax = PetscMax(nmax,AIJ->ilen[i]); 32390fd73130SBarry Smith onmax = PetscMax(onmax,OAIJ->ilen[i]); 32400fd73130SBarry Smith for (j=0; j<dof; j++) { 32410fd73130SBarry Smith dnz[dof*i+j] = AIJ->ilen[i]; 32420fd73130SBarry Smith onz[dof*i+j] = OAIJ->ilen[i]; 32430fd73130SBarry Smith } 32440fd73130SBarry Smith } 32459566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 32469566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 32479566063dSJacob Faibussowitsch PetscCall(MatSetType(B,newtype)); 32489566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(B,0,dnz,0,onz)); 32499566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(B,dof)); 32509566063dSJacob Faibussowitsch PetscCall(PetscFree2(dnz,onz)); 32510fd73130SBarry Smith 32529566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nmax,&icols,onmax,&oicols)); 3253d0f46423SBarry Smith rstart = dof*maij->A->rmap->rstart; 3254d0f46423SBarry Smith cstart = dof*maij->A->cmap->rstart; 32550fd73130SBarry Smith garray = mpiaij->garray; 32560fd73130SBarry Smith 32570fd73130SBarry Smith ii = rstart; 3258d0f46423SBarry Smith for (i=0; i<A->rmap->n/dof; i++) { 32599566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals)); 32609566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals)); 32610fd73130SBarry Smith for (j=0; j<dof; j++) { 32620fd73130SBarry Smith for (k=0; k<ncols; k++) { 32630fd73130SBarry Smith icols[k] = cstart + dof*cols[k]+j; 32640fd73130SBarry Smith } 32650fd73130SBarry Smith for (k=0; k<oncols; k++) { 32660fd73130SBarry Smith oicols[k] = dof*garray[ocols[k]]+j; 32670fd73130SBarry Smith } 32689566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES)); 32699566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES)); 32700fd73130SBarry Smith ii++; 32710fd73130SBarry Smith } 32729566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals)); 32739566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals)); 32740fd73130SBarry Smith } 32759566063dSJacob Faibussowitsch PetscCall(PetscFree2(icols,oicols)); 32760fd73130SBarry Smith 32779566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 32789566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 3279ceb03754SKris Buschelman 3280511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 32817adad957SLisandro Dalcin PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 32827adad957SLisandro Dalcin ((PetscObject)A)->refct = 1; 328326fbe8dcSKarl Rupp 32849566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A,&B)); 328526fbe8dcSKarl Rupp 32867adad957SLisandro Dalcin ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 3287c3d102feSKris Buschelman } else { 3288ceb03754SKris Buschelman *newmat = B; 3289c3d102feSKris Buschelman } 32900fd73130SBarry Smith PetscFunctionReturn(0); 32910fd73130SBarry Smith } 32920fd73130SBarry Smith 32937dae84e0SHong Zhang PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 32949e516c8fSBarry Smith { 32959e516c8fSBarry Smith Mat A; 32969e516c8fSBarry Smith 32979e516c8fSBarry Smith PetscFunctionBegin; 32989566063dSJacob Faibussowitsch PetscCall(MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A)); 32999566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(A,isrow,iscol,cll,newmat)); 33009566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 33019e516c8fSBarry Smith PetscFunctionReturn(0); 33029e516c8fSBarry Smith } 33030fd73130SBarry Smith 3304ec626240SStefano Zampini PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *submat[]) 3305ec626240SStefano Zampini { 3306ec626240SStefano Zampini Mat A; 3307ec626240SStefano Zampini 3308ec626240SStefano Zampini PetscFunctionBegin; 33099566063dSJacob Faibussowitsch PetscCall(MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A)); 33109566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrices(A,n,irow,icol,scall,submat)); 33119566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 3312ec626240SStefano Zampini PetscFunctionReturn(0); 3313ec626240SStefano Zampini } 3314ec626240SStefano Zampini 3315bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */ 3316480dffcfSJed Brown /*@ 33170bad9183SKris Buschelman MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 33180bad9183SKris Buschelman operations for multicomponent problems. It interpolates each component the same 33190bad9183SKris Buschelman way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 33200bad9183SKris Buschelman and MATMPIAIJ for distributed matrices. 33210bad9183SKris Buschelman 3322ff585edeSJed Brown Collective 3323ff585edeSJed Brown 3324ff585edeSJed Brown Input Parameters: 3325ff585edeSJed Brown + A - the AIJ matrix describing the action on blocks 3326ff585edeSJed Brown - dof - the block size (number of components per node) 3327ff585edeSJed Brown 3328ff585edeSJed Brown Output Parameter: 3329ff585edeSJed Brown . maij - the new MAIJ matrix 3330ff585edeSJed Brown 33310bad9183SKris Buschelman Operations provided: 333267b8a455SSatish Balay .vb 333367b8a455SSatish Balay MatMult 333467b8a455SSatish Balay MatMultTranspose 333567b8a455SSatish Balay MatMultAdd 333667b8a455SSatish Balay MatMultTransposeAdd 333767b8a455SSatish Balay MatView 333867b8a455SSatish Balay .ve 33390bad9183SKris Buschelman 33400bad9183SKris Buschelman Level: advanced 33410bad9183SKris Buschelman 3342db781477SPatrick Sanan .seealso: `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MATMAIJ` 3343ff585edeSJed Brown @*/ 33447087cfbeSBarry Smith PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 334582b1193eSBarry Smith { 3346b24ad042SBarry Smith PetscMPIInt size; 3347b24ad042SBarry Smith PetscInt n; 334882b1193eSBarry Smith Mat B; 3349fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 3350fdc842d1SBarry Smith /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */ 3351fdc842d1SBarry Smith PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE; 3352fdc842d1SBarry Smith #endif 335382b1193eSBarry Smith 335482b1193eSBarry Smith PetscFunctionBegin; 3355fdc842d1SBarry Smith dof = PetscAbs(dof); 33569566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)A)); 3357d72c5c08SBarry Smith 335826fbe8dcSKarl Rupp if (dof == 1) *maij = A; 335926fbe8dcSKarl Rupp else { 33609566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 3361c248e2fdSStefano Zampini /* propagate vec type */ 33629566063dSJacob Faibussowitsch PetscCall(MatSetVecType(B,A->defaultvectype)); 33639566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N)); 33649566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->rmap,dof)); 33659566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->cmap,dof)); 33669566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 33679566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 336826fbe8dcSKarl Rupp 3369cd3bbe55SBarry Smith B->assembled = PETSC_TRUE; 3370d72c5c08SBarry Smith 33719566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 3372f4a53059SBarry Smith if (size == 1) { 3373feb9fe23SJed Brown Mat_SeqMAIJ *b; 3374feb9fe23SJed Brown 33759566063dSJacob Faibussowitsch PetscCall(MatSetType(B,MATSEQMAIJ)); 337626fbe8dcSKarl Rupp 33770298fd71SBarry Smith B->ops->setup = NULL; 33784cb9d9b8SBarry Smith B->ops->destroy = MatDestroy_SeqMAIJ; 33790fd73130SBarry Smith B->ops->view = MatView_SeqMAIJ; 33804222ddf1SHong Zhang 3381feb9fe23SJed Brown b = (Mat_SeqMAIJ*)B->data; 3382b9b97703SBarry Smith b->dof = dof; 33834cb9d9b8SBarry Smith b->AIJ = A; 338426fbe8dcSKarl Rupp 3385d72c5c08SBarry Smith if (dof == 2) { 3386cd3bbe55SBarry Smith B->ops->mult = MatMult_SeqMAIJ_2; 3387cd3bbe55SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_2; 3388cd3bbe55SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 3389cd3bbe55SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 3390bcc973b7SBarry Smith } else if (dof == 3) { 3391bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_3; 3392bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_3; 3393bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 3394bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 3395bcc973b7SBarry Smith } else if (dof == 4) { 3396bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_4; 3397bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_4; 3398bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 3399bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 3400f9fae5adSBarry Smith } else if (dof == 5) { 3401f9fae5adSBarry Smith B->ops->mult = MatMult_SeqMAIJ_5; 3402f9fae5adSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_5; 3403f9fae5adSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 3404f9fae5adSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 34056cd98798SBarry Smith } else if (dof == 6) { 34066cd98798SBarry Smith B->ops->mult = MatMult_SeqMAIJ_6; 34076cd98798SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_6; 34086cd98798SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 34096cd98798SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 3410ed8eea03SBarry Smith } else if (dof == 7) { 3411ed8eea03SBarry Smith B->ops->mult = MatMult_SeqMAIJ_7; 3412ed8eea03SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_7; 3413ed8eea03SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 3414ed8eea03SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 341566ed3db0SBarry Smith } else if (dof == 8) { 341666ed3db0SBarry Smith B->ops->mult = MatMult_SeqMAIJ_8; 341766ed3db0SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_8; 341866ed3db0SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 341966ed3db0SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 34202b692628SMatthew Knepley } else if (dof == 9) { 34212b692628SMatthew Knepley B->ops->mult = MatMult_SeqMAIJ_9; 34222b692628SMatthew Knepley B->ops->multadd = MatMultAdd_SeqMAIJ_9; 34232b692628SMatthew Knepley B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 34242b692628SMatthew Knepley B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 3425b9cda22cSBarry Smith } else if (dof == 10) { 3426b9cda22cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_10; 3427b9cda22cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_10; 3428b9cda22cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 3429b9cda22cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 3430dbdd7285SBarry Smith } else if (dof == 11) { 3431dbdd7285SBarry Smith B->ops->mult = MatMult_SeqMAIJ_11; 3432dbdd7285SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_11; 3433dbdd7285SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 3434dbdd7285SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 34352f7816d4SBarry Smith } else if (dof == 16) { 34362f7816d4SBarry Smith B->ops->mult = MatMult_SeqMAIJ_16; 34372f7816d4SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_16; 34382f7816d4SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 34392f7816d4SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 3440ed1c418cSBarry Smith } else if (dof == 18) { 3441ed1c418cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_18; 3442ed1c418cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_18; 3443ed1c418cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 3444ed1c418cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 344582b1193eSBarry Smith } else { 34466861f193SBarry Smith B->ops->mult = MatMult_SeqMAIJ_N; 34476861f193SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_N; 34486861f193SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 34496861f193SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 345082b1193eSBarry Smith } 3451fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 34529566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaijcusparse_C",MatConvert_SeqMAIJ_SeqAIJ)); 3453fdc842d1SBarry Smith #endif 34549566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ)); 34559566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaij_seqmaij_C",MatProductSetFromOptions_SeqAIJ_SeqMAIJ)); 3456f4a53059SBarry Smith } else { 3457f4a53059SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)A->data; 3458feb9fe23SJed Brown Mat_MPIMAIJ *b; 3459f4a53059SBarry Smith IS from,to; 3460f4a53059SBarry Smith Vec gvec; 3461f4a53059SBarry Smith 34629566063dSJacob Faibussowitsch PetscCall(MatSetType(B,MATMPIMAIJ)); 346326fbe8dcSKarl Rupp 34640298fd71SBarry Smith B->ops->setup = NULL; 3465d72c5c08SBarry Smith B->ops->destroy = MatDestroy_MPIMAIJ; 34660fd73130SBarry Smith B->ops->view = MatView_MPIMAIJ; 346726fbe8dcSKarl Rupp 3468b9b97703SBarry Smith b = (Mat_MPIMAIJ*)B->data; 3469b9b97703SBarry Smith b->dof = dof; 3470b9b97703SBarry Smith b->A = A; 347126fbe8dcSKarl Rupp 34729566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(mpiaij->A,-dof,&b->AIJ)); 34739566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(mpiaij->B,-dof,&b->OAIJ)); 34744cb9d9b8SBarry Smith 34759566063dSJacob Faibussowitsch PetscCall(VecGetSize(mpiaij->lvec,&n)); 34769566063dSJacob Faibussowitsch PetscCall(VecCreate(PETSC_COMM_SELF,&b->w)); 34779566063dSJacob Faibussowitsch PetscCall(VecSetSizes(b->w,n*dof,n*dof)); 34789566063dSJacob Faibussowitsch PetscCall(VecSetBlockSize(b->w,dof)); 34799566063dSJacob Faibussowitsch PetscCall(VecSetType(b->w,VECSEQ)); 3480f4a53059SBarry Smith 3481f4a53059SBarry Smith /* create two temporary Index sets for build scatter gather */ 34829566063dSJacob Faibussowitsch PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from)); 34839566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to)); 3484f4a53059SBarry Smith 3485f4a53059SBarry Smith /* create temporary global vector to generate scatter context */ 34869566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec)); 3487f4a53059SBarry Smith 3488f4a53059SBarry Smith /* generate the scatter context */ 34899566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(gvec,from,b->w,to,&b->ctx)); 3490f4a53059SBarry Smith 34919566063dSJacob Faibussowitsch PetscCall(ISDestroy(&from)); 34929566063dSJacob Faibussowitsch PetscCall(ISDestroy(&to)); 34939566063dSJacob Faibussowitsch PetscCall(VecDestroy(&gvec)); 3494f4a53059SBarry Smith 3495f4a53059SBarry Smith B->ops->mult = MatMult_MPIMAIJ_dof; 34964cb9d9b8SBarry Smith B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 34974cb9d9b8SBarry Smith B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 34984cb9d9b8SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 349926fbe8dcSKarl Rupp 3500fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 35019566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaijcusparse_C",MatConvert_MPIMAIJ_MPIAIJ)); 3502fdc842d1SBarry Smith #endif 35039566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ)); 35049566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpimaij_C",MatProductSetFromOptions_MPIAIJ_MPIMAIJ)); 3505f4a53059SBarry Smith } 35067dae84e0SHong Zhang B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ; 3507ec626240SStefano Zampini B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ; 35089566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 3509fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 3510fdc842d1SBarry Smith /* temporary until we have CUDA implementation of MAIJ */ 3511fdc842d1SBarry Smith { 3512fdc842d1SBarry Smith PetscBool flg; 3513fdc842d1SBarry Smith if (convert) { 35149566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)A,&flg,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,MATAIJCUSPARSE,"")); 3515fdc842d1SBarry Smith if (flg) { 35169566063dSJacob Faibussowitsch PetscCall(MatConvert(B,((PetscObject)A)->type_name,MAT_INPLACE_MATRIX,&B)); 3517fdc842d1SBarry Smith } 3518fdc842d1SBarry Smith } 3519fdc842d1SBarry Smith } 3520fdc842d1SBarry Smith #endif 3521cd3bbe55SBarry Smith *maij = B; 3522d72c5c08SBarry Smith } 352382b1193eSBarry Smith PetscFunctionReturn(0); 352482b1193eSBarry Smith } 3525