1be1d678aSKris Buschelman 282b1193eSBarry Smith /* 3cd3bbe55SBarry Smith Defines the basic matrix operations for the MAIJ matrix storage format. 4cd3bbe55SBarry Smith This format is used for restriction and interpolation operations for 5cd3bbe55SBarry Smith multicomponent problems. It interpolates each component the same way 6cd3bbe55SBarry Smith independently. 7cd3bbe55SBarry Smith 8cd3bbe55SBarry Smith We provide: 9cd3bbe55SBarry Smith MatMult() 10cd3bbe55SBarry Smith MatMultTranspose() 11cd3bbe55SBarry Smith MatMultTransposeAdd() 12cd3bbe55SBarry Smith MatMultAdd() 13cd3bbe55SBarry Smith and 14cd3bbe55SBarry Smith MatCreateMAIJ(Mat,dof,Mat*) 15f4a53059SBarry Smith 16f4a53059SBarry Smith This single directory handles both the sequential and parallel codes 1782b1193eSBarry Smith */ 1882b1193eSBarry Smith 19c6db04a5SJed Brown #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 20c6db04a5SJed Brown #include <../src/mat/utils/freespace.h> 2182b1193eSBarry Smith 22b350b9acSSatish Balay /*@ 23ff585edeSJed Brown MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix 24ff585edeSJed Brown 25ff585edeSJed Brown Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel 26ff585edeSJed Brown 27ff585edeSJed Brown Input Parameter: 28ff585edeSJed Brown . A - the MAIJ matrix 29ff585edeSJed Brown 30ff585edeSJed Brown Output Parameter: 31ff585edeSJed Brown . B - the AIJ matrix 32ff585edeSJed Brown 33ff585edeSJed Brown Level: advanced 34ff585edeSJed Brown 3595452b02SPatrick Sanan Notes: 3695452b02SPatrick Sanan The reference count on the AIJ matrix is not increased so you should not destroy it. 37ff585edeSJed Brown 38ff585edeSJed Brown .seealso: MatCreateMAIJ() 39ff585edeSJed Brown @*/ 407087cfbeSBarry Smith PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B) 41b9b97703SBarry Smith { 42dfbe8321SBarry Smith PetscErrorCode ierr; 43ace3abfcSBarry Smith PetscBool ismpimaij,isseqmaij; 44b9b97703SBarry Smith 45b9b97703SBarry Smith PetscFunctionBegin; 46251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 47251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 48b9b97703SBarry Smith if (ismpimaij) { 49b9b97703SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 50b9b97703SBarry Smith 51b9b97703SBarry Smith *B = b->A; 52b9b97703SBarry Smith } else if (isseqmaij) { 53b9b97703SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 54b9b97703SBarry Smith 55b9b97703SBarry Smith *B = b->AIJ; 56b9b97703SBarry Smith } else { 57b9b97703SBarry Smith *B = A; 58b9b97703SBarry Smith } 59b9b97703SBarry Smith PetscFunctionReturn(0); 60b9b97703SBarry Smith } 61b9b97703SBarry Smith 62480dffcfSJed Brown /*@ 63ff585edeSJed Brown MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size 64ff585edeSJed Brown 653f9fe445SBarry Smith Logically Collective 66ff585edeSJed Brown 67d8d19677SJose E. Roman Input Parameters: 68ff585edeSJed Brown + A - the MAIJ matrix 69ff585edeSJed Brown - dof - the block size for the new matrix 70ff585edeSJed Brown 71ff585edeSJed Brown Output Parameter: 72ff585edeSJed Brown . B - the new MAIJ matrix 73ff585edeSJed Brown 74ff585edeSJed Brown Level: advanced 75ff585edeSJed Brown 76ff585edeSJed Brown .seealso: MatCreateMAIJ() 77ff585edeSJed Brown @*/ 787087cfbeSBarry Smith PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 79b9b97703SBarry Smith { 80dfbe8321SBarry Smith PetscErrorCode ierr; 810298fd71SBarry Smith Mat Aij = NULL; 82b9b97703SBarry Smith 83b9b97703SBarry Smith PetscFunctionBegin; 84c5eb9154SBarry Smith PetscValidLogicalCollectiveInt(A,dof,2); 85b9b97703SBarry Smith ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 86b9b97703SBarry Smith ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 87b9b97703SBarry Smith PetscFunctionReturn(0); 88b9b97703SBarry Smith } 89b9b97703SBarry Smith 90dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 9182b1193eSBarry Smith { 92dfbe8321SBarry Smith PetscErrorCode ierr; 934cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 9482b1193eSBarry Smith 9582b1193eSBarry Smith PetscFunctionBegin; 966bf464f9SBarry Smith ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 97bf0cc555SLisandro Dalcin ierr = PetscFree(A->data);CHKERRQ(ierr); 98bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);CHKERRQ(ierr); 994222ddf1SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaij_seqmaij_C",NULL);CHKERRQ(ierr); 1004cb9d9b8SBarry Smith PetscFunctionReturn(0); 1014cb9d9b8SBarry Smith } 1024cb9d9b8SBarry Smith 103356c569eSBarry Smith PetscErrorCode MatSetUp_MAIJ(Mat A) 104356c569eSBarry Smith { 105356c569eSBarry Smith PetscFunctionBegin; 106ce94432eSBarry Smith SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices"); 107356c569eSBarry Smith } 108356c569eSBarry Smith 1090fd73130SBarry Smith PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 1100fd73130SBarry Smith { 1110fd73130SBarry Smith PetscErrorCode ierr; 1120fd73130SBarry Smith Mat B; 1130fd73130SBarry Smith 1140fd73130SBarry Smith PetscFunctionBegin; 115a2ea699eSBarry Smith ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1160fd73130SBarry Smith ierr = MatView(B,viewer);CHKERRQ(ierr); 1176bf464f9SBarry Smith ierr = MatDestroy(&B);CHKERRQ(ierr); 1180fd73130SBarry Smith PetscFunctionReturn(0); 1190fd73130SBarry Smith } 1200fd73130SBarry Smith 1210fd73130SBarry Smith PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 1220fd73130SBarry Smith { 1230fd73130SBarry Smith PetscErrorCode ierr; 1240fd73130SBarry Smith Mat B; 1250fd73130SBarry Smith 1260fd73130SBarry Smith PetscFunctionBegin; 127a2ea699eSBarry Smith ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1280fd73130SBarry Smith ierr = MatView(B,viewer);CHKERRQ(ierr); 1296bf464f9SBarry Smith ierr = MatDestroy(&B);CHKERRQ(ierr); 1300fd73130SBarry Smith PetscFunctionReturn(0); 1310fd73130SBarry Smith } 1320fd73130SBarry Smith 133dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 1344cb9d9b8SBarry Smith { 135dfbe8321SBarry Smith PetscErrorCode ierr; 1364cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1374cb9d9b8SBarry Smith 1384cb9d9b8SBarry Smith PetscFunctionBegin; 1396bf464f9SBarry Smith ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 1406bf464f9SBarry Smith ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 1416bf464f9SBarry Smith ierr = MatDestroy(&b->A);CHKERRQ(ierr); 1426bf464f9SBarry Smith ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 1436bf464f9SBarry Smith ierr = VecDestroy(&b->w);CHKERRQ(ierr); 144bf0cc555SLisandro Dalcin ierr = PetscFree(A->data);CHKERRQ(ierr); 145bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);CHKERRQ(ierr); 1464222ddf1SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_mpiaij_mpimaij_C",NULL);CHKERRQ(ierr); 147f4259b30SLisandro Dalcin ierr = PetscObjectChangeTypeName((PetscObject)A,NULL);CHKERRQ(ierr); 148b9b97703SBarry Smith PetscFunctionReturn(0); 149b9b97703SBarry Smith } 150b9b97703SBarry Smith 1510bad9183SKris Buschelman /*MC 152fafad747SKris Buschelman MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 1530bad9183SKris Buschelman multicomponent problems, interpolating or restricting each component the same way independently. 1540bad9183SKris Buschelman The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 1550bad9183SKris Buschelman 1560bad9183SKris Buschelman Operations provided: 157*67b8a455SSatish Balay .vb 158*67b8a455SSatish Balay MatMult 159*67b8a455SSatish Balay MatMultTranspose 160*67b8a455SSatish Balay MatMultAdd 161*67b8a455SSatish Balay MatMultTransposeAdd 162*67b8a455SSatish Balay .ve 1630bad9183SKris Buschelman 1640bad9183SKris Buschelman Level: advanced 1650bad9183SKris Buschelman 166b409243cSBarry Smith .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ() 1670bad9183SKris Buschelman M*/ 1680bad9183SKris Buschelman 1698cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) 17082b1193eSBarry Smith { 171dfbe8321SBarry Smith PetscErrorCode ierr; 1724cb9d9b8SBarry Smith Mat_MPIMAIJ *b; 17364b52464SHong Zhang PetscMPIInt size; 17482b1193eSBarry Smith 17582b1193eSBarry Smith PetscFunctionBegin; 176b00a9115SJed Brown ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 177b0a32e0cSBarry Smith A->data = (void*)b; 17826fbe8dcSKarl Rupp 179cd3bbe55SBarry Smith ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 18026fbe8dcSKarl Rupp 181356c569eSBarry Smith A->ops->setup = MatSetUp_MAIJ; 182f4a53059SBarry Smith 183f4259b30SLisandro Dalcin b->AIJ = NULL; 184cd3bbe55SBarry Smith b->dof = 0; 185f4259b30SLisandro Dalcin b->OAIJ = NULL; 186f4259b30SLisandro Dalcin b->ctx = NULL; 187f4259b30SLisandro Dalcin b->w = NULL; 18855b25c41SPierre Jolivet ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 18964b52464SHong Zhang if (size == 1) { 19064b52464SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr); 19164b52464SHong Zhang } else { 19264b52464SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr); 19364b52464SHong Zhang } 19432e7c8b0SBarry Smith A->preallocated = PETSC_TRUE; 19532e7c8b0SBarry Smith A->assembled = PETSC_TRUE; 19682b1193eSBarry Smith PetscFunctionReturn(0); 19782b1193eSBarry Smith } 19882b1193eSBarry Smith 199cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 200dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 20182b1193eSBarry Smith { 2024cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 203bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 204d2840e09SBarry Smith const PetscScalar *x,*v; 205d2840e09SBarry Smith PetscScalar *y, sum1, sum2; 206dfbe8321SBarry Smith PetscErrorCode ierr; 207d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 208d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 20982b1193eSBarry Smith 210bcc973b7SBarry Smith PetscFunctionBegin; 2113649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2121ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 213bcc973b7SBarry Smith idx = a->j; 214bcc973b7SBarry Smith v = a->a; 215bcc973b7SBarry Smith ii = a->i; 216bcc973b7SBarry Smith 217bcc973b7SBarry Smith for (i=0; i<m; i++) { 218bcc973b7SBarry Smith jrow = ii[i]; 219bcc973b7SBarry Smith n = ii[i+1] - jrow; 220bcc973b7SBarry Smith sum1 = 0.0; 221bcc973b7SBarry Smith sum2 = 0.0; 22226fbe8dcSKarl Rupp 223b3c51c66SMatthew Knepley nonzerorow += (n>0); 224bcc973b7SBarry Smith for (j=0; j<n; j++) { 225bcc973b7SBarry Smith sum1 += v[jrow]*x[2*idx[jrow]]; 226bcc973b7SBarry Smith sum2 += v[jrow]*x[2*idx[jrow]+1]; 227bcc973b7SBarry Smith jrow++; 228bcc973b7SBarry Smith } 229bcc973b7SBarry Smith y[2*i] = sum1; 230bcc973b7SBarry Smith y[2*i+1] = sum2; 231bcc973b7SBarry Smith } 232bcc973b7SBarry Smith 233dc0b31edSSatish Balay ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr); 2343649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2351ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 23682b1193eSBarry Smith PetscFunctionReturn(0); 23782b1193eSBarry Smith } 238bcc973b7SBarry Smith 239dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 24082b1193eSBarry Smith { 2414cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 242bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 243d2840e09SBarry Smith const PetscScalar *x,*v; 244d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2; 245dfbe8321SBarry Smith PetscErrorCode ierr; 246d2840e09SBarry Smith PetscInt n,i; 247d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 24882b1193eSBarry Smith 249bcc973b7SBarry Smith PetscFunctionBegin; 250d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2513649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2521ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 253bfec09a0SHong Zhang 254bcc973b7SBarry Smith for (i=0; i<m; i++) { 255bfec09a0SHong Zhang idx = a->j + a->i[i]; 256bfec09a0SHong Zhang v = a->a + a->i[i]; 257bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 258bcc973b7SBarry Smith alpha1 = x[2*i]; 259bcc973b7SBarry Smith alpha2 = x[2*i+1]; 26026fbe8dcSKarl Rupp while (n-->0) { 26126fbe8dcSKarl Rupp y[2*(*idx)] += alpha1*(*v); 26226fbe8dcSKarl Rupp y[2*(*idx)+1] += alpha2*(*v); 26326fbe8dcSKarl Rupp idx++; v++; 26426fbe8dcSKarl Rupp } 265bcc973b7SBarry Smith } 266dc0b31edSSatish Balay ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 2673649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2681ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 26982b1193eSBarry Smith PetscFunctionReturn(0); 27082b1193eSBarry Smith } 271bcc973b7SBarry Smith 272dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 27382b1193eSBarry Smith { 2744cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 275bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 276d2840e09SBarry Smith const PetscScalar *x,*v; 277d2840e09SBarry Smith PetscScalar *y,sum1, sum2; 278dfbe8321SBarry Smith PetscErrorCode ierr; 279b24ad042SBarry Smith PetscInt n,i,jrow,j; 280d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 28182b1193eSBarry Smith 282bcc973b7SBarry Smith PetscFunctionBegin; 283f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2843649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2851ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 286bcc973b7SBarry Smith idx = a->j; 287bcc973b7SBarry Smith v = a->a; 288bcc973b7SBarry Smith ii = a->i; 289bcc973b7SBarry Smith 290bcc973b7SBarry Smith for (i=0; i<m; i++) { 291bcc973b7SBarry Smith jrow = ii[i]; 292bcc973b7SBarry Smith n = ii[i+1] - jrow; 293bcc973b7SBarry Smith sum1 = 0.0; 294bcc973b7SBarry Smith sum2 = 0.0; 295bcc973b7SBarry Smith for (j=0; j<n; j++) { 296bcc973b7SBarry Smith sum1 += v[jrow]*x[2*idx[jrow]]; 297bcc973b7SBarry Smith sum2 += v[jrow]*x[2*idx[jrow]+1]; 298bcc973b7SBarry Smith jrow++; 299bcc973b7SBarry Smith } 300bcc973b7SBarry Smith y[2*i] += sum1; 301bcc973b7SBarry Smith y[2*i+1] += sum2; 302bcc973b7SBarry Smith } 303bcc973b7SBarry Smith 304dc0b31edSSatish Balay ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 3053649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 3061ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 307bcc973b7SBarry Smith PetscFunctionReturn(0); 30882b1193eSBarry Smith } 309dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 31082b1193eSBarry Smith { 3114cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 312bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 313d2840e09SBarry Smith const PetscScalar *x,*v; 314d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2; 315dfbe8321SBarry Smith PetscErrorCode ierr; 316d2840e09SBarry Smith PetscInt n,i; 317d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 31882b1193eSBarry Smith 319bcc973b7SBarry Smith PetscFunctionBegin; 320f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 3213649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 3221ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 323bfec09a0SHong Zhang 324bcc973b7SBarry Smith for (i=0; i<m; i++) { 325bfec09a0SHong Zhang idx = a->j + a->i[i]; 326bfec09a0SHong Zhang v = a->a + a->i[i]; 327bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 328bcc973b7SBarry Smith alpha1 = x[2*i]; 329bcc973b7SBarry Smith alpha2 = x[2*i+1]; 33026fbe8dcSKarl Rupp while (n-->0) { 33126fbe8dcSKarl Rupp y[2*(*idx)] += alpha1*(*v); 33226fbe8dcSKarl Rupp y[2*(*idx)+1] += alpha2*(*v); 33326fbe8dcSKarl Rupp idx++; v++; 33426fbe8dcSKarl Rupp } 335bcc973b7SBarry Smith } 336dc0b31edSSatish Balay ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 3373649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 3381ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 339bcc973b7SBarry Smith PetscFunctionReturn(0); 34082b1193eSBarry Smith } 341cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 342dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 343bcc973b7SBarry Smith { 3444cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 345bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 346d2840e09SBarry Smith const PetscScalar *x,*v; 347d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3; 348dfbe8321SBarry Smith PetscErrorCode ierr; 349d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 350d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 35182b1193eSBarry Smith 352bcc973b7SBarry Smith PetscFunctionBegin; 3533649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 3541ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 355bcc973b7SBarry Smith idx = a->j; 356bcc973b7SBarry Smith v = a->a; 357bcc973b7SBarry Smith ii = a->i; 358bcc973b7SBarry Smith 359bcc973b7SBarry Smith for (i=0; i<m; i++) { 360bcc973b7SBarry Smith jrow = ii[i]; 361bcc973b7SBarry Smith n = ii[i+1] - jrow; 362bcc973b7SBarry Smith sum1 = 0.0; 363bcc973b7SBarry Smith sum2 = 0.0; 364bcc973b7SBarry Smith sum3 = 0.0; 36526fbe8dcSKarl Rupp 366b3c51c66SMatthew Knepley nonzerorow += (n>0); 367bcc973b7SBarry Smith for (j=0; j<n; j++) { 368bcc973b7SBarry Smith sum1 += v[jrow]*x[3*idx[jrow]]; 369bcc973b7SBarry Smith sum2 += v[jrow]*x[3*idx[jrow]+1]; 370bcc973b7SBarry Smith sum3 += v[jrow]*x[3*idx[jrow]+2]; 371bcc973b7SBarry Smith jrow++; 372bcc973b7SBarry Smith } 373bcc973b7SBarry Smith y[3*i] = sum1; 374bcc973b7SBarry Smith y[3*i+1] = sum2; 375bcc973b7SBarry Smith y[3*i+2] = sum3; 376bcc973b7SBarry Smith } 377bcc973b7SBarry Smith 378dc0b31edSSatish Balay ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr); 3793649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 3801ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 381bcc973b7SBarry Smith PetscFunctionReturn(0); 382bcc973b7SBarry Smith } 383bcc973b7SBarry Smith 384dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 385bcc973b7SBarry Smith { 3864cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 387bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 388d2840e09SBarry Smith const PetscScalar *x,*v; 389d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3; 390dfbe8321SBarry Smith PetscErrorCode ierr; 391d2840e09SBarry Smith PetscInt n,i; 392d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 393bcc973b7SBarry Smith 394bcc973b7SBarry Smith PetscFunctionBegin; 395d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 3963649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 3971ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 398bfec09a0SHong Zhang 399bcc973b7SBarry Smith for (i=0; i<m; i++) { 400bfec09a0SHong Zhang idx = a->j + a->i[i]; 401bfec09a0SHong Zhang v = a->a + a->i[i]; 402bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 403bcc973b7SBarry Smith alpha1 = x[3*i]; 404bcc973b7SBarry Smith alpha2 = x[3*i+1]; 405bcc973b7SBarry Smith alpha3 = x[3*i+2]; 406bcc973b7SBarry Smith while (n-->0) { 407bcc973b7SBarry Smith y[3*(*idx)] += alpha1*(*v); 408bcc973b7SBarry Smith y[3*(*idx)+1] += alpha2*(*v); 409bcc973b7SBarry Smith y[3*(*idx)+2] += alpha3*(*v); 410bcc973b7SBarry Smith idx++; v++; 411bcc973b7SBarry Smith } 412bcc973b7SBarry Smith } 413dc0b31edSSatish Balay ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 4143649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 4151ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 416bcc973b7SBarry Smith PetscFunctionReturn(0); 417bcc973b7SBarry Smith } 418bcc973b7SBarry Smith 419dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 420bcc973b7SBarry Smith { 4214cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 422bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 423d2840e09SBarry Smith const PetscScalar *x,*v; 424d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3; 425dfbe8321SBarry Smith PetscErrorCode ierr; 426b24ad042SBarry Smith PetscInt n,i,jrow,j; 427d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 428bcc973b7SBarry Smith 429bcc973b7SBarry Smith PetscFunctionBegin; 430f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 4313649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 4321ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 433bcc973b7SBarry Smith idx = a->j; 434bcc973b7SBarry Smith v = a->a; 435bcc973b7SBarry Smith ii = a->i; 436bcc973b7SBarry Smith 437bcc973b7SBarry Smith for (i=0; i<m; i++) { 438bcc973b7SBarry Smith jrow = ii[i]; 439bcc973b7SBarry Smith n = ii[i+1] - jrow; 440bcc973b7SBarry Smith sum1 = 0.0; 441bcc973b7SBarry Smith sum2 = 0.0; 442bcc973b7SBarry Smith sum3 = 0.0; 443bcc973b7SBarry Smith for (j=0; j<n; j++) { 444bcc973b7SBarry Smith sum1 += v[jrow]*x[3*idx[jrow]]; 445bcc973b7SBarry Smith sum2 += v[jrow]*x[3*idx[jrow]+1]; 446bcc973b7SBarry Smith sum3 += v[jrow]*x[3*idx[jrow]+2]; 447bcc973b7SBarry Smith jrow++; 448bcc973b7SBarry Smith } 449bcc973b7SBarry Smith y[3*i] += sum1; 450bcc973b7SBarry Smith y[3*i+1] += sum2; 451bcc973b7SBarry Smith y[3*i+2] += sum3; 452bcc973b7SBarry Smith } 453bcc973b7SBarry Smith 454dc0b31edSSatish Balay ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 4553649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 4561ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 457bcc973b7SBarry Smith PetscFunctionReturn(0); 458bcc973b7SBarry Smith } 459dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 460bcc973b7SBarry Smith { 4614cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 462bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 463d2840e09SBarry Smith const PetscScalar *x,*v; 464d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3; 465dfbe8321SBarry Smith PetscErrorCode ierr; 466d2840e09SBarry Smith PetscInt n,i; 467d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 468bcc973b7SBarry Smith 469bcc973b7SBarry Smith PetscFunctionBegin; 470f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 4713649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 4721ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 473bcc973b7SBarry Smith for (i=0; i<m; i++) { 474bfec09a0SHong Zhang idx = a->j + a->i[i]; 475bfec09a0SHong Zhang v = a->a + a->i[i]; 476bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 477bcc973b7SBarry Smith alpha1 = x[3*i]; 478bcc973b7SBarry Smith alpha2 = x[3*i+1]; 479bcc973b7SBarry Smith alpha3 = x[3*i+2]; 480bcc973b7SBarry Smith while (n-->0) { 481bcc973b7SBarry Smith y[3*(*idx)] += alpha1*(*v); 482bcc973b7SBarry Smith y[3*(*idx)+1] += alpha2*(*v); 483bcc973b7SBarry Smith y[3*(*idx)+2] += alpha3*(*v); 484bcc973b7SBarry Smith idx++; v++; 485bcc973b7SBarry Smith } 486bcc973b7SBarry Smith } 487dc0b31edSSatish Balay ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 4883649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 4891ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 490bcc973b7SBarry Smith PetscFunctionReturn(0); 491bcc973b7SBarry Smith } 492bcc973b7SBarry Smith 493bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/ 494dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 495bcc973b7SBarry Smith { 4964cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 497bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 498d2840e09SBarry Smith const PetscScalar *x,*v; 499d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4; 500dfbe8321SBarry Smith PetscErrorCode ierr; 501d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 502d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 503bcc973b7SBarry Smith 504bcc973b7SBarry Smith PetscFunctionBegin; 5053649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 5061ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 507bcc973b7SBarry Smith idx = a->j; 508bcc973b7SBarry Smith v = a->a; 509bcc973b7SBarry Smith ii = a->i; 510bcc973b7SBarry Smith 511bcc973b7SBarry Smith for (i=0; i<m; i++) { 512bcc973b7SBarry Smith jrow = ii[i]; 513bcc973b7SBarry Smith n = ii[i+1] - jrow; 514bcc973b7SBarry Smith sum1 = 0.0; 515bcc973b7SBarry Smith sum2 = 0.0; 516bcc973b7SBarry Smith sum3 = 0.0; 517bcc973b7SBarry Smith sum4 = 0.0; 518b3c51c66SMatthew Knepley nonzerorow += (n>0); 519bcc973b7SBarry Smith for (j=0; j<n; j++) { 520bcc973b7SBarry Smith sum1 += v[jrow]*x[4*idx[jrow]]; 521bcc973b7SBarry Smith sum2 += v[jrow]*x[4*idx[jrow]+1]; 522bcc973b7SBarry Smith sum3 += v[jrow]*x[4*idx[jrow]+2]; 523bcc973b7SBarry Smith sum4 += v[jrow]*x[4*idx[jrow]+3]; 524bcc973b7SBarry Smith jrow++; 525bcc973b7SBarry Smith } 526bcc973b7SBarry Smith y[4*i] = sum1; 527bcc973b7SBarry Smith y[4*i+1] = sum2; 528bcc973b7SBarry Smith y[4*i+2] = sum3; 529bcc973b7SBarry Smith y[4*i+3] = sum4; 530bcc973b7SBarry Smith } 531bcc973b7SBarry Smith 532dc0b31edSSatish Balay ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr); 5333649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 5341ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 535bcc973b7SBarry Smith PetscFunctionReturn(0); 536bcc973b7SBarry Smith } 537bcc973b7SBarry Smith 538dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 539bcc973b7SBarry Smith { 5404cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 541bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 542d2840e09SBarry Smith const PetscScalar *x,*v; 543d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 544dfbe8321SBarry Smith PetscErrorCode ierr; 545d2840e09SBarry Smith PetscInt n,i; 546d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 547bcc973b7SBarry Smith 548bcc973b7SBarry Smith PetscFunctionBegin; 549d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 5503649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 5511ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 552bcc973b7SBarry Smith for (i=0; i<m; i++) { 553bfec09a0SHong Zhang idx = a->j + a->i[i]; 554bfec09a0SHong Zhang v = a->a + a->i[i]; 555bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 556bcc973b7SBarry Smith alpha1 = x[4*i]; 557bcc973b7SBarry Smith alpha2 = x[4*i+1]; 558bcc973b7SBarry Smith alpha3 = x[4*i+2]; 559bcc973b7SBarry Smith alpha4 = x[4*i+3]; 560bcc973b7SBarry Smith while (n-->0) { 561bcc973b7SBarry Smith y[4*(*idx)] += alpha1*(*v); 562bcc973b7SBarry Smith y[4*(*idx)+1] += alpha2*(*v); 563bcc973b7SBarry Smith y[4*(*idx)+2] += alpha3*(*v); 564bcc973b7SBarry Smith y[4*(*idx)+3] += alpha4*(*v); 565bcc973b7SBarry Smith idx++; v++; 566bcc973b7SBarry Smith } 567bcc973b7SBarry Smith } 568dc0b31edSSatish Balay ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 5693649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 5701ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 571bcc973b7SBarry Smith PetscFunctionReturn(0); 572bcc973b7SBarry Smith } 573bcc973b7SBarry Smith 574dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 575bcc973b7SBarry Smith { 5764cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 577f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 578d2840e09SBarry Smith const PetscScalar *x,*v; 579d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4; 580dfbe8321SBarry Smith PetscErrorCode ierr; 581b24ad042SBarry Smith PetscInt n,i,jrow,j; 582d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 583f9fae5adSBarry Smith 584f9fae5adSBarry Smith PetscFunctionBegin; 585f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 5863649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 5871ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 588f9fae5adSBarry Smith idx = a->j; 589f9fae5adSBarry Smith v = a->a; 590f9fae5adSBarry Smith ii = a->i; 591f9fae5adSBarry Smith 592f9fae5adSBarry Smith for (i=0; i<m; i++) { 593f9fae5adSBarry Smith jrow = ii[i]; 594f9fae5adSBarry Smith n = ii[i+1] - jrow; 595f9fae5adSBarry Smith sum1 = 0.0; 596f9fae5adSBarry Smith sum2 = 0.0; 597f9fae5adSBarry Smith sum3 = 0.0; 598f9fae5adSBarry Smith sum4 = 0.0; 599f9fae5adSBarry Smith for (j=0; j<n; j++) { 600f9fae5adSBarry Smith sum1 += v[jrow]*x[4*idx[jrow]]; 601f9fae5adSBarry Smith sum2 += v[jrow]*x[4*idx[jrow]+1]; 602f9fae5adSBarry Smith sum3 += v[jrow]*x[4*idx[jrow]+2]; 603f9fae5adSBarry Smith sum4 += v[jrow]*x[4*idx[jrow]+3]; 604f9fae5adSBarry Smith jrow++; 605f9fae5adSBarry Smith } 606f9fae5adSBarry Smith y[4*i] += sum1; 607f9fae5adSBarry Smith y[4*i+1] += sum2; 608f9fae5adSBarry Smith y[4*i+2] += sum3; 609f9fae5adSBarry Smith y[4*i+3] += sum4; 610f9fae5adSBarry Smith } 611f9fae5adSBarry Smith 612dc0b31edSSatish Balay ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 6133649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 6141ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 615f9fae5adSBarry Smith PetscFunctionReturn(0); 616bcc973b7SBarry Smith } 617dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 618bcc973b7SBarry Smith { 6194cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 620f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 621d2840e09SBarry Smith const PetscScalar *x,*v; 622d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 623dfbe8321SBarry Smith PetscErrorCode ierr; 624d2840e09SBarry Smith PetscInt n,i; 625d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 626f9fae5adSBarry Smith 627f9fae5adSBarry Smith PetscFunctionBegin; 628f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 6293649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 6301ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 631bfec09a0SHong Zhang 632f9fae5adSBarry Smith for (i=0; i<m; i++) { 633bfec09a0SHong Zhang idx = a->j + a->i[i]; 634bfec09a0SHong Zhang v = a->a + a->i[i]; 635f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 636f9fae5adSBarry Smith alpha1 = x[4*i]; 637f9fae5adSBarry Smith alpha2 = x[4*i+1]; 638f9fae5adSBarry Smith alpha3 = x[4*i+2]; 639f9fae5adSBarry Smith alpha4 = x[4*i+3]; 640f9fae5adSBarry Smith while (n-->0) { 641f9fae5adSBarry Smith y[4*(*idx)] += alpha1*(*v); 642f9fae5adSBarry Smith y[4*(*idx)+1] += alpha2*(*v); 643f9fae5adSBarry Smith y[4*(*idx)+2] += alpha3*(*v); 644f9fae5adSBarry Smith y[4*(*idx)+3] += alpha4*(*v); 645f9fae5adSBarry Smith idx++; v++; 646f9fae5adSBarry Smith } 647f9fae5adSBarry Smith } 648dc0b31edSSatish Balay ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 6493649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 6501ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 651f9fae5adSBarry Smith PetscFunctionReturn(0); 652f9fae5adSBarry Smith } 653f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/ 6546cd98798SBarry Smith 655dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 656f9fae5adSBarry Smith { 6574cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 658f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 659d2840e09SBarry Smith const PetscScalar *x,*v; 660d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 661dfbe8321SBarry Smith PetscErrorCode ierr; 662d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 663d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 664f9fae5adSBarry Smith 665f9fae5adSBarry Smith PetscFunctionBegin; 6663649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 6671ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 668f9fae5adSBarry Smith idx = a->j; 669f9fae5adSBarry Smith v = a->a; 670f9fae5adSBarry Smith ii = a->i; 671f9fae5adSBarry Smith 672f9fae5adSBarry Smith for (i=0; i<m; i++) { 673f9fae5adSBarry Smith jrow = ii[i]; 674f9fae5adSBarry Smith n = ii[i+1] - jrow; 675f9fae5adSBarry Smith sum1 = 0.0; 676f9fae5adSBarry Smith sum2 = 0.0; 677f9fae5adSBarry Smith sum3 = 0.0; 678f9fae5adSBarry Smith sum4 = 0.0; 679f9fae5adSBarry Smith sum5 = 0.0; 68026fbe8dcSKarl Rupp 681b3c51c66SMatthew Knepley nonzerorow += (n>0); 682f9fae5adSBarry Smith for (j=0; j<n; j++) { 683f9fae5adSBarry Smith sum1 += v[jrow]*x[5*idx[jrow]]; 684f9fae5adSBarry Smith sum2 += v[jrow]*x[5*idx[jrow]+1]; 685f9fae5adSBarry Smith sum3 += v[jrow]*x[5*idx[jrow]+2]; 686f9fae5adSBarry Smith sum4 += v[jrow]*x[5*idx[jrow]+3]; 687f9fae5adSBarry Smith sum5 += v[jrow]*x[5*idx[jrow]+4]; 688f9fae5adSBarry Smith jrow++; 689f9fae5adSBarry Smith } 690f9fae5adSBarry Smith y[5*i] = sum1; 691f9fae5adSBarry Smith y[5*i+1] = sum2; 692f9fae5adSBarry Smith y[5*i+2] = sum3; 693f9fae5adSBarry Smith y[5*i+3] = sum4; 694f9fae5adSBarry Smith y[5*i+4] = sum5; 695f9fae5adSBarry Smith } 696f9fae5adSBarry Smith 697dc0b31edSSatish Balay ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr); 6983649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 6991ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 700f9fae5adSBarry Smith PetscFunctionReturn(0); 701f9fae5adSBarry Smith } 702f9fae5adSBarry Smith 703dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 704f9fae5adSBarry Smith { 7054cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 706f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 707d2840e09SBarry Smith const PetscScalar *x,*v; 708d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 709dfbe8321SBarry Smith PetscErrorCode ierr; 710d2840e09SBarry Smith PetscInt n,i; 711d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 712f9fae5adSBarry Smith 713f9fae5adSBarry Smith PetscFunctionBegin; 714d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 7153649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 7161ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 717bfec09a0SHong Zhang 718f9fae5adSBarry Smith for (i=0; i<m; i++) { 719bfec09a0SHong Zhang idx = a->j + a->i[i]; 720bfec09a0SHong Zhang v = a->a + a->i[i]; 721f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 722f9fae5adSBarry Smith alpha1 = x[5*i]; 723f9fae5adSBarry Smith alpha2 = x[5*i+1]; 724f9fae5adSBarry Smith alpha3 = x[5*i+2]; 725f9fae5adSBarry Smith alpha4 = x[5*i+3]; 726f9fae5adSBarry Smith alpha5 = x[5*i+4]; 727f9fae5adSBarry Smith while (n-->0) { 728f9fae5adSBarry Smith y[5*(*idx)] += alpha1*(*v); 729f9fae5adSBarry Smith y[5*(*idx)+1] += alpha2*(*v); 730f9fae5adSBarry Smith y[5*(*idx)+2] += alpha3*(*v); 731f9fae5adSBarry Smith y[5*(*idx)+3] += alpha4*(*v); 732f9fae5adSBarry Smith y[5*(*idx)+4] += alpha5*(*v); 733f9fae5adSBarry Smith idx++; v++; 734f9fae5adSBarry Smith } 735f9fae5adSBarry Smith } 736dc0b31edSSatish Balay ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 7373649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 7381ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 739f9fae5adSBarry Smith PetscFunctionReturn(0); 740f9fae5adSBarry Smith } 741f9fae5adSBarry Smith 742dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 743f9fae5adSBarry Smith { 7444cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 745f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 746d2840e09SBarry Smith const PetscScalar *x,*v; 747d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 748dfbe8321SBarry Smith PetscErrorCode ierr; 749b24ad042SBarry Smith PetscInt n,i,jrow,j; 750d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 751f9fae5adSBarry Smith 752f9fae5adSBarry Smith PetscFunctionBegin; 753f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 7543649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 7551ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 756f9fae5adSBarry Smith idx = a->j; 757f9fae5adSBarry Smith v = a->a; 758f9fae5adSBarry Smith ii = a->i; 759f9fae5adSBarry Smith 760f9fae5adSBarry Smith for (i=0; i<m; i++) { 761f9fae5adSBarry Smith jrow = ii[i]; 762f9fae5adSBarry Smith n = ii[i+1] - jrow; 763f9fae5adSBarry Smith sum1 = 0.0; 764f9fae5adSBarry Smith sum2 = 0.0; 765f9fae5adSBarry Smith sum3 = 0.0; 766f9fae5adSBarry Smith sum4 = 0.0; 767f9fae5adSBarry Smith sum5 = 0.0; 768f9fae5adSBarry Smith for (j=0; j<n; j++) { 769f9fae5adSBarry Smith sum1 += v[jrow]*x[5*idx[jrow]]; 770f9fae5adSBarry Smith sum2 += v[jrow]*x[5*idx[jrow]+1]; 771f9fae5adSBarry Smith sum3 += v[jrow]*x[5*idx[jrow]+2]; 772f9fae5adSBarry Smith sum4 += v[jrow]*x[5*idx[jrow]+3]; 773f9fae5adSBarry Smith sum5 += v[jrow]*x[5*idx[jrow]+4]; 774f9fae5adSBarry Smith jrow++; 775f9fae5adSBarry Smith } 776f9fae5adSBarry Smith y[5*i] += sum1; 777f9fae5adSBarry Smith y[5*i+1] += sum2; 778f9fae5adSBarry Smith y[5*i+2] += sum3; 779f9fae5adSBarry Smith y[5*i+3] += sum4; 780f9fae5adSBarry Smith y[5*i+4] += sum5; 781f9fae5adSBarry Smith } 782f9fae5adSBarry Smith 783dc0b31edSSatish Balay ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 7843649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 7851ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 786f9fae5adSBarry Smith PetscFunctionReturn(0); 787f9fae5adSBarry Smith } 788f9fae5adSBarry Smith 789dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 790f9fae5adSBarry Smith { 7914cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 792f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 793d2840e09SBarry Smith const PetscScalar *x,*v; 794d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 795dfbe8321SBarry Smith PetscErrorCode ierr; 796d2840e09SBarry Smith PetscInt n,i; 797d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 798f9fae5adSBarry Smith 799f9fae5adSBarry Smith PetscFunctionBegin; 800f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 8013649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 8021ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 803bfec09a0SHong Zhang 804f9fae5adSBarry Smith for (i=0; i<m; i++) { 805bfec09a0SHong Zhang idx = a->j + a->i[i]; 806bfec09a0SHong Zhang v = a->a + a->i[i]; 807f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 808f9fae5adSBarry Smith alpha1 = x[5*i]; 809f9fae5adSBarry Smith alpha2 = x[5*i+1]; 810f9fae5adSBarry Smith alpha3 = x[5*i+2]; 811f9fae5adSBarry Smith alpha4 = x[5*i+3]; 812f9fae5adSBarry Smith alpha5 = x[5*i+4]; 813f9fae5adSBarry Smith while (n-->0) { 814f9fae5adSBarry Smith y[5*(*idx)] += alpha1*(*v); 815f9fae5adSBarry Smith y[5*(*idx)+1] += alpha2*(*v); 816f9fae5adSBarry Smith y[5*(*idx)+2] += alpha3*(*v); 817f9fae5adSBarry Smith y[5*(*idx)+3] += alpha4*(*v); 818f9fae5adSBarry Smith y[5*(*idx)+4] += alpha5*(*v); 819f9fae5adSBarry Smith idx++; v++; 820f9fae5adSBarry Smith } 821f9fae5adSBarry Smith } 822dc0b31edSSatish Balay ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 8233649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 8241ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 825f9fae5adSBarry Smith PetscFunctionReturn(0); 826bcc973b7SBarry Smith } 827bcc973b7SBarry Smith 8286cd98798SBarry Smith /* ------------------------------------------------------------------------------*/ 829dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 8306cd98798SBarry Smith { 8316cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 8326cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 833d2840e09SBarry Smith const PetscScalar *x,*v; 834d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 835dfbe8321SBarry Smith PetscErrorCode ierr; 836d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 837d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 8386cd98798SBarry Smith 8396cd98798SBarry Smith PetscFunctionBegin; 8403649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 8411ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 8426cd98798SBarry Smith idx = a->j; 8436cd98798SBarry Smith v = a->a; 8446cd98798SBarry Smith ii = a->i; 8456cd98798SBarry Smith 8466cd98798SBarry Smith for (i=0; i<m; i++) { 8476cd98798SBarry Smith jrow = ii[i]; 8486cd98798SBarry Smith n = ii[i+1] - jrow; 8496cd98798SBarry Smith sum1 = 0.0; 8506cd98798SBarry Smith sum2 = 0.0; 8516cd98798SBarry Smith sum3 = 0.0; 8526cd98798SBarry Smith sum4 = 0.0; 8536cd98798SBarry Smith sum5 = 0.0; 8546cd98798SBarry Smith sum6 = 0.0; 85526fbe8dcSKarl Rupp 856b3c51c66SMatthew Knepley nonzerorow += (n>0); 8576cd98798SBarry Smith for (j=0; j<n; j++) { 8586cd98798SBarry Smith sum1 += v[jrow]*x[6*idx[jrow]]; 8596cd98798SBarry Smith sum2 += v[jrow]*x[6*idx[jrow]+1]; 8606cd98798SBarry Smith sum3 += v[jrow]*x[6*idx[jrow]+2]; 8616cd98798SBarry Smith sum4 += v[jrow]*x[6*idx[jrow]+3]; 8626cd98798SBarry Smith sum5 += v[jrow]*x[6*idx[jrow]+4]; 8636cd98798SBarry Smith sum6 += v[jrow]*x[6*idx[jrow]+5]; 8646cd98798SBarry Smith jrow++; 8656cd98798SBarry Smith } 8666cd98798SBarry Smith y[6*i] = sum1; 8676cd98798SBarry Smith y[6*i+1] = sum2; 8686cd98798SBarry Smith y[6*i+2] = sum3; 8696cd98798SBarry Smith y[6*i+3] = sum4; 8706cd98798SBarry Smith y[6*i+4] = sum5; 8716cd98798SBarry Smith y[6*i+5] = sum6; 8726cd98798SBarry Smith } 8736cd98798SBarry Smith 874dc0b31edSSatish Balay ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr); 8753649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 8761ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 8776cd98798SBarry Smith PetscFunctionReturn(0); 8786cd98798SBarry Smith } 8796cd98798SBarry Smith 880dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 8816cd98798SBarry Smith { 8826cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 8836cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 884d2840e09SBarry Smith const PetscScalar *x,*v; 885d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 886dfbe8321SBarry Smith PetscErrorCode ierr; 887d2840e09SBarry Smith PetscInt n,i; 888d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 8896cd98798SBarry Smith 8906cd98798SBarry Smith PetscFunctionBegin; 891d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 8923649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 8931ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 894bfec09a0SHong Zhang 8956cd98798SBarry Smith for (i=0; i<m; i++) { 896bfec09a0SHong Zhang idx = a->j + a->i[i]; 897bfec09a0SHong Zhang v = a->a + a->i[i]; 8986cd98798SBarry Smith n = a->i[i+1] - a->i[i]; 8996cd98798SBarry Smith alpha1 = x[6*i]; 9006cd98798SBarry Smith alpha2 = x[6*i+1]; 9016cd98798SBarry Smith alpha3 = x[6*i+2]; 9026cd98798SBarry Smith alpha4 = x[6*i+3]; 9036cd98798SBarry Smith alpha5 = x[6*i+4]; 9046cd98798SBarry Smith alpha6 = x[6*i+5]; 9056cd98798SBarry Smith while (n-->0) { 9066cd98798SBarry Smith y[6*(*idx)] += alpha1*(*v); 9076cd98798SBarry Smith y[6*(*idx)+1] += alpha2*(*v); 9086cd98798SBarry Smith y[6*(*idx)+2] += alpha3*(*v); 9096cd98798SBarry Smith y[6*(*idx)+3] += alpha4*(*v); 9106cd98798SBarry Smith y[6*(*idx)+4] += alpha5*(*v); 9116cd98798SBarry Smith y[6*(*idx)+5] += alpha6*(*v); 9126cd98798SBarry Smith idx++; v++; 9136cd98798SBarry Smith } 9146cd98798SBarry Smith } 915dc0b31edSSatish Balay ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 9163649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 9171ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 9186cd98798SBarry Smith PetscFunctionReturn(0); 9196cd98798SBarry Smith } 9206cd98798SBarry Smith 921dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 9226cd98798SBarry Smith { 9236cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 9246cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 925d2840e09SBarry Smith const PetscScalar *x,*v; 926d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 927dfbe8321SBarry Smith PetscErrorCode ierr; 928b24ad042SBarry Smith PetscInt n,i,jrow,j; 929d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 9306cd98798SBarry Smith 9316cd98798SBarry Smith PetscFunctionBegin; 9326cd98798SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 9333649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 9341ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 9356cd98798SBarry Smith idx = a->j; 9366cd98798SBarry Smith v = a->a; 9376cd98798SBarry Smith ii = a->i; 9386cd98798SBarry Smith 9396cd98798SBarry Smith for (i=0; i<m; i++) { 9406cd98798SBarry Smith jrow = ii[i]; 9416cd98798SBarry Smith n = ii[i+1] - jrow; 9426cd98798SBarry Smith sum1 = 0.0; 9436cd98798SBarry Smith sum2 = 0.0; 9446cd98798SBarry Smith sum3 = 0.0; 9456cd98798SBarry Smith sum4 = 0.0; 9466cd98798SBarry Smith sum5 = 0.0; 9476cd98798SBarry Smith sum6 = 0.0; 9486cd98798SBarry Smith for (j=0; j<n; j++) { 9496cd98798SBarry Smith sum1 += v[jrow]*x[6*idx[jrow]]; 9506cd98798SBarry Smith sum2 += v[jrow]*x[6*idx[jrow]+1]; 9516cd98798SBarry Smith sum3 += v[jrow]*x[6*idx[jrow]+2]; 9526cd98798SBarry Smith sum4 += v[jrow]*x[6*idx[jrow]+3]; 9536cd98798SBarry Smith sum5 += v[jrow]*x[6*idx[jrow]+4]; 9546cd98798SBarry Smith sum6 += v[jrow]*x[6*idx[jrow]+5]; 9556cd98798SBarry Smith jrow++; 9566cd98798SBarry Smith } 9576cd98798SBarry Smith y[6*i] += sum1; 9586cd98798SBarry Smith y[6*i+1] += sum2; 9596cd98798SBarry Smith y[6*i+2] += sum3; 9606cd98798SBarry Smith y[6*i+3] += sum4; 9616cd98798SBarry Smith y[6*i+4] += sum5; 9626cd98798SBarry Smith y[6*i+5] += sum6; 9636cd98798SBarry Smith } 9646cd98798SBarry Smith 965dc0b31edSSatish Balay ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 9663649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 9671ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 9686cd98798SBarry Smith PetscFunctionReturn(0); 9696cd98798SBarry Smith } 9706cd98798SBarry Smith 971dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 9726cd98798SBarry Smith { 9736cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 9746cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 975d2840e09SBarry Smith const PetscScalar *x,*v; 976d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 977dfbe8321SBarry Smith PetscErrorCode ierr; 978d2840e09SBarry Smith PetscInt n,i; 979d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 9806cd98798SBarry Smith 9816cd98798SBarry Smith PetscFunctionBegin; 9826cd98798SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 9833649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 9841ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 985bfec09a0SHong Zhang 9866cd98798SBarry Smith for (i=0; i<m; i++) { 987bfec09a0SHong Zhang idx = a->j + a->i[i]; 988bfec09a0SHong Zhang v = a->a + a->i[i]; 9896cd98798SBarry Smith n = a->i[i+1] - a->i[i]; 9906cd98798SBarry Smith alpha1 = x[6*i]; 9916cd98798SBarry Smith alpha2 = x[6*i+1]; 9926cd98798SBarry Smith alpha3 = x[6*i+2]; 9936cd98798SBarry Smith alpha4 = x[6*i+3]; 9946cd98798SBarry Smith alpha5 = x[6*i+4]; 9956cd98798SBarry Smith alpha6 = x[6*i+5]; 9966cd98798SBarry Smith while (n-->0) { 9976cd98798SBarry Smith y[6*(*idx)] += alpha1*(*v); 9986cd98798SBarry Smith y[6*(*idx)+1] += alpha2*(*v); 9996cd98798SBarry Smith y[6*(*idx)+2] += alpha3*(*v); 10006cd98798SBarry Smith y[6*(*idx)+3] += alpha4*(*v); 10016cd98798SBarry Smith y[6*(*idx)+4] += alpha5*(*v); 10026cd98798SBarry Smith y[6*(*idx)+5] += alpha6*(*v); 10036cd98798SBarry Smith idx++; v++; 10046cd98798SBarry Smith } 10056cd98798SBarry Smith } 1006dc0b31edSSatish Balay ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 10073649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 10081ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 10096cd98798SBarry Smith PetscFunctionReturn(0); 10106cd98798SBarry Smith } 10116cd98798SBarry Smith 101266ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/ 1013ed8eea03SBarry Smith PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1014ed8eea03SBarry Smith { 1015ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1016ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1017d2840e09SBarry Smith const PetscScalar *x,*v; 1018d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1019ed8eea03SBarry Smith PetscErrorCode ierr; 1020d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1021d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 1022ed8eea03SBarry Smith 1023ed8eea03SBarry Smith PetscFunctionBegin; 10243649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1025ed8eea03SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1026ed8eea03SBarry Smith idx = a->j; 1027ed8eea03SBarry Smith v = a->a; 1028ed8eea03SBarry Smith ii = a->i; 1029ed8eea03SBarry Smith 1030ed8eea03SBarry Smith for (i=0; i<m; i++) { 1031ed8eea03SBarry Smith jrow = ii[i]; 1032ed8eea03SBarry Smith n = ii[i+1] - jrow; 1033ed8eea03SBarry Smith sum1 = 0.0; 1034ed8eea03SBarry Smith sum2 = 0.0; 1035ed8eea03SBarry Smith sum3 = 0.0; 1036ed8eea03SBarry Smith sum4 = 0.0; 1037ed8eea03SBarry Smith sum5 = 0.0; 1038ed8eea03SBarry Smith sum6 = 0.0; 1039ed8eea03SBarry Smith sum7 = 0.0; 104026fbe8dcSKarl Rupp 1041b3c51c66SMatthew Knepley nonzerorow += (n>0); 1042ed8eea03SBarry Smith for (j=0; j<n; j++) { 1043ed8eea03SBarry Smith sum1 += v[jrow]*x[7*idx[jrow]]; 1044ed8eea03SBarry Smith sum2 += v[jrow]*x[7*idx[jrow]+1]; 1045ed8eea03SBarry Smith sum3 += v[jrow]*x[7*idx[jrow]+2]; 1046ed8eea03SBarry Smith sum4 += v[jrow]*x[7*idx[jrow]+3]; 1047ed8eea03SBarry Smith sum5 += v[jrow]*x[7*idx[jrow]+4]; 1048ed8eea03SBarry Smith sum6 += v[jrow]*x[7*idx[jrow]+5]; 1049ed8eea03SBarry Smith sum7 += v[jrow]*x[7*idx[jrow]+6]; 1050ed8eea03SBarry Smith jrow++; 1051ed8eea03SBarry Smith } 1052ed8eea03SBarry Smith y[7*i] = sum1; 1053ed8eea03SBarry Smith y[7*i+1] = sum2; 1054ed8eea03SBarry Smith y[7*i+2] = sum3; 1055ed8eea03SBarry Smith y[7*i+3] = sum4; 1056ed8eea03SBarry Smith y[7*i+4] = sum5; 1057ed8eea03SBarry Smith y[7*i+5] = sum6; 1058ed8eea03SBarry Smith y[7*i+6] = sum7; 1059ed8eea03SBarry Smith } 1060ed8eea03SBarry Smith 1061dc0b31edSSatish Balay ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr); 10623649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1063ed8eea03SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1064ed8eea03SBarry Smith PetscFunctionReturn(0); 1065ed8eea03SBarry Smith } 1066ed8eea03SBarry Smith 1067ed8eea03SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1068ed8eea03SBarry Smith { 1069ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1070ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1071d2840e09SBarry Smith const PetscScalar *x,*v; 1072d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1073ed8eea03SBarry Smith PetscErrorCode ierr; 1074d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1075d2840e09SBarry Smith PetscInt n,i; 1076ed8eea03SBarry Smith 1077ed8eea03SBarry Smith PetscFunctionBegin; 1078d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 10793649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1080ed8eea03SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1081ed8eea03SBarry Smith 1082ed8eea03SBarry Smith for (i=0; i<m; i++) { 1083ed8eea03SBarry Smith idx = a->j + a->i[i]; 1084ed8eea03SBarry Smith v = a->a + a->i[i]; 1085ed8eea03SBarry Smith n = a->i[i+1] - a->i[i]; 1086ed8eea03SBarry Smith alpha1 = x[7*i]; 1087ed8eea03SBarry Smith alpha2 = x[7*i+1]; 1088ed8eea03SBarry Smith alpha3 = x[7*i+2]; 1089ed8eea03SBarry Smith alpha4 = x[7*i+3]; 1090ed8eea03SBarry Smith alpha5 = x[7*i+4]; 1091ed8eea03SBarry Smith alpha6 = x[7*i+5]; 1092ed8eea03SBarry Smith alpha7 = x[7*i+6]; 1093ed8eea03SBarry Smith while (n-->0) { 1094ed8eea03SBarry Smith y[7*(*idx)] += alpha1*(*v); 1095ed8eea03SBarry Smith y[7*(*idx)+1] += alpha2*(*v); 1096ed8eea03SBarry Smith y[7*(*idx)+2] += alpha3*(*v); 1097ed8eea03SBarry Smith y[7*(*idx)+3] += alpha4*(*v); 1098ed8eea03SBarry Smith y[7*(*idx)+4] += alpha5*(*v); 1099ed8eea03SBarry Smith y[7*(*idx)+5] += alpha6*(*v); 1100ed8eea03SBarry Smith y[7*(*idx)+6] += alpha7*(*v); 1101ed8eea03SBarry Smith idx++; v++; 1102ed8eea03SBarry Smith } 1103ed8eea03SBarry Smith } 1104dc0b31edSSatish Balay ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 11053649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1106ed8eea03SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1107ed8eea03SBarry Smith PetscFunctionReturn(0); 1108ed8eea03SBarry Smith } 1109ed8eea03SBarry Smith 1110ed8eea03SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1111ed8eea03SBarry Smith { 1112ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1113ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1114d2840e09SBarry Smith const PetscScalar *x,*v; 1115d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1116ed8eea03SBarry Smith PetscErrorCode ierr; 1117d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1118ed8eea03SBarry Smith PetscInt n,i,jrow,j; 1119ed8eea03SBarry Smith 1120ed8eea03SBarry Smith PetscFunctionBegin; 1121ed8eea03SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 11223649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1123ed8eea03SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1124ed8eea03SBarry Smith idx = a->j; 1125ed8eea03SBarry Smith v = a->a; 1126ed8eea03SBarry Smith ii = a->i; 1127ed8eea03SBarry Smith 1128ed8eea03SBarry Smith for (i=0; i<m; i++) { 1129ed8eea03SBarry Smith jrow = ii[i]; 1130ed8eea03SBarry Smith n = ii[i+1] - jrow; 1131ed8eea03SBarry Smith sum1 = 0.0; 1132ed8eea03SBarry Smith sum2 = 0.0; 1133ed8eea03SBarry Smith sum3 = 0.0; 1134ed8eea03SBarry Smith sum4 = 0.0; 1135ed8eea03SBarry Smith sum5 = 0.0; 1136ed8eea03SBarry Smith sum6 = 0.0; 1137ed8eea03SBarry Smith sum7 = 0.0; 1138ed8eea03SBarry Smith for (j=0; j<n; j++) { 1139ed8eea03SBarry Smith sum1 += v[jrow]*x[7*idx[jrow]]; 1140ed8eea03SBarry Smith sum2 += v[jrow]*x[7*idx[jrow]+1]; 1141ed8eea03SBarry Smith sum3 += v[jrow]*x[7*idx[jrow]+2]; 1142ed8eea03SBarry Smith sum4 += v[jrow]*x[7*idx[jrow]+3]; 1143ed8eea03SBarry Smith sum5 += v[jrow]*x[7*idx[jrow]+4]; 1144ed8eea03SBarry Smith sum6 += v[jrow]*x[7*idx[jrow]+5]; 1145ed8eea03SBarry Smith sum7 += v[jrow]*x[7*idx[jrow]+6]; 1146ed8eea03SBarry Smith jrow++; 1147ed8eea03SBarry Smith } 1148ed8eea03SBarry Smith y[7*i] += sum1; 1149ed8eea03SBarry Smith y[7*i+1] += sum2; 1150ed8eea03SBarry Smith y[7*i+2] += sum3; 1151ed8eea03SBarry Smith y[7*i+3] += sum4; 1152ed8eea03SBarry Smith y[7*i+4] += sum5; 1153ed8eea03SBarry Smith y[7*i+5] += sum6; 1154ed8eea03SBarry Smith y[7*i+6] += sum7; 1155ed8eea03SBarry Smith } 1156ed8eea03SBarry Smith 1157dc0b31edSSatish Balay ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 11583649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1159ed8eea03SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1160ed8eea03SBarry Smith PetscFunctionReturn(0); 1161ed8eea03SBarry Smith } 1162ed8eea03SBarry Smith 1163ed8eea03SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1164ed8eea03SBarry Smith { 1165ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1166ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1167d2840e09SBarry Smith const PetscScalar *x,*v; 1168d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1169ed8eea03SBarry Smith PetscErrorCode ierr; 1170d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1171d2840e09SBarry Smith PetscInt n,i; 1172ed8eea03SBarry Smith 1173ed8eea03SBarry Smith PetscFunctionBegin; 1174ed8eea03SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 11753649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1176ed8eea03SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1177ed8eea03SBarry Smith for (i=0; i<m; i++) { 1178ed8eea03SBarry Smith idx = a->j + a->i[i]; 1179ed8eea03SBarry Smith v = a->a + a->i[i]; 1180ed8eea03SBarry Smith n = a->i[i+1] - a->i[i]; 1181ed8eea03SBarry Smith alpha1 = x[7*i]; 1182ed8eea03SBarry Smith alpha2 = x[7*i+1]; 1183ed8eea03SBarry Smith alpha3 = x[7*i+2]; 1184ed8eea03SBarry Smith alpha4 = x[7*i+3]; 1185ed8eea03SBarry Smith alpha5 = x[7*i+4]; 1186ed8eea03SBarry Smith alpha6 = x[7*i+5]; 1187ed8eea03SBarry Smith alpha7 = x[7*i+6]; 1188ed8eea03SBarry Smith while (n-->0) { 1189ed8eea03SBarry Smith y[7*(*idx)] += alpha1*(*v); 1190ed8eea03SBarry Smith y[7*(*idx)+1] += alpha2*(*v); 1191ed8eea03SBarry Smith y[7*(*idx)+2] += alpha3*(*v); 1192ed8eea03SBarry Smith y[7*(*idx)+3] += alpha4*(*v); 1193ed8eea03SBarry Smith y[7*(*idx)+4] += alpha5*(*v); 1194ed8eea03SBarry Smith y[7*(*idx)+5] += alpha6*(*v); 1195ed8eea03SBarry Smith y[7*(*idx)+6] += alpha7*(*v); 1196ed8eea03SBarry Smith idx++; v++; 1197ed8eea03SBarry Smith } 1198ed8eea03SBarry Smith } 1199dc0b31edSSatish Balay ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 12003649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1201ed8eea03SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1202ed8eea03SBarry Smith PetscFunctionReturn(0); 1203ed8eea03SBarry Smith } 1204ed8eea03SBarry Smith 1205dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 120666ed3db0SBarry Smith { 120766ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 120866ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1209d2840e09SBarry Smith const PetscScalar *x,*v; 1210d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1211dfbe8321SBarry Smith PetscErrorCode ierr; 1212d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1213d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 121466ed3db0SBarry Smith 121566ed3db0SBarry Smith PetscFunctionBegin; 12163649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 12171ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 121866ed3db0SBarry Smith idx = a->j; 121966ed3db0SBarry Smith v = a->a; 122066ed3db0SBarry Smith ii = a->i; 122166ed3db0SBarry Smith 122266ed3db0SBarry Smith for (i=0; i<m; i++) { 122366ed3db0SBarry Smith jrow = ii[i]; 122466ed3db0SBarry Smith n = ii[i+1] - jrow; 122566ed3db0SBarry Smith sum1 = 0.0; 122666ed3db0SBarry Smith sum2 = 0.0; 122766ed3db0SBarry Smith sum3 = 0.0; 122866ed3db0SBarry Smith sum4 = 0.0; 122966ed3db0SBarry Smith sum5 = 0.0; 123066ed3db0SBarry Smith sum6 = 0.0; 123166ed3db0SBarry Smith sum7 = 0.0; 123266ed3db0SBarry Smith sum8 = 0.0; 123326fbe8dcSKarl Rupp 1234b3c51c66SMatthew Knepley nonzerorow += (n>0); 123566ed3db0SBarry Smith for (j=0; j<n; j++) { 123666ed3db0SBarry Smith sum1 += v[jrow]*x[8*idx[jrow]]; 123766ed3db0SBarry Smith sum2 += v[jrow]*x[8*idx[jrow]+1]; 123866ed3db0SBarry Smith sum3 += v[jrow]*x[8*idx[jrow]+2]; 123966ed3db0SBarry Smith sum4 += v[jrow]*x[8*idx[jrow]+3]; 124066ed3db0SBarry Smith sum5 += v[jrow]*x[8*idx[jrow]+4]; 124166ed3db0SBarry Smith sum6 += v[jrow]*x[8*idx[jrow]+5]; 124266ed3db0SBarry Smith sum7 += v[jrow]*x[8*idx[jrow]+6]; 124366ed3db0SBarry Smith sum8 += v[jrow]*x[8*idx[jrow]+7]; 124466ed3db0SBarry Smith jrow++; 124566ed3db0SBarry Smith } 124666ed3db0SBarry Smith y[8*i] = sum1; 124766ed3db0SBarry Smith y[8*i+1] = sum2; 124866ed3db0SBarry Smith y[8*i+2] = sum3; 124966ed3db0SBarry Smith y[8*i+3] = sum4; 125066ed3db0SBarry Smith y[8*i+4] = sum5; 125166ed3db0SBarry Smith y[8*i+5] = sum6; 125266ed3db0SBarry Smith y[8*i+6] = sum7; 125366ed3db0SBarry Smith y[8*i+7] = sum8; 125466ed3db0SBarry Smith } 125566ed3db0SBarry Smith 1256dc0b31edSSatish Balay ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr); 12573649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 12581ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 125966ed3db0SBarry Smith PetscFunctionReturn(0); 126066ed3db0SBarry Smith } 126166ed3db0SBarry Smith 1262dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 126366ed3db0SBarry Smith { 126466ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 126566ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1266d2840e09SBarry Smith const PetscScalar *x,*v; 1267d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1268dfbe8321SBarry Smith PetscErrorCode ierr; 1269d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1270d2840e09SBarry Smith PetscInt n,i; 127166ed3db0SBarry Smith 127266ed3db0SBarry Smith PetscFunctionBegin; 1273d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 12743649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 12751ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1276bfec09a0SHong Zhang 127766ed3db0SBarry Smith for (i=0; i<m; i++) { 1278bfec09a0SHong Zhang idx = a->j + a->i[i]; 1279bfec09a0SHong Zhang v = a->a + a->i[i]; 128066ed3db0SBarry Smith n = a->i[i+1] - a->i[i]; 128166ed3db0SBarry Smith alpha1 = x[8*i]; 128266ed3db0SBarry Smith alpha2 = x[8*i+1]; 128366ed3db0SBarry Smith alpha3 = x[8*i+2]; 128466ed3db0SBarry Smith alpha4 = x[8*i+3]; 128566ed3db0SBarry Smith alpha5 = x[8*i+4]; 128666ed3db0SBarry Smith alpha6 = x[8*i+5]; 128766ed3db0SBarry Smith alpha7 = x[8*i+6]; 128866ed3db0SBarry Smith alpha8 = x[8*i+7]; 128966ed3db0SBarry Smith while (n-->0) { 129066ed3db0SBarry Smith y[8*(*idx)] += alpha1*(*v); 129166ed3db0SBarry Smith y[8*(*idx)+1] += alpha2*(*v); 129266ed3db0SBarry Smith y[8*(*idx)+2] += alpha3*(*v); 129366ed3db0SBarry Smith y[8*(*idx)+3] += alpha4*(*v); 129466ed3db0SBarry Smith y[8*(*idx)+4] += alpha5*(*v); 129566ed3db0SBarry Smith y[8*(*idx)+5] += alpha6*(*v); 129666ed3db0SBarry Smith y[8*(*idx)+6] += alpha7*(*v); 129766ed3db0SBarry Smith y[8*(*idx)+7] += alpha8*(*v); 129866ed3db0SBarry Smith idx++; v++; 129966ed3db0SBarry Smith } 130066ed3db0SBarry Smith } 1301dc0b31edSSatish Balay ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 13023649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 13031ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 130466ed3db0SBarry Smith PetscFunctionReturn(0); 130566ed3db0SBarry Smith } 130666ed3db0SBarry Smith 1307dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 130866ed3db0SBarry Smith { 130966ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 131066ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1311d2840e09SBarry Smith const PetscScalar *x,*v; 1312d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1313dfbe8321SBarry Smith PetscErrorCode ierr; 1314d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1315b24ad042SBarry Smith PetscInt n,i,jrow,j; 131666ed3db0SBarry Smith 131766ed3db0SBarry Smith PetscFunctionBegin; 131866ed3db0SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 13193649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 13201ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 132166ed3db0SBarry Smith idx = a->j; 132266ed3db0SBarry Smith v = a->a; 132366ed3db0SBarry Smith ii = a->i; 132466ed3db0SBarry Smith 132566ed3db0SBarry Smith for (i=0; i<m; i++) { 132666ed3db0SBarry Smith jrow = ii[i]; 132766ed3db0SBarry Smith n = ii[i+1] - jrow; 132866ed3db0SBarry Smith sum1 = 0.0; 132966ed3db0SBarry Smith sum2 = 0.0; 133066ed3db0SBarry Smith sum3 = 0.0; 133166ed3db0SBarry Smith sum4 = 0.0; 133266ed3db0SBarry Smith sum5 = 0.0; 133366ed3db0SBarry Smith sum6 = 0.0; 133466ed3db0SBarry Smith sum7 = 0.0; 133566ed3db0SBarry Smith sum8 = 0.0; 133666ed3db0SBarry Smith for (j=0; j<n; j++) { 133766ed3db0SBarry Smith sum1 += v[jrow]*x[8*idx[jrow]]; 133866ed3db0SBarry Smith sum2 += v[jrow]*x[8*idx[jrow]+1]; 133966ed3db0SBarry Smith sum3 += v[jrow]*x[8*idx[jrow]+2]; 134066ed3db0SBarry Smith sum4 += v[jrow]*x[8*idx[jrow]+3]; 134166ed3db0SBarry Smith sum5 += v[jrow]*x[8*idx[jrow]+4]; 134266ed3db0SBarry Smith sum6 += v[jrow]*x[8*idx[jrow]+5]; 134366ed3db0SBarry Smith sum7 += v[jrow]*x[8*idx[jrow]+6]; 134466ed3db0SBarry Smith sum8 += v[jrow]*x[8*idx[jrow]+7]; 134566ed3db0SBarry Smith jrow++; 134666ed3db0SBarry Smith } 134766ed3db0SBarry Smith y[8*i] += sum1; 134866ed3db0SBarry Smith y[8*i+1] += sum2; 134966ed3db0SBarry Smith y[8*i+2] += sum3; 135066ed3db0SBarry Smith y[8*i+3] += sum4; 135166ed3db0SBarry Smith y[8*i+4] += sum5; 135266ed3db0SBarry Smith y[8*i+5] += sum6; 135366ed3db0SBarry Smith y[8*i+6] += sum7; 135466ed3db0SBarry Smith y[8*i+7] += sum8; 135566ed3db0SBarry Smith } 135666ed3db0SBarry Smith 1357dc0b31edSSatish Balay ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 13583649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 13591ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 136066ed3db0SBarry Smith PetscFunctionReturn(0); 136166ed3db0SBarry Smith } 136266ed3db0SBarry Smith 1363dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 136466ed3db0SBarry Smith { 136566ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 136666ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1367d2840e09SBarry Smith const PetscScalar *x,*v; 1368d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1369dfbe8321SBarry Smith PetscErrorCode ierr; 1370d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1371d2840e09SBarry Smith PetscInt n,i; 137266ed3db0SBarry Smith 137366ed3db0SBarry Smith PetscFunctionBegin; 137466ed3db0SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 13753649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 13761ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 137766ed3db0SBarry Smith for (i=0; i<m; i++) { 1378bfec09a0SHong Zhang idx = a->j + a->i[i]; 1379bfec09a0SHong Zhang v = a->a + a->i[i]; 138066ed3db0SBarry Smith n = a->i[i+1] - a->i[i]; 138166ed3db0SBarry Smith alpha1 = x[8*i]; 138266ed3db0SBarry Smith alpha2 = x[8*i+1]; 138366ed3db0SBarry Smith alpha3 = x[8*i+2]; 138466ed3db0SBarry Smith alpha4 = x[8*i+3]; 138566ed3db0SBarry Smith alpha5 = x[8*i+4]; 138666ed3db0SBarry Smith alpha6 = x[8*i+5]; 138766ed3db0SBarry Smith alpha7 = x[8*i+6]; 138866ed3db0SBarry Smith alpha8 = x[8*i+7]; 138966ed3db0SBarry Smith while (n-->0) { 139066ed3db0SBarry Smith y[8*(*idx)] += alpha1*(*v); 139166ed3db0SBarry Smith y[8*(*idx)+1] += alpha2*(*v); 139266ed3db0SBarry Smith y[8*(*idx)+2] += alpha3*(*v); 139366ed3db0SBarry Smith y[8*(*idx)+3] += alpha4*(*v); 139466ed3db0SBarry Smith y[8*(*idx)+4] += alpha5*(*v); 139566ed3db0SBarry Smith y[8*(*idx)+5] += alpha6*(*v); 139666ed3db0SBarry Smith y[8*(*idx)+6] += alpha7*(*v); 139766ed3db0SBarry Smith y[8*(*idx)+7] += alpha8*(*v); 139866ed3db0SBarry Smith idx++; v++; 139966ed3db0SBarry Smith } 140066ed3db0SBarry Smith } 1401dc0b31edSSatish Balay ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 14023649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 14031ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 14042f7816d4SBarry Smith PetscFunctionReturn(0); 14052f7816d4SBarry Smith } 14062f7816d4SBarry Smith 14072b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/ 1408dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 14092b692628SMatthew Knepley { 14102b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 14112b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1412d2840e09SBarry Smith const PetscScalar *x,*v; 1413d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1414dfbe8321SBarry Smith PetscErrorCode ierr; 1415d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1416d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 14172b692628SMatthew Knepley 14182b692628SMatthew Knepley PetscFunctionBegin; 14193649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 14201ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 14212b692628SMatthew Knepley idx = a->j; 14222b692628SMatthew Knepley v = a->a; 14232b692628SMatthew Knepley ii = a->i; 14242b692628SMatthew Knepley 14252b692628SMatthew Knepley for (i=0; i<m; i++) { 14262b692628SMatthew Knepley jrow = ii[i]; 14272b692628SMatthew Knepley n = ii[i+1] - jrow; 14282b692628SMatthew Knepley sum1 = 0.0; 14292b692628SMatthew Knepley sum2 = 0.0; 14302b692628SMatthew Knepley sum3 = 0.0; 14312b692628SMatthew Knepley sum4 = 0.0; 14322b692628SMatthew Knepley sum5 = 0.0; 14332b692628SMatthew Knepley sum6 = 0.0; 14342b692628SMatthew Knepley sum7 = 0.0; 14352b692628SMatthew Knepley sum8 = 0.0; 14362b692628SMatthew Knepley sum9 = 0.0; 143726fbe8dcSKarl Rupp 1438b3c51c66SMatthew Knepley nonzerorow += (n>0); 14392b692628SMatthew Knepley for (j=0; j<n; j++) { 14402b692628SMatthew Knepley sum1 += v[jrow]*x[9*idx[jrow]]; 14412b692628SMatthew Knepley sum2 += v[jrow]*x[9*idx[jrow]+1]; 14422b692628SMatthew Knepley sum3 += v[jrow]*x[9*idx[jrow]+2]; 14432b692628SMatthew Knepley sum4 += v[jrow]*x[9*idx[jrow]+3]; 14442b692628SMatthew Knepley sum5 += v[jrow]*x[9*idx[jrow]+4]; 14452b692628SMatthew Knepley sum6 += v[jrow]*x[9*idx[jrow]+5]; 14462b692628SMatthew Knepley sum7 += v[jrow]*x[9*idx[jrow]+6]; 14472b692628SMatthew Knepley sum8 += v[jrow]*x[9*idx[jrow]+7]; 14482b692628SMatthew Knepley sum9 += v[jrow]*x[9*idx[jrow]+8]; 14492b692628SMatthew Knepley jrow++; 14502b692628SMatthew Knepley } 14512b692628SMatthew Knepley y[9*i] = sum1; 14522b692628SMatthew Knepley y[9*i+1] = sum2; 14532b692628SMatthew Knepley y[9*i+2] = sum3; 14542b692628SMatthew Knepley y[9*i+3] = sum4; 14552b692628SMatthew Knepley y[9*i+4] = sum5; 14562b692628SMatthew Knepley y[9*i+5] = sum6; 14572b692628SMatthew Knepley y[9*i+6] = sum7; 14582b692628SMatthew Knepley y[9*i+7] = sum8; 14592b692628SMatthew Knepley y[9*i+8] = sum9; 14602b692628SMatthew Knepley } 14612b692628SMatthew Knepley 1462dc0b31edSSatish Balay ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr); 14633649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 14641ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 14652b692628SMatthew Knepley PetscFunctionReturn(0); 14662b692628SMatthew Knepley } 14672b692628SMatthew Knepley 1468b9cda22cSBarry Smith /* ------------------------------------------------------------------------------*/ 1469b9cda22cSBarry Smith 1470dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 14712b692628SMatthew Knepley { 14722b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 14732b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1474d2840e09SBarry Smith const PetscScalar *x,*v; 1475d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1476dfbe8321SBarry Smith PetscErrorCode ierr; 1477d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1478d2840e09SBarry Smith PetscInt n,i; 14792b692628SMatthew Knepley 14802b692628SMatthew Knepley PetscFunctionBegin; 1481d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 14823649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 14831ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 14842b692628SMatthew Knepley 14852b692628SMatthew Knepley for (i=0; i<m; i++) { 14862b692628SMatthew Knepley idx = a->j + a->i[i]; 14872b692628SMatthew Knepley v = a->a + a->i[i]; 14882b692628SMatthew Knepley n = a->i[i+1] - a->i[i]; 14892b692628SMatthew Knepley alpha1 = x[9*i]; 14902b692628SMatthew Knepley alpha2 = x[9*i+1]; 14912b692628SMatthew Knepley alpha3 = x[9*i+2]; 14922b692628SMatthew Knepley alpha4 = x[9*i+3]; 14932b692628SMatthew Knepley alpha5 = x[9*i+4]; 14942b692628SMatthew Knepley alpha6 = x[9*i+5]; 14952b692628SMatthew Knepley alpha7 = x[9*i+6]; 14962b692628SMatthew Knepley alpha8 = x[9*i+7]; 14972f6bd0c6SMatthew Knepley alpha9 = x[9*i+8]; 14982b692628SMatthew Knepley while (n-->0) { 14992b692628SMatthew Knepley y[9*(*idx)] += alpha1*(*v); 15002b692628SMatthew Knepley y[9*(*idx)+1] += alpha2*(*v); 15012b692628SMatthew Knepley y[9*(*idx)+2] += alpha3*(*v); 15022b692628SMatthew Knepley y[9*(*idx)+3] += alpha4*(*v); 15032b692628SMatthew Knepley y[9*(*idx)+4] += alpha5*(*v); 15042b692628SMatthew Knepley y[9*(*idx)+5] += alpha6*(*v); 15052b692628SMatthew Knepley y[9*(*idx)+6] += alpha7*(*v); 15062b692628SMatthew Knepley y[9*(*idx)+7] += alpha8*(*v); 15072b692628SMatthew Knepley y[9*(*idx)+8] += alpha9*(*v); 15082b692628SMatthew Knepley idx++; v++; 15092b692628SMatthew Knepley } 15102b692628SMatthew Knepley } 1511dc0b31edSSatish Balay ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 15123649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 15131ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 15142b692628SMatthew Knepley PetscFunctionReturn(0); 15152b692628SMatthew Knepley } 15162b692628SMatthew Knepley 1517dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 15182b692628SMatthew Knepley { 15192b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 15202b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1521d2840e09SBarry Smith const PetscScalar *x,*v; 1522d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1523dfbe8321SBarry Smith PetscErrorCode ierr; 1524d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1525b24ad042SBarry Smith PetscInt n,i,jrow,j; 15262b692628SMatthew Knepley 15272b692628SMatthew Knepley PetscFunctionBegin; 15282b692628SMatthew Knepley if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 15293649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 15301ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 15312b692628SMatthew Knepley idx = a->j; 15322b692628SMatthew Knepley v = a->a; 15332b692628SMatthew Knepley ii = a->i; 15342b692628SMatthew Knepley 15352b692628SMatthew Knepley for (i=0; i<m; i++) { 15362b692628SMatthew Knepley jrow = ii[i]; 15372b692628SMatthew Knepley n = ii[i+1] - jrow; 15382b692628SMatthew Knepley sum1 = 0.0; 15392b692628SMatthew Knepley sum2 = 0.0; 15402b692628SMatthew Knepley sum3 = 0.0; 15412b692628SMatthew Knepley sum4 = 0.0; 15422b692628SMatthew Knepley sum5 = 0.0; 15432b692628SMatthew Knepley sum6 = 0.0; 15442b692628SMatthew Knepley sum7 = 0.0; 15452b692628SMatthew Knepley sum8 = 0.0; 15462b692628SMatthew Knepley sum9 = 0.0; 15472b692628SMatthew Knepley for (j=0; j<n; j++) { 15482b692628SMatthew Knepley sum1 += v[jrow]*x[9*idx[jrow]]; 15492b692628SMatthew Knepley sum2 += v[jrow]*x[9*idx[jrow]+1]; 15502b692628SMatthew Knepley sum3 += v[jrow]*x[9*idx[jrow]+2]; 15512b692628SMatthew Knepley sum4 += v[jrow]*x[9*idx[jrow]+3]; 15522b692628SMatthew Knepley sum5 += v[jrow]*x[9*idx[jrow]+4]; 15532b692628SMatthew Knepley sum6 += v[jrow]*x[9*idx[jrow]+5]; 15542b692628SMatthew Knepley sum7 += v[jrow]*x[9*idx[jrow]+6]; 15552b692628SMatthew Knepley sum8 += v[jrow]*x[9*idx[jrow]+7]; 15562b692628SMatthew Knepley sum9 += v[jrow]*x[9*idx[jrow]+8]; 15572b692628SMatthew Knepley jrow++; 15582b692628SMatthew Knepley } 15592b692628SMatthew Knepley y[9*i] += sum1; 15602b692628SMatthew Knepley y[9*i+1] += sum2; 15612b692628SMatthew Knepley y[9*i+2] += sum3; 15622b692628SMatthew Knepley y[9*i+3] += sum4; 15632b692628SMatthew Knepley y[9*i+4] += sum5; 15642b692628SMatthew Knepley y[9*i+5] += sum6; 15652b692628SMatthew Knepley y[9*i+6] += sum7; 15662b692628SMatthew Knepley y[9*i+7] += sum8; 15672b692628SMatthew Knepley y[9*i+8] += sum9; 15682b692628SMatthew Knepley } 15692b692628SMatthew Knepley 1570ca0c957dSBarry Smith ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 15713649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 15721ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 15732b692628SMatthew Knepley PetscFunctionReturn(0); 15742b692628SMatthew Knepley } 15752b692628SMatthew Knepley 1576dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 15772b692628SMatthew Knepley { 15782b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 15792b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1580d2840e09SBarry Smith const PetscScalar *x,*v; 1581d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1582dfbe8321SBarry Smith PetscErrorCode ierr; 1583d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1584d2840e09SBarry Smith PetscInt n,i; 15852b692628SMatthew Knepley 15862b692628SMatthew Knepley PetscFunctionBegin; 15872b692628SMatthew Knepley if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 15883649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 15891ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 15902b692628SMatthew Knepley for (i=0; i<m; i++) { 15912b692628SMatthew Knepley idx = a->j + a->i[i]; 15922b692628SMatthew Knepley v = a->a + a->i[i]; 15932b692628SMatthew Knepley n = a->i[i+1] - a->i[i]; 15942b692628SMatthew Knepley alpha1 = x[9*i]; 15952b692628SMatthew Knepley alpha2 = x[9*i+1]; 15962b692628SMatthew Knepley alpha3 = x[9*i+2]; 15972b692628SMatthew Knepley alpha4 = x[9*i+3]; 15982b692628SMatthew Knepley alpha5 = x[9*i+4]; 15992b692628SMatthew Knepley alpha6 = x[9*i+5]; 16002b692628SMatthew Knepley alpha7 = x[9*i+6]; 16012b692628SMatthew Knepley alpha8 = x[9*i+7]; 16022b692628SMatthew Knepley alpha9 = x[9*i+8]; 16032b692628SMatthew Knepley while (n-->0) { 16042b692628SMatthew Knepley y[9*(*idx)] += alpha1*(*v); 16052b692628SMatthew Knepley y[9*(*idx)+1] += alpha2*(*v); 16062b692628SMatthew Knepley y[9*(*idx)+2] += alpha3*(*v); 16072b692628SMatthew Knepley y[9*(*idx)+3] += alpha4*(*v); 16082b692628SMatthew Knepley y[9*(*idx)+4] += alpha5*(*v); 16092b692628SMatthew Knepley y[9*(*idx)+5] += alpha6*(*v); 16102b692628SMatthew Knepley y[9*(*idx)+6] += alpha7*(*v); 16112b692628SMatthew Knepley y[9*(*idx)+7] += alpha8*(*v); 16122b692628SMatthew Knepley y[9*(*idx)+8] += alpha9*(*v); 16132b692628SMatthew Knepley idx++; v++; 16142b692628SMatthew Knepley } 16152b692628SMatthew Knepley } 1616dc0b31edSSatish Balay ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 16173649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 16181ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 16192b692628SMatthew Knepley PetscFunctionReturn(0); 16202b692628SMatthew Knepley } 1621b9cda22cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1622b9cda22cSBarry Smith { 1623b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1624b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1625d2840e09SBarry Smith const PetscScalar *x,*v; 1626d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1627b9cda22cSBarry Smith PetscErrorCode ierr; 1628d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1629d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 1630b9cda22cSBarry Smith 1631b9cda22cSBarry Smith PetscFunctionBegin; 16323649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1633b9cda22cSBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1634b9cda22cSBarry Smith idx = a->j; 1635b9cda22cSBarry Smith v = a->a; 1636b9cda22cSBarry Smith ii = a->i; 1637b9cda22cSBarry Smith 1638b9cda22cSBarry Smith for (i=0; i<m; i++) { 1639b9cda22cSBarry Smith jrow = ii[i]; 1640b9cda22cSBarry Smith n = ii[i+1] - jrow; 1641b9cda22cSBarry Smith sum1 = 0.0; 1642b9cda22cSBarry Smith sum2 = 0.0; 1643b9cda22cSBarry Smith sum3 = 0.0; 1644b9cda22cSBarry Smith sum4 = 0.0; 1645b9cda22cSBarry Smith sum5 = 0.0; 1646b9cda22cSBarry Smith sum6 = 0.0; 1647b9cda22cSBarry Smith sum7 = 0.0; 1648b9cda22cSBarry Smith sum8 = 0.0; 1649b9cda22cSBarry Smith sum9 = 0.0; 1650b9cda22cSBarry Smith sum10 = 0.0; 165126fbe8dcSKarl Rupp 1652b3c51c66SMatthew Knepley nonzerorow += (n>0); 1653b9cda22cSBarry Smith for (j=0; j<n; j++) { 1654b9cda22cSBarry Smith sum1 += v[jrow]*x[10*idx[jrow]]; 1655b9cda22cSBarry Smith sum2 += v[jrow]*x[10*idx[jrow]+1]; 1656b9cda22cSBarry Smith sum3 += v[jrow]*x[10*idx[jrow]+2]; 1657b9cda22cSBarry Smith sum4 += v[jrow]*x[10*idx[jrow]+3]; 1658b9cda22cSBarry Smith sum5 += v[jrow]*x[10*idx[jrow]+4]; 1659b9cda22cSBarry Smith sum6 += v[jrow]*x[10*idx[jrow]+5]; 1660b9cda22cSBarry Smith sum7 += v[jrow]*x[10*idx[jrow]+6]; 1661b9cda22cSBarry Smith sum8 += v[jrow]*x[10*idx[jrow]+7]; 1662b9cda22cSBarry Smith sum9 += v[jrow]*x[10*idx[jrow]+8]; 1663b9cda22cSBarry Smith sum10 += v[jrow]*x[10*idx[jrow]+9]; 1664b9cda22cSBarry Smith jrow++; 1665b9cda22cSBarry Smith } 1666b9cda22cSBarry Smith y[10*i] = sum1; 1667b9cda22cSBarry Smith y[10*i+1] = sum2; 1668b9cda22cSBarry Smith y[10*i+2] = sum3; 1669b9cda22cSBarry Smith y[10*i+3] = sum4; 1670b9cda22cSBarry Smith y[10*i+4] = sum5; 1671b9cda22cSBarry Smith y[10*i+5] = sum6; 1672b9cda22cSBarry Smith y[10*i+6] = sum7; 1673b9cda22cSBarry Smith y[10*i+7] = sum8; 1674b9cda22cSBarry Smith y[10*i+8] = sum9; 1675b9cda22cSBarry Smith y[10*i+9] = sum10; 1676b9cda22cSBarry Smith } 1677b9cda22cSBarry Smith 1678dc0b31edSSatish Balay ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr); 16793649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1680b9cda22cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1681b9cda22cSBarry Smith PetscFunctionReturn(0); 1682b9cda22cSBarry Smith } 1683b9cda22cSBarry Smith 1684b9cda22cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1685b9cda22cSBarry Smith { 1686b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1687b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1688d2840e09SBarry Smith const PetscScalar *x,*v; 1689d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1690b9cda22cSBarry Smith PetscErrorCode ierr; 1691d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1692b9cda22cSBarry Smith PetscInt n,i,jrow,j; 1693b9cda22cSBarry Smith 1694b9cda22cSBarry Smith PetscFunctionBegin; 1695b9cda22cSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 16963649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1697b9cda22cSBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1698b9cda22cSBarry Smith idx = a->j; 1699b9cda22cSBarry Smith v = a->a; 1700b9cda22cSBarry Smith ii = a->i; 1701b9cda22cSBarry Smith 1702b9cda22cSBarry Smith for (i=0; i<m; i++) { 1703b9cda22cSBarry Smith jrow = ii[i]; 1704b9cda22cSBarry Smith n = ii[i+1] - jrow; 1705b9cda22cSBarry Smith sum1 = 0.0; 1706b9cda22cSBarry Smith sum2 = 0.0; 1707b9cda22cSBarry Smith sum3 = 0.0; 1708b9cda22cSBarry Smith sum4 = 0.0; 1709b9cda22cSBarry Smith sum5 = 0.0; 1710b9cda22cSBarry Smith sum6 = 0.0; 1711b9cda22cSBarry Smith sum7 = 0.0; 1712b9cda22cSBarry Smith sum8 = 0.0; 1713b9cda22cSBarry Smith sum9 = 0.0; 1714b9cda22cSBarry Smith sum10 = 0.0; 1715b9cda22cSBarry Smith for (j=0; j<n; j++) { 1716b9cda22cSBarry Smith sum1 += v[jrow]*x[10*idx[jrow]]; 1717b9cda22cSBarry Smith sum2 += v[jrow]*x[10*idx[jrow]+1]; 1718b9cda22cSBarry Smith sum3 += v[jrow]*x[10*idx[jrow]+2]; 1719b9cda22cSBarry Smith sum4 += v[jrow]*x[10*idx[jrow]+3]; 1720b9cda22cSBarry Smith sum5 += v[jrow]*x[10*idx[jrow]+4]; 1721b9cda22cSBarry Smith sum6 += v[jrow]*x[10*idx[jrow]+5]; 1722b9cda22cSBarry Smith sum7 += v[jrow]*x[10*idx[jrow]+6]; 1723b9cda22cSBarry Smith sum8 += v[jrow]*x[10*idx[jrow]+7]; 1724b9cda22cSBarry Smith sum9 += v[jrow]*x[10*idx[jrow]+8]; 1725b9cda22cSBarry Smith sum10 += v[jrow]*x[10*idx[jrow]+9]; 1726b9cda22cSBarry Smith jrow++; 1727b9cda22cSBarry Smith } 1728b9cda22cSBarry Smith y[10*i] += sum1; 1729b9cda22cSBarry Smith y[10*i+1] += sum2; 1730b9cda22cSBarry Smith y[10*i+2] += sum3; 1731b9cda22cSBarry Smith y[10*i+3] += sum4; 1732b9cda22cSBarry Smith y[10*i+4] += sum5; 1733b9cda22cSBarry Smith y[10*i+5] += sum6; 1734b9cda22cSBarry Smith y[10*i+6] += sum7; 1735b9cda22cSBarry Smith y[10*i+7] += sum8; 1736b9cda22cSBarry Smith y[10*i+8] += sum9; 1737b9cda22cSBarry Smith y[10*i+9] += sum10; 1738b9cda22cSBarry Smith } 1739b9cda22cSBarry Smith 1740dc0b31edSSatish Balay ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 17413649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1742b9cda22cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1743b9cda22cSBarry Smith PetscFunctionReturn(0); 1744b9cda22cSBarry Smith } 1745b9cda22cSBarry Smith 1746b9cda22cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1747b9cda22cSBarry Smith { 1748b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1749b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1750d2840e09SBarry Smith const PetscScalar *x,*v; 1751d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1752b9cda22cSBarry Smith PetscErrorCode ierr; 1753d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1754d2840e09SBarry Smith PetscInt n,i; 1755b9cda22cSBarry Smith 1756b9cda22cSBarry Smith PetscFunctionBegin; 1757d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 17583649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1759b9cda22cSBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1760b9cda22cSBarry Smith 1761b9cda22cSBarry Smith for (i=0; i<m; i++) { 1762b9cda22cSBarry Smith idx = a->j + a->i[i]; 1763b9cda22cSBarry Smith v = a->a + a->i[i]; 1764b9cda22cSBarry Smith n = a->i[i+1] - a->i[i]; 1765e29fdc22SBarry Smith alpha1 = x[10*i]; 1766e29fdc22SBarry Smith alpha2 = x[10*i+1]; 1767e29fdc22SBarry Smith alpha3 = x[10*i+2]; 1768e29fdc22SBarry Smith alpha4 = x[10*i+3]; 1769e29fdc22SBarry Smith alpha5 = x[10*i+4]; 1770e29fdc22SBarry Smith alpha6 = x[10*i+5]; 1771e29fdc22SBarry Smith alpha7 = x[10*i+6]; 1772e29fdc22SBarry Smith alpha8 = x[10*i+7]; 1773e29fdc22SBarry Smith alpha9 = x[10*i+8]; 1774b9cda22cSBarry Smith alpha10 = x[10*i+9]; 1775b9cda22cSBarry Smith while (n-->0) { 1776e29fdc22SBarry Smith y[10*(*idx)] += alpha1*(*v); 1777e29fdc22SBarry Smith y[10*(*idx)+1] += alpha2*(*v); 1778e29fdc22SBarry Smith y[10*(*idx)+2] += alpha3*(*v); 1779e29fdc22SBarry Smith y[10*(*idx)+3] += alpha4*(*v); 1780e29fdc22SBarry Smith y[10*(*idx)+4] += alpha5*(*v); 1781e29fdc22SBarry Smith y[10*(*idx)+5] += alpha6*(*v); 1782e29fdc22SBarry Smith y[10*(*idx)+6] += alpha7*(*v); 1783e29fdc22SBarry Smith y[10*(*idx)+7] += alpha8*(*v); 1784e29fdc22SBarry Smith y[10*(*idx)+8] += alpha9*(*v); 1785b9cda22cSBarry Smith y[10*(*idx)+9] += alpha10*(*v); 1786b9cda22cSBarry Smith idx++; v++; 1787b9cda22cSBarry Smith } 1788b9cda22cSBarry Smith } 1789dc0b31edSSatish Balay ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 17903649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1791b9cda22cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1792b9cda22cSBarry Smith PetscFunctionReturn(0); 1793b9cda22cSBarry Smith } 1794b9cda22cSBarry Smith 1795b9cda22cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1796b9cda22cSBarry Smith { 1797b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1798b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1799d2840e09SBarry Smith const PetscScalar *x,*v; 1800d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1801b9cda22cSBarry Smith PetscErrorCode ierr; 1802d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1803d2840e09SBarry Smith PetscInt n,i; 1804b9cda22cSBarry Smith 1805b9cda22cSBarry Smith PetscFunctionBegin; 1806b9cda22cSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 18073649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1808b9cda22cSBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1809b9cda22cSBarry Smith for (i=0; i<m; i++) { 1810b9cda22cSBarry Smith idx = a->j + a->i[i]; 1811b9cda22cSBarry Smith v = a->a + a->i[i]; 1812b9cda22cSBarry Smith n = a->i[i+1] - a->i[i]; 1813b9cda22cSBarry Smith alpha1 = x[10*i]; 1814b9cda22cSBarry Smith alpha2 = x[10*i+1]; 1815b9cda22cSBarry Smith alpha3 = x[10*i+2]; 1816b9cda22cSBarry Smith alpha4 = x[10*i+3]; 1817b9cda22cSBarry Smith alpha5 = x[10*i+4]; 1818b9cda22cSBarry Smith alpha6 = x[10*i+5]; 1819b9cda22cSBarry Smith alpha7 = x[10*i+6]; 1820b9cda22cSBarry Smith alpha8 = x[10*i+7]; 1821b9cda22cSBarry Smith alpha9 = x[10*i+8]; 1822b9cda22cSBarry Smith alpha10 = x[10*i+9]; 1823b9cda22cSBarry Smith while (n-->0) { 1824b9cda22cSBarry Smith y[10*(*idx)] += alpha1*(*v); 1825b9cda22cSBarry Smith y[10*(*idx)+1] += alpha2*(*v); 1826b9cda22cSBarry Smith y[10*(*idx)+2] += alpha3*(*v); 1827b9cda22cSBarry Smith y[10*(*idx)+3] += alpha4*(*v); 1828b9cda22cSBarry Smith y[10*(*idx)+4] += alpha5*(*v); 1829b9cda22cSBarry Smith y[10*(*idx)+5] += alpha6*(*v); 1830b9cda22cSBarry Smith y[10*(*idx)+6] += alpha7*(*v); 1831b9cda22cSBarry Smith y[10*(*idx)+7] += alpha8*(*v); 1832b9cda22cSBarry Smith y[10*(*idx)+8] += alpha9*(*v); 1833b9cda22cSBarry Smith y[10*(*idx)+9] += alpha10*(*v); 1834b9cda22cSBarry Smith idx++; v++; 1835b9cda22cSBarry Smith } 1836b9cda22cSBarry Smith } 1837dc0b31edSSatish Balay ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 18383649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1839b9cda22cSBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1840b9cda22cSBarry Smith PetscFunctionReturn(0); 1841b9cda22cSBarry Smith } 1842b9cda22cSBarry Smith 18432f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/ 1844dbdd7285SBarry Smith PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1845dbdd7285SBarry Smith { 1846dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1847dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1848d2840e09SBarry Smith const PetscScalar *x,*v; 1849d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1850dbdd7285SBarry Smith PetscErrorCode ierr; 1851d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1852d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 1853dbdd7285SBarry Smith 1854dbdd7285SBarry Smith PetscFunctionBegin; 18553649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1856dbdd7285SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1857dbdd7285SBarry Smith idx = a->j; 1858dbdd7285SBarry Smith v = a->a; 1859dbdd7285SBarry Smith ii = a->i; 1860dbdd7285SBarry Smith 1861dbdd7285SBarry Smith for (i=0; i<m; i++) { 1862dbdd7285SBarry Smith jrow = ii[i]; 1863dbdd7285SBarry Smith n = ii[i+1] - jrow; 1864dbdd7285SBarry Smith sum1 = 0.0; 1865dbdd7285SBarry Smith sum2 = 0.0; 1866dbdd7285SBarry Smith sum3 = 0.0; 1867dbdd7285SBarry Smith sum4 = 0.0; 1868dbdd7285SBarry Smith sum5 = 0.0; 1869dbdd7285SBarry Smith sum6 = 0.0; 1870dbdd7285SBarry Smith sum7 = 0.0; 1871dbdd7285SBarry Smith sum8 = 0.0; 1872dbdd7285SBarry Smith sum9 = 0.0; 1873dbdd7285SBarry Smith sum10 = 0.0; 1874dbdd7285SBarry Smith sum11 = 0.0; 187526fbe8dcSKarl Rupp 1876dbdd7285SBarry Smith nonzerorow += (n>0); 1877dbdd7285SBarry Smith for (j=0; j<n; j++) { 1878dbdd7285SBarry Smith sum1 += v[jrow]*x[11*idx[jrow]]; 1879dbdd7285SBarry Smith sum2 += v[jrow]*x[11*idx[jrow]+1]; 1880dbdd7285SBarry Smith sum3 += v[jrow]*x[11*idx[jrow]+2]; 1881dbdd7285SBarry Smith sum4 += v[jrow]*x[11*idx[jrow]+3]; 1882dbdd7285SBarry Smith sum5 += v[jrow]*x[11*idx[jrow]+4]; 1883dbdd7285SBarry Smith sum6 += v[jrow]*x[11*idx[jrow]+5]; 1884dbdd7285SBarry Smith sum7 += v[jrow]*x[11*idx[jrow]+6]; 1885dbdd7285SBarry Smith sum8 += v[jrow]*x[11*idx[jrow]+7]; 1886dbdd7285SBarry Smith sum9 += v[jrow]*x[11*idx[jrow]+8]; 1887dbdd7285SBarry Smith sum10 += v[jrow]*x[11*idx[jrow]+9]; 1888dbdd7285SBarry Smith sum11 += v[jrow]*x[11*idx[jrow]+10]; 1889dbdd7285SBarry Smith jrow++; 1890dbdd7285SBarry Smith } 1891dbdd7285SBarry Smith y[11*i] = sum1; 1892dbdd7285SBarry Smith y[11*i+1] = sum2; 1893dbdd7285SBarry Smith y[11*i+2] = sum3; 1894dbdd7285SBarry Smith y[11*i+3] = sum4; 1895dbdd7285SBarry Smith y[11*i+4] = sum5; 1896dbdd7285SBarry Smith y[11*i+5] = sum6; 1897dbdd7285SBarry Smith y[11*i+6] = sum7; 1898dbdd7285SBarry Smith y[11*i+7] = sum8; 1899dbdd7285SBarry Smith y[11*i+8] = sum9; 1900dbdd7285SBarry Smith y[11*i+9] = sum10; 1901dbdd7285SBarry Smith y[11*i+10] = sum11; 1902dbdd7285SBarry Smith } 1903dbdd7285SBarry Smith 1904ca0c957dSBarry Smith ierr = PetscLogFlops(22.0*a->nz - 11*nonzerorow);CHKERRQ(ierr); 19053649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1906dbdd7285SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1907dbdd7285SBarry Smith PetscFunctionReturn(0); 1908dbdd7285SBarry Smith } 1909dbdd7285SBarry Smith 1910dbdd7285SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 1911dbdd7285SBarry Smith { 1912dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1913dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1914d2840e09SBarry Smith const PetscScalar *x,*v; 1915d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1916dbdd7285SBarry Smith PetscErrorCode ierr; 1917d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1918dbdd7285SBarry Smith PetscInt n,i,jrow,j; 1919dbdd7285SBarry Smith 1920dbdd7285SBarry Smith PetscFunctionBegin; 1921dbdd7285SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 19223649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1923dbdd7285SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1924dbdd7285SBarry Smith idx = a->j; 1925dbdd7285SBarry Smith v = a->a; 1926dbdd7285SBarry Smith ii = a->i; 1927dbdd7285SBarry Smith 1928dbdd7285SBarry Smith for (i=0; i<m; i++) { 1929dbdd7285SBarry Smith jrow = ii[i]; 1930dbdd7285SBarry Smith n = ii[i+1] - jrow; 1931dbdd7285SBarry Smith sum1 = 0.0; 1932dbdd7285SBarry Smith sum2 = 0.0; 1933dbdd7285SBarry Smith sum3 = 0.0; 1934dbdd7285SBarry Smith sum4 = 0.0; 1935dbdd7285SBarry Smith sum5 = 0.0; 1936dbdd7285SBarry Smith sum6 = 0.0; 1937dbdd7285SBarry Smith sum7 = 0.0; 1938dbdd7285SBarry Smith sum8 = 0.0; 1939dbdd7285SBarry Smith sum9 = 0.0; 1940dbdd7285SBarry Smith sum10 = 0.0; 1941dbdd7285SBarry Smith sum11 = 0.0; 1942dbdd7285SBarry Smith for (j=0; j<n; j++) { 1943dbdd7285SBarry Smith sum1 += v[jrow]*x[11*idx[jrow]]; 1944dbdd7285SBarry Smith sum2 += v[jrow]*x[11*idx[jrow]+1]; 1945dbdd7285SBarry Smith sum3 += v[jrow]*x[11*idx[jrow]+2]; 1946dbdd7285SBarry Smith sum4 += v[jrow]*x[11*idx[jrow]+3]; 1947dbdd7285SBarry Smith sum5 += v[jrow]*x[11*idx[jrow]+4]; 1948dbdd7285SBarry Smith sum6 += v[jrow]*x[11*idx[jrow]+5]; 1949dbdd7285SBarry Smith sum7 += v[jrow]*x[11*idx[jrow]+6]; 1950dbdd7285SBarry Smith sum8 += v[jrow]*x[11*idx[jrow]+7]; 1951dbdd7285SBarry Smith sum9 += v[jrow]*x[11*idx[jrow]+8]; 1952dbdd7285SBarry Smith sum10 += v[jrow]*x[11*idx[jrow]+9]; 1953dbdd7285SBarry Smith sum11 += v[jrow]*x[11*idx[jrow]+10]; 1954dbdd7285SBarry Smith jrow++; 1955dbdd7285SBarry Smith } 1956dbdd7285SBarry Smith y[11*i] += sum1; 1957dbdd7285SBarry Smith y[11*i+1] += sum2; 1958dbdd7285SBarry Smith y[11*i+2] += sum3; 1959dbdd7285SBarry Smith y[11*i+3] += sum4; 1960dbdd7285SBarry Smith y[11*i+4] += sum5; 1961dbdd7285SBarry Smith y[11*i+5] += sum6; 1962dbdd7285SBarry Smith y[11*i+6] += sum7; 1963dbdd7285SBarry Smith y[11*i+7] += sum8; 1964dbdd7285SBarry Smith y[11*i+8] += sum9; 1965dbdd7285SBarry Smith y[11*i+9] += sum10; 1966dbdd7285SBarry Smith y[11*i+10] += sum11; 1967dbdd7285SBarry Smith } 1968dbdd7285SBarry Smith 1969ca0c957dSBarry Smith ierr = PetscLogFlops(22.0*a->nz);CHKERRQ(ierr); 19703649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1971dbdd7285SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1972dbdd7285SBarry Smith PetscFunctionReturn(0); 1973dbdd7285SBarry Smith } 1974dbdd7285SBarry Smith 1975dbdd7285SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1976dbdd7285SBarry Smith { 1977dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1978dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1979d2840e09SBarry Smith const PetscScalar *x,*v; 1980d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 1981dbdd7285SBarry Smith PetscErrorCode ierr; 1982d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1983d2840e09SBarry Smith PetscInt n,i; 1984dbdd7285SBarry Smith 1985dbdd7285SBarry Smith PetscFunctionBegin; 1986d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 19873649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1988dbdd7285SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1989dbdd7285SBarry Smith 1990dbdd7285SBarry Smith for (i=0; i<m; i++) { 1991dbdd7285SBarry Smith idx = a->j + a->i[i]; 1992dbdd7285SBarry Smith v = a->a + a->i[i]; 1993dbdd7285SBarry Smith n = a->i[i+1] - a->i[i]; 1994dbdd7285SBarry Smith alpha1 = x[11*i]; 1995dbdd7285SBarry Smith alpha2 = x[11*i+1]; 1996dbdd7285SBarry Smith alpha3 = x[11*i+2]; 1997dbdd7285SBarry Smith alpha4 = x[11*i+3]; 1998dbdd7285SBarry Smith alpha5 = x[11*i+4]; 1999dbdd7285SBarry Smith alpha6 = x[11*i+5]; 2000dbdd7285SBarry Smith alpha7 = x[11*i+6]; 2001dbdd7285SBarry Smith alpha8 = x[11*i+7]; 2002dbdd7285SBarry Smith alpha9 = x[11*i+8]; 2003dbdd7285SBarry Smith alpha10 = x[11*i+9]; 2004dbdd7285SBarry Smith alpha11 = x[11*i+10]; 2005dbdd7285SBarry Smith while (n-->0) { 2006dbdd7285SBarry Smith y[11*(*idx)] += alpha1*(*v); 2007dbdd7285SBarry Smith y[11*(*idx)+1] += alpha2*(*v); 2008dbdd7285SBarry Smith y[11*(*idx)+2] += alpha3*(*v); 2009dbdd7285SBarry Smith y[11*(*idx)+3] += alpha4*(*v); 2010dbdd7285SBarry Smith y[11*(*idx)+4] += alpha5*(*v); 2011dbdd7285SBarry Smith y[11*(*idx)+5] += alpha6*(*v); 2012dbdd7285SBarry Smith y[11*(*idx)+6] += alpha7*(*v); 2013dbdd7285SBarry Smith y[11*(*idx)+7] += alpha8*(*v); 2014dbdd7285SBarry Smith y[11*(*idx)+8] += alpha9*(*v); 2015dbdd7285SBarry Smith y[11*(*idx)+9] += alpha10*(*v); 2016dbdd7285SBarry Smith y[11*(*idx)+10] += alpha11*(*v); 2017dbdd7285SBarry Smith idx++; v++; 2018dbdd7285SBarry Smith } 2019dbdd7285SBarry Smith } 2020ca0c957dSBarry Smith ierr = PetscLogFlops(22.0*a->nz);CHKERRQ(ierr); 20213649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2022dbdd7285SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2023dbdd7285SBarry Smith PetscFunctionReturn(0); 2024dbdd7285SBarry Smith } 2025dbdd7285SBarry Smith 2026dbdd7285SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 2027dbdd7285SBarry Smith { 2028dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2029dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2030d2840e09SBarry Smith const PetscScalar *x,*v; 2031d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 2032dbdd7285SBarry Smith PetscErrorCode ierr; 2033d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2034d2840e09SBarry Smith PetscInt n,i; 2035dbdd7285SBarry Smith 2036dbdd7285SBarry Smith PetscFunctionBegin; 2037dbdd7285SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 20383649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2039dbdd7285SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2040dbdd7285SBarry Smith for (i=0; i<m; i++) { 2041dbdd7285SBarry Smith idx = a->j + a->i[i]; 2042dbdd7285SBarry Smith v = a->a + a->i[i]; 2043dbdd7285SBarry Smith n = a->i[i+1] - a->i[i]; 2044dbdd7285SBarry Smith alpha1 = x[11*i]; 2045dbdd7285SBarry Smith alpha2 = x[11*i+1]; 2046dbdd7285SBarry Smith alpha3 = x[11*i+2]; 2047dbdd7285SBarry Smith alpha4 = x[11*i+3]; 2048dbdd7285SBarry Smith alpha5 = x[11*i+4]; 2049dbdd7285SBarry Smith alpha6 = x[11*i+5]; 2050dbdd7285SBarry Smith alpha7 = x[11*i+6]; 2051dbdd7285SBarry Smith alpha8 = x[11*i+7]; 2052dbdd7285SBarry Smith alpha9 = x[11*i+8]; 2053dbdd7285SBarry Smith alpha10 = x[11*i+9]; 2054dbdd7285SBarry Smith alpha11 = x[11*i+10]; 2055dbdd7285SBarry Smith while (n-->0) { 2056dbdd7285SBarry Smith y[11*(*idx)] += alpha1*(*v); 2057dbdd7285SBarry Smith y[11*(*idx)+1] += alpha2*(*v); 2058dbdd7285SBarry Smith y[11*(*idx)+2] += alpha3*(*v); 2059dbdd7285SBarry Smith y[11*(*idx)+3] += alpha4*(*v); 2060dbdd7285SBarry Smith y[11*(*idx)+4] += alpha5*(*v); 2061dbdd7285SBarry Smith y[11*(*idx)+5] += alpha6*(*v); 2062dbdd7285SBarry Smith y[11*(*idx)+6] += alpha7*(*v); 2063dbdd7285SBarry Smith y[11*(*idx)+7] += alpha8*(*v); 2064dbdd7285SBarry Smith y[11*(*idx)+8] += alpha9*(*v); 2065dbdd7285SBarry Smith y[11*(*idx)+9] += alpha10*(*v); 2066dbdd7285SBarry Smith y[11*(*idx)+10] += alpha11*(*v); 2067dbdd7285SBarry Smith idx++; v++; 2068dbdd7285SBarry Smith } 2069dbdd7285SBarry Smith } 2070ca0c957dSBarry Smith ierr = PetscLogFlops(22.0*a->nz);CHKERRQ(ierr); 20713649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2072dbdd7285SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2073dbdd7285SBarry Smith PetscFunctionReturn(0); 2074dbdd7285SBarry Smith } 2075dbdd7285SBarry Smith 2076dbdd7285SBarry Smith /*--------------------------------------------------------------------------------------------*/ 2077dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 20782f7816d4SBarry Smith { 20792f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 20802f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2081d2840e09SBarry Smith const PetscScalar *x,*v; 2082d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 20832f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2084dfbe8321SBarry Smith PetscErrorCode ierr; 2085d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2086d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 20872f7816d4SBarry Smith 20882f7816d4SBarry Smith PetscFunctionBegin; 20893649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 20901ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 20912f7816d4SBarry Smith idx = a->j; 20922f7816d4SBarry Smith v = a->a; 20932f7816d4SBarry Smith ii = a->i; 20942f7816d4SBarry Smith 20952f7816d4SBarry Smith for (i=0; i<m; i++) { 20962f7816d4SBarry Smith jrow = ii[i]; 20972f7816d4SBarry Smith n = ii[i+1] - jrow; 20982f7816d4SBarry Smith sum1 = 0.0; 20992f7816d4SBarry Smith sum2 = 0.0; 21002f7816d4SBarry Smith sum3 = 0.0; 21012f7816d4SBarry Smith sum4 = 0.0; 21022f7816d4SBarry Smith sum5 = 0.0; 21032f7816d4SBarry Smith sum6 = 0.0; 21042f7816d4SBarry Smith sum7 = 0.0; 21052f7816d4SBarry Smith sum8 = 0.0; 21062f7816d4SBarry Smith sum9 = 0.0; 21072f7816d4SBarry Smith sum10 = 0.0; 21082f7816d4SBarry Smith sum11 = 0.0; 21092f7816d4SBarry Smith sum12 = 0.0; 21102f7816d4SBarry Smith sum13 = 0.0; 21112f7816d4SBarry Smith sum14 = 0.0; 21122f7816d4SBarry Smith sum15 = 0.0; 21132f7816d4SBarry Smith sum16 = 0.0; 211426fbe8dcSKarl Rupp 2115b3c51c66SMatthew Knepley nonzerorow += (n>0); 21162f7816d4SBarry Smith for (j=0; j<n; j++) { 21172f7816d4SBarry Smith sum1 += v[jrow]*x[16*idx[jrow]]; 21182f7816d4SBarry Smith sum2 += v[jrow]*x[16*idx[jrow]+1]; 21192f7816d4SBarry Smith sum3 += v[jrow]*x[16*idx[jrow]+2]; 21202f7816d4SBarry Smith sum4 += v[jrow]*x[16*idx[jrow]+3]; 21212f7816d4SBarry Smith sum5 += v[jrow]*x[16*idx[jrow]+4]; 21222f7816d4SBarry Smith sum6 += v[jrow]*x[16*idx[jrow]+5]; 21232f7816d4SBarry Smith sum7 += v[jrow]*x[16*idx[jrow]+6]; 21242f7816d4SBarry Smith sum8 += v[jrow]*x[16*idx[jrow]+7]; 21252f7816d4SBarry Smith sum9 += v[jrow]*x[16*idx[jrow]+8]; 21262f7816d4SBarry Smith sum10 += v[jrow]*x[16*idx[jrow]+9]; 21272f7816d4SBarry Smith sum11 += v[jrow]*x[16*idx[jrow]+10]; 21282f7816d4SBarry Smith sum12 += v[jrow]*x[16*idx[jrow]+11]; 21292f7816d4SBarry Smith sum13 += v[jrow]*x[16*idx[jrow]+12]; 21302f7816d4SBarry Smith sum14 += v[jrow]*x[16*idx[jrow]+13]; 21312f7816d4SBarry Smith sum15 += v[jrow]*x[16*idx[jrow]+14]; 21322f7816d4SBarry Smith sum16 += v[jrow]*x[16*idx[jrow]+15]; 21332f7816d4SBarry Smith jrow++; 21342f7816d4SBarry Smith } 21352f7816d4SBarry Smith y[16*i] = sum1; 21362f7816d4SBarry Smith y[16*i+1] = sum2; 21372f7816d4SBarry Smith y[16*i+2] = sum3; 21382f7816d4SBarry Smith y[16*i+3] = sum4; 21392f7816d4SBarry Smith y[16*i+4] = sum5; 21402f7816d4SBarry Smith y[16*i+5] = sum6; 21412f7816d4SBarry Smith y[16*i+6] = sum7; 21422f7816d4SBarry Smith y[16*i+7] = sum8; 21432f7816d4SBarry Smith y[16*i+8] = sum9; 21442f7816d4SBarry Smith y[16*i+9] = sum10; 21452f7816d4SBarry Smith y[16*i+10] = sum11; 21462f7816d4SBarry Smith y[16*i+11] = sum12; 21472f7816d4SBarry Smith y[16*i+12] = sum13; 21482f7816d4SBarry Smith y[16*i+13] = sum14; 21492f7816d4SBarry Smith y[16*i+14] = sum15; 21502f7816d4SBarry Smith y[16*i+15] = sum16; 21512f7816d4SBarry Smith } 21522f7816d4SBarry Smith 2153dc0b31edSSatish Balay ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr); 21543649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 21551ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 21562f7816d4SBarry Smith PetscFunctionReturn(0); 21572f7816d4SBarry Smith } 21582f7816d4SBarry Smith 2159dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 21602f7816d4SBarry Smith { 21612f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 21622f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2163d2840e09SBarry Smith const PetscScalar *x,*v; 2164d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 21652f7816d4SBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2166dfbe8321SBarry Smith PetscErrorCode ierr; 2167d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2168d2840e09SBarry Smith PetscInt n,i; 21692f7816d4SBarry Smith 21702f7816d4SBarry Smith PetscFunctionBegin; 2171d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 21723649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 21731ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2174bfec09a0SHong Zhang 21752f7816d4SBarry Smith for (i=0; i<m; i++) { 2176bfec09a0SHong Zhang idx = a->j + a->i[i]; 2177bfec09a0SHong Zhang v = a->a + a->i[i]; 21782f7816d4SBarry Smith n = a->i[i+1] - a->i[i]; 21792f7816d4SBarry Smith alpha1 = x[16*i]; 21802f7816d4SBarry Smith alpha2 = x[16*i+1]; 21812f7816d4SBarry Smith alpha3 = x[16*i+2]; 21822f7816d4SBarry Smith alpha4 = x[16*i+3]; 21832f7816d4SBarry Smith alpha5 = x[16*i+4]; 21842f7816d4SBarry Smith alpha6 = x[16*i+5]; 21852f7816d4SBarry Smith alpha7 = x[16*i+6]; 21862f7816d4SBarry Smith alpha8 = x[16*i+7]; 21872f7816d4SBarry Smith alpha9 = x[16*i+8]; 21882f7816d4SBarry Smith alpha10 = x[16*i+9]; 21892f7816d4SBarry Smith alpha11 = x[16*i+10]; 21902f7816d4SBarry Smith alpha12 = x[16*i+11]; 21912f7816d4SBarry Smith alpha13 = x[16*i+12]; 21922f7816d4SBarry Smith alpha14 = x[16*i+13]; 21932f7816d4SBarry Smith alpha15 = x[16*i+14]; 21942f7816d4SBarry Smith alpha16 = x[16*i+15]; 21952f7816d4SBarry Smith while (n-->0) { 21962f7816d4SBarry Smith y[16*(*idx)] += alpha1*(*v); 21972f7816d4SBarry Smith y[16*(*idx)+1] += alpha2*(*v); 21982f7816d4SBarry Smith y[16*(*idx)+2] += alpha3*(*v); 21992f7816d4SBarry Smith y[16*(*idx)+3] += alpha4*(*v); 22002f7816d4SBarry Smith y[16*(*idx)+4] += alpha5*(*v); 22012f7816d4SBarry Smith y[16*(*idx)+5] += alpha6*(*v); 22022f7816d4SBarry Smith y[16*(*idx)+6] += alpha7*(*v); 22032f7816d4SBarry Smith y[16*(*idx)+7] += alpha8*(*v); 22042f7816d4SBarry Smith y[16*(*idx)+8] += alpha9*(*v); 22052f7816d4SBarry Smith y[16*(*idx)+9] += alpha10*(*v); 22062f7816d4SBarry Smith y[16*(*idx)+10] += alpha11*(*v); 22072f7816d4SBarry Smith y[16*(*idx)+11] += alpha12*(*v); 22082f7816d4SBarry Smith y[16*(*idx)+12] += alpha13*(*v); 22092f7816d4SBarry Smith y[16*(*idx)+13] += alpha14*(*v); 22102f7816d4SBarry Smith y[16*(*idx)+14] += alpha15*(*v); 22112f7816d4SBarry Smith y[16*(*idx)+15] += alpha16*(*v); 22122f7816d4SBarry Smith idx++; v++; 22132f7816d4SBarry Smith } 22142f7816d4SBarry Smith } 2215dc0b31edSSatish Balay ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 22163649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 22171ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 22182f7816d4SBarry Smith PetscFunctionReturn(0); 22192f7816d4SBarry Smith } 22202f7816d4SBarry Smith 2221dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 22222f7816d4SBarry Smith { 22232f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 22242f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2225d2840e09SBarry Smith const PetscScalar *x,*v; 2226d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 22272f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2228dfbe8321SBarry Smith PetscErrorCode ierr; 2229d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2230b24ad042SBarry Smith PetscInt n,i,jrow,j; 22312f7816d4SBarry Smith 22322f7816d4SBarry Smith PetscFunctionBegin; 22332f7816d4SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 22343649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 22351ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 22362f7816d4SBarry Smith idx = a->j; 22372f7816d4SBarry Smith v = a->a; 22382f7816d4SBarry Smith ii = a->i; 22392f7816d4SBarry Smith 22402f7816d4SBarry Smith for (i=0; i<m; i++) { 22412f7816d4SBarry Smith jrow = ii[i]; 22422f7816d4SBarry Smith n = ii[i+1] - jrow; 22432f7816d4SBarry Smith sum1 = 0.0; 22442f7816d4SBarry Smith sum2 = 0.0; 22452f7816d4SBarry Smith sum3 = 0.0; 22462f7816d4SBarry Smith sum4 = 0.0; 22472f7816d4SBarry Smith sum5 = 0.0; 22482f7816d4SBarry Smith sum6 = 0.0; 22492f7816d4SBarry Smith sum7 = 0.0; 22502f7816d4SBarry Smith sum8 = 0.0; 22512f7816d4SBarry Smith sum9 = 0.0; 22522f7816d4SBarry Smith sum10 = 0.0; 22532f7816d4SBarry Smith sum11 = 0.0; 22542f7816d4SBarry Smith sum12 = 0.0; 22552f7816d4SBarry Smith sum13 = 0.0; 22562f7816d4SBarry Smith sum14 = 0.0; 22572f7816d4SBarry Smith sum15 = 0.0; 22582f7816d4SBarry Smith sum16 = 0.0; 22592f7816d4SBarry Smith for (j=0; j<n; j++) { 22602f7816d4SBarry Smith sum1 += v[jrow]*x[16*idx[jrow]]; 22612f7816d4SBarry Smith sum2 += v[jrow]*x[16*idx[jrow]+1]; 22622f7816d4SBarry Smith sum3 += v[jrow]*x[16*idx[jrow]+2]; 22632f7816d4SBarry Smith sum4 += v[jrow]*x[16*idx[jrow]+3]; 22642f7816d4SBarry Smith sum5 += v[jrow]*x[16*idx[jrow]+4]; 22652f7816d4SBarry Smith sum6 += v[jrow]*x[16*idx[jrow]+5]; 22662f7816d4SBarry Smith sum7 += v[jrow]*x[16*idx[jrow]+6]; 22672f7816d4SBarry Smith sum8 += v[jrow]*x[16*idx[jrow]+7]; 22682f7816d4SBarry Smith sum9 += v[jrow]*x[16*idx[jrow]+8]; 22692f7816d4SBarry Smith sum10 += v[jrow]*x[16*idx[jrow]+9]; 22702f7816d4SBarry Smith sum11 += v[jrow]*x[16*idx[jrow]+10]; 22712f7816d4SBarry Smith sum12 += v[jrow]*x[16*idx[jrow]+11]; 22722f7816d4SBarry Smith sum13 += v[jrow]*x[16*idx[jrow]+12]; 22732f7816d4SBarry Smith sum14 += v[jrow]*x[16*idx[jrow]+13]; 22742f7816d4SBarry Smith sum15 += v[jrow]*x[16*idx[jrow]+14]; 22752f7816d4SBarry Smith sum16 += v[jrow]*x[16*idx[jrow]+15]; 22762f7816d4SBarry Smith jrow++; 22772f7816d4SBarry Smith } 22782f7816d4SBarry Smith y[16*i] += sum1; 22792f7816d4SBarry Smith y[16*i+1] += sum2; 22802f7816d4SBarry Smith y[16*i+2] += sum3; 22812f7816d4SBarry Smith y[16*i+3] += sum4; 22822f7816d4SBarry Smith y[16*i+4] += sum5; 22832f7816d4SBarry Smith y[16*i+5] += sum6; 22842f7816d4SBarry Smith y[16*i+6] += sum7; 22852f7816d4SBarry Smith y[16*i+7] += sum8; 22862f7816d4SBarry Smith y[16*i+8] += sum9; 22872f7816d4SBarry Smith y[16*i+9] += sum10; 22882f7816d4SBarry Smith y[16*i+10] += sum11; 22892f7816d4SBarry Smith y[16*i+11] += sum12; 22902f7816d4SBarry Smith y[16*i+12] += sum13; 22912f7816d4SBarry Smith y[16*i+13] += sum14; 22922f7816d4SBarry Smith y[16*i+14] += sum15; 22932f7816d4SBarry Smith y[16*i+15] += sum16; 22942f7816d4SBarry Smith } 22952f7816d4SBarry Smith 2296dc0b31edSSatish Balay ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 22973649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 22981ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 22992f7816d4SBarry Smith PetscFunctionReturn(0); 23002f7816d4SBarry Smith } 23012f7816d4SBarry Smith 2302dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 23032f7816d4SBarry Smith { 23042f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 23052f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2306d2840e09SBarry Smith const PetscScalar *x,*v; 2307d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 23082f7816d4SBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2309dfbe8321SBarry Smith PetscErrorCode ierr; 2310d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2311d2840e09SBarry Smith PetscInt n,i; 23122f7816d4SBarry Smith 23132f7816d4SBarry Smith PetscFunctionBegin; 23142f7816d4SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 23153649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 23161ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 23172f7816d4SBarry Smith for (i=0; i<m; i++) { 2318bfec09a0SHong Zhang idx = a->j + a->i[i]; 2319bfec09a0SHong Zhang v = a->a + a->i[i]; 23202f7816d4SBarry Smith n = a->i[i+1] - a->i[i]; 23212f7816d4SBarry Smith alpha1 = x[16*i]; 23222f7816d4SBarry Smith alpha2 = x[16*i+1]; 23232f7816d4SBarry Smith alpha3 = x[16*i+2]; 23242f7816d4SBarry Smith alpha4 = x[16*i+3]; 23252f7816d4SBarry Smith alpha5 = x[16*i+4]; 23262f7816d4SBarry Smith alpha6 = x[16*i+5]; 23272f7816d4SBarry Smith alpha7 = x[16*i+6]; 23282f7816d4SBarry Smith alpha8 = x[16*i+7]; 23292f7816d4SBarry Smith alpha9 = x[16*i+8]; 23302f7816d4SBarry Smith alpha10 = x[16*i+9]; 23312f7816d4SBarry Smith alpha11 = x[16*i+10]; 23322f7816d4SBarry Smith alpha12 = x[16*i+11]; 23332f7816d4SBarry Smith alpha13 = x[16*i+12]; 23342f7816d4SBarry Smith alpha14 = x[16*i+13]; 23352f7816d4SBarry Smith alpha15 = x[16*i+14]; 23362f7816d4SBarry Smith alpha16 = x[16*i+15]; 23372f7816d4SBarry Smith while (n-->0) { 23382f7816d4SBarry Smith y[16*(*idx)] += alpha1*(*v); 23392f7816d4SBarry Smith y[16*(*idx)+1] += alpha2*(*v); 23402f7816d4SBarry Smith y[16*(*idx)+2] += alpha3*(*v); 23412f7816d4SBarry Smith y[16*(*idx)+3] += alpha4*(*v); 23422f7816d4SBarry Smith y[16*(*idx)+4] += alpha5*(*v); 23432f7816d4SBarry Smith y[16*(*idx)+5] += alpha6*(*v); 23442f7816d4SBarry Smith y[16*(*idx)+6] += alpha7*(*v); 23452f7816d4SBarry Smith y[16*(*idx)+7] += alpha8*(*v); 23462f7816d4SBarry Smith y[16*(*idx)+8] += alpha9*(*v); 23472f7816d4SBarry Smith y[16*(*idx)+9] += alpha10*(*v); 23482f7816d4SBarry Smith y[16*(*idx)+10] += alpha11*(*v); 23492f7816d4SBarry Smith y[16*(*idx)+11] += alpha12*(*v); 23502f7816d4SBarry Smith y[16*(*idx)+12] += alpha13*(*v); 23512f7816d4SBarry Smith y[16*(*idx)+13] += alpha14*(*v); 23522f7816d4SBarry Smith y[16*(*idx)+14] += alpha15*(*v); 23532f7816d4SBarry Smith y[16*(*idx)+15] += alpha16*(*v); 23542f7816d4SBarry Smith idx++; v++; 23552f7816d4SBarry Smith } 23562f7816d4SBarry Smith } 2357dc0b31edSSatish Balay ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 23583649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 23591ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 236066ed3db0SBarry Smith PetscFunctionReturn(0); 236166ed3db0SBarry Smith } 236266ed3db0SBarry Smith 2363ed1c418cSBarry Smith /*--------------------------------------------------------------------------------------------*/ 2364ed1c418cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2365ed1c418cSBarry Smith { 2366ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2367ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2368d2840e09SBarry Smith const PetscScalar *x,*v; 2369d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2370ed1c418cSBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2371ed1c418cSBarry Smith PetscErrorCode ierr; 2372d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2373d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 2374ed1c418cSBarry Smith 2375ed1c418cSBarry Smith PetscFunctionBegin; 23763649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2377ed1c418cSBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2378ed1c418cSBarry Smith idx = a->j; 2379ed1c418cSBarry Smith v = a->a; 2380ed1c418cSBarry Smith ii = a->i; 2381ed1c418cSBarry Smith 2382ed1c418cSBarry Smith for (i=0; i<m; i++) { 2383ed1c418cSBarry Smith jrow = ii[i]; 2384ed1c418cSBarry Smith n = ii[i+1] - jrow; 2385ed1c418cSBarry Smith sum1 = 0.0; 2386ed1c418cSBarry Smith sum2 = 0.0; 2387ed1c418cSBarry Smith sum3 = 0.0; 2388ed1c418cSBarry Smith sum4 = 0.0; 2389ed1c418cSBarry Smith sum5 = 0.0; 2390ed1c418cSBarry Smith sum6 = 0.0; 2391ed1c418cSBarry Smith sum7 = 0.0; 2392ed1c418cSBarry Smith sum8 = 0.0; 2393ed1c418cSBarry Smith sum9 = 0.0; 2394ed1c418cSBarry Smith sum10 = 0.0; 2395ed1c418cSBarry Smith sum11 = 0.0; 2396ed1c418cSBarry Smith sum12 = 0.0; 2397ed1c418cSBarry Smith sum13 = 0.0; 2398ed1c418cSBarry Smith sum14 = 0.0; 2399ed1c418cSBarry Smith sum15 = 0.0; 2400ed1c418cSBarry Smith sum16 = 0.0; 2401ed1c418cSBarry Smith sum17 = 0.0; 2402ed1c418cSBarry Smith sum18 = 0.0; 240326fbe8dcSKarl Rupp 2404ed1c418cSBarry Smith nonzerorow += (n>0); 2405ed1c418cSBarry Smith for (j=0; j<n; j++) { 2406ed1c418cSBarry Smith sum1 += v[jrow]*x[18*idx[jrow]]; 2407ed1c418cSBarry Smith sum2 += v[jrow]*x[18*idx[jrow]+1]; 2408ed1c418cSBarry Smith sum3 += v[jrow]*x[18*idx[jrow]+2]; 2409ed1c418cSBarry Smith sum4 += v[jrow]*x[18*idx[jrow]+3]; 2410ed1c418cSBarry Smith sum5 += v[jrow]*x[18*idx[jrow]+4]; 2411ed1c418cSBarry Smith sum6 += v[jrow]*x[18*idx[jrow]+5]; 2412ed1c418cSBarry Smith sum7 += v[jrow]*x[18*idx[jrow]+6]; 2413ed1c418cSBarry Smith sum8 += v[jrow]*x[18*idx[jrow]+7]; 2414ed1c418cSBarry Smith sum9 += v[jrow]*x[18*idx[jrow]+8]; 2415ed1c418cSBarry Smith sum10 += v[jrow]*x[18*idx[jrow]+9]; 2416ed1c418cSBarry Smith sum11 += v[jrow]*x[18*idx[jrow]+10]; 2417ed1c418cSBarry Smith sum12 += v[jrow]*x[18*idx[jrow]+11]; 2418ed1c418cSBarry Smith sum13 += v[jrow]*x[18*idx[jrow]+12]; 2419ed1c418cSBarry Smith sum14 += v[jrow]*x[18*idx[jrow]+13]; 2420ed1c418cSBarry Smith sum15 += v[jrow]*x[18*idx[jrow]+14]; 2421ed1c418cSBarry Smith sum16 += v[jrow]*x[18*idx[jrow]+15]; 2422ed1c418cSBarry Smith sum17 += v[jrow]*x[18*idx[jrow]+16]; 2423ed1c418cSBarry Smith sum18 += v[jrow]*x[18*idx[jrow]+17]; 2424ed1c418cSBarry Smith jrow++; 2425ed1c418cSBarry Smith } 2426ed1c418cSBarry Smith y[18*i] = sum1; 2427ed1c418cSBarry Smith y[18*i+1] = sum2; 2428ed1c418cSBarry Smith y[18*i+2] = sum3; 2429ed1c418cSBarry Smith y[18*i+3] = sum4; 2430ed1c418cSBarry Smith y[18*i+4] = sum5; 2431ed1c418cSBarry Smith y[18*i+5] = sum6; 2432ed1c418cSBarry Smith y[18*i+6] = sum7; 2433ed1c418cSBarry Smith y[18*i+7] = sum8; 2434ed1c418cSBarry Smith y[18*i+8] = sum9; 2435ed1c418cSBarry Smith y[18*i+9] = sum10; 2436ed1c418cSBarry Smith y[18*i+10] = sum11; 2437ed1c418cSBarry Smith y[18*i+11] = sum12; 2438ed1c418cSBarry Smith y[18*i+12] = sum13; 2439ed1c418cSBarry Smith y[18*i+13] = sum14; 2440ed1c418cSBarry Smith y[18*i+14] = sum15; 2441ed1c418cSBarry Smith y[18*i+15] = sum16; 2442ed1c418cSBarry Smith y[18*i+16] = sum17; 2443ed1c418cSBarry Smith y[18*i+17] = sum18; 2444ed1c418cSBarry Smith } 2445ed1c418cSBarry Smith 2446dc0b31edSSatish Balay ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr); 24473649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2448ed1c418cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2449ed1c418cSBarry Smith PetscFunctionReturn(0); 2450ed1c418cSBarry Smith } 2451ed1c418cSBarry Smith 2452ed1c418cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2453ed1c418cSBarry Smith { 2454ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2455ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2456d2840e09SBarry Smith const PetscScalar *x,*v; 2457d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2458ed1c418cSBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2459ed1c418cSBarry Smith PetscErrorCode ierr; 2460d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2461d2840e09SBarry Smith PetscInt n,i; 2462ed1c418cSBarry Smith 2463ed1c418cSBarry Smith PetscFunctionBegin; 2464d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 24653649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2466ed1c418cSBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2467ed1c418cSBarry Smith 2468ed1c418cSBarry Smith for (i=0; i<m; i++) { 2469ed1c418cSBarry Smith idx = a->j + a->i[i]; 2470ed1c418cSBarry Smith v = a->a + a->i[i]; 2471ed1c418cSBarry Smith n = a->i[i+1] - a->i[i]; 2472ed1c418cSBarry Smith alpha1 = x[18*i]; 2473ed1c418cSBarry Smith alpha2 = x[18*i+1]; 2474ed1c418cSBarry Smith alpha3 = x[18*i+2]; 2475ed1c418cSBarry Smith alpha4 = x[18*i+3]; 2476ed1c418cSBarry Smith alpha5 = x[18*i+4]; 2477ed1c418cSBarry Smith alpha6 = x[18*i+5]; 2478ed1c418cSBarry Smith alpha7 = x[18*i+6]; 2479ed1c418cSBarry Smith alpha8 = x[18*i+7]; 2480ed1c418cSBarry Smith alpha9 = x[18*i+8]; 2481ed1c418cSBarry Smith alpha10 = x[18*i+9]; 2482ed1c418cSBarry Smith alpha11 = x[18*i+10]; 2483ed1c418cSBarry Smith alpha12 = x[18*i+11]; 2484ed1c418cSBarry Smith alpha13 = x[18*i+12]; 2485ed1c418cSBarry Smith alpha14 = x[18*i+13]; 2486ed1c418cSBarry Smith alpha15 = x[18*i+14]; 2487ed1c418cSBarry Smith alpha16 = x[18*i+15]; 2488ed1c418cSBarry Smith alpha17 = x[18*i+16]; 2489ed1c418cSBarry Smith alpha18 = x[18*i+17]; 2490ed1c418cSBarry Smith while (n-->0) { 2491ed1c418cSBarry Smith y[18*(*idx)] += alpha1*(*v); 2492ed1c418cSBarry Smith y[18*(*idx)+1] += alpha2*(*v); 2493ed1c418cSBarry Smith y[18*(*idx)+2] += alpha3*(*v); 2494ed1c418cSBarry Smith y[18*(*idx)+3] += alpha4*(*v); 2495ed1c418cSBarry Smith y[18*(*idx)+4] += alpha5*(*v); 2496ed1c418cSBarry Smith y[18*(*idx)+5] += alpha6*(*v); 2497ed1c418cSBarry Smith y[18*(*idx)+6] += alpha7*(*v); 2498ed1c418cSBarry Smith y[18*(*idx)+7] += alpha8*(*v); 2499ed1c418cSBarry Smith y[18*(*idx)+8] += alpha9*(*v); 2500ed1c418cSBarry Smith y[18*(*idx)+9] += alpha10*(*v); 2501ed1c418cSBarry Smith y[18*(*idx)+10] += alpha11*(*v); 2502ed1c418cSBarry Smith y[18*(*idx)+11] += alpha12*(*v); 2503ed1c418cSBarry Smith y[18*(*idx)+12] += alpha13*(*v); 2504ed1c418cSBarry Smith y[18*(*idx)+13] += alpha14*(*v); 2505ed1c418cSBarry Smith y[18*(*idx)+14] += alpha15*(*v); 2506ed1c418cSBarry Smith y[18*(*idx)+15] += alpha16*(*v); 2507ed1c418cSBarry Smith y[18*(*idx)+16] += alpha17*(*v); 2508ed1c418cSBarry Smith y[18*(*idx)+17] += alpha18*(*v); 2509ed1c418cSBarry Smith idx++; v++; 2510ed1c418cSBarry Smith } 2511ed1c418cSBarry Smith } 2512dc0b31edSSatish Balay ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 25133649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2514ed1c418cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2515ed1c418cSBarry Smith PetscFunctionReturn(0); 2516ed1c418cSBarry Smith } 2517ed1c418cSBarry Smith 2518ed1c418cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2519ed1c418cSBarry Smith { 2520ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2521ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2522d2840e09SBarry Smith const PetscScalar *x,*v; 2523d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2524ed1c418cSBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2525ed1c418cSBarry Smith PetscErrorCode ierr; 2526d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2527ed1c418cSBarry Smith PetscInt n,i,jrow,j; 2528ed1c418cSBarry Smith 2529ed1c418cSBarry Smith PetscFunctionBegin; 2530ed1c418cSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 25313649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2532ed1c418cSBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2533ed1c418cSBarry Smith idx = a->j; 2534ed1c418cSBarry Smith v = a->a; 2535ed1c418cSBarry Smith ii = a->i; 2536ed1c418cSBarry Smith 2537ed1c418cSBarry Smith for (i=0; i<m; i++) { 2538ed1c418cSBarry Smith jrow = ii[i]; 2539ed1c418cSBarry Smith n = ii[i+1] - jrow; 2540ed1c418cSBarry Smith sum1 = 0.0; 2541ed1c418cSBarry Smith sum2 = 0.0; 2542ed1c418cSBarry Smith sum3 = 0.0; 2543ed1c418cSBarry Smith sum4 = 0.0; 2544ed1c418cSBarry Smith sum5 = 0.0; 2545ed1c418cSBarry Smith sum6 = 0.0; 2546ed1c418cSBarry Smith sum7 = 0.0; 2547ed1c418cSBarry Smith sum8 = 0.0; 2548ed1c418cSBarry Smith sum9 = 0.0; 2549ed1c418cSBarry Smith sum10 = 0.0; 2550ed1c418cSBarry Smith sum11 = 0.0; 2551ed1c418cSBarry Smith sum12 = 0.0; 2552ed1c418cSBarry Smith sum13 = 0.0; 2553ed1c418cSBarry Smith sum14 = 0.0; 2554ed1c418cSBarry Smith sum15 = 0.0; 2555ed1c418cSBarry Smith sum16 = 0.0; 2556ed1c418cSBarry Smith sum17 = 0.0; 2557ed1c418cSBarry Smith sum18 = 0.0; 2558ed1c418cSBarry Smith for (j=0; j<n; j++) { 2559ed1c418cSBarry Smith sum1 += v[jrow]*x[18*idx[jrow]]; 2560ed1c418cSBarry Smith sum2 += v[jrow]*x[18*idx[jrow]+1]; 2561ed1c418cSBarry Smith sum3 += v[jrow]*x[18*idx[jrow]+2]; 2562ed1c418cSBarry Smith sum4 += v[jrow]*x[18*idx[jrow]+3]; 2563ed1c418cSBarry Smith sum5 += v[jrow]*x[18*idx[jrow]+4]; 2564ed1c418cSBarry Smith sum6 += v[jrow]*x[18*idx[jrow]+5]; 2565ed1c418cSBarry Smith sum7 += v[jrow]*x[18*idx[jrow]+6]; 2566ed1c418cSBarry Smith sum8 += v[jrow]*x[18*idx[jrow]+7]; 2567ed1c418cSBarry Smith sum9 += v[jrow]*x[18*idx[jrow]+8]; 2568ed1c418cSBarry Smith sum10 += v[jrow]*x[18*idx[jrow]+9]; 2569ed1c418cSBarry Smith sum11 += v[jrow]*x[18*idx[jrow]+10]; 2570ed1c418cSBarry Smith sum12 += v[jrow]*x[18*idx[jrow]+11]; 2571ed1c418cSBarry Smith sum13 += v[jrow]*x[18*idx[jrow]+12]; 2572ed1c418cSBarry Smith sum14 += v[jrow]*x[18*idx[jrow]+13]; 2573ed1c418cSBarry Smith sum15 += v[jrow]*x[18*idx[jrow]+14]; 2574ed1c418cSBarry Smith sum16 += v[jrow]*x[18*idx[jrow]+15]; 2575ed1c418cSBarry Smith sum17 += v[jrow]*x[18*idx[jrow]+16]; 2576ed1c418cSBarry Smith sum18 += v[jrow]*x[18*idx[jrow]+17]; 2577ed1c418cSBarry Smith jrow++; 2578ed1c418cSBarry Smith } 2579ed1c418cSBarry Smith y[18*i] += sum1; 2580ed1c418cSBarry Smith y[18*i+1] += sum2; 2581ed1c418cSBarry Smith y[18*i+2] += sum3; 2582ed1c418cSBarry Smith y[18*i+3] += sum4; 2583ed1c418cSBarry Smith y[18*i+4] += sum5; 2584ed1c418cSBarry Smith y[18*i+5] += sum6; 2585ed1c418cSBarry Smith y[18*i+6] += sum7; 2586ed1c418cSBarry Smith y[18*i+7] += sum8; 2587ed1c418cSBarry Smith y[18*i+8] += sum9; 2588ed1c418cSBarry Smith y[18*i+9] += sum10; 2589ed1c418cSBarry Smith y[18*i+10] += sum11; 2590ed1c418cSBarry Smith y[18*i+11] += sum12; 2591ed1c418cSBarry Smith y[18*i+12] += sum13; 2592ed1c418cSBarry Smith y[18*i+13] += sum14; 2593ed1c418cSBarry Smith y[18*i+14] += sum15; 2594ed1c418cSBarry Smith y[18*i+15] += sum16; 2595ed1c418cSBarry Smith y[18*i+16] += sum17; 2596ed1c418cSBarry Smith y[18*i+17] += sum18; 2597ed1c418cSBarry Smith } 2598ed1c418cSBarry Smith 2599dc0b31edSSatish Balay ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 26003649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2601ed1c418cSBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2602ed1c418cSBarry Smith PetscFunctionReturn(0); 2603ed1c418cSBarry Smith } 2604ed1c418cSBarry Smith 2605ed1c418cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2606ed1c418cSBarry Smith { 2607ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2608ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2609d2840e09SBarry Smith const PetscScalar *x,*v; 2610d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2611ed1c418cSBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2612ed1c418cSBarry Smith PetscErrorCode ierr; 2613d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2614d2840e09SBarry Smith PetscInt n,i; 2615ed1c418cSBarry Smith 2616ed1c418cSBarry Smith PetscFunctionBegin; 2617ed1c418cSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 26183649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2619ed1c418cSBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2620ed1c418cSBarry Smith for (i=0; i<m; i++) { 2621ed1c418cSBarry Smith idx = a->j + a->i[i]; 2622ed1c418cSBarry Smith v = a->a + a->i[i]; 2623ed1c418cSBarry Smith n = a->i[i+1] - a->i[i]; 2624ed1c418cSBarry Smith alpha1 = x[18*i]; 2625ed1c418cSBarry Smith alpha2 = x[18*i+1]; 2626ed1c418cSBarry Smith alpha3 = x[18*i+2]; 2627ed1c418cSBarry Smith alpha4 = x[18*i+3]; 2628ed1c418cSBarry Smith alpha5 = x[18*i+4]; 2629ed1c418cSBarry Smith alpha6 = x[18*i+5]; 2630ed1c418cSBarry Smith alpha7 = x[18*i+6]; 2631ed1c418cSBarry Smith alpha8 = x[18*i+7]; 2632ed1c418cSBarry Smith alpha9 = x[18*i+8]; 2633ed1c418cSBarry Smith alpha10 = x[18*i+9]; 2634ed1c418cSBarry Smith alpha11 = x[18*i+10]; 2635ed1c418cSBarry Smith alpha12 = x[18*i+11]; 2636ed1c418cSBarry Smith alpha13 = x[18*i+12]; 2637ed1c418cSBarry Smith alpha14 = x[18*i+13]; 2638ed1c418cSBarry Smith alpha15 = x[18*i+14]; 2639ed1c418cSBarry Smith alpha16 = x[18*i+15]; 2640ed1c418cSBarry Smith alpha17 = x[18*i+16]; 2641ed1c418cSBarry Smith alpha18 = x[18*i+17]; 2642ed1c418cSBarry Smith while (n-->0) { 2643ed1c418cSBarry Smith y[18*(*idx)] += alpha1*(*v); 2644ed1c418cSBarry Smith y[18*(*idx)+1] += alpha2*(*v); 2645ed1c418cSBarry Smith y[18*(*idx)+2] += alpha3*(*v); 2646ed1c418cSBarry Smith y[18*(*idx)+3] += alpha4*(*v); 2647ed1c418cSBarry Smith y[18*(*idx)+4] += alpha5*(*v); 2648ed1c418cSBarry Smith y[18*(*idx)+5] += alpha6*(*v); 2649ed1c418cSBarry Smith y[18*(*idx)+6] += alpha7*(*v); 2650ed1c418cSBarry Smith y[18*(*idx)+7] += alpha8*(*v); 2651ed1c418cSBarry Smith y[18*(*idx)+8] += alpha9*(*v); 2652ed1c418cSBarry Smith y[18*(*idx)+9] += alpha10*(*v); 2653ed1c418cSBarry Smith y[18*(*idx)+10] += alpha11*(*v); 2654ed1c418cSBarry Smith y[18*(*idx)+11] += alpha12*(*v); 2655ed1c418cSBarry Smith y[18*(*idx)+12] += alpha13*(*v); 2656ed1c418cSBarry Smith y[18*(*idx)+13] += alpha14*(*v); 2657ed1c418cSBarry Smith y[18*(*idx)+14] += alpha15*(*v); 2658ed1c418cSBarry Smith y[18*(*idx)+15] += alpha16*(*v); 2659ed1c418cSBarry Smith y[18*(*idx)+16] += alpha17*(*v); 2660ed1c418cSBarry Smith y[18*(*idx)+17] += alpha18*(*v); 2661ed1c418cSBarry Smith idx++; v++; 2662ed1c418cSBarry Smith } 2663ed1c418cSBarry Smith } 2664dc0b31edSSatish Balay ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 26653649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2666ed1c418cSBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2667ed1c418cSBarry Smith PetscFunctionReturn(0); 2668ed1c418cSBarry Smith } 2669ed1c418cSBarry Smith 26706861f193SBarry Smith PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 26716861f193SBarry Smith { 26726861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 26736861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 26746861f193SBarry Smith const PetscScalar *x,*v; 26756861f193SBarry Smith PetscScalar *y,*sums; 26766861f193SBarry Smith PetscErrorCode ierr; 26776861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 26786861f193SBarry Smith PetscInt n,i,jrow,j,dof = b->dof,k; 26796861f193SBarry Smith 26806861f193SBarry Smith PetscFunctionBegin; 26813649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 26826861f193SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 26836861f193SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 26846861f193SBarry Smith idx = a->j; 26856861f193SBarry Smith v = a->a; 26866861f193SBarry Smith ii = a->i; 26876861f193SBarry Smith 26886861f193SBarry Smith for (i=0; i<m; i++) { 26896861f193SBarry Smith jrow = ii[i]; 26906861f193SBarry Smith n = ii[i+1] - jrow; 26916861f193SBarry Smith sums = y + dof*i; 26926861f193SBarry Smith for (j=0; j<n; j++) { 26936861f193SBarry Smith for (k=0; k<dof; k++) { 26946861f193SBarry Smith sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 26956861f193SBarry Smith } 26966861f193SBarry Smith jrow++; 26976861f193SBarry Smith } 26986861f193SBarry Smith } 26996861f193SBarry Smith 27006861f193SBarry Smith ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 27013649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 27026861f193SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 27036861f193SBarry Smith PetscFunctionReturn(0); 27046861f193SBarry Smith } 27056861f193SBarry Smith 27066861f193SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 27076861f193SBarry Smith { 27086861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 27096861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 27106861f193SBarry Smith const PetscScalar *x,*v; 27116861f193SBarry Smith PetscScalar *y,*sums; 27126861f193SBarry Smith PetscErrorCode ierr; 27136861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 27146861f193SBarry Smith PetscInt n,i,jrow,j,dof = b->dof,k; 27156861f193SBarry Smith 27166861f193SBarry Smith PetscFunctionBegin; 27176861f193SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 27183649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 27196861f193SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 27206861f193SBarry Smith idx = a->j; 27216861f193SBarry Smith v = a->a; 27226861f193SBarry Smith ii = a->i; 27236861f193SBarry Smith 27246861f193SBarry Smith for (i=0; i<m; i++) { 27256861f193SBarry Smith jrow = ii[i]; 27266861f193SBarry Smith n = ii[i+1] - jrow; 27276861f193SBarry Smith sums = y + dof*i; 27286861f193SBarry Smith for (j=0; j<n; j++) { 27296861f193SBarry Smith for (k=0; k<dof; k++) { 27306861f193SBarry Smith sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 27316861f193SBarry Smith } 27326861f193SBarry Smith jrow++; 27336861f193SBarry Smith } 27346861f193SBarry Smith } 27356861f193SBarry Smith 27366861f193SBarry Smith ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 27373649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 27386861f193SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 27396861f193SBarry Smith PetscFunctionReturn(0); 27406861f193SBarry Smith } 27416861f193SBarry Smith 27426861f193SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 27436861f193SBarry Smith { 27446861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 27456861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 27466861f193SBarry Smith const PetscScalar *x,*v,*alpha; 27476861f193SBarry Smith PetscScalar *y; 27486861f193SBarry Smith PetscErrorCode ierr; 27496861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 27506861f193SBarry Smith PetscInt n,i,k; 27516861f193SBarry Smith 27526861f193SBarry Smith PetscFunctionBegin; 27533649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 27546861f193SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 27556861f193SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 27566861f193SBarry Smith for (i=0; i<m; i++) { 27576861f193SBarry Smith idx = a->j + a->i[i]; 27586861f193SBarry Smith v = a->a + a->i[i]; 27596861f193SBarry Smith n = a->i[i+1] - a->i[i]; 27606861f193SBarry Smith alpha = x + dof*i; 27616861f193SBarry Smith while (n-->0) { 27626861f193SBarry Smith for (k=0; k<dof; k++) { 27636861f193SBarry Smith y[dof*(*idx)+k] += alpha[k]*(*v); 27646861f193SBarry Smith } 276583ce7366SBarry Smith idx++; v++; 27666861f193SBarry Smith } 27676861f193SBarry Smith } 27686861f193SBarry Smith ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 27693649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 27706861f193SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 27716861f193SBarry Smith PetscFunctionReturn(0); 27726861f193SBarry Smith } 27736861f193SBarry Smith 27746861f193SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 27756861f193SBarry Smith { 27766861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 27776861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 27786861f193SBarry Smith const PetscScalar *x,*v,*alpha; 27796861f193SBarry Smith PetscScalar *y; 27806861f193SBarry Smith PetscErrorCode ierr; 27816861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 27826861f193SBarry Smith PetscInt n,i,k; 27836861f193SBarry Smith 27846861f193SBarry Smith PetscFunctionBegin; 27856861f193SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 27863649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 27876861f193SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 27886861f193SBarry Smith for (i=0; i<m; i++) { 27896861f193SBarry Smith idx = a->j + a->i[i]; 27906861f193SBarry Smith v = a->a + a->i[i]; 27916861f193SBarry Smith n = a->i[i+1] - a->i[i]; 27926861f193SBarry Smith alpha = x + dof*i; 27936861f193SBarry Smith while (n-->0) { 27946861f193SBarry Smith for (k=0; k<dof; k++) { 27956861f193SBarry Smith y[dof*(*idx)+k] += alpha[k]*(*v); 27966861f193SBarry Smith } 279783ce7366SBarry Smith idx++; v++; 27986861f193SBarry Smith } 27996861f193SBarry Smith } 28006861f193SBarry Smith ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 28013649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 28026861f193SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 28036861f193SBarry Smith PetscFunctionReturn(0); 28046861f193SBarry Smith } 28056861f193SBarry Smith 2806f4a53059SBarry Smith /*===================================================================================*/ 2807dfbe8321SBarry Smith PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2808f4a53059SBarry Smith { 28094cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2810dfbe8321SBarry Smith PetscErrorCode ierr; 2811f4a53059SBarry Smith 2812b24ad042SBarry Smith PetscFunctionBegin; 2813f4a53059SBarry Smith /* start the scatter */ 2814ca9f406cSSatish Balay ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 28154cb9d9b8SBarry Smith ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 2816ca9f406cSSatish Balay ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 28174cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 2818f4a53059SBarry Smith PetscFunctionReturn(0); 2819f4a53059SBarry Smith } 2820f4a53059SBarry Smith 2821dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 28224cb9d9b8SBarry Smith { 28234cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2824dfbe8321SBarry Smith PetscErrorCode ierr; 2825b24ad042SBarry Smith 28264cb9d9b8SBarry Smith PetscFunctionBegin; 28274cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 28284cb9d9b8SBarry Smith ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 2829ca9f406cSSatish Balay ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2830ca9f406cSSatish Balay ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 28314cb9d9b8SBarry Smith PetscFunctionReturn(0); 28324cb9d9b8SBarry Smith } 28334cb9d9b8SBarry Smith 2834dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 28354cb9d9b8SBarry Smith { 28364cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2837dfbe8321SBarry Smith PetscErrorCode ierr; 28384cb9d9b8SBarry Smith 2839b24ad042SBarry Smith PetscFunctionBegin; 28404cb9d9b8SBarry Smith /* start the scatter */ 2841ca9f406cSSatish Balay ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2842d72c5c08SBarry Smith ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2843ca9f406cSSatish Balay ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2844717f2ec8SHong Zhang ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 28454cb9d9b8SBarry Smith PetscFunctionReturn(0); 28464cb9d9b8SBarry Smith } 28474cb9d9b8SBarry Smith 2848dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 28494cb9d9b8SBarry Smith { 28504cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2851dfbe8321SBarry Smith PetscErrorCode ierr; 2852b24ad042SBarry Smith 28534cb9d9b8SBarry Smith PetscFunctionBegin; 28544cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2855d72c5c08SBarry Smith ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2856e4a140f6SJunchao Zhang ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2857ca9f406cSSatish Balay ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 28584cb9d9b8SBarry Smith PetscFunctionReturn(0); 28594cb9d9b8SBarry Smith } 28604cb9d9b8SBarry Smith 286195ddefa2SHong Zhang /* ----------------------------------------------------------------*/ 28624222ddf1SHong Zhang PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C) 28634222ddf1SHong Zhang { 28644222ddf1SHong Zhang Mat_Product *product = C->product; 28654222ddf1SHong Zhang 28664222ddf1SHong Zhang PetscFunctionBegin; 28674222ddf1SHong Zhang if (product->type == MATPRODUCT_PtAP) { 28684222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ; 286998921bdaSJacob Faibussowitsch } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices",MatProductTypes[product->type]); 28704222ddf1SHong Zhang PetscFunctionReturn(0); 28714222ddf1SHong Zhang } 28724222ddf1SHong Zhang 28734222ddf1SHong Zhang PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C) 28744222ddf1SHong Zhang { 28754222ddf1SHong Zhang PetscErrorCode ierr; 28764222ddf1SHong Zhang Mat_Product *product = C->product; 28774222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 28784222ddf1SHong Zhang Mat A=product->A,P=product->B; 28794222ddf1SHong Zhang PetscInt alg=1; /* set default algorithm */ 28804222ddf1SHong Zhang #if !defined(PETSC_HAVE_HYPRE) 28814222ddf1SHong Zhang const char *algTypes[4] = {"scalable","nonscalable","allatonce","allatonce_merged"}; 28824222ddf1SHong Zhang PetscInt nalg=4; 28834222ddf1SHong Zhang #else 28844222ddf1SHong Zhang const char *algTypes[5] = {"scalable","nonscalable","allatonce","allatonce_merged","hypre"}; 28854222ddf1SHong Zhang PetscInt nalg=5; 28864222ddf1SHong Zhang #endif 28874222ddf1SHong Zhang 28884222ddf1SHong Zhang PetscFunctionBegin; 28892c71b3e2SJacob Faibussowitsch PetscCheckFalse(product->type != MATPRODUCT_PtAP,PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices",MatProductTypes[product->type]); 28904222ddf1SHong Zhang 28914222ddf1SHong Zhang /* PtAP */ 28924222ddf1SHong Zhang /* Check matrix local sizes */ 28932c71b3e2SJacob Faibussowitsch PetscCheckFalse(A->rmap->rstart != P->rmap->rstart || A->rmap->rend != P->rmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,P->rmap->rstart,P->rmap->rend); 28942c71b3e2SJacob Faibussowitsch PetscCheckFalse(A->cmap->rstart != P->rmap->rstart || A->cmap->rend != P->rmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,P->rmap->rstart,P->rmap->rend); 28954222ddf1SHong Zhang 28964222ddf1SHong Zhang /* Set the default algorithm */ 28974222ddf1SHong Zhang ierr = PetscStrcmp(C->product->alg,"default",&flg);CHKERRQ(ierr); 28984222ddf1SHong Zhang if (flg) { 28994222ddf1SHong Zhang ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 29004222ddf1SHong Zhang } 29014222ddf1SHong Zhang 29024222ddf1SHong Zhang /* Get runtime option */ 29034222ddf1SHong Zhang ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr); 29043e662e0bSHong Zhang ierr = PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 29054222ddf1SHong Zhang if (flg) { 29064222ddf1SHong Zhang ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 29074222ddf1SHong Zhang } 29084222ddf1SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 29094222ddf1SHong Zhang 29104222ddf1SHong Zhang ierr = PetscStrcmp(C->product->alg,"allatonce",&flg);CHKERRQ(ierr); 29114222ddf1SHong Zhang if (flg) { 29124222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 29134222ddf1SHong Zhang PetscFunctionReturn(0); 29144222ddf1SHong Zhang } 29154222ddf1SHong Zhang 29164222ddf1SHong Zhang ierr = PetscStrcmp(C->product->alg,"allatonce_merged",&flg);CHKERRQ(ierr); 29174222ddf1SHong Zhang if (flg) { 29184222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 29194222ddf1SHong Zhang PetscFunctionReturn(0); 29204222ddf1SHong Zhang } 29214222ddf1SHong Zhang 29224222ddf1SHong Zhang /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */ 29236718818eSStefano Zampini ierr = PetscInfo((PetscObject)A,"Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n");CHKERRQ(ierr); 29244222ddf1SHong Zhang ierr = MatConvert(P,MATMPIAIJ,MAT_INPLACE_MATRIX,&P);CHKERRQ(ierr); 29256718818eSStefano Zampini ierr = MatProductSetFromOptions(C);CHKERRQ(ierr); 29264222ddf1SHong Zhang PetscFunctionReturn(0); 29274222ddf1SHong Zhang } 29284222ddf1SHong Zhang 29294222ddf1SHong Zhang /* ----------------------------------------------------------------*/ 29304222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat C) 29317ba1a0bfSKris Buschelman { 29327ba1a0bfSKris Buschelman PetscErrorCode ierr; 29330298fd71SBarry Smith PetscFreeSpaceList free_space=NULL,current_space=NULL; 29347ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp =(Mat_SeqMAIJ*)PP->data; 29357ba1a0bfSKris Buschelman Mat P =pp->AIJ; 29367ba1a0bfSKris Buschelman Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 2937d2840e09SBarry Smith PetscInt *pti,*ptj,*ptJ; 29387ba1a0bfSKris Buschelman PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 2939d2840e09SBarry Smith const PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof; 2940d2840e09SBarry Smith PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn; 29417ba1a0bfSKris Buschelman MatScalar *ca; 2942d2840e09SBarry Smith const PetscInt *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj; 29437ba1a0bfSKris Buschelman 29447ba1a0bfSKris Buschelman PetscFunctionBegin; 29457ba1a0bfSKris Buschelman /* Get ij structure of P^T */ 29467ba1a0bfSKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 29477ba1a0bfSKris Buschelman 29487ba1a0bfSKris Buschelman cn = pn*ppdof; 29497ba1a0bfSKris Buschelman /* Allocate ci array, arrays for fill computation and */ 29507ba1a0bfSKris Buschelman /* free space for accumulating nonzero column info */ 2951854ce69bSBarry Smith ierr = PetscMalloc1(cn+1,&ci);CHKERRQ(ierr); 29527ba1a0bfSKris Buschelman ci[0] = 0; 29537ba1a0bfSKris Buschelman 29547ba1a0bfSKris Buschelman /* Work arrays for rows of P^T*A */ 2955dcca6d9dSJed Brown ierr = PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);CHKERRQ(ierr); 2956580bdb30SBarry Smith ierr = PetscArrayzero(ptadenserow,an);CHKERRQ(ierr); 2957580bdb30SBarry Smith ierr = PetscArrayzero(denserow,cn);CHKERRQ(ierr); 29587ba1a0bfSKris Buschelman 29597ba1a0bfSKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 29607ba1a0bfSKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 29617ba1a0bfSKris Buschelman /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 2962f91af8c7SBarry Smith ierr = PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space);CHKERRQ(ierr); 29637ba1a0bfSKris Buschelman current_space = free_space; 29647ba1a0bfSKris Buschelman 29657ba1a0bfSKris Buschelman /* Determine symbolic info for each row of C: */ 29667ba1a0bfSKris Buschelman for (i=0; i<pn; i++) { 29677ba1a0bfSKris Buschelman ptnzi = pti[i+1] - pti[i]; 29687ba1a0bfSKris Buschelman ptJ = ptj + pti[i]; 29697ba1a0bfSKris Buschelman for (dof=0; dof<ppdof; dof++) { 29707ba1a0bfSKris Buschelman ptanzi = 0; 29717ba1a0bfSKris Buschelman /* Determine symbolic row of PtA: */ 29727ba1a0bfSKris Buschelman for (j=0; j<ptnzi; j++) { 29737ba1a0bfSKris Buschelman /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 29747ba1a0bfSKris Buschelman arow = ptJ[j]*ppdof + dof; 29757ba1a0bfSKris Buschelman /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 29767ba1a0bfSKris Buschelman anzj = ai[arow+1] - ai[arow]; 29777ba1a0bfSKris Buschelman ajj = aj + ai[arow]; 29787ba1a0bfSKris Buschelman for (k=0; k<anzj; k++) { 29797ba1a0bfSKris Buschelman if (!ptadenserow[ajj[k]]) { 29807ba1a0bfSKris Buschelman ptadenserow[ajj[k]] = -1; 29817ba1a0bfSKris Buschelman ptasparserow[ptanzi++] = ajj[k]; 29827ba1a0bfSKris Buschelman } 29837ba1a0bfSKris Buschelman } 29847ba1a0bfSKris Buschelman } 29857ba1a0bfSKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 29867ba1a0bfSKris Buschelman ptaj = ptasparserow; 29877ba1a0bfSKris Buschelman cnzi = 0; 29887ba1a0bfSKris Buschelman for (j=0; j<ptanzi; j++) { 29897ba1a0bfSKris Buschelman /* Get offset within block of P */ 29907ba1a0bfSKris Buschelman pshift = *ptaj%ppdof; 29917ba1a0bfSKris Buschelman /* Get block row of P */ 29927ba1a0bfSKris Buschelman prow = (*ptaj++)/ppdof; /* integer division */ 29937ba1a0bfSKris Buschelman /* P has same number of nonzeros per row as the compressed form */ 29947ba1a0bfSKris Buschelman pnzj = pi[prow+1] - pi[prow]; 29957ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 29967ba1a0bfSKris Buschelman for (k=0;k<pnzj;k++) { 29977ba1a0bfSKris Buschelman /* Locations in C are shifted by the offset within the block */ 29987ba1a0bfSKris Buschelman /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 29997ba1a0bfSKris Buschelman if (!denserow[pjj[k]*ppdof+pshift]) { 30007ba1a0bfSKris Buschelman denserow[pjj[k]*ppdof+pshift] = -1; 30017ba1a0bfSKris Buschelman sparserow[cnzi++] = pjj[k]*ppdof+pshift; 30027ba1a0bfSKris Buschelman } 30037ba1a0bfSKris Buschelman } 30047ba1a0bfSKris Buschelman } 30057ba1a0bfSKris Buschelman 30067ba1a0bfSKris Buschelman /* sort sparserow */ 30077ba1a0bfSKris Buschelman ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 30087ba1a0bfSKris Buschelman 30097ba1a0bfSKris Buschelman /* If free space is not available, make more free space */ 30107ba1a0bfSKris Buschelman /* Double the amount of total space in the list */ 30117ba1a0bfSKris Buschelman if (current_space->local_remaining<cnzi) { 3012f91af8c7SBarry Smith ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 30137ba1a0bfSKris Buschelman } 30147ba1a0bfSKris Buschelman 30157ba1a0bfSKris Buschelman /* Copy data into free space, and zero out denserows */ 3016580bdb30SBarry Smith ierr = PetscArraycpy(current_space->array,sparserow,cnzi);CHKERRQ(ierr); 301726fbe8dcSKarl Rupp 30187ba1a0bfSKris Buschelman current_space->array += cnzi; 30197ba1a0bfSKris Buschelman current_space->local_used += cnzi; 30207ba1a0bfSKris Buschelman current_space->local_remaining -= cnzi; 30217ba1a0bfSKris Buschelman 302226fbe8dcSKarl Rupp for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0; 302326fbe8dcSKarl Rupp for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0; 302426fbe8dcSKarl Rupp 30257ba1a0bfSKris Buschelman /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 30267ba1a0bfSKris Buschelman /* For now, we will recompute what is needed. */ 30277ba1a0bfSKris Buschelman ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 30287ba1a0bfSKris Buschelman } 30297ba1a0bfSKris Buschelman } 30307ba1a0bfSKris Buschelman /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 30317ba1a0bfSKris Buschelman /* Allocate space for cj, initialize cj, and */ 30327ba1a0bfSKris Buschelman /* destroy list of free space and other temporary array(s) */ 3033854ce69bSBarry Smith ierr = PetscMalloc1(ci[cn]+1,&cj);CHKERRQ(ierr); 3034a1a86e44SBarry Smith ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 303574ed9c26SBarry Smith ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr); 30367ba1a0bfSKris Buschelman 30377ba1a0bfSKris Buschelman /* Allocate space for ca */ 3038854ce69bSBarry Smith ierr = PetscCalloc1(ci[cn]+1,&ca);CHKERRQ(ierr); 30397ba1a0bfSKris Buschelman 30407ba1a0bfSKris Buschelman /* put together the new matrix */ 3041e4e71118SStefano Zampini ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,NULL,C);CHKERRQ(ierr); 30424222ddf1SHong Zhang ierr = MatSetBlockSize(C,pp->dof);CHKERRQ(ierr); 30437ba1a0bfSKris Buschelman 30447ba1a0bfSKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 30457ba1a0bfSKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 30464222ddf1SHong Zhang c = (Mat_SeqAIJ*)(C->data); 3047e6b907acSBarry Smith c->free_a = PETSC_TRUE; 3048e6b907acSBarry Smith c->free_ij = PETSC_TRUE; 30497ba1a0bfSKris Buschelman c->nonew = 0; 305026fbe8dcSKarl Rupp 30514222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 30524222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 30537ba1a0bfSKris Buschelman 30547ba1a0bfSKris Buschelman /* Clean up. */ 30557ba1a0bfSKris Buschelman ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 30567ba1a0bfSKris Buschelman PetscFunctionReturn(0); 30577ba1a0bfSKris Buschelman } 30587ba1a0bfSKris Buschelman 30597ba1a0bfSKris Buschelman PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 30607ba1a0bfSKris Buschelman { 30617ba1a0bfSKris Buschelman /* This routine requires testing -- first draft only */ 30627ba1a0bfSKris Buschelman PetscErrorCode ierr; 30637ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 30647ba1a0bfSKris Buschelman Mat P =pp->AIJ; 30657ba1a0bfSKris Buschelman Mat_SeqAIJ *a = (Mat_SeqAIJ*) A->data; 30667ba1a0bfSKris Buschelman Mat_SeqAIJ *p = (Mat_SeqAIJ*) P->data; 30677ba1a0bfSKris Buschelman Mat_SeqAIJ *c = (Mat_SeqAIJ*) C->data; 3068a2ea699eSBarry Smith const PetscInt *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj; 3069d2840e09SBarry Smith const PetscInt *ci=c->i,*cj=c->j,*cjj; 3070d2840e09SBarry Smith const PetscInt am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof; 3071d2840e09SBarry Smith PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense; 3072a2ea699eSBarry Smith const MatScalar *aa=a->a,*pa=p->a,*pA,*paj; 3073d2840e09SBarry Smith MatScalar *ca=c->a,*caj,*apa; 30747ba1a0bfSKris Buschelman 30757ba1a0bfSKris Buschelman PetscFunctionBegin; 30767ba1a0bfSKris Buschelman /* Allocate temporary array for storage of one row of A*P */ 30771795a4d1SJed Brown ierr = PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);CHKERRQ(ierr); 30787ba1a0bfSKris Buschelman 30797ba1a0bfSKris Buschelman /* Clear old values in C */ 3080580bdb30SBarry Smith ierr = PetscArrayzero(ca,ci[cm]);CHKERRQ(ierr); 30817ba1a0bfSKris Buschelman 30827ba1a0bfSKris Buschelman for (i=0; i<am; i++) { 30837ba1a0bfSKris Buschelman /* Form sparse row of A*P */ 30847ba1a0bfSKris Buschelman anzi = ai[i+1] - ai[i]; 30857ba1a0bfSKris Buschelman apnzj = 0; 30867ba1a0bfSKris Buschelman for (j=0; j<anzi; j++) { 30877ba1a0bfSKris Buschelman /* Get offset within block of P */ 30887ba1a0bfSKris Buschelman pshift = *aj%ppdof; 30897ba1a0bfSKris Buschelman /* Get block row of P */ 30907ba1a0bfSKris Buschelman prow = *aj++/ppdof; /* integer division */ 30917ba1a0bfSKris Buschelman pnzj = pi[prow+1] - pi[prow]; 30927ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 30937ba1a0bfSKris Buschelman paj = pa + pi[prow]; 30947ba1a0bfSKris Buschelman for (k=0; k<pnzj; k++) { 30957ba1a0bfSKris Buschelman poffset = pjj[k]*ppdof+pshift; 30967ba1a0bfSKris Buschelman if (!apjdense[poffset]) { 30977ba1a0bfSKris Buschelman apjdense[poffset] = -1; 30987ba1a0bfSKris Buschelman apj[apnzj++] = poffset; 30997ba1a0bfSKris Buschelman } 31007ba1a0bfSKris Buschelman apa[poffset] += (*aa)*paj[k]; 31017ba1a0bfSKris Buschelman } 3102dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr); 31037ba1a0bfSKris Buschelman aa++; 31047ba1a0bfSKris Buschelman } 31057ba1a0bfSKris Buschelman 31067ba1a0bfSKris Buschelman /* Sort the j index array for quick sparse axpy. */ 31077ba1a0bfSKris Buschelman /* Note: a array does not need sorting as it is in dense storage locations. */ 31087ba1a0bfSKris Buschelman ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 31097ba1a0bfSKris Buschelman 31107ba1a0bfSKris Buschelman /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 31117ba1a0bfSKris Buschelman prow = i/ppdof; /* integer division */ 31127ba1a0bfSKris Buschelman pshift = i%ppdof; 31137ba1a0bfSKris Buschelman poffset = pi[prow]; 31147ba1a0bfSKris Buschelman pnzi = pi[prow+1] - poffset; 31157ba1a0bfSKris Buschelman /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 31167ba1a0bfSKris Buschelman pJ = pj+poffset; 31177ba1a0bfSKris Buschelman pA = pa+poffset; 31187ba1a0bfSKris Buschelman for (j=0; j<pnzi; j++) { 31197ba1a0bfSKris Buschelman crow = (*pJ)*ppdof+pshift; 31207ba1a0bfSKris Buschelman cjj = cj + ci[crow]; 31217ba1a0bfSKris Buschelman caj = ca + ci[crow]; 31227ba1a0bfSKris Buschelman pJ++; 31237ba1a0bfSKris Buschelman /* Perform sparse axpy operation. Note cjj includes apj. */ 31247ba1a0bfSKris Buschelman for (k=0,nextap=0; nextap<apnzj; k++) { 312526fbe8dcSKarl Rupp if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]]; 31267ba1a0bfSKris Buschelman } 3127dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr); 31287ba1a0bfSKris Buschelman pA++; 31297ba1a0bfSKris Buschelman } 31307ba1a0bfSKris Buschelman 31317ba1a0bfSKris Buschelman /* Zero the current row info for A*P */ 31327ba1a0bfSKris Buschelman for (j=0; j<apnzj; j++) { 31337ba1a0bfSKris Buschelman apa[apj[j]] = 0.; 31347ba1a0bfSKris Buschelman apjdense[apj[j]] = 0; 31357ba1a0bfSKris Buschelman } 31367ba1a0bfSKris Buschelman } 31377ba1a0bfSKris Buschelman 31387ba1a0bfSKris Buschelman /* Assemble the final matrix and clean up */ 31397ba1a0bfSKris Buschelman ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 31407ba1a0bfSKris Buschelman ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 314174ed9c26SBarry Smith ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr); 314295ddefa2SHong Zhang PetscFunctionReturn(0); 314395ddefa2SHong Zhang } 31447ba1a0bfSKris Buschelman 31454222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C) 31462121bac1SHong Zhang { 31472121bac1SHong Zhang PetscErrorCode ierr; 31484222ddf1SHong Zhang Mat_Product *product = C->product; 31494222ddf1SHong Zhang Mat A=product->A,P=product->B; 31502121bac1SHong Zhang 31512121bac1SHong Zhang PetscFunctionBegin; 31524222ddf1SHong Zhang ierr = MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,product->fill,C);CHKERRQ(ierr); 31532121bac1SHong Zhang PetscFunctionReturn(0); 31542121bac1SHong Zhang } 31552121bac1SHong Zhang 3156f7a08781SBarry Smith PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 315795ddefa2SHong Zhang { 315895ddefa2SHong Zhang PetscFunctionBegin; 31593e0c911fSBarry Smith SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet"); 316095ddefa2SHong Zhang } 316195ddefa2SHong Zhang 3162f7a08781SBarry Smith PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C) 316395ddefa2SHong Zhang { 316495ddefa2SHong Zhang PetscFunctionBegin; 31653e0c911fSBarry Smith SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 31667ba1a0bfSKris Buschelman } 31675c65b9ecSFande Kong 3168bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,Mat); 3169bc8e477aSFande Kong 3170bc8e477aSFande Kong PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,Mat C) 3171bc8e477aSFande Kong { 3172bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data; 3173bc8e477aSFande Kong PetscErrorCode ierr; 3174bc8e477aSFande Kong 3175bc8e477aSFande Kong PetscFunctionBegin; 317634bcad68SFande Kong 3177bc8e477aSFande Kong ierr = MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,C);CHKERRQ(ierr); 3178bc8e477aSFande Kong PetscFunctionReturn(0); 3179bc8e477aSFande Kong } 3180bc8e477aSFande Kong 31814222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,PetscReal,Mat); 3182bc8e477aSFande Kong 31834222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,PetscReal fill,Mat C) 3184bc8e477aSFande Kong { 3185bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data; 3186bc8e477aSFande Kong PetscErrorCode ierr; 3187bc8e477aSFande Kong 3188bc8e477aSFande Kong PetscFunctionBegin; 3189bc8e477aSFande Kong ierr = MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,fill,C);CHKERRQ(ierr); 31904222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce; 3191bc8e477aSFande Kong PetscFunctionReturn(0); 3192bc8e477aSFande Kong } 3193bc8e477aSFande Kong 3194bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,Mat); 3195bc8e477aSFande Kong 3196bc8e477aSFande Kong PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,Mat C) 3197bc8e477aSFande Kong { 3198bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data; 3199bc8e477aSFande Kong PetscErrorCode ierr; 3200bc8e477aSFande Kong 3201bc8e477aSFande Kong PetscFunctionBegin; 320234bcad68SFande Kong 3203bc8e477aSFande Kong ierr = MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,C);CHKERRQ(ierr); 3204bc8e477aSFande Kong PetscFunctionReturn(0); 3205bc8e477aSFande Kong } 3206bc8e477aSFande Kong 32074222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,PetscReal,Mat); 3208bc8e477aSFande Kong 32094222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,PetscReal fill,Mat C) 3210bc8e477aSFande Kong { 3211bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data; 3212bc8e477aSFande Kong PetscErrorCode ierr; 3213bc8e477aSFande Kong 3214bc8e477aSFande Kong PetscFunctionBegin; 321534bcad68SFande Kong 3216bc8e477aSFande Kong ierr = MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,fill,C);CHKERRQ(ierr); 32174222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged; 3218bc8e477aSFande Kong PetscFunctionReturn(0); 3219bc8e477aSFande Kong } 3220bc8e477aSFande Kong 32214222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C) 3222bc8e477aSFande Kong { 3223bc8e477aSFande Kong PetscErrorCode ierr; 32244222ddf1SHong Zhang Mat_Product *product = C->product; 32254222ddf1SHong Zhang Mat A=product->A,P=product->B; 32264222ddf1SHong Zhang PetscBool flg; 3227bc8e477aSFande Kong 3228bc8e477aSFande Kong PetscFunctionBegin; 32294222ddf1SHong Zhang ierr = PetscStrcmp(product->alg,"allatonce",&flg);CHKERRQ(ierr); 32304222ddf1SHong Zhang if (flg) { 32314222ddf1SHong Zhang ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A,P,product->fill,C);CHKERRQ(ierr); 32324222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 32334222ddf1SHong Zhang PetscFunctionReturn(0); 3234bc8e477aSFande Kong } 323534bcad68SFande Kong 32364222ddf1SHong Zhang ierr = PetscStrcmp(product->alg,"allatonce_merged",&flg);CHKERRQ(ierr); 32374222ddf1SHong Zhang if (flg) { 32384222ddf1SHong Zhang ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A,P,product->fill,C);CHKERRQ(ierr); 32394222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 32404222ddf1SHong Zhang PetscFunctionReturn(0); 32414222ddf1SHong Zhang } 324234bcad68SFande Kong 32434222ddf1SHong Zhang SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"Mat Product Algorithm is not supported"); 3244bc8e477aSFande Kong } 3245bc8e477aSFande Kong 3246cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 32479c4fc4c7SBarry Smith { 32489c4fc4c7SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 3249ceb03754SKris Buschelman Mat a = b->AIJ,B; 32509c4fc4c7SBarry Smith Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 32519c4fc4c7SBarry Smith PetscErrorCode ierr; 32520fd73130SBarry Smith PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 3253ba8c8a56SBarry Smith PetscInt *cols; 3254ba8c8a56SBarry Smith PetscScalar *vals; 32559c4fc4c7SBarry Smith 32569c4fc4c7SBarry Smith PetscFunctionBegin; 32579c4fc4c7SBarry Smith ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 3258785e854fSJed Brown ierr = PetscMalloc1(dof*m,&ilen);CHKERRQ(ierr); 32599c4fc4c7SBarry Smith for (i=0; i<m; i++) { 32609c4fc4c7SBarry Smith nmax = PetscMax(nmax,aij->ilen[i]); 326126fbe8dcSKarl Rupp for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i]; 32629c4fc4c7SBarry Smith } 3263fdc842d1SBarry Smith ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3264fdc842d1SBarry Smith ierr = MatSetSizes(B,dof*m,dof*n,dof*m,dof*n);CHKERRQ(ierr); 3265fdc842d1SBarry Smith ierr = MatSetType(B,newtype);CHKERRQ(ierr); 3266fdc842d1SBarry Smith ierr = MatSeqAIJSetPreallocation(B,0,ilen);CHKERRQ(ierr); 32679c4fc4c7SBarry Smith ierr = PetscFree(ilen);CHKERRQ(ierr); 3268785e854fSJed Brown ierr = PetscMalloc1(nmax,&icols);CHKERRQ(ierr); 32699c4fc4c7SBarry Smith ii = 0; 32709c4fc4c7SBarry Smith for (i=0; i<m; i++) { 3271ba8c8a56SBarry Smith ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 32720fd73130SBarry Smith for (j=0; j<dof; j++) { 327326fbe8dcSKarl Rupp for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j; 3274ceb03754SKris Buschelman ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 32759c4fc4c7SBarry Smith ii++; 32769c4fc4c7SBarry Smith } 3277ba8c8a56SBarry Smith ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 32789c4fc4c7SBarry Smith } 32799c4fc4c7SBarry Smith ierr = PetscFree(icols);CHKERRQ(ierr); 3280ceb03754SKris Buschelman ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3281ceb03754SKris Buschelman ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3282ceb03754SKris Buschelman 3283511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 328428be2f97SBarry Smith ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 3285c3d102feSKris Buschelman } else { 3286ceb03754SKris Buschelman *newmat = B; 3287c3d102feSKris Buschelman } 32889c4fc4c7SBarry Smith PetscFunctionReturn(0); 32899c4fc4c7SBarry Smith } 32909c4fc4c7SBarry Smith 3291c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h> 3292be1d678aSKris Buschelman 3293cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 32940fd73130SBarry Smith { 32950fd73130SBarry Smith Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 3296ceb03754SKris Buschelman Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 32970fd73130SBarry Smith Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 32980fd73130SBarry Smith Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 32990fd73130SBarry Smith Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 33000fd73130SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 33010298fd71SBarry Smith PetscInt dof = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0; 33020298fd71SBarry Smith PetscInt *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL; 33030fd73130SBarry Smith PetscInt rstart,cstart,*garray,ii,k; 33040fd73130SBarry Smith PetscErrorCode ierr; 33050fd73130SBarry Smith PetscScalar *vals,*ovals; 33060fd73130SBarry Smith 33070fd73130SBarry Smith PetscFunctionBegin; 3308dcca6d9dSJed Brown ierr = PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);CHKERRQ(ierr); 3309d0f46423SBarry Smith for (i=0; i<A->rmap->n/dof; i++) { 33100fd73130SBarry Smith nmax = PetscMax(nmax,AIJ->ilen[i]); 33110fd73130SBarry Smith onmax = PetscMax(onmax,OAIJ->ilen[i]); 33120fd73130SBarry Smith for (j=0; j<dof; j++) { 33130fd73130SBarry Smith dnz[dof*i+j] = AIJ->ilen[i]; 33140fd73130SBarry Smith onz[dof*i+j] = OAIJ->ilen[i]; 33150fd73130SBarry Smith } 33160fd73130SBarry Smith } 3317fdc842d1SBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3318fdc842d1SBarry Smith ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3319fdc842d1SBarry Smith ierr = MatSetType(B,newtype);CHKERRQ(ierr); 3320fdc842d1SBarry Smith ierr = MatMPIAIJSetPreallocation(B,0,dnz,0,onz);CHKERRQ(ierr); 3321f27682edSJed Brown ierr = MatSetBlockSize(B,dof);CHKERRQ(ierr); 33220fd73130SBarry Smith ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 33230fd73130SBarry Smith 3324dcca6d9dSJed Brown ierr = PetscMalloc2(nmax,&icols,onmax,&oicols);CHKERRQ(ierr); 3325d0f46423SBarry Smith rstart = dof*maij->A->rmap->rstart; 3326d0f46423SBarry Smith cstart = dof*maij->A->cmap->rstart; 33270fd73130SBarry Smith garray = mpiaij->garray; 33280fd73130SBarry Smith 33290fd73130SBarry Smith ii = rstart; 3330d0f46423SBarry Smith for (i=0; i<A->rmap->n/dof; i++) { 33310fd73130SBarry Smith ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 33320fd73130SBarry Smith ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 33330fd73130SBarry Smith for (j=0; j<dof; j++) { 33340fd73130SBarry Smith for (k=0; k<ncols; k++) { 33350fd73130SBarry Smith icols[k] = cstart + dof*cols[k]+j; 33360fd73130SBarry Smith } 33370fd73130SBarry Smith for (k=0; k<oncols; k++) { 33380fd73130SBarry Smith oicols[k] = dof*garray[ocols[k]]+j; 33390fd73130SBarry Smith } 3340ceb03754SKris Buschelman ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3341ceb03754SKris Buschelman ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 33420fd73130SBarry Smith ii++; 33430fd73130SBarry Smith } 33440fd73130SBarry Smith ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 33450fd73130SBarry Smith ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 33460fd73130SBarry Smith } 33470fd73130SBarry Smith ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 33480fd73130SBarry Smith 3349ceb03754SKris Buschelman ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3350ceb03754SKris Buschelman ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3351ceb03754SKris Buschelman 3352511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 33537adad957SLisandro Dalcin PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 33547adad957SLisandro Dalcin ((PetscObject)A)->refct = 1; 335526fbe8dcSKarl Rupp 335628be2f97SBarry Smith ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 335726fbe8dcSKarl Rupp 33587adad957SLisandro Dalcin ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 3359c3d102feSKris Buschelman } else { 3360ceb03754SKris Buschelman *newmat = B; 3361c3d102feSKris Buschelman } 33620fd73130SBarry Smith PetscFunctionReturn(0); 33630fd73130SBarry Smith } 33640fd73130SBarry Smith 33657dae84e0SHong Zhang PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 33669e516c8fSBarry Smith { 33679e516c8fSBarry Smith PetscErrorCode ierr; 33689e516c8fSBarry Smith Mat A; 33699e516c8fSBarry Smith 33709e516c8fSBarry Smith PetscFunctionBegin; 33719e516c8fSBarry Smith ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 33727dae84e0SHong Zhang ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 33739e516c8fSBarry Smith ierr = MatDestroy(&A);CHKERRQ(ierr); 33749e516c8fSBarry Smith PetscFunctionReturn(0); 33759e516c8fSBarry Smith } 33760fd73130SBarry Smith 3377ec626240SStefano Zampini PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *submat[]) 3378ec626240SStefano Zampini { 3379ec626240SStefano Zampini PetscErrorCode ierr; 3380ec626240SStefano Zampini Mat A; 3381ec626240SStefano Zampini 3382ec626240SStefano Zampini PetscFunctionBegin; 3383ec626240SStefano Zampini ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 3384ec626240SStefano Zampini ierr = MatCreateSubMatrices(A,n,irow,icol,scall,submat);CHKERRQ(ierr); 3385ec626240SStefano Zampini ierr = MatDestroy(&A);CHKERRQ(ierr); 3386ec626240SStefano Zampini PetscFunctionReturn(0); 3387ec626240SStefano Zampini } 3388ec626240SStefano Zampini 3389bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */ 3390480dffcfSJed Brown /*@ 33910bad9183SKris Buschelman MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 33920bad9183SKris Buschelman operations for multicomponent problems. It interpolates each component the same 33930bad9183SKris Buschelman way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 33940bad9183SKris Buschelman and MATMPIAIJ for distributed matrices. 33950bad9183SKris Buschelman 3396ff585edeSJed Brown Collective 3397ff585edeSJed Brown 3398ff585edeSJed Brown Input Parameters: 3399ff585edeSJed Brown + A - the AIJ matrix describing the action on blocks 3400ff585edeSJed Brown - dof - the block size (number of components per node) 3401ff585edeSJed Brown 3402ff585edeSJed Brown Output Parameter: 3403ff585edeSJed Brown . maij - the new MAIJ matrix 3404ff585edeSJed Brown 34050bad9183SKris Buschelman Operations provided: 3406*67b8a455SSatish Balay .vb 3407*67b8a455SSatish Balay MatMult 3408*67b8a455SSatish Balay MatMultTranspose 3409*67b8a455SSatish Balay MatMultAdd 3410*67b8a455SSatish Balay MatMultTransposeAdd 3411*67b8a455SSatish Balay MatView 3412*67b8a455SSatish Balay .ve 34130bad9183SKris Buschelman 34140bad9183SKris Buschelman Level: advanced 34150bad9183SKris Buschelman 3416b409243cSBarry Smith .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ 3417ff585edeSJed Brown @*/ 34187087cfbeSBarry Smith PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 341982b1193eSBarry Smith { 3420dfbe8321SBarry Smith PetscErrorCode ierr; 3421b24ad042SBarry Smith PetscMPIInt size; 3422b24ad042SBarry Smith PetscInt n; 342382b1193eSBarry Smith Mat B; 3424fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 3425fdc842d1SBarry Smith /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */ 3426fdc842d1SBarry Smith PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE; 3427fdc842d1SBarry Smith #endif 342882b1193eSBarry Smith 342982b1193eSBarry Smith PetscFunctionBegin; 3430fdc842d1SBarry Smith dof = PetscAbs(dof); 3431d72c5c08SBarry Smith ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3432d72c5c08SBarry Smith 343326fbe8dcSKarl Rupp if (dof == 1) *maij = A; 343426fbe8dcSKarl Rupp else { 3435ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3436c248e2fdSStefano Zampini /* propagate vec type */ 3437c248e2fdSStefano Zampini ierr = MatSetVecType(B,A->defaultvectype);CHKERRQ(ierr); 3438d0f46423SBarry Smith ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr); 3439bccba955SJed Brown ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr); 3440bccba955SJed Brown ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr); 3441bccba955SJed Brown ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3442bccba955SJed Brown ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 344326fbe8dcSKarl Rupp 3444cd3bbe55SBarry Smith B->assembled = PETSC_TRUE; 3445d72c5c08SBarry Smith 3446ffc4695bSBarry Smith ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 3447f4a53059SBarry Smith if (size == 1) { 3448feb9fe23SJed Brown Mat_SeqMAIJ *b; 3449feb9fe23SJed Brown 3450b9b97703SBarry Smith ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 345126fbe8dcSKarl Rupp 34520298fd71SBarry Smith B->ops->setup = NULL; 34534cb9d9b8SBarry Smith B->ops->destroy = MatDestroy_SeqMAIJ; 34540fd73130SBarry Smith B->ops->view = MatView_SeqMAIJ; 34554222ddf1SHong Zhang 3456feb9fe23SJed Brown b = (Mat_SeqMAIJ*)B->data; 3457b9b97703SBarry Smith b->dof = dof; 34584cb9d9b8SBarry Smith b->AIJ = A; 345926fbe8dcSKarl Rupp 3460d72c5c08SBarry Smith if (dof == 2) { 3461cd3bbe55SBarry Smith B->ops->mult = MatMult_SeqMAIJ_2; 3462cd3bbe55SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_2; 3463cd3bbe55SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 3464cd3bbe55SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 3465bcc973b7SBarry Smith } else if (dof == 3) { 3466bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_3; 3467bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_3; 3468bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 3469bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 3470bcc973b7SBarry Smith } else if (dof == 4) { 3471bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_4; 3472bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_4; 3473bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 3474bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 3475f9fae5adSBarry Smith } else if (dof == 5) { 3476f9fae5adSBarry Smith B->ops->mult = MatMult_SeqMAIJ_5; 3477f9fae5adSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_5; 3478f9fae5adSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 3479f9fae5adSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 34806cd98798SBarry Smith } else if (dof == 6) { 34816cd98798SBarry Smith B->ops->mult = MatMult_SeqMAIJ_6; 34826cd98798SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_6; 34836cd98798SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 34846cd98798SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 3485ed8eea03SBarry Smith } else if (dof == 7) { 3486ed8eea03SBarry Smith B->ops->mult = MatMult_SeqMAIJ_7; 3487ed8eea03SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_7; 3488ed8eea03SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 3489ed8eea03SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 349066ed3db0SBarry Smith } else if (dof == 8) { 349166ed3db0SBarry Smith B->ops->mult = MatMult_SeqMAIJ_8; 349266ed3db0SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_8; 349366ed3db0SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 349466ed3db0SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 34952b692628SMatthew Knepley } else if (dof == 9) { 34962b692628SMatthew Knepley B->ops->mult = MatMult_SeqMAIJ_9; 34972b692628SMatthew Knepley B->ops->multadd = MatMultAdd_SeqMAIJ_9; 34982b692628SMatthew Knepley B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 34992b692628SMatthew Knepley B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 3500b9cda22cSBarry Smith } else if (dof == 10) { 3501b9cda22cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_10; 3502b9cda22cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_10; 3503b9cda22cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 3504b9cda22cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 3505dbdd7285SBarry Smith } else if (dof == 11) { 3506dbdd7285SBarry Smith B->ops->mult = MatMult_SeqMAIJ_11; 3507dbdd7285SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_11; 3508dbdd7285SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 3509dbdd7285SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 35102f7816d4SBarry Smith } else if (dof == 16) { 35112f7816d4SBarry Smith B->ops->mult = MatMult_SeqMAIJ_16; 35122f7816d4SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_16; 35132f7816d4SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 35142f7816d4SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 3515ed1c418cSBarry Smith } else if (dof == 18) { 3516ed1c418cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_18; 3517ed1c418cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_18; 3518ed1c418cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 3519ed1c418cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 352082b1193eSBarry Smith } else { 35216861f193SBarry Smith B->ops->mult = MatMult_SeqMAIJ_N; 35226861f193SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_N; 35236861f193SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 35246861f193SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 352582b1193eSBarry Smith } 3526fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 3527fdc842d1SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaijcusparse_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 3528fdc842d1SBarry Smith #endif 3529bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 35304222ddf1SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaij_seqmaij_C",MatProductSetFromOptions_SeqAIJ_SeqMAIJ);CHKERRQ(ierr); 3531f4a53059SBarry Smith } else { 3532f4a53059SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)A->data; 3533feb9fe23SJed Brown Mat_MPIMAIJ *b; 3534f4a53059SBarry Smith IS from,to; 3535f4a53059SBarry Smith Vec gvec; 3536f4a53059SBarry Smith 3537b9b97703SBarry Smith ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 353826fbe8dcSKarl Rupp 35390298fd71SBarry Smith B->ops->setup = NULL; 3540d72c5c08SBarry Smith B->ops->destroy = MatDestroy_MPIMAIJ; 35410fd73130SBarry Smith B->ops->view = MatView_MPIMAIJ; 354226fbe8dcSKarl Rupp 3543b9b97703SBarry Smith b = (Mat_MPIMAIJ*)B->data; 3544b9b97703SBarry Smith b->dof = dof; 3545b9b97703SBarry Smith b->A = A; 354626fbe8dcSKarl Rupp 3547fdc842d1SBarry Smith ierr = MatCreateMAIJ(mpiaij->A,-dof,&b->AIJ);CHKERRQ(ierr); 3548fdc842d1SBarry Smith ierr = MatCreateMAIJ(mpiaij->B,-dof,&b->OAIJ);CHKERRQ(ierr); 35494cb9d9b8SBarry Smith 3550f4a53059SBarry Smith ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 3551a34bdc0bSBarry Smith ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr); 3552a34bdc0bSBarry Smith ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr); 355313288a74SBarry Smith ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 3554a34bdc0bSBarry Smith ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr); 3555f4a53059SBarry Smith 3556f4a53059SBarry Smith /* create two temporary Index sets for build scatter gather */ 3557ce94432eSBarry Smith ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 3558f4a53059SBarry Smith ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 3559f4a53059SBarry Smith 3560f4a53059SBarry Smith /* create temporary global vector to generate scatter context */ 3561ce94432eSBarry Smith ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);CHKERRQ(ierr); 3562f4a53059SBarry Smith 3563f4a53059SBarry Smith /* generate the scatter context */ 35649448b7f1SJunchao Zhang ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 3565f4a53059SBarry Smith 35666bf464f9SBarry Smith ierr = ISDestroy(&from);CHKERRQ(ierr); 35676bf464f9SBarry Smith ierr = ISDestroy(&to);CHKERRQ(ierr); 35686bf464f9SBarry Smith ierr = VecDestroy(&gvec);CHKERRQ(ierr); 3569f4a53059SBarry Smith 3570f4a53059SBarry Smith B->ops->mult = MatMult_MPIMAIJ_dof; 35714cb9d9b8SBarry Smith B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 35724cb9d9b8SBarry Smith B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 35734cb9d9b8SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 357426fbe8dcSKarl Rupp 3575fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 3576fdc842d1SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaijcusparse_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 3577fdc842d1SBarry Smith #endif 3578bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 35794222ddf1SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpimaij_C",MatProductSetFromOptions_MPIAIJ_MPIMAIJ);CHKERRQ(ierr); 3580f4a53059SBarry Smith } 35817dae84e0SHong Zhang B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ; 3582ec626240SStefano Zampini B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ; 35834994cf47SJed Brown ierr = MatSetUp(B);CHKERRQ(ierr); 3584fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 3585fdc842d1SBarry Smith /* temporary until we have CUDA implementation of MAIJ */ 3586fdc842d1SBarry Smith { 3587fdc842d1SBarry Smith PetscBool flg; 3588fdc842d1SBarry Smith if (convert) { 3589fdc842d1SBarry Smith ierr = PetscObjectTypeCompareAny((PetscObject)A,&flg,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,MATAIJCUSPARSE,"");CHKERRQ(ierr); 3590fdc842d1SBarry Smith if (flg) { 3591fdc842d1SBarry Smith ierr = MatConvert(B,((PetscObject)A)->type_name,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 3592fdc842d1SBarry Smith } 3593fdc842d1SBarry Smith } 3594fdc842d1SBarry Smith } 3595fdc842d1SBarry Smith #endif 3596cd3bbe55SBarry Smith *maij = B; 3597d72c5c08SBarry Smith } 359882b1193eSBarry Smith PetscFunctionReturn(0); 359982b1193eSBarry Smith } 3600