1be1d678aSKris Buschelman 282b1193eSBarry Smith /* 3cd3bbe55SBarry Smith Defines the basic matrix operations for the MAIJ matrix storage format. 4cd3bbe55SBarry Smith This format is used for restriction and interpolation operations for 5cd3bbe55SBarry Smith multicomponent problems. It interpolates each component the same way 6cd3bbe55SBarry Smith independently. 7cd3bbe55SBarry Smith 8cd3bbe55SBarry Smith We provide: 9cd3bbe55SBarry Smith MatMult() 10cd3bbe55SBarry Smith MatMultTranspose() 11cd3bbe55SBarry Smith MatMultTransposeAdd() 12cd3bbe55SBarry Smith MatMultAdd() 13cd3bbe55SBarry Smith and 14cd3bbe55SBarry Smith MatCreateMAIJ(Mat,dof,Mat*) 15f4a53059SBarry Smith 16f4a53059SBarry Smith This single directory handles both the sequential and parallel codes 1782b1193eSBarry Smith */ 1882b1193eSBarry Smith 19c6db04a5SJed Brown #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 20c6db04a5SJed Brown #include <../src/mat/utils/freespace.h> 2182b1193eSBarry Smith 22b350b9acSSatish Balay /*@ 23ff585edeSJed Brown MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix 24ff585edeSJed Brown 25ff585edeSJed Brown Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel 26ff585edeSJed Brown 27ff585edeSJed Brown Input Parameter: 28ff585edeSJed Brown . A - the MAIJ matrix 29ff585edeSJed Brown 30ff585edeSJed Brown Output Parameter: 31ff585edeSJed Brown . B - the AIJ matrix 32ff585edeSJed Brown 33ff585edeSJed Brown Level: advanced 34ff585edeSJed Brown 35ff585edeSJed Brown Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 36ff585edeSJed Brown 37ff585edeSJed Brown .seealso: MatCreateMAIJ() 38ff585edeSJed Brown @*/ 397087cfbeSBarry Smith PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B) 40b9b97703SBarry Smith { 41dfbe8321SBarry Smith PetscErrorCode ierr; 42ace3abfcSBarry Smith PetscBool ismpimaij,isseqmaij; 43b9b97703SBarry Smith 44b9b97703SBarry Smith PetscFunctionBegin; 45251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 46251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 47b9b97703SBarry Smith if (ismpimaij) { 48b9b97703SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 49b9b97703SBarry Smith 50b9b97703SBarry Smith *B = b->A; 51b9b97703SBarry Smith } else if (isseqmaij) { 52b9b97703SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 53b9b97703SBarry Smith 54b9b97703SBarry Smith *B = b->AIJ; 55b9b97703SBarry Smith } else { 56b9b97703SBarry Smith *B = A; 57b9b97703SBarry Smith } 58b9b97703SBarry Smith PetscFunctionReturn(0); 59b9b97703SBarry Smith } 60b9b97703SBarry Smith 61ff585edeSJed Brown /*@C 62ff585edeSJed Brown MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size 63ff585edeSJed Brown 643f9fe445SBarry Smith Logically Collective 65ff585edeSJed Brown 66ff585edeSJed Brown Input Parameter: 67ff585edeSJed Brown + A - the MAIJ matrix 68ff585edeSJed Brown - dof - the block size for the new matrix 69ff585edeSJed Brown 70ff585edeSJed Brown Output Parameter: 71ff585edeSJed Brown . B - the new MAIJ matrix 72ff585edeSJed Brown 73ff585edeSJed Brown Level: advanced 74ff585edeSJed Brown 75ff585edeSJed Brown .seealso: MatCreateMAIJ() 76ff585edeSJed Brown @*/ 777087cfbeSBarry Smith PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 78b9b97703SBarry Smith { 79dfbe8321SBarry Smith PetscErrorCode ierr; 800298fd71SBarry Smith Mat Aij = NULL; 81b9b97703SBarry Smith 82b9b97703SBarry Smith PetscFunctionBegin; 83c5eb9154SBarry Smith PetscValidLogicalCollectiveInt(A,dof,2); 84b9b97703SBarry Smith ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 85b9b97703SBarry Smith ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 86b9b97703SBarry Smith PetscFunctionReturn(0); 87b9b97703SBarry Smith } 88b9b97703SBarry Smith 89dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 9082b1193eSBarry Smith { 91dfbe8321SBarry Smith PetscErrorCode ierr; 924cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 9382b1193eSBarry Smith 9482b1193eSBarry Smith PetscFunctionBegin; 956bf464f9SBarry Smith ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 96bf0cc555SLisandro Dalcin ierr = PetscFree(A->data);CHKERRQ(ierr); 97bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);CHKERRQ(ierr); 98bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaij_seqmaij_C",NULL);CHKERRQ(ierr); 994cb9d9b8SBarry Smith PetscFunctionReturn(0); 1004cb9d9b8SBarry Smith } 1014cb9d9b8SBarry Smith 102356c569eSBarry Smith PetscErrorCode MatSetUp_MAIJ(Mat A) 103356c569eSBarry Smith { 104356c569eSBarry Smith PetscFunctionBegin; 105ce94432eSBarry Smith SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices"); 106356c569eSBarry Smith PetscFunctionReturn(0); 107356c569eSBarry Smith } 108356c569eSBarry Smith 1090fd73130SBarry Smith PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 1100fd73130SBarry Smith { 1110fd73130SBarry Smith PetscErrorCode ierr; 1120fd73130SBarry Smith Mat B; 1130fd73130SBarry Smith 1140fd73130SBarry Smith PetscFunctionBegin; 115a2ea699eSBarry Smith ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1160fd73130SBarry Smith ierr = MatView(B,viewer);CHKERRQ(ierr); 1176bf464f9SBarry Smith ierr = MatDestroy(&B);CHKERRQ(ierr); 1180fd73130SBarry Smith PetscFunctionReturn(0); 1190fd73130SBarry Smith } 1200fd73130SBarry Smith 1210fd73130SBarry Smith PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 1220fd73130SBarry Smith { 1230fd73130SBarry Smith PetscErrorCode ierr; 1240fd73130SBarry Smith Mat B; 1250fd73130SBarry Smith 1260fd73130SBarry Smith PetscFunctionBegin; 127a2ea699eSBarry Smith ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1280fd73130SBarry Smith ierr = MatView(B,viewer);CHKERRQ(ierr); 1296bf464f9SBarry Smith ierr = MatDestroy(&B);CHKERRQ(ierr); 1300fd73130SBarry Smith PetscFunctionReturn(0); 1310fd73130SBarry Smith } 1320fd73130SBarry Smith 133dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 1344cb9d9b8SBarry Smith { 135dfbe8321SBarry Smith PetscErrorCode ierr; 1364cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1374cb9d9b8SBarry Smith 1384cb9d9b8SBarry Smith PetscFunctionBegin; 1396bf464f9SBarry Smith ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 1406bf464f9SBarry Smith ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 1416bf464f9SBarry Smith ierr = MatDestroy(&b->A);CHKERRQ(ierr); 1426bf464f9SBarry Smith ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 1436bf464f9SBarry Smith ierr = VecDestroy(&b->w);CHKERRQ(ierr); 144bf0cc555SLisandro Dalcin ierr = PetscFree(A->data);CHKERRQ(ierr); 145bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);CHKERRQ(ierr); 146bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_mpiaij_mpimaij_C",NULL);CHKERRQ(ierr); 147dbd8c25aSHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 148b9b97703SBarry Smith PetscFunctionReturn(0); 149b9b97703SBarry Smith } 150b9b97703SBarry Smith 1510bad9183SKris Buschelman /*MC 152fafad747SKris Buschelman MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 1530bad9183SKris Buschelman multicomponent problems, interpolating or restricting each component the same way independently. 1540bad9183SKris Buschelman The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 1550bad9183SKris Buschelman 1560bad9183SKris Buschelman Operations provided: 1570bad9183SKris Buschelman . MatMult 1580bad9183SKris Buschelman . MatMultTranspose 1590bad9183SKris Buschelman . MatMultAdd 1600bad9183SKris Buschelman . MatMultTransposeAdd 1610bad9183SKris Buschelman 1620bad9183SKris Buschelman Level: advanced 1630bad9183SKris Buschelman 164b409243cSBarry Smith .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ() 1650bad9183SKris Buschelman M*/ 1660bad9183SKris Buschelman 1678cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) 16882b1193eSBarry Smith { 169dfbe8321SBarry Smith PetscErrorCode ierr; 1704cb9d9b8SBarry Smith Mat_MPIMAIJ *b; 17164b52464SHong Zhang PetscMPIInt size; 17282b1193eSBarry Smith 17382b1193eSBarry Smith PetscFunctionBegin; 174b00a9115SJed Brown ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 175b0a32e0cSBarry Smith A->data = (void*)b; 17626fbe8dcSKarl Rupp 177cd3bbe55SBarry Smith ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 17826fbe8dcSKarl Rupp 179356c569eSBarry Smith A->ops->setup = MatSetUp_MAIJ; 180f4a53059SBarry Smith 181cd3bbe55SBarry Smith b->AIJ = 0; 182cd3bbe55SBarry Smith b->dof = 0; 183f4a53059SBarry Smith b->OAIJ = 0; 184f4a53059SBarry Smith b->ctx = 0; 185f4a53059SBarry Smith b->w = 0; 186ce94432eSBarry Smith ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 18764b52464SHong Zhang if (size == 1) { 18864b52464SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr); 18964b52464SHong Zhang } else { 19064b52464SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr); 19164b52464SHong Zhang } 19232e7c8b0SBarry Smith A->preallocated = PETSC_TRUE; 19332e7c8b0SBarry Smith A->assembled = PETSC_TRUE; 19482b1193eSBarry Smith PetscFunctionReturn(0); 19582b1193eSBarry Smith } 19682b1193eSBarry Smith 197cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 198dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 19982b1193eSBarry Smith { 2004cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 201bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 202d2840e09SBarry Smith const PetscScalar *x,*v; 203d2840e09SBarry Smith PetscScalar *y, sum1, sum2; 204dfbe8321SBarry Smith PetscErrorCode ierr; 205d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 206d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 20782b1193eSBarry Smith 208bcc973b7SBarry Smith PetscFunctionBegin; 2093649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2101ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 211bcc973b7SBarry Smith idx = a->j; 212bcc973b7SBarry Smith v = a->a; 213bcc973b7SBarry Smith ii = a->i; 214bcc973b7SBarry Smith 215bcc973b7SBarry Smith for (i=0; i<m; i++) { 216bcc973b7SBarry Smith jrow = ii[i]; 217bcc973b7SBarry Smith n = ii[i+1] - jrow; 218bcc973b7SBarry Smith sum1 = 0.0; 219bcc973b7SBarry Smith sum2 = 0.0; 22026fbe8dcSKarl Rupp 221b3c51c66SMatthew Knepley nonzerorow += (n>0); 222bcc973b7SBarry Smith for (j=0; j<n; j++) { 223bcc973b7SBarry Smith sum1 += v[jrow]*x[2*idx[jrow]]; 224bcc973b7SBarry Smith sum2 += v[jrow]*x[2*idx[jrow]+1]; 225bcc973b7SBarry Smith jrow++; 226bcc973b7SBarry Smith } 227bcc973b7SBarry Smith y[2*i] = sum1; 228bcc973b7SBarry Smith y[2*i+1] = sum2; 229bcc973b7SBarry Smith } 230bcc973b7SBarry Smith 231dc0b31edSSatish Balay ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr); 2323649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2331ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 23482b1193eSBarry Smith PetscFunctionReturn(0); 23582b1193eSBarry Smith } 236bcc973b7SBarry Smith 237dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 23882b1193eSBarry Smith { 2394cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 240bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 241d2840e09SBarry Smith const PetscScalar *x,*v; 242d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2; 243dfbe8321SBarry Smith PetscErrorCode ierr; 244d2840e09SBarry Smith PetscInt n,i; 245d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 24682b1193eSBarry Smith 247bcc973b7SBarry Smith PetscFunctionBegin; 248d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2493649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2501ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 251bfec09a0SHong Zhang 252bcc973b7SBarry Smith for (i=0; i<m; i++) { 253bfec09a0SHong Zhang idx = a->j + a->i[i]; 254bfec09a0SHong Zhang v = a->a + a->i[i]; 255bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 256bcc973b7SBarry Smith alpha1 = x[2*i]; 257bcc973b7SBarry Smith alpha2 = x[2*i+1]; 25826fbe8dcSKarl Rupp while (n-->0) { 25926fbe8dcSKarl Rupp y[2*(*idx)] += alpha1*(*v); 26026fbe8dcSKarl Rupp y[2*(*idx)+1] += alpha2*(*v); 26126fbe8dcSKarl Rupp idx++; v++; 26226fbe8dcSKarl Rupp } 263bcc973b7SBarry Smith } 264dc0b31edSSatish Balay ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 2653649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2661ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 26782b1193eSBarry Smith PetscFunctionReturn(0); 26882b1193eSBarry Smith } 269bcc973b7SBarry Smith 270dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 27182b1193eSBarry Smith { 2724cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 273bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 274d2840e09SBarry Smith const PetscScalar *x,*v; 275d2840e09SBarry Smith PetscScalar *y,sum1, sum2; 276dfbe8321SBarry Smith PetscErrorCode ierr; 277b24ad042SBarry Smith PetscInt n,i,jrow,j; 278d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 27982b1193eSBarry Smith 280bcc973b7SBarry Smith PetscFunctionBegin; 281f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2823649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2831ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 284bcc973b7SBarry Smith idx = a->j; 285bcc973b7SBarry Smith v = a->a; 286bcc973b7SBarry Smith ii = a->i; 287bcc973b7SBarry Smith 288bcc973b7SBarry Smith for (i=0; i<m; i++) { 289bcc973b7SBarry Smith jrow = ii[i]; 290bcc973b7SBarry Smith n = ii[i+1] - jrow; 291bcc973b7SBarry Smith sum1 = 0.0; 292bcc973b7SBarry Smith sum2 = 0.0; 293bcc973b7SBarry Smith for (j=0; j<n; j++) { 294bcc973b7SBarry Smith sum1 += v[jrow]*x[2*idx[jrow]]; 295bcc973b7SBarry Smith sum2 += v[jrow]*x[2*idx[jrow]+1]; 296bcc973b7SBarry Smith jrow++; 297bcc973b7SBarry Smith } 298bcc973b7SBarry Smith y[2*i] += sum1; 299bcc973b7SBarry Smith y[2*i+1] += sum2; 300bcc973b7SBarry Smith } 301bcc973b7SBarry Smith 302dc0b31edSSatish Balay ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 3033649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 3041ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 305bcc973b7SBarry Smith PetscFunctionReturn(0); 30682b1193eSBarry Smith } 307dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 30882b1193eSBarry Smith { 3094cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 310bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 311d2840e09SBarry Smith const PetscScalar *x,*v; 312d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2; 313dfbe8321SBarry Smith PetscErrorCode ierr; 314d2840e09SBarry Smith PetscInt n,i; 315d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 31682b1193eSBarry Smith 317bcc973b7SBarry Smith PetscFunctionBegin; 318f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 3193649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 3201ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 321bfec09a0SHong Zhang 322bcc973b7SBarry Smith for (i=0; i<m; i++) { 323bfec09a0SHong Zhang idx = a->j + a->i[i]; 324bfec09a0SHong Zhang v = a->a + a->i[i]; 325bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 326bcc973b7SBarry Smith alpha1 = x[2*i]; 327bcc973b7SBarry Smith alpha2 = x[2*i+1]; 32826fbe8dcSKarl Rupp while (n-->0) { 32926fbe8dcSKarl Rupp y[2*(*idx)] += alpha1*(*v); 33026fbe8dcSKarl Rupp y[2*(*idx)+1] += alpha2*(*v); 33126fbe8dcSKarl Rupp idx++; v++; 33226fbe8dcSKarl Rupp } 333bcc973b7SBarry Smith } 334dc0b31edSSatish Balay ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 3353649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 3361ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 337bcc973b7SBarry Smith PetscFunctionReturn(0); 33882b1193eSBarry Smith } 339cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 340dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 341bcc973b7SBarry Smith { 3424cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 343bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 344d2840e09SBarry Smith const PetscScalar *x,*v; 345d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3; 346dfbe8321SBarry Smith PetscErrorCode ierr; 347d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 348d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 34982b1193eSBarry Smith 350bcc973b7SBarry Smith PetscFunctionBegin; 3513649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 3521ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 353bcc973b7SBarry Smith idx = a->j; 354bcc973b7SBarry Smith v = a->a; 355bcc973b7SBarry Smith ii = a->i; 356bcc973b7SBarry Smith 357bcc973b7SBarry Smith for (i=0; i<m; i++) { 358bcc973b7SBarry Smith jrow = ii[i]; 359bcc973b7SBarry Smith n = ii[i+1] - jrow; 360bcc973b7SBarry Smith sum1 = 0.0; 361bcc973b7SBarry Smith sum2 = 0.0; 362bcc973b7SBarry Smith sum3 = 0.0; 36326fbe8dcSKarl Rupp 364b3c51c66SMatthew Knepley nonzerorow += (n>0); 365bcc973b7SBarry Smith for (j=0; j<n; j++) { 366bcc973b7SBarry Smith sum1 += v[jrow]*x[3*idx[jrow]]; 367bcc973b7SBarry Smith sum2 += v[jrow]*x[3*idx[jrow]+1]; 368bcc973b7SBarry Smith sum3 += v[jrow]*x[3*idx[jrow]+2]; 369bcc973b7SBarry Smith jrow++; 370bcc973b7SBarry Smith } 371bcc973b7SBarry Smith y[3*i] = sum1; 372bcc973b7SBarry Smith y[3*i+1] = sum2; 373bcc973b7SBarry Smith y[3*i+2] = sum3; 374bcc973b7SBarry Smith } 375bcc973b7SBarry Smith 376dc0b31edSSatish Balay ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr); 3773649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 3781ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 379bcc973b7SBarry Smith PetscFunctionReturn(0); 380bcc973b7SBarry Smith } 381bcc973b7SBarry Smith 382dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 383bcc973b7SBarry Smith { 3844cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 385bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 386d2840e09SBarry Smith const PetscScalar *x,*v; 387d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3; 388dfbe8321SBarry Smith PetscErrorCode ierr; 389d2840e09SBarry Smith PetscInt n,i; 390d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 391bcc973b7SBarry Smith 392bcc973b7SBarry Smith PetscFunctionBegin; 393d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 3943649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 3951ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 396bfec09a0SHong Zhang 397bcc973b7SBarry Smith for (i=0; i<m; i++) { 398bfec09a0SHong Zhang idx = a->j + a->i[i]; 399bfec09a0SHong Zhang v = a->a + a->i[i]; 400bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 401bcc973b7SBarry Smith alpha1 = x[3*i]; 402bcc973b7SBarry Smith alpha2 = x[3*i+1]; 403bcc973b7SBarry Smith alpha3 = x[3*i+2]; 404bcc973b7SBarry Smith while (n-->0) { 405bcc973b7SBarry Smith y[3*(*idx)] += alpha1*(*v); 406bcc973b7SBarry Smith y[3*(*idx)+1] += alpha2*(*v); 407bcc973b7SBarry Smith y[3*(*idx)+2] += alpha3*(*v); 408bcc973b7SBarry Smith idx++; v++; 409bcc973b7SBarry Smith } 410bcc973b7SBarry Smith } 411dc0b31edSSatish Balay ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 4123649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 4131ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 414bcc973b7SBarry Smith PetscFunctionReturn(0); 415bcc973b7SBarry Smith } 416bcc973b7SBarry Smith 417dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 418bcc973b7SBarry Smith { 4194cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 420bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 421d2840e09SBarry Smith const PetscScalar *x,*v; 422d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3; 423dfbe8321SBarry Smith PetscErrorCode ierr; 424b24ad042SBarry Smith PetscInt n,i,jrow,j; 425d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 426bcc973b7SBarry Smith 427bcc973b7SBarry Smith PetscFunctionBegin; 428f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 4293649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 4301ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 431bcc973b7SBarry Smith idx = a->j; 432bcc973b7SBarry Smith v = a->a; 433bcc973b7SBarry Smith ii = a->i; 434bcc973b7SBarry Smith 435bcc973b7SBarry Smith for (i=0; i<m; i++) { 436bcc973b7SBarry Smith jrow = ii[i]; 437bcc973b7SBarry Smith n = ii[i+1] - jrow; 438bcc973b7SBarry Smith sum1 = 0.0; 439bcc973b7SBarry Smith sum2 = 0.0; 440bcc973b7SBarry Smith sum3 = 0.0; 441bcc973b7SBarry Smith for (j=0; j<n; j++) { 442bcc973b7SBarry Smith sum1 += v[jrow]*x[3*idx[jrow]]; 443bcc973b7SBarry Smith sum2 += v[jrow]*x[3*idx[jrow]+1]; 444bcc973b7SBarry Smith sum3 += v[jrow]*x[3*idx[jrow]+2]; 445bcc973b7SBarry Smith jrow++; 446bcc973b7SBarry Smith } 447bcc973b7SBarry Smith y[3*i] += sum1; 448bcc973b7SBarry Smith y[3*i+1] += sum2; 449bcc973b7SBarry Smith y[3*i+2] += sum3; 450bcc973b7SBarry Smith } 451bcc973b7SBarry Smith 452dc0b31edSSatish Balay ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 4533649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 4541ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 455bcc973b7SBarry Smith PetscFunctionReturn(0); 456bcc973b7SBarry Smith } 457dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 458bcc973b7SBarry Smith { 4594cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 460bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 461d2840e09SBarry Smith const PetscScalar *x,*v; 462d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3; 463dfbe8321SBarry Smith PetscErrorCode ierr; 464d2840e09SBarry Smith PetscInt n,i; 465d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 466bcc973b7SBarry Smith 467bcc973b7SBarry Smith PetscFunctionBegin; 468f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 4693649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 4701ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 471bcc973b7SBarry Smith for (i=0; i<m; i++) { 472bfec09a0SHong Zhang idx = a->j + a->i[i]; 473bfec09a0SHong Zhang v = a->a + a->i[i]; 474bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 475bcc973b7SBarry Smith alpha1 = x[3*i]; 476bcc973b7SBarry Smith alpha2 = x[3*i+1]; 477bcc973b7SBarry Smith alpha3 = x[3*i+2]; 478bcc973b7SBarry Smith while (n-->0) { 479bcc973b7SBarry Smith y[3*(*idx)] += alpha1*(*v); 480bcc973b7SBarry Smith y[3*(*idx)+1] += alpha2*(*v); 481bcc973b7SBarry Smith y[3*(*idx)+2] += alpha3*(*v); 482bcc973b7SBarry Smith idx++; v++; 483bcc973b7SBarry Smith } 484bcc973b7SBarry Smith } 485dc0b31edSSatish Balay ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 4863649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 4871ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 488bcc973b7SBarry Smith PetscFunctionReturn(0); 489bcc973b7SBarry Smith } 490bcc973b7SBarry Smith 491bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/ 492dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 493bcc973b7SBarry Smith { 4944cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 495bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 496d2840e09SBarry Smith const PetscScalar *x,*v; 497d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4; 498dfbe8321SBarry Smith PetscErrorCode ierr; 499d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 500d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 501bcc973b7SBarry Smith 502bcc973b7SBarry Smith PetscFunctionBegin; 5033649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 5041ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 505bcc973b7SBarry Smith idx = a->j; 506bcc973b7SBarry Smith v = a->a; 507bcc973b7SBarry Smith ii = a->i; 508bcc973b7SBarry Smith 509bcc973b7SBarry Smith for (i=0; i<m; i++) { 510bcc973b7SBarry Smith jrow = ii[i]; 511bcc973b7SBarry Smith n = ii[i+1] - jrow; 512bcc973b7SBarry Smith sum1 = 0.0; 513bcc973b7SBarry Smith sum2 = 0.0; 514bcc973b7SBarry Smith sum3 = 0.0; 515bcc973b7SBarry Smith sum4 = 0.0; 516b3c51c66SMatthew Knepley nonzerorow += (n>0); 517bcc973b7SBarry Smith for (j=0; j<n; j++) { 518bcc973b7SBarry Smith sum1 += v[jrow]*x[4*idx[jrow]]; 519bcc973b7SBarry Smith sum2 += v[jrow]*x[4*idx[jrow]+1]; 520bcc973b7SBarry Smith sum3 += v[jrow]*x[4*idx[jrow]+2]; 521bcc973b7SBarry Smith sum4 += v[jrow]*x[4*idx[jrow]+3]; 522bcc973b7SBarry Smith jrow++; 523bcc973b7SBarry Smith } 524bcc973b7SBarry Smith y[4*i] = sum1; 525bcc973b7SBarry Smith y[4*i+1] = sum2; 526bcc973b7SBarry Smith y[4*i+2] = sum3; 527bcc973b7SBarry Smith y[4*i+3] = sum4; 528bcc973b7SBarry Smith } 529bcc973b7SBarry Smith 530dc0b31edSSatish Balay ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr); 5313649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 5321ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 533bcc973b7SBarry Smith PetscFunctionReturn(0); 534bcc973b7SBarry Smith } 535bcc973b7SBarry Smith 536dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 537bcc973b7SBarry Smith { 5384cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 539bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 540d2840e09SBarry Smith const PetscScalar *x,*v; 541d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 542dfbe8321SBarry Smith PetscErrorCode ierr; 543d2840e09SBarry Smith PetscInt n,i; 544d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 545bcc973b7SBarry Smith 546bcc973b7SBarry Smith PetscFunctionBegin; 547d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 5483649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 5491ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 550bcc973b7SBarry Smith for (i=0; i<m; i++) { 551bfec09a0SHong Zhang idx = a->j + a->i[i]; 552bfec09a0SHong Zhang v = a->a + a->i[i]; 553bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 554bcc973b7SBarry Smith alpha1 = x[4*i]; 555bcc973b7SBarry Smith alpha2 = x[4*i+1]; 556bcc973b7SBarry Smith alpha3 = x[4*i+2]; 557bcc973b7SBarry Smith alpha4 = x[4*i+3]; 558bcc973b7SBarry Smith while (n-->0) { 559bcc973b7SBarry Smith y[4*(*idx)] += alpha1*(*v); 560bcc973b7SBarry Smith y[4*(*idx)+1] += alpha2*(*v); 561bcc973b7SBarry Smith y[4*(*idx)+2] += alpha3*(*v); 562bcc973b7SBarry Smith y[4*(*idx)+3] += alpha4*(*v); 563bcc973b7SBarry Smith idx++; v++; 564bcc973b7SBarry Smith } 565bcc973b7SBarry Smith } 566dc0b31edSSatish Balay ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 5673649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 5681ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 569bcc973b7SBarry Smith PetscFunctionReturn(0); 570bcc973b7SBarry Smith } 571bcc973b7SBarry Smith 572dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 573bcc973b7SBarry Smith { 5744cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 575f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 576d2840e09SBarry Smith const PetscScalar *x,*v; 577d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4; 578dfbe8321SBarry Smith PetscErrorCode ierr; 579b24ad042SBarry Smith PetscInt n,i,jrow,j; 580d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 581f9fae5adSBarry Smith 582f9fae5adSBarry Smith PetscFunctionBegin; 583f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 5843649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 5851ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 586f9fae5adSBarry Smith idx = a->j; 587f9fae5adSBarry Smith v = a->a; 588f9fae5adSBarry Smith ii = a->i; 589f9fae5adSBarry Smith 590f9fae5adSBarry Smith for (i=0; i<m; i++) { 591f9fae5adSBarry Smith jrow = ii[i]; 592f9fae5adSBarry Smith n = ii[i+1] - jrow; 593f9fae5adSBarry Smith sum1 = 0.0; 594f9fae5adSBarry Smith sum2 = 0.0; 595f9fae5adSBarry Smith sum3 = 0.0; 596f9fae5adSBarry Smith sum4 = 0.0; 597f9fae5adSBarry Smith for (j=0; j<n; j++) { 598f9fae5adSBarry Smith sum1 += v[jrow]*x[4*idx[jrow]]; 599f9fae5adSBarry Smith sum2 += v[jrow]*x[4*idx[jrow]+1]; 600f9fae5adSBarry Smith sum3 += v[jrow]*x[4*idx[jrow]+2]; 601f9fae5adSBarry Smith sum4 += v[jrow]*x[4*idx[jrow]+3]; 602f9fae5adSBarry Smith jrow++; 603f9fae5adSBarry Smith } 604f9fae5adSBarry Smith y[4*i] += sum1; 605f9fae5adSBarry Smith y[4*i+1] += sum2; 606f9fae5adSBarry Smith y[4*i+2] += sum3; 607f9fae5adSBarry Smith y[4*i+3] += sum4; 608f9fae5adSBarry Smith } 609f9fae5adSBarry Smith 610dc0b31edSSatish Balay ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 6113649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 6121ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 613f9fae5adSBarry Smith PetscFunctionReturn(0); 614bcc973b7SBarry Smith } 615dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 616bcc973b7SBarry Smith { 6174cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 618f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 619d2840e09SBarry Smith const PetscScalar *x,*v; 620d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 621dfbe8321SBarry Smith PetscErrorCode ierr; 622d2840e09SBarry Smith PetscInt n,i; 623d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 624f9fae5adSBarry Smith 625f9fae5adSBarry Smith PetscFunctionBegin; 626f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 6273649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 6281ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 629bfec09a0SHong Zhang 630f9fae5adSBarry Smith for (i=0; i<m; i++) { 631bfec09a0SHong Zhang idx = a->j + a->i[i]; 632bfec09a0SHong Zhang v = a->a + a->i[i]; 633f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 634f9fae5adSBarry Smith alpha1 = x[4*i]; 635f9fae5adSBarry Smith alpha2 = x[4*i+1]; 636f9fae5adSBarry Smith alpha3 = x[4*i+2]; 637f9fae5adSBarry Smith alpha4 = x[4*i+3]; 638f9fae5adSBarry Smith while (n-->0) { 639f9fae5adSBarry Smith y[4*(*idx)] += alpha1*(*v); 640f9fae5adSBarry Smith y[4*(*idx)+1] += alpha2*(*v); 641f9fae5adSBarry Smith y[4*(*idx)+2] += alpha3*(*v); 642f9fae5adSBarry Smith y[4*(*idx)+3] += alpha4*(*v); 643f9fae5adSBarry Smith idx++; v++; 644f9fae5adSBarry Smith } 645f9fae5adSBarry Smith } 646dc0b31edSSatish Balay ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 6473649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 6481ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 649f9fae5adSBarry Smith PetscFunctionReturn(0); 650f9fae5adSBarry Smith } 651f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/ 6526cd98798SBarry Smith 653dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 654f9fae5adSBarry Smith { 6554cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 656f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 657d2840e09SBarry Smith const PetscScalar *x,*v; 658d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 659dfbe8321SBarry Smith PetscErrorCode ierr; 660d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 661d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 662f9fae5adSBarry Smith 663f9fae5adSBarry Smith PetscFunctionBegin; 6643649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 6651ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 666f9fae5adSBarry Smith idx = a->j; 667f9fae5adSBarry Smith v = a->a; 668f9fae5adSBarry Smith ii = a->i; 669f9fae5adSBarry Smith 670f9fae5adSBarry Smith for (i=0; i<m; i++) { 671f9fae5adSBarry Smith jrow = ii[i]; 672f9fae5adSBarry Smith n = ii[i+1] - jrow; 673f9fae5adSBarry Smith sum1 = 0.0; 674f9fae5adSBarry Smith sum2 = 0.0; 675f9fae5adSBarry Smith sum3 = 0.0; 676f9fae5adSBarry Smith sum4 = 0.0; 677f9fae5adSBarry Smith sum5 = 0.0; 67826fbe8dcSKarl Rupp 679b3c51c66SMatthew Knepley nonzerorow += (n>0); 680f9fae5adSBarry Smith for (j=0; j<n; j++) { 681f9fae5adSBarry Smith sum1 += v[jrow]*x[5*idx[jrow]]; 682f9fae5adSBarry Smith sum2 += v[jrow]*x[5*idx[jrow]+1]; 683f9fae5adSBarry Smith sum3 += v[jrow]*x[5*idx[jrow]+2]; 684f9fae5adSBarry Smith sum4 += v[jrow]*x[5*idx[jrow]+3]; 685f9fae5adSBarry Smith sum5 += v[jrow]*x[5*idx[jrow]+4]; 686f9fae5adSBarry Smith jrow++; 687f9fae5adSBarry Smith } 688f9fae5adSBarry Smith y[5*i] = sum1; 689f9fae5adSBarry Smith y[5*i+1] = sum2; 690f9fae5adSBarry Smith y[5*i+2] = sum3; 691f9fae5adSBarry Smith y[5*i+3] = sum4; 692f9fae5adSBarry Smith y[5*i+4] = sum5; 693f9fae5adSBarry Smith } 694f9fae5adSBarry Smith 695dc0b31edSSatish Balay ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr); 6963649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 6971ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 698f9fae5adSBarry Smith PetscFunctionReturn(0); 699f9fae5adSBarry Smith } 700f9fae5adSBarry Smith 701dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 702f9fae5adSBarry Smith { 7034cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 704f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 705d2840e09SBarry Smith const PetscScalar *x,*v; 706d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 707dfbe8321SBarry Smith PetscErrorCode ierr; 708d2840e09SBarry Smith PetscInt n,i; 709d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 710f9fae5adSBarry Smith 711f9fae5adSBarry Smith PetscFunctionBegin; 712d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 7133649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 7141ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 715bfec09a0SHong Zhang 716f9fae5adSBarry Smith for (i=0; i<m; i++) { 717bfec09a0SHong Zhang idx = a->j + a->i[i]; 718bfec09a0SHong Zhang v = a->a + a->i[i]; 719f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 720f9fae5adSBarry Smith alpha1 = x[5*i]; 721f9fae5adSBarry Smith alpha2 = x[5*i+1]; 722f9fae5adSBarry Smith alpha3 = x[5*i+2]; 723f9fae5adSBarry Smith alpha4 = x[5*i+3]; 724f9fae5adSBarry Smith alpha5 = x[5*i+4]; 725f9fae5adSBarry Smith while (n-->0) { 726f9fae5adSBarry Smith y[5*(*idx)] += alpha1*(*v); 727f9fae5adSBarry Smith y[5*(*idx)+1] += alpha2*(*v); 728f9fae5adSBarry Smith y[5*(*idx)+2] += alpha3*(*v); 729f9fae5adSBarry Smith y[5*(*idx)+3] += alpha4*(*v); 730f9fae5adSBarry Smith y[5*(*idx)+4] += alpha5*(*v); 731f9fae5adSBarry Smith idx++; v++; 732f9fae5adSBarry Smith } 733f9fae5adSBarry Smith } 734dc0b31edSSatish Balay ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 7353649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 7361ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 737f9fae5adSBarry Smith PetscFunctionReturn(0); 738f9fae5adSBarry Smith } 739f9fae5adSBarry Smith 740dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 741f9fae5adSBarry Smith { 7424cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 743f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 744d2840e09SBarry Smith const PetscScalar *x,*v; 745d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 746dfbe8321SBarry Smith PetscErrorCode ierr; 747b24ad042SBarry Smith PetscInt n,i,jrow,j; 748d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 749f9fae5adSBarry Smith 750f9fae5adSBarry Smith PetscFunctionBegin; 751f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 7523649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 7531ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 754f9fae5adSBarry Smith idx = a->j; 755f9fae5adSBarry Smith v = a->a; 756f9fae5adSBarry Smith ii = a->i; 757f9fae5adSBarry Smith 758f9fae5adSBarry Smith for (i=0; i<m; i++) { 759f9fae5adSBarry Smith jrow = ii[i]; 760f9fae5adSBarry Smith n = ii[i+1] - jrow; 761f9fae5adSBarry Smith sum1 = 0.0; 762f9fae5adSBarry Smith sum2 = 0.0; 763f9fae5adSBarry Smith sum3 = 0.0; 764f9fae5adSBarry Smith sum4 = 0.0; 765f9fae5adSBarry Smith sum5 = 0.0; 766f9fae5adSBarry Smith for (j=0; j<n; j++) { 767f9fae5adSBarry Smith sum1 += v[jrow]*x[5*idx[jrow]]; 768f9fae5adSBarry Smith sum2 += v[jrow]*x[5*idx[jrow]+1]; 769f9fae5adSBarry Smith sum3 += v[jrow]*x[5*idx[jrow]+2]; 770f9fae5adSBarry Smith sum4 += v[jrow]*x[5*idx[jrow]+3]; 771f9fae5adSBarry Smith sum5 += v[jrow]*x[5*idx[jrow]+4]; 772f9fae5adSBarry Smith jrow++; 773f9fae5adSBarry Smith } 774f9fae5adSBarry Smith y[5*i] += sum1; 775f9fae5adSBarry Smith y[5*i+1] += sum2; 776f9fae5adSBarry Smith y[5*i+2] += sum3; 777f9fae5adSBarry Smith y[5*i+3] += sum4; 778f9fae5adSBarry Smith y[5*i+4] += sum5; 779f9fae5adSBarry Smith } 780f9fae5adSBarry Smith 781dc0b31edSSatish Balay ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 7823649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 7831ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 784f9fae5adSBarry Smith PetscFunctionReturn(0); 785f9fae5adSBarry Smith } 786f9fae5adSBarry Smith 787dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 788f9fae5adSBarry Smith { 7894cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 790f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 791d2840e09SBarry Smith const PetscScalar *x,*v; 792d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 793dfbe8321SBarry Smith PetscErrorCode ierr; 794d2840e09SBarry Smith PetscInt n,i; 795d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 796f9fae5adSBarry Smith 797f9fae5adSBarry Smith PetscFunctionBegin; 798f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 7993649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 8001ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 801bfec09a0SHong Zhang 802f9fae5adSBarry Smith for (i=0; i<m; i++) { 803bfec09a0SHong Zhang idx = a->j + a->i[i]; 804bfec09a0SHong Zhang v = a->a + a->i[i]; 805f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 806f9fae5adSBarry Smith alpha1 = x[5*i]; 807f9fae5adSBarry Smith alpha2 = x[5*i+1]; 808f9fae5adSBarry Smith alpha3 = x[5*i+2]; 809f9fae5adSBarry Smith alpha4 = x[5*i+3]; 810f9fae5adSBarry Smith alpha5 = x[5*i+4]; 811f9fae5adSBarry Smith while (n-->0) { 812f9fae5adSBarry Smith y[5*(*idx)] += alpha1*(*v); 813f9fae5adSBarry Smith y[5*(*idx)+1] += alpha2*(*v); 814f9fae5adSBarry Smith y[5*(*idx)+2] += alpha3*(*v); 815f9fae5adSBarry Smith y[5*(*idx)+3] += alpha4*(*v); 816f9fae5adSBarry Smith y[5*(*idx)+4] += alpha5*(*v); 817f9fae5adSBarry Smith idx++; v++; 818f9fae5adSBarry Smith } 819f9fae5adSBarry Smith } 820dc0b31edSSatish Balay ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 8213649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 8221ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 823f9fae5adSBarry Smith PetscFunctionReturn(0); 824bcc973b7SBarry Smith } 825bcc973b7SBarry Smith 8266cd98798SBarry Smith /* ------------------------------------------------------------------------------*/ 827dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 8286cd98798SBarry Smith { 8296cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 8306cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 831d2840e09SBarry Smith const PetscScalar *x,*v; 832d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 833dfbe8321SBarry Smith PetscErrorCode ierr; 834d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 835d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 8366cd98798SBarry Smith 8376cd98798SBarry Smith PetscFunctionBegin; 8383649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 8391ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 8406cd98798SBarry Smith idx = a->j; 8416cd98798SBarry Smith v = a->a; 8426cd98798SBarry Smith ii = a->i; 8436cd98798SBarry Smith 8446cd98798SBarry Smith for (i=0; i<m; i++) { 8456cd98798SBarry Smith jrow = ii[i]; 8466cd98798SBarry Smith n = ii[i+1] - jrow; 8476cd98798SBarry Smith sum1 = 0.0; 8486cd98798SBarry Smith sum2 = 0.0; 8496cd98798SBarry Smith sum3 = 0.0; 8506cd98798SBarry Smith sum4 = 0.0; 8516cd98798SBarry Smith sum5 = 0.0; 8526cd98798SBarry Smith sum6 = 0.0; 85326fbe8dcSKarl Rupp 854b3c51c66SMatthew Knepley nonzerorow += (n>0); 8556cd98798SBarry Smith for (j=0; j<n; j++) { 8566cd98798SBarry Smith sum1 += v[jrow]*x[6*idx[jrow]]; 8576cd98798SBarry Smith sum2 += v[jrow]*x[6*idx[jrow]+1]; 8586cd98798SBarry Smith sum3 += v[jrow]*x[6*idx[jrow]+2]; 8596cd98798SBarry Smith sum4 += v[jrow]*x[6*idx[jrow]+3]; 8606cd98798SBarry Smith sum5 += v[jrow]*x[6*idx[jrow]+4]; 8616cd98798SBarry Smith sum6 += v[jrow]*x[6*idx[jrow]+5]; 8626cd98798SBarry Smith jrow++; 8636cd98798SBarry Smith } 8646cd98798SBarry Smith y[6*i] = sum1; 8656cd98798SBarry Smith y[6*i+1] = sum2; 8666cd98798SBarry Smith y[6*i+2] = sum3; 8676cd98798SBarry Smith y[6*i+3] = sum4; 8686cd98798SBarry Smith y[6*i+4] = sum5; 8696cd98798SBarry Smith y[6*i+5] = sum6; 8706cd98798SBarry Smith } 8716cd98798SBarry Smith 872dc0b31edSSatish Balay ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr); 8733649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 8741ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 8756cd98798SBarry Smith PetscFunctionReturn(0); 8766cd98798SBarry Smith } 8776cd98798SBarry Smith 878dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 8796cd98798SBarry Smith { 8806cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 8816cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 882d2840e09SBarry Smith const PetscScalar *x,*v; 883d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 884dfbe8321SBarry Smith PetscErrorCode ierr; 885d2840e09SBarry Smith PetscInt n,i; 886d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 8876cd98798SBarry Smith 8886cd98798SBarry Smith PetscFunctionBegin; 889d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 8903649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 8911ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 892bfec09a0SHong Zhang 8936cd98798SBarry Smith for (i=0; i<m; i++) { 894bfec09a0SHong Zhang idx = a->j + a->i[i]; 895bfec09a0SHong Zhang v = a->a + a->i[i]; 8966cd98798SBarry Smith n = a->i[i+1] - a->i[i]; 8976cd98798SBarry Smith alpha1 = x[6*i]; 8986cd98798SBarry Smith alpha2 = x[6*i+1]; 8996cd98798SBarry Smith alpha3 = x[6*i+2]; 9006cd98798SBarry Smith alpha4 = x[6*i+3]; 9016cd98798SBarry Smith alpha5 = x[6*i+4]; 9026cd98798SBarry Smith alpha6 = x[6*i+5]; 9036cd98798SBarry Smith while (n-->0) { 9046cd98798SBarry Smith y[6*(*idx)] += alpha1*(*v); 9056cd98798SBarry Smith y[6*(*idx)+1] += alpha2*(*v); 9066cd98798SBarry Smith y[6*(*idx)+2] += alpha3*(*v); 9076cd98798SBarry Smith y[6*(*idx)+3] += alpha4*(*v); 9086cd98798SBarry Smith y[6*(*idx)+4] += alpha5*(*v); 9096cd98798SBarry Smith y[6*(*idx)+5] += alpha6*(*v); 9106cd98798SBarry Smith idx++; v++; 9116cd98798SBarry Smith } 9126cd98798SBarry Smith } 913dc0b31edSSatish Balay ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 9143649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 9151ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 9166cd98798SBarry Smith PetscFunctionReturn(0); 9176cd98798SBarry Smith } 9186cd98798SBarry Smith 919dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 9206cd98798SBarry Smith { 9216cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 9226cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 923d2840e09SBarry Smith const PetscScalar *x,*v; 924d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 925dfbe8321SBarry Smith PetscErrorCode ierr; 926b24ad042SBarry Smith PetscInt n,i,jrow,j; 927d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 9286cd98798SBarry Smith 9296cd98798SBarry Smith PetscFunctionBegin; 9306cd98798SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 9313649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 9321ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 9336cd98798SBarry Smith idx = a->j; 9346cd98798SBarry Smith v = a->a; 9356cd98798SBarry Smith ii = a->i; 9366cd98798SBarry Smith 9376cd98798SBarry Smith for (i=0; i<m; i++) { 9386cd98798SBarry Smith jrow = ii[i]; 9396cd98798SBarry Smith n = ii[i+1] - jrow; 9406cd98798SBarry Smith sum1 = 0.0; 9416cd98798SBarry Smith sum2 = 0.0; 9426cd98798SBarry Smith sum3 = 0.0; 9436cd98798SBarry Smith sum4 = 0.0; 9446cd98798SBarry Smith sum5 = 0.0; 9456cd98798SBarry Smith sum6 = 0.0; 9466cd98798SBarry Smith for (j=0; j<n; j++) { 9476cd98798SBarry Smith sum1 += v[jrow]*x[6*idx[jrow]]; 9486cd98798SBarry Smith sum2 += v[jrow]*x[6*idx[jrow]+1]; 9496cd98798SBarry Smith sum3 += v[jrow]*x[6*idx[jrow]+2]; 9506cd98798SBarry Smith sum4 += v[jrow]*x[6*idx[jrow]+3]; 9516cd98798SBarry Smith sum5 += v[jrow]*x[6*idx[jrow]+4]; 9526cd98798SBarry Smith sum6 += v[jrow]*x[6*idx[jrow]+5]; 9536cd98798SBarry Smith jrow++; 9546cd98798SBarry Smith } 9556cd98798SBarry Smith y[6*i] += sum1; 9566cd98798SBarry Smith y[6*i+1] += sum2; 9576cd98798SBarry Smith y[6*i+2] += sum3; 9586cd98798SBarry Smith y[6*i+3] += sum4; 9596cd98798SBarry Smith y[6*i+4] += sum5; 9606cd98798SBarry Smith y[6*i+5] += sum6; 9616cd98798SBarry Smith } 9626cd98798SBarry Smith 963dc0b31edSSatish Balay ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 9643649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 9651ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 9666cd98798SBarry Smith PetscFunctionReturn(0); 9676cd98798SBarry Smith } 9686cd98798SBarry Smith 969dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 9706cd98798SBarry Smith { 9716cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 9726cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 973d2840e09SBarry Smith const PetscScalar *x,*v; 974d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 975dfbe8321SBarry Smith PetscErrorCode ierr; 976d2840e09SBarry Smith PetscInt n,i; 977d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 9786cd98798SBarry Smith 9796cd98798SBarry Smith PetscFunctionBegin; 9806cd98798SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 9813649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 9821ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 983bfec09a0SHong Zhang 9846cd98798SBarry Smith for (i=0; i<m; i++) { 985bfec09a0SHong Zhang idx = a->j + a->i[i]; 986bfec09a0SHong Zhang v = a->a + a->i[i]; 9876cd98798SBarry Smith n = a->i[i+1] - a->i[i]; 9886cd98798SBarry Smith alpha1 = x[6*i]; 9896cd98798SBarry Smith alpha2 = x[6*i+1]; 9906cd98798SBarry Smith alpha3 = x[6*i+2]; 9916cd98798SBarry Smith alpha4 = x[6*i+3]; 9926cd98798SBarry Smith alpha5 = x[6*i+4]; 9936cd98798SBarry Smith alpha6 = x[6*i+5]; 9946cd98798SBarry Smith while (n-->0) { 9956cd98798SBarry Smith y[6*(*idx)] += alpha1*(*v); 9966cd98798SBarry Smith y[6*(*idx)+1] += alpha2*(*v); 9976cd98798SBarry Smith y[6*(*idx)+2] += alpha3*(*v); 9986cd98798SBarry Smith y[6*(*idx)+3] += alpha4*(*v); 9996cd98798SBarry Smith y[6*(*idx)+4] += alpha5*(*v); 10006cd98798SBarry Smith y[6*(*idx)+5] += alpha6*(*v); 10016cd98798SBarry Smith idx++; v++; 10026cd98798SBarry Smith } 10036cd98798SBarry Smith } 1004dc0b31edSSatish Balay ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 10053649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 10061ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 10076cd98798SBarry Smith PetscFunctionReturn(0); 10086cd98798SBarry Smith } 10096cd98798SBarry Smith 101066ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/ 1011ed8eea03SBarry Smith PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1012ed8eea03SBarry Smith { 1013ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1014ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1015d2840e09SBarry Smith const PetscScalar *x,*v; 1016d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1017ed8eea03SBarry Smith PetscErrorCode ierr; 1018d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1019d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 1020ed8eea03SBarry Smith 1021ed8eea03SBarry Smith PetscFunctionBegin; 10223649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1023ed8eea03SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1024ed8eea03SBarry Smith idx = a->j; 1025ed8eea03SBarry Smith v = a->a; 1026ed8eea03SBarry Smith ii = a->i; 1027ed8eea03SBarry Smith 1028ed8eea03SBarry Smith for (i=0; i<m; i++) { 1029ed8eea03SBarry Smith jrow = ii[i]; 1030ed8eea03SBarry Smith n = ii[i+1] - jrow; 1031ed8eea03SBarry Smith sum1 = 0.0; 1032ed8eea03SBarry Smith sum2 = 0.0; 1033ed8eea03SBarry Smith sum3 = 0.0; 1034ed8eea03SBarry Smith sum4 = 0.0; 1035ed8eea03SBarry Smith sum5 = 0.0; 1036ed8eea03SBarry Smith sum6 = 0.0; 1037ed8eea03SBarry Smith sum7 = 0.0; 103826fbe8dcSKarl Rupp 1039b3c51c66SMatthew Knepley nonzerorow += (n>0); 1040ed8eea03SBarry Smith for (j=0; j<n; j++) { 1041ed8eea03SBarry Smith sum1 += v[jrow]*x[7*idx[jrow]]; 1042ed8eea03SBarry Smith sum2 += v[jrow]*x[7*idx[jrow]+1]; 1043ed8eea03SBarry Smith sum3 += v[jrow]*x[7*idx[jrow]+2]; 1044ed8eea03SBarry Smith sum4 += v[jrow]*x[7*idx[jrow]+3]; 1045ed8eea03SBarry Smith sum5 += v[jrow]*x[7*idx[jrow]+4]; 1046ed8eea03SBarry Smith sum6 += v[jrow]*x[7*idx[jrow]+5]; 1047ed8eea03SBarry Smith sum7 += v[jrow]*x[7*idx[jrow]+6]; 1048ed8eea03SBarry Smith jrow++; 1049ed8eea03SBarry Smith } 1050ed8eea03SBarry Smith y[7*i] = sum1; 1051ed8eea03SBarry Smith y[7*i+1] = sum2; 1052ed8eea03SBarry Smith y[7*i+2] = sum3; 1053ed8eea03SBarry Smith y[7*i+3] = sum4; 1054ed8eea03SBarry Smith y[7*i+4] = sum5; 1055ed8eea03SBarry Smith y[7*i+5] = sum6; 1056ed8eea03SBarry Smith y[7*i+6] = sum7; 1057ed8eea03SBarry Smith } 1058ed8eea03SBarry Smith 1059dc0b31edSSatish Balay ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr); 10603649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1061ed8eea03SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1062ed8eea03SBarry Smith PetscFunctionReturn(0); 1063ed8eea03SBarry Smith } 1064ed8eea03SBarry Smith 1065ed8eea03SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1066ed8eea03SBarry Smith { 1067ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1068ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1069d2840e09SBarry Smith const PetscScalar *x,*v; 1070d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1071ed8eea03SBarry Smith PetscErrorCode ierr; 1072d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1073d2840e09SBarry Smith PetscInt n,i; 1074ed8eea03SBarry Smith 1075ed8eea03SBarry Smith PetscFunctionBegin; 1076d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 10773649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1078ed8eea03SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1079ed8eea03SBarry Smith 1080ed8eea03SBarry Smith for (i=0; i<m; i++) { 1081ed8eea03SBarry Smith idx = a->j + a->i[i]; 1082ed8eea03SBarry Smith v = a->a + a->i[i]; 1083ed8eea03SBarry Smith n = a->i[i+1] - a->i[i]; 1084ed8eea03SBarry Smith alpha1 = x[7*i]; 1085ed8eea03SBarry Smith alpha2 = x[7*i+1]; 1086ed8eea03SBarry Smith alpha3 = x[7*i+2]; 1087ed8eea03SBarry Smith alpha4 = x[7*i+3]; 1088ed8eea03SBarry Smith alpha5 = x[7*i+4]; 1089ed8eea03SBarry Smith alpha6 = x[7*i+5]; 1090ed8eea03SBarry Smith alpha7 = x[7*i+6]; 1091ed8eea03SBarry Smith while (n-->0) { 1092ed8eea03SBarry Smith y[7*(*idx)] += alpha1*(*v); 1093ed8eea03SBarry Smith y[7*(*idx)+1] += alpha2*(*v); 1094ed8eea03SBarry Smith y[7*(*idx)+2] += alpha3*(*v); 1095ed8eea03SBarry Smith y[7*(*idx)+3] += alpha4*(*v); 1096ed8eea03SBarry Smith y[7*(*idx)+4] += alpha5*(*v); 1097ed8eea03SBarry Smith y[7*(*idx)+5] += alpha6*(*v); 1098ed8eea03SBarry Smith y[7*(*idx)+6] += alpha7*(*v); 1099ed8eea03SBarry Smith idx++; v++; 1100ed8eea03SBarry Smith } 1101ed8eea03SBarry Smith } 1102dc0b31edSSatish Balay ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 11033649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1104ed8eea03SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1105ed8eea03SBarry Smith PetscFunctionReturn(0); 1106ed8eea03SBarry Smith } 1107ed8eea03SBarry Smith 1108ed8eea03SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1109ed8eea03SBarry Smith { 1110ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1111ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1112d2840e09SBarry Smith const PetscScalar *x,*v; 1113d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1114ed8eea03SBarry Smith PetscErrorCode ierr; 1115d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1116ed8eea03SBarry Smith PetscInt n,i,jrow,j; 1117ed8eea03SBarry Smith 1118ed8eea03SBarry Smith PetscFunctionBegin; 1119ed8eea03SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 11203649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1121ed8eea03SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1122ed8eea03SBarry Smith idx = a->j; 1123ed8eea03SBarry Smith v = a->a; 1124ed8eea03SBarry Smith ii = a->i; 1125ed8eea03SBarry Smith 1126ed8eea03SBarry Smith for (i=0; i<m; i++) { 1127ed8eea03SBarry Smith jrow = ii[i]; 1128ed8eea03SBarry Smith n = ii[i+1] - jrow; 1129ed8eea03SBarry Smith sum1 = 0.0; 1130ed8eea03SBarry Smith sum2 = 0.0; 1131ed8eea03SBarry Smith sum3 = 0.0; 1132ed8eea03SBarry Smith sum4 = 0.0; 1133ed8eea03SBarry Smith sum5 = 0.0; 1134ed8eea03SBarry Smith sum6 = 0.0; 1135ed8eea03SBarry Smith sum7 = 0.0; 1136ed8eea03SBarry Smith for (j=0; j<n; j++) { 1137ed8eea03SBarry Smith sum1 += v[jrow]*x[7*idx[jrow]]; 1138ed8eea03SBarry Smith sum2 += v[jrow]*x[7*idx[jrow]+1]; 1139ed8eea03SBarry Smith sum3 += v[jrow]*x[7*idx[jrow]+2]; 1140ed8eea03SBarry Smith sum4 += v[jrow]*x[7*idx[jrow]+3]; 1141ed8eea03SBarry Smith sum5 += v[jrow]*x[7*idx[jrow]+4]; 1142ed8eea03SBarry Smith sum6 += v[jrow]*x[7*idx[jrow]+5]; 1143ed8eea03SBarry Smith sum7 += v[jrow]*x[7*idx[jrow]+6]; 1144ed8eea03SBarry Smith jrow++; 1145ed8eea03SBarry Smith } 1146ed8eea03SBarry Smith y[7*i] += sum1; 1147ed8eea03SBarry Smith y[7*i+1] += sum2; 1148ed8eea03SBarry Smith y[7*i+2] += sum3; 1149ed8eea03SBarry Smith y[7*i+3] += sum4; 1150ed8eea03SBarry Smith y[7*i+4] += sum5; 1151ed8eea03SBarry Smith y[7*i+5] += sum6; 1152ed8eea03SBarry Smith y[7*i+6] += sum7; 1153ed8eea03SBarry Smith } 1154ed8eea03SBarry Smith 1155dc0b31edSSatish Balay ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 11563649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1157ed8eea03SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1158ed8eea03SBarry Smith PetscFunctionReturn(0); 1159ed8eea03SBarry Smith } 1160ed8eea03SBarry Smith 1161ed8eea03SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1162ed8eea03SBarry Smith { 1163ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1164ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1165d2840e09SBarry Smith const PetscScalar *x,*v; 1166d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1167ed8eea03SBarry Smith PetscErrorCode ierr; 1168d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1169d2840e09SBarry Smith PetscInt n,i; 1170ed8eea03SBarry Smith 1171ed8eea03SBarry Smith PetscFunctionBegin; 1172ed8eea03SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 11733649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1174ed8eea03SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1175ed8eea03SBarry Smith for (i=0; i<m; i++) { 1176ed8eea03SBarry Smith idx = a->j + a->i[i]; 1177ed8eea03SBarry Smith v = a->a + a->i[i]; 1178ed8eea03SBarry Smith n = a->i[i+1] - a->i[i]; 1179ed8eea03SBarry Smith alpha1 = x[7*i]; 1180ed8eea03SBarry Smith alpha2 = x[7*i+1]; 1181ed8eea03SBarry Smith alpha3 = x[7*i+2]; 1182ed8eea03SBarry Smith alpha4 = x[7*i+3]; 1183ed8eea03SBarry Smith alpha5 = x[7*i+4]; 1184ed8eea03SBarry Smith alpha6 = x[7*i+5]; 1185ed8eea03SBarry Smith alpha7 = x[7*i+6]; 1186ed8eea03SBarry Smith while (n-->0) { 1187ed8eea03SBarry Smith y[7*(*idx)] += alpha1*(*v); 1188ed8eea03SBarry Smith y[7*(*idx)+1] += alpha2*(*v); 1189ed8eea03SBarry Smith y[7*(*idx)+2] += alpha3*(*v); 1190ed8eea03SBarry Smith y[7*(*idx)+3] += alpha4*(*v); 1191ed8eea03SBarry Smith y[7*(*idx)+4] += alpha5*(*v); 1192ed8eea03SBarry Smith y[7*(*idx)+5] += alpha6*(*v); 1193ed8eea03SBarry Smith y[7*(*idx)+6] += alpha7*(*v); 1194ed8eea03SBarry Smith idx++; v++; 1195ed8eea03SBarry Smith } 1196ed8eea03SBarry Smith } 1197dc0b31edSSatish Balay ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 11983649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1199ed8eea03SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1200ed8eea03SBarry Smith PetscFunctionReturn(0); 1201ed8eea03SBarry Smith } 1202ed8eea03SBarry Smith 1203dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 120466ed3db0SBarry Smith { 120566ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 120666ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1207d2840e09SBarry Smith const PetscScalar *x,*v; 1208d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1209dfbe8321SBarry Smith PetscErrorCode ierr; 1210d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1211d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 121266ed3db0SBarry Smith 121366ed3db0SBarry Smith PetscFunctionBegin; 12143649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 12151ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 121666ed3db0SBarry Smith idx = a->j; 121766ed3db0SBarry Smith v = a->a; 121866ed3db0SBarry Smith ii = a->i; 121966ed3db0SBarry Smith 122066ed3db0SBarry Smith for (i=0; i<m; i++) { 122166ed3db0SBarry Smith jrow = ii[i]; 122266ed3db0SBarry Smith n = ii[i+1] - jrow; 122366ed3db0SBarry Smith sum1 = 0.0; 122466ed3db0SBarry Smith sum2 = 0.0; 122566ed3db0SBarry Smith sum3 = 0.0; 122666ed3db0SBarry Smith sum4 = 0.0; 122766ed3db0SBarry Smith sum5 = 0.0; 122866ed3db0SBarry Smith sum6 = 0.0; 122966ed3db0SBarry Smith sum7 = 0.0; 123066ed3db0SBarry Smith sum8 = 0.0; 123126fbe8dcSKarl Rupp 1232b3c51c66SMatthew Knepley nonzerorow += (n>0); 123366ed3db0SBarry Smith for (j=0; j<n; j++) { 123466ed3db0SBarry Smith sum1 += v[jrow]*x[8*idx[jrow]]; 123566ed3db0SBarry Smith sum2 += v[jrow]*x[8*idx[jrow]+1]; 123666ed3db0SBarry Smith sum3 += v[jrow]*x[8*idx[jrow]+2]; 123766ed3db0SBarry Smith sum4 += v[jrow]*x[8*idx[jrow]+3]; 123866ed3db0SBarry Smith sum5 += v[jrow]*x[8*idx[jrow]+4]; 123966ed3db0SBarry Smith sum6 += v[jrow]*x[8*idx[jrow]+5]; 124066ed3db0SBarry Smith sum7 += v[jrow]*x[8*idx[jrow]+6]; 124166ed3db0SBarry Smith sum8 += v[jrow]*x[8*idx[jrow]+7]; 124266ed3db0SBarry Smith jrow++; 124366ed3db0SBarry Smith } 124466ed3db0SBarry Smith y[8*i] = sum1; 124566ed3db0SBarry Smith y[8*i+1] = sum2; 124666ed3db0SBarry Smith y[8*i+2] = sum3; 124766ed3db0SBarry Smith y[8*i+3] = sum4; 124866ed3db0SBarry Smith y[8*i+4] = sum5; 124966ed3db0SBarry Smith y[8*i+5] = sum6; 125066ed3db0SBarry Smith y[8*i+6] = sum7; 125166ed3db0SBarry Smith y[8*i+7] = sum8; 125266ed3db0SBarry Smith } 125366ed3db0SBarry Smith 1254dc0b31edSSatish Balay ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr); 12553649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 12561ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 125766ed3db0SBarry Smith PetscFunctionReturn(0); 125866ed3db0SBarry Smith } 125966ed3db0SBarry Smith 1260dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 126166ed3db0SBarry Smith { 126266ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 126366ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1264d2840e09SBarry Smith const PetscScalar *x,*v; 1265d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1266dfbe8321SBarry Smith PetscErrorCode ierr; 1267d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1268d2840e09SBarry Smith PetscInt n,i; 126966ed3db0SBarry Smith 127066ed3db0SBarry Smith PetscFunctionBegin; 1271d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 12723649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 12731ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1274bfec09a0SHong Zhang 127566ed3db0SBarry Smith for (i=0; i<m; i++) { 1276bfec09a0SHong Zhang idx = a->j + a->i[i]; 1277bfec09a0SHong Zhang v = a->a + a->i[i]; 127866ed3db0SBarry Smith n = a->i[i+1] - a->i[i]; 127966ed3db0SBarry Smith alpha1 = x[8*i]; 128066ed3db0SBarry Smith alpha2 = x[8*i+1]; 128166ed3db0SBarry Smith alpha3 = x[8*i+2]; 128266ed3db0SBarry Smith alpha4 = x[8*i+3]; 128366ed3db0SBarry Smith alpha5 = x[8*i+4]; 128466ed3db0SBarry Smith alpha6 = x[8*i+5]; 128566ed3db0SBarry Smith alpha7 = x[8*i+6]; 128666ed3db0SBarry Smith alpha8 = x[8*i+7]; 128766ed3db0SBarry Smith while (n-->0) { 128866ed3db0SBarry Smith y[8*(*idx)] += alpha1*(*v); 128966ed3db0SBarry Smith y[8*(*idx)+1] += alpha2*(*v); 129066ed3db0SBarry Smith y[8*(*idx)+2] += alpha3*(*v); 129166ed3db0SBarry Smith y[8*(*idx)+3] += alpha4*(*v); 129266ed3db0SBarry Smith y[8*(*idx)+4] += alpha5*(*v); 129366ed3db0SBarry Smith y[8*(*idx)+5] += alpha6*(*v); 129466ed3db0SBarry Smith y[8*(*idx)+6] += alpha7*(*v); 129566ed3db0SBarry Smith y[8*(*idx)+7] += alpha8*(*v); 129666ed3db0SBarry Smith idx++; v++; 129766ed3db0SBarry Smith } 129866ed3db0SBarry Smith } 1299dc0b31edSSatish Balay ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 13003649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 13011ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 130266ed3db0SBarry Smith PetscFunctionReturn(0); 130366ed3db0SBarry Smith } 130466ed3db0SBarry Smith 1305dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 130666ed3db0SBarry Smith { 130766ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 130866ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1309d2840e09SBarry Smith const PetscScalar *x,*v; 1310d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1311dfbe8321SBarry Smith PetscErrorCode ierr; 1312d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1313b24ad042SBarry Smith PetscInt n,i,jrow,j; 131466ed3db0SBarry Smith 131566ed3db0SBarry Smith PetscFunctionBegin; 131666ed3db0SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 13173649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 13181ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 131966ed3db0SBarry Smith idx = a->j; 132066ed3db0SBarry Smith v = a->a; 132166ed3db0SBarry Smith ii = a->i; 132266ed3db0SBarry Smith 132366ed3db0SBarry Smith for (i=0; i<m; i++) { 132466ed3db0SBarry Smith jrow = ii[i]; 132566ed3db0SBarry Smith n = ii[i+1] - jrow; 132666ed3db0SBarry Smith sum1 = 0.0; 132766ed3db0SBarry Smith sum2 = 0.0; 132866ed3db0SBarry Smith sum3 = 0.0; 132966ed3db0SBarry Smith sum4 = 0.0; 133066ed3db0SBarry Smith sum5 = 0.0; 133166ed3db0SBarry Smith sum6 = 0.0; 133266ed3db0SBarry Smith sum7 = 0.0; 133366ed3db0SBarry Smith sum8 = 0.0; 133466ed3db0SBarry Smith for (j=0; j<n; j++) { 133566ed3db0SBarry Smith sum1 += v[jrow]*x[8*idx[jrow]]; 133666ed3db0SBarry Smith sum2 += v[jrow]*x[8*idx[jrow]+1]; 133766ed3db0SBarry Smith sum3 += v[jrow]*x[8*idx[jrow]+2]; 133866ed3db0SBarry Smith sum4 += v[jrow]*x[8*idx[jrow]+3]; 133966ed3db0SBarry Smith sum5 += v[jrow]*x[8*idx[jrow]+4]; 134066ed3db0SBarry Smith sum6 += v[jrow]*x[8*idx[jrow]+5]; 134166ed3db0SBarry Smith sum7 += v[jrow]*x[8*idx[jrow]+6]; 134266ed3db0SBarry Smith sum8 += v[jrow]*x[8*idx[jrow]+7]; 134366ed3db0SBarry Smith jrow++; 134466ed3db0SBarry Smith } 134566ed3db0SBarry Smith y[8*i] += sum1; 134666ed3db0SBarry Smith y[8*i+1] += sum2; 134766ed3db0SBarry Smith y[8*i+2] += sum3; 134866ed3db0SBarry Smith y[8*i+3] += sum4; 134966ed3db0SBarry Smith y[8*i+4] += sum5; 135066ed3db0SBarry Smith y[8*i+5] += sum6; 135166ed3db0SBarry Smith y[8*i+6] += sum7; 135266ed3db0SBarry Smith y[8*i+7] += sum8; 135366ed3db0SBarry Smith } 135466ed3db0SBarry Smith 1355dc0b31edSSatish Balay ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 13563649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 13571ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 135866ed3db0SBarry Smith PetscFunctionReturn(0); 135966ed3db0SBarry Smith } 136066ed3db0SBarry Smith 1361dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 136266ed3db0SBarry Smith { 136366ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 136466ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1365d2840e09SBarry Smith const PetscScalar *x,*v; 1366d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1367dfbe8321SBarry Smith PetscErrorCode ierr; 1368d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1369d2840e09SBarry Smith PetscInt n,i; 137066ed3db0SBarry Smith 137166ed3db0SBarry Smith PetscFunctionBegin; 137266ed3db0SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 13733649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 13741ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 137566ed3db0SBarry Smith for (i=0; i<m; i++) { 1376bfec09a0SHong Zhang idx = a->j + a->i[i]; 1377bfec09a0SHong Zhang v = a->a + a->i[i]; 137866ed3db0SBarry Smith n = a->i[i+1] - a->i[i]; 137966ed3db0SBarry Smith alpha1 = x[8*i]; 138066ed3db0SBarry Smith alpha2 = x[8*i+1]; 138166ed3db0SBarry Smith alpha3 = x[8*i+2]; 138266ed3db0SBarry Smith alpha4 = x[8*i+3]; 138366ed3db0SBarry Smith alpha5 = x[8*i+4]; 138466ed3db0SBarry Smith alpha6 = x[8*i+5]; 138566ed3db0SBarry Smith alpha7 = x[8*i+6]; 138666ed3db0SBarry Smith alpha8 = x[8*i+7]; 138766ed3db0SBarry Smith while (n-->0) { 138866ed3db0SBarry Smith y[8*(*idx)] += alpha1*(*v); 138966ed3db0SBarry Smith y[8*(*idx)+1] += alpha2*(*v); 139066ed3db0SBarry Smith y[8*(*idx)+2] += alpha3*(*v); 139166ed3db0SBarry Smith y[8*(*idx)+3] += alpha4*(*v); 139266ed3db0SBarry Smith y[8*(*idx)+4] += alpha5*(*v); 139366ed3db0SBarry Smith y[8*(*idx)+5] += alpha6*(*v); 139466ed3db0SBarry Smith y[8*(*idx)+6] += alpha7*(*v); 139566ed3db0SBarry Smith y[8*(*idx)+7] += alpha8*(*v); 139666ed3db0SBarry Smith idx++; v++; 139766ed3db0SBarry Smith } 139866ed3db0SBarry Smith } 1399dc0b31edSSatish Balay ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 14003649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 14011ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 14022f7816d4SBarry Smith PetscFunctionReturn(0); 14032f7816d4SBarry Smith } 14042f7816d4SBarry Smith 14052b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/ 1406dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 14072b692628SMatthew Knepley { 14082b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 14092b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1410d2840e09SBarry Smith const PetscScalar *x,*v; 1411d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1412dfbe8321SBarry Smith PetscErrorCode ierr; 1413d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1414d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 14152b692628SMatthew Knepley 14162b692628SMatthew Knepley PetscFunctionBegin; 14173649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 14181ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 14192b692628SMatthew Knepley idx = a->j; 14202b692628SMatthew Knepley v = a->a; 14212b692628SMatthew Knepley ii = a->i; 14222b692628SMatthew Knepley 14232b692628SMatthew Knepley for (i=0; i<m; i++) { 14242b692628SMatthew Knepley jrow = ii[i]; 14252b692628SMatthew Knepley n = ii[i+1] - jrow; 14262b692628SMatthew Knepley sum1 = 0.0; 14272b692628SMatthew Knepley sum2 = 0.0; 14282b692628SMatthew Knepley sum3 = 0.0; 14292b692628SMatthew Knepley sum4 = 0.0; 14302b692628SMatthew Knepley sum5 = 0.0; 14312b692628SMatthew Knepley sum6 = 0.0; 14322b692628SMatthew Knepley sum7 = 0.0; 14332b692628SMatthew Knepley sum8 = 0.0; 14342b692628SMatthew Knepley sum9 = 0.0; 143526fbe8dcSKarl Rupp 1436b3c51c66SMatthew Knepley nonzerorow += (n>0); 14372b692628SMatthew Knepley for (j=0; j<n; j++) { 14382b692628SMatthew Knepley sum1 += v[jrow]*x[9*idx[jrow]]; 14392b692628SMatthew Knepley sum2 += v[jrow]*x[9*idx[jrow]+1]; 14402b692628SMatthew Knepley sum3 += v[jrow]*x[9*idx[jrow]+2]; 14412b692628SMatthew Knepley sum4 += v[jrow]*x[9*idx[jrow]+3]; 14422b692628SMatthew Knepley sum5 += v[jrow]*x[9*idx[jrow]+4]; 14432b692628SMatthew Knepley sum6 += v[jrow]*x[9*idx[jrow]+5]; 14442b692628SMatthew Knepley sum7 += v[jrow]*x[9*idx[jrow]+6]; 14452b692628SMatthew Knepley sum8 += v[jrow]*x[9*idx[jrow]+7]; 14462b692628SMatthew Knepley sum9 += v[jrow]*x[9*idx[jrow]+8]; 14472b692628SMatthew Knepley jrow++; 14482b692628SMatthew Knepley } 14492b692628SMatthew Knepley y[9*i] = sum1; 14502b692628SMatthew Knepley y[9*i+1] = sum2; 14512b692628SMatthew Knepley y[9*i+2] = sum3; 14522b692628SMatthew Knepley y[9*i+3] = sum4; 14532b692628SMatthew Knepley y[9*i+4] = sum5; 14542b692628SMatthew Knepley y[9*i+5] = sum6; 14552b692628SMatthew Knepley y[9*i+6] = sum7; 14562b692628SMatthew Knepley y[9*i+7] = sum8; 14572b692628SMatthew Knepley y[9*i+8] = sum9; 14582b692628SMatthew Knepley } 14592b692628SMatthew Knepley 1460dc0b31edSSatish Balay ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr); 14613649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 14621ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 14632b692628SMatthew Knepley PetscFunctionReturn(0); 14642b692628SMatthew Knepley } 14652b692628SMatthew Knepley 1466b9cda22cSBarry Smith /* ------------------------------------------------------------------------------*/ 1467b9cda22cSBarry Smith 1468dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 14692b692628SMatthew Knepley { 14702b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 14712b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1472d2840e09SBarry Smith const PetscScalar *x,*v; 1473d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1474dfbe8321SBarry Smith PetscErrorCode ierr; 1475d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1476d2840e09SBarry Smith PetscInt n,i; 14772b692628SMatthew Knepley 14782b692628SMatthew Knepley PetscFunctionBegin; 1479d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 14803649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 14811ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 14822b692628SMatthew Knepley 14832b692628SMatthew Knepley for (i=0; i<m; i++) { 14842b692628SMatthew Knepley idx = a->j + a->i[i]; 14852b692628SMatthew Knepley v = a->a + a->i[i]; 14862b692628SMatthew Knepley n = a->i[i+1] - a->i[i]; 14872b692628SMatthew Knepley alpha1 = x[9*i]; 14882b692628SMatthew Knepley alpha2 = x[9*i+1]; 14892b692628SMatthew Knepley alpha3 = x[9*i+2]; 14902b692628SMatthew Knepley alpha4 = x[9*i+3]; 14912b692628SMatthew Knepley alpha5 = x[9*i+4]; 14922b692628SMatthew Knepley alpha6 = x[9*i+5]; 14932b692628SMatthew Knepley alpha7 = x[9*i+6]; 14942b692628SMatthew Knepley alpha8 = x[9*i+7]; 14952f6bd0c6SMatthew Knepley alpha9 = x[9*i+8]; 14962b692628SMatthew Knepley while (n-->0) { 14972b692628SMatthew Knepley y[9*(*idx)] += alpha1*(*v); 14982b692628SMatthew Knepley y[9*(*idx)+1] += alpha2*(*v); 14992b692628SMatthew Knepley y[9*(*idx)+2] += alpha3*(*v); 15002b692628SMatthew Knepley y[9*(*idx)+3] += alpha4*(*v); 15012b692628SMatthew Knepley y[9*(*idx)+4] += alpha5*(*v); 15022b692628SMatthew Knepley y[9*(*idx)+5] += alpha6*(*v); 15032b692628SMatthew Knepley y[9*(*idx)+6] += alpha7*(*v); 15042b692628SMatthew Knepley y[9*(*idx)+7] += alpha8*(*v); 15052b692628SMatthew Knepley y[9*(*idx)+8] += alpha9*(*v); 15062b692628SMatthew Knepley idx++; v++; 15072b692628SMatthew Knepley } 15082b692628SMatthew Knepley } 1509dc0b31edSSatish Balay ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 15103649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 15111ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 15122b692628SMatthew Knepley PetscFunctionReturn(0); 15132b692628SMatthew Knepley } 15142b692628SMatthew Knepley 1515dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 15162b692628SMatthew Knepley { 15172b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 15182b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1519d2840e09SBarry Smith const PetscScalar *x,*v; 1520d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1521dfbe8321SBarry Smith PetscErrorCode ierr; 1522d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1523b24ad042SBarry Smith PetscInt n,i,jrow,j; 15242b692628SMatthew Knepley 15252b692628SMatthew Knepley PetscFunctionBegin; 15262b692628SMatthew Knepley if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 15273649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 15281ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 15292b692628SMatthew Knepley idx = a->j; 15302b692628SMatthew Knepley v = a->a; 15312b692628SMatthew Knepley ii = a->i; 15322b692628SMatthew Knepley 15332b692628SMatthew Knepley for (i=0; i<m; i++) { 15342b692628SMatthew Knepley jrow = ii[i]; 15352b692628SMatthew Knepley n = ii[i+1] - jrow; 15362b692628SMatthew Knepley sum1 = 0.0; 15372b692628SMatthew Knepley sum2 = 0.0; 15382b692628SMatthew Knepley sum3 = 0.0; 15392b692628SMatthew Knepley sum4 = 0.0; 15402b692628SMatthew Knepley sum5 = 0.0; 15412b692628SMatthew Knepley sum6 = 0.0; 15422b692628SMatthew Knepley sum7 = 0.0; 15432b692628SMatthew Knepley sum8 = 0.0; 15442b692628SMatthew Knepley sum9 = 0.0; 15452b692628SMatthew Knepley for (j=0; j<n; j++) { 15462b692628SMatthew Knepley sum1 += v[jrow]*x[9*idx[jrow]]; 15472b692628SMatthew Knepley sum2 += v[jrow]*x[9*idx[jrow]+1]; 15482b692628SMatthew Knepley sum3 += v[jrow]*x[9*idx[jrow]+2]; 15492b692628SMatthew Knepley sum4 += v[jrow]*x[9*idx[jrow]+3]; 15502b692628SMatthew Knepley sum5 += v[jrow]*x[9*idx[jrow]+4]; 15512b692628SMatthew Knepley sum6 += v[jrow]*x[9*idx[jrow]+5]; 15522b692628SMatthew Knepley sum7 += v[jrow]*x[9*idx[jrow]+6]; 15532b692628SMatthew Knepley sum8 += v[jrow]*x[9*idx[jrow]+7]; 15542b692628SMatthew Knepley sum9 += v[jrow]*x[9*idx[jrow]+8]; 15552b692628SMatthew Knepley jrow++; 15562b692628SMatthew Knepley } 15572b692628SMatthew Knepley y[9*i] += sum1; 15582b692628SMatthew Knepley y[9*i+1] += sum2; 15592b692628SMatthew Knepley y[9*i+2] += sum3; 15602b692628SMatthew Knepley y[9*i+3] += sum4; 15612b692628SMatthew Knepley y[9*i+4] += sum5; 15622b692628SMatthew Knepley y[9*i+5] += sum6; 15632b692628SMatthew Knepley y[9*i+6] += sum7; 15642b692628SMatthew Knepley y[9*i+7] += sum8; 15652b692628SMatthew Knepley y[9*i+8] += sum9; 15662b692628SMatthew Knepley } 15672b692628SMatthew Knepley 1568efee365bSSatish Balay ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 15693649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 15701ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 15712b692628SMatthew Knepley PetscFunctionReturn(0); 15722b692628SMatthew Knepley } 15732b692628SMatthew Knepley 1574dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 15752b692628SMatthew Knepley { 15762b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 15772b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1578d2840e09SBarry Smith const PetscScalar *x,*v; 1579d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1580dfbe8321SBarry Smith PetscErrorCode ierr; 1581d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1582d2840e09SBarry Smith PetscInt n,i; 15832b692628SMatthew Knepley 15842b692628SMatthew Knepley PetscFunctionBegin; 15852b692628SMatthew Knepley if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 15863649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 15871ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 15882b692628SMatthew Knepley for (i=0; i<m; i++) { 15892b692628SMatthew Knepley idx = a->j + a->i[i]; 15902b692628SMatthew Knepley v = a->a + a->i[i]; 15912b692628SMatthew Knepley n = a->i[i+1] - a->i[i]; 15922b692628SMatthew Knepley alpha1 = x[9*i]; 15932b692628SMatthew Knepley alpha2 = x[9*i+1]; 15942b692628SMatthew Knepley alpha3 = x[9*i+2]; 15952b692628SMatthew Knepley alpha4 = x[9*i+3]; 15962b692628SMatthew Knepley alpha5 = x[9*i+4]; 15972b692628SMatthew Knepley alpha6 = x[9*i+5]; 15982b692628SMatthew Knepley alpha7 = x[9*i+6]; 15992b692628SMatthew Knepley alpha8 = x[9*i+7]; 16002b692628SMatthew Knepley alpha9 = x[9*i+8]; 16012b692628SMatthew Knepley while (n-->0) { 16022b692628SMatthew Knepley y[9*(*idx)] += alpha1*(*v); 16032b692628SMatthew Knepley y[9*(*idx)+1] += alpha2*(*v); 16042b692628SMatthew Knepley y[9*(*idx)+2] += alpha3*(*v); 16052b692628SMatthew Knepley y[9*(*idx)+3] += alpha4*(*v); 16062b692628SMatthew Knepley y[9*(*idx)+4] += alpha5*(*v); 16072b692628SMatthew Knepley y[9*(*idx)+5] += alpha6*(*v); 16082b692628SMatthew Knepley y[9*(*idx)+6] += alpha7*(*v); 16092b692628SMatthew Knepley y[9*(*idx)+7] += alpha8*(*v); 16102b692628SMatthew Knepley y[9*(*idx)+8] += alpha9*(*v); 16112b692628SMatthew Knepley idx++; v++; 16122b692628SMatthew Knepley } 16132b692628SMatthew Knepley } 1614dc0b31edSSatish Balay ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 16153649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 16161ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 16172b692628SMatthew Knepley PetscFunctionReturn(0); 16182b692628SMatthew Knepley } 1619b9cda22cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1620b9cda22cSBarry Smith { 1621b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1622b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1623d2840e09SBarry Smith const PetscScalar *x,*v; 1624d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1625b9cda22cSBarry Smith PetscErrorCode ierr; 1626d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1627d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 1628b9cda22cSBarry Smith 1629b9cda22cSBarry Smith PetscFunctionBegin; 16303649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1631b9cda22cSBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1632b9cda22cSBarry Smith idx = a->j; 1633b9cda22cSBarry Smith v = a->a; 1634b9cda22cSBarry Smith ii = a->i; 1635b9cda22cSBarry Smith 1636b9cda22cSBarry Smith for (i=0; i<m; i++) { 1637b9cda22cSBarry Smith jrow = ii[i]; 1638b9cda22cSBarry Smith n = ii[i+1] - jrow; 1639b9cda22cSBarry Smith sum1 = 0.0; 1640b9cda22cSBarry Smith sum2 = 0.0; 1641b9cda22cSBarry Smith sum3 = 0.0; 1642b9cda22cSBarry Smith sum4 = 0.0; 1643b9cda22cSBarry Smith sum5 = 0.0; 1644b9cda22cSBarry Smith sum6 = 0.0; 1645b9cda22cSBarry Smith sum7 = 0.0; 1646b9cda22cSBarry Smith sum8 = 0.0; 1647b9cda22cSBarry Smith sum9 = 0.0; 1648b9cda22cSBarry Smith sum10 = 0.0; 164926fbe8dcSKarl Rupp 1650b3c51c66SMatthew Knepley nonzerorow += (n>0); 1651b9cda22cSBarry Smith for (j=0; j<n; j++) { 1652b9cda22cSBarry Smith sum1 += v[jrow]*x[10*idx[jrow]]; 1653b9cda22cSBarry Smith sum2 += v[jrow]*x[10*idx[jrow]+1]; 1654b9cda22cSBarry Smith sum3 += v[jrow]*x[10*idx[jrow]+2]; 1655b9cda22cSBarry Smith sum4 += v[jrow]*x[10*idx[jrow]+3]; 1656b9cda22cSBarry Smith sum5 += v[jrow]*x[10*idx[jrow]+4]; 1657b9cda22cSBarry Smith sum6 += v[jrow]*x[10*idx[jrow]+5]; 1658b9cda22cSBarry Smith sum7 += v[jrow]*x[10*idx[jrow]+6]; 1659b9cda22cSBarry Smith sum8 += v[jrow]*x[10*idx[jrow]+7]; 1660b9cda22cSBarry Smith sum9 += v[jrow]*x[10*idx[jrow]+8]; 1661b9cda22cSBarry Smith sum10 += v[jrow]*x[10*idx[jrow]+9]; 1662b9cda22cSBarry Smith jrow++; 1663b9cda22cSBarry Smith } 1664b9cda22cSBarry Smith y[10*i] = sum1; 1665b9cda22cSBarry Smith y[10*i+1] = sum2; 1666b9cda22cSBarry Smith y[10*i+2] = sum3; 1667b9cda22cSBarry Smith y[10*i+3] = sum4; 1668b9cda22cSBarry Smith y[10*i+4] = sum5; 1669b9cda22cSBarry Smith y[10*i+5] = sum6; 1670b9cda22cSBarry Smith y[10*i+6] = sum7; 1671b9cda22cSBarry Smith y[10*i+7] = sum8; 1672b9cda22cSBarry Smith y[10*i+8] = sum9; 1673b9cda22cSBarry Smith y[10*i+9] = sum10; 1674b9cda22cSBarry Smith } 1675b9cda22cSBarry Smith 1676dc0b31edSSatish Balay ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr); 16773649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1678b9cda22cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1679b9cda22cSBarry Smith PetscFunctionReturn(0); 1680b9cda22cSBarry Smith } 1681b9cda22cSBarry Smith 1682b9cda22cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1683b9cda22cSBarry Smith { 1684b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1685b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1686d2840e09SBarry Smith const PetscScalar *x,*v; 1687d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1688b9cda22cSBarry Smith PetscErrorCode ierr; 1689d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1690b9cda22cSBarry Smith PetscInt n,i,jrow,j; 1691b9cda22cSBarry Smith 1692b9cda22cSBarry Smith PetscFunctionBegin; 1693b9cda22cSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 16943649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1695b9cda22cSBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1696b9cda22cSBarry Smith idx = a->j; 1697b9cda22cSBarry Smith v = a->a; 1698b9cda22cSBarry Smith ii = a->i; 1699b9cda22cSBarry Smith 1700b9cda22cSBarry Smith for (i=0; i<m; i++) { 1701b9cda22cSBarry Smith jrow = ii[i]; 1702b9cda22cSBarry Smith n = ii[i+1] - jrow; 1703b9cda22cSBarry Smith sum1 = 0.0; 1704b9cda22cSBarry Smith sum2 = 0.0; 1705b9cda22cSBarry Smith sum3 = 0.0; 1706b9cda22cSBarry Smith sum4 = 0.0; 1707b9cda22cSBarry Smith sum5 = 0.0; 1708b9cda22cSBarry Smith sum6 = 0.0; 1709b9cda22cSBarry Smith sum7 = 0.0; 1710b9cda22cSBarry Smith sum8 = 0.0; 1711b9cda22cSBarry Smith sum9 = 0.0; 1712b9cda22cSBarry Smith sum10 = 0.0; 1713b9cda22cSBarry Smith for (j=0; j<n; j++) { 1714b9cda22cSBarry Smith sum1 += v[jrow]*x[10*idx[jrow]]; 1715b9cda22cSBarry Smith sum2 += v[jrow]*x[10*idx[jrow]+1]; 1716b9cda22cSBarry Smith sum3 += v[jrow]*x[10*idx[jrow]+2]; 1717b9cda22cSBarry Smith sum4 += v[jrow]*x[10*idx[jrow]+3]; 1718b9cda22cSBarry Smith sum5 += v[jrow]*x[10*idx[jrow]+4]; 1719b9cda22cSBarry Smith sum6 += v[jrow]*x[10*idx[jrow]+5]; 1720b9cda22cSBarry Smith sum7 += v[jrow]*x[10*idx[jrow]+6]; 1721b9cda22cSBarry Smith sum8 += v[jrow]*x[10*idx[jrow]+7]; 1722b9cda22cSBarry Smith sum9 += v[jrow]*x[10*idx[jrow]+8]; 1723b9cda22cSBarry Smith sum10 += v[jrow]*x[10*idx[jrow]+9]; 1724b9cda22cSBarry Smith jrow++; 1725b9cda22cSBarry Smith } 1726b9cda22cSBarry Smith y[10*i] += sum1; 1727b9cda22cSBarry Smith y[10*i+1] += sum2; 1728b9cda22cSBarry Smith y[10*i+2] += sum3; 1729b9cda22cSBarry Smith y[10*i+3] += sum4; 1730b9cda22cSBarry Smith y[10*i+4] += sum5; 1731b9cda22cSBarry Smith y[10*i+5] += sum6; 1732b9cda22cSBarry Smith y[10*i+6] += sum7; 1733b9cda22cSBarry Smith y[10*i+7] += sum8; 1734b9cda22cSBarry Smith y[10*i+8] += sum9; 1735b9cda22cSBarry Smith y[10*i+9] += sum10; 1736b9cda22cSBarry Smith } 1737b9cda22cSBarry Smith 1738dc0b31edSSatish Balay ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 17393649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1740b9cda22cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1741b9cda22cSBarry Smith PetscFunctionReturn(0); 1742b9cda22cSBarry Smith } 1743b9cda22cSBarry Smith 1744b9cda22cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1745b9cda22cSBarry Smith { 1746b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1747b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1748d2840e09SBarry Smith const PetscScalar *x,*v; 1749d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1750b9cda22cSBarry Smith PetscErrorCode ierr; 1751d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1752d2840e09SBarry Smith PetscInt n,i; 1753b9cda22cSBarry Smith 1754b9cda22cSBarry Smith PetscFunctionBegin; 1755d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 17563649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1757b9cda22cSBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1758b9cda22cSBarry Smith 1759b9cda22cSBarry Smith for (i=0; i<m; i++) { 1760b9cda22cSBarry Smith idx = a->j + a->i[i]; 1761b9cda22cSBarry Smith v = a->a + a->i[i]; 1762b9cda22cSBarry Smith n = a->i[i+1] - a->i[i]; 1763e29fdc22SBarry Smith alpha1 = x[10*i]; 1764e29fdc22SBarry Smith alpha2 = x[10*i+1]; 1765e29fdc22SBarry Smith alpha3 = x[10*i+2]; 1766e29fdc22SBarry Smith alpha4 = x[10*i+3]; 1767e29fdc22SBarry Smith alpha5 = x[10*i+4]; 1768e29fdc22SBarry Smith alpha6 = x[10*i+5]; 1769e29fdc22SBarry Smith alpha7 = x[10*i+6]; 1770e29fdc22SBarry Smith alpha8 = x[10*i+7]; 1771e29fdc22SBarry Smith alpha9 = x[10*i+8]; 1772b9cda22cSBarry Smith alpha10 = x[10*i+9]; 1773b9cda22cSBarry Smith while (n-->0) { 1774e29fdc22SBarry Smith y[10*(*idx)] += alpha1*(*v); 1775e29fdc22SBarry Smith y[10*(*idx)+1] += alpha2*(*v); 1776e29fdc22SBarry Smith y[10*(*idx)+2] += alpha3*(*v); 1777e29fdc22SBarry Smith y[10*(*idx)+3] += alpha4*(*v); 1778e29fdc22SBarry Smith y[10*(*idx)+4] += alpha5*(*v); 1779e29fdc22SBarry Smith y[10*(*idx)+5] += alpha6*(*v); 1780e29fdc22SBarry Smith y[10*(*idx)+6] += alpha7*(*v); 1781e29fdc22SBarry Smith y[10*(*idx)+7] += alpha8*(*v); 1782e29fdc22SBarry Smith y[10*(*idx)+8] += alpha9*(*v); 1783b9cda22cSBarry Smith y[10*(*idx)+9] += alpha10*(*v); 1784b9cda22cSBarry Smith idx++; v++; 1785b9cda22cSBarry Smith } 1786b9cda22cSBarry Smith } 1787dc0b31edSSatish Balay ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 17883649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1789b9cda22cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1790b9cda22cSBarry Smith PetscFunctionReturn(0); 1791b9cda22cSBarry Smith } 1792b9cda22cSBarry Smith 1793b9cda22cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1794b9cda22cSBarry Smith { 1795b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1796b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1797d2840e09SBarry Smith const PetscScalar *x,*v; 1798d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1799b9cda22cSBarry Smith PetscErrorCode ierr; 1800d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1801d2840e09SBarry Smith PetscInt n,i; 1802b9cda22cSBarry Smith 1803b9cda22cSBarry Smith PetscFunctionBegin; 1804b9cda22cSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 18053649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1806b9cda22cSBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1807b9cda22cSBarry Smith for (i=0; i<m; i++) { 1808b9cda22cSBarry Smith idx = a->j + a->i[i]; 1809b9cda22cSBarry Smith v = a->a + a->i[i]; 1810b9cda22cSBarry Smith n = a->i[i+1] - a->i[i]; 1811b9cda22cSBarry Smith alpha1 = x[10*i]; 1812b9cda22cSBarry Smith alpha2 = x[10*i+1]; 1813b9cda22cSBarry Smith alpha3 = x[10*i+2]; 1814b9cda22cSBarry Smith alpha4 = x[10*i+3]; 1815b9cda22cSBarry Smith alpha5 = x[10*i+4]; 1816b9cda22cSBarry Smith alpha6 = x[10*i+5]; 1817b9cda22cSBarry Smith alpha7 = x[10*i+6]; 1818b9cda22cSBarry Smith alpha8 = x[10*i+7]; 1819b9cda22cSBarry Smith alpha9 = x[10*i+8]; 1820b9cda22cSBarry Smith alpha10 = x[10*i+9]; 1821b9cda22cSBarry Smith while (n-->0) { 1822b9cda22cSBarry Smith y[10*(*idx)] += alpha1*(*v); 1823b9cda22cSBarry Smith y[10*(*idx)+1] += alpha2*(*v); 1824b9cda22cSBarry Smith y[10*(*idx)+2] += alpha3*(*v); 1825b9cda22cSBarry Smith y[10*(*idx)+3] += alpha4*(*v); 1826b9cda22cSBarry Smith y[10*(*idx)+4] += alpha5*(*v); 1827b9cda22cSBarry Smith y[10*(*idx)+5] += alpha6*(*v); 1828b9cda22cSBarry Smith y[10*(*idx)+6] += alpha7*(*v); 1829b9cda22cSBarry Smith y[10*(*idx)+7] += alpha8*(*v); 1830b9cda22cSBarry Smith y[10*(*idx)+8] += alpha9*(*v); 1831b9cda22cSBarry Smith y[10*(*idx)+9] += alpha10*(*v); 1832b9cda22cSBarry Smith idx++; v++; 1833b9cda22cSBarry Smith } 1834b9cda22cSBarry Smith } 1835dc0b31edSSatish Balay ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 18363649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1837b9cda22cSBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1838b9cda22cSBarry Smith PetscFunctionReturn(0); 1839b9cda22cSBarry Smith } 1840b9cda22cSBarry Smith 18412b692628SMatthew Knepley 18422f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/ 1843dbdd7285SBarry Smith PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1844dbdd7285SBarry Smith { 1845dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1846dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1847d2840e09SBarry Smith const PetscScalar *x,*v; 1848d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1849dbdd7285SBarry Smith PetscErrorCode ierr; 1850d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1851d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 1852dbdd7285SBarry Smith 1853dbdd7285SBarry Smith PetscFunctionBegin; 18543649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1855dbdd7285SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1856dbdd7285SBarry Smith idx = a->j; 1857dbdd7285SBarry Smith v = a->a; 1858dbdd7285SBarry Smith ii = a->i; 1859dbdd7285SBarry Smith 1860dbdd7285SBarry Smith for (i=0; i<m; i++) { 1861dbdd7285SBarry Smith jrow = ii[i]; 1862dbdd7285SBarry Smith n = ii[i+1] - jrow; 1863dbdd7285SBarry Smith sum1 = 0.0; 1864dbdd7285SBarry Smith sum2 = 0.0; 1865dbdd7285SBarry Smith sum3 = 0.0; 1866dbdd7285SBarry Smith sum4 = 0.0; 1867dbdd7285SBarry Smith sum5 = 0.0; 1868dbdd7285SBarry Smith sum6 = 0.0; 1869dbdd7285SBarry Smith sum7 = 0.0; 1870dbdd7285SBarry Smith sum8 = 0.0; 1871dbdd7285SBarry Smith sum9 = 0.0; 1872dbdd7285SBarry Smith sum10 = 0.0; 1873dbdd7285SBarry Smith sum11 = 0.0; 187426fbe8dcSKarl Rupp 1875dbdd7285SBarry Smith nonzerorow += (n>0); 1876dbdd7285SBarry Smith for (j=0; j<n; j++) { 1877dbdd7285SBarry Smith sum1 += v[jrow]*x[11*idx[jrow]]; 1878dbdd7285SBarry Smith sum2 += v[jrow]*x[11*idx[jrow]+1]; 1879dbdd7285SBarry Smith sum3 += v[jrow]*x[11*idx[jrow]+2]; 1880dbdd7285SBarry Smith sum4 += v[jrow]*x[11*idx[jrow]+3]; 1881dbdd7285SBarry Smith sum5 += v[jrow]*x[11*idx[jrow]+4]; 1882dbdd7285SBarry Smith sum6 += v[jrow]*x[11*idx[jrow]+5]; 1883dbdd7285SBarry Smith sum7 += v[jrow]*x[11*idx[jrow]+6]; 1884dbdd7285SBarry Smith sum8 += v[jrow]*x[11*idx[jrow]+7]; 1885dbdd7285SBarry Smith sum9 += v[jrow]*x[11*idx[jrow]+8]; 1886dbdd7285SBarry Smith sum10 += v[jrow]*x[11*idx[jrow]+9]; 1887dbdd7285SBarry Smith sum11 += v[jrow]*x[11*idx[jrow]+10]; 1888dbdd7285SBarry Smith jrow++; 1889dbdd7285SBarry Smith } 1890dbdd7285SBarry Smith y[11*i] = sum1; 1891dbdd7285SBarry Smith y[11*i+1] = sum2; 1892dbdd7285SBarry Smith y[11*i+2] = sum3; 1893dbdd7285SBarry Smith y[11*i+3] = sum4; 1894dbdd7285SBarry Smith y[11*i+4] = sum5; 1895dbdd7285SBarry Smith y[11*i+5] = sum6; 1896dbdd7285SBarry Smith y[11*i+6] = sum7; 1897dbdd7285SBarry Smith y[11*i+7] = sum8; 1898dbdd7285SBarry Smith y[11*i+8] = sum9; 1899dbdd7285SBarry Smith y[11*i+9] = sum10; 1900dbdd7285SBarry Smith y[11*i+10] = sum11; 1901dbdd7285SBarry Smith } 1902dbdd7285SBarry Smith 1903dbdd7285SBarry Smith ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr); 19043649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1905dbdd7285SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1906dbdd7285SBarry Smith PetscFunctionReturn(0); 1907dbdd7285SBarry Smith } 1908dbdd7285SBarry Smith 1909dbdd7285SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 1910dbdd7285SBarry Smith { 1911dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1912dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1913d2840e09SBarry Smith const PetscScalar *x,*v; 1914d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1915dbdd7285SBarry Smith PetscErrorCode ierr; 1916d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1917dbdd7285SBarry Smith PetscInt n,i,jrow,j; 1918dbdd7285SBarry Smith 1919dbdd7285SBarry Smith PetscFunctionBegin; 1920dbdd7285SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 19213649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1922dbdd7285SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1923dbdd7285SBarry Smith idx = a->j; 1924dbdd7285SBarry Smith v = a->a; 1925dbdd7285SBarry Smith ii = a->i; 1926dbdd7285SBarry Smith 1927dbdd7285SBarry Smith for (i=0; i<m; i++) { 1928dbdd7285SBarry Smith jrow = ii[i]; 1929dbdd7285SBarry Smith n = ii[i+1] - jrow; 1930dbdd7285SBarry Smith sum1 = 0.0; 1931dbdd7285SBarry Smith sum2 = 0.0; 1932dbdd7285SBarry Smith sum3 = 0.0; 1933dbdd7285SBarry Smith sum4 = 0.0; 1934dbdd7285SBarry Smith sum5 = 0.0; 1935dbdd7285SBarry Smith sum6 = 0.0; 1936dbdd7285SBarry Smith sum7 = 0.0; 1937dbdd7285SBarry Smith sum8 = 0.0; 1938dbdd7285SBarry Smith sum9 = 0.0; 1939dbdd7285SBarry Smith sum10 = 0.0; 1940dbdd7285SBarry Smith sum11 = 0.0; 1941dbdd7285SBarry Smith for (j=0; j<n; j++) { 1942dbdd7285SBarry Smith sum1 += v[jrow]*x[11*idx[jrow]]; 1943dbdd7285SBarry Smith sum2 += v[jrow]*x[11*idx[jrow]+1]; 1944dbdd7285SBarry Smith sum3 += v[jrow]*x[11*idx[jrow]+2]; 1945dbdd7285SBarry Smith sum4 += v[jrow]*x[11*idx[jrow]+3]; 1946dbdd7285SBarry Smith sum5 += v[jrow]*x[11*idx[jrow]+4]; 1947dbdd7285SBarry Smith sum6 += v[jrow]*x[11*idx[jrow]+5]; 1948dbdd7285SBarry Smith sum7 += v[jrow]*x[11*idx[jrow]+6]; 1949dbdd7285SBarry Smith sum8 += v[jrow]*x[11*idx[jrow]+7]; 1950dbdd7285SBarry Smith sum9 += v[jrow]*x[11*idx[jrow]+8]; 1951dbdd7285SBarry Smith sum10 += v[jrow]*x[11*idx[jrow]+9]; 1952dbdd7285SBarry Smith sum11 += v[jrow]*x[11*idx[jrow]+10]; 1953dbdd7285SBarry Smith jrow++; 1954dbdd7285SBarry Smith } 1955dbdd7285SBarry Smith y[11*i] += sum1; 1956dbdd7285SBarry Smith y[11*i+1] += sum2; 1957dbdd7285SBarry Smith y[11*i+2] += sum3; 1958dbdd7285SBarry Smith y[11*i+3] += sum4; 1959dbdd7285SBarry Smith y[11*i+4] += sum5; 1960dbdd7285SBarry Smith y[11*i+5] += sum6; 1961dbdd7285SBarry Smith y[11*i+6] += sum7; 1962dbdd7285SBarry Smith y[11*i+7] += sum8; 1963dbdd7285SBarry Smith y[11*i+8] += sum9; 1964dbdd7285SBarry Smith y[11*i+9] += sum10; 1965dbdd7285SBarry Smith y[11*i+10] += sum11; 1966dbdd7285SBarry Smith } 1967dbdd7285SBarry Smith 1968dbdd7285SBarry Smith ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 19693649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1970dbdd7285SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1971dbdd7285SBarry Smith PetscFunctionReturn(0); 1972dbdd7285SBarry Smith } 1973dbdd7285SBarry Smith 1974dbdd7285SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1975dbdd7285SBarry Smith { 1976dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1977dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1978d2840e09SBarry Smith const PetscScalar *x,*v; 1979d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 1980dbdd7285SBarry Smith PetscErrorCode ierr; 1981d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1982d2840e09SBarry Smith PetscInt n,i; 1983dbdd7285SBarry Smith 1984dbdd7285SBarry Smith PetscFunctionBegin; 1985d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 19863649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1987dbdd7285SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1988dbdd7285SBarry Smith 1989dbdd7285SBarry Smith for (i=0; i<m; i++) { 1990dbdd7285SBarry Smith idx = a->j + a->i[i]; 1991dbdd7285SBarry Smith v = a->a + a->i[i]; 1992dbdd7285SBarry Smith n = a->i[i+1] - a->i[i]; 1993dbdd7285SBarry Smith alpha1 = x[11*i]; 1994dbdd7285SBarry Smith alpha2 = x[11*i+1]; 1995dbdd7285SBarry Smith alpha3 = x[11*i+2]; 1996dbdd7285SBarry Smith alpha4 = x[11*i+3]; 1997dbdd7285SBarry Smith alpha5 = x[11*i+4]; 1998dbdd7285SBarry Smith alpha6 = x[11*i+5]; 1999dbdd7285SBarry Smith alpha7 = x[11*i+6]; 2000dbdd7285SBarry Smith alpha8 = x[11*i+7]; 2001dbdd7285SBarry Smith alpha9 = x[11*i+8]; 2002dbdd7285SBarry Smith alpha10 = x[11*i+9]; 2003dbdd7285SBarry Smith alpha11 = x[11*i+10]; 2004dbdd7285SBarry Smith while (n-->0) { 2005dbdd7285SBarry Smith y[11*(*idx)] += alpha1*(*v); 2006dbdd7285SBarry Smith y[11*(*idx)+1] += alpha2*(*v); 2007dbdd7285SBarry Smith y[11*(*idx)+2] += alpha3*(*v); 2008dbdd7285SBarry Smith y[11*(*idx)+3] += alpha4*(*v); 2009dbdd7285SBarry Smith y[11*(*idx)+4] += alpha5*(*v); 2010dbdd7285SBarry Smith y[11*(*idx)+5] += alpha6*(*v); 2011dbdd7285SBarry Smith y[11*(*idx)+6] += alpha7*(*v); 2012dbdd7285SBarry Smith y[11*(*idx)+7] += alpha8*(*v); 2013dbdd7285SBarry Smith y[11*(*idx)+8] += alpha9*(*v); 2014dbdd7285SBarry Smith y[11*(*idx)+9] += alpha10*(*v); 2015dbdd7285SBarry Smith y[11*(*idx)+10] += alpha11*(*v); 2016dbdd7285SBarry Smith idx++; v++; 2017dbdd7285SBarry Smith } 2018dbdd7285SBarry Smith } 2019dbdd7285SBarry Smith ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 20203649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2021dbdd7285SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2022dbdd7285SBarry Smith PetscFunctionReturn(0); 2023dbdd7285SBarry Smith } 2024dbdd7285SBarry Smith 2025dbdd7285SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 2026dbdd7285SBarry Smith { 2027dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2028dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2029d2840e09SBarry Smith const PetscScalar *x,*v; 2030d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 2031dbdd7285SBarry Smith PetscErrorCode ierr; 2032d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2033d2840e09SBarry Smith PetscInt n,i; 2034dbdd7285SBarry Smith 2035dbdd7285SBarry Smith PetscFunctionBegin; 2036dbdd7285SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 20373649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2038dbdd7285SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2039dbdd7285SBarry Smith for (i=0; i<m; i++) { 2040dbdd7285SBarry Smith idx = a->j + a->i[i]; 2041dbdd7285SBarry Smith v = a->a + a->i[i]; 2042dbdd7285SBarry Smith n = a->i[i+1] - a->i[i]; 2043dbdd7285SBarry Smith alpha1 = x[11*i]; 2044dbdd7285SBarry Smith alpha2 = x[11*i+1]; 2045dbdd7285SBarry Smith alpha3 = x[11*i+2]; 2046dbdd7285SBarry Smith alpha4 = x[11*i+3]; 2047dbdd7285SBarry Smith alpha5 = x[11*i+4]; 2048dbdd7285SBarry Smith alpha6 = x[11*i+5]; 2049dbdd7285SBarry Smith alpha7 = x[11*i+6]; 2050dbdd7285SBarry Smith alpha8 = x[11*i+7]; 2051dbdd7285SBarry Smith alpha9 = x[11*i+8]; 2052dbdd7285SBarry Smith alpha10 = x[11*i+9]; 2053dbdd7285SBarry Smith alpha11 = x[11*i+10]; 2054dbdd7285SBarry Smith while (n-->0) { 2055dbdd7285SBarry Smith y[11*(*idx)] += alpha1*(*v); 2056dbdd7285SBarry Smith y[11*(*idx)+1] += alpha2*(*v); 2057dbdd7285SBarry Smith y[11*(*idx)+2] += alpha3*(*v); 2058dbdd7285SBarry Smith y[11*(*idx)+3] += alpha4*(*v); 2059dbdd7285SBarry Smith y[11*(*idx)+4] += alpha5*(*v); 2060dbdd7285SBarry Smith y[11*(*idx)+5] += alpha6*(*v); 2061dbdd7285SBarry Smith y[11*(*idx)+6] += alpha7*(*v); 2062dbdd7285SBarry Smith y[11*(*idx)+7] += alpha8*(*v); 2063dbdd7285SBarry Smith y[11*(*idx)+8] += alpha9*(*v); 2064dbdd7285SBarry Smith y[11*(*idx)+9] += alpha10*(*v); 2065dbdd7285SBarry Smith y[11*(*idx)+10] += alpha11*(*v); 2066dbdd7285SBarry Smith idx++; v++; 2067dbdd7285SBarry Smith } 2068dbdd7285SBarry Smith } 2069dbdd7285SBarry Smith ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 20703649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2071dbdd7285SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2072dbdd7285SBarry Smith PetscFunctionReturn(0); 2073dbdd7285SBarry Smith } 2074dbdd7285SBarry Smith 2075dbdd7285SBarry Smith 2076dbdd7285SBarry Smith /*--------------------------------------------------------------------------------------------*/ 2077dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 20782f7816d4SBarry Smith { 20792f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 20802f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2081d2840e09SBarry Smith const PetscScalar *x,*v; 2082d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 20832f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2084dfbe8321SBarry Smith PetscErrorCode ierr; 2085d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2086d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 20872f7816d4SBarry Smith 20882f7816d4SBarry Smith PetscFunctionBegin; 20893649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 20901ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 20912f7816d4SBarry Smith idx = a->j; 20922f7816d4SBarry Smith v = a->a; 20932f7816d4SBarry Smith ii = a->i; 20942f7816d4SBarry Smith 20952f7816d4SBarry Smith for (i=0; i<m; i++) { 20962f7816d4SBarry Smith jrow = ii[i]; 20972f7816d4SBarry Smith n = ii[i+1] - jrow; 20982f7816d4SBarry Smith sum1 = 0.0; 20992f7816d4SBarry Smith sum2 = 0.0; 21002f7816d4SBarry Smith sum3 = 0.0; 21012f7816d4SBarry Smith sum4 = 0.0; 21022f7816d4SBarry Smith sum5 = 0.0; 21032f7816d4SBarry Smith sum6 = 0.0; 21042f7816d4SBarry Smith sum7 = 0.0; 21052f7816d4SBarry Smith sum8 = 0.0; 21062f7816d4SBarry Smith sum9 = 0.0; 21072f7816d4SBarry Smith sum10 = 0.0; 21082f7816d4SBarry Smith sum11 = 0.0; 21092f7816d4SBarry Smith sum12 = 0.0; 21102f7816d4SBarry Smith sum13 = 0.0; 21112f7816d4SBarry Smith sum14 = 0.0; 21122f7816d4SBarry Smith sum15 = 0.0; 21132f7816d4SBarry Smith sum16 = 0.0; 211426fbe8dcSKarl Rupp 2115b3c51c66SMatthew Knepley nonzerorow += (n>0); 21162f7816d4SBarry Smith for (j=0; j<n; j++) { 21172f7816d4SBarry Smith sum1 += v[jrow]*x[16*idx[jrow]]; 21182f7816d4SBarry Smith sum2 += v[jrow]*x[16*idx[jrow]+1]; 21192f7816d4SBarry Smith sum3 += v[jrow]*x[16*idx[jrow]+2]; 21202f7816d4SBarry Smith sum4 += v[jrow]*x[16*idx[jrow]+3]; 21212f7816d4SBarry Smith sum5 += v[jrow]*x[16*idx[jrow]+4]; 21222f7816d4SBarry Smith sum6 += v[jrow]*x[16*idx[jrow]+5]; 21232f7816d4SBarry Smith sum7 += v[jrow]*x[16*idx[jrow]+6]; 21242f7816d4SBarry Smith sum8 += v[jrow]*x[16*idx[jrow]+7]; 21252f7816d4SBarry Smith sum9 += v[jrow]*x[16*idx[jrow]+8]; 21262f7816d4SBarry Smith sum10 += v[jrow]*x[16*idx[jrow]+9]; 21272f7816d4SBarry Smith sum11 += v[jrow]*x[16*idx[jrow]+10]; 21282f7816d4SBarry Smith sum12 += v[jrow]*x[16*idx[jrow]+11]; 21292f7816d4SBarry Smith sum13 += v[jrow]*x[16*idx[jrow]+12]; 21302f7816d4SBarry Smith sum14 += v[jrow]*x[16*idx[jrow]+13]; 21312f7816d4SBarry Smith sum15 += v[jrow]*x[16*idx[jrow]+14]; 21322f7816d4SBarry Smith sum16 += v[jrow]*x[16*idx[jrow]+15]; 21332f7816d4SBarry Smith jrow++; 21342f7816d4SBarry Smith } 21352f7816d4SBarry Smith y[16*i] = sum1; 21362f7816d4SBarry Smith y[16*i+1] = sum2; 21372f7816d4SBarry Smith y[16*i+2] = sum3; 21382f7816d4SBarry Smith y[16*i+3] = sum4; 21392f7816d4SBarry Smith y[16*i+4] = sum5; 21402f7816d4SBarry Smith y[16*i+5] = sum6; 21412f7816d4SBarry Smith y[16*i+6] = sum7; 21422f7816d4SBarry Smith y[16*i+7] = sum8; 21432f7816d4SBarry Smith y[16*i+8] = sum9; 21442f7816d4SBarry Smith y[16*i+9] = sum10; 21452f7816d4SBarry Smith y[16*i+10] = sum11; 21462f7816d4SBarry Smith y[16*i+11] = sum12; 21472f7816d4SBarry Smith y[16*i+12] = sum13; 21482f7816d4SBarry Smith y[16*i+13] = sum14; 21492f7816d4SBarry Smith y[16*i+14] = sum15; 21502f7816d4SBarry Smith y[16*i+15] = sum16; 21512f7816d4SBarry Smith } 21522f7816d4SBarry Smith 2153dc0b31edSSatish Balay ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr); 21543649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 21551ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 21562f7816d4SBarry Smith PetscFunctionReturn(0); 21572f7816d4SBarry Smith } 21582f7816d4SBarry Smith 2159dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 21602f7816d4SBarry Smith { 21612f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 21622f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2163d2840e09SBarry Smith const PetscScalar *x,*v; 2164d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 21652f7816d4SBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2166dfbe8321SBarry Smith PetscErrorCode ierr; 2167d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2168d2840e09SBarry Smith PetscInt n,i; 21692f7816d4SBarry Smith 21702f7816d4SBarry Smith PetscFunctionBegin; 2171d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 21723649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 21731ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2174bfec09a0SHong Zhang 21752f7816d4SBarry Smith for (i=0; i<m; i++) { 2176bfec09a0SHong Zhang idx = a->j + a->i[i]; 2177bfec09a0SHong Zhang v = a->a + a->i[i]; 21782f7816d4SBarry Smith n = a->i[i+1] - a->i[i]; 21792f7816d4SBarry Smith alpha1 = x[16*i]; 21802f7816d4SBarry Smith alpha2 = x[16*i+1]; 21812f7816d4SBarry Smith alpha3 = x[16*i+2]; 21822f7816d4SBarry Smith alpha4 = x[16*i+3]; 21832f7816d4SBarry Smith alpha5 = x[16*i+4]; 21842f7816d4SBarry Smith alpha6 = x[16*i+5]; 21852f7816d4SBarry Smith alpha7 = x[16*i+6]; 21862f7816d4SBarry Smith alpha8 = x[16*i+7]; 21872f7816d4SBarry Smith alpha9 = x[16*i+8]; 21882f7816d4SBarry Smith alpha10 = x[16*i+9]; 21892f7816d4SBarry Smith alpha11 = x[16*i+10]; 21902f7816d4SBarry Smith alpha12 = x[16*i+11]; 21912f7816d4SBarry Smith alpha13 = x[16*i+12]; 21922f7816d4SBarry Smith alpha14 = x[16*i+13]; 21932f7816d4SBarry Smith alpha15 = x[16*i+14]; 21942f7816d4SBarry Smith alpha16 = x[16*i+15]; 21952f7816d4SBarry Smith while (n-->0) { 21962f7816d4SBarry Smith y[16*(*idx)] += alpha1*(*v); 21972f7816d4SBarry Smith y[16*(*idx)+1] += alpha2*(*v); 21982f7816d4SBarry Smith y[16*(*idx)+2] += alpha3*(*v); 21992f7816d4SBarry Smith y[16*(*idx)+3] += alpha4*(*v); 22002f7816d4SBarry Smith y[16*(*idx)+4] += alpha5*(*v); 22012f7816d4SBarry Smith y[16*(*idx)+5] += alpha6*(*v); 22022f7816d4SBarry Smith y[16*(*idx)+6] += alpha7*(*v); 22032f7816d4SBarry Smith y[16*(*idx)+7] += alpha8*(*v); 22042f7816d4SBarry Smith y[16*(*idx)+8] += alpha9*(*v); 22052f7816d4SBarry Smith y[16*(*idx)+9] += alpha10*(*v); 22062f7816d4SBarry Smith y[16*(*idx)+10] += alpha11*(*v); 22072f7816d4SBarry Smith y[16*(*idx)+11] += alpha12*(*v); 22082f7816d4SBarry Smith y[16*(*idx)+12] += alpha13*(*v); 22092f7816d4SBarry Smith y[16*(*idx)+13] += alpha14*(*v); 22102f7816d4SBarry Smith y[16*(*idx)+14] += alpha15*(*v); 22112f7816d4SBarry Smith y[16*(*idx)+15] += alpha16*(*v); 22122f7816d4SBarry Smith idx++; v++; 22132f7816d4SBarry Smith } 22142f7816d4SBarry Smith } 2215dc0b31edSSatish Balay ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 22163649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 22171ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 22182f7816d4SBarry Smith PetscFunctionReturn(0); 22192f7816d4SBarry Smith } 22202f7816d4SBarry Smith 2221dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 22222f7816d4SBarry Smith { 22232f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 22242f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2225d2840e09SBarry Smith const PetscScalar *x,*v; 2226d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 22272f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2228dfbe8321SBarry Smith PetscErrorCode ierr; 2229d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2230b24ad042SBarry Smith PetscInt n,i,jrow,j; 22312f7816d4SBarry Smith 22322f7816d4SBarry Smith PetscFunctionBegin; 22332f7816d4SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 22343649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 22351ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 22362f7816d4SBarry Smith idx = a->j; 22372f7816d4SBarry Smith v = a->a; 22382f7816d4SBarry Smith ii = a->i; 22392f7816d4SBarry Smith 22402f7816d4SBarry Smith for (i=0; i<m; i++) { 22412f7816d4SBarry Smith jrow = ii[i]; 22422f7816d4SBarry Smith n = ii[i+1] - jrow; 22432f7816d4SBarry Smith sum1 = 0.0; 22442f7816d4SBarry Smith sum2 = 0.0; 22452f7816d4SBarry Smith sum3 = 0.0; 22462f7816d4SBarry Smith sum4 = 0.0; 22472f7816d4SBarry Smith sum5 = 0.0; 22482f7816d4SBarry Smith sum6 = 0.0; 22492f7816d4SBarry Smith sum7 = 0.0; 22502f7816d4SBarry Smith sum8 = 0.0; 22512f7816d4SBarry Smith sum9 = 0.0; 22522f7816d4SBarry Smith sum10 = 0.0; 22532f7816d4SBarry Smith sum11 = 0.0; 22542f7816d4SBarry Smith sum12 = 0.0; 22552f7816d4SBarry Smith sum13 = 0.0; 22562f7816d4SBarry Smith sum14 = 0.0; 22572f7816d4SBarry Smith sum15 = 0.0; 22582f7816d4SBarry Smith sum16 = 0.0; 22592f7816d4SBarry Smith for (j=0; j<n; j++) { 22602f7816d4SBarry Smith sum1 += v[jrow]*x[16*idx[jrow]]; 22612f7816d4SBarry Smith sum2 += v[jrow]*x[16*idx[jrow]+1]; 22622f7816d4SBarry Smith sum3 += v[jrow]*x[16*idx[jrow]+2]; 22632f7816d4SBarry Smith sum4 += v[jrow]*x[16*idx[jrow]+3]; 22642f7816d4SBarry Smith sum5 += v[jrow]*x[16*idx[jrow]+4]; 22652f7816d4SBarry Smith sum6 += v[jrow]*x[16*idx[jrow]+5]; 22662f7816d4SBarry Smith sum7 += v[jrow]*x[16*idx[jrow]+6]; 22672f7816d4SBarry Smith sum8 += v[jrow]*x[16*idx[jrow]+7]; 22682f7816d4SBarry Smith sum9 += v[jrow]*x[16*idx[jrow]+8]; 22692f7816d4SBarry Smith sum10 += v[jrow]*x[16*idx[jrow]+9]; 22702f7816d4SBarry Smith sum11 += v[jrow]*x[16*idx[jrow]+10]; 22712f7816d4SBarry Smith sum12 += v[jrow]*x[16*idx[jrow]+11]; 22722f7816d4SBarry Smith sum13 += v[jrow]*x[16*idx[jrow]+12]; 22732f7816d4SBarry Smith sum14 += v[jrow]*x[16*idx[jrow]+13]; 22742f7816d4SBarry Smith sum15 += v[jrow]*x[16*idx[jrow]+14]; 22752f7816d4SBarry Smith sum16 += v[jrow]*x[16*idx[jrow]+15]; 22762f7816d4SBarry Smith jrow++; 22772f7816d4SBarry Smith } 22782f7816d4SBarry Smith y[16*i] += sum1; 22792f7816d4SBarry Smith y[16*i+1] += sum2; 22802f7816d4SBarry Smith y[16*i+2] += sum3; 22812f7816d4SBarry Smith y[16*i+3] += sum4; 22822f7816d4SBarry Smith y[16*i+4] += sum5; 22832f7816d4SBarry Smith y[16*i+5] += sum6; 22842f7816d4SBarry Smith y[16*i+6] += sum7; 22852f7816d4SBarry Smith y[16*i+7] += sum8; 22862f7816d4SBarry Smith y[16*i+8] += sum9; 22872f7816d4SBarry Smith y[16*i+9] += sum10; 22882f7816d4SBarry Smith y[16*i+10] += sum11; 22892f7816d4SBarry Smith y[16*i+11] += sum12; 22902f7816d4SBarry Smith y[16*i+12] += sum13; 22912f7816d4SBarry Smith y[16*i+13] += sum14; 22922f7816d4SBarry Smith y[16*i+14] += sum15; 22932f7816d4SBarry Smith y[16*i+15] += sum16; 22942f7816d4SBarry Smith } 22952f7816d4SBarry Smith 2296dc0b31edSSatish Balay ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 22973649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 22981ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 22992f7816d4SBarry Smith PetscFunctionReturn(0); 23002f7816d4SBarry Smith } 23012f7816d4SBarry Smith 2302dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 23032f7816d4SBarry Smith { 23042f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 23052f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2306d2840e09SBarry Smith const PetscScalar *x,*v; 2307d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 23082f7816d4SBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2309dfbe8321SBarry Smith PetscErrorCode ierr; 2310d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2311d2840e09SBarry Smith PetscInt n,i; 23122f7816d4SBarry Smith 23132f7816d4SBarry Smith PetscFunctionBegin; 23142f7816d4SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 23153649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 23161ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 23172f7816d4SBarry Smith for (i=0; i<m; i++) { 2318bfec09a0SHong Zhang idx = a->j + a->i[i]; 2319bfec09a0SHong Zhang v = a->a + a->i[i]; 23202f7816d4SBarry Smith n = a->i[i+1] - a->i[i]; 23212f7816d4SBarry Smith alpha1 = x[16*i]; 23222f7816d4SBarry Smith alpha2 = x[16*i+1]; 23232f7816d4SBarry Smith alpha3 = x[16*i+2]; 23242f7816d4SBarry Smith alpha4 = x[16*i+3]; 23252f7816d4SBarry Smith alpha5 = x[16*i+4]; 23262f7816d4SBarry Smith alpha6 = x[16*i+5]; 23272f7816d4SBarry Smith alpha7 = x[16*i+6]; 23282f7816d4SBarry Smith alpha8 = x[16*i+7]; 23292f7816d4SBarry Smith alpha9 = x[16*i+8]; 23302f7816d4SBarry Smith alpha10 = x[16*i+9]; 23312f7816d4SBarry Smith alpha11 = x[16*i+10]; 23322f7816d4SBarry Smith alpha12 = x[16*i+11]; 23332f7816d4SBarry Smith alpha13 = x[16*i+12]; 23342f7816d4SBarry Smith alpha14 = x[16*i+13]; 23352f7816d4SBarry Smith alpha15 = x[16*i+14]; 23362f7816d4SBarry Smith alpha16 = x[16*i+15]; 23372f7816d4SBarry Smith while (n-->0) { 23382f7816d4SBarry Smith y[16*(*idx)] += alpha1*(*v); 23392f7816d4SBarry Smith y[16*(*idx)+1] += alpha2*(*v); 23402f7816d4SBarry Smith y[16*(*idx)+2] += alpha3*(*v); 23412f7816d4SBarry Smith y[16*(*idx)+3] += alpha4*(*v); 23422f7816d4SBarry Smith y[16*(*idx)+4] += alpha5*(*v); 23432f7816d4SBarry Smith y[16*(*idx)+5] += alpha6*(*v); 23442f7816d4SBarry Smith y[16*(*idx)+6] += alpha7*(*v); 23452f7816d4SBarry Smith y[16*(*idx)+7] += alpha8*(*v); 23462f7816d4SBarry Smith y[16*(*idx)+8] += alpha9*(*v); 23472f7816d4SBarry Smith y[16*(*idx)+9] += alpha10*(*v); 23482f7816d4SBarry Smith y[16*(*idx)+10] += alpha11*(*v); 23492f7816d4SBarry Smith y[16*(*idx)+11] += alpha12*(*v); 23502f7816d4SBarry Smith y[16*(*idx)+12] += alpha13*(*v); 23512f7816d4SBarry Smith y[16*(*idx)+13] += alpha14*(*v); 23522f7816d4SBarry Smith y[16*(*idx)+14] += alpha15*(*v); 23532f7816d4SBarry Smith y[16*(*idx)+15] += alpha16*(*v); 23542f7816d4SBarry Smith idx++; v++; 23552f7816d4SBarry Smith } 23562f7816d4SBarry Smith } 2357dc0b31edSSatish Balay ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 23583649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 23591ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 236066ed3db0SBarry Smith PetscFunctionReturn(0); 236166ed3db0SBarry Smith } 236266ed3db0SBarry Smith 2363ed1c418cSBarry Smith /*--------------------------------------------------------------------------------------------*/ 2364ed1c418cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2365ed1c418cSBarry Smith { 2366ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2367ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2368d2840e09SBarry Smith const PetscScalar *x,*v; 2369d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2370ed1c418cSBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2371ed1c418cSBarry Smith PetscErrorCode ierr; 2372d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2373d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 2374ed1c418cSBarry Smith 2375ed1c418cSBarry Smith PetscFunctionBegin; 23763649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2377ed1c418cSBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2378ed1c418cSBarry Smith idx = a->j; 2379ed1c418cSBarry Smith v = a->a; 2380ed1c418cSBarry Smith ii = a->i; 2381ed1c418cSBarry Smith 2382ed1c418cSBarry Smith for (i=0; i<m; i++) { 2383ed1c418cSBarry Smith jrow = ii[i]; 2384ed1c418cSBarry Smith n = ii[i+1] - jrow; 2385ed1c418cSBarry Smith sum1 = 0.0; 2386ed1c418cSBarry Smith sum2 = 0.0; 2387ed1c418cSBarry Smith sum3 = 0.0; 2388ed1c418cSBarry Smith sum4 = 0.0; 2389ed1c418cSBarry Smith sum5 = 0.0; 2390ed1c418cSBarry Smith sum6 = 0.0; 2391ed1c418cSBarry Smith sum7 = 0.0; 2392ed1c418cSBarry Smith sum8 = 0.0; 2393ed1c418cSBarry Smith sum9 = 0.0; 2394ed1c418cSBarry Smith sum10 = 0.0; 2395ed1c418cSBarry Smith sum11 = 0.0; 2396ed1c418cSBarry Smith sum12 = 0.0; 2397ed1c418cSBarry Smith sum13 = 0.0; 2398ed1c418cSBarry Smith sum14 = 0.0; 2399ed1c418cSBarry Smith sum15 = 0.0; 2400ed1c418cSBarry Smith sum16 = 0.0; 2401ed1c418cSBarry Smith sum17 = 0.0; 2402ed1c418cSBarry Smith sum18 = 0.0; 240326fbe8dcSKarl Rupp 2404ed1c418cSBarry Smith nonzerorow += (n>0); 2405ed1c418cSBarry Smith for (j=0; j<n; j++) { 2406ed1c418cSBarry Smith sum1 += v[jrow]*x[18*idx[jrow]]; 2407ed1c418cSBarry Smith sum2 += v[jrow]*x[18*idx[jrow]+1]; 2408ed1c418cSBarry Smith sum3 += v[jrow]*x[18*idx[jrow]+2]; 2409ed1c418cSBarry Smith sum4 += v[jrow]*x[18*idx[jrow]+3]; 2410ed1c418cSBarry Smith sum5 += v[jrow]*x[18*idx[jrow]+4]; 2411ed1c418cSBarry Smith sum6 += v[jrow]*x[18*idx[jrow]+5]; 2412ed1c418cSBarry Smith sum7 += v[jrow]*x[18*idx[jrow]+6]; 2413ed1c418cSBarry Smith sum8 += v[jrow]*x[18*idx[jrow]+7]; 2414ed1c418cSBarry Smith sum9 += v[jrow]*x[18*idx[jrow]+8]; 2415ed1c418cSBarry Smith sum10 += v[jrow]*x[18*idx[jrow]+9]; 2416ed1c418cSBarry Smith sum11 += v[jrow]*x[18*idx[jrow]+10]; 2417ed1c418cSBarry Smith sum12 += v[jrow]*x[18*idx[jrow]+11]; 2418ed1c418cSBarry Smith sum13 += v[jrow]*x[18*idx[jrow]+12]; 2419ed1c418cSBarry Smith sum14 += v[jrow]*x[18*idx[jrow]+13]; 2420ed1c418cSBarry Smith sum15 += v[jrow]*x[18*idx[jrow]+14]; 2421ed1c418cSBarry Smith sum16 += v[jrow]*x[18*idx[jrow]+15]; 2422ed1c418cSBarry Smith sum17 += v[jrow]*x[18*idx[jrow]+16]; 2423ed1c418cSBarry Smith sum18 += v[jrow]*x[18*idx[jrow]+17]; 2424ed1c418cSBarry Smith jrow++; 2425ed1c418cSBarry Smith } 2426ed1c418cSBarry Smith y[18*i] = sum1; 2427ed1c418cSBarry Smith y[18*i+1] = sum2; 2428ed1c418cSBarry Smith y[18*i+2] = sum3; 2429ed1c418cSBarry Smith y[18*i+3] = sum4; 2430ed1c418cSBarry Smith y[18*i+4] = sum5; 2431ed1c418cSBarry Smith y[18*i+5] = sum6; 2432ed1c418cSBarry Smith y[18*i+6] = sum7; 2433ed1c418cSBarry Smith y[18*i+7] = sum8; 2434ed1c418cSBarry Smith y[18*i+8] = sum9; 2435ed1c418cSBarry Smith y[18*i+9] = sum10; 2436ed1c418cSBarry Smith y[18*i+10] = sum11; 2437ed1c418cSBarry Smith y[18*i+11] = sum12; 2438ed1c418cSBarry Smith y[18*i+12] = sum13; 2439ed1c418cSBarry Smith y[18*i+13] = sum14; 2440ed1c418cSBarry Smith y[18*i+14] = sum15; 2441ed1c418cSBarry Smith y[18*i+15] = sum16; 2442ed1c418cSBarry Smith y[18*i+16] = sum17; 2443ed1c418cSBarry Smith y[18*i+17] = sum18; 2444ed1c418cSBarry Smith } 2445ed1c418cSBarry Smith 2446dc0b31edSSatish Balay ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr); 24473649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2448ed1c418cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2449ed1c418cSBarry Smith PetscFunctionReturn(0); 2450ed1c418cSBarry Smith } 2451ed1c418cSBarry Smith 2452ed1c418cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2453ed1c418cSBarry Smith { 2454ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2455ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2456d2840e09SBarry Smith const PetscScalar *x,*v; 2457d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2458ed1c418cSBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2459ed1c418cSBarry Smith PetscErrorCode ierr; 2460d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2461d2840e09SBarry Smith PetscInt n,i; 2462ed1c418cSBarry Smith 2463ed1c418cSBarry Smith PetscFunctionBegin; 2464d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 24653649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2466ed1c418cSBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2467ed1c418cSBarry Smith 2468ed1c418cSBarry Smith for (i=0; i<m; i++) { 2469ed1c418cSBarry Smith idx = a->j + a->i[i]; 2470ed1c418cSBarry Smith v = a->a + a->i[i]; 2471ed1c418cSBarry Smith n = a->i[i+1] - a->i[i]; 2472ed1c418cSBarry Smith alpha1 = x[18*i]; 2473ed1c418cSBarry Smith alpha2 = x[18*i+1]; 2474ed1c418cSBarry Smith alpha3 = x[18*i+2]; 2475ed1c418cSBarry Smith alpha4 = x[18*i+3]; 2476ed1c418cSBarry Smith alpha5 = x[18*i+4]; 2477ed1c418cSBarry Smith alpha6 = x[18*i+5]; 2478ed1c418cSBarry Smith alpha7 = x[18*i+6]; 2479ed1c418cSBarry Smith alpha8 = x[18*i+7]; 2480ed1c418cSBarry Smith alpha9 = x[18*i+8]; 2481ed1c418cSBarry Smith alpha10 = x[18*i+9]; 2482ed1c418cSBarry Smith alpha11 = x[18*i+10]; 2483ed1c418cSBarry Smith alpha12 = x[18*i+11]; 2484ed1c418cSBarry Smith alpha13 = x[18*i+12]; 2485ed1c418cSBarry Smith alpha14 = x[18*i+13]; 2486ed1c418cSBarry Smith alpha15 = x[18*i+14]; 2487ed1c418cSBarry Smith alpha16 = x[18*i+15]; 2488ed1c418cSBarry Smith alpha17 = x[18*i+16]; 2489ed1c418cSBarry Smith alpha18 = x[18*i+17]; 2490ed1c418cSBarry Smith while (n-->0) { 2491ed1c418cSBarry Smith y[18*(*idx)] += alpha1*(*v); 2492ed1c418cSBarry Smith y[18*(*idx)+1] += alpha2*(*v); 2493ed1c418cSBarry Smith y[18*(*idx)+2] += alpha3*(*v); 2494ed1c418cSBarry Smith y[18*(*idx)+3] += alpha4*(*v); 2495ed1c418cSBarry Smith y[18*(*idx)+4] += alpha5*(*v); 2496ed1c418cSBarry Smith y[18*(*idx)+5] += alpha6*(*v); 2497ed1c418cSBarry Smith y[18*(*idx)+6] += alpha7*(*v); 2498ed1c418cSBarry Smith y[18*(*idx)+7] += alpha8*(*v); 2499ed1c418cSBarry Smith y[18*(*idx)+8] += alpha9*(*v); 2500ed1c418cSBarry Smith y[18*(*idx)+9] += alpha10*(*v); 2501ed1c418cSBarry Smith y[18*(*idx)+10] += alpha11*(*v); 2502ed1c418cSBarry Smith y[18*(*idx)+11] += alpha12*(*v); 2503ed1c418cSBarry Smith y[18*(*idx)+12] += alpha13*(*v); 2504ed1c418cSBarry Smith y[18*(*idx)+13] += alpha14*(*v); 2505ed1c418cSBarry Smith y[18*(*idx)+14] += alpha15*(*v); 2506ed1c418cSBarry Smith y[18*(*idx)+15] += alpha16*(*v); 2507ed1c418cSBarry Smith y[18*(*idx)+16] += alpha17*(*v); 2508ed1c418cSBarry Smith y[18*(*idx)+17] += alpha18*(*v); 2509ed1c418cSBarry Smith idx++; v++; 2510ed1c418cSBarry Smith } 2511ed1c418cSBarry Smith } 2512dc0b31edSSatish Balay ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 25133649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2514ed1c418cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2515ed1c418cSBarry Smith PetscFunctionReturn(0); 2516ed1c418cSBarry Smith } 2517ed1c418cSBarry Smith 2518ed1c418cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2519ed1c418cSBarry Smith { 2520ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2521ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2522d2840e09SBarry Smith const PetscScalar *x,*v; 2523d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2524ed1c418cSBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2525ed1c418cSBarry Smith PetscErrorCode ierr; 2526d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2527ed1c418cSBarry Smith PetscInt n,i,jrow,j; 2528ed1c418cSBarry Smith 2529ed1c418cSBarry Smith PetscFunctionBegin; 2530ed1c418cSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 25313649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2532ed1c418cSBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2533ed1c418cSBarry Smith idx = a->j; 2534ed1c418cSBarry Smith v = a->a; 2535ed1c418cSBarry Smith ii = a->i; 2536ed1c418cSBarry Smith 2537ed1c418cSBarry Smith for (i=0; i<m; i++) { 2538ed1c418cSBarry Smith jrow = ii[i]; 2539ed1c418cSBarry Smith n = ii[i+1] - jrow; 2540ed1c418cSBarry Smith sum1 = 0.0; 2541ed1c418cSBarry Smith sum2 = 0.0; 2542ed1c418cSBarry Smith sum3 = 0.0; 2543ed1c418cSBarry Smith sum4 = 0.0; 2544ed1c418cSBarry Smith sum5 = 0.0; 2545ed1c418cSBarry Smith sum6 = 0.0; 2546ed1c418cSBarry Smith sum7 = 0.0; 2547ed1c418cSBarry Smith sum8 = 0.0; 2548ed1c418cSBarry Smith sum9 = 0.0; 2549ed1c418cSBarry Smith sum10 = 0.0; 2550ed1c418cSBarry Smith sum11 = 0.0; 2551ed1c418cSBarry Smith sum12 = 0.0; 2552ed1c418cSBarry Smith sum13 = 0.0; 2553ed1c418cSBarry Smith sum14 = 0.0; 2554ed1c418cSBarry Smith sum15 = 0.0; 2555ed1c418cSBarry Smith sum16 = 0.0; 2556ed1c418cSBarry Smith sum17 = 0.0; 2557ed1c418cSBarry Smith sum18 = 0.0; 2558ed1c418cSBarry Smith for (j=0; j<n; j++) { 2559ed1c418cSBarry Smith sum1 += v[jrow]*x[18*idx[jrow]]; 2560ed1c418cSBarry Smith sum2 += v[jrow]*x[18*idx[jrow]+1]; 2561ed1c418cSBarry Smith sum3 += v[jrow]*x[18*idx[jrow]+2]; 2562ed1c418cSBarry Smith sum4 += v[jrow]*x[18*idx[jrow]+3]; 2563ed1c418cSBarry Smith sum5 += v[jrow]*x[18*idx[jrow]+4]; 2564ed1c418cSBarry Smith sum6 += v[jrow]*x[18*idx[jrow]+5]; 2565ed1c418cSBarry Smith sum7 += v[jrow]*x[18*idx[jrow]+6]; 2566ed1c418cSBarry Smith sum8 += v[jrow]*x[18*idx[jrow]+7]; 2567ed1c418cSBarry Smith sum9 += v[jrow]*x[18*idx[jrow]+8]; 2568ed1c418cSBarry Smith sum10 += v[jrow]*x[18*idx[jrow]+9]; 2569ed1c418cSBarry Smith sum11 += v[jrow]*x[18*idx[jrow]+10]; 2570ed1c418cSBarry Smith sum12 += v[jrow]*x[18*idx[jrow]+11]; 2571ed1c418cSBarry Smith sum13 += v[jrow]*x[18*idx[jrow]+12]; 2572ed1c418cSBarry Smith sum14 += v[jrow]*x[18*idx[jrow]+13]; 2573ed1c418cSBarry Smith sum15 += v[jrow]*x[18*idx[jrow]+14]; 2574ed1c418cSBarry Smith sum16 += v[jrow]*x[18*idx[jrow]+15]; 2575ed1c418cSBarry Smith sum17 += v[jrow]*x[18*idx[jrow]+16]; 2576ed1c418cSBarry Smith sum18 += v[jrow]*x[18*idx[jrow]+17]; 2577ed1c418cSBarry Smith jrow++; 2578ed1c418cSBarry Smith } 2579ed1c418cSBarry Smith y[18*i] += sum1; 2580ed1c418cSBarry Smith y[18*i+1] += sum2; 2581ed1c418cSBarry Smith y[18*i+2] += sum3; 2582ed1c418cSBarry Smith y[18*i+3] += sum4; 2583ed1c418cSBarry Smith y[18*i+4] += sum5; 2584ed1c418cSBarry Smith y[18*i+5] += sum6; 2585ed1c418cSBarry Smith y[18*i+6] += sum7; 2586ed1c418cSBarry Smith y[18*i+7] += sum8; 2587ed1c418cSBarry Smith y[18*i+8] += sum9; 2588ed1c418cSBarry Smith y[18*i+9] += sum10; 2589ed1c418cSBarry Smith y[18*i+10] += sum11; 2590ed1c418cSBarry Smith y[18*i+11] += sum12; 2591ed1c418cSBarry Smith y[18*i+12] += sum13; 2592ed1c418cSBarry Smith y[18*i+13] += sum14; 2593ed1c418cSBarry Smith y[18*i+14] += sum15; 2594ed1c418cSBarry Smith y[18*i+15] += sum16; 2595ed1c418cSBarry Smith y[18*i+16] += sum17; 2596ed1c418cSBarry Smith y[18*i+17] += sum18; 2597ed1c418cSBarry Smith } 2598ed1c418cSBarry Smith 2599dc0b31edSSatish Balay ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 26003649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2601ed1c418cSBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2602ed1c418cSBarry Smith PetscFunctionReturn(0); 2603ed1c418cSBarry Smith } 2604ed1c418cSBarry Smith 2605ed1c418cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2606ed1c418cSBarry Smith { 2607ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2608ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2609d2840e09SBarry Smith const PetscScalar *x,*v; 2610d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2611ed1c418cSBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2612ed1c418cSBarry Smith PetscErrorCode ierr; 2613d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2614d2840e09SBarry Smith PetscInt n,i; 2615ed1c418cSBarry Smith 2616ed1c418cSBarry Smith PetscFunctionBegin; 2617ed1c418cSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 26183649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2619ed1c418cSBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2620ed1c418cSBarry Smith for (i=0; i<m; i++) { 2621ed1c418cSBarry Smith idx = a->j + a->i[i]; 2622ed1c418cSBarry Smith v = a->a + a->i[i]; 2623ed1c418cSBarry Smith n = a->i[i+1] - a->i[i]; 2624ed1c418cSBarry Smith alpha1 = x[18*i]; 2625ed1c418cSBarry Smith alpha2 = x[18*i+1]; 2626ed1c418cSBarry Smith alpha3 = x[18*i+2]; 2627ed1c418cSBarry Smith alpha4 = x[18*i+3]; 2628ed1c418cSBarry Smith alpha5 = x[18*i+4]; 2629ed1c418cSBarry Smith alpha6 = x[18*i+5]; 2630ed1c418cSBarry Smith alpha7 = x[18*i+6]; 2631ed1c418cSBarry Smith alpha8 = x[18*i+7]; 2632ed1c418cSBarry Smith alpha9 = x[18*i+8]; 2633ed1c418cSBarry Smith alpha10 = x[18*i+9]; 2634ed1c418cSBarry Smith alpha11 = x[18*i+10]; 2635ed1c418cSBarry Smith alpha12 = x[18*i+11]; 2636ed1c418cSBarry Smith alpha13 = x[18*i+12]; 2637ed1c418cSBarry Smith alpha14 = x[18*i+13]; 2638ed1c418cSBarry Smith alpha15 = x[18*i+14]; 2639ed1c418cSBarry Smith alpha16 = x[18*i+15]; 2640ed1c418cSBarry Smith alpha17 = x[18*i+16]; 2641ed1c418cSBarry Smith alpha18 = x[18*i+17]; 2642ed1c418cSBarry Smith while (n-->0) { 2643ed1c418cSBarry Smith y[18*(*idx)] += alpha1*(*v); 2644ed1c418cSBarry Smith y[18*(*idx)+1] += alpha2*(*v); 2645ed1c418cSBarry Smith y[18*(*idx)+2] += alpha3*(*v); 2646ed1c418cSBarry Smith y[18*(*idx)+3] += alpha4*(*v); 2647ed1c418cSBarry Smith y[18*(*idx)+4] += alpha5*(*v); 2648ed1c418cSBarry Smith y[18*(*idx)+5] += alpha6*(*v); 2649ed1c418cSBarry Smith y[18*(*idx)+6] += alpha7*(*v); 2650ed1c418cSBarry Smith y[18*(*idx)+7] += alpha8*(*v); 2651ed1c418cSBarry Smith y[18*(*idx)+8] += alpha9*(*v); 2652ed1c418cSBarry Smith y[18*(*idx)+9] += alpha10*(*v); 2653ed1c418cSBarry Smith y[18*(*idx)+10] += alpha11*(*v); 2654ed1c418cSBarry Smith y[18*(*idx)+11] += alpha12*(*v); 2655ed1c418cSBarry Smith y[18*(*idx)+12] += alpha13*(*v); 2656ed1c418cSBarry Smith y[18*(*idx)+13] += alpha14*(*v); 2657ed1c418cSBarry Smith y[18*(*idx)+14] += alpha15*(*v); 2658ed1c418cSBarry Smith y[18*(*idx)+15] += alpha16*(*v); 2659ed1c418cSBarry Smith y[18*(*idx)+16] += alpha17*(*v); 2660ed1c418cSBarry Smith y[18*(*idx)+17] += alpha18*(*v); 2661ed1c418cSBarry Smith idx++; v++; 2662ed1c418cSBarry Smith } 2663ed1c418cSBarry Smith } 2664dc0b31edSSatish Balay ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 26653649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2666ed1c418cSBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2667ed1c418cSBarry Smith PetscFunctionReturn(0); 2668ed1c418cSBarry Smith } 2669ed1c418cSBarry Smith 26706861f193SBarry Smith PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 26716861f193SBarry Smith { 26726861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 26736861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 26746861f193SBarry Smith const PetscScalar *x,*v; 26756861f193SBarry Smith PetscScalar *y,*sums; 26766861f193SBarry Smith PetscErrorCode ierr; 26776861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 26786861f193SBarry Smith PetscInt n,i,jrow,j,dof = b->dof,k; 26796861f193SBarry Smith 26806861f193SBarry Smith PetscFunctionBegin; 26813649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 26826861f193SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 26836861f193SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 26846861f193SBarry Smith idx = a->j; 26856861f193SBarry Smith v = a->a; 26866861f193SBarry Smith ii = a->i; 26876861f193SBarry Smith 26886861f193SBarry Smith for (i=0; i<m; i++) { 26896861f193SBarry Smith jrow = ii[i]; 26906861f193SBarry Smith n = ii[i+1] - jrow; 26916861f193SBarry Smith sums = y + dof*i; 26926861f193SBarry Smith for (j=0; j<n; j++) { 26936861f193SBarry Smith for (k=0; k<dof; k++) { 26946861f193SBarry Smith sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 26956861f193SBarry Smith } 26966861f193SBarry Smith jrow++; 26976861f193SBarry Smith } 26986861f193SBarry Smith } 26996861f193SBarry Smith 27006861f193SBarry Smith ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 27013649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 27026861f193SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 27036861f193SBarry Smith PetscFunctionReturn(0); 27046861f193SBarry Smith } 27056861f193SBarry Smith 27066861f193SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 27076861f193SBarry Smith { 27086861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 27096861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 27106861f193SBarry Smith const PetscScalar *x,*v; 27116861f193SBarry Smith PetscScalar *y,*sums; 27126861f193SBarry Smith PetscErrorCode ierr; 27136861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 27146861f193SBarry Smith PetscInt n,i,jrow,j,dof = b->dof,k; 27156861f193SBarry Smith 27166861f193SBarry Smith PetscFunctionBegin; 27176861f193SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 27183649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 27196861f193SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 27206861f193SBarry Smith idx = a->j; 27216861f193SBarry Smith v = a->a; 27226861f193SBarry Smith ii = a->i; 27236861f193SBarry Smith 27246861f193SBarry Smith for (i=0; i<m; i++) { 27256861f193SBarry Smith jrow = ii[i]; 27266861f193SBarry Smith n = ii[i+1] - jrow; 27276861f193SBarry Smith sums = y + dof*i; 27286861f193SBarry Smith for (j=0; j<n; j++) { 27296861f193SBarry Smith for (k=0; k<dof; k++) { 27306861f193SBarry Smith sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 27316861f193SBarry Smith } 27326861f193SBarry Smith jrow++; 27336861f193SBarry Smith } 27346861f193SBarry Smith } 27356861f193SBarry Smith 27366861f193SBarry Smith ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 27373649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 27386861f193SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 27396861f193SBarry Smith PetscFunctionReturn(0); 27406861f193SBarry Smith } 27416861f193SBarry Smith 27426861f193SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 27436861f193SBarry Smith { 27446861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 27456861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 27466861f193SBarry Smith const PetscScalar *x,*v,*alpha; 27476861f193SBarry Smith PetscScalar *y; 27486861f193SBarry Smith PetscErrorCode ierr; 27496861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 27506861f193SBarry Smith PetscInt n,i,k; 27516861f193SBarry Smith 27526861f193SBarry Smith PetscFunctionBegin; 27533649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 27546861f193SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 27556861f193SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 27566861f193SBarry Smith for (i=0; i<m; i++) { 27576861f193SBarry Smith idx = a->j + a->i[i]; 27586861f193SBarry Smith v = a->a + a->i[i]; 27596861f193SBarry Smith n = a->i[i+1] - a->i[i]; 27606861f193SBarry Smith alpha = x + dof*i; 27616861f193SBarry Smith while (n-->0) { 27626861f193SBarry Smith for (k=0; k<dof; k++) { 27636861f193SBarry Smith y[dof*(*idx)+k] += alpha[k]*(*v); 27646861f193SBarry Smith } 276583ce7366SBarry Smith idx++; v++; 27666861f193SBarry Smith } 27676861f193SBarry Smith } 27686861f193SBarry Smith ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 27693649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 27706861f193SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 27716861f193SBarry Smith PetscFunctionReturn(0); 27726861f193SBarry Smith } 27736861f193SBarry Smith 27746861f193SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 27756861f193SBarry Smith { 27766861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 27776861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 27786861f193SBarry Smith const PetscScalar *x,*v,*alpha; 27796861f193SBarry Smith PetscScalar *y; 27806861f193SBarry Smith PetscErrorCode ierr; 27816861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 27826861f193SBarry Smith PetscInt n,i,k; 27836861f193SBarry Smith 27846861f193SBarry Smith PetscFunctionBegin; 27856861f193SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 27863649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 27876861f193SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 27886861f193SBarry Smith for (i=0; i<m; i++) { 27896861f193SBarry Smith idx = a->j + a->i[i]; 27906861f193SBarry Smith v = a->a + a->i[i]; 27916861f193SBarry Smith n = a->i[i+1] - a->i[i]; 27926861f193SBarry Smith alpha = x + dof*i; 27936861f193SBarry Smith while (n-->0) { 27946861f193SBarry Smith for (k=0; k<dof; k++) { 27956861f193SBarry Smith y[dof*(*idx)+k] += alpha[k]*(*v); 27966861f193SBarry Smith } 279783ce7366SBarry Smith idx++; v++; 27986861f193SBarry Smith } 27996861f193SBarry Smith } 28006861f193SBarry Smith ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 28013649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 28026861f193SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 28036861f193SBarry Smith PetscFunctionReturn(0); 28046861f193SBarry Smith } 28056861f193SBarry Smith 2806f4a53059SBarry Smith /*===================================================================================*/ 2807dfbe8321SBarry Smith PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2808f4a53059SBarry Smith { 28094cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2810dfbe8321SBarry Smith PetscErrorCode ierr; 2811f4a53059SBarry Smith 2812b24ad042SBarry Smith PetscFunctionBegin; 2813f4a53059SBarry Smith /* start the scatter */ 2814ca9f406cSSatish Balay ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 28154cb9d9b8SBarry Smith ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 2816ca9f406cSSatish Balay ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 28174cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 2818f4a53059SBarry Smith PetscFunctionReturn(0); 2819f4a53059SBarry Smith } 2820f4a53059SBarry Smith 2821dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 28224cb9d9b8SBarry Smith { 28234cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2824dfbe8321SBarry Smith PetscErrorCode ierr; 2825b24ad042SBarry Smith 28264cb9d9b8SBarry Smith PetscFunctionBegin; 28274cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 28284cb9d9b8SBarry Smith ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 2829ca9f406cSSatish Balay ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2830ca9f406cSSatish Balay ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 28314cb9d9b8SBarry Smith PetscFunctionReturn(0); 28324cb9d9b8SBarry Smith } 28334cb9d9b8SBarry Smith 2834dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 28354cb9d9b8SBarry Smith { 28364cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2837dfbe8321SBarry Smith PetscErrorCode ierr; 28384cb9d9b8SBarry Smith 2839b24ad042SBarry Smith PetscFunctionBegin; 28404cb9d9b8SBarry Smith /* start the scatter */ 2841ca9f406cSSatish Balay ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2842d72c5c08SBarry Smith ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2843ca9f406cSSatish Balay ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2844717f2ec8SHong Zhang ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 28454cb9d9b8SBarry Smith PetscFunctionReturn(0); 28464cb9d9b8SBarry Smith } 28474cb9d9b8SBarry Smith 2848dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 28494cb9d9b8SBarry Smith { 28504cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2851dfbe8321SBarry Smith PetscErrorCode ierr; 2852b24ad042SBarry Smith 28534cb9d9b8SBarry Smith PetscFunctionBegin; 28544cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2855ca9f406cSSatish Balay ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2856d72c5c08SBarry Smith ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2857ca9f406cSSatish Balay ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 28584cb9d9b8SBarry Smith PetscFunctionReturn(0); 28594cb9d9b8SBarry Smith } 28604cb9d9b8SBarry Smith 286195ddefa2SHong Zhang /* ----------------------------------------------------------------*/ 28627ba1a0bfSKris Buschelman PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 28637ba1a0bfSKris Buschelman { 28647ba1a0bfSKris Buschelman PetscErrorCode ierr; 28650298fd71SBarry Smith PetscFreeSpaceList free_space=NULL,current_space=NULL; 28667ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp =(Mat_SeqMAIJ*)PP->data; 28677ba1a0bfSKris Buschelman Mat P =pp->AIJ; 28687ba1a0bfSKris Buschelman Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 2869d2840e09SBarry Smith PetscInt *pti,*ptj,*ptJ; 28707ba1a0bfSKris Buschelman PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 2871d2840e09SBarry Smith const PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof; 2872d2840e09SBarry Smith PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn; 28737ba1a0bfSKris Buschelman MatScalar *ca; 2874d2840e09SBarry Smith const PetscInt *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj; 28757ba1a0bfSKris Buschelman 28767ba1a0bfSKris Buschelman PetscFunctionBegin; 28777ba1a0bfSKris Buschelman /* Get ij structure of P^T */ 28787ba1a0bfSKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 28797ba1a0bfSKris Buschelman 28807ba1a0bfSKris Buschelman cn = pn*ppdof; 28817ba1a0bfSKris Buschelman /* Allocate ci array, arrays for fill computation and */ 28827ba1a0bfSKris Buschelman /* free space for accumulating nonzero column info */ 2883854ce69bSBarry Smith ierr = PetscMalloc1(cn+1,&ci);CHKERRQ(ierr); 28847ba1a0bfSKris Buschelman ci[0] = 0; 28857ba1a0bfSKris Buschelman 28867ba1a0bfSKris Buschelman /* Work arrays for rows of P^T*A */ 2887dcca6d9dSJed Brown ierr = PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);CHKERRQ(ierr); 2888c43dabc9SBarry Smith ierr = PetscMemzero(ptadenserow,an*sizeof(PetscInt));CHKERRQ(ierr); 2889c43dabc9SBarry Smith ierr = PetscMemzero(denserow,cn*sizeof(PetscInt));CHKERRQ(ierr); 28907ba1a0bfSKris Buschelman 28917ba1a0bfSKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 28927ba1a0bfSKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 28937ba1a0bfSKris Buschelman /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 2894f91af8c7SBarry Smith ierr = PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space);CHKERRQ(ierr); 28957ba1a0bfSKris Buschelman current_space = free_space; 28967ba1a0bfSKris Buschelman 28977ba1a0bfSKris Buschelman /* Determine symbolic info for each row of C: */ 28987ba1a0bfSKris Buschelman for (i=0; i<pn; i++) { 28997ba1a0bfSKris Buschelman ptnzi = pti[i+1] - pti[i]; 29007ba1a0bfSKris Buschelman ptJ = ptj + pti[i]; 29017ba1a0bfSKris Buschelman for (dof=0; dof<ppdof; dof++) { 29027ba1a0bfSKris Buschelman ptanzi = 0; 29037ba1a0bfSKris Buschelman /* Determine symbolic row of PtA: */ 29047ba1a0bfSKris Buschelman for (j=0; j<ptnzi; j++) { 29057ba1a0bfSKris Buschelman /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 29067ba1a0bfSKris Buschelman arow = ptJ[j]*ppdof + dof; 29077ba1a0bfSKris Buschelman /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 29087ba1a0bfSKris Buschelman anzj = ai[arow+1] - ai[arow]; 29097ba1a0bfSKris Buschelman ajj = aj + ai[arow]; 29107ba1a0bfSKris Buschelman for (k=0; k<anzj; k++) { 29117ba1a0bfSKris Buschelman if (!ptadenserow[ajj[k]]) { 29127ba1a0bfSKris Buschelman ptadenserow[ajj[k]] = -1; 29137ba1a0bfSKris Buschelman ptasparserow[ptanzi++] = ajj[k]; 29147ba1a0bfSKris Buschelman } 29157ba1a0bfSKris Buschelman } 29167ba1a0bfSKris Buschelman } 29177ba1a0bfSKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 29187ba1a0bfSKris Buschelman ptaj = ptasparserow; 29197ba1a0bfSKris Buschelman cnzi = 0; 29207ba1a0bfSKris Buschelman for (j=0; j<ptanzi; j++) { 29217ba1a0bfSKris Buschelman /* Get offset within block of P */ 29227ba1a0bfSKris Buschelman pshift = *ptaj%ppdof; 29237ba1a0bfSKris Buschelman /* Get block row of P */ 29247ba1a0bfSKris Buschelman prow = (*ptaj++)/ppdof; /* integer division */ 29257ba1a0bfSKris Buschelman /* P has same number of nonzeros per row as the compressed form */ 29267ba1a0bfSKris Buschelman pnzj = pi[prow+1] - pi[prow]; 29277ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 29287ba1a0bfSKris Buschelman for (k=0;k<pnzj;k++) { 29297ba1a0bfSKris Buschelman /* Locations in C are shifted by the offset within the block */ 29307ba1a0bfSKris Buschelman /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 29317ba1a0bfSKris Buschelman if (!denserow[pjj[k]*ppdof+pshift]) { 29327ba1a0bfSKris Buschelman denserow[pjj[k]*ppdof+pshift] = -1; 29337ba1a0bfSKris Buschelman sparserow[cnzi++] = pjj[k]*ppdof+pshift; 29347ba1a0bfSKris Buschelman } 29357ba1a0bfSKris Buschelman } 29367ba1a0bfSKris Buschelman } 29377ba1a0bfSKris Buschelman 29387ba1a0bfSKris Buschelman /* sort sparserow */ 29397ba1a0bfSKris Buschelman ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 29407ba1a0bfSKris Buschelman 29417ba1a0bfSKris Buschelman /* If free space is not available, make more free space */ 29427ba1a0bfSKris Buschelman /* Double the amount of total space in the list */ 29437ba1a0bfSKris Buschelman if (current_space->local_remaining<cnzi) { 2944f91af8c7SBarry Smith ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 29457ba1a0bfSKris Buschelman } 29467ba1a0bfSKris Buschelman 29477ba1a0bfSKris Buschelman /* Copy data into free space, and zero out denserows */ 29487ba1a0bfSKris Buschelman ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 294926fbe8dcSKarl Rupp 29507ba1a0bfSKris Buschelman current_space->array += cnzi; 29517ba1a0bfSKris Buschelman current_space->local_used += cnzi; 29527ba1a0bfSKris Buschelman current_space->local_remaining -= cnzi; 29537ba1a0bfSKris Buschelman 295426fbe8dcSKarl Rupp for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0; 295526fbe8dcSKarl Rupp for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0; 295626fbe8dcSKarl Rupp 29577ba1a0bfSKris Buschelman /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 29587ba1a0bfSKris Buschelman /* For now, we will recompute what is needed. */ 29597ba1a0bfSKris Buschelman ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 29607ba1a0bfSKris Buschelman } 29617ba1a0bfSKris Buschelman } 29627ba1a0bfSKris Buschelman /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 29637ba1a0bfSKris Buschelman /* Allocate space for cj, initialize cj, and */ 29647ba1a0bfSKris Buschelman /* destroy list of free space and other temporary array(s) */ 2965854ce69bSBarry Smith ierr = PetscMalloc1(ci[cn]+1,&cj);CHKERRQ(ierr); 2966a1a86e44SBarry Smith ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 296774ed9c26SBarry Smith ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr); 29687ba1a0bfSKris Buschelman 29697ba1a0bfSKris Buschelman /* Allocate space for ca */ 2970854ce69bSBarry Smith ierr = PetscCalloc1(ci[cn]+1,&ca);CHKERRQ(ierr); 29717ba1a0bfSKris Buschelman 29727ba1a0bfSKris Buschelman /* put together the new matrix */ 2973ce94432eSBarry Smith ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,C);CHKERRQ(ierr); 2974f27682edSJed Brown ierr = MatSetBlockSize(*C,pp->dof);CHKERRQ(ierr); 29757ba1a0bfSKris Buschelman 29767ba1a0bfSKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 29777ba1a0bfSKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 29787ba1a0bfSKris Buschelman c = (Mat_SeqAIJ*)((*C)->data); 2979e6b907acSBarry Smith c->free_a = PETSC_TRUE; 2980e6b907acSBarry Smith c->free_ij = PETSC_TRUE; 29817ba1a0bfSKris Buschelman c->nonew = 0; 298226fbe8dcSKarl Rupp 2983d76021d8SHong Zhang (*C)->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 29847ba1a0bfSKris Buschelman 29857ba1a0bfSKris Buschelman /* Clean up. */ 29867ba1a0bfSKris Buschelman ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 29877ba1a0bfSKris Buschelman PetscFunctionReturn(0); 29887ba1a0bfSKris Buschelman } 29897ba1a0bfSKris Buschelman 29907ba1a0bfSKris Buschelman PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 29917ba1a0bfSKris Buschelman { 29927ba1a0bfSKris Buschelman /* This routine requires testing -- first draft only */ 29937ba1a0bfSKris Buschelman PetscErrorCode ierr; 29947ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 29957ba1a0bfSKris Buschelman Mat P =pp->AIJ; 29967ba1a0bfSKris Buschelman Mat_SeqAIJ *a = (Mat_SeqAIJ*) A->data; 29977ba1a0bfSKris Buschelman Mat_SeqAIJ *p = (Mat_SeqAIJ*) P->data; 29987ba1a0bfSKris Buschelman Mat_SeqAIJ *c = (Mat_SeqAIJ*) C->data; 2999a2ea699eSBarry Smith const PetscInt *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj; 3000d2840e09SBarry Smith const PetscInt *ci=c->i,*cj=c->j,*cjj; 3001d2840e09SBarry Smith const PetscInt am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof; 3002d2840e09SBarry Smith PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense; 3003a2ea699eSBarry Smith const MatScalar *aa=a->a,*pa=p->a,*pA,*paj; 3004d2840e09SBarry Smith MatScalar *ca=c->a,*caj,*apa; 30057ba1a0bfSKris Buschelman 30067ba1a0bfSKris Buschelman PetscFunctionBegin; 30077ba1a0bfSKris Buschelman /* Allocate temporary array for storage of one row of A*P */ 30081795a4d1SJed Brown ierr = PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);CHKERRQ(ierr); 30097ba1a0bfSKris Buschelman 30107ba1a0bfSKris Buschelman /* Clear old values in C */ 30117ba1a0bfSKris Buschelman ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 30127ba1a0bfSKris Buschelman 30137ba1a0bfSKris Buschelman for (i=0; i<am; i++) { 30147ba1a0bfSKris Buschelman /* Form sparse row of A*P */ 30157ba1a0bfSKris Buschelman anzi = ai[i+1] - ai[i]; 30167ba1a0bfSKris Buschelman apnzj = 0; 30177ba1a0bfSKris Buschelman for (j=0; j<anzi; j++) { 30187ba1a0bfSKris Buschelman /* Get offset within block of P */ 30197ba1a0bfSKris Buschelman pshift = *aj%ppdof; 30207ba1a0bfSKris Buschelman /* Get block row of P */ 30217ba1a0bfSKris Buschelman prow = *aj++/ppdof; /* integer division */ 30227ba1a0bfSKris Buschelman pnzj = pi[prow+1] - pi[prow]; 30237ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 30247ba1a0bfSKris Buschelman paj = pa + pi[prow]; 30257ba1a0bfSKris Buschelman for (k=0; k<pnzj; k++) { 30267ba1a0bfSKris Buschelman poffset = pjj[k]*ppdof+pshift; 30277ba1a0bfSKris Buschelman if (!apjdense[poffset]) { 30287ba1a0bfSKris Buschelman apjdense[poffset] = -1; 30297ba1a0bfSKris Buschelman apj[apnzj++] = poffset; 30307ba1a0bfSKris Buschelman } 30317ba1a0bfSKris Buschelman apa[poffset] += (*aa)*paj[k]; 30327ba1a0bfSKris Buschelman } 3033dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr); 30347ba1a0bfSKris Buschelman aa++; 30357ba1a0bfSKris Buschelman } 30367ba1a0bfSKris Buschelman 30377ba1a0bfSKris Buschelman /* Sort the j index array for quick sparse axpy. */ 30387ba1a0bfSKris Buschelman /* Note: a array does not need sorting as it is in dense storage locations. */ 30397ba1a0bfSKris Buschelman ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 30407ba1a0bfSKris Buschelman 30417ba1a0bfSKris Buschelman /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 30427ba1a0bfSKris Buschelman prow = i/ppdof; /* integer division */ 30437ba1a0bfSKris Buschelman pshift = i%ppdof; 30447ba1a0bfSKris Buschelman poffset = pi[prow]; 30457ba1a0bfSKris Buschelman pnzi = pi[prow+1] - poffset; 30467ba1a0bfSKris Buschelman /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 30477ba1a0bfSKris Buschelman pJ = pj+poffset; 30487ba1a0bfSKris Buschelman pA = pa+poffset; 30497ba1a0bfSKris Buschelman for (j=0; j<pnzi; j++) { 30507ba1a0bfSKris Buschelman crow = (*pJ)*ppdof+pshift; 30517ba1a0bfSKris Buschelman cjj = cj + ci[crow]; 30527ba1a0bfSKris Buschelman caj = ca + ci[crow]; 30537ba1a0bfSKris Buschelman pJ++; 30547ba1a0bfSKris Buschelman /* Perform sparse axpy operation. Note cjj includes apj. */ 30557ba1a0bfSKris Buschelman for (k=0,nextap=0; nextap<apnzj; k++) { 305626fbe8dcSKarl Rupp if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]]; 30577ba1a0bfSKris Buschelman } 3058dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr); 30597ba1a0bfSKris Buschelman pA++; 30607ba1a0bfSKris Buschelman } 30617ba1a0bfSKris Buschelman 30627ba1a0bfSKris Buschelman /* Zero the current row info for A*P */ 30637ba1a0bfSKris Buschelman for (j=0; j<apnzj; j++) { 30647ba1a0bfSKris Buschelman apa[apj[j]] = 0.; 30657ba1a0bfSKris Buschelman apjdense[apj[j]] = 0; 30667ba1a0bfSKris Buschelman } 30677ba1a0bfSKris Buschelman } 30687ba1a0bfSKris Buschelman 30697ba1a0bfSKris Buschelman /* Assemble the final matrix and clean up */ 30707ba1a0bfSKris Buschelman ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 30717ba1a0bfSKris Buschelman ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 307274ed9c26SBarry Smith ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr); 307395ddefa2SHong Zhang PetscFunctionReturn(0); 307495ddefa2SHong Zhang } 30757ba1a0bfSKris Buschelman 3076150d2497SBarry Smith PETSC_INTERN PetscErrorCode MatPtAP_SeqAIJ_SeqMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 30772121bac1SHong Zhang { 30782121bac1SHong Zhang PetscErrorCode ierr; 30792121bac1SHong Zhang 30802121bac1SHong Zhang PetscFunctionBegin; 30812121bac1SHong Zhang if (scall == MAT_INITIAL_MATRIX) { 30823ff4c91cSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 30832121bac1SHong Zhang ierr = MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,fill,C);CHKERRQ(ierr); 30843ff4c91cSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 30852121bac1SHong Zhang } 30863ff4c91cSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 30872121bac1SHong Zhang ierr = MatPtAPNumeric_SeqAIJ_SeqMAIJ(A,P,*C);CHKERRQ(ierr); 30883ff4c91cSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 30892121bac1SHong Zhang PetscFunctionReturn(0); 30902121bac1SHong Zhang } 30912121bac1SHong Zhang 3092f7a08781SBarry Smith PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 309395ddefa2SHong Zhang { 309495ddefa2SHong Zhang PetscErrorCode ierr; 309595ddefa2SHong Zhang 309695ddefa2SHong Zhang PetscFunctionBegin; 309795ddefa2SHong Zhang /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */ 3098511c6705SHong Zhang ierr = MatConvert(PP,MATMPIAIJ,MAT_INPLACE_MATRIX,&PP);CHKERRQ(ierr); 309995ddefa2SHong Zhang ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr); 310095ddefa2SHong Zhang PetscFunctionReturn(0); 310195ddefa2SHong Zhang } 310295ddefa2SHong Zhang 3103f7a08781SBarry Smith PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C) 310495ddefa2SHong Zhang { 310595ddefa2SHong Zhang PetscFunctionBegin; 3106e32f2f54SBarry Smith SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 31077ba1a0bfSKris Buschelman PetscFunctionReturn(0); 31087ba1a0bfSKris Buschelman } 31097ba1a0bfSKris Buschelman 3110150d2497SBarry Smith PETSC_INTERN PetscErrorCode MatPtAP_MPIAIJ_MPIMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 31119e03dfbbSHong Zhang { 31129e03dfbbSHong Zhang PetscErrorCode ierr; 31139e03dfbbSHong Zhang 31149e03dfbbSHong Zhang PetscFunctionBegin; 31159e03dfbbSHong Zhang if (scall == MAT_INITIAL_MATRIX) { 31163ff4c91cSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 31179e03dfbbSHong Zhang ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ(A,P,fill,C);CHKERRQ(ierr); 31183ff4c91cSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 31199e03dfbbSHong Zhang } 31203ff4c91cSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 31219e03dfbbSHong Zhang ierr = ((*C)->ops->ptapnumeric)(A,P,*C);CHKERRQ(ierr); 31223ff4c91cSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 31239e03dfbbSHong Zhang PetscFunctionReturn(0); 31249e03dfbbSHong Zhang } 31259e03dfbbSHong Zhang 3126cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 31279c4fc4c7SBarry Smith { 31289c4fc4c7SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 3129ceb03754SKris Buschelman Mat a = b->AIJ,B; 31309c4fc4c7SBarry Smith Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 31319c4fc4c7SBarry Smith PetscErrorCode ierr; 31320fd73130SBarry Smith PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 3133ba8c8a56SBarry Smith PetscInt *cols; 3134ba8c8a56SBarry Smith PetscScalar *vals; 31359c4fc4c7SBarry Smith 31369c4fc4c7SBarry Smith PetscFunctionBegin; 31379c4fc4c7SBarry Smith ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 3138785e854fSJed Brown ierr = PetscMalloc1(dof*m,&ilen);CHKERRQ(ierr); 31399c4fc4c7SBarry Smith for (i=0; i<m; i++) { 31409c4fc4c7SBarry Smith nmax = PetscMax(nmax,aij->ilen[i]); 314126fbe8dcSKarl Rupp for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i]; 31429c4fc4c7SBarry Smith } 3143ceb03754SKris Buschelman ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr); 31449c4fc4c7SBarry Smith ierr = PetscFree(ilen);CHKERRQ(ierr); 3145785e854fSJed Brown ierr = PetscMalloc1(nmax,&icols);CHKERRQ(ierr); 31469c4fc4c7SBarry Smith ii = 0; 31479c4fc4c7SBarry Smith for (i=0; i<m; i++) { 3148ba8c8a56SBarry Smith ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 31490fd73130SBarry Smith for (j=0; j<dof; j++) { 315026fbe8dcSKarl Rupp for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j; 3151ceb03754SKris Buschelman ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 31529c4fc4c7SBarry Smith ii++; 31539c4fc4c7SBarry Smith } 3154ba8c8a56SBarry Smith ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 31559c4fc4c7SBarry Smith } 31569c4fc4c7SBarry Smith ierr = PetscFree(icols);CHKERRQ(ierr); 3157ceb03754SKris Buschelman ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3158ceb03754SKris Buschelman ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3159ceb03754SKris Buschelman 3160511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 316128be2f97SBarry Smith ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 3162c3d102feSKris Buschelman } else { 3163ceb03754SKris Buschelman *newmat = B; 3164c3d102feSKris Buschelman } 31659c4fc4c7SBarry Smith PetscFunctionReturn(0); 31669c4fc4c7SBarry Smith } 31679c4fc4c7SBarry Smith 3168c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h> 3169be1d678aSKris Buschelman 3170cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 31710fd73130SBarry Smith { 31720fd73130SBarry Smith Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 3173ceb03754SKris Buschelman Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 31740fd73130SBarry Smith Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 31750fd73130SBarry Smith Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 31760fd73130SBarry Smith Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 31770fd73130SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 31780298fd71SBarry Smith PetscInt dof = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0; 31790298fd71SBarry Smith PetscInt *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL; 31800fd73130SBarry Smith PetscInt rstart,cstart,*garray,ii,k; 31810fd73130SBarry Smith PetscErrorCode ierr; 31820fd73130SBarry Smith PetscScalar *vals,*ovals; 31830fd73130SBarry Smith 31840fd73130SBarry Smith PetscFunctionBegin; 3185dcca6d9dSJed Brown ierr = PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);CHKERRQ(ierr); 3186d0f46423SBarry Smith for (i=0; i<A->rmap->n/dof; i++) { 31870fd73130SBarry Smith nmax = PetscMax(nmax,AIJ->ilen[i]); 31880fd73130SBarry Smith onmax = PetscMax(onmax,OAIJ->ilen[i]); 31890fd73130SBarry Smith for (j=0; j<dof; j++) { 31900fd73130SBarry Smith dnz[dof*i+j] = AIJ->ilen[i]; 31910fd73130SBarry Smith onz[dof*i+j] = OAIJ->ilen[i]; 31920fd73130SBarry Smith } 31930fd73130SBarry Smith } 3194ce94432eSBarry Smith ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr); 3195f27682edSJed Brown ierr = MatSetBlockSize(B,dof);CHKERRQ(ierr); 31960fd73130SBarry Smith ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 31970fd73130SBarry Smith 3198dcca6d9dSJed Brown ierr = PetscMalloc2(nmax,&icols,onmax,&oicols);CHKERRQ(ierr); 3199d0f46423SBarry Smith rstart = dof*maij->A->rmap->rstart; 3200d0f46423SBarry Smith cstart = dof*maij->A->cmap->rstart; 32010fd73130SBarry Smith garray = mpiaij->garray; 32020fd73130SBarry Smith 32030fd73130SBarry Smith ii = rstart; 3204d0f46423SBarry Smith for (i=0; i<A->rmap->n/dof; i++) { 32050fd73130SBarry Smith ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 32060fd73130SBarry Smith ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 32070fd73130SBarry Smith for (j=0; j<dof; j++) { 32080fd73130SBarry Smith for (k=0; k<ncols; k++) { 32090fd73130SBarry Smith icols[k] = cstart + dof*cols[k]+j; 32100fd73130SBarry Smith } 32110fd73130SBarry Smith for (k=0; k<oncols; k++) { 32120fd73130SBarry Smith oicols[k] = dof*garray[ocols[k]]+j; 32130fd73130SBarry Smith } 3214ceb03754SKris Buschelman ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3215ceb03754SKris Buschelman ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 32160fd73130SBarry Smith ii++; 32170fd73130SBarry Smith } 32180fd73130SBarry Smith ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 32190fd73130SBarry Smith ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 32200fd73130SBarry Smith } 32210fd73130SBarry Smith ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 32220fd73130SBarry Smith 3223ceb03754SKris Buschelman ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3224ceb03754SKris Buschelman ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3225ceb03754SKris Buschelman 3226511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 32277adad957SLisandro Dalcin PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 32287adad957SLisandro Dalcin ((PetscObject)A)->refct = 1; 322926fbe8dcSKarl Rupp 323028be2f97SBarry Smith ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 323126fbe8dcSKarl Rupp 32327adad957SLisandro Dalcin ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 3233c3d102feSKris Buschelman } else { 3234ceb03754SKris Buschelman *newmat = B; 3235c3d102feSKris Buschelman } 32360fd73130SBarry Smith PetscFunctionReturn(0); 32370fd73130SBarry Smith } 32380fd73130SBarry Smith 3239*7dae84e0SHong Zhang PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 32409e516c8fSBarry Smith { 32419e516c8fSBarry Smith PetscErrorCode ierr; 32429e516c8fSBarry Smith Mat A; 32439e516c8fSBarry Smith 32449e516c8fSBarry Smith PetscFunctionBegin; 32459e516c8fSBarry Smith ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 3246*7dae84e0SHong Zhang ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 32479e516c8fSBarry Smith ierr = MatDestroy(&A);CHKERRQ(ierr); 32489e516c8fSBarry Smith PetscFunctionReturn(0); 32499e516c8fSBarry Smith } 32500fd73130SBarry Smith 3251bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */ 3252ff585edeSJed Brown /*@C 32530bad9183SKris Buschelman MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 32540bad9183SKris Buschelman operations for multicomponent problems. It interpolates each component the same 32550bad9183SKris Buschelman way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 32560bad9183SKris Buschelman and MATMPIAIJ for distributed matrices. 32570bad9183SKris Buschelman 3258ff585edeSJed Brown Collective 3259ff585edeSJed Brown 3260ff585edeSJed Brown Input Parameters: 3261ff585edeSJed Brown + A - the AIJ matrix describing the action on blocks 3262ff585edeSJed Brown - dof - the block size (number of components per node) 3263ff585edeSJed Brown 3264ff585edeSJed Brown Output Parameter: 3265ff585edeSJed Brown . maij - the new MAIJ matrix 3266ff585edeSJed Brown 32670bad9183SKris Buschelman Operations provided: 32680fd73130SBarry Smith + MatMult 32690bad9183SKris Buschelman . MatMultTranspose 32700bad9183SKris Buschelman . MatMultAdd 32710bad9183SKris Buschelman . MatMultTransposeAdd 32720fd73130SBarry Smith - MatView 32730bad9183SKris Buschelman 32740bad9183SKris Buschelman Level: advanced 32750bad9183SKris Buschelman 3276b409243cSBarry Smith .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ 3277ff585edeSJed Brown @*/ 32787087cfbeSBarry Smith PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 327982b1193eSBarry Smith { 3280dfbe8321SBarry Smith PetscErrorCode ierr; 3281b24ad042SBarry Smith PetscMPIInt size; 3282b24ad042SBarry Smith PetscInt n; 328382b1193eSBarry Smith Mat B; 328482b1193eSBarry Smith 328582b1193eSBarry Smith PetscFunctionBegin; 3286d72c5c08SBarry Smith ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3287d72c5c08SBarry Smith 328826fbe8dcSKarl Rupp if (dof == 1) *maij = A; 328926fbe8dcSKarl Rupp else { 3290ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3291d0f46423SBarry Smith ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr); 3292bccba955SJed Brown ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr); 3293bccba955SJed Brown ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr); 3294bccba955SJed Brown ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3295bccba955SJed Brown ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 329626fbe8dcSKarl Rupp 3297cd3bbe55SBarry Smith B->assembled = PETSC_TRUE; 3298d72c5c08SBarry Smith 3299ce94432eSBarry Smith ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 3300f4a53059SBarry Smith if (size == 1) { 3301feb9fe23SJed Brown Mat_SeqMAIJ *b; 3302feb9fe23SJed Brown 3303b9b97703SBarry Smith ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 330426fbe8dcSKarl Rupp 33050298fd71SBarry Smith B->ops->setup = NULL; 33064cb9d9b8SBarry Smith B->ops->destroy = MatDestroy_SeqMAIJ; 33070fd73130SBarry Smith B->ops->view = MatView_SeqMAIJ; 3308feb9fe23SJed Brown b = (Mat_SeqMAIJ*)B->data; 3309b9b97703SBarry Smith b->dof = dof; 33104cb9d9b8SBarry Smith b->AIJ = A; 331126fbe8dcSKarl Rupp 3312d72c5c08SBarry Smith if (dof == 2) { 3313cd3bbe55SBarry Smith B->ops->mult = MatMult_SeqMAIJ_2; 3314cd3bbe55SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_2; 3315cd3bbe55SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 3316cd3bbe55SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 3317bcc973b7SBarry Smith } else if (dof == 3) { 3318bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_3; 3319bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_3; 3320bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 3321bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 3322bcc973b7SBarry Smith } else if (dof == 4) { 3323bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_4; 3324bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_4; 3325bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 3326bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 3327f9fae5adSBarry Smith } else if (dof == 5) { 3328f9fae5adSBarry Smith B->ops->mult = MatMult_SeqMAIJ_5; 3329f9fae5adSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_5; 3330f9fae5adSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 3331f9fae5adSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 33326cd98798SBarry Smith } else if (dof == 6) { 33336cd98798SBarry Smith B->ops->mult = MatMult_SeqMAIJ_6; 33346cd98798SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_6; 33356cd98798SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 33366cd98798SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 3337ed8eea03SBarry Smith } else if (dof == 7) { 3338ed8eea03SBarry Smith B->ops->mult = MatMult_SeqMAIJ_7; 3339ed8eea03SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_7; 3340ed8eea03SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 3341ed8eea03SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 334266ed3db0SBarry Smith } else if (dof == 8) { 334366ed3db0SBarry Smith B->ops->mult = MatMult_SeqMAIJ_8; 334466ed3db0SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_8; 334566ed3db0SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 334666ed3db0SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 33472b692628SMatthew Knepley } else if (dof == 9) { 33482b692628SMatthew Knepley B->ops->mult = MatMult_SeqMAIJ_9; 33492b692628SMatthew Knepley B->ops->multadd = MatMultAdd_SeqMAIJ_9; 33502b692628SMatthew Knepley B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 33512b692628SMatthew Knepley B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 3352b9cda22cSBarry Smith } else if (dof == 10) { 3353b9cda22cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_10; 3354b9cda22cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_10; 3355b9cda22cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 3356b9cda22cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 3357dbdd7285SBarry Smith } else if (dof == 11) { 3358dbdd7285SBarry Smith B->ops->mult = MatMult_SeqMAIJ_11; 3359dbdd7285SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_11; 3360dbdd7285SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 3361dbdd7285SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 33622f7816d4SBarry Smith } else if (dof == 16) { 33632f7816d4SBarry Smith B->ops->mult = MatMult_SeqMAIJ_16; 33642f7816d4SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_16; 33652f7816d4SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 33662f7816d4SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 3367ed1c418cSBarry Smith } else if (dof == 18) { 3368ed1c418cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_18; 3369ed1c418cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_18; 3370ed1c418cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 3371ed1c418cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 337282b1193eSBarry Smith } else { 33736861f193SBarry Smith B->ops->mult = MatMult_SeqMAIJ_N; 33746861f193SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_N; 33756861f193SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 33766861f193SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 337782b1193eSBarry Smith } 3378bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 3379bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaij_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr); 3380f4a53059SBarry Smith } else { 3381f4a53059SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)A->data; 3382feb9fe23SJed Brown Mat_MPIMAIJ *b; 3383f4a53059SBarry Smith IS from,to; 3384f4a53059SBarry Smith Vec gvec; 3385f4a53059SBarry Smith 3386b9b97703SBarry Smith ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 338726fbe8dcSKarl Rupp 33880298fd71SBarry Smith B->ops->setup = NULL; 3389d72c5c08SBarry Smith B->ops->destroy = MatDestroy_MPIMAIJ; 33900fd73130SBarry Smith B->ops->view = MatView_MPIMAIJ; 339126fbe8dcSKarl Rupp 3392b9b97703SBarry Smith b = (Mat_MPIMAIJ*)B->data; 3393b9b97703SBarry Smith b->dof = dof; 3394b9b97703SBarry Smith b->A = A; 339526fbe8dcSKarl Rupp 33964cb9d9b8SBarry Smith ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 33974cb9d9b8SBarry Smith ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 33984cb9d9b8SBarry Smith 3399f4a53059SBarry Smith ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 3400a34bdc0bSBarry Smith ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr); 3401a34bdc0bSBarry Smith ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr); 340213288a74SBarry Smith ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 3403a34bdc0bSBarry Smith ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr); 3404f4a53059SBarry Smith 3405f4a53059SBarry Smith /* create two temporary Index sets for build scatter gather */ 3406ce94432eSBarry Smith ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 3407f4a53059SBarry Smith ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 3408f4a53059SBarry Smith 3409f4a53059SBarry Smith /* create temporary global vector to generate scatter context */ 3410ce94432eSBarry Smith ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);CHKERRQ(ierr); 3411f4a53059SBarry Smith 3412f4a53059SBarry Smith /* generate the scatter context */ 3413f4a53059SBarry Smith ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 3414f4a53059SBarry Smith 34156bf464f9SBarry Smith ierr = ISDestroy(&from);CHKERRQ(ierr); 34166bf464f9SBarry Smith ierr = ISDestroy(&to);CHKERRQ(ierr); 34176bf464f9SBarry Smith ierr = VecDestroy(&gvec);CHKERRQ(ierr); 3418f4a53059SBarry Smith 3419f4a53059SBarry Smith B->ops->mult = MatMult_MPIMAIJ_dof; 34204cb9d9b8SBarry Smith B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 34214cb9d9b8SBarry Smith B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 34224cb9d9b8SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 342326fbe8dcSKarl Rupp 3424bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 3425bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_mpiaij_mpimaij_C",MatPtAP_MPIAIJ_MPIMAIJ);CHKERRQ(ierr); 3426f4a53059SBarry Smith } 3427*7dae84e0SHong Zhang B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ; 34284994cf47SJed Brown ierr = MatSetUp(B);CHKERRQ(ierr); 3429cd3bbe55SBarry Smith *maij = B; 3430146574abSBarry Smith ierr = MatViewFromOptions(B,NULL,"-mat_view");CHKERRQ(ierr); 3431d72c5c08SBarry Smith } 343282b1193eSBarry Smith PetscFunctionReturn(0); 343382b1193eSBarry Smith } 3434