1be1d678aSKris Buschelman 282b1193eSBarry Smith /* 3cd3bbe55SBarry Smith Defines the basic matrix operations for the MAIJ matrix storage format. 4cd3bbe55SBarry Smith This format is used for restriction and interpolation operations for 5cd3bbe55SBarry Smith multicomponent problems. It interpolates each component the same way 6cd3bbe55SBarry Smith independently. 7cd3bbe55SBarry Smith 8cd3bbe55SBarry Smith We provide: 9cd3bbe55SBarry Smith MatMult() 10cd3bbe55SBarry Smith MatMultTranspose() 11cd3bbe55SBarry Smith MatMultTransposeAdd() 12cd3bbe55SBarry Smith MatMultAdd() 13cd3bbe55SBarry Smith and 14cd3bbe55SBarry Smith MatCreateMAIJ(Mat,dof,Mat*) 15f4a53059SBarry Smith 16f4a53059SBarry Smith This single directory handles both the sequential and parallel codes 1782b1193eSBarry Smith */ 1882b1193eSBarry Smith 19c6db04a5SJed Brown #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 20c6db04a5SJed Brown #include <../src/mat/utils/freespace.h> 2182b1193eSBarry Smith 22b350b9acSSatish Balay /*@ 23ff585edeSJed Brown MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix 24ff585edeSJed Brown 25ff585edeSJed Brown Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel 26ff585edeSJed Brown 27ff585edeSJed Brown Input Parameter: 28ff585edeSJed Brown . A - the MAIJ matrix 29ff585edeSJed Brown 30ff585edeSJed Brown Output Parameter: 31ff585edeSJed Brown . B - the AIJ matrix 32ff585edeSJed Brown 33ff585edeSJed Brown Level: advanced 34ff585edeSJed Brown 3595452b02SPatrick Sanan Notes: 3695452b02SPatrick Sanan The reference count on the AIJ matrix is not increased so you should not destroy it. 37ff585edeSJed Brown 38ff585edeSJed Brown .seealso: MatCreateMAIJ() 39ff585edeSJed Brown @*/ 407087cfbeSBarry Smith PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B) 41b9b97703SBarry Smith { 42dfbe8321SBarry Smith PetscErrorCode ierr; 43ace3abfcSBarry Smith PetscBool ismpimaij,isseqmaij; 44b9b97703SBarry Smith 45b9b97703SBarry Smith PetscFunctionBegin; 46251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 47251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 48b9b97703SBarry Smith if (ismpimaij) { 49b9b97703SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 50b9b97703SBarry Smith 51b9b97703SBarry Smith *B = b->A; 52b9b97703SBarry Smith } else if (isseqmaij) { 53b9b97703SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 54b9b97703SBarry Smith 55b9b97703SBarry Smith *B = b->AIJ; 56b9b97703SBarry Smith } else { 57b9b97703SBarry Smith *B = A; 58b9b97703SBarry Smith } 59b9b97703SBarry Smith PetscFunctionReturn(0); 60b9b97703SBarry Smith } 61b9b97703SBarry Smith 62480dffcfSJed Brown /*@ 63ff585edeSJed Brown MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size 64ff585edeSJed Brown 653f9fe445SBarry Smith Logically Collective 66ff585edeSJed Brown 67ff585edeSJed Brown Input Parameter: 68ff585edeSJed Brown + A - the MAIJ matrix 69ff585edeSJed Brown - dof - the block size for the new matrix 70ff585edeSJed Brown 71ff585edeSJed Brown Output Parameter: 72ff585edeSJed Brown . B - the new MAIJ matrix 73ff585edeSJed Brown 74ff585edeSJed Brown Level: advanced 75ff585edeSJed Brown 76ff585edeSJed Brown .seealso: MatCreateMAIJ() 77ff585edeSJed Brown @*/ 787087cfbeSBarry Smith PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 79b9b97703SBarry Smith { 80dfbe8321SBarry Smith PetscErrorCode ierr; 810298fd71SBarry Smith Mat Aij = NULL; 82b9b97703SBarry Smith 83b9b97703SBarry Smith PetscFunctionBegin; 84c5eb9154SBarry Smith PetscValidLogicalCollectiveInt(A,dof,2); 85b9b97703SBarry Smith ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 86b9b97703SBarry Smith ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 87b9b97703SBarry Smith PetscFunctionReturn(0); 88b9b97703SBarry Smith } 89b9b97703SBarry Smith 90dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 9182b1193eSBarry Smith { 92dfbe8321SBarry Smith PetscErrorCode ierr; 934cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 9482b1193eSBarry Smith 9582b1193eSBarry Smith PetscFunctionBegin; 966bf464f9SBarry Smith ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 97bf0cc555SLisandro Dalcin ierr = PetscFree(A->data);CHKERRQ(ierr); 98bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);CHKERRQ(ierr); 99bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaij_seqmaij_C",NULL);CHKERRQ(ierr); 10059e29146SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaijperm_seqmaij_C",NULL);CHKERRQ(ierr); 101279e4bd4SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaijmkl_seqmaij_C",NULL);CHKERRQ(ierr); 102ec626240SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_is_seqmaij_C",NULL);CHKERRQ(ierr); 1034cb9d9b8SBarry Smith PetscFunctionReturn(0); 1044cb9d9b8SBarry Smith } 1054cb9d9b8SBarry Smith 106356c569eSBarry Smith PetscErrorCode MatSetUp_MAIJ(Mat A) 107356c569eSBarry Smith { 108356c569eSBarry Smith PetscFunctionBegin; 109ce94432eSBarry Smith SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices"); 110356c569eSBarry Smith PetscFunctionReturn(0); 111356c569eSBarry Smith } 112356c569eSBarry Smith 1130fd73130SBarry Smith PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 1140fd73130SBarry Smith { 1150fd73130SBarry Smith PetscErrorCode ierr; 1160fd73130SBarry Smith Mat B; 1170fd73130SBarry Smith 1180fd73130SBarry Smith PetscFunctionBegin; 119a2ea699eSBarry Smith ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1200fd73130SBarry Smith ierr = MatView(B,viewer);CHKERRQ(ierr); 1216bf464f9SBarry Smith ierr = MatDestroy(&B);CHKERRQ(ierr); 1220fd73130SBarry Smith PetscFunctionReturn(0); 1230fd73130SBarry Smith } 1240fd73130SBarry Smith 1250fd73130SBarry Smith PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 1260fd73130SBarry Smith { 1270fd73130SBarry Smith PetscErrorCode ierr; 1280fd73130SBarry Smith Mat B; 1290fd73130SBarry Smith 1300fd73130SBarry Smith PetscFunctionBegin; 131a2ea699eSBarry Smith ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1320fd73130SBarry Smith ierr = MatView(B,viewer);CHKERRQ(ierr); 1336bf464f9SBarry Smith ierr = MatDestroy(&B);CHKERRQ(ierr); 1340fd73130SBarry Smith PetscFunctionReturn(0); 1350fd73130SBarry Smith } 1360fd73130SBarry Smith 137dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 1384cb9d9b8SBarry Smith { 139dfbe8321SBarry Smith PetscErrorCode ierr; 1404cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1414cb9d9b8SBarry Smith 1424cb9d9b8SBarry Smith PetscFunctionBegin; 1436bf464f9SBarry Smith ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 1446bf464f9SBarry Smith ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 1456bf464f9SBarry Smith ierr = MatDestroy(&b->A);CHKERRQ(ierr); 1466bf464f9SBarry Smith ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 1476bf464f9SBarry Smith ierr = VecDestroy(&b->w);CHKERRQ(ierr); 148bf0cc555SLisandro Dalcin ierr = PetscFree(A->data);CHKERRQ(ierr); 149bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);CHKERRQ(ierr); 150bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_mpiaij_mpimaij_C",NULL);CHKERRQ(ierr); 151ec626240SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_is_mpimaij_C",NULL);CHKERRQ(ierr); 152dbd8c25aSHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 153b9b97703SBarry Smith PetscFunctionReturn(0); 154b9b97703SBarry Smith } 155b9b97703SBarry Smith 1560bad9183SKris Buschelman /*MC 157fafad747SKris Buschelman MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 1580bad9183SKris Buschelman multicomponent problems, interpolating or restricting each component the same way independently. 1590bad9183SKris Buschelman The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 1600bad9183SKris Buschelman 1610bad9183SKris Buschelman Operations provided: 1620bad9183SKris Buschelman . MatMult 1630bad9183SKris Buschelman . MatMultTranspose 1640bad9183SKris Buschelman . MatMultAdd 1650bad9183SKris Buschelman . MatMultTransposeAdd 1660bad9183SKris Buschelman 1670bad9183SKris Buschelman Level: advanced 1680bad9183SKris Buschelman 169b409243cSBarry Smith .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ() 1700bad9183SKris Buschelman M*/ 1710bad9183SKris Buschelman 1728cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) 17382b1193eSBarry Smith { 174dfbe8321SBarry Smith PetscErrorCode ierr; 1754cb9d9b8SBarry Smith Mat_MPIMAIJ *b; 17664b52464SHong Zhang PetscMPIInt size; 17782b1193eSBarry Smith 17882b1193eSBarry Smith PetscFunctionBegin; 179b00a9115SJed Brown ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 180b0a32e0cSBarry Smith A->data = (void*)b; 18126fbe8dcSKarl Rupp 182cd3bbe55SBarry Smith ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 18326fbe8dcSKarl Rupp 184356c569eSBarry Smith A->ops->setup = MatSetUp_MAIJ; 185f4a53059SBarry Smith 186cd3bbe55SBarry Smith b->AIJ = 0; 187cd3bbe55SBarry Smith b->dof = 0; 188f4a53059SBarry Smith b->OAIJ = 0; 189f4a53059SBarry Smith b->ctx = 0; 190f4a53059SBarry Smith b->w = 0; 191ce94432eSBarry Smith ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 19264b52464SHong Zhang if (size == 1) { 19364b52464SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr); 19464b52464SHong Zhang } else { 19564b52464SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr); 19664b52464SHong Zhang } 19732e7c8b0SBarry Smith A->preallocated = PETSC_TRUE; 19832e7c8b0SBarry Smith A->assembled = PETSC_TRUE; 19982b1193eSBarry Smith PetscFunctionReturn(0); 20082b1193eSBarry Smith } 20182b1193eSBarry Smith 202cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 203dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 20482b1193eSBarry Smith { 2054cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 206bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 207d2840e09SBarry Smith const PetscScalar *x,*v; 208d2840e09SBarry Smith PetscScalar *y, sum1, sum2; 209dfbe8321SBarry Smith PetscErrorCode ierr; 210d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 211d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 21282b1193eSBarry Smith 213bcc973b7SBarry Smith PetscFunctionBegin; 2143649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2151ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 216bcc973b7SBarry Smith idx = a->j; 217bcc973b7SBarry Smith v = a->a; 218bcc973b7SBarry Smith ii = a->i; 219bcc973b7SBarry Smith 220bcc973b7SBarry Smith for (i=0; i<m; i++) { 221bcc973b7SBarry Smith jrow = ii[i]; 222bcc973b7SBarry Smith n = ii[i+1] - jrow; 223bcc973b7SBarry Smith sum1 = 0.0; 224bcc973b7SBarry Smith sum2 = 0.0; 22526fbe8dcSKarl Rupp 226b3c51c66SMatthew Knepley nonzerorow += (n>0); 227bcc973b7SBarry Smith for (j=0; j<n; j++) { 228bcc973b7SBarry Smith sum1 += v[jrow]*x[2*idx[jrow]]; 229bcc973b7SBarry Smith sum2 += v[jrow]*x[2*idx[jrow]+1]; 230bcc973b7SBarry Smith jrow++; 231bcc973b7SBarry Smith } 232bcc973b7SBarry Smith y[2*i] = sum1; 233bcc973b7SBarry Smith y[2*i+1] = sum2; 234bcc973b7SBarry Smith } 235bcc973b7SBarry Smith 236dc0b31edSSatish Balay ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr); 2373649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2381ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 23982b1193eSBarry Smith PetscFunctionReturn(0); 24082b1193eSBarry Smith } 241bcc973b7SBarry Smith 242dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 24382b1193eSBarry Smith { 2444cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 245bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 246d2840e09SBarry Smith const PetscScalar *x,*v; 247d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2; 248dfbe8321SBarry Smith PetscErrorCode ierr; 249d2840e09SBarry Smith PetscInt n,i; 250d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 25182b1193eSBarry Smith 252bcc973b7SBarry Smith PetscFunctionBegin; 253d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2543649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2551ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 256bfec09a0SHong Zhang 257bcc973b7SBarry Smith for (i=0; i<m; i++) { 258bfec09a0SHong Zhang idx = a->j + a->i[i]; 259bfec09a0SHong Zhang v = a->a + a->i[i]; 260bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 261bcc973b7SBarry Smith alpha1 = x[2*i]; 262bcc973b7SBarry Smith alpha2 = x[2*i+1]; 26326fbe8dcSKarl Rupp while (n-->0) { 26426fbe8dcSKarl Rupp y[2*(*idx)] += alpha1*(*v); 26526fbe8dcSKarl Rupp y[2*(*idx)+1] += alpha2*(*v); 26626fbe8dcSKarl Rupp idx++; v++; 26726fbe8dcSKarl Rupp } 268bcc973b7SBarry Smith } 269dc0b31edSSatish Balay ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 2703649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2711ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 27282b1193eSBarry Smith PetscFunctionReturn(0); 27382b1193eSBarry Smith } 274bcc973b7SBarry Smith 275dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 27682b1193eSBarry Smith { 2774cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 278bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 279d2840e09SBarry Smith const PetscScalar *x,*v; 280d2840e09SBarry Smith PetscScalar *y,sum1, sum2; 281dfbe8321SBarry Smith PetscErrorCode ierr; 282b24ad042SBarry Smith PetscInt n,i,jrow,j; 283d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 28482b1193eSBarry Smith 285bcc973b7SBarry Smith PetscFunctionBegin; 286f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2873649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2881ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 289bcc973b7SBarry Smith idx = a->j; 290bcc973b7SBarry Smith v = a->a; 291bcc973b7SBarry Smith ii = a->i; 292bcc973b7SBarry Smith 293bcc973b7SBarry Smith for (i=0; i<m; i++) { 294bcc973b7SBarry Smith jrow = ii[i]; 295bcc973b7SBarry Smith n = ii[i+1] - jrow; 296bcc973b7SBarry Smith sum1 = 0.0; 297bcc973b7SBarry Smith sum2 = 0.0; 298bcc973b7SBarry Smith for (j=0; j<n; j++) { 299bcc973b7SBarry Smith sum1 += v[jrow]*x[2*idx[jrow]]; 300bcc973b7SBarry Smith sum2 += v[jrow]*x[2*idx[jrow]+1]; 301bcc973b7SBarry Smith jrow++; 302bcc973b7SBarry Smith } 303bcc973b7SBarry Smith y[2*i] += sum1; 304bcc973b7SBarry Smith y[2*i+1] += sum2; 305bcc973b7SBarry Smith } 306bcc973b7SBarry Smith 307dc0b31edSSatish Balay ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 3083649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 3091ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 310bcc973b7SBarry Smith PetscFunctionReturn(0); 31182b1193eSBarry Smith } 312dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 31382b1193eSBarry Smith { 3144cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 315bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 316d2840e09SBarry Smith const PetscScalar *x,*v; 317d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2; 318dfbe8321SBarry Smith PetscErrorCode ierr; 319d2840e09SBarry Smith PetscInt n,i; 320d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 32182b1193eSBarry Smith 322bcc973b7SBarry Smith PetscFunctionBegin; 323f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 3243649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 3251ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 326bfec09a0SHong Zhang 327bcc973b7SBarry Smith for (i=0; i<m; i++) { 328bfec09a0SHong Zhang idx = a->j + a->i[i]; 329bfec09a0SHong Zhang v = a->a + a->i[i]; 330bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 331bcc973b7SBarry Smith alpha1 = x[2*i]; 332bcc973b7SBarry Smith alpha2 = x[2*i+1]; 33326fbe8dcSKarl Rupp while (n-->0) { 33426fbe8dcSKarl Rupp y[2*(*idx)] += alpha1*(*v); 33526fbe8dcSKarl Rupp y[2*(*idx)+1] += alpha2*(*v); 33626fbe8dcSKarl Rupp idx++; v++; 33726fbe8dcSKarl Rupp } 338bcc973b7SBarry Smith } 339dc0b31edSSatish Balay ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 3403649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 3411ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 342bcc973b7SBarry Smith PetscFunctionReturn(0); 34382b1193eSBarry Smith } 344cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 345dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 346bcc973b7SBarry Smith { 3474cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 348bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 349d2840e09SBarry Smith const PetscScalar *x,*v; 350d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3; 351dfbe8321SBarry Smith PetscErrorCode ierr; 352d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 353d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 35482b1193eSBarry Smith 355bcc973b7SBarry Smith PetscFunctionBegin; 3563649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 3571ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 358bcc973b7SBarry Smith idx = a->j; 359bcc973b7SBarry Smith v = a->a; 360bcc973b7SBarry Smith ii = a->i; 361bcc973b7SBarry Smith 362bcc973b7SBarry Smith for (i=0; i<m; i++) { 363bcc973b7SBarry Smith jrow = ii[i]; 364bcc973b7SBarry Smith n = ii[i+1] - jrow; 365bcc973b7SBarry Smith sum1 = 0.0; 366bcc973b7SBarry Smith sum2 = 0.0; 367bcc973b7SBarry Smith sum3 = 0.0; 36826fbe8dcSKarl Rupp 369b3c51c66SMatthew Knepley nonzerorow += (n>0); 370bcc973b7SBarry Smith for (j=0; j<n; j++) { 371bcc973b7SBarry Smith sum1 += v[jrow]*x[3*idx[jrow]]; 372bcc973b7SBarry Smith sum2 += v[jrow]*x[3*idx[jrow]+1]; 373bcc973b7SBarry Smith sum3 += v[jrow]*x[3*idx[jrow]+2]; 374bcc973b7SBarry Smith jrow++; 375bcc973b7SBarry Smith } 376bcc973b7SBarry Smith y[3*i] = sum1; 377bcc973b7SBarry Smith y[3*i+1] = sum2; 378bcc973b7SBarry Smith y[3*i+2] = sum3; 379bcc973b7SBarry Smith } 380bcc973b7SBarry Smith 381dc0b31edSSatish Balay ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr); 3823649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 3831ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 384bcc973b7SBarry Smith PetscFunctionReturn(0); 385bcc973b7SBarry Smith } 386bcc973b7SBarry Smith 387dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 388bcc973b7SBarry Smith { 3894cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 390bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 391d2840e09SBarry Smith const PetscScalar *x,*v; 392d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3; 393dfbe8321SBarry Smith PetscErrorCode ierr; 394d2840e09SBarry Smith PetscInt n,i; 395d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 396bcc973b7SBarry Smith 397bcc973b7SBarry Smith PetscFunctionBegin; 398d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 3993649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 4001ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 401bfec09a0SHong Zhang 402bcc973b7SBarry Smith for (i=0; i<m; i++) { 403bfec09a0SHong Zhang idx = a->j + a->i[i]; 404bfec09a0SHong Zhang v = a->a + a->i[i]; 405bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 406bcc973b7SBarry Smith alpha1 = x[3*i]; 407bcc973b7SBarry Smith alpha2 = x[3*i+1]; 408bcc973b7SBarry Smith alpha3 = x[3*i+2]; 409bcc973b7SBarry Smith while (n-->0) { 410bcc973b7SBarry Smith y[3*(*idx)] += alpha1*(*v); 411bcc973b7SBarry Smith y[3*(*idx)+1] += alpha2*(*v); 412bcc973b7SBarry Smith y[3*(*idx)+2] += alpha3*(*v); 413bcc973b7SBarry Smith idx++; v++; 414bcc973b7SBarry Smith } 415bcc973b7SBarry Smith } 416dc0b31edSSatish Balay ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 4173649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 4181ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 419bcc973b7SBarry Smith PetscFunctionReturn(0); 420bcc973b7SBarry Smith } 421bcc973b7SBarry Smith 422dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 423bcc973b7SBarry Smith { 4244cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 425bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 426d2840e09SBarry Smith const PetscScalar *x,*v; 427d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3; 428dfbe8321SBarry Smith PetscErrorCode ierr; 429b24ad042SBarry Smith PetscInt n,i,jrow,j; 430d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 431bcc973b7SBarry Smith 432bcc973b7SBarry Smith PetscFunctionBegin; 433f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 4343649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 4351ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 436bcc973b7SBarry Smith idx = a->j; 437bcc973b7SBarry Smith v = a->a; 438bcc973b7SBarry Smith ii = a->i; 439bcc973b7SBarry Smith 440bcc973b7SBarry Smith for (i=0; i<m; i++) { 441bcc973b7SBarry Smith jrow = ii[i]; 442bcc973b7SBarry Smith n = ii[i+1] - jrow; 443bcc973b7SBarry Smith sum1 = 0.0; 444bcc973b7SBarry Smith sum2 = 0.0; 445bcc973b7SBarry Smith sum3 = 0.0; 446bcc973b7SBarry Smith for (j=0; j<n; j++) { 447bcc973b7SBarry Smith sum1 += v[jrow]*x[3*idx[jrow]]; 448bcc973b7SBarry Smith sum2 += v[jrow]*x[3*idx[jrow]+1]; 449bcc973b7SBarry Smith sum3 += v[jrow]*x[3*idx[jrow]+2]; 450bcc973b7SBarry Smith jrow++; 451bcc973b7SBarry Smith } 452bcc973b7SBarry Smith y[3*i] += sum1; 453bcc973b7SBarry Smith y[3*i+1] += sum2; 454bcc973b7SBarry Smith y[3*i+2] += sum3; 455bcc973b7SBarry Smith } 456bcc973b7SBarry Smith 457dc0b31edSSatish Balay ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 4583649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 4591ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 460bcc973b7SBarry Smith PetscFunctionReturn(0); 461bcc973b7SBarry Smith } 462dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 463bcc973b7SBarry Smith { 4644cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 465bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 466d2840e09SBarry Smith const PetscScalar *x,*v; 467d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3; 468dfbe8321SBarry Smith PetscErrorCode ierr; 469d2840e09SBarry Smith PetscInt n,i; 470d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 471bcc973b7SBarry Smith 472bcc973b7SBarry Smith PetscFunctionBegin; 473f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 4743649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 4751ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 476bcc973b7SBarry Smith for (i=0; i<m; i++) { 477bfec09a0SHong Zhang idx = a->j + a->i[i]; 478bfec09a0SHong Zhang v = a->a + a->i[i]; 479bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 480bcc973b7SBarry Smith alpha1 = x[3*i]; 481bcc973b7SBarry Smith alpha2 = x[3*i+1]; 482bcc973b7SBarry Smith alpha3 = x[3*i+2]; 483bcc973b7SBarry Smith while (n-->0) { 484bcc973b7SBarry Smith y[3*(*idx)] += alpha1*(*v); 485bcc973b7SBarry Smith y[3*(*idx)+1] += alpha2*(*v); 486bcc973b7SBarry Smith y[3*(*idx)+2] += alpha3*(*v); 487bcc973b7SBarry Smith idx++; v++; 488bcc973b7SBarry Smith } 489bcc973b7SBarry Smith } 490dc0b31edSSatish Balay ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 4913649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 4921ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 493bcc973b7SBarry Smith PetscFunctionReturn(0); 494bcc973b7SBarry Smith } 495bcc973b7SBarry Smith 496bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/ 497dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 498bcc973b7SBarry Smith { 4994cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 500bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 501d2840e09SBarry Smith const PetscScalar *x,*v; 502d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4; 503dfbe8321SBarry Smith PetscErrorCode ierr; 504d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 505d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 506bcc973b7SBarry Smith 507bcc973b7SBarry Smith PetscFunctionBegin; 5083649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 5091ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 510bcc973b7SBarry Smith idx = a->j; 511bcc973b7SBarry Smith v = a->a; 512bcc973b7SBarry Smith ii = a->i; 513bcc973b7SBarry Smith 514bcc973b7SBarry Smith for (i=0; i<m; i++) { 515bcc973b7SBarry Smith jrow = ii[i]; 516bcc973b7SBarry Smith n = ii[i+1] - jrow; 517bcc973b7SBarry Smith sum1 = 0.0; 518bcc973b7SBarry Smith sum2 = 0.0; 519bcc973b7SBarry Smith sum3 = 0.0; 520bcc973b7SBarry Smith sum4 = 0.0; 521b3c51c66SMatthew Knepley nonzerorow += (n>0); 522bcc973b7SBarry Smith for (j=0; j<n; j++) { 523bcc973b7SBarry Smith sum1 += v[jrow]*x[4*idx[jrow]]; 524bcc973b7SBarry Smith sum2 += v[jrow]*x[4*idx[jrow]+1]; 525bcc973b7SBarry Smith sum3 += v[jrow]*x[4*idx[jrow]+2]; 526bcc973b7SBarry Smith sum4 += v[jrow]*x[4*idx[jrow]+3]; 527bcc973b7SBarry Smith jrow++; 528bcc973b7SBarry Smith } 529bcc973b7SBarry Smith y[4*i] = sum1; 530bcc973b7SBarry Smith y[4*i+1] = sum2; 531bcc973b7SBarry Smith y[4*i+2] = sum3; 532bcc973b7SBarry Smith y[4*i+3] = sum4; 533bcc973b7SBarry Smith } 534bcc973b7SBarry Smith 535dc0b31edSSatish Balay ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr); 5363649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 5371ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 538bcc973b7SBarry Smith PetscFunctionReturn(0); 539bcc973b7SBarry Smith } 540bcc973b7SBarry Smith 541dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 542bcc973b7SBarry Smith { 5434cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 544bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 545d2840e09SBarry Smith const PetscScalar *x,*v; 546d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 547dfbe8321SBarry Smith PetscErrorCode ierr; 548d2840e09SBarry Smith PetscInt n,i; 549d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 550bcc973b7SBarry Smith 551bcc973b7SBarry Smith PetscFunctionBegin; 552d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 5533649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 5541ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 555bcc973b7SBarry Smith for (i=0; i<m; i++) { 556bfec09a0SHong Zhang idx = a->j + a->i[i]; 557bfec09a0SHong Zhang v = a->a + a->i[i]; 558bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 559bcc973b7SBarry Smith alpha1 = x[4*i]; 560bcc973b7SBarry Smith alpha2 = x[4*i+1]; 561bcc973b7SBarry Smith alpha3 = x[4*i+2]; 562bcc973b7SBarry Smith alpha4 = x[4*i+3]; 563bcc973b7SBarry Smith while (n-->0) { 564bcc973b7SBarry Smith y[4*(*idx)] += alpha1*(*v); 565bcc973b7SBarry Smith y[4*(*idx)+1] += alpha2*(*v); 566bcc973b7SBarry Smith y[4*(*idx)+2] += alpha3*(*v); 567bcc973b7SBarry Smith y[4*(*idx)+3] += alpha4*(*v); 568bcc973b7SBarry Smith idx++; v++; 569bcc973b7SBarry Smith } 570bcc973b7SBarry Smith } 571dc0b31edSSatish Balay ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 5723649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 5731ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 574bcc973b7SBarry Smith PetscFunctionReturn(0); 575bcc973b7SBarry Smith } 576bcc973b7SBarry Smith 577dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 578bcc973b7SBarry Smith { 5794cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 580f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 581d2840e09SBarry Smith const PetscScalar *x,*v; 582d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4; 583dfbe8321SBarry Smith PetscErrorCode ierr; 584b24ad042SBarry Smith PetscInt n,i,jrow,j; 585d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 586f9fae5adSBarry Smith 587f9fae5adSBarry Smith PetscFunctionBegin; 588f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 5893649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 5901ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 591f9fae5adSBarry Smith idx = a->j; 592f9fae5adSBarry Smith v = a->a; 593f9fae5adSBarry Smith ii = a->i; 594f9fae5adSBarry Smith 595f9fae5adSBarry Smith for (i=0; i<m; i++) { 596f9fae5adSBarry Smith jrow = ii[i]; 597f9fae5adSBarry Smith n = ii[i+1] - jrow; 598f9fae5adSBarry Smith sum1 = 0.0; 599f9fae5adSBarry Smith sum2 = 0.0; 600f9fae5adSBarry Smith sum3 = 0.0; 601f9fae5adSBarry Smith sum4 = 0.0; 602f9fae5adSBarry Smith for (j=0; j<n; j++) { 603f9fae5adSBarry Smith sum1 += v[jrow]*x[4*idx[jrow]]; 604f9fae5adSBarry Smith sum2 += v[jrow]*x[4*idx[jrow]+1]; 605f9fae5adSBarry Smith sum3 += v[jrow]*x[4*idx[jrow]+2]; 606f9fae5adSBarry Smith sum4 += v[jrow]*x[4*idx[jrow]+3]; 607f9fae5adSBarry Smith jrow++; 608f9fae5adSBarry Smith } 609f9fae5adSBarry Smith y[4*i] += sum1; 610f9fae5adSBarry Smith y[4*i+1] += sum2; 611f9fae5adSBarry Smith y[4*i+2] += sum3; 612f9fae5adSBarry Smith y[4*i+3] += sum4; 613f9fae5adSBarry Smith } 614f9fae5adSBarry Smith 615dc0b31edSSatish Balay ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 6163649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 6171ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 618f9fae5adSBarry Smith PetscFunctionReturn(0); 619bcc973b7SBarry Smith } 620dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 621bcc973b7SBarry Smith { 6224cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 623f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 624d2840e09SBarry Smith const PetscScalar *x,*v; 625d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 626dfbe8321SBarry Smith PetscErrorCode ierr; 627d2840e09SBarry Smith PetscInt n,i; 628d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 629f9fae5adSBarry Smith 630f9fae5adSBarry Smith PetscFunctionBegin; 631f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 6323649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 6331ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 634bfec09a0SHong Zhang 635f9fae5adSBarry Smith for (i=0; i<m; i++) { 636bfec09a0SHong Zhang idx = a->j + a->i[i]; 637bfec09a0SHong Zhang v = a->a + a->i[i]; 638f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 639f9fae5adSBarry Smith alpha1 = x[4*i]; 640f9fae5adSBarry Smith alpha2 = x[4*i+1]; 641f9fae5adSBarry Smith alpha3 = x[4*i+2]; 642f9fae5adSBarry Smith alpha4 = x[4*i+3]; 643f9fae5adSBarry Smith while (n-->0) { 644f9fae5adSBarry Smith y[4*(*idx)] += alpha1*(*v); 645f9fae5adSBarry Smith y[4*(*idx)+1] += alpha2*(*v); 646f9fae5adSBarry Smith y[4*(*idx)+2] += alpha3*(*v); 647f9fae5adSBarry Smith y[4*(*idx)+3] += alpha4*(*v); 648f9fae5adSBarry Smith idx++; v++; 649f9fae5adSBarry Smith } 650f9fae5adSBarry Smith } 651dc0b31edSSatish Balay ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 6523649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 6531ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 654f9fae5adSBarry Smith PetscFunctionReturn(0); 655f9fae5adSBarry Smith } 656f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/ 6576cd98798SBarry Smith 658dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 659f9fae5adSBarry Smith { 6604cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 661f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 662d2840e09SBarry Smith const PetscScalar *x,*v; 663d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 664dfbe8321SBarry Smith PetscErrorCode ierr; 665d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 666d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 667f9fae5adSBarry Smith 668f9fae5adSBarry Smith PetscFunctionBegin; 6693649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 6701ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 671f9fae5adSBarry Smith idx = a->j; 672f9fae5adSBarry Smith v = a->a; 673f9fae5adSBarry Smith ii = a->i; 674f9fae5adSBarry Smith 675f9fae5adSBarry Smith for (i=0; i<m; i++) { 676f9fae5adSBarry Smith jrow = ii[i]; 677f9fae5adSBarry Smith n = ii[i+1] - jrow; 678f9fae5adSBarry Smith sum1 = 0.0; 679f9fae5adSBarry Smith sum2 = 0.0; 680f9fae5adSBarry Smith sum3 = 0.0; 681f9fae5adSBarry Smith sum4 = 0.0; 682f9fae5adSBarry Smith sum5 = 0.0; 68326fbe8dcSKarl Rupp 684b3c51c66SMatthew Knepley nonzerorow += (n>0); 685f9fae5adSBarry Smith for (j=0; j<n; j++) { 686f9fae5adSBarry Smith sum1 += v[jrow]*x[5*idx[jrow]]; 687f9fae5adSBarry Smith sum2 += v[jrow]*x[5*idx[jrow]+1]; 688f9fae5adSBarry Smith sum3 += v[jrow]*x[5*idx[jrow]+2]; 689f9fae5adSBarry Smith sum4 += v[jrow]*x[5*idx[jrow]+3]; 690f9fae5adSBarry Smith sum5 += v[jrow]*x[5*idx[jrow]+4]; 691f9fae5adSBarry Smith jrow++; 692f9fae5adSBarry Smith } 693f9fae5adSBarry Smith y[5*i] = sum1; 694f9fae5adSBarry Smith y[5*i+1] = sum2; 695f9fae5adSBarry Smith y[5*i+2] = sum3; 696f9fae5adSBarry Smith y[5*i+3] = sum4; 697f9fae5adSBarry Smith y[5*i+4] = sum5; 698f9fae5adSBarry Smith } 699f9fae5adSBarry Smith 700dc0b31edSSatish Balay ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr); 7013649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 7021ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 703f9fae5adSBarry Smith PetscFunctionReturn(0); 704f9fae5adSBarry Smith } 705f9fae5adSBarry Smith 706dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 707f9fae5adSBarry Smith { 7084cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 709f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 710d2840e09SBarry Smith const PetscScalar *x,*v; 711d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 712dfbe8321SBarry Smith PetscErrorCode ierr; 713d2840e09SBarry Smith PetscInt n,i; 714d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 715f9fae5adSBarry Smith 716f9fae5adSBarry Smith PetscFunctionBegin; 717d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 7183649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 7191ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 720bfec09a0SHong Zhang 721f9fae5adSBarry Smith for (i=0; i<m; i++) { 722bfec09a0SHong Zhang idx = a->j + a->i[i]; 723bfec09a0SHong Zhang v = a->a + a->i[i]; 724f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 725f9fae5adSBarry Smith alpha1 = x[5*i]; 726f9fae5adSBarry Smith alpha2 = x[5*i+1]; 727f9fae5adSBarry Smith alpha3 = x[5*i+2]; 728f9fae5adSBarry Smith alpha4 = x[5*i+3]; 729f9fae5adSBarry Smith alpha5 = x[5*i+4]; 730f9fae5adSBarry Smith while (n-->0) { 731f9fae5adSBarry Smith y[5*(*idx)] += alpha1*(*v); 732f9fae5adSBarry Smith y[5*(*idx)+1] += alpha2*(*v); 733f9fae5adSBarry Smith y[5*(*idx)+2] += alpha3*(*v); 734f9fae5adSBarry Smith y[5*(*idx)+3] += alpha4*(*v); 735f9fae5adSBarry Smith y[5*(*idx)+4] += alpha5*(*v); 736f9fae5adSBarry Smith idx++; v++; 737f9fae5adSBarry Smith } 738f9fae5adSBarry Smith } 739dc0b31edSSatish Balay ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 7403649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 7411ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 742f9fae5adSBarry Smith PetscFunctionReturn(0); 743f9fae5adSBarry Smith } 744f9fae5adSBarry Smith 745dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 746f9fae5adSBarry Smith { 7474cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 748f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 749d2840e09SBarry Smith const PetscScalar *x,*v; 750d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 751dfbe8321SBarry Smith PetscErrorCode ierr; 752b24ad042SBarry Smith PetscInt n,i,jrow,j; 753d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 754f9fae5adSBarry Smith 755f9fae5adSBarry Smith PetscFunctionBegin; 756f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 7573649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 7581ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 759f9fae5adSBarry Smith idx = a->j; 760f9fae5adSBarry Smith v = a->a; 761f9fae5adSBarry Smith ii = a->i; 762f9fae5adSBarry Smith 763f9fae5adSBarry Smith for (i=0; i<m; i++) { 764f9fae5adSBarry Smith jrow = ii[i]; 765f9fae5adSBarry Smith n = ii[i+1] - jrow; 766f9fae5adSBarry Smith sum1 = 0.0; 767f9fae5adSBarry Smith sum2 = 0.0; 768f9fae5adSBarry Smith sum3 = 0.0; 769f9fae5adSBarry Smith sum4 = 0.0; 770f9fae5adSBarry Smith sum5 = 0.0; 771f9fae5adSBarry Smith for (j=0; j<n; j++) { 772f9fae5adSBarry Smith sum1 += v[jrow]*x[5*idx[jrow]]; 773f9fae5adSBarry Smith sum2 += v[jrow]*x[5*idx[jrow]+1]; 774f9fae5adSBarry Smith sum3 += v[jrow]*x[5*idx[jrow]+2]; 775f9fae5adSBarry Smith sum4 += v[jrow]*x[5*idx[jrow]+3]; 776f9fae5adSBarry Smith sum5 += v[jrow]*x[5*idx[jrow]+4]; 777f9fae5adSBarry Smith jrow++; 778f9fae5adSBarry Smith } 779f9fae5adSBarry Smith y[5*i] += sum1; 780f9fae5adSBarry Smith y[5*i+1] += sum2; 781f9fae5adSBarry Smith y[5*i+2] += sum3; 782f9fae5adSBarry Smith y[5*i+3] += sum4; 783f9fae5adSBarry Smith y[5*i+4] += sum5; 784f9fae5adSBarry Smith } 785f9fae5adSBarry Smith 786dc0b31edSSatish Balay ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 7873649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 7881ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 789f9fae5adSBarry Smith PetscFunctionReturn(0); 790f9fae5adSBarry Smith } 791f9fae5adSBarry Smith 792dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 793f9fae5adSBarry Smith { 7944cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 795f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 796d2840e09SBarry Smith const PetscScalar *x,*v; 797d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 798dfbe8321SBarry Smith PetscErrorCode ierr; 799d2840e09SBarry Smith PetscInt n,i; 800d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 801f9fae5adSBarry Smith 802f9fae5adSBarry Smith PetscFunctionBegin; 803f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 8043649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 8051ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 806bfec09a0SHong Zhang 807f9fae5adSBarry Smith for (i=0; i<m; i++) { 808bfec09a0SHong Zhang idx = a->j + a->i[i]; 809bfec09a0SHong Zhang v = a->a + a->i[i]; 810f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 811f9fae5adSBarry Smith alpha1 = x[5*i]; 812f9fae5adSBarry Smith alpha2 = x[5*i+1]; 813f9fae5adSBarry Smith alpha3 = x[5*i+2]; 814f9fae5adSBarry Smith alpha4 = x[5*i+3]; 815f9fae5adSBarry Smith alpha5 = x[5*i+4]; 816f9fae5adSBarry Smith while (n-->0) { 817f9fae5adSBarry Smith y[5*(*idx)] += alpha1*(*v); 818f9fae5adSBarry Smith y[5*(*idx)+1] += alpha2*(*v); 819f9fae5adSBarry Smith y[5*(*idx)+2] += alpha3*(*v); 820f9fae5adSBarry Smith y[5*(*idx)+3] += alpha4*(*v); 821f9fae5adSBarry Smith y[5*(*idx)+4] += alpha5*(*v); 822f9fae5adSBarry Smith idx++; v++; 823f9fae5adSBarry Smith } 824f9fae5adSBarry Smith } 825dc0b31edSSatish Balay ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 8263649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 8271ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 828f9fae5adSBarry Smith PetscFunctionReturn(0); 829bcc973b7SBarry Smith } 830bcc973b7SBarry Smith 8316cd98798SBarry Smith /* ------------------------------------------------------------------------------*/ 832dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 8336cd98798SBarry Smith { 8346cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 8356cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 836d2840e09SBarry Smith const PetscScalar *x,*v; 837d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 838dfbe8321SBarry Smith PetscErrorCode ierr; 839d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 840d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 8416cd98798SBarry Smith 8426cd98798SBarry Smith PetscFunctionBegin; 8433649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 8441ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 8456cd98798SBarry Smith idx = a->j; 8466cd98798SBarry Smith v = a->a; 8476cd98798SBarry Smith ii = a->i; 8486cd98798SBarry Smith 8496cd98798SBarry Smith for (i=0; i<m; i++) { 8506cd98798SBarry Smith jrow = ii[i]; 8516cd98798SBarry Smith n = ii[i+1] - jrow; 8526cd98798SBarry Smith sum1 = 0.0; 8536cd98798SBarry Smith sum2 = 0.0; 8546cd98798SBarry Smith sum3 = 0.0; 8556cd98798SBarry Smith sum4 = 0.0; 8566cd98798SBarry Smith sum5 = 0.0; 8576cd98798SBarry Smith sum6 = 0.0; 85826fbe8dcSKarl Rupp 859b3c51c66SMatthew Knepley nonzerorow += (n>0); 8606cd98798SBarry Smith for (j=0; j<n; j++) { 8616cd98798SBarry Smith sum1 += v[jrow]*x[6*idx[jrow]]; 8626cd98798SBarry Smith sum2 += v[jrow]*x[6*idx[jrow]+1]; 8636cd98798SBarry Smith sum3 += v[jrow]*x[6*idx[jrow]+2]; 8646cd98798SBarry Smith sum4 += v[jrow]*x[6*idx[jrow]+3]; 8656cd98798SBarry Smith sum5 += v[jrow]*x[6*idx[jrow]+4]; 8666cd98798SBarry Smith sum6 += v[jrow]*x[6*idx[jrow]+5]; 8676cd98798SBarry Smith jrow++; 8686cd98798SBarry Smith } 8696cd98798SBarry Smith y[6*i] = sum1; 8706cd98798SBarry Smith y[6*i+1] = sum2; 8716cd98798SBarry Smith y[6*i+2] = sum3; 8726cd98798SBarry Smith y[6*i+3] = sum4; 8736cd98798SBarry Smith y[6*i+4] = sum5; 8746cd98798SBarry Smith y[6*i+5] = sum6; 8756cd98798SBarry Smith } 8766cd98798SBarry Smith 877dc0b31edSSatish Balay ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr); 8783649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 8791ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 8806cd98798SBarry Smith PetscFunctionReturn(0); 8816cd98798SBarry Smith } 8826cd98798SBarry Smith 883dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 8846cd98798SBarry Smith { 8856cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 8866cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 887d2840e09SBarry Smith const PetscScalar *x,*v; 888d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 889dfbe8321SBarry Smith PetscErrorCode ierr; 890d2840e09SBarry Smith PetscInt n,i; 891d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 8926cd98798SBarry Smith 8936cd98798SBarry Smith PetscFunctionBegin; 894d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 8953649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 8961ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 897bfec09a0SHong Zhang 8986cd98798SBarry Smith for (i=0; i<m; i++) { 899bfec09a0SHong Zhang idx = a->j + a->i[i]; 900bfec09a0SHong Zhang v = a->a + a->i[i]; 9016cd98798SBarry Smith n = a->i[i+1] - a->i[i]; 9026cd98798SBarry Smith alpha1 = x[6*i]; 9036cd98798SBarry Smith alpha2 = x[6*i+1]; 9046cd98798SBarry Smith alpha3 = x[6*i+2]; 9056cd98798SBarry Smith alpha4 = x[6*i+3]; 9066cd98798SBarry Smith alpha5 = x[6*i+4]; 9076cd98798SBarry Smith alpha6 = x[6*i+5]; 9086cd98798SBarry Smith while (n-->0) { 9096cd98798SBarry Smith y[6*(*idx)] += alpha1*(*v); 9106cd98798SBarry Smith y[6*(*idx)+1] += alpha2*(*v); 9116cd98798SBarry Smith y[6*(*idx)+2] += alpha3*(*v); 9126cd98798SBarry Smith y[6*(*idx)+3] += alpha4*(*v); 9136cd98798SBarry Smith y[6*(*idx)+4] += alpha5*(*v); 9146cd98798SBarry Smith y[6*(*idx)+5] += alpha6*(*v); 9156cd98798SBarry Smith idx++; v++; 9166cd98798SBarry Smith } 9176cd98798SBarry Smith } 918dc0b31edSSatish Balay ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 9193649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 9201ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 9216cd98798SBarry Smith PetscFunctionReturn(0); 9226cd98798SBarry Smith } 9236cd98798SBarry Smith 924dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 9256cd98798SBarry Smith { 9266cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 9276cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 928d2840e09SBarry Smith const PetscScalar *x,*v; 929d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 930dfbe8321SBarry Smith PetscErrorCode ierr; 931b24ad042SBarry Smith PetscInt n,i,jrow,j; 932d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 9336cd98798SBarry Smith 9346cd98798SBarry Smith PetscFunctionBegin; 9356cd98798SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 9363649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 9371ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 9386cd98798SBarry Smith idx = a->j; 9396cd98798SBarry Smith v = a->a; 9406cd98798SBarry Smith ii = a->i; 9416cd98798SBarry Smith 9426cd98798SBarry Smith for (i=0; i<m; i++) { 9436cd98798SBarry Smith jrow = ii[i]; 9446cd98798SBarry Smith n = ii[i+1] - jrow; 9456cd98798SBarry Smith sum1 = 0.0; 9466cd98798SBarry Smith sum2 = 0.0; 9476cd98798SBarry Smith sum3 = 0.0; 9486cd98798SBarry Smith sum4 = 0.0; 9496cd98798SBarry Smith sum5 = 0.0; 9506cd98798SBarry Smith sum6 = 0.0; 9516cd98798SBarry Smith for (j=0; j<n; j++) { 9526cd98798SBarry Smith sum1 += v[jrow]*x[6*idx[jrow]]; 9536cd98798SBarry Smith sum2 += v[jrow]*x[6*idx[jrow]+1]; 9546cd98798SBarry Smith sum3 += v[jrow]*x[6*idx[jrow]+2]; 9556cd98798SBarry Smith sum4 += v[jrow]*x[6*idx[jrow]+3]; 9566cd98798SBarry Smith sum5 += v[jrow]*x[6*idx[jrow]+4]; 9576cd98798SBarry Smith sum6 += v[jrow]*x[6*idx[jrow]+5]; 9586cd98798SBarry Smith jrow++; 9596cd98798SBarry Smith } 9606cd98798SBarry Smith y[6*i] += sum1; 9616cd98798SBarry Smith y[6*i+1] += sum2; 9626cd98798SBarry Smith y[6*i+2] += sum3; 9636cd98798SBarry Smith y[6*i+3] += sum4; 9646cd98798SBarry Smith y[6*i+4] += sum5; 9656cd98798SBarry Smith y[6*i+5] += sum6; 9666cd98798SBarry Smith } 9676cd98798SBarry Smith 968dc0b31edSSatish Balay ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 9693649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 9701ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 9716cd98798SBarry Smith PetscFunctionReturn(0); 9726cd98798SBarry Smith } 9736cd98798SBarry Smith 974dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 9756cd98798SBarry Smith { 9766cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 9776cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 978d2840e09SBarry Smith const PetscScalar *x,*v; 979d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 980dfbe8321SBarry Smith PetscErrorCode ierr; 981d2840e09SBarry Smith PetscInt n,i; 982d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 9836cd98798SBarry Smith 9846cd98798SBarry Smith PetscFunctionBegin; 9856cd98798SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 9863649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 9871ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 988bfec09a0SHong Zhang 9896cd98798SBarry Smith for (i=0; i<m; i++) { 990bfec09a0SHong Zhang idx = a->j + a->i[i]; 991bfec09a0SHong Zhang v = a->a + a->i[i]; 9926cd98798SBarry Smith n = a->i[i+1] - a->i[i]; 9936cd98798SBarry Smith alpha1 = x[6*i]; 9946cd98798SBarry Smith alpha2 = x[6*i+1]; 9956cd98798SBarry Smith alpha3 = x[6*i+2]; 9966cd98798SBarry Smith alpha4 = x[6*i+3]; 9976cd98798SBarry Smith alpha5 = x[6*i+4]; 9986cd98798SBarry Smith alpha6 = x[6*i+5]; 9996cd98798SBarry Smith while (n-->0) { 10006cd98798SBarry Smith y[6*(*idx)] += alpha1*(*v); 10016cd98798SBarry Smith y[6*(*idx)+1] += alpha2*(*v); 10026cd98798SBarry Smith y[6*(*idx)+2] += alpha3*(*v); 10036cd98798SBarry Smith y[6*(*idx)+3] += alpha4*(*v); 10046cd98798SBarry Smith y[6*(*idx)+4] += alpha5*(*v); 10056cd98798SBarry Smith y[6*(*idx)+5] += alpha6*(*v); 10066cd98798SBarry Smith idx++; v++; 10076cd98798SBarry Smith } 10086cd98798SBarry Smith } 1009dc0b31edSSatish Balay ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 10103649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 10111ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 10126cd98798SBarry Smith PetscFunctionReturn(0); 10136cd98798SBarry Smith } 10146cd98798SBarry Smith 101566ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/ 1016ed8eea03SBarry Smith PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1017ed8eea03SBarry Smith { 1018ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1019ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1020d2840e09SBarry Smith const PetscScalar *x,*v; 1021d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1022ed8eea03SBarry Smith PetscErrorCode ierr; 1023d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1024d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 1025ed8eea03SBarry Smith 1026ed8eea03SBarry Smith PetscFunctionBegin; 10273649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1028ed8eea03SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1029ed8eea03SBarry Smith idx = a->j; 1030ed8eea03SBarry Smith v = a->a; 1031ed8eea03SBarry Smith ii = a->i; 1032ed8eea03SBarry Smith 1033ed8eea03SBarry Smith for (i=0; i<m; i++) { 1034ed8eea03SBarry Smith jrow = ii[i]; 1035ed8eea03SBarry Smith n = ii[i+1] - jrow; 1036ed8eea03SBarry Smith sum1 = 0.0; 1037ed8eea03SBarry Smith sum2 = 0.0; 1038ed8eea03SBarry Smith sum3 = 0.0; 1039ed8eea03SBarry Smith sum4 = 0.0; 1040ed8eea03SBarry Smith sum5 = 0.0; 1041ed8eea03SBarry Smith sum6 = 0.0; 1042ed8eea03SBarry Smith sum7 = 0.0; 104326fbe8dcSKarl Rupp 1044b3c51c66SMatthew Knepley nonzerorow += (n>0); 1045ed8eea03SBarry Smith for (j=0; j<n; j++) { 1046ed8eea03SBarry Smith sum1 += v[jrow]*x[7*idx[jrow]]; 1047ed8eea03SBarry Smith sum2 += v[jrow]*x[7*idx[jrow]+1]; 1048ed8eea03SBarry Smith sum3 += v[jrow]*x[7*idx[jrow]+2]; 1049ed8eea03SBarry Smith sum4 += v[jrow]*x[7*idx[jrow]+3]; 1050ed8eea03SBarry Smith sum5 += v[jrow]*x[7*idx[jrow]+4]; 1051ed8eea03SBarry Smith sum6 += v[jrow]*x[7*idx[jrow]+5]; 1052ed8eea03SBarry Smith sum7 += v[jrow]*x[7*idx[jrow]+6]; 1053ed8eea03SBarry Smith jrow++; 1054ed8eea03SBarry Smith } 1055ed8eea03SBarry Smith y[7*i] = sum1; 1056ed8eea03SBarry Smith y[7*i+1] = sum2; 1057ed8eea03SBarry Smith y[7*i+2] = sum3; 1058ed8eea03SBarry Smith y[7*i+3] = sum4; 1059ed8eea03SBarry Smith y[7*i+4] = sum5; 1060ed8eea03SBarry Smith y[7*i+5] = sum6; 1061ed8eea03SBarry Smith y[7*i+6] = sum7; 1062ed8eea03SBarry Smith } 1063ed8eea03SBarry Smith 1064dc0b31edSSatish Balay ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr); 10653649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1066ed8eea03SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1067ed8eea03SBarry Smith PetscFunctionReturn(0); 1068ed8eea03SBarry Smith } 1069ed8eea03SBarry Smith 1070ed8eea03SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1071ed8eea03SBarry Smith { 1072ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1073ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1074d2840e09SBarry Smith const PetscScalar *x,*v; 1075d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1076ed8eea03SBarry Smith PetscErrorCode ierr; 1077d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1078d2840e09SBarry Smith PetscInt n,i; 1079ed8eea03SBarry Smith 1080ed8eea03SBarry Smith PetscFunctionBegin; 1081d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 10823649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1083ed8eea03SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1084ed8eea03SBarry Smith 1085ed8eea03SBarry Smith for (i=0; i<m; i++) { 1086ed8eea03SBarry Smith idx = a->j + a->i[i]; 1087ed8eea03SBarry Smith v = a->a + a->i[i]; 1088ed8eea03SBarry Smith n = a->i[i+1] - a->i[i]; 1089ed8eea03SBarry Smith alpha1 = x[7*i]; 1090ed8eea03SBarry Smith alpha2 = x[7*i+1]; 1091ed8eea03SBarry Smith alpha3 = x[7*i+2]; 1092ed8eea03SBarry Smith alpha4 = x[7*i+3]; 1093ed8eea03SBarry Smith alpha5 = x[7*i+4]; 1094ed8eea03SBarry Smith alpha6 = x[7*i+5]; 1095ed8eea03SBarry Smith alpha7 = x[7*i+6]; 1096ed8eea03SBarry Smith while (n-->0) { 1097ed8eea03SBarry Smith y[7*(*idx)] += alpha1*(*v); 1098ed8eea03SBarry Smith y[7*(*idx)+1] += alpha2*(*v); 1099ed8eea03SBarry Smith y[7*(*idx)+2] += alpha3*(*v); 1100ed8eea03SBarry Smith y[7*(*idx)+3] += alpha4*(*v); 1101ed8eea03SBarry Smith y[7*(*idx)+4] += alpha5*(*v); 1102ed8eea03SBarry Smith y[7*(*idx)+5] += alpha6*(*v); 1103ed8eea03SBarry Smith y[7*(*idx)+6] += alpha7*(*v); 1104ed8eea03SBarry Smith idx++; v++; 1105ed8eea03SBarry Smith } 1106ed8eea03SBarry Smith } 1107dc0b31edSSatish Balay ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 11083649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1109ed8eea03SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1110ed8eea03SBarry Smith PetscFunctionReturn(0); 1111ed8eea03SBarry Smith } 1112ed8eea03SBarry Smith 1113ed8eea03SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1114ed8eea03SBarry Smith { 1115ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1116ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1117d2840e09SBarry Smith const PetscScalar *x,*v; 1118d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1119ed8eea03SBarry Smith PetscErrorCode ierr; 1120d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1121ed8eea03SBarry Smith PetscInt n,i,jrow,j; 1122ed8eea03SBarry Smith 1123ed8eea03SBarry Smith PetscFunctionBegin; 1124ed8eea03SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 11253649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1126ed8eea03SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1127ed8eea03SBarry Smith idx = a->j; 1128ed8eea03SBarry Smith v = a->a; 1129ed8eea03SBarry Smith ii = a->i; 1130ed8eea03SBarry Smith 1131ed8eea03SBarry Smith for (i=0; i<m; i++) { 1132ed8eea03SBarry Smith jrow = ii[i]; 1133ed8eea03SBarry Smith n = ii[i+1] - jrow; 1134ed8eea03SBarry Smith sum1 = 0.0; 1135ed8eea03SBarry Smith sum2 = 0.0; 1136ed8eea03SBarry Smith sum3 = 0.0; 1137ed8eea03SBarry Smith sum4 = 0.0; 1138ed8eea03SBarry Smith sum5 = 0.0; 1139ed8eea03SBarry Smith sum6 = 0.0; 1140ed8eea03SBarry Smith sum7 = 0.0; 1141ed8eea03SBarry Smith for (j=0; j<n; j++) { 1142ed8eea03SBarry Smith sum1 += v[jrow]*x[7*idx[jrow]]; 1143ed8eea03SBarry Smith sum2 += v[jrow]*x[7*idx[jrow]+1]; 1144ed8eea03SBarry Smith sum3 += v[jrow]*x[7*idx[jrow]+2]; 1145ed8eea03SBarry Smith sum4 += v[jrow]*x[7*idx[jrow]+3]; 1146ed8eea03SBarry Smith sum5 += v[jrow]*x[7*idx[jrow]+4]; 1147ed8eea03SBarry Smith sum6 += v[jrow]*x[7*idx[jrow]+5]; 1148ed8eea03SBarry Smith sum7 += v[jrow]*x[7*idx[jrow]+6]; 1149ed8eea03SBarry Smith jrow++; 1150ed8eea03SBarry Smith } 1151ed8eea03SBarry Smith y[7*i] += sum1; 1152ed8eea03SBarry Smith y[7*i+1] += sum2; 1153ed8eea03SBarry Smith y[7*i+2] += sum3; 1154ed8eea03SBarry Smith y[7*i+3] += sum4; 1155ed8eea03SBarry Smith y[7*i+4] += sum5; 1156ed8eea03SBarry Smith y[7*i+5] += sum6; 1157ed8eea03SBarry Smith y[7*i+6] += sum7; 1158ed8eea03SBarry Smith } 1159ed8eea03SBarry Smith 1160dc0b31edSSatish Balay ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 11613649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1162ed8eea03SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1163ed8eea03SBarry Smith PetscFunctionReturn(0); 1164ed8eea03SBarry Smith } 1165ed8eea03SBarry Smith 1166ed8eea03SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1167ed8eea03SBarry Smith { 1168ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1169ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1170d2840e09SBarry Smith const PetscScalar *x,*v; 1171d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1172ed8eea03SBarry Smith PetscErrorCode ierr; 1173d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1174d2840e09SBarry Smith PetscInt n,i; 1175ed8eea03SBarry Smith 1176ed8eea03SBarry Smith PetscFunctionBegin; 1177ed8eea03SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 11783649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1179ed8eea03SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1180ed8eea03SBarry Smith for (i=0; i<m; i++) { 1181ed8eea03SBarry Smith idx = a->j + a->i[i]; 1182ed8eea03SBarry Smith v = a->a + a->i[i]; 1183ed8eea03SBarry Smith n = a->i[i+1] - a->i[i]; 1184ed8eea03SBarry Smith alpha1 = x[7*i]; 1185ed8eea03SBarry Smith alpha2 = x[7*i+1]; 1186ed8eea03SBarry Smith alpha3 = x[7*i+2]; 1187ed8eea03SBarry Smith alpha4 = x[7*i+3]; 1188ed8eea03SBarry Smith alpha5 = x[7*i+4]; 1189ed8eea03SBarry Smith alpha6 = x[7*i+5]; 1190ed8eea03SBarry Smith alpha7 = x[7*i+6]; 1191ed8eea03SBarry Smith while (n-->0) { 1192ed8eea03SBarry Smith y[7*(*idx)] += alpha1*(*v); 1193ed8eea03SBarry Smith y[7*(*idx)+1] += alpha2*(*v); 1194ed8eea03SBarry Smith y[7*(*idx)+2] += alpha3*(*v); 1195ed8eea03SBarry Smith y[7*(*idx)+3] += alpha4*(*v); 1196ed8eea03SBarry Smith y[7*(*idx)+4] += alpha5*(*v); 1197ed8eea03SBarry Smith y[7*(*idx)+5] += alpha6*(*v); 1198ed8eea03SBarry Smith y[7*(*idx)+6] += alpha7*(*v); 1199ed8eea03SBarry Smith idx++; v++; 1200ed8eea03SBarry Smith } 1201ed8eea03SBarry Smith } 1202dc0b31edSSatish Balay ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 12033649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1204ed8eea03SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1205ed8eea03SBarry Smith PetscFunctionReturn(0); 1206ed8eea03SBarry Smith } 1207ed8eea03SBarry Smith 1208dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 120966ed3db0SBarry Smith { 121066ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 121166ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1212d2840e09SBarry Smith const PetscScalar *x,*v; 1213d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1214dfbe8321SBarry Smith PetscErrorCode ierr; 1215d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1216d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 121766ed3db0SBarry Smith 121866ed3db0SBarry Smith PetscFunctionBegin; 12193649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 12201ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 122166ed3db0SBarry Smith idx = a->j; 122266ed3db0SBarry Smith v = a->a; 122366ed3db0SBarry Smith ii = a->i; 122466ed3db0SBarry Smith 122566ed3db0SBarry Smith for (i=0; i<m; i++) { 122666ed3db0SBarry Smith jrow = ii[i]; 122766ed3db0SBarry Smith n = ii[i+1] - jrow; 122866ed3db0SBarry Smith sum1 = 0.0; 122966ed3db0SBarry Smith sum2 = 0.0; 123066ed3db0SBarry Smith sum3 = 0.0; 123166ed3db0SBarry Smith sum4 = 0.0; 123266ed3db0SBarry Smith sum5 = 0.0; 123366ed3db0SBarry Smith sum6 = 0.0; 123466ed3db0SBarry Smith sum7 = 0.0; 123566ed3db0SBarry Smith sum8 = 0.0; 123626fbe8dcSKarl Rupp 1237b3c51c66SMatthew Knepley nonzerorow += (n>0); 123866ed3db0SBarry Smith for (j=0; j<n; j++) { 123966ed3db0SBarry Smith sum1 += v[jrow]*x[8*idx[jrow]]; 124066ed3db0SBarry Smith sum2 += v[jrow]*x[8*idx[jrow]+1]; 124166ed3db0SBarry Smith sum3 += v[jrow]*x[8*idx[jrow]+2]; 124266ed3db0SBarry Smith sum4 += v[jrow]*x[8*idx[jrow]+3]; 124366ed3db0SBarry Smith sum5 += v[jrow]*x[8*idx[jrow]+4]; 124466ed3db0SBarry Smith sum6 += v[jrow]*x[8*idx[jrow]+5]; 124566ed3db0SBarry Smith sum7 += v[jrow]*x[8*idx[jrow]+6]; 124666ed3db0SBarry Smith sum8 += v[jrow]*x[8*idx[jrow]+7]; 124766ed3db0SBarry Smith jrow++; 124866ed3db0SBarry Smith } 124966ed3db0SBarry Smith y[8*i] = sum1; 125066ed3db0SBarry Smith y[8*i+1] = sum2; 125166ed3db0SBarry Smith y[8*i+2] = sum3; 125266ed3db0SBarry Smith y[8*i+3] = sum4; 125366ed3db0SBarry Smith y[8*i+4] = sum5; 125466ed3db0SBarry Smith y[8*i+5] = sum6; 125566ed3db0SBarry Smith y[8*i+6] = sum7; 125666ed3db0SBarry Smith y[8*i+7] = sum8; 125766ed3db0SBarry Smith } 125866ed3db0SBarry Smith 1259dc0b31edSSatish Balay ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr); 12603649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 12611ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 126266ed3db0SBarry Smith PetscFunctionReturn(0); 126366ed3db0SBarry Smith } 126466ed3db0SBarry Smith 1265dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 126666ed3db0SBarry Smith { 126766ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 126866ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1269d2840e09SBarry Smith const PetscScalar *x,*v; 1270d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1271dfbe8321SBarry Smith PetscErrorCode ierr; 1272d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1273d2840e09SBarry Smith PetscInt n,i; 127466ed3db0SBarry Smith 127566ed3db0SBarry Smith PetscFunctionBegin; 1276d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 12773649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 12781ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1279bfec09a0SHong Zhang 128066ed3db0SBarry Smith for (i=0; i<m; i++) { 1281bfec09a0SHong Zhang idx = a->j + a->i[i]; 1282bfec09a0SHong Zhang v = a->a + a->i[i]; 128366ed3db0SBarry Smith n = a->i[i+1] - a->i[i]; 128466ed3db0SBarry Smith alpha1 = x[8*i]; 128566ed3db0SBarry Smith alpha2 = x[8*i+1]; 128666ed3db0SBarry Smith alpha3 = x[8*i+2]; 128766ed3db0SBarry Smith alpha4 = x[8*i+3]; 128866ed3db0SBarry Smith alpha5 = x[8*i+4]; 128966ed3db0SBarry Smith alpha6 = x[8*i+5]; 129066ed3db0SBarry Smith alpha7 = x[8*i+6]; 129166ed3db0SBarry Smith alpha8 = x[8*i+7]; 129266ed3db0SBarry Smith while (n-->0) { 129366ed3db0SBarry Smith y[8*(*idx)] += alpha1*(*v); 129466ed3db0SBarry Smith y[8*(*idx)+1] += alpha2*(*v); 129566ed3db0SBarry Smith y[8*(*idx)+2] += alpha3*(*v); 129666ed3db0SBarry Smith y[8*(*idx)+3] += alpha4*(*v); 129766ed3db0SBarry Smith y[8*(*idx)+4] += alpha5*(*v); 129866ed3db0SBarry Smith y[8*(*idx)+5] += alpha6*(*v); 129966ed3db0SBarry Smith y[8*(*idx)+6] += alpha7*(*v); 130066ed3db0SBarry Smith y[8*(*idx)+7] += alpha8*(*v); 130166ed3db0SBarry Smith idx++; v++; 130266ed3db0SBarry Smith } 130366ed3db0SBarry Smith } 1304dc0b31edSSatish Balay ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 13053649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 13061ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 130766ed3db0SBarry Smith PetscFunctionReturn(0); 130866ed3db0SBarry Smith } 130966ed3db0SBarry Smith 1310dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 131166ed3db0SBarry Smith { 131266ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 131366ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1314d2840e09SBarry Smith const PetscScalar *x,*v; 1315d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1316dfbe8321SBarry Smith PetscErrorCode ierr; 1317d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1318b24ad042SBarry Smith PetscInt n,i,jrow,j; 131966ed3db0SBarry Smith 132066ed3db0SBarry Smith PetscFunctionBegin; 132166ed3db0SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 13223649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 13231ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 132466ed3db0SBarry Smith idx = a->j; 132566ed3db0SBarry Smith v = a->a; 132666ed3db0SBarry Smith ii = a->i; 132766ed3db0SBarry Smith 132866ed3db0SBarry Smith for (i=0; i<m; i++) { 132966ed3db0SBarry Smith jrow = ii[i]; 133066ed3db0SBarry Smith n = ii[i+1] - jrow; 133166ed3db0SBarry Smith sum1 = 0.0; 133266ed3db0SBarry Smith sum2 = 0.0; 133366ed3db0SBarry Smith sum3 = 0.0; 133466ed3db0SBarry Smith sum4 = 0.0; 133566ed3db0SBarry Smith sum5 = 0.0; 133666ed3db0SBarry Smith sum6 = 0.0; 133766ed3db0SBarry Smith sum7 = 0.0; 133866ed3db0SBarry Smith sum8 = 0.0; 133966ed3db0SBarry Smith for (j=0; j<n; j++) { 134066ed3db0SBarry Smith sum1 += v[jrow]*x[8*idx[jrow]]; 134166ed3db0SBarry Smith sum2 += v[jrow]*x[8*idx[jrow]+1]; 134266ed3db0SBarry Smith sum3 += v[jrow]*x[8*idx[jrow]+2]; 134366ed3db0SBarry Smith sum4 += v[jrow]*x[8*idx[jrow]+3]; 134466ed3db0SBarry Smith sum5 += v[jrow]*x[8*idx[jrow]+4]; 134566ed3db0SBarry Smith sum6 += v[jrow]*x[8*idx[jrow]+5]; 134666ed3db0SBarry Smith sum7 += v[jrow]*x[8*idx[jrow]+6]; 134766ed3db0SBarry Smith sum8 += v[jrow]*x[8*idx[jrow]+7]; 134866ed3db0SBarry Smith jrow++; 134966ed3db0SBarry Smith } 135066ed3db0SBarry Smith y[8*i] += sum1; 135166ed3db0SBarry Smith y[8*i+1] += sum2; 135266ed3db0SBarry Smith y[8*i+2] += sum3; 135366ed3db0SBarry Smith y[8*i+3] += sum4; 135466ed3db0SBarry Smith y[8*i+4] += sum5; 135566ed3db0SBarry Smith y[8*i+5] += sum6; 135666ed3db0SBarry Smith y[8*i+6] += sum7; 135766ed3db0SBarry Smith y[8*i+7] += sum8; 135866ed3db0SBarry Smith } 135966ed3db0SBarry Smith 1360dc0b31edSSatish Balay ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 13613649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 13621ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 136366ed3db0SBarry Smith PetscFunctionReturn(0); 136466ed3db0SBarry Smith } 136566ed3db0SBarry Smith 1366dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 136766ed3db0SBarry Smith { 136866ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 136966ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1370d2840e09SBarry Smith const PetscScalar *x,*v; 1371d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1372dfbe8321SBarry Smith PetscErrorCode ierr; 1373d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1374d2840e09SBarry Smith PetscInt n,i; 137566ed3db0SBarry Smith 137666ed3db0SBarry Smith PetscFunctionBegin; 137766ed3db0SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 13783649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 13791ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 138066ed3db0SBarry Smith for (i=0; i<m; i++) { 1381bfec09a0SHong Zhang idx = a->j + a->i[i]; 1382bfec09a0SHong Zhang v = a->a + a->i[i]; 138366ed3db0SBarry Smith n = a->i[i+1] - a->i[i]; 138466ed3db0SBarry Smith alpha1 = x[8*i]; 138566ed3db0SBarry Smith alpha2 = x[8*i+1]; 138666ed3db0SBarry Smith alpha3 = x[8*i+2]; 138766ed3db0SBarry Smith alpha4 = x[8*i+3]; 138866ed3db0SBarry Smith alpha5 = x[8*i+4]; 138966ed3db0SBarry Smith alpha6 = x[8*i+5]; 139066ed3db0SBarry Smith alpha7 = x[8*i+6]; 139166ed3db0SBarry Smith alpha8 = x[8*i+7]; 139266ed3db0SBarry Smith while (n-->0) { 139366ed3db0SBarry Smith y[8*(*idx)] += alpha1*(*v); 139466ed3db0SBarry Smith y[8*(*idx)+1] += alpha2*(*v); 139566ed3db0SBarry Smith y[8*(*idx)+2] += alpha3*(*v); 139666ed3db0SBarry Smith y[8*(*idx)+3] += alpha4*(*v); 139766ed3db0SBarry Smith y[8*(*idx)+4] += alpha5*(*v); 139866ed3db0SBarry Smith y[8*(*idx)+5] += alpha6*(*v); 139966ed3db0SBarry Smith y[8*(*idx)+6] += alpha7*(*v); 140066ed3db0SBarry Smith y[8*(*idx)+7] += alpha8*(*v); 140166ed3db0SBarry Smith idx++; v++; 140266ed3db0SBarry Smith } 140366ed3db0SBarry Smith } 1404dc0b31edSSatish Balay ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 14053649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 14061ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 14072f7816d4SBarry Smith PetscFunctionReturn(0); 14082f7816d4SBarry Smith } 14092f7816d4SBarry Smith 14102b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/ 1411dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 14122b692628SMatthew Knepley { 14132b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 14142b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1415d2840e09SBarry Smith const PetscScalar *x,*v; 1416d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1417dfbe8321SBarry Smith PetscErrorCode ierr; 1418d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1419d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 14202b692628SMatthew Knepley 14212b692628SMatthew Knepley PetscFunctionBegin; 14223649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 14231ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 14242b692628SMatthew Knepley idx = a->j; 14252b692628SMatthew Knepley v = a->a; 14262b692628SMatthew Knepley ii = a->i; 14272b692628SMatthew Knepley 14282b692628SMatthew Knepley for (i=0; i<m; i++) { 14292b692628SMatthew Knepley jrow = ii[i]; 14302b692628SMatthew Knepley n = ii[i+1] - jrow; 14312b692628SMatthew Knepley sum1 = 0.0; 14322b692628SMatthew Knepley sum2 = 0.0; 14332b692628SMatthew Knepley sum3 = 0.0; 14342b692628SMatthew Knepley sum4 = 0.0; 14352b692628SMatthew Knepley sum5 = 0.0; 14362b692628SMatthew Knepley sum6 = 0.0; 14372b692628SMatthew Knepley sum7 = 0.0; 14382b692628SMatthew Knepley sum8 = 0.0; 14392b692628SMatthew Knepley sum9 = 0.0; 144026fbe8dcSKarl Rupp 1441b3c51c66SMatthew Knepley nonzerorow += (n>0); 14422b692628SMatthew Knepley for (j=0; j<n; j++) { 14432b692628SMatthew Knepley sum1 += v[jrow]*x[9*idx[jrow]]; 14442b692628SMatthew Knepley sum2 += v[jrow]*x[9*idx[jrow]+1]; 14452b692628SMatthew Knepley sum3 += v[jrow]*x[9*idx[jrow]+2]; 14462b692628SMatthew Knepley sum4 += v[jrow]*x[9*idx[jrow]+3]; 14472b692628SMatthew Knepley sum5 += v[jrow]*x[9*idx[jrow]+4]; 14482b692628SMatthew Knepley sum6 += v[jrow]*x[9*idx[jrow]+5]; 14492b692628SMatthew Knepley sum7 += v[jrow]*x[9*idx[jrow]+6]; 14502b692628SMatthew Knepley sum8 += v[jrow]*x[9*idx[jrow]+7]; 14512b692628SMatthew Knepley sum9 += v[jrow]*x[9*idx[jrow]+8]; 14522b692628SMatthew Knepley jrow++; 14532b692628SMatthew Knepley } 14542b692628SMatthew Knepley y[9*i] = sum1; 14552b692628SMatthew Knepley y[9*i+1] = sum2; 14562b692628SMatthew Knepley y[9*i+2] = sum3; 14572b692628SMatthew Knepley y[9*i+3] = sum4; 14582b692628SMatthew Knepley y[9*i+4] = sum5; 14592b692628SMatthew Knepley y[9*i+5] = sum6; 14602b692628SMatthew Knepley y[9*i+6] = sum7; 14612b692628SMatthew Knepley y[9*i+7] = sum8; 14622b692628SMatthew Knepley y[9*i+8] = sum9; 14632b692628SMatthew Knepley } 14642b692628SMatthew Knepley 1465dc0b31edSSatish Balay ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr); 14663649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 14671ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 14682b692628SMatthew Knepley PetscFunctionReturn(0); 14692b692628SMatthew Knepley } 14702b692628SMatthew Knepley 1471b9cda22cSBarry Smith /* ------------------------------------------------------------------------------*/ 1472b9cda22cSBarry Smith 1473dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 14742b692628SMatthew Knepley { 14752b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 14762b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1477d2840e09SBarry Smith const PetscScalar *x,*v; 1478d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1479dfbe8321SBarry Smith PetscErrorCode ierr; 1480d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1481d2840e09SBarry Smith PetscInt n,i; 14822b692628SMatthew Knepley 14832b692628SMatthew Knepley PetscFunctionBegin; 1484d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 14853649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 14861ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 14872b692628SMatthew Knepley 14882b692628SMatthew Knepley for (i=0; i<m; i++) { 14892b692628SMatthew Knepley idx = a->j + a->i[i]; 14902b692628SMatthew Knepley v = a->a + a->i[i]; 14912b692628SMatthew Knepley n = a->i[i+1] - a->i[i]; 14922b692628SMatthew Knepley alpha1 = x[9*i]; 14932b692628SMatthew Knepley alpha2 = x[9*i+1]; 14942b692628SMatthew Knepley alpha3 = x[9*i+2]; 14952b692628SMatthew Knepley alpha4 = x[9*i+3]; 14962b692628SMatthew Knepley alpha5 = x[9*i+4]; 14972b692628SMatthew Knepley alpha6 = x[9*i+5]; 14982b692628SMatthew Knepley alpha7 = x[9*i+6]; 14992b692628SMatthew Knepley alpha8 = x[9*i+7]; 15002f6bd0c6SMatthew Knepley alpha9 = x[9*i+8]; 15012b692628SMatthew Knepley while (n-->0) { 15022b692628SMatthew Knepley y[9*(*idx)] += alpha1*(*v); 15032b692628SMatthew Knepley y[9*(*idx)+1] += alpha2*(*v); 15042b692628SMatthew Knepley y[9*(*idx)+2] += alpha3*(*v); 15052b692628SMatthew Knepley y[9*(*idx)+3] += alpha4*(*v); 15062b692628SMatthew Knepley y[9*(*idx)+4] += alpha5*(*v); 15072b692628SMatthew Knepley y[9*(*idx)+5] += alpha6*(*v); 15082b692628SMatthew Knepley y[9*(*idx)+6] += alpha7*(*v); 15092b692628SMatthew Knepley y[9*(*idx)+7] += alpha8*(*v); 15102b692628SMatthew Knepley y[9*(*idx)+8] += alpha9*(*v); 15112b692628SMatthew Knepley idx++; v++; 15122b692628SMatthew Knepley } 15132b692628SMatthew Knepley } 1514dc0b31edSSatish Balay ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 15153649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 15161ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 15172b692628SMatthew Knepley PetscFunctionReturn(0); 15182b692628SMatthew Knepley } 15192b692628SMatthew Knepley 1520dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 15212b692628SMatthew Knepley { 15222b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 15232b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1524d2840e09SBarry Smith const PetscScalar *x,*v; 1525d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1526dfbe8321SBarry Smith PetscErrorCode ierr; 1527d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1528b24ad042SBarry Smith PetscInt n,i,jrow,j; 15292b692628SMatthew Knepley 15302b692628SMatthew Knepley PetscFunctionBegin; 15312b692628SMatthew Knepley if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 15323649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 15331ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 15342b692628SMatthew Knepley idx = a->j; 15352b692628SMatthew Knepley v = a->a; 15362b692628SMatthew Knepley ii = a->i; 15372b692628SMatthew Knepley 15382b692628SMatthew Knepley for (i=0; i<m; i++) { 15392b692628SMatthew Knepley jrow = ii[i]; 15402b692628SMatthew Knepley n = ii[i+1] - jrow; 15412b692628SMatthew Knepley sum1 = 0.0; 15422b692628SMatthew Knepley sum2 = 0.0; 15432b692628SMatthew Knepley sum3 = 0.0; 15442b692628SMatthew Knepley sum4 = 0.0; 15452b692628SMatthew Knepley sum5 = 0.0; 15462b692628SMatthew Knepley sum6 = 0.0; 15472b692628SMatthew Knepley sum7 = 0.0; 15482b692628SMatthew Knepley sum8 = 0.0; 15492b692628SMatthew Knepley sum9 = 0.0; 15502b692628SMatthew Knepley for (j=0; j<n; j++) { 15512b692628SMatthew Knepley sum1 += v[jrow]*x[9*idx[jrow]]; 15522b692628SMatthew Knepley sum2 += v[jrow]*x[9*idx[jrow]+1]; 15532b692628SMatthew Knepley sum3 += v[jrow]*x[9*idx[jrow]+2]; 15542b692628SMatthew Knepley sum4 += v[jrow]*x[9*idx[jrow]+3]; 15552b692628SMatthew Knepley sum5 += v[jrow]*x[9*idx[jrow]+4]; 15562b692628SMatthew Knepley sum6 += v[jrow]*x[9*idx[jrow]+5]; 15572b692628SMatthew Knepley sum7 += v[jrow]*x[9*idx[jrow]+6]; 15582b692628SMatthew Knepley sum8 += v[jrow]*x[9*idx[jrow]+7]; 15592b692628SMatthew Knepley sum9 += v[jrow]*x[9*idx[jrow]+8]; 15602b692628SMatthew Knepley jrow++; 15612b692628SMatthew Knepley } 15622b692628SMatthew Knepley y[9*i] += sum1; 15632b692628SMatthew Knepley y[9*i+1] += sum2; 15642b692628SMatthew Knepley y[9*i+2] += sum3; 15652b692628SMatthew Knepley y[9*i+3] += sum4; 15662b692628SMatthew Knepley y[9*i+4] += sum5; 15672b692628SMatthew Knepley y[9*i+5] += sum6; 15682b692628SMatthew Knepley y[9*i+6] += sum7; 15692b692628SMatthew Knepley y[9*i+7] += sum8; 15702b692628SMatthew Knepley y[9*i+8] += sum9; 15712b692628SMatthew Knepley } 15722b692628SMatthew Knepley 1573efee365bSSatish Balay ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 15743649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 15751ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 15762b692628SMatthew Knepley PetscFunctionReturn(0); 15772b692628SMatthew Knepley } 15782b692628SMatthew Knepley 1579dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 15802b692628SMatthew Knepley { 15812b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 15822b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1583d2840e09SBarry Smith const PetscScalar *x,*v; 1584d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1585dfbe8321SBarry Smith PetscErrorCode ierr; 1586d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1587d2840e09SBarry Smith PetscInt n,i; 15882b692628SMatthew Knepley 15892b692628SMatthew Knepley PetscFunctionBegin; 15902b692628SMatthew Knepley if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 15913649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 15921ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 15932b692628SMatthew Knepley for (i=0; i<m; i++) { 15942b692628SMatthew Knepley idx = a->j + a->i[i]; 15952b692628SMatthew Knepley v = a->a + a->i[i]; 15962b692628SMatthew Knepley n = a->i[i+1] - a->i[i]; 15972b692628SMatthew Knepley alpha1 = x[9*i]; 15982b692628SMatthew Knepley alpha2 = x[9*i+1]; 15992b692628SMatthew Knepley alpha3 = x[9*i+2]; 16002b692628SMatthew Knepley alpha4 = x[9*i+3]; 16012b692628SMatthew Knepley alpha5 = x[9*i+4]; 16022b692628SMatthew Knepley alpha6 = x[9*i+5]; 16032b692628SMatthew Knepley alpha7 = x[9*i+6]; 16042b692628SMatthew Knepley alpha8 = x[9*i+7]; 16052b692628SMatthew Knepley alpha9 = x[9*i+8]; 16062b692628SMatthew Knepley while (n-->0) { 16072b692628SMatthew Knepley y[9*(*idx)] += alpha1*(*v); 16082b692628SMatthew Knepley y[9*(*idx)+1] += alpha2*(*v); 16092b692628SMatthew Knepley y[9*(*idx)+2] += alpha3*(*v); 16102b692628SMatthew Knepley y[9*(*idx)+3] += alpha4*(*v); 16112b692628SMatthew Knepley y[9*(*idx)+4] += alpha5*(*v); 16122b692628SMatthew Knepley y[9*(*idx)+5] += alpha6*(*v); 16132b692628SMatthew Knepley y[9*(*idx)+6] += alpha7*(*v); 16142b692628SMatthew Knepley y[9*(*idx)+7] += alpha8*(*v); 16152b692628SMatthew Knepley y[9*(*idx)+8] += alpha9*(*v); 16162b692628SMatthew Knepley idx++; v++; 16172b692628SMatthew Knepley } 16182b692628SMatthew Knepley } 1619dc0b31edSSatish Balay ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 16203649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 16211ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 16222b692628SMatthew Knepley PetscFunctionReturn(0); 16232b692628SMatthew Knepley } 1624b9cda22cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1625b9cda22cSBarry Smith { 1626b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1627b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1628d2840e09SBarry Smith const PetscScalar *x,*v; 1629d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1630b9cda22cSBarry Smith PetscErrorCode ierr; 1631d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1632d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 1633b9cda22cSBarry Smith 1634b9cda22cSBarry Smith PetscFunctionBegin; 16353649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1636b9cda22cSBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1637b9cda22cSBarry Smith idx = a->j; 1638b9cda22cSBarry Smith v = a->a; 1639b9cda22cSBarry Smith ii = a->i; 1640b9cda22cSBarry Smith 1641b9cda22cSBarry Smith for (i=0; i<m; i++) { 1642b9cda22cSBarry Smith jrow = ii[i]; 1643b9cda22cSBarry Smith n = ii[i+1] - jrow; 1644b9cda22cSBarry Smith sum1 = 0.0; 1645b9cda22cSBarry Smith sum2 = 0.0; 1646b9cda22cSBarry Smith sum3 = 0.0; 1647b9cda22cSBarry Smith sum4 = 0.0; 1648b9cda22cSBarry Smith sum5 = 0.0; 1649b9cda22cSBarry Smith sum6 = 0.0; 1650b9cda22cSBarry Smith sum7 = 0.0; 1651b9cda22cSBarry Smith sum8 = 0.0; 1652b9cda22cSBarry Smith sum9 = 0.0; 1653b9cda22cSBarry Smith sum10 = 0.0; 165426fbe8dcSKarl Rupp 1655b3c51c66SMatthew Knepley nonzerorow += (n>0); 1656b9cda22cSBarry Smith for (j=0; j<n; j++) { 1657b9cda22cSBarry Smith sum1 += v[jrow]*x[10*idx[jrow]]; 1658b9cda22cSBarry Smith sum2 += v[jrow]*x[10*idx[jrow]+1]; 1659b9cda22cSBarry Smith sum3 += v[jrow]*x[10*idx[jrow]+2]; 1660b9cda22cSBarry Smith sum4 += v[jrow]*x[10*idx[jrow]+3]; 1661b9cda22cSBarry Smith sum5 += v[jrow]*x[10*idx[jrow]+4]; 1662b9cda22cSBarry Smith sum6 += v[jrow]*x[10*idx[jrow]+5]; 1663b9cda22cSBarry Smith sum7 += v[jrow]*x[10*idx[jrow]+6]; 1664b9cda22cSBarry Smith sum8 += v[jrow]*x[10*idx[jrow]+7]; 1665b9cda22cSBarry Smith sum9 += v[jrow]*x[10*idx[jrow]+8]; 1666b9cda22cSBarry Smith sum10 += v[jrow]*x[10*idx[jrow]+9]; 1667b9cda22cSBarry Smith jrow++; 1668b9cda22cSBarry Smith } 1669b9cda22cSBarry Smith y[10*i] = sum1; 1670b9cda22cSBarry Smith y[10*i+1] = sum2; 1671b9cda22cSBarry Smith y[10*i+2] = sum3; 1672b9cda22cSBarry Smith y[10*i+3] = sum4; 1673b9cda22cSBarry Smith y[10*i+4] = sum5; 1674b9cda22cSBarry Smith y[10*i+5] = sum6; 1675b9cda22cSBarry Smith y[10*i+6] = sum7; 1676b9cda22cSBarry Smith y[10*i+7] = sum8; 1677b9cda22cSBarry Smith y[10*i+8] = sum9; 1678b9cda22cSBarry Smith y[10*i+9] = sum10; 1679b9cda22cSBarry Smith } 1680b9cda22cSBarry Smith 1681dc0b31edSSatish Balay ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr); 16823649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1683b9cda22cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1684b9cda22cSBarry Smith PetscFunctionReturn(0); 1685b9cda22cSBarry Smith } 1686b9cda22cSBarry Smith 1687b9cda22cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1688b9cda22cSBarry Smith { 1689b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1690b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1691d2840e09SBarry Smith const PetscScalar *x,*v; 1692d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1693b9cda22cSBarry Smith PetscErrorCode ierr; 1694d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1695b9cda22cSBarry Smith PetscInt n,i,jrow,j; 1696b9cda22cSBarry Smith 1697b9cda22cSBarry Smith PetscFunctionBegin; 1698b9cda22cSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 16993649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1700b9cda22cSBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1701b9cda22cSBarry Smith idx = a->j; 1702b9cda22cSBarry Smith v = a->a; 1703b9cda22cSBarry Smith ii = a->i; 1704b9cda22cSBarry Smith 1705b9cda22cSBarry Smith for (i=0; i<m; i++) { 1706b9cda22cSBarry Smith jrow = ii[i]; 1707b9cda22cSBarry Smith n = ii[i+1] - jrow; 1708b9cda22cSBarry Smith sum1 = 0.0; 1709b9cda22cSBarry Smith sum2 = 0.0; 1710b9cda22cSBarry Smith sum3 = 0.0; 1711b9cda22cSBarry Smith sum4 = 0.0; 1712b9cda22cSBarry Smith sum5 = 0.0; 1713b9cda22cSBarry Smith sum6 = 0.0; 1714b9cda22cSBarry Smith sum7 = 0.0; 1715b9cda22cSBarry Smith sum8 = 0.0; 1716b9cda22cSBarry Smith sum9 = 0.0; 1717b9cda22cSBarry Smith sum10 = 0.0; 1718b9cda22cSBarry Smith for (j=0; j<n; j++) { 1719b9cda22cSBarry Smith sum1 += v[jrow]*x[10*idx[jrow]]; 1720b9cda22cSBarry Smith sum2 += v[jrow]*x[10*idx[jrow]+1]; 1721b9cda22cSBarry Smith sum3 += v[jrow]*x[10*idx[jrow]+2]; 1722b9cda22cSBarry Smith sum4 += v[jrow]*x[10*idx[jrow]+3]; 1723b9cda22cSBarry Smith sum5 += v[jrow]*x[10*idx[jrow]+4]; 1724b9cda22cSBarry Smith sum6 += v[jrow]*x[10*idx[jrow]+5]; 1725b9cda22cSBarry Smith sum7 += v[jrow]*x[10*idx[jrow]+6]; 1726b9cda22cSBarry Smith sum8 += v[jrow]*x[10*idx[jrow]+7]; 1727b9cda22cSBarry Smith sum9 += v[jrow]*x[10*idx[jrow]+8]; 1728b9cda22cSBarry Smith sum10 += v[jrow]*x[10*idx[jrow]+9]; 1729b9cda22cSBarry Smith jrow++; 1730b9cda22cSBarry Smith } 1731b9cda22cSBarry Smith y[10*i] += sum1; 1732b9cda22cSBarry Smith y[10*i+1] += sum2; 1733b9cda22cSBarry Smith y[10*i+2] += sum3; 1734b9cda22cSBarry Smith y[10*i+3] += sum4; 1735b9cda22cSBarry Smith y[10*i+4] += sum5; 1736b9cda22cSBarry Smith y[10*i+5] += sum6; 1737b9cda22cSBarry Smith y[10*i+6] += sum7; 1738b9cda22cSBarry Smith y[10*i+7] += sum8; 1739b9cda22cSBarry Smith y[10*i+8] += sum9; 1740b9cda22cSBarry Smith y[10*i+9] += sum10; 1741b9cda22cSBarry Smith } 1742b9cda22cSBarry Smith 1743dc0b31edSSatish Balay ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 17443649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1745b9cda22cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1746b9cda22cSBarry Smith PetscFunctionReturn(0); 1747b9cda22cSBarry Smith } 1748b9cda22cSBarry Smith 1749b9cda22cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1750b9cda22cSBarry Smith { 1751b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1752b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1753d2840e09SBarry Smith const PetscScalar *x,*v; 1754d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1755b9cda22cSBarry Smith PetscErrorCode ierr; 1756d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1757d2840e09SBarry Smith PetscInt n,i; 1758b9cda22cSBarry Smith 1759b9cda22cSBarry Smith PetscFunctionBegin; 1760d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 17613649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1762b9cda22cSBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1763b9cda22cSBarry Smith 1764b9cda22cSBarry Smith for (i=0; i<m; i++) { 1765b9cda22cSBarry Smith idx = a->j + a->i[i]; 1766b9cda22cSBarry Smith v = a->a + a->i[i]; 1767b9cda22cSBarry Smith n = a->i[i+1] - a->i[i]; 1768e29fdc22SBarry Smith alpha1 = x[10*i]; 1769e29fdc22SBarry Smith alpha2 = x[10*i+1]; 1770e29fdc22SBarry Smith alpha3 = x[10*i+2]; 1771e29fdc22SBarry Smith alpha4 = x[10*i+3]; 1772e29fdc22SBarry Smith alpha5 = x[10*i+4]; 1773e29fdc22SBarry Smith alpha6 = x[10*i+5]; 1774e29fdc22SBarry Smith alpha7 = x[10*i+6]; 1775e29fdc22SBarry Smith alpha8 = x[10*i+7]; 1776e29fdc22SBarry Smith alpha9 = x[10*i+8]; 1777b9cda22cSBarry Smith alpha10 = x[10*i+9]; 1778b9cda22cSBarry Smith while (n-->0) { 1779e29fdc22SBarry Smith y[10*(*idx)] += alpha1*(*v); 1780e29fdc22SBarry Smith y[10*(*idx)+1] += alpha2*(*v); 1781e29fdc22SBarry Smith y[10*(*idx)+2] += alpha3*(*v); 1782e29fdc22SBarry Smith y[10*(*idx)+3] += alpha4*(*v); 1783e29fdc22SBarry Smith y[10*(*idx)+4] += alpha5*(*v); 1784e29fdc22SBarry Smith y[10*(*idx)+5] += alpha6*(*v); 1785e29fdc22SBarry Smith y[10*(*idx)+6] += alpha7*(*v); 1786e29fdc22SBarry Smith y[10*(*idx)+7] += alpha8*(*v); 1787e29fdc22SBarry Smith y[10*(*idx)+8] += alpha9*(*v); 1788b9cda22cSBarry Smith y[10*(*idx)+9] += alpha10*(*v); 1789b9cda22cSBarry Smith idx++; v++; 1790b9cda22cSBarry Smith } 1791b9cda22cSBarry Smith } 1792dc0b31edSSatish Balay ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 17933649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1794b9cda22cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1795b9cda22cSBarry Smith PetscFunctionReturn(0); 1796b9cda22cSBarry Smith } 1797b9cda22cSBarry Smith 1798b9cda22cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1799b9cda22cSBarry Smith { 1800b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1801b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1802d2840e09SBarry Smith const PetscScalar *x,*v; 1803d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1804b9cda22cSBarry Smith PetscErrorCode ierr; 1805d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1806d2840e09SBarry Smith PetscInt n,i; 1807b9cda22cSBarry Smith 1808b9cda22cSBarry Smith PetscFunctionBegin; 1809b9cda22cSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 18103649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1811b9cda22cSBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1812b9cda22cSBarry Smith for (i=0; i<m; i++) { 1813b9cda22cSBarry Smith idx = a->j + a->i[i]; 1814b9cda22cSBarry Smith v = a->a + a->i[i]; 1815b9cda22cSBarry Smith n = a->i[i+1] - a->i[i]; 1816b9cda22cSBarry Smith alpha1 = x[10*i]; 1817b9cda22cSBarry Smith alpha2 = x[10*i+1]; 1818b9cda22cSBarry Smith alpha3 = x[10*i+2]; 1819b9cda22cSBarry Smith alpha4 = x[10*i+3]; 1820b9cda22cSBarry Smith alpha5 = x[10*i+4]; 1821b9cda22cSBarry Smith alpha6 = x[10*i+5]; 1822b9cda22cSBarry Smith alpha7 = x[10*i+6]; 1823b9cda22cSBarry Smith alpha8 = x[10*i+7]; 1824b9cda22cSBarry Smith alpha9 = x[10*i+8]; 1825b9cda22cSBarry Smith alpha10 = x[10*i+9]; 1826b9cda22cSBarry Smith while (n-->0) { 1827b9cda22cSBarry Smith y[10*(*idx)] += alpha1*(*v); 1828b9cda22cSBarry Smith y[10*(*idx)+1] += alpha2*(*v); 1829b9cda22cSBarry Smith y[10*(*idx)+2] += alpha3*(*v); 1830b9cda22cSBarry Smith y[10*(*idx)+3] += alpha4*(*v); 1831b9cda22cSBarry Smith y[10*(*idx)+4] += alpha5*(*v); 1832b9cda22cSBarry Smith y[10*(*idx)+5] += alpha6*(*v); 1833b9cda22cSBarry Smith y[10*(*idx)+6] += alpha7*(*v); 1834b9cda22cSBarry Smith y[10*(*idx)+7] += alpha8*(*v); 1835b9cda22cSBarry Smith y[10*(*idx)+8] += alpha9*(*v); 1836b9cda22cSBarry Smith y[10*(*idx)+9] += alpha10*(*v); 1837b9cda22cSBarry Smith idx++; v++; 1838b9cda22cSBarry Smith } 1839b9cda22cSBarry Smith } 1840dc0b31edSSatish Balay ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 18413649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1842b9cda22cSBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1843b9cda22cSBarry Smith PetscFunctionReturn(0); 1844b9cda22cSBarry Smith } 1845b9cda22cSBarry Smith 18462b692628SMatthew Knepley 18472f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/ 1848dbdd7285SBarry Smith PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1849dbdd7285SBarry Smith { 1850dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1851dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1852d2840e09SBarry Smith const PetscScalar *x,*v; 1853d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1854dbdd7285SBarry Smith PetscErrorCode ierr; 1855d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1856d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 1857dbdd7285SBarry Smith 1858dbdd7285SBarry Smith PetscFunctionBegin; 18593649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1860dbdd7285SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1861dbdd7285SBarry Smith idx = a->j; 1862dbdd7285SBarry Smith v = a->a; 1863dbdd7285SBarry Smith ii = a->i; 1864dbdd7285SBarry Smith 1865dbdd7285SBarry Smith for (i=0; i<m; i++) { 1866dbdd7285SBarry Smith jrow = ii[i]; 1867dbdd7285SBarry Smith n = ii[i+1] - jrow; 1868dbdd7285SBarry Smith sum1 = 0.0; 1869dbdd7285SBarry Smith sum2 = 0.0; 1870dbdd7285SBarry Smith sum3 = 0.0; 1871dbdd7285SBarry Smith sum4 = 0.0; 1872dbdd7285SBarry Smith sum5 = 0.0; 1873dbdd7285SBarry Smith sum6 = 0.0; 1874dbdd7285SBarry Smith sum7 = 0.0; 1875dbdd7285SBarry Smith sum8 = 0.0; 1876dbdd7285SBarry Smith sum9 = 0.0; 1877dbdd7285SBarry Smith sum10 = 0.0; 1878dbdd7285SBarry Smith sum11 = 0.0; 187926fbe8dcSKarl Rupp 1880dbdd7285SBarry Smith nonzerorow += (n>0); 1881dbdd7285SBarry Smith for (j=0; j<n; j++) { 1882dbdd7285SBarry Smith sum1 += v[jrow]*x[11*idx[jrow]]; 1883dbdd7285SBarry Smith sum2 += v[jrow]*x[11*idx[jrow]+1]; 1884dbdd7285SBarry Smith sum3 += v[jrow]*x[11*idx[jrow]+2]; 1885dbdd7285SBarry Smith sum4 += v[jrow]*x[11*idx[jrow]+3]; 1886dbdd7285SBarry Smith sum5 += v[jrow]*x[11*idx[jrow]+4]; 1887dbdd7285SBarry Smith sum6 += v[jrow]*x[11*idx[jrow]+5]; 1888dbdd7285SBarry Smith sum7 += v[jrow]*x[11*idx[jrow]+6]; 1889dbdd7285SBarry Smith sum8 += v[jrow]*x[11*idx[jrow]+7]; 1890dbdd7285SBarry Smith sum9 += v[jrow]*x[11*idx[jrow]+8]; 1891dbdd7285SBarry Smith sum10 += v[jrow]*x[11*idx[jrow]+9]; 1892dbdd7285SBarry Smith sum11 += v[jrow]*x[11*idx[jrow]+10]; 1893dbdd7285SBarry Smith jrow++; 1894dbdd7285SBarry Smith } 1895dbdd7285SBarry Smith y[11*i] = sum1; 1896dbdd7285SBarry Smith y[11*i+1] = sum2; 1897dbdd7285SBarry Smith y[11*i+2] = sum3; 1898dbdd7285SBarry Smith y[11*i+3] = sum4; 1899dbdd7285SBarry Smith y[11*i+4] = sum5; 1900dbdd7285SBarry Smith y[11*i+5] = sum6; 1901dbdd7285SBarry Smith y[11*i+6] = sum7; 1902dbdd7285SBarry Smith y[11*i+7] = sum8; 1903dbdd7285SBarry Smith y[11*i+8] = sum9; 1904dbdd7285SBarry Smith y[11*i+9] = sum10; 1905dbdd7285SBarry Smith y[11*i+10] = sum11; 1906dbdd7285SBarry Smith } 1907dbdd7285SBarry Smith 1908dbdd7285SBarry Smith ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr); 19093649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1910dbdd7285SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1911dbdd7285SBarry Smith PetscFunctionReturn(0); 1912dbdd7285SBarry Smith } 1913dbdd7285SBarry Smith 1914dbdd7285SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 1915dbdd7285SBarry Smith { 1916dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1917dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1918d2840e09SBarry Smith const PetscScalar *x,*v; 1919d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1920dbdd7285SBarry Smith PetscErrorCode ierr; 1921d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1922dbdd7285SBarry Smith PetscInt n,i,jrow,j; 1923dbdd7285SBarry Smith 1924dbdd7285SBarry Smith PetscFunctionBegin; 1925dbdd7285SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 19263649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1927dbdd7285SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1928dbdd7285SBarry Smith idx = a->j; 1929dbdd7285SBarry Smith v = a->a; 1930dbdd7285SBarry Smith ii = a->i; 1931dbdd7285SBarry Smith 1932dbdd7285SBarry Smith for (i=0; i<m; i++) { 1933dbdd7285SBarry Smith jrow = ii[i]; 1934dbdd7285SBarry Smith n = ii[i+1] - jrow; 1935dbdd7285SBarry Smith sum1 = 0.0; 1936dbdd7285SBarry Smith sum2 = 0.0; 1937dbdd7285SBarry Smith sum3 = 0.0; 1938dbdd7285SBarry Smith sum4 = 0.0; 1939dbdd7285SBarry Smith sum5 = 0.0; 1940dbdd7285SBarry Smith sum6 = 0.0; 1941dbdd7285SBarry Smith sum7 = 0.0; 1942dbdd7285SBarry Smith sum8 = 0.0; 1943dbdd7285SBarry Smith sum9 = 0.0; 1944dbdd7285SBarry Smith sum10 = 0.0; 1945dbdd7285SBarry Smith sum11 = 0.0; 1946dbdd7285SBarry Smith for (j=0; j<n; j++) { 1947dbdd7285SBarry Smith sum1 += v[jrow]*x[11*idx[jrow]]; 1948dbdd7285SBarry Smith sum2 += v[jrow]*x[11*idx[jrow]+1]; 1949dbdd7285SBarry Smith sum3 += v[jrow]*x[11*idx[jrow]+2]; 1950dbdd7285SBarry Smith sum4 += v[jrow]*x[11*idx[jrow]+3]; 1951dbdd7285SBarry Smith sum5 += v[jrow]*x[11*idx[jrow]+4]; 1952dbdd7285SBarry Smith sum6 += v[jrow]*x[11*idx[jrow]+5]; 1953dbdd7285SBarry Smith sum7 += v[jrow]*x[11*idx[jrow]+6]; 1954dbdd7285SBarry Smith sum8 += v[jrow]*x[11*idx[jrow]+7]; 1955dbdd7285SBarry Smith sum9 += v[jrow]*x[11*idx[jrow]+8]; 1956dbdd7285SBarry Smith sum10 += v[jrow]*x[11*idx[jrow]+9]; 1957dbdd7285SBarry Smith sum11 += v[jrow]*x[11*idx[jrow]+10]; 1958dbdd7285SBarry Smith jrow++; 1959dbdd7285SBarry Smith } 1960dbdd7285SBarry Smith y[11*i] += sum1; 1961dbdd7285SBarry Smith y[11*i+1] += sum2; 1962dbdd7285SBarry Smith y[11*i+2] += sum3; 1963dbdd7285SBarry Smith y[11*i+3] += sum4; 1964dbdd7285SBarry Smith y[11*i+4] += sum5; 1965dbdd7285SBarry Smith y[11*i+5] += sum6; 1966dbdd7285SBarry Smith y[11*i+6] += sum7; 1967dbdd7285SBarry Smith y[11*i+7] += sum8; 1968dbdd7285SBarry Smith y[11*i+8] += sum9; 1969dbdd7285SBarry Smith y[11*i+9] += sum10; 1970dbdd7285SBarry Smith y[11*i+10] += sum11; 1971dbdd7285SBarry Smith } 1972dbdd7285SBarry Smith 1973dbdd7285SBarry Smith ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 19743649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1975dbdd7285SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1976dbdd7285SBarry Smith PetscFunctionReturn(0); 1977dbdd7285SBarry Smith } 1978dbdd7285SBarry Smith 1979dbdd7285SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1980dbdd7285SBarry Smith { 1981dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1982dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1983d2840e09SBarry Smith const PetscScalar *x,*v; 1984d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 1985dbdd7285SBarry Smith PetscErrorCode ierr; 1986d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1987d2840e09SBarry Smith PetscInt n,i; 1988dbdd7285SBarry Smith 1989dbdd7285SBarry Smith PetscFunctionBegin; 1990d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 19913649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1992dbdd7285SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1993dbdd7285SBarry Smith 1994dbdd7285SBarry Smith for (i=0; i<m; i++) { 1995dbdd7285SBarry Smith idx = a->j + a->i[i]; 1996dbdd7285SBarry Smith v = a->a + a->i[i]; 1997dbdd7285SBarry Smith n = a->i[i+1] - a->i[i]; 1998dbdd7285SBarry Smith alpha1 = x[11*i]; 1999dbdd7285SBarry Smith alpha2 = x[11*i+1]; 2000dbdd7285SBarry Smith alpha3 = x[11*i+2]; 2001dbdd7285SBarry Smith alpha4 = x[11*i+3]; 2002dbdd7285SBarry Smith alpha5 = x[11*i+4]; 2003dbdd7285SBarry Smith alpha6 = x[11*i+5]; 2004dbdd7285SBarry Smith alpha7 = x[11*i+6]; 2005dbdd7285SBarry Smith alpha8 = x[11*i+7]; 2006dbdd7285SBarry Smith alpha9 = x[11*i+8]; 2007dbdd7285SBarry Smith alpha10 = x[11*i+9]; 2008dbdd7285SBarry Smith alpha11 = x[11*i+10]; 2009dbdd7285SBarry Smith while (n-->0) { 2010dbdd7285SBarry Smith y[11*(*idx)] += alpha1*(*v); 2011dbdd7285SBarry Smith y[11*(*idx)+1] += alpha2*(*v); 2012dbdd7285SBarry Smith y[11*(*idx)+2] += alpha3*(*v); 2013dbdd7285SBarry Smith y[11*(*idx)+3] += alpha4*(*v); 2014dbdd7285SBarry Smith y[11*(*idx)+4] += alpha5*(*v); 2015dbdd7285SBarry Smith y[11*(*idx)+5] += alpha6*(*v); 2016dbdd7285SBarry Smith y[11*(*idx)+6] += alpha7*(*v); 2017dbdd7285SBarry Smith y[11*(*idx)+7] += alpha8*(*v); 2018dbdd7285SBarry Smith y[11*(*idx)+8] += alpha9*(*v); 2019dbdd7285SBarry Smith y[11*(*idx)+9] += alpha10*(*v); 2020dbdd7285SBarry Smith y[11*(*idx)+10] += alpha11*(*v); 2021dbdd7285SBarry Smith idx++; v++; 2022dbdd7285SBarry Smith } 2023dbdd7285SBarry Smith } 2024dbdd7285SBarry Smith ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 20253649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2026dbdd7285SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2027dbdd7285SBarry Smith PetscFunctionReturn(0); 2028dbdd7285SBarry Smith } 2029dbdd7285SBarry Smith 2030dbdd7285SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 2031dbdd7285SBarry Smith { 2032dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2033dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2034d2840e09SBarry Smith const PetscScalar *x,*v; 2035d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 2036dbdd7285SBarry Smith PetscErrorCode ierr; 2037d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2038d2840e09SBarry Smith PetscInt n,i; 2039dbdd7285SBarry Smith 2040dbdd7285SBarry Smith PetscFunctionBegin; 2041dbdd7285SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 20423649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2043dbdd7285SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2044dbdd7285SBarry Smith for (i=0; i<m; i++) { 2045dbdd7285SBarry Smith idx = a->j + a->i[i]; 2046dbdd7285SBarry Smith v = a->a + a->i[i]; 2047dbdd7285SBarry Smith n = a->i[i+1] - a->i[i]; 2048dbdd7285SBarry Smith alpha1 = x[11*i]; 2049dbdd7285SBarry Smith alpha2 = x[11*i+1]; 2050dbdd7285SBarry Smith alpha3 = x[11*i+2]; 2051dbdd7285SBarry Smith alpha4 = x[11*i+3]; 2052dbdd7285SBarry Smith alpha5 = x[11*i+4]; 2053dbdd7285SBarry Smith alpha6 = x[11*i+5]; 2054dbdd7285SBarry Smith alpha7 = x[11*i+6]; 2055dbdd7285SBarry Smith alpha8 = x[11*i+7]; 2056dbdd7285SBarry Smith alpha9 = x[11*i+8]; 2057dbdd7285SBarry Smith alpha10 = x[11*i+9]; 2058dbdd7285SBarry Smith alpha11 = x[11*i+10]; 2059dbdd7285SBarry Smith while (n-->0) { 2060dbdd7285SBarry Smith y[11*(*idx)] += alpha1*(*v); 2061dbdd7285SBarry Smith y[11*(*idx)+1] += alpha2*(*v); 2062dbdd7285SBarry Smith y[11*(*idx)+2] += alpha3*(*v); 2063dbdd7285SBarry Smith y[11*(*idx)+3] += alpha4*(*v); 2064dbdd7285SBarry Smith y[11*(*idx)+4] += alpha5*(*v); 2065dbdd7285SBarry Smith y[11*(*idx)+5] += alpha6*(*v); 2066dbdd7285SBarry Smith y[11*(*idx)+6] += alpha7*(*v); 2067dbdd7285SBarry Smith y[11*(*idx)+7] += alpha8*(*v); 2068dbdd7285SBarry Smith y[11*(*idx)+8] += alpha9*(*v); 2069dbdd7285SBarry Smith y[11*(*idx)+9] += alpha10*(*v); 2070dbdd7285SBarry Smith y[11*(*idx)+10] += alpha11*(*v); 2071dbdd7285SBarry Smith idx++; v++; 2072dbdd7285SBarry Smith } 2073dbdd7285SBarry Smith } 2074dbdd7285SBarry Smith ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 20753649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2076dbdd7285SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2077dbdd7285SBarry Smith PetscFunctionReturn(0); 2078dbdd7285SBarry Smith } 2079dbdd7285SBarry Smith 2080dbdd7285SBarry Smith 2081dbdd7285SBarry Smith /*--------------------------------------------------------------------------------------------*/ 2082dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 20832f7816d4SBarry Smith { 20842f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 20852f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2086d2840e09SBarry Smith const PetscScalar *x,*v; 2087d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 20882f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2089dfbe8321SBarry Smith PetscErrorCode ierr; 2090d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2091d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 20922f7816d4SBarry Smith 20932f7816d4SBarry Smith PetscFunctionBegin; 20943649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 20951ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 20962f7816d4SBarry Smith idx = a->j; 20972f7816d4SBarry Smith v = a->a; 20982f7816d4SBarry Smith ii = a->i; 20992f7816d4SBarry Smith 21002f7816d4SBarry Smith for (i=0; i<m; i++) { 21012f7816d4SBarry Smith jrow = ii[i]; 21022f7816d4SBarry Smith n = ii[i+1] - jrow; 21032f7816d4SBarry Smith sum1 = 0.0; 21042f7816d4SBarry Smith sum2 = 0.0; 21052f7816d4SBarry Smith sum3 = 0.0; 21062f7816d4SBarry Smith sum4 = 0.0; 21072f7816d4SBarry Smith sum5 = 0.0; 21082f7816d4SBarry Smith sum6 = 0.0; 21092f7816d4SBarry Smith sum7 = 0.0; 21102f7816d4SBarry Smith sum8 = 0.0; 21112f7816d4SBarry Smith sum9 = 0.0; 21122f7816d4SBarry Smith sum10 = 0.0; 21132f7816d4SBarry Smith sum11 = 0.0; 21142f7816d4SBarry Smith sum12 = 0.0; 21152f7816d4SBarry Smith sum13 = 0.0; 21162f7816d4SBarry Smith sum14 = 0.0; 21172f7816d4SBarry Smith sum15 = 0.0; 21182f7816d4SBarry Smith sum16 = 0.0; 211926fbe8dcSKarl Rupp 2120b3c51c66SMatthew Knepley nonzerorow += (n>0); 21212f7816d4SBarry Smith for (j=0; j<n; j++) { 21222f7816d4SBarry Smith sum1 += v[jrow]*x[16*idx[jrow]]; 21232f7816d4SBarry Smith sum2 += v[jrow]*x[16*idx[jrow]+1]; 21242f7816d4SBarry Smith sum3 += v[jrow]*x[16*idx[jrow]+2]; 21252f7816d4SBarry Smith sum4 += v[jrow]*x[16*idx[jrow]+3]; 21262f7816d4SBarry Smith sum5 += v[jrow]*x[16*idx[jrow]+4]; 21272f7816d4SBarry Smith sum6 += v[jrow]*x[16*idx[jrow]+5]; 21282f7816d4SBarry Smith sum7 += v[jrow]*x[16*idx[jrow]+6]; 21292f7816d4SBarry Smith sum8 += v[jrow]*x[16*idx[jrow]+7]; 21302f7816d4SBarry Smith sum9 += v[jrow]*x[16*idx[jrow]+8]; 21312f7816d4SBarry Smith sum10 += v[jrow]*x[16*idx[jrow]+9]; 21322f7816d4SBarry Smith sum11 += v[jrow]*x[16*idx[jrow]+10]; 21332f7816d4SBarry Smith sum12 += v[jrow]*x[16*idx[jrow]+11]; 21342f7816d4SBarry Smith sum13 += v[jrow]*x[16*idx[jrow]+12]; 21352f7816d4SBarry Smith sum14 += v[jrow]*x[16*idx[jrow]+13]; 21362f7816d4SBarry Smith sum15 += v[jrow]*x[16*idx[jrow]+14]; 21372f7816d4SBarry Smith sum16 += v[jrow]*x[16*idx[jrow]+15]; 21382f7816d4SBarry Smith jrow++; 21392f7816d4SBarry Smith } 21402f7816d4SBarry Smith y[16*i] = sum1; 21412f7816d4SBarry Smith y[16*i+1] = sum2; 21422f7816d4SBarry Smith y[16*i+2] = sum3; 21432f7816d4SBarry Smith y[16*i+3] = sum4; 21442f7816d4SBarry Smith y[16*i+4] = sum5; 21452f7816d4SBarry Smith y[16*i+5] = sum6; 21462f7816d4SBarry Smith y[16*i+6] = sum7; 21472f7816d4SBarry Smith y[16*i+7] = sum8; 21482f7816d4SBarry Smith y[16*i+8] = sum9; 21492f7816d4SBarry Smith y[16*i+9] = sum10; 21502f7816d4SBarry Smith y[16*i+10] = sum11; 21512f7816d4SBarry Smith y[16*i+11] = sum12; 21522f7816d4SBarry Smith y[16*i+12] = sum13; 21532f7816d4SBarry Smith y[16*i+13] = sum14; 21542f7816d4SBarry Smith y[16*i+14] = sum15; 21552f7816d4SBarry Smith y[16*i+15] = sum16; 21562f7816d4SBarry Smith } 21572f7816d4SBarry Smith 2158dc0b31edSSatish Balay ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr); 21593649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 21601ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 21612f7816d4SBarry Smith PetscFunctionReturn(0); 21622f7816d4SBarry Smith } 21632f7816d4SBarry Smith 2164dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 21652f7816d4SBarry Smith { 21662f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 21672f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2168d2840e09SBarry Smith const PetscScalar *x,*v; 2169d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 21702f7816d4SBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2171dfbe8321SBarry Smith PetscErrorCode ierr; 2172d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2173d2840e09SBarry Smith PetscInt n,i; 21742f7816d4SBarry Smith 21752f7816d4SBarry Smith PetscFunctionBegin; 2176d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 21773649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 21781ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2179bfec09a0SHong Zhang 21802f7816d4SBarry Smith for (i=0; i<m; i++) { 2181bfec09a0SHong Zhang idx = a->j + a->i[i]; 2182bfec09a0SHong Zhang v = a->a + a->i[i]; 21832f7816d4SBarry Smith n = a->i[i+1] - a->i[i]; 21842f7816d4SBarry Smith alpha1 = x[16*i]; 21852f7816d4SBarry Smith alpha2 = x[16*i+1]; 21862f7816d4SBarry Smith alpha3 = x[16*i+2]; 21872f7816d4SBarry Smith alpha4 = x[16*i+3]; 21882f7816d4SBarry Smith alpha5 = x[16*i+4]; 21892f7816d4SBarry Smith alpha6 = x[16*i+5]; 21902f7816d4SBarry Smith alpha7 = x[16*i+6]; 21912f7816d4SBarry Smith alpha8 = x[16*i+7]; 21922f7816d4SBarry Smith alpha9 = x[16*i+8]; 21932f7816d4SBarry Smith alpha10 = x[16*i+9]; 21942f7816d4SBarry Smith alpha11 = x[16*i+10]; 21952f7816d4SBarry Smith alpha12 = x[16*i+11]; 21962f7816d4SBarry Smith alpha13 = x[16*i+12]; 21972f7816d4SBarry Smith alpha14 = x[16*i+13]; 21982f7816d4SBarry Smith alpha15 = x[16*i+14]; 21992f7816d4SBarry Smith alpha16 = x[16*i+15]; 22002f7816d4SBarry Smith while (n-->0) { 22012f7816d4SBarry Smith y[16*(*idx)] += alpha1*(*v); 22022f7816d4SBarry Smith y[16*(*idx)+1] += alpha2*(*v); 22032f7816d4SBarry Smith y[16*(*idx)+2] += alpha3*(*v); 22042f7816d4SBarry Smith y[16*(*idx)+3] += alpha4*(*v); 22052f7816d4SBarry Smith y[16*(*idx)+4] += alpha5*(*v); 22062f7816d4SBarry Smith y[16*(*idx)+5] += alpha6*(*v); 22072f7816d4SBarry Smith y[16*(*idx)+6] += alpha7*(*v); 22082f7816d4SBarry Smith y[16*(*idx)+7] += alpha8*(*v); 22092f7816d4SBarry Smith y[16*(*idx)+8] += alpha9*(*v); 22102f7816d4SBarry Smith y[16*(*idx)+9] += alpha10*(*v); 22112f7816d4SBarry Smith y[16*(*idx)+10] += alpha11*(*v); 22122f7816d4SBarry Smith y[16*(*idx)+11] += alpha12*(*v); 22132f7816d4SBarry Smith y[16*(*idx)+12] += alpha13*(*v); 22142f7816d4SBarry Smith y[16*(*idx)+13] += alpha14*(*v); 22152f7816d4SBarry Smith y[16*(*idx)+14] += alpha15*(*v); 22162f7816d4SBarry Smith y[16*(*idx)+15] += alpha16*(*v); 22172f7816d4SBarry Smith idx++; v++; 22182f7816d4SBarry Smith } 22192f7816d4SBarry Smith } 2220dc0b31edSSatish Balay ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 22213649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 22221ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 22232f7816d4SBarry Smith PetscFunctionReturn(0); 22242f7816d4SBarry Smith } 22252f7816d4SBarry Smith 2226dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 22272f7816d4SBarry Smith { 22282f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 22292f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2230d2840e09SBarry Smith const PetscScalar *x,*v; 2231d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 22322f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2233dfbe8321SBarry Smith PetscErrorCode ierr; 2234d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2235b24ad042SBarry Smith PetscInt n,i,jrow,j; 22362f7816d4SBarry Smith 22372f7816d4SBarry Smith PetscFunctionBegin; 22382f7816d4SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 22393649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 22401ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 22412f7816d4SBarry Smith idx = a->j; 22422f7816d4SBarry Smith v = a->a; 22432f7816d4SBarry Smith ii = a->i; 22442f7816d4SBarry Smith 22452f7816d4SBarry Smith for (i=0; i<m; i++) { 22462f7816d4SBarry Smith jrow = ii[i]; 22472f7816d4SBarry Smith n = ii[i+1] - jrow; 22482f7816d4SBarry Smith sum1 = 0.0; 22492f7816d4SBarry Smith sum2 = 0.0; 22502f7816d4SBarry Smith sum3 = 0.0; 22512f7816d4SBarry Smith sum4 = 0.0; 22522f7816d4SBarry Smith sum5 = 0.0; 22532f7816d4SBarry Smith sum6 = 0.0; 22542f7816d4SBarry Smith sum7 = 0.0; 22552f7816d4SBarry Smith sum8 = 0.0; 22562f7816d4SBarry Smith sum9 = 0.0; 22572f7816d4SBarry Smith sum10 = 0.0; 22582f7816d4SBarry Smith sum11 = 0.0; 22592f7816d4SBarry Smith sum12 = 0.0; 22602f7816d4SBarry Smith sum13 = 0.0; 22612f7816d4SBarry Smith sum14 = 0.0; 22622f7816d4SBarry Smith sum15 = 0.0; 22632f7816d4SBarry Smith sum16 = 0.0; 22642f7816d4SBarry Smith for (j=0; j<n; j++) { 22652f7816d4SBarry Smith sum1 += v[jrow]*x[16*idx[jrow]]; 22662f7816d4SBarry Smith sum2 += v[jrow]*x[16*idx[jrow]+1]; 22672f7816d4SBarry Smith sum3 += v[jrow]*x[16*idx[jrow]+2]; 22682f7816d4SBarry Smith sum4 += v[jrow]*x[16*idx[jrow]+3]; 22692f7816d4SBarry Smith sum5 += v[jrow]*x[16*idx[jrow]+4]; 22702f7816d4SBarry Smith sum6 += v[jrow]*x[16*idx[jrow]+5]; 22712f7816d4SBarry Smith sum7 += v[jrow]*x[16*idx[jrow]+6]; 22722f7816d4SBarry Smith sum8 += v[jrow]*x[16*idx[jrow]+7]; 22732f7816d4SBarry Smith sum9 += v[jrow]*x[16*idx[jrow]+8]; 22742f7816d4SBarry Smith sum10 += v[jrow]*x[16*idx[jrow]+9]; 22752f7816d4SBarry Smith sum11 += v[jrow]*x[16*idx[jrow]+10]; 22762f7816d4SBarry Smith sum12 += v[jrow]*x[16*idx[jrow]+11]; 22772f7816d4SBarry Smith sum13 += v[jrow]*x[16*idx[jrow]+12]; 22782f7816d4SBarry Smith sum14 += v[jrow]*x[16*idx[jrow]+13]; 22792f7816d4SBarry Smith sum15 += v[jrow]*x[16*idx[jrow]+14]; 22802f7816d4SBarry Smith sum16 += v[jrow]*x[16*idx[jrow]+15]; 22812f7816d4SBarry Smith jrow++; 22822f7816d4SBarry Smith } 22832f7816d4SBarry Smith y[16*i] += sum1; 22842f7816d4SBarry Smith y[16*i+1] += sum2; 22852f7816d4SBarry Smith y[16*i+2] += sum3; 22862f7816d4SBarry Smith y[16*i+3] += sum4; 22872f7816d4SBarry Smith y[16*i+4] += sum5; 22882f7816d4SBarry Smith y[16*i+5] += sum6; 22892f7816d4SBarry Smith y[16*i+6] += sum7; 22902f7816d4SBarry Smith y[16*i+7] += sum8; 22912f7816d4SBarry Smith y[16*i+8] += sum9; 22922f7816d4SBarry Smith y[16*i+9] += sum10; 22932f7816d4SBarry Smith y[16*i+10] += sum11; 22942f7816d4SBarry Smith y[16*i+11] += sum12; 22952f7816d4SBarry Smith y[16*i+12] += sum13; 22962f7816d4SBarry Smith y[16*i+13] += sum14; 22972f7816d4SBarry Smith y[16*i+14] += sum15; 22982f7816d4SBarry Smith y[16*i+15] += sum16; 22992f7816d4SBarry Smith } 23002f7816d4SBarry Smith 2301dc0b31edSSatish Balay ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 23023649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 23031ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 23042f7816d4SBarry Smith PetscFunctionReturn(0); 23052f7816d4SBarry Smith } 23062f7816d4SBarry Smith 2307dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 23082f7816d4SBarry Smith { 23092f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 23102f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2311d2840e09SBarry Smith const PetscScalar *x,*v; 2312d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 23132f7816d4SBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2314dfbe8321SBarry Smith PetscErrorCode ierr; 2315d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2316d2840e09SBarry Smith PetscInt n,i; 23172f7816d4SBarry Smith 23182f7816d4SBarry Smith PetscFunctionBegin; 23192f7816d4SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 23203649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 23211ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 23222f7816d4SBarry Smith for (i=0; i<m; i++) { 2323bfec09a0SHong Zhang idx = a->j + a->i[i]; 2324bfec09a0SHong Zhang v = a->a + a->i[i]; 23252f7816d4SBarry Smith n = a->i[i+1] - a->i[i]; 23262f7816d4SBarry Smith alpha1 = x[16*i]; 23272f7816d4SBarry Smith alpha2 = x[16*i+1]; 23282f7816d4SBarry Smith alpha3 = x[16*i+2]; 23292f7816d4SBarry Smith alpha4 = x[16*i+3]; 23302f7816d4SBarry Smith alpha5 = x[16*i+4]; 23312f7816d4SBarry Smith alpha6 = x[16*i+5]; 23322f7816d4SBarry Smith alpha7 = x[16*i+6]; 23332f7816d4SBarry Smith alpha8 = x[16*i+7]; 23342f7816d4SBarry Smith alpha9 = x[16*i+8]; 23352f7816d4SBarry Smith alpha10 = x[16*i+9]; 23362f7816d4SBarry Smith alpha11 = x[16*i+10]; 23372f7816d4SBarry Smith alpha12 = x[16*i+11]; 23382f7816d4SBarry Smith alpha13 = x[16*i+12]; 23392f7816d4SBarry Smith alpha14 = x[16*i+13]; 23402f7816d4SBarry Smith alpha15 = x[16*i+14]; 23412f7816d4SBarry Smith alpha16 = x[16*i+15]; 23422f7816d4SBarry Smith while (n-->0) { 23432f7816d4SBarry Smith y[16*(*idx)] += alpha1*(*v); 23442f7816d4SBarry Smith y[16*(*idx)+1] += alpha2*(*v); 23452f7816d4SBarry Smith y[16*(*idx)+2] += alpha3*(*v); 23462f7816d4SBarry Smith y[16*(*idx)+3] += alpha4*(*v); 23472f7816d4SBarry Smith y[16*(*idx)+4] += alpha5*(*v); 23482f7816d4SBarry Smith y[16*(*idx)+5] += alpha6*(*v); 23492f7816d4SBarry Smith y[16*(*idx)+6] += alpha7*(*v); 23502f7816d4SBarry Smith y[16*(*idx)+7] += alpha8*(*v); 23512f7816d4SBarry Smith y[16*(*idx)+8] += alpha9*(*v); 23522f7816d4SBarry Smith y[16*(*idx)+9] += alpha10*(*v); 23532f7816d4SBarry Smith y[16*(*idx)+10] += alpha11*(*v); 23542f7816d4SBarry Smith y[16*(*idx)+11] += alpha12*(*v); 23552f7816d4SBarry Smith y[16*(*idx)+12] += alpha13*(*v); 23562f7816d4SBarry Smith y[16*(*idx)+13] += alpha14*(*v); 23572f7816d4SBarry Smith y[16*(*idx)+14] += alpha15*(*v); 23582f7816d4SBarry Smith y[16*(*idx)+15] += alpha16*(*v); 23592f7816d4SBarry Smith idx++; v++; 23602f7816d4SBarry Smith } 23612f7816d4SBarry Smith } 2362dc0b31edSSatish Balay ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 23633649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 23641ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 236566ed3db0SBarry Smith PetscFunctionReturn(0); 236666ed3db0SBarry Smith } 236766ed3db0SBarry Smith 2368ed1c418cSBarry Smith /*--------------------------------------------------------------------------------------------*/ 2369ed1c418cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2370ed1c418cSBarry Smith { 2371ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2372ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2373d2840e09SBarry Smith const PetscScalar *x,*v; 2374d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2375ed1c418cSBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2376ed1c418cSBarry Smith PetscErrorCode ierr; 2377d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2378d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 2379ed1c418cSBarry Smith 2380ed1c418cSBarry Smith PetscFunctionBegin; 23813649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2382ed1c418cSBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2383ed1c418cSBarry Smith idx = a->j; 2384ed1c418cSBarry Smith v = a->a; 2385ed1c418cSBarry Smith ii = a->i; 2386ed1c418cSBarry Smith 2387ed1c418cSBarry Smith for (i=0; i<m; i++) { 2388ed1c418cSBarry Smith jrow = ii[i]; 2389ed1c418cSBarry Smith n = ii[i+1] - jrow; 2390ed1c418cSBarry Smith sum1 = 0.0; 2391ed1c418cSBarry Smith sum2 = 0.0; 2392ed1c418cSBarry Smith sum3 = 0.0; 2393ed1c418cSBarry Smith sum4 = 0.0; 2394ed1c418cSBarry Smith sum5 = 0.0; 2395ed1c418cSBarry Smith sum6 = 0.0; 2396ed1c418cSBarry Smith sum7 = 0.0; 2397ed1c418cSBarry Smith sum8 = 0.0; 2398ed1c418cSBarry Smith sum9 = 0.0; 2399ed1c418cSBarry Smith sum10 = 0.0; 2400ed1c418cSBarry Smith sum11 = 0.0; 2401ed1c418cSBarry Smith sum12 = 0.0; 2402ed1c418cSBarry Smith sum13 = 0.0; 2403ed1c418cSBarry Smith sum14 = 0.0; 2404ed1c418cSBarry Smith sum15 = 0.0; 2405ed1c418cSBarry Smith sum16 = 0.0; 2406ed1c418cSBarry Smith sum17 = 0.0; 2407ed1c418cSBarry Smith sum18 = 0.0; 240826fbe8dcSKarl Rupp 2409ed1c418cSBarry Smith nonzerorow += (n>0); 2410ed1c418cSBarry Smith for (j=0; j<n; j++) { 2411ed1c418cSBarry Smith sum1 += v[jrow]*x[18*idx[jrow]]; 2412ed1c418cSBarry Smith sum2 += v[jrow]*x[18*idx[jrow]+1]; 2413ed1c418cSBarry Smith sum3 += v[jrow]*x[18*idx[jrow]+2]; 2414ed1c418cSBarry Smith sum4 += v[jrow]*x[18*idx[jrow]+3]; 2415ed1c418cSBarry Smith sum5 += v[jrow]*x[18*idx[jrow]+4]; 2416ed1c418cSBarry Smith sum6 += v[jrow]*x[18*idx[jrow]+5]; 2417ed1c418cSBarry Smith sum7 += v[jrow]*x[18*idx[jrow]+6]; 2418ed1c418cSBarry Smith sum8 += v[jrow]*x[18*idx[jrow]+7]; 2419ed1c418cSBarry Smith sum9 += v[jrow]*x[18*idx[jrow]+8]; 2420ed1c418cSBarry Smith sum10 += v[jrow]*x[18*idx[jrow]+9]; 2421ed1c418cSBarry Smith sum11 += v[jrow]*x[18*idx[jrow]+10]; 2422ed1c418cSBarry Smith sum12 += v[jrow]*x[18*idx[jrow]+11]; 2423ed1c418cSBarry Smith sum13 += v[jrow]*x[18*idx[jrow]+12]; 2424ed1c418cSBarry Smith sum14 += v[jrow]*x[18*idx[jrow]+13]; 2425ed1c418cSBarry Smith sum15 += v[jrow]*x[18*idx[jrow]+14]; 2426ed1c418cSBarry Smith sum16 += v[jrow]*x[18*idx[jrow]+15]; 2427ed1c418cSBarry Smith sum17 += v[jrow]*x[18*idx[jrow]+16]; 2428ed1c418cSBarry Smith sum18 += v[jrow]*x[18*idx[jrow]+17]; 2429ed1c418cSBarry Smith jrow++; 2430ed1c418cSBarry Smith } 2431ed1c418cSBarry Smith y[18*i] = sum1; 2432ed1c418cSBarry Smith y[18*i+1] = sum2; 2433ed1c418cSBarry Smith y[18*i+2] = sum3; 2434ed1c418cSBarry Smith y[18*i+3] = sum4; 2435ed1c418cSBarry Smith y[18*i+4] = sum5; 2436ed1c418cSBarry Smith y[18*i+5] = sum6; 2437ed1c418cSBarry Smith y[18*i+6] = sum7; 2438ed1c418cSBarry Smith y[18*i+7] = sum8; 2439ed1c418cSBarry Smith y[18*i+8] = sum9; 2440ed1c418cSBarry Smith y[18*i+9] = sum10; 2441ed1c418cSBarry Smith y[18*i+10] = sum11; 2442ed1c418cSBarry Smith y[18*i+11] = sum12; 2443ed1c418cSBarry Smith y[18*i+12] = sum13; 2444ed1c418cSBarry Smith y[18*i+13] = sum14; 2445ed1c418cSBarry Smith y[18*i+14] = sum15; 2446ed1c418cSBarry Smith y[18*i+15] = sum16; 2447ed1c418cSBarry Smith y[18*i+16] = sum17; 2448ed1c418cSBarry Smith y[18*i+17] = sum18; 2449ed1c418cSBarry Smith } 2450ed1c418cSBarry Smith 2451dc0b31edSSatish Balay ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr); 24523649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2453ed1c418cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2454ed1c418cSBarry Smith PetscFunctionReturn(0); 2455ed1c418cSBarry Smith } 2456ed1c418cSBarry Smith 2457ed1c418cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2458ed1c418cSBarry Smith { 2459ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2460ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2461d2840e09SBarry Smith const PetscScalar *x,*v; 2462d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2463ed1c418cSBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2464ed1c418cSBarry Smith PetscErrorCode ierr; 2465d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2466d2840e09SBarry Smith PetscInt n,i; 2467ed1c418cSBarry Smith 2468ed1c418cSBarry Smith PetscFunctionBegin; 2469d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 24703649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2471ed1c418cSBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2472ed1c418cSBarry Smith 2473ed1c418cSBarry Smith for (i=0; i<m; i++) { 2474ed1c418cSBarry Smith idx = a->j + a->i[i]; 2475ed1c418cSBarry Smith v = a->a + a->i[i]; 2476ed1c418cSBarry Smith n = a->i[i+1] - a->i[i]; 2477ed1c418cSBarry Smith alpha1 = x[18*i]; 2478ed1c418cSBarry Smith alpha2 = x[18*i+1]; 2479ed1c418cSBarry Smith alpha3 = x[18*i+2]; 2480ed1c418cSBarry Smith alpha4 = x[18*i+3]; 2481ed1c418cSBarry Smith alpha5 = x[18*i+4]; 2482ed1c418cSBarry Smith alpha6 = x[18*i+5]; 2483ed1c418cSBarry Smith alpha7 = x[18*i+6]; 2484ed1c418cSBarry Smith alpha8 = x[18*i+7]; 2485ed1c418cSBarry Smith alpha9 = x[18*i+8]; 2486ed1c418cSBarry Smith alpha10 = x[18*i+9]; 2487ed1c418cSBarry Smith alpha11 = x[18*i+10]; 2488ed1c418cSBarry Smith alpha12 = x[18*i+11]; 2489ed1c418cSBarry Smith alpha13 = x[18*i+12]; 2490ed1c418cSBarry Smith alpha14 = x[18*i+13]; 2491ed1c418cSBarry Smith alpha15 = x[18*i+14]; 2492ed1c418cSBarry Smith alpha16 = x[18*i+15]; 2493ed1c418cSBarry Smith alpha17 = x[18*i+16]; 2494ed1c418cSBarry Smith alpha18 = x[18*i+17]; 2495ed1c418cSBarry Smith while (n-->0) { 2496ed1c418cSBarry Smith y[18*(*idx)] += alpha1*(*v); 2497ed1c418cSBarry Smith y[18*(*idx)+1] += alpha2*(*v); 2498ed1c418cSBarry Smith y[18*(*idx)+2] += alpha3*(*v); 2499ed1c418cSBarry Smith y[18*(*idx)+3] += alpha4*(*v); 2500ed1c418cSBarry Smith y[18*(*idx)+4] += alpha5*(*v); 2501ed1c418cSBarry Smith y[18*(*idx)+5] += alpha6*(*v); 2502ed1c418cSBarry Smith y[18*(*idx)+6] += alpha7*(*v); 2503ed1c418cSBarry Smith y[18*(*idx)+7] += alpha8*(*v); 2504ed1c418cSBarry Smith y[18*(*idx)+8] += alpha9*(*v); 2505ed1c418cSBarry Smith y[18*(*idx)+9] += alpha10*(*v); 2506ed1c418cSBarry Smith y[18*(*idx)+10] += alpha11*(*v); 2507ed1c418cSBarry Smith y[18*(*idx)+11] += alpha12*(*v); 2508ed1c418cSBarry Smith y[18*(*idx)+12] += alpha13*(*v); 2509ed1c418cSBarry Smith y[18*(*idx)+13] += alpha14*(*v); 2510ed1c418cSBarry Smith y[18*(*idx)+14] += alpha15*(*v); 2511ed1c418cSBarry Smith y[18*(*idx)+15] += alpha16*(*v); 2512ed1c418cSBarry Smith y[18*(*idx)+16] += alpha17*(*v); 2513ed1c418cSBarry Smith y[18*(*idx)+17] += alpha18*(*v); 2514ed1c418cSBarry Smith idx++; v++; 2515ed1c418cSBarry Smith } 2516ed1c418cSBarry Smith } 2517dc0b31edSSatish Balay ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 25183649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2519ed1c418cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2520ed1c418cSBarry Smith PetscFunctionReturn(0); 2521ed1c418cSBarry Smith } 2522ed1c418cSBarry Smith 2523ed1c418cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2524ed1c418cSBarry Smith { 2525ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2526ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2527d2840e09SBarry Smith const PetscScalar *x,*v; 2528d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2529ed1c418cSBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2530ed1c418cSBarry Smith PetscErrorCode ierr; 2531d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2532ed1c418cSBarry Smith PetscInt n,i,jrow,j; 2533ed1c418cSBarry Smith 2534ed1c418cSBarry Smith PetscFunctionBegin; 2535ed1c418cSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 25363649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2537ed1c418cSBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2538ed1c418cSBarry Smith idx = a->j; 2539ed1c418cSBarry Smith v = a->a; 2540ed1c418cSBarry Smith ii = a->i; 2541ed1c418cSBarry Smith 2542ed1c418cSBarry Smith for (i=0; i<m; i++) { 2543ed1c418cSBarry Smith jrow = ii[i]; 2544ed1c418cSBarry Smith n = ii[i+1] - jrow; 2545ed1c418cSBarry Smith sum1 = 0.0; 2546ed1c418cSBarry Smith sum2 = 0.0; 2547ed1c418cSBarry Smith sum3 = 0.0; 2548ed1c418cSBarry Smith sum4 = 0.0; 2549ed1c418cSBarry Smith sum5 = 0.0; 2550ed1c418cSBarry Smith sum6 = 0.0; 2551ed1c418cSBarry Smith sum7 = 0.0; 2552ed1c418cSBarry Smith sum8 = 0.0; 2553ed1c418cSBarry Smith sum9 = 0.0; 2554ed1c418cSBarry Smith sum10 = 0.0; 2555ed1c418cSBarry Smith sum11 = 0.0; 2556ed1c418cSBarry Smith sum12 = 0.0; 2557ed1c418cSBarry Smith sum13 = 0.0; 2558ed1c418cSBarry Smith sum14 = 0.0; 2559ed1c418cSBarry Smith sum15 = 0.0; 2560ed1c418cSBarry Smith sum16 = 0.0; 2561ed1c418cSBarry Smith sum17 = 0.0; 2562ed1c418cSBarry Smith sum18 = 0.0; 2563ed1c418cSBarry Smith for (j=0; j<n; j++) { 2564ed1c418cSBarry Smith sum1 += v[jrow]*x[18*idx[jrow]]; 2565ed1c418cSBarry Smith sum2 += v[jrow]*x[18*idx[jrow]+1]; 2566ed1c418cSBarry Smith sum3 += v[jrow]*x[18*idx[jrow]+2]; 2567ed1c418cSBarry Smith sum4 += v[jrow]*x[18*idx[jrow]+3]; 2568ed1c418cSBarry Smith sum5 += v[jrow]*x[18*idx[jrow]+4]; 2569ed1c418cSBarry Smith sum6 += v[jrow]*x[18*idx[jrow]+5]; 2570ed1c418cSBarry Smith sum7 += v[jrow]*x[18*idx[jrow]+6]; 2571ed1c418cSBarry Smith sum8 += v[jrow]*x[18*idx[jrow]+7]; 2572ed1c418cSBarry Smith sum9 += v[jrow]*x[18*idx[jrow]+8]; 2573ed1c418cSBarry Smith sum10 += v[jrow]*x[18*idx[jrow]+9]; 2574ed1c418cSBarry Smith sum11 += v[jrow]*x[18*idx[jrow]+10]; 2575ed1c418cSBarry Smith sum12 += v[jrow]*x[18*idx[jrow]+11]; 2576ed1c418cSBarry Smith sum13 += v[jrow]*x[18*idx[jrow]+12]; 2577ed1c418cSBarry Smith sum14 += v[jrow]*x[18*idx[jrow]+13]; 2578ed1c418cSBarry Smith sum15 += v[jrow]*x[18*idx[jrow]+14]; 2579ed1c418cSBarry Smith sum16 += v[jrow]*x[18*idx[jrow]+15]; 2580ed1c418cSBarry Smith sum17 += v[jrow]*x[18*idx[jrow]+16]; 2581ed1c418cSBarry Smith sum18 += v[jrow]*x[18*idx[jrow]+17]; 2582ed1c418cSBarry Smith jrow++; 2583ed1c418cSBarry Smith } 2584ed1c418cSBarry Smith y[18*i] += sum1; 2585ed1c418cSBarry Smith y[18*i+1] += sum2; 2586ed1c418cSBarry Smith y[18*i+2] += sum3; 2587ed1c418cSBarry Smith y[18*i+3] += sum4; 2588ed1c418cSBarry Smith y[18*i+4] += sum5; 2589ed1c418cSBarry Smith y[18*i+5] += sum6; 2590ed1c418cSBarry Smith y[18*i+6] += sum7; 2591ed1c418cSBarry Smith y[18*i+7] += sum8; 2592ed1c418cSBarry Smith y[18*i+8] += sum9; 2593ed1c418cSBarry Smith y[18*i+9] += sum10; 2594ed1c418cSBarry Smith y[18*i+10] += sum11; 2595ed1c418cSBarry Smith y[18*i+11] += sum12; 2596ed1c418cSBarry Smith y[18*i+12] += sum13; 2597ed1c418cSBarry Smith y[18*i+13] += sum14; 2598ed1c418cSBarry Smith y[18*i+14] += sum15; 2599ed1c418cSBarry Smith y[18*i+15] += sum16; 2600ed1c418cSBarry Smith y[18*i+16] += sum17; 2601ed1c418cSBarry Smith y[18*i+17] += sum18; 2602ed1c418cSBarry Smith } 2603ed1c418cSBarry Smith 2604dc0b31edSSatish Balay ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 26053649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2606ed1c418cSBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2607ed1c418cSBarry Smith PetscFunctionReturn(0); 2608ed1c418cSBarry Smith } 2609ed1c418cSBarry Smith 2610ed1c418cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2611ed1c418cSBarry Smith { 2612ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2613ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2614d2840e09SBarry Smith const PetscScalar *x,*v; 2615d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2616ed1c418cSBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2617ed1c418cSBarry Smith PetscErrorCode ierr; 2618d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2619d2840e09SBarry Smith PetscInt n,i; 2620ed1c418cSBarry Smith 2621ed1c418cSBarry Smith PetscFunctionBegin; 2622ed1c418cSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 26233649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2624ed1c418cSBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2625ed1c418cSBarry Smith for (i=0; i<m; i++) { 2626ed1c418cSBarry Smith idx = a->j + a->i[i]; 2627ed1c418cSBarry Smith v = a->a + a->i[i]; 2628ed1c418cSBarry Smith n = a->i[i+1] - a->i[i]; 2629ed1c418cSBarry Smith alpha1 = x[18*i]; 2630ed1c418cSBarry Smith alpha2 = x[18*i+1]; 2631ed1c418cSBarry Smith alpha3 = x[18*i+2]; 2632ed1c418cSBarry Smith alpha4 = x[18*i+3]; 2633ed1c418cSBarry Smith alpha5 = x[18*i+4]; 2634ed1c418cSBarry Smith alpha6 = x[18*i+5]; 2635ed1c418cSBarry Smith alpha7 = x[18*i+6]; 2636ed1c418cSBarry Smith alpha8 = x[18*i+7]; 2637ed1c418cSBarry Smith alpha9 = x[18*i+8]; 2638ed1c418cSBarry Smith alpha10 = x[18*i+9]; 2639ed1c418cSBarry Smith alpha11 = x[18*i+10]; 2640ed1c418cSBarry Smith alpha12 = x[18*i+11]; 2641ed1c418cSBarry Smith alpha13 = x[18*i+12]; 2642ed1c418cSBarry Smith alpha14 = x[18*i+13]; 2643ed1c418cSBarry Smith alpha15 = x[18*i+14]; 2644ed1c418cSBarry Smith alpha16 = x[18*i+15]; 2645ed1c418cSBarry Smith alpha17 = x[18*i+16]; 2646ed1c418cSBarry Smith alpha18 = x[18*i+17]; 2647ed1c418cSBarry Smith while (n-->0) { 2648ed1c418cSBarry Smith y[18*(*idx)] += alpha1*(*v); 2649ed1c418cSBarry Smith y[18*(*idx)+1] += alpha2*(*v); 2650ed1c418cSBarry Smith y[18*(*idx)+2] += alpha3*(*v); 2651ed1c418cSBarry Smith y[18*(*idx)+3] += alpha4*(*v); 2652ed1c418cSBarry Smith y[18*(*idx)+4] += alpha5*(*v); 2653ed1c418cSBarry Smith y[18*(*idx)+5] += alpha6*(*v); 2654ed1c418cSBarry Smith y[18*(*idx)+6] += alpha7*(*v); 2655ed1c418cSBarry Smith y[18*(*idx)+7] += alpha8*(*v); 2656ed1c418cSBarry Smith y[18*(*idx)+8] += alpha9*(*v); 2657ed1c418cSBarry Smith y[18*(*idx)+9] += alpha10*(*v); 2658ed1c418cSBarry Smith y[18*(*idx)+10] += alpha11*(*v); 2659ed1c418cSBarry Smith y[18*(*idx)+11] += alpha12*(*v); 2660ed1c418cSBarry Smith y[18*(*idx)+12] += alpha13*(*v); 2661ed1c418cSBarry Smith y[18*(*idx)+13] += alpha14*(*v); 2662ed1c418cSBarry Smith y[18*(*idx)+14] += alpha15*(*v); 2663ed1c418cSBarry Smith y[18*(*idx)+15] += alpha16*(*v); 2664ed1c418cSBarry Smith y[18*(*idx)+16] += alpha17*(*v); 2665ed1c418cSBarry Smith y[18*(*idx)+17] += alpha18*(*v); 2666ed1c418cSBarry Smith idx++; v++; 2667ed1c418cSBarry Smith } 2668ed1c418cSBarry Smith } 2669dc0b31edSSatish Balay ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 26703649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2671ed1c418cSBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2672ed1c418cSBarry Smith PetscFunctionReturn(0); 2673ed1c418cSBarry Smith } 2674ed1c418cSBarry Smith 26756861f193SBarry Smith PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 26766861f193SBarry Smith { 26776861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 26786861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 26796861f193SBarry Smith const PetscScalar *x,*v; 26806861f193SBarry Smith PetscScalar *y,*sums; 26816861f193SBarry Smith PetscErrorCode ierr; 26826861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 26836861f193SBarry Smith PetscInt n,i,jrow,j,dof = b->dof,k; 26846861f193SBarry Smith 26856861f193SBarry Smith PetscFunctionBegin; 26863649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 26876861f193SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 26886861f193SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 26896861f193SBarry Smith idx = a->j; 26906861f193SBarry Smith v = a->a; 26916861f193SBarry Smith ii = a->i; 26926861f193SBarry Smith 26936861f193SBarry Smith for (i=0; i<m; i++) { 26946861f193SBarry Smith jrow = ii[i]; 26956861f193SBarry Smith n = ii[i+1] - jrow; 26966861f193SBarry Smith sums = y + dof*i; 26976861f193SBarry Smith for (j=0; j<n; j++) { 26986861f193SBarry Smith for (k=0; k<dof; k++) { 26996861f193SBarry Smith sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 27006861f193SBarry Smith } 27016861f193SBarry Smith jrow++; 27026861f193SBarry Smith } 27036861f193SBarry Smith } 27046861f193SBarry Smith 27056861f193SBarry Smith ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 27063649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 27076861f193SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 27086861f193SBarry Smith PetscFunctionReturn(0); 27096861f193SBarry Smith } 27106861f193SBarry Smith 27116861f193SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 27126861f193SBarry Smith { 27136861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 27146861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 27156861f193SBarry Smith const PetscScalar *x,*v; 27166861f193SBarry Smith PetscScalar *y,*sums; 27176861f193SBarry Smith PetscErrorCode ierr; 27186861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 27196861f193SBarry Smith PetscInt n,i,jrow,j,dof = b->dof,k; 27206861f193SBarry Smith 27216861f193SBarry Smith PetscFunctionBegin; 27226861f193SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 27233649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 27246861f193SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 27256861f193SBarry Smith idx = a->j; 27266861f193SBarry Smith v = a->a; 27276861f193SBarry Smith ii = a->i; 27286861f193SBarry Smith 27296861f193SBarry Smith for (i=0; i<m; i++) { 27306861f193SBarry Smith jrow = ii[i]; 27316861f193SBarry Smith n = ii[i+1] - jrow; 27326861f193SBarry Smith sums = y + dof*i; 27336861f193SBarry Smith for (j=0; j<n; j++) { 27346861f193SBarry Smith for (k=0; k<dof; k++) { 27356861f193SBarry Smith sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 27366861f193SBarry Smith } 27376861f193SBarry Smith jrow++; 27386861f193SBarry Smith } 27396861f193SBarry Smith } 27406861f193SBarry Smith 27416861f193SBarry Smith ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 27423649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 27436861f193SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 27446861f193SBarry Smith PetscFunctionReturn(0); 27456861f193SBarry Smith } 27466861f193SBarry Smith 27476861f193SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 27486861f193SBarry Smith { 27496861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 27506861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 27516861f193SBarry Smith const PetscScalar *x,*v,*alpha; 27526861f193SBarry Smith PetscScalar *y; 27536861f193SBarry Smith PetscErrorCode ierr; 27546861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 27556861f193SBarry Smith PetscInt n,i,k; 27566861f193SBarry Smith 27576861f193SBarry Smith PetscFunctionBegin; 27583649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 27596861f193SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 27606861f193SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 27616861f193SBarry Smith for (i=0; i<m; i++) { 27626861f193SBarry Smith idx = a->j + a->i[i]; 27636861f193SBarry Smith v = a->a + a->i[i]; 27646861f193SBarry Smith n = a->i[i+1] - a->i[i]; 27656861f193SBarry Smith alpha = x + dof*i; 27666861f193SBarry Smith while (n-->0) { 27676861f193SBarry Smith for (k=0; k<dof; k++) { 27686861f193SBarry Smith y[dof*(*idx)+k] += alpha[k]*(*v); 27696861f193SBarry Smith } 277083ce7366SBarry Smith idx++; v++; 27716861f193SBarry Smith } 27726861f193SBarry Smith } 27736861f193SBarry Smith ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 27743649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 27756861f193SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 27766861f193SBarry Smith PetscFunctionReturn(0); 27776861f193SBarry Smith } 27786861f193SBarry Smith 27796861f193SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 27806861f193SBarry Smith { 27816861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 27826861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 27836861f193SBarry Smith const PetscScalar *x,*v,*alpha; 27846861f193SBarry Smith PetscScalar *y; 27856861f193SBarry Smith PetscErrorCode ierr; 27866861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 27876861f193SBarry Smith PetscInt n,i,k; 27886861f193SBarry Smith 27896861f193SBarry Smith PetscFunctionBegin; 27906861f193SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 27913649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 27926861f193SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 27936861f193SBarry Smith for (i=0; i<m; i++) { 27946861f193SBarry Smith idx = a->j + a->i[i]; 27956861f193SBarry Smith v = a->a + a->i[i]; 27966861f193SBarry Smith n = a->i[i+1] - a->i[i]; 27976861f193SBarry Smith alpha = x + dof*i; 27986861f193SBarry Smith while (n-->0) { 27996861f193SBarry Smith for (k=0; k<dof; k++) { 28006861f193SBarry Smith y[dof*(*idx)+k] += alpha[k]*(*v); 28016861f193SBarry Smith } 280283ce7366SBarry Smith idx++; v++; 28036861f193SBarry Smith } 28046861f193SBarry Smith } 28056861f193SBarry Smith ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 28063649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 28076861f193SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 28086861f193SBarry Smith PetscFunctionReturn(0); 28096861f193SBarry Smith } 28106861f193SBarry Smith 2811f4a53059SBarry Smith /*===================================================================================*/ 2812dfbe8321SBarry Smith PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2813f4a53059SBarry Smith { 28144cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2815dfbe8321SBarry Smith PetscErrorCode ierr; 2816f4a53059SBarry Smith 2817b24ad042SBarry Smith PetscFunctionBegin; 2818f4a53059SBarry Smith /* start the scatter */ 2819ca9f406cSSatish Balay ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 28204cb9d9b8SBarry Smith ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 2821ca9f406cSSatish Balay ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 28224cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 2823f4a53059SBarry Smith PetscFunctionReturn(0); 2824f4a53059SBarry Smith } 2825f4a53059SBarry Smith 2826dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 28274cb9d9b8SBarry Smith { 28284cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2829dfbe8321SBarry Smith PetscErrorCode ierr; 2830b24ad042SBarry Smith 28314cb9d9b8SBarry Smith PetscFunctionBegin; 28324cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 28334cb9d9b8SBarry Smith ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 2834ca9f406cSSatish Balay ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2835ca9f406cSSatish Balay ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 28364cb9d9b8SBarry Smith PetscFunctionReturn(0); 28374cb9d9b8SBarry Smith } 28384cb9d9b8SBarry Smith 2839dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 28404cb9d9b8SBarry Smith { 28414cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2842dfbe8321SBarry Smith PetscErrorCode ierr; 28434cb9d9b8SBarry Smith 2844b24ad042SBarry Smith PetscFunctionBegin; 28454cb9d9b8SBarry Smith /* start the scatter */ 2846ca9f406cSSatish Balay ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2847d72c5c08SBarry Smith ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2848ca9f406cSSatish Balay ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2849717f2ec8SHong Zhang ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 28504cb9d9b8SBarry Smith PetscFunctionReturn(0); 28514cb9d9b8SBarry Smith } 28524cb9d9b8SBarry Smith 2853dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 28544cb9d9b8SBarry Smith { 28554cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2856dfbe8321SBarry Smith PetscErrorCode ierr; 2857b24ad042SBarry Smith 28584cb9d9b8SBarry Smith PetscFunctionBegin; 28594cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2860d72c5c08SBarry Smith ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2861e4a140f6SJunchao Zhang ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2862ca9f406cSSatish Balay ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 28634cb9d9b8SBarry Smith PetscFunctionReturn(0); 28644cb9d9b8SBarry Smith } 28654cb9d9b8SBarry Smith 286695ddefa2SHong Zhang /* ----------------------------------------------------------------*/ 28677ba1a0bfSKris Buschelman PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 28687ba1a0bfSKris Buschelman { 28697ba1a0bfSKris Buschelman PetscErrorCode ierr; 28700298fd71SBarry Smith PetscFreeSpaceList free_space=NULL,current_space=NULL; 28717ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp =(Mat_SeqMAIJ*)PP->data; 28727ba1a0bfSKris Buschelman Mat P =pp->AIJ; 28737ba1a0bfSKris Buschelman Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 2874d2840e09SBarry Smith PetscInt *pti,*ptj,*ptJ; 28757ba1a0bfSKris Buschelman PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 2876d2840e09SBarry Smith const PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof; 2877d2840e09SBarry Smith PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn; 28787ba1a0bfSKris Buschelman MatScalar *ca; 2879d2840e09SBarry Smith const PetscInt *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj; 28807ba1a0bfSKris Buschelman 28817ba1a0bfSKris Buschelman PetscFunctionBegin; 28827ba1a0bfSKris Buschelman /* Get ij structure of P^T */ 28837ba1a0bfSKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 28847ba1a0bfSKris Buschelman 28857ba1a0bfSKris Buschelman cn = pn*ppdof; 28867ba1a0bfSKris Buschelman /* Allocate ci array, arrays for fill computation and */ 28877ba1a0bfSKris Buschelman /* free space for accumulating nonzero column info */ 2888854ce69bSBarry Smith ierr = PetscMalloc1(cn+1,&ci);CHKERRQ(ierr); 28897ba1a0bfSKris Buschelman ci[0] = 0; 28907ba1a0bfSKris Buschelman 28917ba1a0bfSKris Buschelman /* Work arrays for rows of P^T*A */ 2892dcca6d9dSJed Brown ierr = PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);CHKERRQ(ierr); 2893c43dabc9SBarry Smith ierr = PetscMemzero(ptadenserow,an*sizeof(PetscInt));CHKERRQ(ierr); 2894c43dabc9SBarry Smith ierr = PetscMemzero(denserow,cn*sizeof(PetscInt));CHKERRQ(ierr); 28957ba1a0bfSKris Buschelman 28967ba1a0bfSKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 28977ba1a0bfSKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 28987ba1a0bfSKris Buschelman /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 2899f91af8c7SBarry Smith ierr = PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space);CHKERRQ(ierr); 29007ba1a0bfSKris Buschelman current_space = free_space; 29017ba1a0bfSKris Buschelman 29027ba1a0bfSKris Buschelman /* Determine symbolic info for each row of C: */ 29037ba1a0bfSKris Buschelman for (i=0; i<pn; i++) { 29047ba1a0bfSKris Buschelman ptnzi = pti[i+1] - pti[i]; 29057ba1a0bfSKris Buschelman ptJ = ptj + pti[i]; 29067ba1a0bfSKris Buschelman for (dof=0; dof<ppdof; dof++) { 29077ba1a0bfSKris Buschelman ptanzi = 0; 29087ba1a0bfSKris Buschelman /* Determine symbolic row of PtA: */ 29097ba1a0bfSKris Buschelman for (j=0; j<ptnzi; j++) { 29107ba1a0bfSKris Buschelman /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 29117ba1a0bfSKris Buschelman arow = ptJ[j]*ppdof + dof; 29127ba1a0bfSKris Buschelman /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 29137ba1a0bfSKris Buschelman anzj = ai[arow+1] - ai[arow]; 29147ba1a0bfSKris Buschelman ajj = aj + ai[arow]; 29157ba1a0bfSKris Buschelman for (k=0; k<anzj; k++) { 29167ba1a0bfSKris Buschelman if (!ptadenserow[ajj[k]]) { 29177ba1a0bfSKris Buschelman ptadenserow[ajj[k]] = -1; 29187ba1a0bfSKris Buschelman ptasparserow[ptanzi++] = ajj[k]; 29197ba1a0bfSKris Buschelman } 29207ba1a0bfSKris Buschelman } 29217ba1a0bfSKris Buschelman } 29227ba1a0bfSKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 29237ba1a0bfSKris Buschelman ptaj = ptasparserow; 29247ba1a0bfSKris Buschelman cnzi = 0; 29257ba1a0bfSKris Buschelman for (j=0; j<ptanzi; j++) { 29267ba1a0bfSKris Buschelman /* Get offset within block of P */ 29277ba1a0bfSKris Buschelman pshift = *ptaj%ppdof; 29287ba1a0bfSKris Buschelman /* Get block row of P */ 29297ba1a0bfSKris Buschelman prow = (*ptaj++)/ppdof; /* integer division */ 29307ba1a0bfSKris Buschelman /* P has same number of nonzeros per row as the compressed form */ 29317ba1a0bfSKris Buschelman pnzj = pi[prow+1] - pi[prow]; 29327ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 29337ba1a0bfSKris Buschelman for (k=0;k<pnzj;k++) { 29347ba1a0bfSKris Buschelman /* Locations in C are shifted by the offset within the block */ 29357ba1a0bfSKris Buschelman /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 29367ba1a0bfSKris Buschelman if (!denserow[pjj[k]*ppdof+pshift]) { 29377ba1a0bfSKris Buschelman denserow[pjj[k]*ppdof+pshift] = -1; 29387ba1a0bfSKris Buschelman sparserow[cnzi++] = pjj[k]*ppdof+pshift; 29397ba1a0bfSKris Buschelman } 29407ba1a0bfSKris Buschelman } 29417ba1a0bfSKris Buschelman } 29427ba1a0bfSKris Buschelman 29437ba1a0bfSKris Buschelman /* sort sparserow */ 29447ba1a0bfSKris Buschelman ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 29457ba1a0bfSKris Buschelman 29467ba1a0bfSKris Buschelman /* If free space is not available, make more free space */ 29477ba1a0bfSKris Buschelman /* Double the amount of total space in the list */ 29487ba1a0bfSKris Buschelman if (current_space->local_remaining<cnzi) { 2949f91af8c7SBarry Smith ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 29507ba1a0bfSKris Buschelman } 29517ba1a0bfSKris Buschelman 29527ba1a0bfSKris Buschelman /* Copy data into free space, and zero out denserows */ 29537ba1a0bfSKris Buschelman ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 295426fbe8dcSKarl Rupp 29557ba1a0bfSKris Buschelman current_space->array += cnzi; 29567ba1a0bfSKris Buschelman current_space->local_used += cnzi; 29577ba1a0bfSKris Buschelman current_space->local_remaining -= cnzi; 29587ba1a0bfSKris Buschelman 295926fbe8dcSKarl Rupp for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0; 296026fbe8dcSKarl Rupp for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0; 296126fbe8dcSKarl Rupp 29627ba1a0bfSKris Buschelman /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 29637ba1a0bfSKris Buschelman /* For now, we will recompute what is needed. */ 29647ba1a0bfSKris Buschelman ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 29657ba1a0bfSKris Buschelman } 29667ba1a0bfSKris Buschelman } 29677ba1a0bfSKris Buschelman /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 29687ba1a0bfSKris Buschelman /* Allocate space for cj, initialize cj, and */ 29697ba1a0bfSKris Buschelman /* destroy list of free space and other temporary array(s) */ 2970854ce69bSBarry Smith ierr = PetscMalloc1(ci[cn]+1,&cj);CHKERRQ(ierr); 2971a1a86e44SBarry Smith ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 297274ed9c26SBarry Smith ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr); 29737ba1a0bfSKris Buschelman 29747ba1a0bfSKris Buschelman /* Allocate space for ca */ 2975854ce69bSBarry Smith ierr = PetscCalloc1(ci[cn]+1,&ca);CHKERRQ(ierr); 29767ba1a0bfSKris Buschelman 29777ba1a0bfSKris Buschelman /* put together the new matrix */ 2978ce94432eSBarry Smith ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,C);CHKERRQ(ierr); 2979f27682edSJed Brown ierr = MatSetBlockSize(*C,pp->dof);CHKERRQ(ierr); 29807ba1a0bfSKris Buschelman 29817ba1a0bfSKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 29827ba1a0bfSKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 29837ba1a0bfSKris Buschelman c = (Mat_SeqAIJ*)((*C)->data); 2984e6b907acSBarry Smith c->free_a = PETSC_TRUE; 2985e6b907acSBarry Smith c->free_ij = PETSC_TRUE; 29867ba1a0bfSKris Buschelman c->nonew = 0; 298726fbe8dcSKarl Rupp 2988d76021d8SHong Zhang (*C)->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 29897ba1a0bfSKris Buschelman 29907ba1a0bfSKris Buschelman /* Clean up. */ 29917ba1a0bfSKris Buschelman ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 29927ba1a0bfSKris Buschelman PetscFunctionReturn(0); 29937ba1a0bfSKris Buschelman } 29947ba1a0bfSKris Buschelman 29957ba1a0bfSKris Buschelman PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 29967ba1a0bfSKris Buschelman { 29977ba1a0bfSKris Buschelman /* This routine requires testing -- first draft only */ 29987ba1a0bfSKris Buschelman PetscErrorCode ierr; 29997ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 30007ba1a0bfSKris Buschelman Mat P =pp->AIJ; 30017ba1a0bfSKris Buschelman Mat_SeqAIJ *a = (Mat_SeqAIJ*) A->data; 30027ba1a0bfSKris Buschelman Mat_SeqAIJ *p = (Mat_SeqAIJ*) P->data; 30037ba1a0bfSKris Buschelman Mat_SeqAIJ *c = (Mat_SeqAIJ*) C->data; 3004a2ea699eSBarry Smith const PetscInt *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj; 3005d2840e09SBarry Smith const PetscInt *ci=c->i,*cj=c->j,*cjj; 3006d2840e09SBarry Smith const PetscInt am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof; 3007d2840e09SBarry Smith PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense; 3008a2ea699eSBarry Smith const MatScalar *aa=a->a,*pa=p->a,*pA,*paj; 3009d2840e09SBarry Smith MatScalar *ca=c->a,*caj,*apa; 30107ba1a0bfSKris Buschelman 30117ba1a0bfSKris Buschelman PetscFunctionBegin; 30127ba1a0bfSKris Buschelman /* Allocate temporary array for storage of one row of A*P */ 30131795a4d1SJed Brown ierr = PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);CHKERRQ(ierr); 30147ba1a0bfSKris Buschelman 30157ba1a0bfSKris Buschelman /* Clear old values in C */ 30167ba1a0bfSKris Buschelman ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 30177ba1a0bfSKris Buschelman 30187ba1a0bfSKris Buschelman for (i=0; i<am; i++) { 30197ba1a0bfSKris Buschelman /* Form sparse row of A*P */ 30207ba1a0bfSKris Buschelman anzi = ai[i+1] - ai[i]; 30217ba1a0bfSKris Buschelman apnzj = 0; 30227ba1a0bfSKris Buschelman for (j=0; j<anzi; j++) { 30237ba1a0bfSKris Buschelman /* Get offset within block of P */ 30247ba1a0bfSKris Buschelman pshift = *aj%ppdof; 30257ba1a0bfSKris Buschelman /* Get block row of P */ 30267ba1a0bfSKris Buschelman prow = *aj++/ppdof; /* integer division */ 30277ba1a0bfSKris Buschelman pnzj = pi[prow+1] - pi[prow]; 30287ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 30297ba1a0bfSKris Buschelman paj = pa + pi[prow]; 30307ba1a0bfSKris Buschelman for (k=0; k<pnzj; k++) { 30317ba1a0bfSKris Buschelman poffset = pjj[k]*ppdof+pshift; 30327ba1a0bfSKris Buschelman if (!apjdense[poffset]) { 30337ba1a0bfSKris Buschelman apjdense[poffset] = -1; 30347ba1a0bfSKris Buschelman apj[apnzj++] = poffset; 30357ba1a0bfSKris Buschelman } 30367ba1a0bfSKris Buschelman apa[poffset] += (*aa)*paj[k]; 30377ba1a0bfSKris Buschelman } 3038dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr); 30397ba1a0bfSKris Buschelman aa++; 30407ba1a0bfSKris Buschelman } 30417ba1a0bfSKris Buschelman 30427ba1a0bfSKris Buschelman /* Sort the j index array for quick sparse axpy. */ 30437ba1a0bfSKris Buschelman /* Note: a array does not need sorting as it is in dense storage locations. */ 30447ba1a0bfSKris Buschelman ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 30457ba1a0bfSKris Buschelman 30467ba1a0bfSKris Buschelman /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 30477ba1a0bfSKris Buschelman prow = i/ppdof; /* integer division */ 30487ba1a0bfSKris Buschelman pshift = i%ppdof; 30497ba1a0bfSKris Buschelman poffset = pi[prow]; 30507ba1a0bfSKris Buschelman pnzi = pi[prow+1] - poffset; 30517ba1a0bfSKris Buschelman /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 30527ba1a0bfSKris Buschelman pJ = pj+poffset; 30537ba1a0bfSKris Buschelman pA = pa+poffset; 30547ba1a0bfSKris Buschelman for (j=0; j<pnzi; j++) { 30557ba1a0bfSKris Buschelman crow = (*pJ)*ppdof+pshift; 30567ba1a0bfSKris Buschelman cjj = cj + ci[crow]; 30577ba1a0bfSKris Buschelman caj = ca + ci[crow]; 30587ba1a0bfSKris Buschelman pJ++; 30597ba1a0bfSKris Buschelman /* Perform sparse axpy operation. Note cjj includes apj. */ 30607ba1a0bfSKris Buschelman for (k=0,nextap=0; nextap<apnzj; k++) { 306126fbe8dcSKarl Rupp if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]]; 30627ba1a0bfSKris Buschelman } 3063dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr); 30647ba1a0bfSKris Buschelman pA++; 30657ba1a0bfSKris Buschelman } 30667ba1a0bfSKris Buschelman 30677ba1a0bfSKris Buschelman /* Zero the current row info for A*P */ 30687ba1a0bfSKris Buschelman for (j=0; j<apnzj; j++) { 30697ba1a0bfSKris Buschelman apa[apj[j]] = 0.; 30707ba1a0bfSKris Buschelman apjdense[apj[j]] = 0; 30717ba1a0bfSKris Buschelman } 30727ba1a0bfSKris Buschelman } 30737ba1a0bfSKris Buschelman 30747ba1a0bfSKris Buschelman /* Assemble the final matrix and clean up */ 30757ba1a0bfSKris Buschelman ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 30767ba1a0bfSKris Buschelman ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 307774ed9c26SBarry Smith ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr); 307895ddefa2SHong Zhang PetscFunctionReturn(0); 307995ddefa2SHong Zhang } 30807ba1a0bfSKris Buschelman 3081150d2497SBarry Smith PETSC_INTERN PetscErrorCode MatPtAP_SeqAIJ_SeqMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 30822121bac1SHong Zhang { 30832121bac1SHong Zhang PetscErrorCode ierr; 30842121bac1SHong Zhang 30852121bac1SHong Zhang PetscFunctionBegin; 30862121bac1SHong Zhang if (scall == MAT_INITIAL_MATRIX) { 30873ff4c91cSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 30882121bac1SHong Zhang ierr = MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,fill,C);CHKERRQ(ierr); 30893ff4c91cSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 30902121bac1SHong Zhang } 30913ff4c91cSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 30922121bac1SHong Zhang ierr = MatPtAPNumeric_SeqAIJ_SeqMAIJ(A,P,*C);CHKERRQ(ierr); 30933ff4c91cSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 30942121bac1SHong Zhang PetscFunctionReturn(0); 30952121bac1SHong Zhang } 30962121bac1SHong Zhang 3097f7a08781SBarry Smith PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 309895ddefa2SHong Zhang { 309995ddefa2SHong Zhang PetscFunctionBegin; 31003e0c911fSBarry Smith SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet"); 310195ddefa2SHong Zhang PetscFunctionReturn(0); 310295ddefa2SHong Zhang } 310395ddefa2SHong Zhang 3104f7a08781SBarry Smith PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C) 310595ddefa2SHong Zhang { 310695ddefa2SHong Zhang PetscFunctionBegin; 31073e0c911fSBarry Smith SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 31087ba1a0bfSKris Buschelman PetscFunctionReturn(0); 31097ba1a0bfSKris Buschelman } 31103e0c911fSBarry Smith PETSC_INTERN PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 3111150d2497SBarry Smith PETSC_INTERN PetscErrorCode MatPtAP_MPIAIJ_MPIMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 31129e03dfbbSHong Zhang { 31139e03dfbbSHong Zhang PetscErrorCode ierr; 31149e03dfbbSHong Zhang 31159e03dfbbSHong Zhang PetscFunctionBegin; 31169e03dfbbSHong Zhang if (scall == MAT_INITIAL_MATRIX) { 31173e0c911fSBarry Smith ierr = PetscInfo((PetscObject)A,"Converting from MAIJ to AIJ matrix since implementation not available for MAIJ"); 31183e0c911fSBarry Smith ierr = MatConvert(P,MATMPIAIJ,MAT_INPLACE_MATRIX,&P);CHKERRQ(ierr); 31199e03dfbbSHong Zhang } 31203e0c911fSBarry Smith ierr = MatPtAP_MPIAIJ_MPIAIJ(A,P,scall,fill,C);CHKERRQ(ierr); 31219e03dfbbSHong Zhang PetscFunctionReturn(0); 31229e03dfbbSHong Zhang } 31239e03dfbbSHong Zhang 3124cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 31259c4fc4c7SBarry Smith { 31269c4fc4c7SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 3127ceb03754SKris Buschelman Mat a = b->AIJ,B; 31289c4fc4c7SBarry Smith Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 31299c4fc4c7SBarry Smith PetscErrorCode ierr; 31300fd73130SBarry Smith PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 3131ba8c8a56SBarry Smith PetscInt *cols; 3132ba8c8a56SBarry Smith PetscScalar *vals; 31339c4fc4c7SBarry Smith 31349c4fc4c7SBarry Smith PetscFunctionBegin; 31359c4fc4c7SBarry Smith ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 3136785e854fSJed Brown ierr = PetscMalloc1(dof*m,&ilen);CHKERRQ(ierr); 31379c4fc4c7SBarry Smith for (i=0; i<m; i++) { 31389c4fc4c7SBarry Smith nmax = PetscMax(nmax,aij->ilen[i]); 313926fbe8dcSKarl Rupp for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i]; 31409c4fc4c7SBarry Smith } 3141*fdc842d1SBarry Smith ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3142*fdc842d1SBarry Smith ierr = MatSetSizes(B,dof*m,dof*n,dof*m,dof*n);CHKERRQ(ierr); 3143*fdc842d1SBarry Smith ierr = MatSetType(B,newtype);CHKERRQ(ierr); 3144*fdc842d1SBarry Smith ierr = MatSeqAIJSetPreallocation(B,0,ilen);CHKERRQ(ierr); 31459c4fc4c7SBarry Smith ierr = PetscFree(ilen);CHKERRQ(ierr); 3146785e854fSJed Brown ierr = PetscMalloc1(nmax,&icols);CHKERRQ(ierr); 31479c4fc4c7SBarry Smith ii = 0; 31489c4fc4c7SBarry Smith for (i=0; i<m; i++) { 3149ba8c8a56SBarry Smith ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 31500fd73130SBarry Smith for (j=0; j<dof; j++) { 315126fbe8dcSKarl Rupp for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j; 3152ceb03754SKris Buschelman ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 31539c4fc4c7SBarry Smith ii++; 31549c4fc4c7SBarry Smith } 3155ba8c8a56SBarry Smith ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 31569c4fc4c7SBarry Smith } 31579c4fc4c7SBarry Smith ierr = PetscFree(icols);CHKERRQ(ierr); 3158ceb03754SKris Buschelman ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3159ceb03754SKris Buschelman ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3160ceb03754SKris Buschelman 3161511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 316228be2f97SBarry Smith ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 3163c3d102feSKris Buschelman } else { 3164ceb03754SKris Buschelman *newmat = B; 3165c3d102feSKris Buschelman } 31669c4fc4c7SBarry Smith PetscFunctionReturn(0); 31679c4fc4c7SBarry Smith } 31689c4fc4c7SBarry Smith 3169c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h> 3170be1d678aSKris Buschelman 3171cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 31720fd73130SBarry Smith { 31730fd73130SBarry Smith Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 3174ceb03754SKris Buschelman Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 31750fd73130SBarry Smith Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 31760fd73130SBarry Smith Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 31770fd73130SBarry Smith Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 31780fd73130SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 31790298fd71SBarry Smith PetscInt dof = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0; 31800298fd71SBarry Smith PetscInt *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL; 31810fd73130SBarry Smith PetscInt rstart,cstart,*garray,ii,k; 31820fd73130SBarry Smith PetscErrorCode ierr; 31830fd73130SBarry Smith PetscScalar *vals,*ovals; 31840fd73130SBarry Smith 31850fd73130SBarry Smith PetscFunctionBegin; 3186dcca6d9dSJed Brown ierr = PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);CHKERRQ(ierr); 3187d0f46423SBarry Smith for (i=0; i<A->rmap->n/dof; i++) { 31880fd73130SBarry Smith nmax = PetscMax(nmax,AIJ->ilen[i]); 31890fd73130SBarry Smith onmax = PetscMax(onmax,OAIJ->ilen[i]); 31900fd73130SBarry Smith for (j=0; j<dof; j++) { 31910fd73130SBarry Smith dnz[dof*i+j] = AIJ->ilen[i]; 31920fd73130SBarry Smith onz[dof*i+j] = OAIJ->ilen[i]; 31930fd73130SBarry Smith } 31940fd73130SBarry Smith } 3195*fdc842d1SBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3196*fdc842d1SBarry Smith ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3197*fdc842d1SBarry Smith ierr = MatSetType(B,newtype);CHKERRQ(ierr); 3198*fdc842d1SBarry Smith ierr = MatMPIAIJSetPreallocation(B,0,dnz,0,onz);CHKERRQ(ierr); 3199f27682edSJed Brown ierr = MatSetBlockSize(B,dof);CHKERRQ(ierr); 32000fd73130SBarry Smith ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 32010fd73130SBarry Smith 3202dcca6d9dSJed Brown ierr = PetscMalloc2(nmax,&icols,onmax,&oicols);CHKERRQ(ierr); 3203d0f46423SBarry Smith rstart = dof*maij->A->rmap->rstart; 3204d0f46423SBarry Smith cstart = dof*maij->A->cmap->rstart; 32050fd73130SBarry Smith garray = mpiaij->garray; 32060fd73130SBarry Smith 32070fd73130SBarry Smith ii = rstart; 3208d0f46423SBarry Smith for (i=0; i<A->rmap->n/dof; i++) { 32090fd73130SBarry Smith ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 32100fd73130SBarry Smith ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 32110fd73130SBarry Smith for (j=0; j<dof; j++) { 32120fd73130SBarry Smith for (k=0; k<ncols; k++) { 32130fd73130SBarry Smith icols[k] = cstart + dof*cols[k]+j; 32140fd73130SBarry Smith } 32150fd73130SBarry Smith for (k=0; k<oncols; k++) { 32160fd73130SBarry Smith oicols[k] = dof*garray[ocols[k]]+j; 32170fd73130SBarry Smith } 3218ceb03754SKris Buschelman ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3219ceb03754SKris Buschelman ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 32200fd73130SBarry Smith ii++; 32210fd73130SBarry Smith } 32220fd73130SBarry Smith ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 32230fd73130SBarry Smith ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 32240fd73130SBarry Smith } 32250fd73130SBarry Smith ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 32260fd73130SBarry Smith 3227ceb03754SKris Buschelman ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3228ceb03754SKris Buschelman ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3229ceb03754SKris Buschelman 3230511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 32317adad957SLisandro Dalcin PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 32327adad957SLisandro Dalcin ((PetscObject)A)->refct = 1; 323326fbe8dcSKarl Rupp 323428be2f97SBarry Smith ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 323526fbe8dcSKarl Rupp 32367adad957SLisandro Dalcin ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 3237c3d102feSKris Buschelman } else { 3238ceb03754SKris Buschelman *newmat = B; 3239c3d102feSKris Buschelman } 32400fd73130SBarry Smith PetscFunctionReturn(0); 32410fd73130SBarry Smith } 32420fd73130SBarry Smith 32437dae84e0SHong Zhang PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 32449e516c8fSBarry Smith { 32459e516c8fSBarry Smith PetscErrorCode ierr; 32469e516c8fSBarry Smith Mat A; 32479e516c8fSBarry Smith 32489e516c8fSBarry Smith PetscFunctionBegin; 32499e516c8fSBarry Smith ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 32507dae84e0SHong Zhang ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 32519e516c8fSBarry Smith ierr = MatDestroy(&A);CHKERRQ(ierr); 32529e516c8fSBarry Smith PetscFunctionReturn(0); 32539e516c8fSBarry Smith } 32540fd73130SBarry Smith 3255ec626240SStefano Zampini PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *submat[]) 3256ec626240SStefano Zampini { 3257ec626240SStefano Zampini PetscErrorCode ierr; 3258ec626240SStefano Zampini Mat A; 3259ec626240SStefano Zampini 3260ec626240SStefano Zampini PetscFunctionBegin; 3261ec626240SStefano Zampini ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 3262ec626240SStefano Zampini ierr = MatCreateSubMatrices(A,n,irow,icol,scall,submat);CHKERRQ(ierr); 3263ec626240SStefano Zampini ierr = MatDestroy(&A);CHKERRQ(ierr); 3264ec626240SStefano Zampini PetscFunctionReturn(0); 3265ec626240SStefano Zampini } 3266ec626240SStefano Zampini 3267ec626240SStefano Zampini PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 3268ec626240SStefano Zampini 3269ec626240SStefano Zampini 3270bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */ 3271480dffcfSJed Brown /*@ 32720bad9183SKris Buschelman MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 32730bad9183SKris Buschelman operations for multicomponent problems. It interpolates each component the same 32740bad9183SKris Buschelman way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 32750bad9183SKris Buschelman and MATMPIAIJ for distributed matrices. 32760bad9183SKris Buschelman 3277ff585edeSJed Brown Collective 3278ff585edeSJed Brown 3279ff585edeSJed Brown Input Parameters: 3280ff585edeSJed Brown + A - the AIJ matrix describing the action on blocks 3281ff585edeSJed Brown - dof - the block size (number of components per node) 3282ff585edeSJed Brown 3283ff585edeSJed Brown Output Parameter: 3284ff585edeSJed Brown . maij - the new MAIJ matrix 3285ff585edeSJed Brown 32860bad9183SKris Buschelman Operations provided: 32870fd73130SBarry Smith + MatMult 32880bad9183SKris Buschelman . MatMultTranspose 32890bad9183SKris Buschelman . MatMultAdd 32900bad9183SKris Buschelman . MatMultTransposeAdd 32910fd73130SBarry Smith - MatView 32920bad9183SKris Buschelman 32930bad9183SKris Buschelman Level: advanced 32940bad9183SKris Buschelman 3295b409243cSBarry Smith .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ 3296ff585edeSJed Brown @*/ 32977087cfbeSBarry Smith PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 329882b1193eSBarry Smith { 3299dfbe8321SBarry Smith PetscErrorCode ierr; 3300b24ad042SBarry Smith PetscMPIInt size; 3301b24ad042SBarry Smith PetscInt n; 330282b1193eSBarry Smith Mat B; 3303*fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 3304*fdc842d1SBarry Smith /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */ 3305*fdc842d1SBarry Smith PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE; 3306*fdc842d1SBarry Smith #endif 330782b1193eSBarry Smith 330882b1193eSBarry Smith PetscFunctionBegin; 3309*fdc842d1SBarry Smith dof = PetscAbs(dof); 3310d72c5c08SBarry Smith ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3311d72c5c08SBarry Smith 331226fbe8dcSKarl Rupp if (dof == 1) *maij = A; 331326fbe8dcSKarl Rupp else { 3314ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3315d0f46423SBarry Smith ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr); 3316bccba955SJed Brown ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr); 3317bccba955SJed Brown ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr); 3318bccba955SJed Brown ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3319bccba955SJed Brown ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 332026fbe8dcSKarl Rupp 3321cd3bbe55SBarry Smith B->assembled = PETSC_TRUE; 3322d72c5c08SBarry Smith 3323ce94432eSBarry Smith ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 3324f4a53059SBarry Smith if (size == 1) { 3325feb9fe23SJed Brown Mat_SeqMAIJ *b; 3326feb9fe23SJed Brown 3327b9b97703SBarry Smith ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 332826fbe8dcSKarl Rupp 33290298fd71SBarry Smith B->ops->setup = NULL; 33304cb9d9b8SBarry Smith B->ops->destroy = MatDestroy_SeqMAIJ; 33310fd73130SBarry Smith B->ops->view = MatView_SeqMAIJ; 3332feb9fe23SJed Brown b = (Mat_SeqMAIJ*)B->data; 3333b9b97703SBarry Smith b->dof = dof; 33344cb9d9b8SBarry Smith b->AIJ = A; 333526fbe8dcSKarl Rupp 3336d72c5c08SBarry Smith if (dof == 2) { 3337cd3bbe55SBarry Smith B->ops->mult = MatMult_SeqMAIJ_2; 3338cd3bbe55SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_2; 3339cd3bbe55SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 3340cd3bbe55SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 3341bcc973b7SBarry Smith } else if (dof == 3) { 3342bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_3; 3343bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_3; 3344bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 3345bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 3346bcc973b7SBarry Smith } else if (dof == 4) { 3347bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_4; 3348bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_4; 3349bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 3350bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 3351f9fae5adSBarry Smith } else if (dof == 5) { 3352f9fae5adSBarry Smith B->ops->mult = MatMult_SeqMAIJ_5; 3353f9fae5adSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_5; 3354f9fae5adSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 3355f9fae5adSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 33566cd98798SBarry Smith } else if (dof == 6) { 33576cd98798SBarry Smith B->ops->mult = MatMult_SeqMAIJ_6; 33586cd98798SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_6; 33596cd98798SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 33606cd98798SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 3361ed8eea03SBarry Smith } else if (dof == 7) { 3362ed8eea03SBarry Smith B->ops->mult = MatMult_SeqMAIJ_7; 3363ed8eea03SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_7; 3364ed8eea03SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 3365ed8eea03SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 336666ed3db0SBarry Smith } else if (dof == 8) { 336766ed3db0SBarry Smith B->ops->mult = MatMult_SeqMAIJ_8; 336866ed3db0SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_8; 336966ed3db0SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 337066ed3db0SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 33712b692628SMatthew Knepley } else if (dof == 9) { 33722b692628SMatthew Knepley B->ops->mult = MatMult_SeqMAIJ_9; 33732b692628SMatthew Knepley B->ops->multadd = MatMultAdd_SeqMAIJ_9; 33742b692628SMatthew Knepley B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 33752b692628SMatthew Knepley B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 3376b9cda22cSBarry Smith } else if (dof == 10) { 3377b9cda22cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_10; 3378b9cda22cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_10; 3379b9cda22cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 3380b9cda22cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 3381dbdd7285SBarry Smith } else if (dof == 11) { 3382dbdd7285SBarry Smith B->ops->mult = MatMult_SeqMAIJ_11; 3383dbdd7285SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_11; 3384dbdd7285SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 3385dbdd7285SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 33862f7816d4SBarry Smith } else if (dof == 16) { 33872f7816d4SBarry Smith B->ops->mult = MatMult_SeqMAIJ_16; 33882f7816d4SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_16; 33892f7816d4SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 33902f7816d4SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 3391ed1c418cSBarry Smith } else if (dof == 18) { 3392ed1c418cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_18; 3393ed1c418cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_18; 3394ed1c418cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 3395ed1c418cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 339682b1193eSBarry Smith } else { 33976861f193SBarry Smith B->ops->mult = MatMult_SeqMAIJ_N; 33986861f193SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_N; 33996861f193SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 34006861f193SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 340182b1193eSBarry Smith } 3402*fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 3403*fdc842d1SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaijcusparse_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 3404*fdc842d1SBarry Smith #endif 3405bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 3406bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaij_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr); 340759e29146SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaijperm_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr); 3408279e4bd4SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaijmkl_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr); 3409ec626240SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqmaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 3410f4a53059SBarry Smith } else { 3411f4a53059SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)A->data; 3412feb9fe23SJed Brown Mat_MPIMAIJ *b; 3413f4a53059SBarry Smith IS from,to; 3414f4a53059SBarry Smith Vec gvec; 3415f4a53059SBarry Smith 3416b9b97703SBarry Smith ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 341726fbe8dcSKarl Rupp 34180298fd71SBarry Smith B->ops->setup = NULL; 3419d72c5c08SBarry Smith B->ops->destroy = MatDestroy_MPIMAIJ; 34200fd73130SBarry Smith B->ops->view = MatView_MPIMAIJ; 342126fbe8dcSKarl Rupp 3422b9b97703SBarry Smith b = (Mat_MPIMAIJ*)B->data; 3423b9b97703SBarry Smith b->dof = dof; 3424b9b97703SBarry Smith b->A = A; 342526fbe8dcSKarl Rupp 3426*fdc842d1SBarry Smith ierr = MatCreateMAIJ(mpiaij->A,-dof,&b->AIJ);CHKERRQ(ierr); 3427*fdc842d1SBarry Smith ierr = MatCreateMAIJ(mpiaij->B,-dof,&b->OAIJ);CHKERRQ(ierr); 34284cb9d9b8SBarry Smith 3429f4a53059SBarry Smith ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 3430a34bdc0bSBarry Smith ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr); 3431a34bdc0bSBarry Smith ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr); 343213288a74SBarry Smith ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 3433a34bdc0bSBarry Smith ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr); 3434f4a53059SBarry Smith 3435f4a53059SBarry Smith /* create two temporary Index sets for build scatter gather */ 3436ce94432eSBarry Smith ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 3437f4a53059SBarry Smith ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 3438f4a53059SBarry Smith 3439f4a53059SBarry Smith /* create temporary global vector to generate scatter context */ 3440ce94432eSBarry Smith ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);CHKERRQ(ierr); 3441f4a53059SBarry Smith 3442f4a53059SBarry Smith /* generate the scatter context */ 34439448b7f1SJunchao Zhang ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 3444f4a53059SBarry Smith 34456bf464f9SBarry Smith ierr = ISDestroy(&from);CHKERRQ(ierr); 34466bf464f9SBarry Smith ierr = ISDestroy(&to);CHKERRQ(ierr); 34476bf464f9SBarry Smith ierr = VecDestroy(&gvec);CHKERRQ(ierr); 3448f4a53059SBarry Smith 3449f4a53059SBarry Smith B->ops->mult = MatMult_MPIMAIJ_dof; 34504cb9d9b8SBarry Smith B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 34514cb9d9b8SBarry Smith B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 34524cb9d9b8SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 345326fbe8dcSKarl Rupp 3454*fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 3455*fdc842d1SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaijcusparse_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 3456*fdc842d1SBarry Smith #endif 3457bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 3458bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_mpiaij_mpimaij_C",MatPtAP_MPIAIJ_MPIMAIJ);CHKERRQ(ierr); 3459ec626240SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpimaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 3460f4a53059SBarry Smith } 34617dae84e0SHong Zhang B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ; 3462ec626240SStefano Zampini B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ; 34634994cf47SJed Brown ierr = MatSetUp(B);CHKERRQ(ierr); 3464*fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 3465*fdc842d1SBarry Smith /* temporary until we have CUDA implementation of MAIJ */ 3466*fdc842d1SBarry Smith { 3467*fdc842d1SBarry Smith PetscBool flg; 3468*fdc842d1SBarry Smith if (convert) { 3469*fdc842d1SBarry Smith ierr = PetscObjectTypeCompareAny((PetscObject)A,&flg,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,MATAIJCUSPARSE,"");CHKERRQ(ierr); 3470*fdc842d1SBarry Smith if (flg) { 3471*fdc842d1SBarry Smith ierr = MatConvert(B,((PetscObject)A)->type_name,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 3472*fdc842d1SBarry Smith } 3473*fdc842d1SBarry Smith } 3474*fdc842d1SBarry Smith } 3475*fdc842d1SBarry Smith #endif 3476cd3bbe55SBarry Smith *maij = B; 3477146574abSBarry Smith ierr = MatViewFromOptions(B,NULL,"-mat_view");CHKERRQ(ierr); 3478d72c5c08SBarry Smith } 3479*fdc842d1SBarry Smith 348082b1193eSBarry Smith PetscFunctionReturn(0); 348182b1193eSBarry Smith } 3482