1be1d678aSKris Buschelman 282b1193eSBarry Smith /* 3cd3bbe55SBarry Smith Defines the basic matrix operations for the MAIJ matrix storage format. 4cd3bbe55SBarry Smith This format is used for restriction and interpolation operations for 5cd3bbe55SBarry Smith multicomponent problems. It interpolates each component the same way 6cd3bbe55SBarry Smith independently. 7cd3bbe55SBarry Smith 8cd3bbe55SBarry Smith We provide: 9cd3bbe55SBarry Smith MatMult() 10cd3bbe55SBarry Smith MatMultTranspose() 11cd3bbe55SBarry Smith MatMultTransposeAdd() 12cd3bbe55SBarry Smith MatMultAdd() 13cd3bbe55SBarry Smith and 14cd3bbe55SBarry Smith MatCreateMAIJ(Mat,dof,Mat*) 15f4a53059SBarry Smith 16f4a53059SBarry Smith This single directory handles both the sequential and parallel codes 1782b1193eSBarry Smith */ 1882b1193eSBarry Smith 19c6db04a5SJed Brown #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 20c6db04a5SJed Brown #include <../src/mat/utils/freespace.h> 21af0996ceSBarry Smith #include <petsc/private/vecimpl.h> 2282b1193eSBarry Smith 234a2ae208SSatish Balay #undef __FUNCT__ 244a2ae208SSatish Balay #define __FUNCT__ "MatMAIJGetAIJ" 25b350b9acSSatish Balay /*@ 26ff585edeSJed Brown MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix 27ff585edeSJed Brown 28ff585edeSJed Brown Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel 29ff585edeSJed Brown 30ff585edeSJed Brown Input Parameter: 31ff585edeSJed Brown . A - the MAIJ matrix 32ff585edeSJed Brown 33ff585edeSJed Brown Output Parameter: 34ff585edeSJed Brown . B - the AIJ matrix 35ff585edeSJed Brown 36ff585edeSJed Brown Level: advanced 37ff585edeSJed Brown 38ff585edeSJed Brown Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 39ff585edeSJed Brown 40ff585edeSJed Brown .seealso: MatCreateMAIJ() 41ff585edeSJed Brown @*/ 427087cfbeSBarry Smith PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B) 43b9b97703SBarry Smith { 44dfbe8321SBarry Smith PetscErrorCode ierr; 45ace3abfcSBarry Smith PetscBool ismpimaij,isseqmaij; 46b9b97703SBarry Smith 47b9b97703SBarry Smith PetscFunctionBegin; 48251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 49251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 50b9b97703SBarry Smith if (ismpimaij) { 51b9b97703SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 52b9b97703SBarry Smith 53b9b97703SBarry Smith *B = b->A; 54b9b97703SBarry Smith } else if (isseqmaij) { 55b9b97703SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 56b9b97703SBarry Smith 57b9b97703SBarry Smith *B = b->AIJ; 58b9b97703SBarry Smith } else { 59b9b97703SBarry Smith *B = A; 60b9b97703SBarry Smith } 61b9b97703SBarry Smith PetscFunctionReturn(0); 62b9b97703SBarry Smith } 63b9b97703SBarry Smith 644a2ae208SSatish Balay #undef __FUNCT__ 654a2ae208SSatish Balay #define __FUNCT__ "MatMAIJRedimension" 66*480dffcfSJed Brown /*@ 67ff585edeSJed Brown MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size 68ff585edeSJed Brown 693f9fe445SBarry Smith Logically Collective 70ff585edeSJed Brown 71ff585edeSJed Brown Input Parameter: 72ff585edeSJed Brown + A - the MAIJ matrix 73ff585edeSJed Brown - dof - the block size for the new matrix 74ff585edeSJed Brown 75ff585edeSJed Brown Output Parameter: 76ff585edeSJed Brown . B - the new MAIJ matrix 77ff585edeSJed Brown 78ff585edeSJed Brown Level: advanced 79ff585edeSJed Brown 80ff585edeSJed Brown .seealso: MatCreateMAIJ() 81ff585edeSJed Brown @*/ 827087cfbeSBarry Smith PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 83b9b97703SBarry Smith { 84dfbe8321SBarry Smith PetscErrorCode ierr; 850298fd71SBarry Smith Mat Aij = NULL; 86b9b97703SBarry Smith 87b9b97703SBarry Smith PetscFunctionBegin; 88c5eb9154SBarry Smith PetscValidLogicalCollectiveInt(A,dof,2); 89b9b97703SBarry Smith ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 90b9b97703SBarry Smith ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 91b9b97703SBarry Smith PetscFunctionReturn(0); 92b9b97703SBarry Smith } 93b9b97703SBarry Smith 944a2ae208SSatish Balay #undef __FUNCT__ 954a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_SeqMAIJ" 96dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 9782b1193eSBarry Smith { 98dfbe8321SBarry Smith PetscErrorCode ierr; 994cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 10082b1193eSBarry Smith 10182b1193eSBarry Smith PetscFunctionBegin; 1026bf464f9SBarry Smith ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 103bf0cc555SLisandro Dalcin ierr = PetscFree(A->data);CHKERRQ(ierr); 104bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);CHKERRQ(ierr); 105bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaij_seqmaij_C",NULL);CHKERRQ(ierr); 1064cb9d9b8SBarry Smith PetscFunctionReturn(0); 1074cb9d9b8SBarry Smith } 1084cb9d9b8SBarry Smith 1094a2ae208SSatish Balay #undef __FUNCT__ 110356c569eSBarry Smith #define __FUNCT__ "MatSetUp_MAIJ" 111356c569eSBarry Smith PetscErrorCode MatSetUp_MAIJ(Mat A) 112356c569eSBarry Smith { 113356c569eSBarry Smith PetscFunctionBegin; 114ce94432eSBarry Smith SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices"); 115356c569eSBarry Smith PetscFunctionReturn(0); 116356c569eSBarry Smith } 117356c569eSBarry Smith 118356c569eSBarry Smith #undef __FUNCT__ 1190fd73130SBarry Smith #define __FUNCT__ "MatView_SeqMAIJ" 1200fd73130SBarry Smith PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 1210fd73130SBarry Smith { 1220fd73130SBarry Smith PetscErrorCode ierr; 1230fd73130SBarry Smith Mat B; 1240fd73130SBarry Smith 1250fd73130SBarry Smith PetscFunctionBegin; 126a2ea699eSBarry Smith ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1270fd73130SBarry Smith ierr = MatView(B,viewer);CHKERRQ(ierr); 1286bf464f9SBarry Smith ierr = MatDestroy(&B);CHKERRQ(ierr); 1290fd73130SBarry Smith PetscFunctionReturn(0); 1300fd73130SBarry Smith } 1310fd73130SBarry Smith 1320fd73130SBarry Smith #undef __FUNCT__ 1330fd73130SBarry Smith #define __FUNCT__ "MatView_MPIMAIJ" 1340fd73130SBarry Smith PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 1350fd73130SBarry Smith { 1360fd73130SBarry Smith PetscErrorCode ierr; 1370fd73130SBarry Smith Mat B; 1380fd73130SBarry Smith 1390fd73130SBarry Smith PetscFunctionBegin; 140a2ea699eSBarry Smith ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1410fd73130SBarry Smith ierr = MatView(B,viewer);CHKERRQ(ierr); 1426bf464f9SBarry Smith ierr = MatDestroy(&B);CHKERRQ(ierr); 1430fd73130SBarry Smith PetscFunctionReturn(0); 1440fd73130SBarry Smith } 1450fd73130SBarry Smith 1460fd73130SBarry Smith #undef __FUNCT__ 1474a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIMAIJ" 148dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 1494cb9d9b8SBarry Smith { 150dfbe8321SBarry Smith PetscErrorCode ierr; 1514cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1524cb9d9b8SBarry Smith 1534cb9d9b8SBarry Smith PetscFunctionBegin; 1546bf464f9SBarry Smith ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 1556bf464f9SBarry Smith ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 1566bf464f9SBarry Smith ierr = MatDestroy(&b->A);CHKERRQ(ierr); 1576bf464f9SBarry Smith ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 1586bf464f9SBarry Smith ierr = VecDestroy(&b->w);CHKERRQ(ierr); 159bf0cc555SLisandro Dalcin ierr = PetscFree(A->data);CHKERRQ(ierr); 160bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);CHKERRQ(ierr); 161bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_mpiaij_mpimaij_C",NULL);CHKERRQ(ierr); 162dbd8c25aSHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 163b9b97703SBarry Smith PetscFunctionReturn(0); 164b9b97703SBarry Smith } 165b9b97703SBarry Smith 1660bad9183SKris Buschelman /*MC 167fafad747SKris Buschelman MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 1680bad9183SKris Buschelman multicomponent problems, interpolating or restricting each component the same way independently. 1690bad9183SKris Buschelman The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 1700bad9183SKris Buschelman 1710bad9183SKris Buschelman Operations provided: 1720bad9183SKris Buschelman . MatMult 1730bad9183SKris Buschelman . MatMultTranspose 1740bad9183SKris Buschelman . MatMultAdd 1750bad9183SKris Buschelman . MatMultTransposeAdd 1760bad9183SKris Buschelman 1770bad9183SKris Buschelman Level: advanced 1780bad9183SKris Buschelman 179b409243cSBarry Smith .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ() 1800bad9183SKris Buschelman M*/ 1810bad9183SKris Buschelman 1824a2ae208SSatish Balay #undef __FUNCT__ 1834a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MAIJ" 1848cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) 18582b1193eSBarry Smith { 186dfbe8321SBarry Smith PetscErrorCode ierr; 1874cb9d9b8SBarry Smith Mat_MPIMAIJ *b; 18864b52464SHong Zhang PetscMPIInt size; 18982b1193eSBarry Smith 19082b1193eSBarry Smith PetscFunctionBegin; 191b00a9115SJed Brown ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 192b0a32e0cSBarry Smith A->data = (void*)b; 19326fbe8dcSKarl Rupp 194cd3bbe55SBarry Smith ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 19526fbe8dcSKarl Rupp 196356c569eSBarry Smith A->ops->setup = MatSetUp_MAIJ; 197f4a53059SBarry Smith 198cd3bbe55SBarry Smith b->AIJ = 0; 199cd3bbe55SBarry Smith b->dof = 0; 200f4a53059SBarry Smith b->OAIJ = 0; 201f4a53059SBarry Smith b->ctx = 0; 202f4a53059SBarry Smith b->w = 0; 203ce94432eSBarry Smith ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 20464b52464SHong Zhang if (size == 1) { 20564b52464SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr); 20664b52464SHong Zhang } else { 20764b52464SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr); 20864b52464SHong Zhang } 20932e7c8b0SBarry Smith A->preallocated = PETSC_TRUE; 21032e7c8b0SBarry Smith A->assembled = PETSC_TRUE; 21182b1193eSBarry Smith PetscFunctionReturn(0); 21282b1193eSBarry Smith } 21382b1193eSBarry Smith 214cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 2154a2ae208SSatish Balay #undef __FUNCT__ 216b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_2" 217dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 21882b1193eSBarry Smith { 2194cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 220bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 221d2840e09SBarry Smith const PetscScalar *x,*v; 222d2840e09SBarry Smith PetscScalar *y, sum1, sum2; 223dfbe8321SBarry Smith PetscErrorCode ierr; 224d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 225d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 22682b1193eSBarry Smith 227bcc973b7SBarry Smith PetscFunctionBegin; 2283649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2291ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 230bcc973b7SBarry Smith idx = a->j; 231bcc973b7SBarry Smith v = a->a; 232bcc973b7SBarry Smith ii = a->i; 233bcc973b7SBarry Smith 234bcc973b7SBarry Smith for (i=0; i<m; i++) { 235bcc973b7SBarry Smith jrow = ii[i]; 236bcc973b7SBarry Smith n = ii[i+1] - jrow; 237bcc973b7SBarry Smith sum1 = 0.0; 238bcc973b7SBarry Smith sum2 = 0.0; 23926fbe8dcSKarl Rupp 240b3c51c66SMatthew Knepley nonzerorow += (n>0); 241bcc973b7SBarry Smith for (j=0; j<n; j++) { 242bcc973b7SBarry Smith sum1 += v[jrow]*x[2*idx[jrow]]; 243bcc973b7SBarry Smith sum2 += v[jrow]*x[2*idx[jrow]+1]; 244bcc973b7SBarry Smith jrow++; 245bcc973b7SBarry Smith } 246bcc973b7SBarry Smith y[2*i] = sum1; 247bcc973b7SBarry Smith y[2*i+1] = sum2; 248bcc973b7SBarry Smith } 249bcc973b7SBarry Smith 250dc0b31edSSatish Balay ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr); 2513649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2521ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 25382b1193eSBarry Smith PetscFunctionReturn(0); 25482b1193eSBarry Smith } 255bcc973b7SBarry Smith 2564a2ae208SSatish Balay #undef __FUNCT__ 257b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 258dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 25982b1193eSBarry Smith { 2604cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 261bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 262d2840e09SBarry Smith const PetscScalar *x,*v; 263d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2; 264dfbe8321SBarry Smith PetscErrorCode ierr; 265d2840e09SBarry Smith PetscInt n,i; 266d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 26782b1193eSBarry Smith 268bcc973b7SBarry Smith PetscFunctionBegin; 269d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2703649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2711ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 272bfec09a0SHong Zhang 273bcc973b7SBarry Smith for (i=0; i<m; i++) { 274bfec09a0SHong Zhang idx = a->j + a->i[i]; 275bfec09a0SHong Zhang v = a->a + a->i[i]; 276bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 277bcc973b7SBarry Smith alpha1 = x[2*i]; 278bcc973b7SBarry Smith alpha2 = x[2*i+1]; 27926fbe8dcSKarl Rupp while (n-->0) { 28026fbe8dcSKarl Rupp y[2*(*idx)] += alpha1*(*v); 28126fbe8dcSKarl Rupp y[2*(*idx)+1] += alpha2*(*v); 28226fbe8dcSKarl Rupp idx++; v++; 28326fbe8dcSKarl Rupp } 284bcc973b7SBarry Smith } 285dc0b31edSSatish Balay ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 2863649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2871ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 28882b1193eSBarry Smith PetscFunctionReturn(0); 28982b1193eSBarry Smith } 290bcc973b7SBarry Smith 2914a2ae208SSatish Balay #undef __FUNCT__ 292b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 293dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 29482b1193eSBarry Smith { 2954cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 296bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 297d2840e09SBarry Smith const PetscScalar *x,*v; 298d2840e09SBarry Smith PetscScalar *y,sum1, sum2; 299dfbe8321SBarry Smith PetscErrorCode ierr; 300b24ad042SBarry Smith PetscInt n,i,jrow,j; 301d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 30282b1193eSBarry Smith 303bcc973b7SBarry Smith PetscFunctionBegin; 304f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 3053649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 3061ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 307bcc973b7SBarry Smith idx = a->j; 308bcc973b7SBarry Smith v = a->a; 309bcc973b7SBarry Smith ii = a->i; 310bcc973b7SBarry Smith 311bcc973b7SBarry Smith for (i=0; i<m; i++) { 312bcc973b7SBarry Smith jrow = ii[i]; 313bcc973b7SBarry Smith n = ii[i+1] - jrow; 314bcc973b7SBarry Smith sum1 = 0.0; 315bcc973b7SBarry Smith sum2 = 0.0; 316bcc973b7SBarry Smith for (j=0; j<n; j++) { 317bcc973b7SBarry Smith sum1 += v[jrow]*x[2*idx[jrow]]; 318bcc973b7SBarry Smith sum2 += v[jrow]*x[2*idx[jrow]+1]; 319bcc973b7SBarry Smith jrow++; 320bcc973b7SBarry Smith } 321bcc973b7SBarry Smith y[2*i] += sum1; 322bcc973b7SBarry Smith y[2*i+1] += sum2; 323bcc973b7SBarry Smith } 324bcc973b7SBarry Smith 325dc0b31edSSatish Balay ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 3263649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 3271ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 328bcc973b7SBarry Smith PetscFunctionReturn(0); 32982b1193eSBarry Smith } 3304a2ae208SSatish Balay #undef __FUNCT__ 331b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 332dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 33382b1193eSBarry Smith { 3344cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 335bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 336d2840e09SBarry Smith const PetscScalar *x,*v; 337d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2; 338dfbe8321SBarry Smith PetscErrorCode ierr; 339d2840e09SBarry Smith PetscInt n,i; 340d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 34182b1193eSBarry Smith 342bcc973b7SBarry Smith PetscFunctionBegin; 343f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 3443649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 3451ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 346bfec09a0SHong Zhang 347bcc973b7SBarry Smith for (i=0; i<m; i++) { 348bfec09a0SHong Zhang idx = a->j + a->i[i]; 349bfec09a0SHong Zhang v = a->a + a->i[i]; 350bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 351bcc973b7SBarry Smith alpha1 = x[2*i]; 352bcc973b7SBarry Smith alpha2 = x[2*i+1]; 35326fbe8dcSKarl Rupp while (n-->0) { 35426fbe8dcSKarl Rupp y[2*(*idx)] += alpha1*(*v); 35526fbe8dcSKarl Rupp y[2*(*idx)+1] += alpha2*(*v); 35626fbe8dcSKarl Rupp idx++; v++; 35726fbe8dcSKarl Rupp } 358bcc973b7SBarry Smith } 359dc0b31edSSatish Balay ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 3603649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 3611ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 362bcc973b7SBarry Smith PetscFunctionReturn(0); 36382b1193eSBarry Smith } 364cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 3654a2ae208SSatish Balay #undef __FUNCT__ 366b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_3" 367dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 368bcc973b7SBarry Smith { 3694cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 370bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 371d2840e09SBarry Smith const PetscScalar *x,*v; 372d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3; 373dfbe8321SBarry Smith PetscErrorCode ierr; 374d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 375d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 37682b1193eSBarry Smith 377bcc973b7SBarry Smith PetscFunctionBegin; 3783649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 3791ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 380bcc973b7SBarry Smith idx = a->j; 381bcc973b7SBarry Smith v = a->a; 382bcc973b7SBarry Smith ii = a->i; 383bcc973b7SBarry Smith 384bcc973b7SBarry Smith for (i=0; i<m; i++) { 385bcc973b7SBarry Smith jrow = ii[i]; 386bcc973b7SBarry Smith n = ii[i+1] - jrow; 387bcc973b7SBarry Smith sum1 = 0.0; 388bcc973b7SBarry Smith sum2 = 0.0; 389bcc973b7SBarry Smith sum3 = 0.0; 39026fbe8dcSKarl Rupp 391b3c51c66SMatthew Knepley nonzerorow += (n>0); 392bcc973b7SBarry Smith for (j=0; j<n; j++) { 393bcc973b7SBarry Smith sum1 += v[jrow]*x[3*idx[jrow]]; 394bcc973b7SBarry Smith sum2 += v[jrow]*x[3*idx[jrow]+1]; 395bcc973b7SBarry Smith sum3 += v[jrow]*x[3*idx[jrow]+2]; 396bcc973b7SBarry Smith jrow++; 397bcc973b7SBarry Smith } 398bcc973b7SBarry Smith y[3*i] = sum1; 399bcc973b7SBarry Smith y[3*i+1] = sum2; 400bcc973b7SBarry Smith y[3*i+2] = sum3; 401bcc973b7SBarry Smith } 402bcc973b7SBarry Smith 403dc0b31edSSatish Balay ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr); 4043649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 4051ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 406bcc973b7SBarry Smith PetscFunctionReturn(0); 407bcc973b7SBarry Smith } 408bcc973b7SBarry Smith 4094a2ae208SSatish Balay #undef __FUNCT__ 410b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 411dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 412bcc973b7SBarry Smith { 4134cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 414bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 415d2840e09SBarry Smith const PetscScalar *x,*v; 416d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3; 417dfbe8321SBarry Smith PetscErrorCode ierr; 418d2840e09SBarry Smith PetscInt n,i; 419d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 420bcc973b7SBarry Smith 421bcc973b7SBarry Smith PetscFunctionBegin; 422d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 4233649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 4241ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 425bfec09a0SHong Zhang 426bcc973b7SBarry Smith for (i=0; i<m; i++) { 427bfec09a0SHong Zhang idx = a->j + a->i[i]; 428bfec09a0SHong Zhang v = a->a + a->i[i]; 429bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 430bcc973b7SBarry Smith alpha1 = x[3*i]; 431bcc973b7SBarry Smith alpha2 = x[3*i+1]; 432bcc973b7SBarry Smith alpha3 = x[3*i+2]; 433bcc973b7SBarry Smith while (n-->0) { 434bcc973b7SBarry Smith y[3*(*idx)] += alpha1*(*v); 435bcc973b7SBarry Smith y[3*(*idx)+1] += alpha2*(*v); 436bcc973b7SBarry Smith y[3*(*idx)+2] += alpha3*(*v); 437bcc973b7SBarry Smith idx++; v++; 438bcc973b7SBarry Smith } 439bcc973b7SBarry Smith } 440dc0b31edSSatish Balay ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 4413649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 4421ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 443bcc973b7SBarry Smith PetscFunctionReturn(0); 444bcc973b7SBarry Smith } 445bcc973b7SBarry Smith 4464a2ae208SSatish Balay #undef __FUNCT__ 447b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 448dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 449bcc973b7SBarry Smith { 4504cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 451bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 452d2840e09SBarry Smith const PetscScalar *x,*v; 453d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3; 454dfbe8321SBarry Smith PetscErrorCode ierr; 455b24ad042SBarry Smith PetscInt n,i,jrow,j; 456d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 457bcc973b7SBarry Smith 458bcc973b7SBarry Smith PetscFunctionBegin; 459f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 4603649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 4611ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 462bcc973b7SBarry Smith idx = a->j; 463bcc973b7SBarry Smith v = a->a; 464bcc973b7SBarry Smith ii = a->i; 465bcc973b7SBarry Smith 466bcc973b7SBarry Smith for (i=0; i<m; i++) { 467bcc973b7SBarry Smith jrow = ii[i]; 468bcc973b7SBarry Smith n = ii[i+1] - jrow; 469bcc973b7SBarry Smith sum1 = 0.0; 470bcc973b7SBarry Smith sum2 = 0.0; 471bcc973b7SBarry Smith sum3 = 0.0; 472bcc973b7SBarry Smith for (j=0; j<n; j++) { 473bcc973b7SBarry Smith sum1 += v[jrow]*x[3*idx[jrow]]; 474bcc973b7SBarry Smith sum2 += v[jrow]*x[3*idx[jrow]+1]; 475bcc973b7SBarry Smith sum3 += v[jrow]*x[3*idx[jrow]+2]; 476bcc973b7SBarry Smith jrow++; 477bcc973b7SBarry Smith } 478bcc973b7SBarry Smith y[3*i] += sum1; 479bcc973b7SBarry Smith y[3*i+1] += sum2; 480bcc973b7SBarry Smith y[3*i+2] += sum3; 481bcc973b7SBarry Smith } 482bcc973b7SBarry Smith 483dc0b31edSSatish Balay ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 4843649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 4851ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 486bcc973b7SBarry Smith PetscFunctionReturn(0); 487bcc973b7SBarry Smith } 4884a2ae208SSatish Balay #undef __FUNCT__ 489b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 490dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 491bcc973b7SBarry Smith { 4924cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 493bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 494d2840e09SBarry Smith const PetscScalar *x,*v; 495d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3; 496dfbe8321SBarry Smith PetscErrorCode ierr; 497d2840e09SBarry Smith PetscInt n,i; 498d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 499bcc973b7SBarry Smith 500bcc973b7SBarry Smith PetscFunctionBegin; 501f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 5023649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 5031ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 504bcc973b7SBarry Smith for (i=0; i<m; i++) { 505bfec09a0SHong Zhang idx = a->j + a->i[i]; 506bfec09a0SHong Zhang v = a->a + a->i[i]; 507bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 508bcc973b7SBarry Smith alpha1 = x[3*i]; 509bcc973b7SBarry Smith alpha2 = x[3*i+1]; 510bcc973b7SBarry Smith alpha3 = x[3*i+2]; 511bcc973b7SBarry Smith while (n-->0) { 512bcc973b7SBarry Smith y[3*(*idx)] += alpha1*(*v); 513bcc973b7SBarry Smith y[3*(*idx)+1] += alpha2*(*v); 514bcc973b7SBarry Smith y[3*(*idx)+2] += alpha3*(*v); 515bcc973b7SBarry Smith idx++; v++; 516bcc973b7SBarry Smith } 517bcc973b7SBarry Smith } 518dc0b31edSSatish Balay ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 5193649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 5201ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 521bcc973b7SBarry Smith PetscFunctionReturn(0); 522bcc973b7SBarry Smith } 523bcc973b7SBarry Smith 524bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/ 5254a2ae208SSatish Balay #undef __FUNCT__ 526b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_4" 527dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 528bcc973b7SBarry Smith { 5294cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 530bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 531d2840e09SBarry Smith const PetscScalar *x,*v; 532d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4; 533dfbe8321SBarry Smith PetscErrorCode ierr; 534d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 535d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 536bcc973b7SBarry Smith 537bcc973b7SBarry Smith PetscFunctionBegin; 5383649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 5391ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 540bcc973b7SBarry Smith idx = a->j; 541bcc973b7SBarry Smith v = a->a; 542bcc973b7SBarry Smith ii = a->i; 543bcc973b7SBarry Smith 544bcc973b7SBarry Smith for (i=0; i<m; i++) { 545bcc973b7SBarry Smith jrow = ii[i]; 546bcc973b7SBarry Smith n = ii[i+1] - jrow; 547bcc973b7SBarry Smith sum1 = 0.0; 548bcc973b7SBarry Smith sum2 = 0.0; 549bcc973b7SBarry Smith sum3 = 0.0; 550bcc973b7SBarry Smith sum4 = 0.0; 551b3c51c66SMatthew Knepley nonzerorow += (n>0); 552bcc973b7SBarry Smith for (j=0; j<n; j++) { 553bcc973b7SBarry Smith sum1 += v[jrow]*x[4*idx[jrow]]; 554bcc973b7SBarry Smith sum2 += v[jrow]*x[4*idx[jrow]+1]; 555bcc973b7SBarry Smith sum3 += v[jrow]*x[4*idx[jrow]+2]; 556bcc973b7SBarry Smith sum4 += v[jrow]*x[4*idx[jrow]+3]; 557bcc973b7SBarry Smith jrow++; 558bcc973b7SBarry Smith } 559bcc973b7SBarry Smith y[4*i] = sum1; 560bcc973b7SBarry Smith y[4*i+1] = sum2; 561bcc973b7SBarry Smith y[4*i+2] = sum3; 562bcc973b7SBarry Smith y[4*i+3] = sum4; 563bcc973b7SBarry Smith } 564bcc973b7SBarry Smith 565dc0b31edSSatish Balay ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr); 5663649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 5671ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 568bcc973b7SBarry Smith PetscFunctionReturn(0); 569bcc973b7SBarry Smith } 570bcc973b7SBarry Smith 5714a2ae208SSatish Balay #undef __FUNCT__ 572b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 573dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 574bcc973b7SBarry Smith { 5754cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 576bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 577d2840e09SBarry Smith const PetscScalar *x,*v; 578d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 579dfbe8321SBarry Smith PetscErrorCode ierr; 580d2840e09SBarry Smith PetscInt n,i; 581d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 582bcc973b7SBarry Smith 583bcc973b7SBarry Smith PetscFunctionBegin; 584d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 5853649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 5861ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 587bcc973b7SBarry Smith for (i=0; i<m; i++) { 588bfec09a0SHong Zhang idx = a->j + a->i[i]; 589bfec09a0SHong Zhang v = a->a + a->i[i]; 590bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 591bcc973b7SBarry Smith alpha1 = x[4*i]; 592bcc973b7SBarry Smith alpha2 = x[4*i+1]; 593bcc973b7SBarry Smith alpha3 = x[4*i+2]; 594bcc973b7SBarry Smith alpha4 = x[4*i+3]; 595bcc973b7SBarry Smith while (n-->0) { 596bcc973b7SBarry Smith y[4*(*idx)] += alpha1*(*v); 597bcc973b7SBarry Smith y[4*(*idx)+1] += alpha2*(*v); 598bcc973b7SBarry Smith y[4*(*idx)+2] += alpha3*(*v); 599bcc973b7SBarry Smith y[4*(*idx)+3] += alpha4*(*v); 600bcc973b7SBarry Smith idx++; v++; 601bcc973b7SBarry Smith } 602bcc973b7SBarry Smith } 603dc0b31edSSatish Balay ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 6043649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 6051ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 606bcc973b7SBarry Smith PetscFunctionReturn(0); 607bcc973b7SBarry Smith } 608bcc973b7SBarry Smith 6094a2ae208SSatish Balay #undef __FUNCT__ 610b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 611dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 612bcc973b7SBarry Smith { 6134cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 614f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 615d2840e09SBarry Smith const PetscScalar *x,*v; 616d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4; 617dfbe8321SBarry Smith PetscErrorCode ierr; 618b24ad042SBarry Smith PetscInt n,i,jrow,j; 619d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 620f9fae5adSBarry Smith 621f9fae5adSBarry Smith PetscFunctionBegin; 622f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 6233649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 6241ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 625f9fae5adSBarry Smith idx = a->j; 626f9fae5adSBarry Smith v = a->a; 627f9fae5adSBarry Smith ii = a->i; 628f9fae5adSBarry Smith 629f9fae5adSBarry Smith for (i=0; i<m; i++) { 630f9fae5adSBarry Smith jrow = ii[i]; 631f9fae5adSBarry Smith n = ii[i+1] - jrow; 632f9fae5adSBarry Smith sum1 = 0.0; 633f9fae5adSBarry Smith sum2 = 0.0; 634f9fae5adSBarry Smith sum3 = 0.0; 635f9fae5adSBarry Smith sum4 = 0.0; 636f9fae5adSBarry Smith for (j=0; j<n; j++) { 637f9fae5adSBarry Smith sum1 += v[jrow]*x[4*idx[jrow]]; 638f9fae5adSBarry Smith sum2 += v[jrow]*x[4*idx[jrow]+1]; 639f9fae5adSBarry Smith sum3 += v[jrow]*x[4*idx[jrow]+2]; 640f9fae5adSBarry Smith sum4 += v[jrow]*x[4*idx[jrow]+3]; 641f9fae5adSBarry Smith jrow++; 642f9fae5adSBarry Smith } 643f9fae5adSBarry Smith y[4*i] += sum1; 644f9fae5adSBarry Smith y[4*i+1] += sum2; 645f9fae5adSBarry Smith y[4*i+2] += sum3; 646f9fae5adSBarry Smith y[4*i+3] += sum4; 647f9fae5adSBarry Smith } 648f9fae5adSBarry Smith 649dc0b31edSSatish Balay ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 6503649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 6511ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 652f9fae5adSBarry Smith PetscFunctionReturn(0); 653bcc973b7SBarry Smith } 6544a2ae208SSatish Balay #undef __FUNCT__ 655b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 656dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 657bcc973b7SBarry Smith { 6584cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 659f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 660d2840e09SBarry Smith const PetscScalar *x,*v; 661d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 662dfbe8321SBarry Smith PetscErrorCode ierr; 663d2840e09SBarry Smith PetscInt n,i; 664d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 665f9fae5adSBarry Smith 666f9fae5adSBarry Smith PetscFunctionBegin; 667f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 6683649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 6691ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 670bfec09a0SHong Zhang 671f9fae5adSBarry Smith for (i=0; i<m; i++) { 672bfec09a0SHong Zhang idx = a->j + a->i[i]; 673bfec09a0SHong Zhang v = a->a + a->i[i]; 674f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 675f9fae5adSBarry Smith alpha1 = x[4*i]; 676f9fae5adSBarry Smith alpha2 = x[4*i+1]; 677f9fae5adSBarry Smith alpha3 = x[4*i+2]; 678f9fae5adSBarry Smith alpha4 = x[4*i+3]; 679f9fae5adSBarry Smith while (n-->0) { 680f9fae5adSBarry Smith y[4*(*idx)] += alpha1*(*v); 681f9fae5adSBarry Smith y[4*(*idx)+1] += alpha2*(*v); 682f9fae5adSBarry Smith y[4*(*idx)+2] += alpha3*(*v); 683f9fae5adSBarry Smith y[4*(*idx)+3] += alpha4*(*v); 684f9fae5adSBarry Smith idx++; v++; 685f9fae5adSBarry Smith } 686f9fae5adSBarry Smith } 687dc0b31edSSatish Balay ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 6883649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 6891ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 690f9fae5adSBarry Smith PetscFunctionReturn(0); 691f9fae5adSBarry Smith } 692f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/ 6936cd98798SBarry Smith 6944a2ae208SSatish Balay #undef __FUNCT__ 695b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_5" 696dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 697f9fae5adSBarry Smith { 6984cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 699f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 700d2840e09SBarry Smith const PetscScalar *x,*v; 701d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 702dfbe8321SBarry Smith PetscErrorCode ierr; 703d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 704d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 705f9fae5adSBarry Smith 706f9fae5adSBarry Smith PetscFunctionBegin; 7073649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 7081ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 709f9fae5adSBarry Smith idx = a->j; 710f9fae5adSBarry Smith v = a->a; 711f9fae5adSBarry Smith ii = a->i; 712f9fae5adSBarry Smith 713f9fae5adSBarry Smith for (i=0; i<m; i++) { 714f9fae5adSBarry Smith jrow = ii[i]; 715f9fae5adSBarry Smith n = ii[i+1] - jrow; 716f9fae5adSBarry Smith sum1 = 0.0; 717f9fae5adSBarry Smith sum2 = 0.0; 718f9fae5adSBarry Smith sum3 = 0.0; 719f9fae5adSBarry Smith sum4 = 0.0; 720f9fae5adSBarry Smith sum5 = 0.0; 72126fbe8dcSKarl Rupp 722b3c51c66SMatthew Knepley nonzerorow += (n>0); 723f9fae5adSBarry Smith for (j=0; j<n; j++) { 724f9fae5adSBarry Smith sum1 += v[jrow]*x[5*idx[jrow]]; 725f9fae5adSBarry Smith sum2 += v[jrow]*x[5*idx[jrow]+1]; 726f9fae5adSBarry Smith sum3 += v[jrow]*x[5*idx[jrow]+2]; 727f9fae5adSBarry Smith sum4 += v[jrow]*x[5*idx[jrow]+3]; 728f9fae5adSBarry Smith sum5 += v[jrow]*x[5*idx[jrow]+4]; 729f9fae5adSBarry Smith jrow++; 730f9fae5adSBarry Smith } 731f9fae5adSBarry Smith y[5*i] = sum1; 732f9fae5adSBarry Smith y[5*i+1] = sum2; 733f9fae5adSBarry Smith y[5*i+2] = sum3; 734f9fae5adSBarry Smith y[5*i+3] = sum4; 735f9fae5adSBarry Smith y[5*i+4] = sum5; 736f9fae5adSBarry Smith } 737f9fae5adSBarry Smith 738dc0b31edSSatish Balay ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr); 7393649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 7401ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 741f9fae5adSBarry Smith PetscFunctionReturn(0); 742f9fae5adSBarry Smith } 743f9fae5adSBarry Smith 7444a2ae208SSatish Balay #undef __FUNCT__ 745b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 746dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 747f9fae5adSBarry Smith { 7484cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 749f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 750d2840e09SBarry Smith const PetscScalar *x,*v; 751d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 752dfbe8321SBarry Smith PetscErrorCode ierr; 753d2840e09SBarry Smith PetscInt n,i; 754d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 755f9fae5adSBarry Smith 756f9fae5adSBarry Smith PetscFunctionBegin; 757d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 7583649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 7591ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 760bfec09a0SHong Zhang 761f9fae5adSBarry Smith for (i=0; i<m; i++) { 762bfec09a0SHong Zhang idx = a->j + a->i[i]; 763bfec09a0SHong Zhang v = a->a + a->i[i]; 764f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 765f9fae5adSBarry Smith alpha1 = x[5*i]; 766f9fae5adSBarry Smith alpha2 = x[5*i+1]; 767f9fae5adSBarry Smith alpha3 = x[5*i+2]; 768f9fae5adSBarry Smith alpha4 = x[5*i+3]; 769f9fae5adSBarry Smith alpha5 = x[5*i+4]; 770f9fae5adSBarry Smith while (n-->0) { 771f9fae5adSBarry Smith y[5*(*idx)] += alpha1*(*v); 772f9fae5adSBarry Smith y[5*(*idx)+1] += alpha2*(*v); 773f9fae5adSBarry Smith y[5*(*idx)+2] += alpha3*(*v); 774f9fae5adSBarry Smith y[5*(*idx)+3] += alpha4*(*v); 775f9fae5adSBarry Smith y[5*(*idx)+4] += alpha5*(*v); 776f9fae5adSBarry Smith idx++; v++; 777f9fae5adSBarry Smith } 778f9fae5adSBarry Smith } 779dc0b31edSSatish Balay ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 7803649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 7811ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 782f9fae5adSBarry Smith PetscFunctionReturn(0); 783f9fae5adSBarry Smith } 784f9fae5adSBarry Smith 7854a2ae208SSatish Balay #undef __FUNCT__ 786b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 787dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 788f9fae5adSBarry Smith { 7894cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 790f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 791d2840e09SBarry Smith const PetscScalar *x,*v; 792d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 793dfbe8321SBarry Smith PetscErrorCode ierr; 794b24ad042SBarry Smith PetscInt n,i,jrow,j; 795d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 796f9fae5adSBarry Smith 797f9fae5adSBarry Smith PetscFunctionBegin; 798f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 7993649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 8001ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 801f9fae5adSBarry Smith idx = a->j; 802f9fae5adSBarry Smith v = a->a; 803f9fae5adSBarry Smith ii = a->i; 804f9fae5adSBarry Smith 805f9fae5adSBarry Smith for (i=0; i<m; i++) { 806f9fae5adSBarry Smith jrow = ii[i]; 807f9fae5adSBarry Smith n = ii[i+1] - jrow; 808f9fae5adSBarry Smith sum1 = 0.0; 809f9fae5adSBarry Smith sum2 = 0.0; 810f9fae5adSBarry Smith sum3 = 0.0; 811f9fae5adSBarry Smith sum4 = 0.0; 812f9fae5adSBarry Smith sum5 = 0.0; 813f9fae5adSBarry Smith for (j=0; j<n; j++) { 814f9fae5adSBarry Smith sum1 += v[jrow]*x[5*idx[jrow]]; 815f9fae5adSBarry Smith sum2 += v[jrow]*x[5*idx[jrow]+1]; 816f9fae5adSBarry Smith sum3 += v[jrow]*x[5*idx[jrow]+2]; 817f9fae5adSBarry Smith sum4 += v[jrow]*x[5*idx[jrow]+3]; 818f9fae5adSBarry Smith sum5 += v[jrow]*x[5*idx[jrow]+4]; 819f9fae5adSBarry Smith jrow++; 820f9fae5adSBarry Smith } 821f9fae5adSBarry Smith y[5*i] += sum1; 822f9fae5adSBarry Smith y[5*i+1] += sum2; 823f9fae5adSBarry Smith y[5*i+2] += sum3; 824f9fae5adSBarry Smith y[5*i+3] += sum4; 825f9fae5adSBarry Smith y[5*i+4] += sum5; 826f9fae5adSBarry Smith } 827f9fae5adSBarry Smith 828dc0b31edSSatish Balay ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 8293649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 8301ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 831f9fae5adSBarry Smith PetscFunctionReturn(0); 832f9fae5adSBarry Smith } 833f9fae5adSBarry Smith 8344a2ae208SSatish Balay #undef __FUNCT__ 835b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 836dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 837f9fae5adSBarry Smith { 8384cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 839f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 840d2840e09SBarry Smith const PetscScalar *x,*v; 841d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 842dfbe8321SBarry Smith PetscErrorCode ierr; 843d2840e09SBarry Smith PetscInt n,i; 844d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 845f9fae5adSBarry Smith 846f9fae5adSBarry Smith PetscFunctionBegin; 847f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 8483649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 8491ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 850bfec09a0SHong Zhang 851f9fae5adSBarry Smith for (i=0; i<m; i++) { 852bfec09a0SHong Zhang idx = a->j + a->i[i]; 853bfec09a0SHong Zhang v = a->a + a->i[i]; 854f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 855f9fae5adSBarry Smith alpha1 = x[5*i]; 856f9fae5adSBarry Smith alpha2 = x[5*i+1]; 857f9fae5adSBarry Smith alpha3 = x[5*i+2]; 858f9fae5adSBarry Smith alpha4 = x[5*i+3]; 859f9fae5adSBarry Smith alpha5 = x[5*i+4]; 860f9fae5adSBarry Smith while (n-->0) { 861f9fae5adSBarry Smith y[5*(*idx)] += alpha1*(*v); 862f9fae5adSBarry Smith y[5*(*idx)+1] += alpha2*(*v); 863f9fae5adSBarry Smith y[5*(*idx)+2] += alpha3*(*v); 864f9fae5adSBarry Smith y[5*(*idx)+3] += alpha4*(*v); 865f9fae5adSBarry Smith y[5*(*idx)+4] += alpha5*(*v); 866f9fae5adSBarry Smith idx++; v++; 867f9fae5adSBarry Smith } 868f9fae5adSBarry Smith } 869dc0b31edSSatish Balay ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 8703649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 8711ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 872f9fae5adSBarry Smith PetscFunctionReturn(0); 873bcc973b7SBarry Smith } 874bcc973b7SBarry Smith 8756cd98798SBarry Smith /* ------------------------------------------------------------------------------*/ 8764a2ae208SSatish Balay #undef __FUNCT__ 877b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_6" 878dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 8796cd98798SBarry Smith { 8806cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 8816cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 882d2840e09SBarry Smith const PetscScalar *x,*v; 883d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 884dfbe8321SBarry Smith PetscErrorCode ierr; 885d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 886d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 8876cd98798SBarry Smith 8886cd98798SBarry Smith PetscFunctionBegin; 8893649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 8901ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 8916cd98798SBarry Smith idx = a->j; 8926cd98798SBarry Smith v = a->a; 8936cd98798SBarry Smith ii = a->i; 8946cd98798SBarry Smith 8956cd98798SBarry Smith for (i=0; i<m; i++) { 8966cd98798SBarry Smith jrow = ii[i]; 8976cd98798SBarry Smith n = ii[i+1] - jrow; 8986cd98798SBarry Smith sum1 = 0.0; 8996cd98798SBarry Smith sum2 = 0.0; 9006cd98798SBarry Smith sum3 = 0.0; 9016cd98798SBarry Smith sum4 = 0.0; 9026cd98798SBarry Smith sum5 = 0.0; 9036cd98798SBarry Smith sum6 = 0.0; 90426fbe8dcSKarl Rupp 905b3c51c66SMatthew Knepley nonzerorow += (n>0); 9066cd98798SBarry Smith for (j=0; j<n; j++) { 9076cd98798SBarry Smith sum1 += v[jrow]*x[6*idx[jrow]]; 9086cd98798SBarry Smith sum2 += v[jrow]*x[6*idx[jrow]+1]; 9096cd98798SBarry Smith sum3 += v[jrow]*x[6*idx[jrow]+2]; 9106cd98798SBarry Smith sum4 += v[jrow]*x[6*idx[jrow]+3]; 9116cd98798SBarry Smith sum5 += v[jrow]*x[6*idx[jrow]+4]; 9126cd98798SBarry Smith sum6 += v[jrow]*x[6*idx[jrow]+5]; 9136cd98798SBarry Smith jrow++; 9146cd98798SBarry Smith } 9156cd98798SBarry Smith y[6*i] = sum1; 9166cd98798SBarry Smith y[6*i+1] = sum2; 9176cd98798SBarry Smith y[6*i+2] = sum3; 9186cd98798SBarry Smith y[6*i+3] = sum4; 9196cd98798SBarry Smith y[6*i+4] = sum5; 9206cd98798SBarry Smith y[6*i+5] = sum6; 9216cd98798SBarry Smith } 9226cd98798SBarry Smith 923dc0b31edSSatish Balay ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr); 9243649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 9251ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 9266cd98798SBarry Smith PetscFunctionReturn(0); 9276cd98798SBarry Smith } 9286cd98798SBarry Smith 9294a2ae208SSatish Balay #undef __FUNCT__ 930b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 931dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 9326cd98798SBarry Smith { 9336cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 9346cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 935d2840e09SBarry Smith const PetscScalar *x,*v; 936d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 937dfbe8321SBarry Smith PetscErrorCode ierr; 938d2840e09SBarry Smith PetscInt n,i; 939d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 9406cd98798SBarry Smith 9416cd98798SBarry Smith PetscFunctionBegin; 942d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 9433649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 9441ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 945bfec09a0SHong Zhang 9466cd98798SBarry Smith for (i=0; i<m; i++) { 947bfec09a0SHong Zhang idx = a->j + a->i[i]; 948bfec09a0SHong Zhang v = a->a + a->i[i]; 9496cd98798SBarry Smith n = a->i[i+1] - a->i[i]; 9506cd98798SBarry Smith alpha1 = x[6*i]; 9516cd98798SBarry Smith alpha2 = x[6*i+1]; 9526cd98798SBarry Smith alpha3 = x[6*i+2]; 9536cd98798SBarry Smith alpha4 = x[6*i+3]; 9546cd98798SBarry Smith alpha5 = x[6*i+4]; 9556cd98798SBarry Smith alpha6 = x[6*i+5]; 9566cd98798SBarry Smith while (n-->0) { 9576cd98798SBarry Smith y[6*(*idx)] += alpha1*(*v); 9586cd98798SBarry Smith y[6*(*idx)+1] += alpha2*(*v); 9596cd98798SBarry Smith y[6*(*idx)+2] += alpha3*(*v); 9606cd98798SBarry Smith y[6*(*idx)+3] += alpha4*(*v); 9616cd98798SBarry Smith y[6*(*idx)+4] += alpha5*(*v); 9626cd98798SBarry Smith y[6*(*idx)+5] += alpha6*(*v); 9636cd98798SBarry Smith idx++; v++; 9646cd98798SBarry Smith } 9656cd98798SBarry Smith } 966dc0b31edSSatish Balay ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 9673649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 9681ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 9696cd98798SBarry Smith PetscFunctionReturn(0); 9706cd98798SBarry Smith } 9716cd98798SBarry Smith 9724a2ae208SSatish Balay #undef __FUNCT__ 973b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 974dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 9756cd98798SBarry Smith { 9766cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 9776cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 978d2840e09SBarry Smith const PetscScalar *x,*v; 979d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 980dfbe8321SBarry Smith PetscErrorCode ierr; 981b24ad042SBarry Smith PetscInt n,i,jrow,j; 982d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 9836cd98798SBarry Smith 9846cd98798SBarry Smith PetscFunctionBegin; 9856cd98798SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 9863649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 9871ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 9886cd98798SBarry Smith idx = a->j; 9896cd98798SBarry Smith v = a->a; 9906cd98798SBarry Smith ii = a->i; 9916cd98798SBarry Smith 9926cd98798SBarry Smith for (i=0; i<m; i++) { 9936cd98798SBarry Smith jrow = ii[i]; 9946cd98798SBarry Smith n = ii[i+1] - jrow; 9956cd98798SBarry Smith sum1 = 0.0; 9966cd98798SBarry Smith sum2 = 0.0; 9976cd98798SBarry Smith sum3 = 0.0; 9986cd98798SBarry Smith sum4 = 0.0; 9996cd98798SBarry Smith sum5 = 0.0; 10006cd98798SBarry Smith sum6 = 0.0; 10016cd98798SBarry Smith for (j=0; j<n; j++) { 10026cd98798SBarry Smith sum1 += v[jrow]*x[6*idx[jrow]]; 10036cd98798SBarry Smith sum2 += v[jrow]*x[6*idx[jrow]+1]; 10046cd98798SBarry Smith sum3 += v[jrow]*x[6*idx[jrow]+2]; 10056cd98798SBarry Smith sum4 += v[jrow]*x[6*idx[jrow]+3]; 10066cd98798SBarry Smith sum5 += v[jrow]*x[6*idx[jrow]+4]; 10076cd98798SBarry Smith sum6 += v[jrow]*x[6*idx[jrow]+5]; 10086cd98798SBarry Smith jrow++; 10096cd98798SBarry Smith } 10106cd98798SBarry Smith y[6*i] += sum1; 10116cd98798SBarry Smith y[6*i+1] += sum2; 10126cd98798SBarry Smith y[6*i+2] += sum3; 10136cd98798SBarry Smith y[6*i+3] += sum4; 10146cd98798SBarry Smith y[6*i+4] += sum5; 10156cd98798SBarry Smith y[6*i+5] += sum6; 10166cd98798SBarry Smith } 10176cd98798SBarry Smith 1018dc0b31edSSatish Balay ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 10193649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 10201ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 10216cd98798SBarry Smith PetscFunctionReturn(0); 10226cd98798SBarry Smith } 10236cd98798SBarry Smith 10244a2ae208SSatish Balay #undef __FUNCT__ 1025b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 1026dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 10276cd98798SBarry Smith { 10286cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 10296cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1030d2840e09SBarry Smith const PetscScalar *x,*v; 1031d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 1032dfbe8321SBarry Smith PetscErrorCode ierr; 1033d2840e09SBarry Smith PetscInt n,i; 1034d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 10356cd98798SBarry Smith 10366cd98798SBarry Smith PetscFunctionBegin; 10376cd98798SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 10383649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 10391ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1040bfec09a0SHong Zhang 10416cd98798SBarry Smith for (i=0; i<m; i++) { 1042bfec09a0SHong Zhang idx = a->j + a->i[i]; 1043bfec09a0SHong Zhang v = a->a + a->i[i]; 10446cd98798SBarry Smith n = a->i[i+1] - a->i[i]; 10456cd98798SBarry Smith alpha1 = x[6*i]; 10466cd98798SBarry Smith alpha2 = x[6*i+1]; 10476cd98798SBarry Smith alpha3 = x[6*i+2]; 10486cd98798SBarry Smith alpha4 = x[6*i+3]; 10496cd98798SBarry Smith alpha5 = x[6*i+4]; 10506cd98798SBarry Smith alpha6 = x[6*i+5]; 10516cd98798SBarry Smith while (n-->0) { 10526cd98798SBarry Smith y[6*(*idx)] += alpha1*(*v); 10536cd98798SBarry Smith y[6*(*idx)+1] += alpha2*(*v); 10546cd98798SBarry Smith y[6*(*idx)+2] += alpha3*(*v); 10556cd98798SBarry Smith y[6*(*idx)+3] += alpha4*(*v); 10566cd98798SBarry Smith y[6*(*idx)+4] += alpha5*(*v); 10576cd98798SBarry Smith y[6*(*idx)+5] += alpha6*(*v); 10586cd98798SBarry Smith idx++; v++; 10596cd98798SBarry Smith } 10606cd98798SBarry Smith } 1061dc0b31edSSatish Balay ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 10623649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 10631ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 10646cd98798SBarry Smith PetscFunctionReturn(0); 10656cd98798SBarry Smith } 10666cd98798SBarry Smith 106766ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/ 106866ed3db0SBarry Smith #undef __FUNCT__ 1069ed8eea03SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_7" 1070ed8eea03SBarry Smith PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1071ed8eea03SBarry Smith { 1072ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1073ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1074d2840e09SBarry Smith const PetscScalar *x,*v; 1075d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1076ed8eea03SBarry Smith PetscErrorCode ierr; 1077d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1078d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 1079ed8eea03SBarry Smith 1080ed8eea03SBarry Smith PetscFunctionBegin; 10813649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1082ed8eea03SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1083ed8eea03SBarry Smith idx = a->j; 1084ed8eea03SBarry Smith v = a->a; 1085ed8eea03SBarry Smith ii = a->i; 1086ed8eea03SBarry Smith 1087ed8eea03SBarry Smith for (i=0; i<m; i++) { 1088ed8eea03SBarry Smith jrow = ii[i]; 1089ed8eea03SBarry Smith n = ii[i+1] - jrow; 1090ed8eea03SBarry Smith sum1 = 0.0; 1091ed8eea03SBarry Smith sum2 = 0.0; 1092ed8eea03SBarry Smith sum3 = 0.0; 1093ed8eea03SBarry Smith sum4 = 0.0; 1094ed8eea03SBarry Smith sum5 = 0.0; 1095ed8eea03SBarry Smith sum6 = 0.0; 1096ed8eea03SBarry Smith sum7 = 0.0; 109726fbe8dcSKarl Rupp 1098b3c51c66SMatthew Knepley nonzerorow += (n>0); 1099ed8eea03SBarry Smith for (j=0; j<n; j++) { 1100ed8eea03SBarry Smith sum1 += v[jrow]*x[7*idx[jrow]]; 1101ed8eea03SBarry Smith sum2 += v[jrow]*x[7*idx[jrow]+1]; 1102ed8eea03SBarry Smith sum3 += v[jrow]*x[7*idx[jrow]+2]; 1103ed8eea03SBarry Smith sum4 += v[jrow]*x[7*idx[jrow]+3]; 1104ed8eea03SBarry Smith sum5 += v[jrow]*x[7*idx[jrow]+4]; 1105ed8eea03SBarry Smith sum6 += v[jrow]*x[7*idx[jrow]+5]; 1106ed8eea03SBarry Smith sum7 += v[jrow]*x[7*idx[jrow]+6]; 1107ed8eea03SBarry Smith jrow++; 1108ed8eea03SBarry Smith } 1109ed8eea03SBarry Smith y[7*i] = sum1; 1110ed8eea03SBarry Smith y[7*i+1] = sum2; 1111ed8eea03SBarry Smith y[7*i+2] = sum3; 1112ed8eea03SBarry Smith y[7*i+3] = sum4; 1113ed8eea03SBarry Smith y[7*i+4] = sum5; 1114ed8eea03SBarry Smith y[7*i+5] = sum6; 1115ed8eea03SBarry Smith y[7*i+6] = sum7; 1116ed8eea03SBarry Smith } 1117ed8eea03SBarry Smith 1118dc0b31edSSatish Balay ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr); 11193649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1120ed8eea03SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1121ed8eea03SBarry Smith PetscFunctionReturn(0); 1122ed8eea03SBarry Smith } 1123ed8eea03SBarry Smith 1124ed8eea03SBarry Smith #undef __FUNCT__ 1125ed8eea03SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7" 1126ed8eea03SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1127ed8eea03SBarry Smith { 1128ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1129ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1130d2840e09SBarry Smith const PetscScalar *x,*v; 1131d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1132ed8eea03SBarry Smith PetscErrorCode ierr; 1133d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1134d2840e09SBarry Smith PetscInt n,i; 1135ed8eea03SBarry Smith 1136ed8eea03SBarry Smith PetscFunctionBegin; 1137d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 11383649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1139ed8eea03SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1140ed8eea03SBarry Smith 1141ed8eea03SBarry Smith for (i=0; i<m; i++) { 1142ed8eea03SBarry Smith idx = a->j + a->i[i]; 1143ed8eea03SBarry Smith v = a->a + a->i[i]; 1144ed8eea03SBarry Smith n = a->i[i+1] - a->i[i]; 1145ed8eea03SBarry Smith alpha1 = x[7*i]; 1146ed8eea03SBarry Smith alpha2 = x[7*i+1]; 1147ed8eea03SBarry Smith alpha3 = x[7*i+2]; 1148ed8eea03SBarry Smith alpha4 = x[7*i+3]; 1149ed8eea03SBarry Smith alpha5 = x[7*i+4]; 1150ed8eea03SBarry Smith alpha6 = x[7*i+5]; 1151ed8eea03SBarry Smith alpha7 = x[7*i+6]; 1152ed8eea03SBarry Smith while (n-->0) { 1153ed8eea03SBarry Smith y[7*(*idx)] += alpha1*(*v); 1154ed8eea03SBarry Smith y[7*(*idx)+1] += alpha2*(*v); 1155ed8eea03SBarry Smith y[7*(*idx)+2] += alpha3*(*v); 1156ed8eea03SBarry Smith y[7*(*idx)+3] += alpha4*(*v); 1157ed8eea03SBarry Smith y[7*(*idx)+4] += alpha5*(*v); 1158ed8eea03SBarry Smith y[7*(*idx)+5] += alpha6*(*v); 1159ed8eea03SBarry Smith y[7*(*idx)+6] += alpha7*(*v); 1160ed8eea03SBarry Smith idx++; v++; 1161ed8eea03SBarry Smith } 1162ed8eea03SBarry Smith } 1163dc0b31edSSatish Balay ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 11643649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1165ed8eea03SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1166ed8eea03SBarry Smith PetscFunctionReturn(0); 1167ed8eea03SBarry Smith } 1168ed8eea03SBarry Smith 1169ed8eea03SBarry Smith #undef __FUNCT__ 1170ed8eea03SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_7" 1171ed8eea03SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1172ed8eea03SBarry Smith { 1173ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1174ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1175d2840e09SBarry Smith const PetscScalar *x,*v; 1176d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1177ed8eea03SBarry Smith PetscErrorCode ierr; 1178d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1179ed8eea03SBarry Smith PetscInt n,i,jrow,j; 1180ed8eea03SBarry Smith 1181ed8eea03SBarry Smith PetscFunctionBegin; 1182ed8eea03SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 11833649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1184ed8eea03SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1185ed8eea03SBarry Smith idx = a->j; 1186ed8eea03SBarry Smith v = a->a; 1187ed8eea03SBarry Smith ii = a->i; 1188ed8eea03SBarry Smith 1189ed8eea03SBarry Smith for (i=0; i<m; i++) { 1190ed8eea03SBarry Smith jrow = ii[i]; 1191ed8eea03SBarry Smith n = ii[i+1] - jrow; 1192ed8eea03SBarry Smith sum1 = 0.0; 1193ed8eea03SBarry Smith sum2 = 0.0; 1194ed8eea03SBarry Smith sum3 = 0.0; 1195ed8eea03SBarry Smith sum4 = 0.0; 1196ed8eea03SBarry Smith sum5 = 0.0; 1197ed8eea03SBarry Smith sum6 = 0.0; 1198ed8eea03SBarry Smith sum7 = 0.0; 1199ed8eea03SBarry Smith for (j=0; j<n; j++) { 1200ed8eea03SBarry Smith sum1 += v[jrow]*x[7*idx[jrow]]; 1201ed8eea03SBarry Smith sum2 += v[jrow]*x[7*idx[jrow]+1]; 1202ed8eea03SBarry Smith sum3 += v[jrow]*x[7*idx[jrow]+2]; 1203ed8eea03SBarry Smith sum4 += v[jrow]*x[7*idx[jrow]+3]; 1204ed8eea03SBarry Smith sum5 += v[jrow]*x[7*idx[jrow]+4]; 1205ed8eea03SBarry Smith sum6 += v[jrow]*x[7*idx[jrow]+5]; 1206ed8eea03SBarry Smith sum7 += v[jrow]*x[7*idx[jrow]+6]; 1207ed8eea03SBarry Smith jrow++; 1208ed8eea03SBarry Smith } 1209ed8eea03SBarry Smith y[7*i] += sum1; 1210ed8eea03SBarry Smith y[7*i+1] += sum2; 1211ed8eea03SBarry Smith y[7*i+2] += sum3; 1212ed8eea03SBarry Smith y[7*i+3] += sum4; 1213ed8eea03SBarry Smith y[7*i+4] += sum5; 1214ed8eea03SBarry Smith y[7*i+5] += sum6; 1215ed8eea03SBarry Smith y[7*i+6] += sum7; 1216ed8eea03SBarry Smith } 1217ed8eea03SBarry Smith 1218dc0b31edSSatish Balay ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 12193649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1220ed8eea03SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1221ed8eea03SBarry Smith PetscFunctionReturn(0); 1222ed8eea03SBarry Smith } 1223ed8eea03SBarry Smith 1224ed8eea03SBarry Smith #undef __FUNCT__ 1225ed8eea03SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7" 1226ed8eea03SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1227ed8eea03SBarry Smith { 1228ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1229ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1230d2840e09SBarry Smith const PetscScalar *x,*v; 1231d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1232ed8eea03SBarry Smith PetscErrorCode ierr; 1233d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1234d2840e09SBarry Smith PetscInt n,i; 1235ed8eea03SBarry Smith 1236ed8eea03SBarry Smith PetscFunctionBegin; 1237ed8eea03SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 12383649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1239ed8eea03SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1240ed8eea03SBarry Smith for (i=0; i<m; i++) { 1241ed8eea03SBarry Smith idx = a->j + a->i[i]; 1242ed8eea03SBarry Smith v = a->a + a->i[i]; 1243ed8eea03SBarry Smith n = a->i[i+1] - a->i[i]; 1244ed8eea03SBarry Smith alpha1 = x[7*i]; 1245ed8eea03SBarry Smith alpha2 = x[7*i+1]; 1246ed8eea03SBarry Smith alpha3 = x[7*i+2]; 1247ed8eea03SBarry Smith alpha4 = x[7*i+3]; 1248ed8eea03SBarry Smith alpha5 = x[7*i+4]; 1249ed8eea03SBarry Smith alpha6 = x[7*i+5]; 1250ed8eea03SBarry Smith alpha7 = x[7*i+6]; 1251ed8eea03SBarry Smith while (n-->0) { 1252ed8eea03SBarry Smith y[7*(*idx)] += alpha1*(*v); 1253ed8eea03SBarry Smith y[7*(*idx)+1] += alpha2*(*v); 1254ed8eea03SBarry Smith y[7*(*idx)+2] += alpha3*(*v); 1255ed8eea03SBarry Smith y[7*(*idx)+3] += alpha4*(*v); 1256ed8eea03SBarry Smith y[7*(*idx)+4] += alpha5*(*v); 1257ed8eea03SBarry Smith y[7*(*idx)+5] += alpha6*(*v); 1258ed8eea03SBarry Smith y[7*(*idx)+6] += alpha7*(*v); 1259ed8eea03SBarry Smith idx++; v++; 1260ed8eea03SBarry Smith } 1261ed8eea03SBarry Smith } 1262dc0b31edSSatish Balay ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 12633649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1264ed8eea03SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1265ed8eea03SBarry Smith PetscFunctionReturn(0); 1266ed8eea03SBarry Smith } 1267ed8eea03SBarry Smith 1268ed8eea03SBarry Smith #undef __FUNCT__ 126966ed3db0SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_8" 1270dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 127166ed3db0SBarry Smith { 127266ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 127366ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1274d2840e09SBarry Smith const PetscScalar *x,*v; 1275d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1276dfbe8321SBarry Smith PetscErrorCode ierr; 1277d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1278d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 127966ed3db0SBarry Smith 128066ed3db0SBarry Smith PetscFunctionBegin; 12813649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 12821ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 128366ed3db0SBarry Smith idx = a->j; 128466ed3db0SBarry Smith v = a->a; 128566ed3db0SBarry Smith ii = a->i; 128666ed3db0SBarry Smith 128766ed3db0SBarry Smith for (i=0; i<m; i++) { 128866ed3db0SBarry Smith jrow = ii[i]; 128966ed3db0SBarry Smith n = ii[i+1] - jrow; 129066ed3db0SBarry Smith sum1 = 0.0; 129166ed3db0SBarry Smith sum2 = 0.0; 129266ed3db0SBarry Smith sum3 = 0.0; 129366ed3db0SBarry Smith sum4 = 0.0; 129466ed3db0SBarry Smith sum5 = 0.0; 129566ed3db0SBarry Smith sum6 = 0.0; 129666ed3db0SBarry Smith sum7 = 0.0; 129766ed3db0SBarry Smith sum8 = 0.0; 129826fbe8dcSKarl Rupp 1299b3c51c66SMatthew Knepley nonzerorow += (n>0); 130066ed3db0SBarry Smith for (j=0; j<n; j++) { 130166ed3db0SBarry Smith sum1 += v[jrow]*x[8*idx[jrow]]; 130266ed3db0SBarry Smith sum2 += v[jrow]*x[8*idx[jrow]+1]; 130366ed3db0SBarry Smith sum3 += v[jrow]*x[8*idx[jrow]+2]; 130466ed3db0SBarry Smith sum4 += v[jrow]*x[8*idx[jrow]+3]; 130566ed3db0SBarry Smith sum5 += v[jrow]*x[8*idx[jrow]+4]; 130666ed3db0SBarry Smith sum6 += v[jrow]*x[8*idx[jrow]+5]; 130766ed3db0SBarry Smith sum7 += v[jrow]*x[8*idx[jrow]+6]; 130866ed3db0SBarry Smith sum8 += v[jrow]*x[8*idx[jrow]+7]; 130966ed3db0SBarry Smith jrow++; 131066ed3db0SBarry Smith } 131166ed3db0SBarry Smith y[8*i] = sum1; 131266ed3db0SBarry Smith y[8*i+1] = sum2; 131366ed3db0SBarry Smith y[8*i+2] = sum3; 131466ed3db0SBarry Smith y[8*i+3] = sum4; 131566ed3db0SBarry Smith y[8*i+4] = sum5; 131666ed3db0SBarry Smith y[8*i+5] = sum6; 131766ed3db0SBarry Smith y[8*i+6] = sum7; 131866ed3db0SBarry Smith y[8*i+7] = sum8; 131966ed3db0SBarry Smith } 132066ed3db0SBarry Smith 1321dc0b31edSSatish Balay ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr); 13223649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 13231ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 132466ed3db0SBarry Smith PetscFunctionReturn(0); 132566ed3db0SBarry Smith } 132666ed3db0SBarry Smith 132766ed3db0SBarry Smith #undef __FUNCT__ 132866ed3db0SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 1329dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 133066ed3db0SBarry Smith { 133166ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 133266ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1333d2840e09SBarry Smith const PetscScalar *x,*v; 1334d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1335dfbe8321SBarry Smith PetscErrorCode ierr; 1336d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1337d2840e09SBarry Smith PetscInt n,i; 133866ed3db0SBarry Smith 133966ed3db0SBarry Smith PetscFunctionBegin; 1340d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 13413649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 13421ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1343bfec09a0SHong Zhang 134466ed3db0SBarry Smith for (i=0; i<m; i++) { 1345bfec09a0SHong Zhang idx = a->j + a->i[i]; 1346bfec09a0SHong Zhang v = a->a + a->i[i]; 134766ed3db0SBarry Smith n = a->i[i+1] - a->i[i]; 134866ed3db0SBarry Smith alpha1 = x[8*i]; 134966ed3db0SBarry Smith alpha2 = x[8*i+1]; 135066ed3db0SBarry Smith alpha3 = x[8*i+2]; 135166ed3db0SBarry Smith alpha4 = x[8*i+3]; 135266ed3db0SBarry Smith alpha5 = x[8*i+4]; 135366ed3db0SBarry Smith alpha6 = x[8*i+5]; 135466ed3db0SBarry Smith alpha7 = x[8*i+6]; 135566ed3db0SBarry Smith alpha8 = x[8*i+7]; 135666ed3db0SBarry Smith while (n-->0) { 135766ed3db0SBarry Smith y[8*(*idx)] += alpha1*(*v); 135866ed3db0SBarry Smith y[8*(*idx)+1] += alpha2*(*v); 135966ed3db0SBarry Smith y[8*(*idx)+2] += alpha3*(*v); 136066ed3db0SBarry Smith y[8*(*idx)+3] += alpha4*(*v); 136166ed3db0SBarry Smith y[8*(*idx)+4] += alpha5*(*v); 136266ed3db0SBarry Smith y[8*(*idx)+5] += alpha6*(*v); 136366ed3db0SBarry Smith y[8*(*idx)+6] += alpha7*(*v); 136466ed3db0SBarry Smith y[8*(*idx)+7] += alpha8*(*v); 136566ed3db0SBarry Smith idx++; v++; 136666ed3db0SBarry Smith } 136766ed3db0SBarry Smith } 1368dc0b31edSSatish Balay ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 13693649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 13701ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 137166ed3db0SBarry Smith PetscFunctionReturn(0); 137266ed3db0SBarry Smith } 137366ed3db0SBarry Smith 137466ed3db0SBarry Smith #undef __FUNCT__ 137566ed3db0SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1376dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 137766ed3db0SBarry Smith { 137866ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 137966ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1380d2840e09SBarry Smith const PetscScalar *x,*v; 1381d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1382dfbe8321SBarry Smith PetscErrorCode ierr; 1383d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1384b24ad042SBarry Smith PetscInt n,i,jrow,j; 138566ed3db0SBarry Smith 138666ed3db0SBarry Smith PetscFunctionBegin; 138766ed3db0SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 13883649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 13891ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 139066ed3db0SBarry Smith idx = a->j; 139166ed3db0SBarry Smith v = a->a; 139266ed3db0SBarry Smith ii = a->i; 139366ed3db0SBarry Smith 139466ed3db0SBarry Smith for (i=0; i<m; i++) { 139566ed3db0SBarry Smith jrow = ii[i]; 139666ed3db0SBarry Smith n = ii[i+1] - jrow; 139766ed3db0SBarry Smith sum1 = 0.0; 139866ed3db0SBarry Smith sum2 = 0.0; 139966ed3db0SBarry Smith sum3 = 0.0; 140066ed3db0SBarry Smith sum4 = 0.0; 140166ed3db0SBarry Smith sum5 = 0.0; 140266ed3db0SBarry Smith sum6 = 0.0; 140366ed3db0SBarry Smith sum7 = 0.0; 140466ed3db0SBarry Smith sum8 = 0.0; 140566ed3db0SBarry Smith for (j=0; j<n; j++) { 140666ed3db0SBarry Smith sum1 += v[jrow]*x[8*idx[jrow]]; 140766ed3db0SBarry Smith sum2 += v[jrow]*x[8*idx[jrow]+1]; 140866ed3db0SBarry Smith sum3 += v[jrow]*x[8*idx[jrow]+2]; 140966ed3db0SBarry Smith sum4 += v[jrow]*x[8*idx[jrow]+3]; 141066ed3db0SBarry Smith sum5 += v[jrow]*x[8*idx[jrow]+4]; 141166ed3db0SBarry Smith sum6 += v[jrow]*x[8*idx[jrow]+5]; 141266ed3db0SBarry Smith sum7 += v[jrow]*x[8*idx[jrow]+6]; 141366ed3db0SBarry Smith sum8 += v[jrow]*x[8*idx[jrow]+7]; 141466ed3db0SBarry Smith jrow++; 141566ed3db0SBarry Smith } 141666ed3db0SBarry Smith y[8*i] += sum1; 141766ed3db0SBarry Smith y[8*i+1] += sum2; 141866ed3db0SBarry Smith y[8*i+2] += sum3; 141966ed3db0SBarry Smith y[8*i+3] += sum4; 142066ed3db0SBarry Smith y[8*i+4] += sum5; 142166ed3db0SBarry Smith y[8*i+5] += sum6; 142266ed3db0SBarry Smith y[8*i+6] += sum7; 142366ed3db0SBarry Smith y[8*i+7] += sum8; 142466ed3db0SBarry Smith } 142566ed3db0SBarry Smith 1426dc0b31edSSatish Balay ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 14273649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 14281ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 142966ed3db0SBarry Smith PetscFunctionReturn(0); 143066ed3db0SBarry Smith } 143166ed3db0SBarry Smith 143266ed3db0SBarry Smith #undef __FUNCT__ 143366ed3db0SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1434dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 143566ed3db0SBarry Smith { 143666ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 143766ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1438d2840e09SBarry Smith const PetscScalar *x,*v; 1439d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1440dfbe8321SBarry Smith PetscErrorCode ierr; 1441d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1442d2840e09SBarry Smith PetscInt n,i; 144366ed3db0SBarry Smith 144466ed3db0SBarry Smith PetscFunctionBegin; 144566ed3db0SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 14463649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 14471ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 144866ed3db0SBarry Smith for (i=0; i<m; i++) { 1449bfec09a0SHong Zhang idx = a->j + a->i[i]; 1450bfec09a0SHong Zhang v = a->a + a->i[i]; 145166ed3db0SBarry Smith n = a->i[i+1] - a->i[i]; 145266ed3db0SBarry Smith alpha1 = x[8*i]; 145366ed3db0SBarry Smith alpha2 = x[8*i+1]; 145466ed3db0SBarry Smith alpha3 = x[8*i+2]; 145566ed3db0SBarry Smith alpha4 = x[8*i+3]; 145666ed3db0SBarry Smith alpha5 = x[8*i+4]; 145766ed3db0SBarry Smith alpha6 = x[8*i+5]; 145866ed3db0SBarry Smith alpha7 = x[8*i+6]; 145966ed3db0SBarry Smith alpha8 = x[8*i+7]; 146066ed3db0SBarry Smith while (n-->0) { 146166ed3db0SBarry Smith y[8*(*idx)] += alpha1*(*v); 146266ed3db0SBarry Smith y[8*(*idx)+1] += alpha2*(*v); 146366ed3db0SBarry Smith y[8*(*idx)+2] += alpha3*(*v); 146466ed3db0SBarry Smith y[8*(*idx)+3] += alpha4*(*v); 146566ed3db0SBarry Smith y[8*(*idx)+4] += alpha5*(*v); 146666ed3db0SBarry Smith y[8*(*idx)+5] += alpha6*(*v); 146766ed3db0SBarry Smith y[8*(*idx)+6] += alpha7*(*v); 146866ed3db0SBarry Smith y[8*(*idx)+7] += alpha8*(*v); 146966ed3db0SBarry Smith idx++; v++; 147066ed3db0SBarry Smith } 147166ed3db0SBarry Smith } 1472dc0b31edSSatish Balay ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 14733649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 14741ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 14752f7816d4SBarry Smith PetscFunctionReturn(0); 14762f7816d4SBarry Smith } 14772f7816d4SBarry Smith 14782b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/ 14792b692628SMatthew Knepley #undef __FUNCT__ 14802b692628SMatthew Knepley #define __FUNCT__ "MatMult_SeqMAIJ_9" 1481dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 14822b692628SMatthew Knepley { 14832b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 14842b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1485d2840e09SBarry Smith const PetscScalar *x,*v; 1486d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1487dfbe8321SBarry Smith PetscErrorCode ierr; 1488d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1489d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 14902b692628SMatthew Knepley 14912b692628SMatthew Knepley PetscFunctionBegin; 14923649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 14931ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 14942b692628SMatthew Knepley idx = a->j; 14952b692628SMatthew Knepley v = a->a; 14962b692628SMatthew Knepley ii = a->i; 14972b692628SMatthew Knepley 14982b692628SMatthew Knepley for (i=0; i<m; i++) { 14992b692628SMatthew Knepley jrow = ii[i]; 15002b692628SMatthew Knepley n = ii[i+1] - jrow; 15012b692628SMatthew Knepley sum1 = 0.0; 15022b692628SMatthew Knepley sum2 = 0.0; 15032b692628SMatthew Knepley sum3 = 0.0; 15042b692628SMatthew Knepley sum4 = 0.0; 15052b692628SMatthew Knepley sum5 = 0.0; 15062b692628SMatthew Knepley sum6 = 0.0; 15072b692628SMatthew Knepley sum7 = 0.0; 15082b692628SMatthew Knepley sum8 = 0.0; 15092b692628SMatthew Knepley sum9 = 0.0; 151026fbe8dcSKarl Rupp 1511b3c51c66SMatthew Knepley nonzerorow += (n>0); 15122b692628SMatthew Knepley for (j=0; j<n; j++) { 15132b692628SMatthew Knepley sum1 += v[jrow]*x[9*idx[jrow]]; 15142b692628SMatthew Knepley sum2 += v[jrow]*x[9*idx[jrow]+1]; 15152b692628SMatthew Knepley sum3 += v[jrow]*x[9*idx[jrow]+2]; 15162b692628SMatthew Knepley sum4 += v[jrow]*x[9*idx[jrow]+3]; 15172b692628SMatthew Knepley sum5 += v[jrow]*x[9*idx[jrow]+4]; 15182b692628SMatthew Knepley sum6 += v[jrow]*x[9*idx[jrow]+5]; 15192b692628SMatthew Knepley sum7 += v[jrow]*x[9*idx[jrow]+6]; 15202b692628SMatthew Knepley sum8 += v[jrow]*x[9*idx[jrow]+7]; 15212b692628SMatthew Knepley sum9 += v[jrow]*x[9*idx[jrow]+8]; 15222b692628SMatthew Knepley jrow++; 15232b692628SMatthew Knepley } 15242b692628SMatthew Knepley y[9*i] = sum1; 15252b692628SMatthew Knepley y[9*i+1] = sum2; 15262b692628SMatthew Knepley y[9*i+2] = sum3; 15272b692628SMatthew Knepley y[9*i+3] = sum4; 15282b692628SMatthew Knepley y[9*i+4] = sum5; 15292b692628SMatthew Knepley y[9*i+5] = sum6; 15302b692628SMatthew Knepley y[9*i+6] = sum7; 15312b692628SMatthew Knepley y[9*i+7] = sum8; 15322b692628SMatthew Knepley y[9*i+8] = sum9; 15332b692628SMatthew Knepley } 15342b692628SMatthew Knepley 1535dc0b31edSSatish Balay ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr); 15363649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 15371ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 15382b692628SMatthew Knepley PetscFunctionReturn(0); 15392b692628SMatthew Knepley } 15402b692628SMatthew Knepley 1541b9cda22cSBarry Smith /* ------------------------------------------------------------------------------*/ 1542b9cda22cSBarry Smith 15432b692628SMatthew Knepley #undef __FUNCT__ 15442b692628SMatthew Knepley #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9" 1545dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 15462b692628SMatthew Knepley { 15472b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 15482b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1549d2840e09SBarry Smith const PetscScalar *x,*v; 1550d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1551dfbe8321SBarry Smith PetscErrorCode ierr; 1552d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1553d2840e09SBarry Smith PetscInt n,i; 15542b692628SMatthew Knepley 15552b692628SMatthew Knepley PetscFunctionBegin; 1556d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 15573649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 15581ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 15592b692628SMatthew Knepley 15602b692628SMatthew Knepley for (i=0; i<m; i++) { 15612b692628SMatthew Knepley idx = a->j + a->i[i]; 15622b692628SMatthew Knepley v = a->a + a->i[i]; 15632b692628SMatthew Knepley n = a->i[i+1] - a->i[i]; 15642b692628SMatthew Knepley alpha1 = x[9*i]; 15652b692628SMatthew Knepley alpha2 = x[9*i+1]; 15662b692628SMatthew Knepley alpha3 = x[9*i+2]; 15672b692628SMatthew Knepley alpha4 = x[9*i+3]; 15682b692628SMatthew Knepley alpha5 = x[9*i+4]; 15692b692628SMatthew Knepley alpha6 = x[9*i+5]; 15702b692628SMatthew Knepley alpha7 = x[9*i+6]; 15712b692628SMatthew Knepley alpha8 = x[9*i+7]; 15722f6bd0c6SMatthew Knepley alpha9 = x[9*i+8]; 15732b692628SMatthew Knepley while (n-->0) { 15742b692628SMatthew Knepley y[9*(*idx)] += alpha1*(*v); 15752b692628SMatthew Knepley y[9*(*idx)+1] += alpha2*(*v); 15762b692628SMatthew Knepley y[9*(*idx)+2] += alpha3*(*v); 15772b692628SMatthew Knepley y[9*(*idx)+3] += alpha4*(*v); 15782b692628SMatthew Knepley y[9*(*idx)+4] += alpha5*(*v); 15792b692628SMatthew Knepley y[9*(*idx)+5] += alpha6*(*v); 15802b692628SMatthew Knepley y[9*(*idx)+6] += alpha7*(*v); 15812b692628SMatthew Knepley y[9*(*idx)+7] += alpha8*(*v); 15822b692628SMatthew Knepley y[9*(*idx)+8] += alpha9*(*v); 15832b692628SMatthew Knepley idx++; v++; 15842b692628SMatthew Knepley } 15852b692628SMatthew Knepley } 1586dc0b31edSSatish Balay ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 15873649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 15881ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 15892b692628SMatthew Knepley PetscFunctionReturn(0); 15902b692628SMatthew Knepley } 15912b692628SMatthew Knepley 15922b692628SMatthew Knepley #undef __FUNCT__ 15932b692628SMatthew Knepley #define __FUNCT__ "MatMultAdd_SeqMAIJ_9" 1594dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 15952b692628SMatthew Knepley { 15962b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 15972b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1598d2840e09SBarry Smith const PetscScalar *x,*v; 1599d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1600dfbe8321SBarry Smith PetscErrorCode ierr; 1601d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1602b24ad042SBarry Smith PetscInt n,i,jrow,j; 16032b692628SMatthew Knepley 16042b692628SMatthew Knepley PetscFunctionBegin; 16052b692628SMatthew Knepley if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 16063649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 16071ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 16082b692628SMatthew Knepley idx = a->j; 16092b692628SMatthew Knepley v = a->a; 16102b692628SMatthew Knepley ii = a->i; 16112b692628SMatthew Knepley 16122b692628SMatthew Knepley for (i=0; i<m; i++) { 16132b692628SMatthew Knepley jrow = ii[i]; 16142b692628SMatthew Knepley n = ii[i+1] - jrow; 16152b692628SMatthew Knepley sum1 = 0.0; 16162b692628SMatthew Knepley sum2 = 0.0; 16172b692628SMatthew Knepley sum3 = 0.0; 16182b692628SMatthew Knepley sum4 = 0.0; 16192b692628SMatthew Knepley sum5 = 0.0; 16202b692628SMatthew Knepley sum6 = 0.0; 16212b692628SMatthew Knepley sum7 = 0.0; 16222b692628SMatthew Knepley sum8 = 0.0; 16232b692628SMatthew Knepley sum9 = 0.0; 16242b692628SMatthew Knepley for (j=0; j<n; j++) { 16252b692628SMatthew Knepley sum1 += v[jrow]*x[9*idx[jrow]]; 16262b692628SMatthew Knepley sum2 += v[jrow]*x[9*idx[jrow]+1]; 16272b692628SMatthew Knepley sum3 += v[jrow]*x[9*idx[jrow]+2]; 16282b692628SMatthew Knepley sum4 += v[jrow]*x[9*idx[jrow]+3]; 16292b692628SMatthew Knepley sum5 += v[jrow]*x[9*idx[jrow]+4]; 16302b692628SMatthew Knepley sum6 += v[jrow]*x[9*idx[jrow]+5]; 16312b692628SMatthew Knepley sum7 += v[jrow]*x[9*idx[jrow]+6]; 16322b692628SMatthew Knepley sum8 += v[jrow]*x[9*idx[jrow]+7]; 16332b692628SMatthew Knepley sum9 += v[jrow]*x[9*idx[jrow]+8]; 16342b692628SMatthew Knepley jrow++; 16352b692628SMatthew Knepley } 16362b692628SMatthew Knepley y[9*i] += sum1; 16372b692628SMatthew Knepley y[9*i+1] += sum2; 16382b692628SMatthew Knepley y[9*i+2] += sum3; 16392b692628SMatthew Knepley y[9*i+3] += sum4; 16402b692628SMatthew Knepley y[9*i+4] += sum5; 16412b692628SMatthew Knepley y[9*i+5] += sum6; 16422b692628SMatthew Knepley y[9*i+6] += sum7; 16432b692628SMatthew Knepley y[9*i+7] += sum8; 16442b692628SMatthew Knepley y[9*i+8] += sum9; 16452b692628SMatthew Knepley } 16462b692628SMatthew Knepley 1647efee365bSSatish Balay ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 16483649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 16491ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 16502b692628SMatthew Knepley PetscFunctionReturn(0); 16512b692628SMatthew Knepley } 16522b692628SMatthew Knepley 16532b692628SMatthew Knepley #undef __FUNCT__ 16542b692628SMatthew Knepley #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9" 1655dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 16562b692628SMatthew Knepley { 16572b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 16582b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1659d2840e09SBarry Smith const PetscScalar *x,*v; 1660d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1661dfbe8321SBarry Smith PetscErrorCode ierr; 1662d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1663d2840e09SBarry Smith PetscInt n,i; 16642b692628SMatthew Knepley 16652b692628SMatthew Knepley PetscFunctionBegin; 16662b692628SMatthew Knepley if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 16673649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 16681ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 16692b692628SMatthew Knepley for (i=0; i<m; i++) { 16702b692628SMatthew Knepley idx = a->j + a->i[i]; 16712b692628SMatthew Knepley v = a->a + a->i[i]; 16722b692628SMatthew Knepley n = a->i[i+1] - a->i[i]; 16732b692628SMatthew Knepley alpha1 = x[9*i]; 16742b692628SMatthew Knepley alpha2 = x[9*i+1]; 16752b692628SMatthew Knepley alpha3 = x[9*i+2]; 16762b692628SMatthew Knepley alpha4 = x[9*i+3]; 16772b692628SMatthew Knepley alpha5 = x[9*i+4]; 16782b692628SMatthew Knepley alpha6 = x[9*i+5]; 16792b692628SMatthew Knepley alpha7 = x[9*i+6]; 16802b692628SMatthew Knepley alpha8 = x[9*i+7]; 16812b692628SMatthew Knepley alpha9 = x[9*i+8]; 16822b692628SMatthew Knepley while (n-->0) { 16832b692628SMatthew Knepley y[9*(*idx)] += alpha1*(*v); 16842b692628SMatthew Knepley y[9*(*idx)+1] += alpha2*(*v); 16852b692628SMatthew Knepley y[9*(*idx)+2] += alpha3*(*v); 16862b692628SMatthew Knepley y[9*(*idx)+3] += alpha4*(*v); 16872b692628SMatthew Knepley y[9*(*idx)+4] += alpha5*(*v); 16882b692628SMatthew Knepley y[9*(*idx)+5] += alpha6*(*v); 16892b692628SMatthew Knepley y[9*(*idx)+6] += alpha7*(*v); 16902b692628SMatthew Knepley y[9*(*idx)+7] += alpha8*(*v); 16912b692628SMatthew Knepley y[9*(*idx)+8] += alpha9*(*v); 16922b692628SMatthew Knepley idx++; v++; 16932b692628SMatthew Knepley } 16942b692628SMatthew Knepley } 1695dc0b31edSSatish Balay ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 16963649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 16971ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 16982b692628SMatthew Knepley PetscFunctionReturn(0); 16992b692628SMatthew Knepley } 1700b9cda22cSBarry Smith #undef __FUNCT__ 1701b9cda22cSBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_10" 1702b9cda22cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1703b9cda22cSBarry Smith { 1704b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1705b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1706d2840e09SBarry Smith const PetscScalar *x,*v; 1707d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1708b9cda22cSBarry Smith PetscErrorCode ierr; 1709d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1710d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 1711b9cda22cSBarry Smith 1712b9cda22cSBarry Smith PetscFunctionBegin; 17133649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1714b9cda22cSBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1715b9cda22cSBarry Smith idx = a->j; 1716b9cda22cSBarry Smith v = a->a; 1717b9cda22cSBarry Smith ii = a->i; 1718b9cda22cSBarry Smith 1719b9cda22cSBarry Smith for (i=0; i<m; i++) { 1720b9cda22cSBarry Smith jrow = ii[i]; 1721b9cda22cSBarry Smith n = ii[i+1] - jrow; 1722b9cda22cSBarry Smith sum1 = 0.0; 1723b9cda22cSBarry Smith sum2 = 0.0; 1724b9cda22cSBarry Smith sum3 = 0.0; 1725b9cda22cSBarry Smith sum4 = 0.0; 1726b9cda22cSBarry Smith sum5 = 0.0; 1727b9cda22cSBarry Smith sum6 = 0.0; 1728b9cda22cSBarry Smith sum7 = 0.0; 1729b9cda22cSBarry Smith sum8 = 0.0; 1730b9cda22cSBarry Smith sum9 = 0.0; 1731b9cda22cSBarry Smith sum10 = 0.0; 173226fbe8dcSKarl Rupp 1733b3c51c66SMatthew Knepley nonzerorow += (n>0); 1734b9cda22cSBarry Smith for (j=0; j<n; j++) { 1735b9cda22cSBarry Smith sum1 += v[jrow]*x[10*idx[jrow]]; 1736b9cda22cSBarry Smith sum2 += v[jrow]*x[10*idx[jrow]+1]; 1737b9cda22cSBarry Smith sum3 += v[jrow]*x[10*idx[jrow]+2]; 1738b9cda22cSBarry Smith sum4 += v[jrow]*x[10*idx[jrow]+3]; 1739b9cda22cSBarry Smith sum5 += v[jrow]*x[10*idx[jrow]+4]; 1740b9cda22cSBarry Smith sum6 += v[jrow]*x[10*idx[jrow]+5]; 1741b9cda22cSBarry Smith sum7 += v[jrow]*x[10*idx[jrow]+6]; 1742b9cda22cSBarry Smith sum8 += v[jrow]*x[10*idx[jrow]+7]; 1743b9cda22cSBarry Smith sum9 += v[jrow]*x[10*idx[jrow]+8]; 1744b9cda22cSBarry Smith sum10 += v[jrow]*x[10*idx[jrow]+9]; 1745b9cda22cSBarry Smith jrow++; 1746b9cda22cSBarry Smith } 1747b9cda22cSBarry Smith y[10*i] = sum1; 1748b9cda22cSBarry Smith y[10*i+1] = sum2; 1749b9cda22cSBarry Smith y[10*i+2] = sum3; 1750b9cda22cSBarry Smith y[10*i+3] = sum4; 1751b9cda22cSBarry Smith y[10*i+4] = sum5; 1752b9cda22cSBarry Smith y[10*i+5] = sum6; 1753b9cda22cSBarry Smith y[10*i+6] = sum7; 1754b9cda22cSBarry Smith y[10*i+7] = sum8; 1755b9cda22cSBarry Smith y[10*i+8] = sum9; 1756b9cda22cSBarry Smith y[10*i+9] = sum10; 1757b9cda22cSBarry Smith } 1758b9cda22cSBarry Smith 1759dc0b31edSSatish Balay ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr); 17603649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1761b9cda22cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1762b9cda22cSBarry Smith PetscFunctionReturn(0); 1763b9cda22cSBarry Smith } 1764b9cda22cSBarry Smith 1765b9cda22cSBarry Smith #undef __FUNCT__ 1766dbdd7285SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_10" 1767b9cda22cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1768b9cda22cSBarry Smith { 1769b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1770b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1771d2840e09SBarry Smith const PetscScalar *x,*v; 1772d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1773b9cda22cSBarry Smith PetscErrorCode ierr; 1774d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1775b9cda22cSBarry Smith PetscInt n,i,jrow,j; 1776b9cda22cSBarry Smith 1777b9cda22cSBarry Smith PetscFunctionBegin; 1778b9cda22cSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 17793649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1780b9cda22cSBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1781b9cda22cSBarry Smith idx = a->j; 1782b9cda22cSBarry Smith v = a->a; 1783b9cda22cSBarry Smith ii = a->i; 1784b9cda22cSBarry Smith 1785b9cda22cSBarry Smith for (i=0; i<m; i++) { 1786b9cda22cSBarry Smith jrow = ii[i]; 1787b9cda22cSBarry Smith n = ii[i+1] - jrow; 1788b9cda22cSBarry Smith sum1 = 0.0; 1789b9cda22cSBarry Smith sum2 = 0.0; 1790b9cda22cSBarry Smith sum3 = 0.0; 1791b9cda22cSBarry Smith sum4 = 0.0; 1792b9cda22cSBarry Smith sum5 = 0.0; 1793b9cda22cSBarry Smith sum6 = 0.0; 1794b9cda22cSBarry Smith sum7 = 0.0; 1795b9cda22cSBarry Smith sum8 = 0.0; 1796b9cda22cSBarry Smith sum9 = 0.0; 1797b9cda22cSBarry Smith sum10 = 0.0; 1798b9cda22cSBarry Smith for (j=0; j<n; j++) { 1799b9cda22cSBarry Smith sum1 += v[jrow]*x[10*idx[jrow]]; 1800b9cda22cSBarry Smith sum2 += v[jrow]*x[10*idx[jrow]+1]; 1801b9cda22cSBarry Smith sum3 += v[jrow]*x[10*idx[jrow]+2]; 1802b9cda22cSBarry Smith sum4 += v[jrow]*x[10*idx[jrow]+3]; 1803b9cda22cSBarry Smith sum5 += v[jrow]*x[10*idx[jrow]+4]; 1804b9cda22cSBarry Smith sum6 += v[jrow]*x[10*idx[jrow]+5]; 1805b9cda22cSBarry Smith sum7 += v[jrow]*x[10*idx[jrow]+6]; 1806b9cda22cSBarry Smith sum8 += v[jrow]*x[10*idx[jrow]+7]; 1807b9cda22cSBarry Smith sum9 += v[jrow]*x[10*idx[jrow]+8]; 1808b9cda22cSBarry Smith sum10 += v[jrow]*x[10*idx[jrow]+9]; 1809b9cda22cSBarry Smith jrow++; 1810b9cda22cSBarry Smith } 1811b9cda22cSBarry Smith y[10*i] += sum1; 1812b9cda22cSBarry Smith y[10*i+1] += sum2; 1813b9cda22cSBarry Smith y[10*i+2] += sum3; 1814b9cda22cSBarry Smith y[10*i+3] += sum4; 1815b9cda22cSBarry Smith y[10*i+4] += sum5; 1816b9cda22cSBarry Smith y[10*i+5] += sum6; 1817b9cda22cSBarry Smith y[10*i+6] += sum7; 1818b9cda22cSBarry Smith y[10*i+7] += sum8; 1819b9cda22cSBarry Smith y[10*i+8] += sum9; 1820b9cda22cSBarry Smith y[10*i+9] += sum10; 1821b9cda22cSBarry Smith } 1822b9cda22cSBarry Smith 1823dc0b31edSSatish Balay ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 18243649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1825b9cda22cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1826b9cda22cSBarry Smith PetscFunctionReturn(0); 1827b9cda22cSBarry Smith } 1828b9cda22cSBarry Smith 1829b9cda22cSBarry Smith #undef __FUNCT__ 1830b9cda22cSBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10" 1831b9cda22cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1832b9cda22cSBarry Smith { 1833b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1834b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1835d2840e09SBarry Smith const PetscScalar *x,*v; 1836d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1837b9cda22cSBarry Smith PetscErrorCode ierr; 1838d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1839d2840e09SBarry Smith PetscInt n,i; 1840b9cda22cSBarry Smith 1841b9cda22cSBarry Smith PetscFunctionBegin; 1842d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 18433649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1844b9cda22cSBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1845b9cda22cSBarry Smith 1846b9cda22cSBarry Smith for (i=0; i<m; i++) { 1847b9cda22cSBarry Smith idx = a->j + a->i[i]; 1848b9cda22cSBarry Smith v = a->a + a->i[i]; 1849b9cda22cSBarry Smith n = a->i[i+1] - a->i[i]; 1850e29fdc22SBarry Smith alpha1 = x[10*i]; 1851e29fdc22SBarry Smith alpha2 = x[10*i+1]; 1852e29fdc22SBarry Smith alpha3 = x[10*i+2]; 1853e29fdc22SBarry Smith alpha4 = x[10*i+3]; 1854e29fdc22SBarry Smith alpha5 = x[10*i+4]; 1855e29fdc22SBarry Smith alpha6 = x[10*i+5]; 1856e29fdc22SBarry Smith alpha7 = x[10*i+6]; 1857e29fdc22SBarry Smith alpha8 = x[10*i+7]; 1858e29fdc22SBarry Smith alpha9 = x[10*i+8]; 1859b9cda22cSBarry Smith alpha10 = x[10*i+9]; 1860b9cda22cSBarry Smith while (n-->0) { 1861e29fdc22SBarry Smith y[10*(*idx)] += alpha1*(*v); 1862e29fdc22SBarry Smith y[10*(*idx)+1] += alpha2*(*v); 1863e29fdc22SBarry Smith y[10*(*idx)+2] += alpha3*(*v); 1864e29fdc22SBarry Smith y[10*(*idx)+3] += alpha4*(*v); 1865e29fdc22SBarry Smith y[10*(*idx)+4] += alpha5*(*v); 1866e29fdc22SBarry Smith y[10*(*idx)+5] += alpha6*(*v); 1867e29fdc22SBarry Smith y[10*(*idx)+6] += alpha7*(*v); 1868e29fdc22SBarry Smith y[10*(*idx)+7] += alpha8*(*v); 1869e29fdc22SBarry Smith y[10*(*idx)+8] += alpha9*(*v); 1870b9cda22cSBarry Smith y[10*(*idx)+9] += alpha10*(*v); 1871b9cda22cSBarry Smith idx++; v++; 1872b9cda22cSBarry Smith } 1873b9cda22cSBarry Smith } 1874dc0b31edSSatish Balay ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 18753649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1876b9cda22cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1877b9cda22cSBarry Smith PetscFunctionReturn(0); 1878b9cda22cSBarry Smith } 1879b9cda22cSBarry Smith 1880b9cda22cSBarry Smith #undef __FUNCT__ 1881b9cda22cSBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10" 1882b9cda22cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1883b9cda22cSBarry Smith { 1884b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1885b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1886d2840e09SBarry Smith const PetscScalar *x,*v; 1887d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1888b9cda22cSBarry Smith PetscErrorCode ierr; 1889d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1890d2840e09SBarry Smith PetscInt n,i; 1891b9cda22cSBarry Smith 1892b9cda22cSBarry Smith PetscFunctionBegin; 1893b9cda22cSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 18943649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1895b9cda22cSBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1896b9cda22cSBarry Smith for (i=0; i<m; i++) { 1897b9cda22cSBarry Smith idx = a->j + a->i[i]; 1898b9cda22cSBarry Smith v = a->a + a->i[i]; 1899b9cda22cSBarry Smith n = a->i[i+1] - a->i[i]; 1900b9cda22cSBarry Smith alpha1 = x[10*i]; 1901b9cda22cSBarry Smith alpha2 = x[10*i+1]; 1902b9cda22cSBarry Smith alpha3 = x[10*i+2]; 1903b9cda22cSBarry Smith alpha4 = x[10*i+3]; 1904b9cda22cSBarry Smith alpha5 = x[10*i+4]; 1905b9cda22cSBarry Smith alpha6 = x[10*i+5]; 1906b9cda22cSBarry Smith alpha7 = x[10*i+6]; 1907b9cda22cSBarry Smith alpha8 = x[10*i+7]; 1908b9cda22cSBarry Smith alpha9 = x[10*i+8]; 1909b9cda22cSBarry Smith alpha10 = x[10*i+9]; 1910b9cda22cSBarry Smith while (n-->0) { 1911b9cda22cSBarry Smith y[10*(*idx)] += alpha1*(*v); 1912b9cda22cSBarry Smith y[10*(*idx)+1] += alpha2*(*v); 1913b9cda22cSBarry Smith y[10*(*idx)+2] += alpha3*(*v); 1914b9cda22cSBarry Smith y[10*(*idx)+3] += alpha4*(*v); 1915b9cda22cSBarry Smith y[10*(*idx)+4] += alpha5*(*v); 1916b9cda22cSBarry Smith y[10*(*idx)+5] += alpha6*(*v); 1917b9cda22cSBarry Smith y[10*(*idx)+6] += alpha7*(*v); 1918b9cda22cSBarry Smith y[10*(*idx)+7] += alpha8*(*v); 1919b9cda22cSBarry Smith y[10*(*idx)+8] += alpha9*(*v); 1920b9cda22cSBarry Smith y[10*(*idx)+9] += alpha10*(*v); 1921b9cda22cSBarry Smith idx++; v++; 1922b9cda22cSBarry Smith } 1923b9cda22cSBarry Smith } 1924dc0b31edSSatish Balay ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 19253649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1926b9cda22cSBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1927b9cda22cSBarry Smith PetscFunctionReturn(0); 1928b9cda22cSBarry Smith } 1929b9cda22cSBarry Smith 19302b692628SMatthew Knepley 19312f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/ 19322f7816d4SBarry Smith #undef __FUNCT__ 1933dbdd7285SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_11" 1934dbdd7285SBarry Smith PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1935dbdd7285SBarry Smith { 1936dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1937dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1938d2840e09SBarry Smith const PetscScalar *x,*v; 1939d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1940dbdd7285SBarry Smith PetscErrorCode ierr; 1941d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1942d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 1943dbdd7285SBarry Smith 1944dbdd7285SBarry Smith PetscFunctionBegin; 19453649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1946dbdd7285SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1947dbdd7285SBarry Smith idx = a->j; 1948dbdd7285SBarry Smith v = a->a; 1949dbdd7285SBarry Smith ii = a->i; 1950dbdd7285SBarry Smith 1951dbdd7285SBarry Smith for (i=0; i<m; i++) { 1952dbdd7285SBarry Smith jrow = ii[i]; 1953dbdd7285SBarry Smith n = ii[i+1] - jrow; 1954dbdd7285SBarry Smith sum1 = 0.0; 1955dbdd7285SBarry Smith sum2 = 0.0; 1956dbdd7285SBarry Smith sum3 = 0.0; 1957dbdd7285SBarry Smith sum4 = 0.0; 1958dbdd7285SBarry Smith sum5 = 0.0; 1959dbdd7285SBarry Smith sum6 = 0.0; 1960dbdd7285SBarry Smith sum7 = 0.0; 1961dbdd7285SBarry Smith sum8 = 0.0; 1962dbdd7285SBarry Smith sum9 = 0.0; 1963dbdd7285SBarry Smith sum10 = 0.0; 1964dbdd7285SBarry Smith sum11 = 0.0; 196526fbe8dcSKarl Rupp 1966dbdd7285SBarry Smith nonzerorow += (n>0); 1967dbdd7285SBarry Smith for (j=0; j<n; j++) { 1968dbdd7285SBarry Smith sum1 += v[jrow]*x[11*idx[jrow]]; 1969dbdd7285SBarry Smith sum2 += v[jrow]*x[11*idx[jrow]+1]; 1970dbdd7285SBarry Smith sum3 += v[jrow]*x[11*idx[jrow]+2]; 1971dbdd7285SBarry Smith sum4 += v[jrow]*x[11*idx[jrow]+3]; 1972dbdd7285SBarry Smith sum5 += v[jrow]*x[11*idx[jrow]+4]; 1973dbdd7285SBarry Smith sum6 += v[jrow]*x[11*idx[jrow]+5]; 1974dbdd7285SBarry Smith sum7 += v[jrow]*x[11*idx[jrow]+6]; 1975dbdd7285SBarry Smith sum8 += v[jrow]*x[11*idx[jrow]+7]; 1976dbdd7285SBarry Smith sum9 += v[jrow]*x[11*idx[jrow]+8]; 1977dbdd7285SBarry Smith sum10 += v[jrow]*x[11*idx[jrow]+9]; 1978dbdd7285SBarry Smith sum11 += v[jrow]*x[11*idx[jrow]+10]; 1979dbdd7285SBarry Smith jrow++; 1980dbdd7285SBarry Smith } 1981dbdd7285SBarry Smith y[11*i] = sum1; 1982dbdd7285SBarry Smith y[11*i+1] = sum2; 1983dbdd7285SBarry Smith y[11*i+2] = sum3; 1984dbdd7285SBarry Smith y[11*i+3] = sum4; 1985dbdd7285SBarry Smith y[11*i+4] = sum5; 1986dbdd7285SBarry Smith y[11*i+5] = sum6; 1987dbdd7285SBarry Smith y[11*i+6] = sum7; 1988dbdd7285SBarry Smith y[11*i+7] = sum8; 1989dbdd7285SBarry Smith y[11*i+8] = sum9; 1990dbdd7285SBarry Smith y[11*i+9] = sum10; 1991dbdd7285SBarry Smith y[11*i+10] = sum11; 1992dbdd7285SBarry Smith } 1993dbdd7285SBarry Smith 1994dbdd7285SBarry Smith ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr); 19953649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1996dbdd7285SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1997dbdd7285SBarry Smith PetscFunctionReturn(0); 1998dbdd7285SBarry Smith } 1999dbdd7285SBarry Smith 2000dbdd7285SBarry Smith #undef __FUNCT__ 2001dbdd7285SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_11" 2002dbdd7285SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 2003dbdd7285SBarry Smith { 2004dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2005dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2006d2840e09SBarry Smith const PetscScalar *x,*v; 2007d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 2008dbdd7285SBarry Smith PetscErrorCode ierr; 2009d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2010dbdd7285SBarry Smith PetscInt n,i,jrow,j; 2011dbdd7285SBarry Smith 2012dbdd7285SBarry Smith PetscFunctionBegin; 2013dbdd7285SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 20143649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2015dbdd7285SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2016dbdd7285SBarry Smith idx = a->j; 2017dbdd7285SBarry Smith v = a->a; 2018dbdd7285SBarry Smith ii = a->i; 2019dbdd7285SBarry Smith 2020dbdd7285SBarry Smith for (i=0; i<m; i++) { 2021dbdd7285SBarry Smith jrow = ii[i]; 2022dbdd7285SBarry Smith n = ii[i+1] - jrow; 2023dbdd7285SBarry Smith sum1 = 0.0; 2024dbdd7285SBarry Smith sum2 = 0.0; 2025dbdd7285SBarry Smith sum3 = 0.0; 2026dbdd7285SBarry Smith sum4 = 0.0; 2027dbdd7285SBarry Smith sum5 = 0.0; 2028dbdd7285SBarry Smith sum6 = 0.0; 2029dbdd7285SBarry Smith sum7 = 0.0; 2030dbdd7285SBarry Smith sum8 = 0.0; 2031dbdd7285SBarry Smith sum9 = 0.0; 2032dbdd7285SBarry Smith sum10 = 0.0; 2033dbdd7285SBarry Smith sum11 = 0.0; 2034dbdd7285SBarry Smith for (j=0; j<n; j++) { 2035dbdd7285SBarry Smith sum1 += v[jrow]*x[11*idx[jrow]]; 2036dbdd7285SBarry Smith sum2 += v[jrow]*x[11*idx[jrow]+1]; 2037dbdd7285SBarry Smith sum3 += v[jrow]*x[11*idx[jrow]+2]; 2038dbdd7285SBarry Smith sum4 += v[jrow]*x[11*idx[jrow]+3]; 2039dbdd7285SBarry Smith sum5 += v[jrow]*x[11*idx[jrow]+4]; 2040dbdd7285SBarry Smith sum6 += v[jrow]*x[11*idx[jrow]+5]; 2041dbdd7285SBarry Smith sum7 += v[jrow]*x[11*idx[jrow]+6]; 2042dbdd7285SBarry Smith sum8 += v[jrow]*x[11*idx[jrow]+7]; 2043dbdd7285SBarry Smith sum9 += v[jrow]*x[11*idx[jrow]+8]; 2044dbdd7285SBarry Smith sum10 += v[jrow]*x[11*idx[jrow]+9]; 2045dbdd7285SBarry Smith sum11 += v[jrow]*x[11*idx[jrow]+10]; 2046dbdd7285SBarry Smith jrow++; 2047dbdd7285SBarry Smith } 2048dbdd7285SBarry Smith y[11*i] += sum1; 2049dbdd7285SBarry Smith y[11*i+1] += sum2; 2050dbdd7285SBarry Smith y[11*i+2] += sum3; 2051dbdd7285SBarry Smith y[11*i+3] += sum4; 2052dbdd7285SBarry Smith y[11*i+4] += sum5; 2053dbdd7285SBarry Smith y[11*i+5] += sum6; 2054dbdd7285SBarry Smith y[11*i+6] += sum7; 2055dbdd7285SBarry Smith y[11*i+7] += sum8; 2056dbdd7285SBarry Smith y[11*i+8] += sum9; 2057dbdd7285SBarry Smith y[11*i+9] += sum10; 2058dbdd7285SBarry Smith y[11*i+10] += sum11; 2059dbdd7285SBarry Smith } 2060dbdd7285SBarry Smith 2061dbdd7285SBarry Smith ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 20623649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2063dbdd7285SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2064dbdd7285SBarry Smith PetscFunctionReturn(0); 2065dbdd7285SBarry Smith } 2066dbdd7285SBarry Smith 2067dbdd7285SBarry Smith #undef __FUNCT__ 2068dbdd7285SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_11" 2069dbdd7285SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 2070dbdd7285SBarry Smith { 2071dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2072dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2073d2840e09SBarry Smith const PetscScalar *x,*v; 2074d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 2075dbdd7285SBarry Smith PetscErrorCode ierr; 2076d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2077d2840e09SBarry Smith PetscInt n,i; 2078dbdd7285SBarry Smith 2079dbdd7285SBarry Smith PetscFunctionBegin; 2080d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 20813649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2082dbdd7285SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2083dbdd7285SBarry Smith 2084dbdd7285SBarry Smith for (i=0; i<m; i++) { 2085dbdd7285SBarry Smith idx = a->j + a->i[i]; 2086dbdd7285SBarry Smith v = a->a + a->i[i]; 2087dbdd7285SBarry Smith n = a->i[i+1] - a->i[i]; 2088dbdd7285SBarry Smith alpha1 = x[11*i]; 2089dbdd7285SBarry Smith alpha2 = x[11*i+1]; 2090dbdd7285SBarry Smith alpha3 = x[11*i+2]; 2091dbdd7285SBarry Smith alpha4 = x[11*i+3]; 2092dbdd7285SBarry Smith alpha5 = x[11*i+4]; 2093dbdd7285SBarry Smith alpha6 = x[11*i+5]; 2094dbdd7285SBarry Smith alpha7 = x[11*i+6]; 2095dbdd7285SBarry Smith alpha8 = x[11*i+7]; 2096dbdd7285SBarry Smith alpha9 = x[11*i+8]; 2097dbdd7285SBarry Smith alpha10 = x[11*i+9]; 2098dbdd7285SBarry Smith alpha11 = x[11*i+10]; 2099dbdd7285SBarry Smith while (n-->0) { 2100dbdd7285SBarry Smith y[11*(*idx)] += alpha1*(*v); 2101dbdd7285SBarry Smith y[11*(*idx)+1] += alpha2*(*v); 2102dbdd7285SBarry Smith y[11*(*idx)+2] += alpha3*(*v); 2103dbdd7285SBarry Smith y[11*(*idx)+3] += alpha4*(*v); 2104dbdd7285SBarry Smith y[11*(*idx)+4] += alpha5*(*v); 2105dbdd7285SBarry Smith y[11*(*idx)+5] += alpha6*(*v); 2106dbdd7285SBarry Smith y[11*(*idx)+6] += alpha7*(*v); 2107dbdd7285SBarry Smith y[11*(*idx)+7] += alpha8*(*v); 2108dbdd7285SBarry Smith y[11*(*idx)+8] += alpha9*(*v); 2109dbdd7285SBarry Smith y[11*(*idx)+9] += alpha10*(*v); 2110dbdd7285SBarry Smith y[11*(*idx)+10] += alpha11*(*v); 2111dbdd7285SBarry Smith idx++; v++; 2112dbdd7285SBarry Smith } 2113dbdd7285SBarry Smith } 2114dbdd7285SBarry Smith ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 21153649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2116dbdd7285SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2117dbdd7285SBarry Smith PetscFunctionReturn(0); 2118dbdd7285SBarry Smith } 2119dbdd7285SBarry Smith 2120dbdd7285SBarry Smith #undef __FUNCT__ 2121dbdd7285SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_11" 2122dbdd7285SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 2123dbdd7285SBarry Smith { 2124dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2125dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2126d2840e09SBarry Smith const PetscScalar *x,*v; 2127d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 2128dbdd7285SBarry Smith PetscErrorCode ierr; 2129d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2130d2840e09SBarry Smith PetscInt n,i; 2131dbdd7285SBarry Smith 2132dbdd7285SBarry Smith PetscFunctionBegin; 2133dbdd7285SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 21343649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2135dbdd7285SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2136dbdd7285SBarry Smith for (i=0; i<m; i++) { 2137dbdd7285SBarry Smith idx = a->j + a->i[i]; 2138dbdd7285SBarry Smith v = a->a + a->i[i]; 2139dbdd7285SBarry Smith n = a->i[i+1] - a->i[i]; 2140dbdd7285SBarry Smith alpha1 = x[11*i]; 2141dbdd7285SBarry Smith alpha2 = x[11*i+1]; 2142dbdd7285SBarry Smith alpha3 = x[11*i+2]; 2143dbdd7285SBarry Smith alpha4 = x[11*i+3]; 2144dbdd7285SBarry Smith alpha5 = x[11*i+4]; 2145dbdd7285SBarry Smith alpha6 = x[11*i+5]; 2146dbdd7285SBarry Smith alpha7 = x[11*i+6]; 2147dbdd7285SBarry Smith alpha8 = x[11*i+7]; 2148dbdd7285SBarry Smith alpha9 = x[11*i+8]; 2149dbdd7285SBarry Smith alpha10 = x[11*i+9]; 2150dbdd7285SBarry Smith alpha11 = x[11*i+10]; 2151dbdd7285SBarry Smith while (n-->0) { 2152dbdd7285SBarry Smith y[11*(*idx)] += alpha1*(*v); 2153dbdd7285SBarry Smith y[11*(*idx)+1] += alpha2*(*v); 2154dbdd7285SBarry Smith y[11*(*idx)+2] += alpha3*(*v); 2155dbdd7285SBarry Smith y[11*(*idx)+3] += alpha4*(*v); 2156dbdd7285SBarry Smith y[11*(*idx)+4] += alpha5*(*v); 2157dbdd7285SBarry Smith y[11*(*idx)+5] += alpha6*(*v); 2158dbdd7285SBarry Smith y[11*(*idx)+6] += alpha7*(*v); 2159dbdd7285SBarry Smith y[11*(*idx)+7] += alpha8*(*v); 2160dbdd7285SBarry Smith y[11*(*idx)+8] += alpha9*(*v); 2161dbdd7285SBarry Smith y[11*(*idx)+9] += alpha10*(*v); 2162dbdd7285SBarry Smith y[11*(*idx)+10] += alpha11*(*v); 2163dbdd7285SBarry Smith idx++; v++; 2164dbdd7285SBarry Smith } 2165dbdd7285SBarry Smith } 2166dbdd7285SBarry Smith ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 21673649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2168dbdd7285SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2169dbdd7285SBarry Smith PetscFunctionReturn(0); 2170dbdd7285SBarry Smith } 2171dbdd7285SBarry Smith 2172dbdd7285SBarry Smith 2173dbdd7285SBarry Smith /*--------------------------------------------------------------------------------------------*/ 2174dbdd7285SBarry Smith #undef __FUNCT__ 21752f7816d4SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_16" 2176dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 21772f7816d4SBarry Smith { 21782f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 21792f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2180d2840e09SBarry Smith const PetscScalar *x,*v; 2181d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 21822f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2183dfbe8321SBarry Smith PetscErrorCode ierr; 2184d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2185d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 21862f7816d4SBarry Smith 21872f7816d4SBarry Smith PetscFunctionBegin; 21883649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 21891ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 21902f7816d4SBarry Smith idx = a->j; 21912f7816d4SBarry Smith v = a->a; 21922f7816d4SBarry Smith ii = a->i; 21932f7816d4SBarry Smith 21942f7816d4SBarry Smith for (i=0; i<m; i++) { 21952f7816d4SBarry Smith jrow = ii[i]; 21962f7816d4SBarry Smith n = ii[i+1] - jrow; 21972f7816d4SBarry Smith sum1 = 0.0; 21982f7816d4SBarry Smith sum2 = 0.0; 21992f7816d4SBarry Smith sum3 = 0.0; 22002f7816d4SBarry Smith sum4 = 0.0; 22012f7816d4SBarry Smith sum5 = 0.0; 22022f7816d4SBarry Smith sum6 = 0.0; 22032f7816d4SBarry Smith sum7 = 0.0; 22042f7816d4SBarry Smith sum8 = 0.0; 22052f7816d4SBarry Smith sum9 = 0.0; 22062f7816d4SBarry Smith sum10 = 0.0; 22072f7816d4SBarry Smith sum11 = 0.0; 22082f7816d4SBarry Smith sum12 = 0.0; 22092f7816d4SBarry Smith sum13 = 0.0; 22102f7816d4SBarry Smith sum14 = 0.0; 22112f7816d4SBarry Smith sum15 = 0.0; 22122f7816d4SBarry Smith sum16 = 0.0; 221326fbe8dcSKarl Rupp 2214b3c51c66SMatthew Knepley nonzerorow += (n>0); 22152f7816d4SBarry Smith for (j=0; j<n; j++) { 22162f7816d4SBarry Smith sum1 += v[jrow]*x[16*idx[jrow]]; 22172f7816d4SBarry Smith sum2 += v[jrow]*x[16*idx[jrow]+1]; 22182f7816d4SBarry Smith sum3 += v[jrow]*x[16*idx[jrow]+2]; 22192f7816d4SBarry Smith sum4 += v[jrow]*x[16*idx[jrow]+3]; 22202f7816d4SBarry Smith sum5 += v[jrow]*x[16*idx[jrow]+4]; 22212f7816d4SBarry Smith sum6 += v[jrow]*x[16*idx[jrow]+5]; 22222f7816d4SBarry Smith sum7 += v[jrow]*x[16*idx[jrow]+6]; 22232f7816d4SBarry Smith sum8 += v[jrow]*x[16*idx[jrow]+7]; 22242f7816d4SBarry Smith sum9 += v[jrow]*x[16*idx[jrow]+8]; 22252f7816d4SBarry Smith sum10 += v[jrow]*x[16*idx[jrow]+9]; 22262f7816d4SBarry Smith sum11 += v[jrow]*x[16*idx[jrow]+10]; 22272f7816d4SBarry Smith sum12 += v[jrow]*x[16*idx[jrow]+11]; 22282f7816d4SBarry Smith sum13 += v[jrow]*x[16*idx[jrow]+12]; 22292f7816d4SBarry Smith sum14 += v[jrow]*x[16*idx[jrow]+13]; 22302f7816d4SBarry Smith sum15 += v[jrow]*x[16*idx[jrow]+14]; 22312f7816d4SBarry Smith sum16 += v[jrow]*x[16*idx[jrow]+15]; 22322f7816d4SBarry Smith jrow++; 22332f7816d4SBarry Smith } 22342f7816d4SBarry Smith y[16*i] = sum1; 22352f7816d4SBarry Smith y[16*i+1] = sum2; 22362f7816d4SBarry Smith y[16*i+2] = sum3; 22372f7816d4SBarry Smith y[16*i+3] = sum4; 22382f7816d4SBarry Smith y[16*i+4] = sum5; 22392f7816d4SBarry Smith y[16*i+5] = sum6; 22402f7816d4SBarry Smith y[16*i+6] = sum7; 22412f7816d4SBarry Smith y[16*i+7] = sum8; 22422f7816d4SBarry Smith y[16*i+8] = sum9; 22432f7816d4SBarry Smith y[16*i+9] = sum10; 22442f7816d4SBarry Smith y[16*i+10] = sum11; 22452f7816d4SBarry Smith y[16*i+11] = sum12; 22462f7816d4SBarry Smith y[16*i+12] = sum13; 22472f7816d4SBarry Smith y[16*i+13] = sum14; 22482f7816d4SBarry Smith y[16*i+14] = sum15; 22492f7816d4SBarry Smith y[16*i+15] = sum16; 22502f7816d4SBarry Smith } 22512f7816d4SBarry Smith 2252dc0b31edSSatish Balay ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr); 22533649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 22541ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 22552f7816d4SBarry Smith PetscFunctionReturn(0); 22562f7816d4SBarry Smith } 22572f7816d4SBarry Smith 22582f7816d4SBarry Smith #undef __FUNCT__ 22592f7816d4SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 2260dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 22612f7816d4SBarry Smith { 22622f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 22632f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2264d2840e09SBarry Smith const PetscScalar *x,*v; 2265d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 22662f7816d4SBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2267dfbe8321SBarry Smith PetscErrorCode ierr; 2268d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2269d2840e09SBarry Smith PetscInt n,i; 22702f7816d4SBarry Smith 22712f7816d4SBarry Smith PetscFunctionBegin; 2272d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 22733649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 22741ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2275bfec09a0SHong Zhang 22762f7816d4SBarry Smith for (i=0; i<m; i++) { 2277bfec09a0SHong Zhang idx = a->j + a->i[i]; 2278bfec09a0SHong Zhang v = a->a + a->i[i]; 22792f7816d4SBarry Smith n = a->i[i+1] - a->i[i]; 22802f7816d4SBarry Smith alpha1 = x[16*i]; 22812f7816d4SBarry Smith alpha2 = x[16*i+1]; 22822f7816d4SBarry Smith alpha3 = x[16*i+2]; 22832f7816d4SBarry Smith alpha4 = x[16*i+3]; 22842f7816d4SBarry Smith alpha5 = x[16*i+4]; 22852f7816d4SBarry Smith alpha6 = x[16*i+5]; 22862f7816d4SBarry Smith alpha7 = x[16*i+6]; 22872f7816d4SBarry Smith alpha8 = x[16*i+7]; 22882f7816d4SBarry Smith alpha9 = x[16*i+8]; 22892f7816d4SBarry Smith alpha10 = x[16*i+9]; 22902f7816d4SBarry Smith alpha11 = x[16*i+10]; 22912f7816d4SBarry Smith alpha12 = x[16*i+11]; 22922f7816d4SBarry Smith alpha13 = x[16*i+12]; 22932f7816d4SBarry Smith alpha14 = x[16*i+13]; 22942f7816d4SBarry Smith alpha15 = x[16*i+14]; 22952f7816d4SBarry Smith alpha16 = x[16*i+15]; 22962f7816d4SBarry Smith while (n-->0) { 22972f7816d4SBarry Smith y[16*(*idx)] += alpha1*(*v); 22982f7816d4SBarry Smith y[16*(*idx)+1] += alpha2*(*v); 22992f7816d4SBarry Smith y[16*(*idx)+2] += alpha3*(*v); 23002f7816d4SBarry Smith y[16*(*idx)+3] += alpha4*(*v); 23012f7816d4SBarry Smith y[16*(*idx)+4] += alpha5*(*v); 23022f7816d4SBarry Smith y[16*(*idx)+5] += alpha6*(*v); 23032f7816d4SBarry Smith y[16*(*idx)+6] += alpha7*(*v); 23042f7816d4SBarry Smith y[16*(*idx)+7] += alpha8*(*v); 23052f7816d4SBarry Smith y[16*(*idx)+8] += alpha9*(*v); 23062f7816d4SBarry Smith y[16*(*idx)+9] += alpha10*(*v); 23072f7816d4SBarry Smith y[16*(*idx)+10] += alpha11*(*v); 23082f7816d4SBarry Smith y[16*(*idx)+11] += alpha12*(*v); 23092f7816d4SBarry Smith y[16*(*idx)+12] += alpha13*(*v); 23102f7816d4SBarry Smith y[16*(*idx)+13] += alpha14*(*v); 23112f7816d4SBarry Smith y[16*(*idx)+14] += alpha15*(*v); 23122f7816d4SBarry Smith y[16*(*idx)+15] += alpha16*(*v); 23132f7816d4SBarry Smith idx++; v++; 23142f7816d4SBarry Smith } 23152f7816d4SBarry Smith } 2316dc0b31edSSatish Balay ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 23173649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 23181ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 23192f7816d4SBarry Smith PetscFunctionReturn(0); 23202f7816d4SBarry Smith } 23212f7816d4SBarry Smith 23222f7816d4SBarry Smith #undef __FUNCT__ 23232f7816d4SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 2324dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 23252f7816d4SBarry Smith { 23262f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 23272f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2328d2840e09SBarry Smith const PetscScalar *x,*v; 2329d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 23302f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2331dfbe8321SBarry Smith PetscErrorCode ierr; 2332d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2333b24ad042SBarry Smith PetscInt n,i,jrow,j; 23342f7816d4SBarry Smith 23352f7816d4SBarry Smith PetscFunctionBegin; 23362f7816d4SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 23373649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 23381ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 23392f7816d4SBarry Smith idx = a->j; 23402f7816d4SBarry Smith v = a->a; 23412f7816d4SBarry Smith ii = a->i; 23422f7816d4SBarry Smith 23432f7816d4SBarry Smith for (i=0; i<m; i++) { 23442f7816d4SBarry Smith jrow = ii[i]; 23452f7816d4SBarry Smith n = ii[i+1] - jrow; 23462f7816d4SBarry Smith sum1 = 0.0; 23472f7816d4SBarry Smith sum2 = 0.0; 23482f7816d4SBarry Smith sum3 = 0.0; 23492f7816d4SBarry Smith sum4 = 0.0; 23502f7816d4SBarry Smith sum5 = 0.0; 23512f7816d4SBarry Smith sum6 = 0.0; 23522f7816d4SBarry Smith sum7 = 0.0; 23532f7816d4SBarry Smith sum8 = 0.0; 23542f7816d4SBarry Smith sum9 = 0.0; 23552f7816d4SBarry Smith sum10 = 0.0; 23562f7816d4SBarry Smith sum11 = 0.0; 23572f7816d4SBarry Smith sum12 = 0.0; 23582f7816d4SBarry Smith sum13 = 0.0; 23592f7816d4SBarry Smith sum14 = 0.0; 23602f7816d4SBarry Smith sum15 = 0.0; 23612f7816d4SBarry Smith sum16 = 0.0; 23622f7816d4SBarry Smith for (j=0; j<n; j++) { 23632f7816d4SBarry Smith sum1 += v[jrow]*x[16*idx[jrow]]; 23642f7816d4SBarry Smith sum2 += v[jrow]*x[16*idx[jrow]+1]; 23652f7816d4SBarry Smith sum3 += v[jrow]*x[16*idx[jrow]+2]; 23662f7816d4SBarry Smith sum4 += v[jrow]*x[16*idx[jrow]+3]; 23672f7816d4SBarry Smith sum5 += v[jrow]*x[16*idx[jrow]+4]; 23682f7816d4SBarry Smith sum6 += v[jrow]*x[16*idx[jrow]+5]; 23692f7816d4SBarry Smith sum7 += v[jrow]*x[16*idx[jrow]+6]; 23702f7816d4SBarry Smith sum8 += v[jrow]*x[16*idx[jrow]+7]; 23712f7816d4SBarry Smith sum9 += v[jrow]*x[16*idx[jrow]+8]; 23722f7816d4SBarry Smith sum10 += v[jrow]*x[16*idx[jrow]+9]; 23732f7816d4SBarry Smith sum11 += v[jrow]*x[16*idx[jrow]+10]; 23742f7816d4SBarry Smith sum12 += v[jrow]*x[16*idx[jrow]+11]; 23752f7816d4SBarry Smith sum13 += v[jrow]*x[16*idx[jrow]+12]; 23762f7816d4SBarry Smith sum14 += v[jrow]*x[16*idx[jrow]+13]; 23772f7816d4SBarry Smith sum15 += v[jrow]*x[16*idx[jrow]+14]; 23782f7816d4SBarry Smith sum16 += v[jrow]*x[16*idx[jrow]+15]; 23792f7816d4SBarry Smith jrow++; 23802f7816d4SBarry Smith } 23812f7816d4SBarry Smith y[16*i] += sum1; 23822f7816d4SBarry Smith y[16*i+1] += sum2; 23832f7816d4SBarry Smith y[16*i+2] += sum3; 23842f7816d4SBarry Smith y[16*i+3] += sum4; 23852f7816d4SBarry Smith y[16*i+4] += sum5; 23862f7816d4SBarry Smith y[16*i+5] += sum6; 23872f7816d4SBarry Smith y[16*i+6] += sum7; 23882f7816d4SBarry Smith y[16*i+7] += sum8; 23892f7816d4SBarry Smith y[16*i+8] += sum9; 23902f7816d4SBarry Smith y[16*i+9] += sum10; 23912f7816d4SBarry Smith y[16*i+10] += sum11; 23922f7816d4SBarry Smith y[16*i+11] += sum12; 23932f7816d4SBarry Smith y[16*i+12] += sum13; 23942f7816d4SBarry Smith y[16*i+13] += sum14; 23952f7816d4SBarry Smith y[16*i+14] += sum15; 23962f7816d4SBarry Smith y[16*i+15] += sum16; 23972f7816d4SBarry Smith } 23982f7816d4SBarry Smith 2399dc0b31edSSatish Balay ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 24003649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 24011ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 24022f7816d4SBarry Smith PetscFunctionReturn(0); 24032f7816d4SBarry Smith } 24042f7816d4SBarry Smith 24052f7816d4SBarry Smith #undef __FUNCT__ 24062f7816d4SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 2407dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 24082f7816d4SBarry Smith { 24092f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 24102f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2411d2840e09SBarry Smith const PetscScalar *x,*v; 2412d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 24132f7816d4SBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2414dfbe8321SBarry Smith PetscErrorCode ierr; 2415d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2416d2840e09SBarry Smith PetscInt n,i; 24172f7816d4SBarry Smith 24182f7816d4SBarry Smith PetscFunctionBegin; 24192f7816d4SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 24203649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 24211ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 24222f7816d4SBarry Smith for (i=0; i<m; i++) { 2423bfec09a0SHong Zhang idx = a->j + a->i[i]; 2424bfec09a0SHong Zhang v = a->a + a->i[i]; 24252f7816d4SBarry Smith n = a->i[i+1] - a->i[i]; 24262f7816d4SBarry Smith alpha1 = x[16*i]; 24272f7816d4SBarry Smith alpha2 = x[16*i+1]; 24282f7816d4SBarry Smith alpha3 = x[16*i+2]; 24292f7816d4SBarry Smith alpha4 = x[16*i+3]; 24302f7816d4SBarry Smith alpha5 = x[16*i+4]; 24312f7816d4SBarry Smith alpha6 = x[16*i+5]; 24322f7816d4SBarry Smith alpha7 = x[16*i+6]; 24332f7816d4SBarry Smith alpha8 = x[16*i+7]; 24342f7816d4SBarry Smith alpha9 = x[16*i+8]; 24352f7816d4SBarry Smith alpha10 = x[16*i+9]; 24362f7816d4SBarry Smith alpha11 = x[16*i+10]; 24372f7816d4SBarry Smith alpha12 = x[16*i+11]; 24382f7816d4SBarry Smith alpha13 = x[16*i+12]; 24392f7816d4SBarry Smith alpha14 = x[16*i+13]; 24402f7816d4SBarry Smith alpha15 = x[16*i+14]; 24412f7816d4SBarry Smith alpha16 = x[16*i+15]; 24422f7816d4SBarry Smith while (n-->0) { 24432f7816d4SBarry Smith y[16*(*idx)] += alpha1*(*v); 24442f7816d4SBarry Smith y[16*(*idx)+1] += alpha2*(*v); 24452f7816d4SBarry Smith y[16*(*idx)+2] += alpha3*(*v); 24462f7816d4SBarry Smith y[16*(*idx)+3] += alpha4*(*v); 24472f7816d4SBarry Smith y[16*(*idx)+4] += alpha5*(*v); 24482f7816d4SBarry Smith y[16*(*idx)+5] += alpha6*(*v); 24492f7816d4SBarry Smith y[16*(*idx)+6] += alpha7*(*v); 24502f7816d4SBarry Smith y[16*(*idx)+7] += alpha8*(*v); 24512f7816d4SBarry Smith y[16*(*idx)+8] += alpha9*(*v); 24522f7816d4SBarry Smith y[16*(*idx)+9] += alpha10*(*v); 24532f7816d4SBarry Smith y[16*(*idx)+10] += alpha11*(*v); 24542f7816d4SBarry Smith y[16*(*idx)+11] += alpha12*(*v); 24552f7816d4SBarry Smith y[16*(*idx)+12] += alpha13*(*v); 24562f7816d4SBarry Smith y[16*(*idx)+13] += alpha14*(*v); 24572f7816d4SBarry Smith y[16*(*idx)+14] += alpha15*(*v); 24582f7816d4SBarry Smith y[16*(*idx)+15] += alpha16*(*v); 24592f7816d4SBarry Smith idx++; v++; 24602f7816d4SBarry Smith } 24612f7816d4SBarry Smith } 2462dc0b31edSSatish Balay ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 24633649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 24641ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 246566ed3db0SBarry Smith PetscFunctionReturn(0); 246666ed3db0SBarry Smith } 246766ed3db0SBarry Smith 2468ed1c418cSBarry Smith /*--------------------------------------------------------------------------------------------*/ 2469ed1c418cSBarry Smith #undef __FUNCT__ 2470ed1c418cSBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_18" 2471ed1c418cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2472ed1c418cSBarry Smith { 2473ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2474ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2475d2840e09SBarry Smith const PetscScalar *x,*v; 2476d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2477ed1c418cSBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2478ed1c418cSBarry Smith PetscErrorCode ierr; 2479d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2480d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 2481ed1c418cSBarry Smith 2482ed1c418cSBarry Smith PetscFunctionBegin; 24833649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2484ed1c418cSBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2485ed1c418cSBarry Smith idx = a->j; 2486ed1c418cSBarry Smith v = a->a; 2487ed1c418cSBarry Smith ii = a->i; 2488ed1c418cSBarry Smith 2489ed1c418cSBarry Smith for (i=0; i<m; i++) { 2490ed1c418cSBarry Smith jrow = ii[i]; 2491ed1c418cSBarry Smith n = ii[i+1] - jrow; 2492ed1c418cSBarry Smith sum1 = 0.0; 2493ed1c418cSBarry Smith sum2 = 0.0; 2494ed1c418cSBarry Smith sum3 = 0.0; 2495ed1c418cSBarry Smith sum4 = 0.0; 2496ed1c418cSBarry Smith sum5 = 0.0; 2497ed1c418cSBarry Smith sum6 = 0.0; 2498ed1c418cSBarry Smith sum7 = 0.0; 2499ed1c418cSBarry Smith sum8 = 0.0; 2500ed1c418cSBarry Smith sum9 = 0.0; 2501ed1c418cSBarry Smith sum10 = 0.0; 2502ed1c418cSBarry Smith sum11 = 0.0; 2503ed1c418cSBarry Smith sum12 = 0.0; 2504ed1c418cSBarry Smith sum13 = 0.0; 2505ed1c418cSBarry Smith sum14 = 0.0; 2506ed1c418cSBarry Smith sum15 = 0.0; 2507ed1c418cSBarry Smith sum16 = 0.0; 2508ed1c418cSBarry Smith sum17 = 0.0; 2509ed1c418cSBarry Smith sum18 = 0.0; 251026fbe8dcSKarl Rupp 2511ed1c418cSBarry Smith nonzerorow += (n>0); 2512ed1c418cSBarry Smith for (j=0; j<n; j++) { 2513ed1c418cSBarry Smith sum1 += v[jrow]*x[18*idx[jrow]]; 2514ed1c418cSBarry Smith sum2 += v[jrow]*x[18*idx[jrow]+1]; 2515ed1c418cSBarry Smith sum3 += v[jrow]*x[18*idx[jrow]+2]; 2516ed1c418cSBarry Smith sum4 += v[jrow]*x[18*idx[jrow]+3]; 2517ed1c418cSBarry Smith sum5 += v[jrow]*x[18*idx[jrow]+4]; 2518ed1c418cSBarry Smith sum6 += v[jrow]*x[18*idx[jrow]+5]; 2519ed1c418cSBarry Smith sum7 += v[jrow]*x[18*idx[jrow]+6]; 2520ed1c418cSBarry Smith sum8 += v[jrow]*x[18*idx[jrow]+7]; 2521ed1c418cSBarry Smith sum9 += v[jrow]*x[18*idx[jrow]+8]; 2522ed1c418cSBarry Smith sum10 += v[jrow]*x[18*idx[jrow]+9]; 2523ed1c418cSBarry Smith sum11 += v[jrow]*x[18*idx[jrow]+10]; 2524ed1c418cSBarry Smith sum12 += v[jrow]*x[18*idx[jrow]+11]; 2525ed1c418cSBarry Smith sum13 += v[jrow]*x[18*idx[jrow]+12]; 2526ed1c418cSBarry Smith sum14 += v[jrow]*x[18*idx[jrow]+13]; 2527ed1c418cSBarry Smith sum15 += v[jrow]*x[18*idx[jrow]+14]; 2528ed1c418cSBarry Smith sum16 += v[jrow]*x[18*idx[jrow]+15]; 2529ed1c418cSBarry Smith sum17 += v[jrow]*x[18*idx[jrow]+16]; 2530ed1c418cSBarry Smith sum18 += v[jrow]*x[18*idx[jrow]+17]; 2531ed1c418cSBarry Smith jrow++; 2532ed1c418cSBarry Smith } 2533ed1c418cSBarry Smith y[18*i] = sum1; 2534ed1c418cSBarry Smith y[18*i+1] = sum2; 2535ed1c418cSBarry Smith y[18*i+2] = sum3; 2536ed1c418cSBarry Smith y[18*i+3] = sum4; 2537ed1c418cSBarry Smith y[18*i+4] = sum5; 2538ed1c418cSBarry Smith y[18*i+5] = sum6; 2539ed1c418cSBarry Smith y[18*i+6] = sum7; 2540ed1c418cSBarry Smith y[18*i+7] = sum8; 2541ed1c418cSBarry Smith y[18*i+8] = sum9; 2542ed1c418cSBarry Smith y[18*i+9] = sum10; 2543ed1c418cSBarry Smith y[18*i+10] = sum11; 2544ed1c418cSBarry Smith y[18*i+11] = sum12; 2545ed1c418cSBarry Smith y[18*i+12] = sum13; 2546ed1c418cSBarry Smith y[18*i+13] = sum14; 2547ed1c418cSBarry Smith y[18*i+14] = sum15; 2548ed1c418cSBarry Smith y[18*i+15] = sum16; 2549ed1c418cSBarry Smith y[18*i+16] = sum17; 2550ed1c418cSBarry Smith y[18*i+17] = sum18; 2551ed1c418cSBarry Smith } 2552ed1c418cSBarry Smith 2553dc0b31edSSatish Balay ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr); 25543649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2555ed1c418cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2556ed1c418cSBarry Smith PetscFunctionReturn(0); 2557ed1c418cSBarry Smith } 2558ed1c418cSBarry Smith 2559ed1c418cSBarry Smith #undef __FUNCT__ 2560ed1c418cSBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_18" 2561ed1c418cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2562ed1c418cSBarry Smith { 2563ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2564ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2565d2840e09SBarry Smith const PetscScalar *x,*v; 2566d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2567ed1c418cSBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2568ed1c418cSBarry Smith PetscErrorCode ierr; 2569d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2570d2840e09SBarry Smith PetscInt n,i; 2571ed1c418cSBarry Smith 2572ed1c418cSBarry Smith PetscFunctionBegin; 2573d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 25743649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2575ed1c418cSBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2576ed1c418cSBarry Smith 2577ed1c418cSBarry Smith for (i=0; i<m; i++) { 2578ed1c418cSBarry Smith idx = a->j + a->i[i]; 2579ed1c418cSBarry Smith v = a->a + a->i[i]; 2580ed1c418cSBarry Smith n = a->i[i+1] - a->i[i]; 2581ed1c418cSBarry Smith alpha1 = x[18*i]; 2582ed1c418cSBarry Smith alpha2 = x[18*i+1]; 2583ed1c418cSBarry Smith alpha3 = x[18*i+2]; 2584ed1c418cSBarry Smith alpha4 = x[18*i+3]; 2585ed1c418cSBarry Smith alpha5 = x[18*i+4]; 2586ed1c418cSBarry Smith alpha6 = x[18*i+5]; 2587ed1c418cSBarry Smith alpha7 = x[18*i+6]; 2588ed1c418cSBarry Smith alpha8 = x[18*i+7]; 2589ed1c418cSBarry Smith alpha9 = x[18*i+8]; 2590ed1c418cSBarry Smith alpha10 = x[18*i+9]; 2591ed1c418cSBarry Smith alpha11 = x[18*i+10]; 2592ed1c418cSBarry Smith alpha12 = x[18*i+11]; 2593ed1c418cSBarry Smith alpha13 = x[18*i+12]; 2594ed1c418cSBarry Smith alpha14 = x[18*i+13]; 2595ed1c418cSBarry Smith alpha15 = x[18*i+14]; 2596ed1c418cSBarry Smith alpha16 = x[18*i+15]; 2597ed1c418cSBarry Smith alpha17 = x[18*i+16]; 2598ed1c418cSBarry Smith alpha18 = x[18*i+17]; 2599ed1c418cSBarry Smith while (n-->0) { 2600ed1c418cSBarry Smith y[18*(*idx)] += alpha1*(*v); 2601ed1c418cSBarry Smith y[18*(*idx)+1] += alpha2*(*v); 2602ed1c418cSBarry Smith y[18*(*idx)+2] += alpha3*(*v); 2603ed1c418cSBarry Smith y[18*(*idx)+3] += alpha4*(*v); 2604ed1c418cSBarry Smith y[18*(*idx)+4] += alpha5*(*v); 2605ed1c418cSBarry Smith y[18*(*idx)+5] += alpha6*(*v); 2606ed1c418cSBarry Smith y[18*(*idx)+6] += alpha7*(*v); 2607ed1c418cSBarry Smith y[18*(*idx)+7] += alpha8*(*v); 2608ed1c418cSBarry Smith y[18*(*idx)+8] += alpha9*(*v); 2609ed1c418cSBarry Smith y[18*(*idx)+9] += alpha10*(*v); 2610ed1c418cSBarry Smith y[18*(*idx)+10] += alpha11*(*v); 2611ed1c418cSBarry Smith y[18*(*idx)+11] += alpha12*(*v); 2612ed1c418cSBarry Smith y[18*(*idx)+12] += alpha13*(*v); 2613ed1c418cSBarry Smith y[18*(*idx)+13] += alpha14*(*v); 2614ed1c418cSBarry Smith y[18*(*idx)+14] += alpha15*(*v); 2615ed1c418cSBarry Smith y[18*(*idx)+15] += alpha16*(*v); 2616ed1c418cSBarry Smith y[18*(*idx)+16] += alpha17*(*v); 2617ed1c418cSBarry Smith y[18*(*idx)+17] += alpha18*(*v); 2618ed1c418cSBarry Smith idx++; v++; 2619ed1c418cSBarry Smith } 2620ed1c418cSBarry Smith } 2621dc0b31edSSatish Balay ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 26223649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2623ed1c418cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2624ed1c418cSBarry Smith PetscFunctionReturn(0); 2625ed1c418cSBarry Smith } 2626ed1c418cSBarry Smith 2627ed1c418cSBarry Smith #undef __FUNCT__ 2628ed1c418cSBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_18" 2629ed1c418cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2630ed1c418cSBarry Smith { 2631ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2632ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2633d2840e09SBarry Smith const PetscScalar *x,*v; 2634d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2635ed1c418cSBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2636ed1c418cSBarry Smith PetscErrorCode ierr; 2637d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2638ed1c418cSBarry Smith PetscInt n,i,jrow,j; 2639ed1c418cSBarry Smith 2640ed1c418cSBarry Smith PetscFunctionBegin; 2641ed1c418cSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 26423649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2643ed1c418cSBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2644ed1c418cSBarry Smith idx = a->j; 2645ed1c418cSBarry Smith v = a->a; 2646ed1c418cSBarry Smith ii = a->i; 2647ed1c418cSBarry Smith 2648ed1c418cSBarry Smith for (i=0; i<m; i++) { 2649ed1c418cSBarry Smith jrow = ii[i]; 2650ed1c418cSBarry Smith n = ii[i+1] - jrow; 2651ed1c418cSBarry Smith sum1 = 0.0; 2652ed1c418cSBarry Smith sum2 = 0.0; 2653ed1c418cSBarry Smith sum3 = 0.0; 2654ed1c418cSBarry Smith sum4 = 0.0; 2655ed1c418cSBarry Smith sum5 = 0.0; 2656ed1c418cSBarry Smith sum6 = 0.0; 2657ed1c418cSBarry Smith sum7 = 0.0; 2658ed1c418cSBarry Smith sum8 = 0.0; 2659ed1c418cSBarry Smith sum9 = 0.0; 2660ed1c418cSBarry Smith sum10 = 0.0; 2661ed1c418cSBarry Smith sum11 = 0.0; 2662ed1c418cSBarry Smith sum12 = 0.0; 2663ed1c418cSBarry Smith sum13 = 0.0; 2664ed1c418cSBarry Smith sum14 = 0.0; 2665ed1c418cSBarry Smith sum15 = 0.0; 2666ed1c418cSBarry Smith sum16 = 0.0; 2667ed1c418cSBarry Smith sum17 = 0.0; 2668ed1c418cSBarry Smith sum18 = 0.0; 2669ed1c418cSBarry Smith for (j=0; j<n; j++) { 2670ed1c418cSBarry Smith sum1 += v[jrow]*x[18*idx[jrow]]; 2671ed1c418cSBarry Smith sum2 += v[jrow]*x[18*idx[jrow]+1]; 2672ed1c418cSBarry Smith sum3 += v[jrow]*x[18*idx[jrow]+2]; 2673ed1c418cSBarry Smith sum4 += v[jrow]*x[18*idx[jrow]+3]; 2674ed1c418cSBarry Smith sum5 += v[jrow]*x[18*idx[jrow]+4]; 2675ed1c418cSBarry Smith sum6 += v[jrow]*x[18*idx[jrow]+5]; 2676ed1c418cSBarry Smith sum7 += v[jrow]*x[18*idx[jrow]+6]; 2677ed1c418cSBarry Smith sum8 += v[jrow]*x[18*idx[jrow]+7]; 2678ed1c418cSBarry Smith sum9 += v[jrow]*x[18*idx[jrow]+8]; 2679ed1c418cSBarry Smith sum10 += v[jrow]*x[18*idx[jrow]+9]; 2680ed1c418cSBarry Smith sum11 += v[jrow]*x[18*idx[jrow]+10]; 2681ed1c418cSBarry Smith sum12 += v[jrow]*x[18*idx[jrow]+11]; 2682ed1c418cSBarry Smith sum13 += v[jrow]*x[18*idx[jrow]+12]; 2683ed1c418cSBarry Smith sum14 += v[jrow]*x[18*idx[jrow]+13]; 2684ed1c418cSBarry Smith sum15 += v[jrow]*x[18*idx[jrow]+14]; 2685ed1c418cSBarry Smith sum16 += v[jrow]*x[18*idx[jrow]+15]; 2686ed1c418cSBarry Smith sum17 += v[jrow]*x[18*idx[jrow]+16]; 2687ed1c418cSBarry Smith sum18 += v[jrow]*x[18*idx[jrow]+17]; 2688ed1c418cSBarry Smith jrow++; 2689ed1c418cSBarry Smith } 2690ed1c418cSBarry Smith y[18*i] += sum1; 2691ed1c418cSBarry Smith y[18*i+1] += sum2; 2692ed1c418cSBarry Smith y[18*i+2] += sum3; 2693ed1c418cSBarry Smith y[18*i+3] += sum4; 2694ed1c418cSBarry Smith y[18*i+4] += sum5; 2695ed1c418cSBarry Smith y[18*i+5] += sum6; 2696ed1c418cSBarry Smith y[18*i+6] += sum7; 2697ed1c418cSBarry Smith y[18*i+7] += sum8; 2698ed1c418cSBarry Smith y[18*i+8] += sum9; 2699ed1c418cSBarry Smith y[18*i+9] += sum10; 2700ed1c418cSBarry Smith y[18*i+10] += sum11; 2701ed1c418cSBarry Smith y[18*i+11] += sum12; 2702ed1c418cSBarry Smith y[18*i+12] += sum13; 2703ed1c418cSBarry Smith y[18*i+13] += sum14; 2704ed1c418cSBarry Smith y[18*i+14] += sum15; 2705ed1c418cSBarry Smith y[18*i+15] += sum16; 2706ed1c418cSBarry Smith y[18*i+16] += sum17; 2707ed1c418cSBarry Smith y[18*i+17] += sum18; 2708ed1c418cSBarry Smith } 2709ed1c418cSBarry Smith 2710dc0b31edSSatish Balay ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 27113649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2712ed1c418cSBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2713ed1c418cSBarry Smith PetscFunctionReturn(0); 2714ed1c418cSBarry Smith } 2715ed1c418cSBarry Smith 2716ed1c418cSBarry Smith #undef __FUNCT__ 2717ed1c418cSBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_18" 2718ed1c418cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2719ed1c418cSBarry Smith { 2720ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2721ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2722d2840e09SBarry Smith const PetscScalar *x,*v; 2723d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2724ed1c418cSBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2725ed1c418cSBarry Smith PetscErrorCode ierr; 2726d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2727d2840e09SBarry Smith PetscInt n,i; 2728ed1c418cSBarry Smith 2729ed1c418cSBarry Smith PetscFunctionBegin; 2730ed1c418cSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 27313649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2732ed1c418cSBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2733ed1c418cSBarry Smith for (i=0; i<m; i++) { 2734ed1c418cSBarry Smith idx = a->j + a->i[i]; 2735ed1c418cSBarry Smith v = a->a + a->i[i]; 2736ed1c418cSBarry Smith n = a->i[i+1] - a->i[i]; 2737ed1c418cSBarry Smith alpha1 = x[18*i]; 2738ed1c418cSBarry Smith alpha2 = x[18*i+1]; 2739ed1c418cSBarry Smith alpha3 = x[18*i+2]; 2740ed1c418cSBarry Smith alpha4 = x[18*i+3]; 2741ed1c418cSBarry Smith alpha5 = x[18*i+4]; 2742ed1c418cSBarry Smith alpha6 = x[18*i+5]; 2743ed1c418cSBarry Smith alpha7 = x[18*i+6]; 2744ed1c418cSBarry Smith alpha8 = x[18*i+7]; 2745ed1c418cSBarry Smith alpha9 = x[18*i+8]; 2746ed1c418cSBarry Smith alpha10 = x[18*i+9]; 2747ed1c418cSBarry Smith alpha11 = x[18*i+10]; 2748ed1c418cSBarry Smith alpha12 = x[18*i+11]; 2749ed1c418cSBarry Smith alpha13 = x[18*i+12]; 2750ed1c418cSBarry Smith alpha14 = x[18*i+13]; 2751ed1c418cSBarry Smith alpha15 = x[18*i+14]; 2752ed1c418cSBarry Smith alpha16 = x[18*i+15]; 2753ed1c418cSBarry Smith alpha17 = x[18*i+16]; 2754ed1c418cSBarry Smith alpha18 = x[18*i+17]; 2755ed1c418cSBarry Smith while (n-->0) { 2756ed1c418cSBarry Smith y[18*(*idx)] += alpha1*(*v); 2757ed1c418cSBarry Smith y[18*(*idx)+1] += alpha2*(*v); 2758ed1c418cSBarry Smith y[18*(*idx)+2] += alpha3*(*v); 2759ed1c418cSBarry Smith y[18*(*idx)+3] += alpha4*(*v); 2760ed1c418cSBarry Smith y[18*(*idx)+4] += alpha5*(*v); 2761ed1c418cSBarry Smith y[18*(*idx)+5] += alpha6*(*v); 2762ed1c418cSBarry Smith y[18*(*idx)+6] += alpha7*(*v); 2763ed1c418cSBarry Smith y[18*(*idx)+7] += alpha8*(*v); 2764ed1c418cSBarry Smith y[18*(*idx)+8] += alpha9*(*v); 2765ed1c418cSBarry Smith y[18*(*idx)+9] += alpha10*(*v); 2766ed1c418cSBarry Smith y[18*(*idx)+10] += alpha11*(*v); 2767ed1c418cSBarry Smith y[18*(*idx)+11] += alpha12*(*v); 2768ed1c418cSBarry Smith y[18*(*idx)+12] += alpha13*(*v); 2769ed1c418cSBarry Smith y[18*(*idx)+13] += alpha14*(*v); 2770ed1c418cSBarry Smith y[18*(*idx)+14] += alpha15*(*v); 2771ed1c418cSBarry Smith y[18*(*idx)+15] += alpha16*(*v); 2772ed1c418cSBarry Smith y[18*(*idx)+16] += alpha17*(*v); 2773ed1c418cSBarry Smith y[18*(*idx)+17] += alpha18*(*v); 2774ed1c418cSBarry Smith idx++; v++; 2775ed1c418cSBarry Smith } 2776ed1c418cSBarry Smith } 2777dc0b31edSSatish Balay ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 27783649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2779ed1c418cSBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2780ed1c418cSBarry Smith PetscFunctionReturn(0); 2781ed1c418cSBarry Smith } 2782ed1c418cSBarry Smith 27836861f193SBarry Smith #undef __FUNCT__ 27846861f193SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_N" 27856861f193SBarry Smith PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 27866861f193SBarry Smith { 27876861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 27886861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 27896861f193SBarry Smith const PetscScalar *x,*v; 27906861f193SBarry Smith PetscScalar *y,*sums; 27916861f193SBarry Smith PetscErrorCode ierr; 27926861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 27936861f193SBarry Smith PetscInt n,i,jrow,j,dof = b->dof,k; 27946861f193SBarry Smith 27956861f193SBarry Smith PetscFunctionBegin; 27963649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 27976861f193SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 27986861f193SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 27996861f193SBarry Smith idx = a->j; 28006861f193SBarry Smith v = a->a; 28016861f193SBarry Smith ii = a->i; 28026861f193SBarry Smith 28036861f193SBarry Smith for (i=0; i<m; i++) { 28046861f193SBarry Smith jrow = ii[i]; 28056861f193SBarry Smith n = ii[i+1] - jrow; 28066861f193SBarry Smith sums = y + dof*i; 28076861f193SBarry Smith for (j=0; j<n; j++) { 28086861f193SBarry Smith for (k=0; k<dof; k++) { 28096861f193SBarry Smith sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 28106861f193SBarry Smith } 28116861f193SBarry Smith jrow++; 28126861f193SBarry Smith } 28136861f193SBarry Smith } 28146861f193SBarry Smith 28156861f193SBarry Smith ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 28163649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 28176861f193SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 28186861f193SBarry Smith PetscFunctionReturn(0); 28196861f193SBarry Smith } 28206861f193SBarry Smith 28216861f193SBarry Smith #undef __FUNCT__ 28226861f193SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_N" 28236861f193SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 28246861f193SBarry Smith { 28256861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 28266861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 28276861f193SBarry Smith const PetscScalar *x,*v; 28286861f193SBarry Smith PetscScalar *y,*sums; 28296861f193SBarry Smith PetscErrorCode ierr; 28306861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 28316861f193SBarry Smith PetscInt n,i,jrow,j,dof = b->dof,k; 28326861f193SBarry Smith 28336861f193SBarry Smith PetscFunctionBegin; 28346861f193SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 28353649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 28366861f193SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 28376861f193SBarry Smith idx = a->j; 28386861f193SBarry Smith v = a->a; 28396861f193SBarry Smith ii = a->i; 28406861f193SBarry Smith 28416861f193SBarry Smith for (i=0; i<m; i++) { 28426861f193SBarry Smith jrow = ii[i]; 28436861f193SBarry Smith n = ii[i+1] - jrow; 28446861f193SBarry Smith sums = y + dof*i; 28456861f193SBarry Smith for (j=0; j<n; j++) { 28466861f193SBarry Smith for (k=0; k<dof; k++) { 28476861f193SBarry Smith sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 28486861f193SBarry Smith } 28496861f193SBarry Smith jrow++; 28506861f193SBarry Smith } 28516861f193SBarry Smith } 28526861f193SBarry Smith 28536861f193SBarry Smith ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 28543649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 28556861f193SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 28566861f193SBarry Smith PetscFunctionReturn(0); 28576861f193SBarry Smith } 28586861f193SBarry Smith 28596861f193SBarry Smith #undef __FUNCT__ 28606861f193SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_N" 28616861f193SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 28626861f193SBarry Smith { 28636861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 28646861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 28656861f193SBarry Smith const PetscScalar *x,*v,*alpha; 28666861f193SBarry Smith PetscScalar *y; 28676861f193SBarry Smith PetscErrorCode ierr; 28686861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 28696861f193SBarry Smith PetscInt n,i,k; 28706861f193SBarry Smith 28716861f193SBarry Smith PetscFunctionBegin; 28723649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 28736861f193SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 28746861f193SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 28756861f193SBarry Smith for (i=0; i<m; i++) { 28766861f193SBarry Smith idx = a->j + a->i[i]; 28776861f193SBarry Smith v = a->a + a->i[i]; 28786861f193SBarry Smith n = a->i[i+1] - a->i[i]; 28796861f193SBarry Smith alpha = x + dof*i; 28806861f193SBarry Smith while (n-->0) { 28816861f193SBarry Smith for (k=0; k<dof; k++) { 28826861f193SBarry Smith y[dof*(*idx)+k] += alpha[k]*(*v); 28836861f193SBarry Smith } 288483ce7366SBarry Smith idx++; v++; 28856861f193SBarry Smith } 28866861f193SBarry Smith } 28876861f193SBarry Smith ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 28883649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 28896861f193SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 28906861f193SBarry Smith PetscFunctionReturn(0); 28916861f193SBarry Smith } 28926861f193SBarry Smith 28936861f193SBarry Smith #undef __FUNCT__ 28946861f193SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_N" 28956861f193SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 28966861f193SBarry Smith { 28976861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 28986861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 28996861f193SBarry Smith const PetscScalar *x,*v,*alpha; 29006861f193SBarry Smith PetscScalar *y; 29016861f193SBarry Smith PetscErrorCode ierr; 29026861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 29036861f193SBarry Smith PetscInt n,i,k; 29046861f193SBarry Smith 29056861f193SBarry Smith PetscFunctionBegin; 29066861f193SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 29073649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 29086861f193SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 29096861f193SBarry Smith for (i=0; i<m; i++) { 29106861f193SBarry Smith idx = a->j + a->i[i]; 29116861f193SBarry Smith v = a->a + a->i[i]; 29126861f193SBarry Smith n = a->i[i+1] - a->i[i]; 29136861f193SBarry Smith alpha = x + dof*i; 29146861f193SBarry Smith while (n-->0) { 29156861f193SBarry Smith for (k=0; k<dof; k++) { 29166861f193SBarry Smith y[dof*(*idx)+k] += alpha[k]*(*v); 29176861f193SBarry Smith } 291883ce7366SBarry Smith idx++; v++; 29196861f193SBarry Smith } 29206861f193SBarry Smith } 29216861f193SBarry Smith ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 29223649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 29236861f193SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 29246861f193SBarry Smith PetscFunctionReturn(0); 29256861f193SBarry Smith } 29266861f193SBarry Smith 2927f4a53059SBarry Smith /*===================================================================================*/ 29284a2ae208SSatish Balay #undef __FUNCT__ 29294a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIMAIJ_dof" 2930dfbe8321SBarry Smith PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2931f4a53059SBarry Smith { 29324cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2933dfbe8321SBarry Smith PetscErrorCode ierr; 2934f4a53059SBarry Smith 2935b24ad042SBarry Smith PetscFunctionBegin; 2936f4a53059SBarry Smith /* start the scatter */ 2937ca9f406cSSatish Balay ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 29384cb9d9b8SBarry Smith ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 2939ca9f406cSSatish Balay ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 29404cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 2941f4a53059SBarry Smith PetscFunctionReturn(0); 2942f4a53059SBarry Smith } 2943f4a53059SBarry Smith 29444a2ae208SSatish Balay #undef __FUNCT__ 29454a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 2946dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 29474cb9d9b8SBarry Smith { 29484cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2949dfbe8321SBarry Smith PetscErrorCode ierr; 2950b24ad042SBarry Smith 29514cb9d9b8SBarry Smith PetscFunctionBegin; 29524cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 29534cb9d9b8SBarry Smith ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 2954ca9f406cSSatish Balay ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2955ca9f406cSSatish Balay ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 29564cb9d9b8SBarry Smith PetscFunctionReturn(0); 29574cb9d9b8SBarry Smith } 29584cb9d9b8SBarry Smith 29594a2ae208SSatish Balay #undef __FUNCT__ 29604a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 2961dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 29624cb9d9b8SBarry Smith { 29634cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2964dfbe8321SBarry Smith PetscErrorCode ierr; 29654cb9d9b8SBarry Smith 2966b24ad042SBarry Smith PetscFunctionBegin; 29674cb9d9b8SBarry Smith /* start the scatter */ 2968ca9f406cSSatish Balay ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2969d72c5c08SBarry Smith ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2970ca9f406cSSatish Balay ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2971717f2ec8SHong Zhang ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 29724cb9d9b8SBarry Smith PetscFunctionReturn(0); 29734cb9d9b8SBarry Smith } 29744cb9d9b8SBarry Smith 29754a2ae208SSatish Balay #undef __FUNCT__ 29764a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 2977dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 29784cb9d9b8SBarry Smith { 29794cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2980dfbe8321SBarry Smith PetscErrorCode ierr; 2981b24ad042SBarry Smith 29824cb9d9b8SBarry Smith PetscFunctionBegin; 29834cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2984ca9f406cSSatish Balay ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2985d72c5c08SBarry Smith ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2986ca9f406cSSatish Balay ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 29874cb9d9b8SBarry Smith PetscFunctionReturn(0); 29884cb9d9b8SBarry Smith } 29894cb9d9b8SBarry Smith 299095ddefa2SHong Zhang /* ----------------------------------------------------------------*/ 29919c4fc4c7SBarry Smith #undef __FUNCT__ 29927ba1a0bfSKris Buschelman #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 29937ba1a0bfSKris Buschelman PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 29947ba1a0bfSKris Buschelman { 29957ba1a0bfSKris Buschelman PetscErrorCode ierr; 29960298fd71SBarry Smith PetscFreeSpaceList free_space=NULL,current_space=NULL; 29977ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp =(Mat_SeqMAIJ*)PP->data; 29987ba1a0bfSKris Buschelman Mat P =pp->AIJ; 29997ba1a0bfSKris Buschelman Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 3000d2840e09SBarry Smith PetscInt *pti,*ptj,*ptJ; 30017ba1a0bfSKris Buschelman PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 3002d2840e09SBarry Smith const PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof; 3003d2840e09SBarry Smith PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn; 30047ba1a0bfSKris Buschelman MatScalar *ca; 3005d2840e09SBarry Smith const PetscInt *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj; 30067ba1a0bfSKris Buschelman 30077ba1a0bfSKris Buschelman PetscFunctionBegin; 30087ba1a0bfSKris Buschelman /* Get ij structure of P^T */ 30097ba1a0bfSKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 30107ba1a0bfSKris Buschelman 30117ba1a0bfSKris Buschelman cn = pn*ppdof; 30127ba1a0bfSKris Buschelman /* Allocate ci array, arrays for fill computation and */ 30137ba1a0bfSKris Buschelman /* free space for accumulating nonzero column info */ 3014854ce69bSBarry Smith ierr = PetscMalloc1(cn+1,&ci);CHKERRQ(ierr); 30157ba1a0bfSKris Buschelman ci[0] = 0; 30167ba1a0bfSKris Buschelman 30177ba1a0bfSKris Buschelman /* Work arrays for rows of P^T*A */ 3018dcca6d9dSJed Brown ierr = PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);CHKERRQ(ierr); 3019c43dabc9SBarry Smith ierr = PetscMemzero(ptadenserow,an*sizeof(PetscInt));CHKERRQ(ierr); 3020c43dabc9SBarry Smith ierr = PetscMemzero(denserow,cn*sizeof(PetscInt));CHKERRQ(ierr); 30217ba1a0bfSKris Buschelman 30227ba1a0bfSKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 30237ba1a0bfSKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 30247ba1a0bfSKris Buschelman /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 3025f91af8c7SBarry Smith ierr = PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space);CHKERRQ(ierr); 30267ba1a0bfSKris Buschelman current_space = free_space; 30277ba1a0bfSKris Buschelman 30287ba1a0bfSKris Buschelman /* Determine symbolic info for each row of C: */ 30297ba1a0bfSKris Buschelman for (i=0; i<pn; i++) { 30307ba1a0bfSKris Buschelman ptnzi = pti[i+1] - pti[i]; 30317ba1a0bfSKris Buschelman ptJ = ptj + pti[i]; 30327ba1a0bfSKris Buschelman for (dof=0; dof<ppdof; dof++) { 30337ba1a0bfSKris Buschelman ptanzi = 0; 30347ba1a0bfSKris Buschelman /* Determine symbolic row of PtA: */ 30357ba1a0bfSKris Buschelman for (j=0; j<ptnzi; j++) { 30367ba1a0bfSKris Buschelman /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 30377ba1a0bfSKris Buschelman arow = ptJ[j]*ppdof + dof; 30387ba1a0bfSKris Buschelman /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 30397ba1a0bfSKris Buschelman anzj = ai[arow+1] - ai[arow]; 30407ba1a0bfSKris Buschelman ajj = aj + ai[arow]; 30417ba1a0bfSKris Buschelman for (k=0; k<anzj; k++) { 30427ba1a0bfSKris Buschelman if (!ptadenserow[ajj[k]]) { 30437ba1a0bfSKris Buschelman ptadenserow[ajj[k]] = -1; 30447ba1a0bfSKris Buschelman ptasparserow[ptanzi++] = ajj[k]; 30457ba1a0bfSKris Buschelman } 30467ba1a0bfSKris Buschelman } 30477ba1a0bfSKris Buschelman } 30487ba1a0bfSKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 30497ba1a0bfSKris Buschelman ptaj = ptasparserow; 30507ba1a0bfSKris Buschelman cnzi = 0; 30517ba1a0bfSKris Buschelman for (j=0; j<ptanzi; j++) { 30527ba1a0bfSKris Buschelman /* Get offset within block of P */ 30537ba1a0bfSKris Buschelman pshift = *ptaj%ppdof; 30547ba1a0bfSKris Buschelman /* Get block row of P */ 30557ba1a0bfSKris Buschelman prow = (*ptaj++)/ppdof; /* integer division */ 30567ba1a0bfSKris Buschelman /* P has same number of nonzeros per row as the compressed form */ 30577ba1a0bfSKris Buschelman pnzj = pi[prow+1] - pi[prow]; 30587ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 30597ba1a0bfSKris Buschelman for (k=0;k<pnzj;k++) { 30607ba1a0bfSKris Buschelman /* Locations in C are shifted by the offset within the block */ 30617ba1a0bfSKris Buschelman /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 30627ba1a0bfSKris Buschelman if (!denserow[pjj[k]*ppdof+pshift]) { 30637ba1a0bfSKris Buschelman denserow[pjj[k]*ppdof+pshift] = -1; 30647ba1a0bfSKris Buschelman sparserow[cnzi++] = pjj[k]*ppdof+pshift; 30657ba1a0bfSKris Buschelman } 30667ba1a0bfSKris Buschelman } 30677ba1a0bfSKris Buschelman } 30687ba1a0bfSKris Buschelman 30697ba1a0bfSKris Buschelman /* sort sparserow */ 30707ba1a0bfSKris Buschelman ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 30717ba1a0bfSKris Buschelman 30727ba1a0bfSKris Buschelman /* If free space is not available, make more free space */ 30737ba1a0bfSKris Buschelman /* Double the amount of total space in the list */ 30747ba1a0bfSKris Buschelman if (current_space->local_remaining<cnzi) { 3075f91af8c7SBarry Smith ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 30767ba1a0bfSKris Buschelman } 30777ba1a0bfSKris Buschelman 30787ba1a0bfSKris Buschelman /* Copy data into free space, and zero out denserows */ 30797ba1a0bfSKris Buschelman ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 308026fbe8dcSKarl Rupp 30817ba1a0bfSKris Buschelman current_space->array += cnzi; 30827ba1a0bfSKris Buschelman current_space->local_used += cnzi; 30837ba1a0bfSKris Buschelman current_space->local_remaining -= cnzi; 30847ba1a0bfSKris Buschelman 308526fbe8dcSKarl Rupp for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0; 308626fbe8dcSKarl Rupp for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0; 308726fbe8dcSKarl Rupp 30887ba1a0bfSKris Buschelman /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 30897ba1a0bfSKris Buschelman /* For now, we will recompute what is needed. */ 30907ba1a0bfSKris Buschelman ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 30917ba1a0bfSKris Buschelman } 30927ba1a0bfSKris Buschelman } 30937ba1a0bfSKris Buschelman /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 30947ba1a0bfSKris Buschelman /* Allocate space for cj, initialize cj, and */ 30957ba1a0bfSKris Buschelman /* destroy list of free space and other temporary array(s) */ 3096854ce69bSBarry Smith ierr = PetscMalloc1(ci[cn]+1,&cj);CHKERRQ(ierr); 3097a1a86e44SBarry Smith ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 309874ed9c26SBarry Smith ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr); 30997ba1a0bfSKris Buschelman 31007ba1a0bfSKris Buschelman /* Allocate space for ca */ 3101854ce69bSBarry Smith ierr = PetscCalloc1(ci[cn]+1,&ca);CHKERRQ(ierr); 31027ba1a0bfSKris Buschelman 31037ba1a0bfSKris Buschelman /* put together the new matrix */ 3104ce94432eSBarry Smith ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,C);CHKERRQ(ierr); 3105f27682edSJed Brown ierr = MatSetBlockSize(*C,pp->dof);CHKERRQ(ierr); 31067ba1a0bfSKris Buschelman 31077ba1a0bfSKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 31087ba1a0bfSKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 31097ba1a0bfSKris Buschelman c = (Mat_SeqAIJ*)((*C)->data); 3110e6b907acSBarry Smith c->free_a = PETSC_TRUE; 3111e6b907acSBarry Smith c->free_ij = PETSC_TRUE; 31127ba1a0bfSKris Buschelman c->nonew = 0; 311326fbe8dcSKarl Rupp 3114d76021d8SHong Zhang (*C)->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 31157ba1a0bfSKris Buschelman 31167ba1a0bfSKris Buschelman /* Clean up. */ 31177ba1a0bfSKris Buschelman ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 31187ba1a0bfSKris Buschelman PetscFunctionReturn(0); 31197ba1a0bfSKris Buschelman } 31207ba1a0bfSKris Buschelman 31217ba1a0bfSKris Buschelman #undef __FUNCT__ 31227ba1a0bfSKris Buschelman #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ" 31237ba1a0bfSKris Buschelman PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 31247ba1a0bfSKris Buschelman { 31257ba1a0bfSKris Buschelman /* This routine requires testing -- first draft only */ 31267ba1a0bfSKris Buschelman PetscErrorCode ierr; 31277ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 31287ba1a0bfSKris Buschelman Mat P =pp->AIJ; 31297ba1a0bfSKris Buschelman Mat_SeqAIJ *a = (Mat_SeqAIJ*) A->data; 31307ba1a0bfSKris Buschelman Mat_SeqAIJ *p = (Mat_SeqAIJ*) P->data; 31317ba1a0bfSKris Buschelman Mat_SeqAIJ *c = (Mat_SeqAIJ*) C->data; 3132a2ea699eSBarry Smith const PetscInt *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj; 3133d2840e09SBarry Smith const PetscInt *ci=c->i,*cj=c->j,*cjj; 3134d2840e09SBarry Smith const PetscInt am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof; 3135d2840e09SBarry Smith PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense; 3136a2ea699eSBarry Smith const MatScalar *aa=a->a,*pa=p->a,*pA,*paj; 3137d2840e09SBarry Smith MatScalar *ca=c->a,*caj,*apa; 31387ba1a0bfSKris Buschelman 31397ba1a0bfSKris Buschelman PetscFunctionBegin; 31407ba1a0bfSKris Buschelman /* Allocate temporary array for storage of one row of A*P */ 31411795a4d1SJed Brown ierr = PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);CHKERRQ(ierr); 31427ba1a0bfSKris Buschelman 31437ba1a0bfSKris Buschelman /* Clear old values in C */ 31447ba1a0bfSKris Buschelman ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 31457ba1a0bfSKris Buschelman 31467ba1a0bfSKris Buschelman for (i=0; i<am; i++) { 31477ba1a0bfSKris Buschelman /* Form sparse row of A*P */ 31487ba1a0bfSKris Buschelman anzi = ai[i+1] - ai[i]; 31497ba1a0bfSKris Buschelman apnzj = 0; 31507ba1a0bfSKris Buschelman for (j=0; j<anzi; j++) { 31517ba1a0bfSKris Buschelman /* Get offset within block of P */ 31527ba1a0bfSKris Buschelman pshift = *aj%ppdof; 31537ba1a0bfSKris Buschelman /* Get block row of P */ 31547ba1a0bfSKris Buschelman prow = *aj++/ppdof; /* integer division */ 31557ba1a0bfSKris Buschelman pnzj = pi[prow+1] - pi[prow]; 31567ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 31577ba1a0bfSKris Buschelman paj = pa + pi[prow]; 31587ba1a0bfSKris Buschelman for (k=0; k<pnzj; k++) { 31597ba1a0bfSKris Buschelman poffset = pjj[k]*ppdof+pshift; 31607ba1a0bfSKris Buschelman if (!apjdense[poffset]) { 31617ba1a0bfSKris Buschelman apjdense[poffset] = -1; 31627ba1a0bfSKris Buschelman apj[apnzj++] = poffset; 31637ba1a0bfSKris Buschelman } 31647ba1a0bfSKris Buschelman apa[poffset] += (*aa)*paj[k]; 31657ba1a0bfSKris Buschelman } 3166dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr); 31677ba1a0bfSKris Buschelman aa++; 31687ba1a0bfSKris Buschelman } 31697ba1a0bfSKris Buschelman 31707ba1a0bfSKris Buschelman /* Sort the j index array for quick sparse axpy. */ 31717ba1a0bfSKris Buschelman /* Note: a array does not need sorting as it is in dense storage locations. */ 31727ba1a0bfSKris Buschelman ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 31737ba1a0bfSKris Buschelman 31747ba1a0bfSKris Buschelman /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 31757ba1a0bfSKris Buschelman prow = i/ppdof; /* integer division */ 31767ba1a0bfSKris Buschelman pshift = i%ppdof; 31777ba1a0bfSKris Buschelman poffset = pi[prow]; 31787ba1a0bfSKris Buschelman pnzi = pi[prow+1] - poffset; 31797ba1a0bfSKris Buschelman /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 31807ba1a0bfSKris Buschelman pJ = pj+poffset; 31817ba1a0bfSKris Buschelman pA = pa+poffset; 31827ba1a0bfSKris Buschelman for (j=0; j<pnzi; j++) { 31837ba1a0bfSKris Buschelman crow = (*pJ)*ppdof+pshift; 31847ba1a0bfSKris Buschelman cjj = cj + ci[crow]; 31857ba1a0bfSKris Buschelman caj = ca + ci[crow]; 31867ba1a0bfSKris Buschelman pJ++; 31877ba1a0bfSKris Buschelman /* Perform sparse axpy operation. Note cjj includes apj. */ 31887ba1a0bfSKris Buschelman for (k=0,nextap=0; nextap<apnzj; k++) { 318926fbe8dcSKarl Rupp if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]]; 31907ba1a0bfSKris Buschelman } 3191dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr); 31927ba1a0bfSKris Buschelman pA++; 31937ba1a0bfSKris Buschelman } 31947ba1a0bfSKris Buschelman 31957ba1a0bfSKris Buschelman /* Zero the current row info for A*P */ 31967ba1a0bfSKris Buschelman for (j=0; j<apnzj; j++) { 31977ba1a0bfSKris Buschelman apa[apj[j]] = 0.; 31987ba1a0bfSKris Buschelman apjdense[apj[j]] = 0; 31997ba1a0bfSKris Buschelman } 32007ba1a0bfSKris Buschelman } 32017ba1a0bfSKris Buschelman 32027ba1a0bfSKris Buschelman /* Assemble the final matrix and clean up */ 32037ba1a0bfSKris Buschelman ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 32047ba1a0bfSKris Buschelman ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 320574ed9c26SBarry Smith ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr); 320695ddefa2SHong Zhang PetscFunctionReturn(0); 320795ddefa2SHong Zhang } 32087ba1a0bfSKris Buschelman 32092121bac1SHong Zhang #undef __FUNCT__ 32102121bac1SHong Zhang #define __FUNCT__ "MatPtAP_SeqAIJ_SeqMAIJ" 3211150d2497SBarry Smith PETSC_INTERN PetscErrorCode MatPtAP_SeqAIJ_SeqMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 32122121bac1SHong Zhang { 32132121bac1SHong Zhang PetscErrorCode ierr; 32142121bac1SHong Zhang 32152121bac1SHong Zhang PetscFunctionBegin; 32162121bac1SHong Zhang if (scall == MAT_INITIAL_MATRIX) { 32173ff4c91cSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 32182121bac1SHong Zhang ierr = MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,fill,C);CHKERRQ(ierr); 32193ff4c91cSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 32202121bac1SHong Zhang } 32213ff4c91cSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 32222121bac1SHong Zhang ierr = MatPtAPNumeric_SeqAIJ_SeqMAIJ(A,P,*C);CHKERRQ(ierr); 32233ff4c91cSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 32242121bac1SHong Zhang PetscFunctionReturn(0); 32252121bac1SHong Zhang } 32262121bac1SHong Zhang 322795ddefa2SHong Zhang #undef __FUNCT__ 322895ddefa2SHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIMAIJ" 3229f7a08781SBarry Smith PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 323095ddefa2SHong Zhang { 323195ddefa2SHong Zhang PetscErrorCode ierr; 323295ddefa2SHong Zhang 323395ddefa2SHong Zhang PetscFunctionBegin; 323495ddefa2SHong Zhang /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */ 3235511c6705SHong Zhang ierr = MatConvert(PP,MATMPIAIJ,MAT_INPLACE_MATRIX,&PP);CHKERRQ(ierr); 323695ddefa2SHong Zhang ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr); 323795ddefa2SHong Zhang PetscFunctionReturn(0); 323895ddefa2SHong Zhang } 323995ddefa2SHong Zhang 324095ddefa2SHong Zhang #undef __FUNCT__ 324195ddefa2SHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIMAIJ" 3242f7a08781SBarry Smith PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C) 324395ddefa2SHong Zhang { 324495ddefa2SHong Zhang PetscFunctionBegin; 3245e32f2f54SBarry Smith SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 32467ba1a0bfSKris Buschelman PetscFunctionReturn(0); 32477ba1a0bfSKris Buschelman } 32487ba1a0bfSKris Buschelman 32497ba1a0bfSKris Buschelman #undef __FUNCT__ 32509e03dfbbSHong Zhang #define __FUNCT__ "MatPtAP_MPIAIJ_MPIMAIJ" 3251150d2497SBarry Smith PETSC_INTERN PetscErrorCode MatPtAP_MPIAIJ_MPIMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 32529e03dfbbSHong Zhang { 32539e03dfbbSHong Zhang PetscErrorCode ierr; 32549e03dfbbSHong Zhang 32559e03dfbbSHong Zhang PetscFunctionBegin; 32569e03dfbbSHong Zhang if (scall == MAT_INITIAL_MATRIX) { 32573ff4c91cSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 32589e03dfbbSHong Zhang ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ(A,P,fill,C);CHKERRQ(ierr); 32593ff4c91cSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 32609e03dfbbSHong Zhang } 32613ff4c91cSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 32629e03dfbbSHong Zhang ierr = ((*C)->ops->ptapnumeric)(A,P,*C);CHKERRQ(ierr); 32633ff4c91cSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 32649e03dfbbSHong Zhang PetscFunctionReturn(0); 32659e03dfbbSHong Zhang } 32669e03dfbbSHong Zhang 32679e03dfbbSHong Zhang #undef __FUNCT__ 32680fd73130SBarry Smith #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ" 3269cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 32709c4fc4c7SBarry Smith { 32719c4fc4c7SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 3272ceb03754SKris Buschelman Mat a = b->AIJ,B; 32739c4fc4c7SBarry Smith Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 32749c4fc4c7SBarry Smith PetscErrorCode ierr; 32750fd73130SBarry Smith PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 3276ba8c8a56SBarry Smith PetscInt *cols; 3277ba8c8a56SBarry Smith PetscScalar *vals; 32789c4fc4c7SBarry Smith 32799c4fc4c7SBarry Smith PetscFunctionBegin; 32809c4fc4c7SBarry Smith ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 3281785e854fSJed Brown ierr = PetscMalloc1(dof*m,&ilen);CHKERRQ(ierr); 32829c4fc4c7SBarry Smith for (i=0; i<m; i++) { 32839c4fc4c7SBarry Smith nmax = PetscMax(nmax,aij->ilen[i]); 328426fbe8dcSKarl Rupp for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i]; 32859c4fc4c7SBarry Smith } 3286ceb03754SKris Buschelman ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr); 32879c4fc4c7SBarry Smith ierr = PetscFree(ilen);CHKERRQ(ierr); 3288785e854fSJed Brown ierr = PetscMalloc1(nmax,&icols);CHKERRQ(ierr); 32899c4fc4c7SBarry Smith ii = 0; 32909c4fc4c7SBarry Smith for (i=0; i<m; i++) { 3291ba8c8a56SBarry Smith ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 32920fd73130SBarry Smith for (j=0; j<dof; j++) { 329326fbe8dcSKarl Rupp for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j; 3294ceb03754SKris Buschelman ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 32959c4fc4c7SBarry Smith ii++; 32969c4fc4c7SBarry Smith } 3297ba8c8a56SBarry Smith ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 32989c4fc4c7SBarry Smith } 32999c4fc4c7SBarry Smith ierr = PetscFree(icols);CHKERRQ(ierr); 3300ceb03754SKris Buschelman ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3301ceb03754SKris Buschelman ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3302ceb03754SKris Buschelman 3303511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 330428be2f97SBarry Smith ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 3305c3d102feSKris Buschelman } else { 3306ceb03754SKris Buschelman *newmat = B; 3307c3d102feSKris Buschelman } 33089c4fc4c7SBarry Smith PetscFunctionReturn(0); 33099c4fc4c7SBarry Smith } 33109c4fc4c7SBarry Smith 3311c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h> 3312be1d678aSKris Buschelman 33130fd73130SBarry Smith #undef __FUNCT__ 33140fd73130SBarry Smith #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ" 3315cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 33160fd73130SBarry Smith { 33170fd73130SBarry Smith Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 3318ceb03754SKris Buschelman Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 33190fd73130SBarry Smith Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 33200fd73130SBarry Smith Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 33210fd73130SBarry Smith Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 33220fd73130SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 33230298fd71SBarry Smith PetscInt dof = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0; 33240298fd71SBarry Smith PetscInt *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL; 33250fd73130SBarry Smith PetscInt rstart,cstart,*garray,ii,k; 33260fd73130SBarry Smith PetscErrorCode ierr; 33270fd73130SBarry Smith PetscScalar *vals,*ovals; 33280fd73130SBarry Smith 33290fd73130SBarry Smith PetscFunctionBegin; 3330dcca6d9dSJed Brown ierr = PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);CHKERRQ(ierr); 3331d0f46423SBarry Smith for (i=0; i<A->rmap->n/dof; i++) { 33320fd73130SBarry Smith nmax = PetscMax(nmax,AIJ->ilen[i]); 33330fd73130SBarry Smith onmax = PetscMax(onmax,OAIJ->ilen[i]); 33340fd73130SBarry Smith for (j=0; j<dof; j++) { 33350fd73130SBarry Smith dnz[dof*i+j] = AIJ->ilen[i]; 33360fd73130SBarry Smith onz[dof*i+j] = OAIJ->ilen[i]; 33370fd73130SBarry Smith } 33380fd73130SBarry Smith } 3339ce94432eSBarry Smith ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr); 3340f27682edSJed Brown ierr = MatSetBlockSize(B,dof);CHKERRQ(ierr); 33410fd73130SBarry Smith ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 33420fd73130SBarry Smith 3343dcca6d9dSJed Brown ierr = PetscMalloc2(nmax,&icols,onmax,&oicols);CHKERRQ(ierr); 3344d0f46423SBarry Smith rstart = dof*maij->A->rmap->rstart; 3345d0f46423SBarry Smith cstart = dof*maij->A->cmap->rstart; 33460fd73130SBarry Smith garray = mpiaij->garray; 33470fd73130SBarry Smith 33480fd73130SBarry Smith ii = rstart; 3349d0f46423SBarry Smith for (i=0; i<A->rmap->n/dof; i++) { 33500fd73130SBarry Smith ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 33510fd73130SBarry Smith ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 33520fd73130SBarry Smith for (j=0; j<dof; j++) { 33530fd73130SBarry Smith for (k=0; k<ncols; k++) { 33540fd73130SBarry Smith icols[k] = cstart + dof*cols[k]+j; 33550fd73130SBarry Smith } 33560fd73130SBarry Smith for (k=0; k<oncols; k++) { 33570fd73130SBarry Smith oicols[k] = dof*garray[ocols[k]]+j; 33580fd73130SBarry Smith } 3359ceb03754SKris Buschelman ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3360ceb03754SKris Buschelman ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 33610fd73130SBarry Smith ii++; 33620fd73130SBarry Smith } 33630fd73130SBarry Smith ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 33640fd73130SBarry Smith ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 33650fd73130SBarry Smith } 33660fd73130SBarry Smith ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 33670fd73130SBarry Smith 3368ceb03754SKris Buschelman ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3369ceb03754SKris Buschelman ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3370ceb03754SKris Buschelman 3371511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 33727adad957SLisandro Dalcin PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 33737adad957SLisandro Dalcin ((PetscObject)A)->refct = 1; 337426fbe8dcSKarl Rupp 337528be2f97SBarry Smith ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 337626fbe8dcSKarl Rupp 33777adad957SLisandro Dalcin ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 3378c3d102feSKris Buschelman } else { 3379ceb03754SKris Buschelman *newmat = B; 3380c3d102feSKris Buschelman } 33810fd73130SBarry Smith PetscFunctionReturn(0); 33820fd73130SBarry Smith } 33830fd73130SBarry Smith 33849e516c8fSBarry Smith #undef __FUNCT__ 33859e516c8fSBarry Smith #define __FUNCT__ "MatGetSubMatrix_MAIJ" 33869e516c8fSBarry Smith PetscErrorCode MatGetSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 33879e516c8fSBarry Smith { 33889e516c8fSBarry Smith PetscErrorCode ierr; 33899e516c8fSBarry Smith Mat A; 33909e516c8fSBarry Smith 33919e516c8fSBarry Smith PetscFunctionBegin; 33929e516c8fSBarry Smith ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 33939e516c8fSBarry Smith ierr = MatGetSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 33949e516c8fSBarry Smith ierr = MatDestroy(&A);CHKERRQ(ierr); 33959e516c8fSBarry Smith PetscFunctionReturn(0); 33969e516c8fSBarry Smith } 33970fd73130SBarry Smith 3398bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */ 3399ff585edeSJed Brown #undef __FUNCT__ 3400ff585edeSJed Brown #define __FUNCT__ "MatCreateMAIJ" 3401*480dffcfSJed Brown /*@ 34020bad9183SKris Buschelman MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 34030bad9183SKris Buschelman operations for multicomponent problems. It interpolates each component the same 34040bad9183SKris Buschelman way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 34050bad9183SKris Buschelman and MATMPIAIJ for distributed matrices. 34060bad9183SKris Buschelman 3407ff585edeSJed Brown Collective 3408ff585edeSJed Brown 3409ff585edeSJed Brown Input Parameters: 3410ff585edeSJed Brown + A - the AIJ matrix describing the action on blocks 3411ff585edeSJed Brown - dof - the block size (number of components per node) 3412ff585edeSJed Brown 3413ff585edeSJed Brown Output Parameter: 3414ff585edeSJed Brown . maij - the new MAIJ matrix 3415ff585edeSJed Brown 34160bad9183SKris Buschelman Operations provided: 34170fd73130SBarry Smith + MatMult 34180bad9183SKris Buschelman . MatMultTranspose 34190bad9183SKris Buschelman . MatMultAdd 34200bad9183SKris Buschelman . MatMultTransposeAdd 34210fd73130SBarry Smith - MatView 34220bad9183SKris Buschelman 34230bad9183SKris Buschelman Level: advanced 34240bad9183SKris Buschelman 3425b409243cSBarry Smith .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ 3426ff585edeSJed Brown @*/ 34277087cfbeSBarry Smith PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 342882b1193eSBarry Smith { 3429dfbe8321SBarry Smith PetscErrorCode ierr; 3430b24ad042SBarry Smith PetscMPIInt size; 3431b24ad042SBarry Smith PetscInt n; 343282b1193eSBarry Smith Mat B; 343382b1193eSBarry Smith 343482b1193eSBarry Smith PetscFunctionBegin; 3435d72c5c08SBarry Smith ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3436d72c5c08SBarry Smith 343726fbe8dcSKarl Rupp if (dof == 1) *maij = A; 343826fbe8dcSKarl Rupp else { 3439ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3440d0f46423SBarry Smith ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr); 3441bccba955SJed Brown ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr); 3442bccba955SJed Brown ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr); 3443bccba955SJed Brown ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3444bccba955SJed Brown ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 344526fbe8dcSKarl Rupp 3446cd3bbe55SBarry Smith B->assembled = PETSC_TRUE; 3447d72c5c08SBarry Smith 3448ce94432eSBarry Smith ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 3449f4a53059SBarry Smith if (size == 1) { 3450feb9fe23SJed Brown Mat_SeqMAIJ *b; 3451feb9fe23SJed Brown 3452b9b97703SBarry Smith ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 345326fbe8dcSKarl Rupp 34540298fd71SBarry Smith B->ops->setup = NULL; 34554cb9d9b8SBarry Smith B->ops->destroy = MatDestroy_SeqMAIJ; 34560fd73130SBarry Smith B->ops->view = MatView_SeqMAIJ; 3457feb9fe23SJed Brown b = (Mat_SeqMAIJ*)B->data; 3458b9b97703SBarry Smith b->dof = dof; 34594cb9d9b8SBarry Smith b->AIJ = A; 346026fbe8dcSKarl Rupp 3461d72c5c08SBarry Smith if (dof == 2) { 3462cd3bbe55SBarry Smith B->ops->mult = MatMult_SeqMAIJ_2; 3463cd3bbe55SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_2; 3464cd3bbe55SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 3465cd3bbe55SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 3466bcc973b7SBarry Smith } else if (dof == 3) { 3467bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_3; 3468bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_3; 3469bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 3470bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 3471bcc973b7SBarry Smith } else if (dof == 4) { 3472bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_4; 3473bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_4; 3474bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 3475bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 3476f9fae5adSBarry Smith } else if (dof == 5) { 3477f9fae5adSBarry Smith B->ops->mult = MatMult_SeqMAIJ_5; 3478f9fae5adSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_5; 3479f9fae5adSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 3480f9fae5adSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 34816cd98798SBarry Smith } else if (dof == 6) { 34826cd98798SBarry Smith B->ops->mult = MatMult_SeqMAIJ_6; 34836cd98798SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_6; 34846cd98798SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 34856cd98798SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 3486ed8eea03SBarry Smith } else if (dof == 7) { 3487ed8eea03SBarry Smith B->ops->mult = MatMult_SeqMAIJ_7; 3488ed8eea03SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_7; 3489ed8eea03SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 3490ed8eea03SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 349166ed3db0SBarry Smith } else if (dof == 8) { 349266ed3db0SBarry Smith B->ops->mult = MatMult_SeqMAIJ_8; 349366ed3db0SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_8; 349466ed3db0SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 349566ed3db0SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 34962b692628SMatthew Knepley } else if (dof == 9) { 34972b692628SMatthew Knepley B->ops->mult = MatMult_SeqMAIJ_9; 34982b692628SMatthew Knepley B->ops->multadd = MatMultAdd_SeqMAIJ_9; 34992b692628SMatthew Knepley B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 35002b692628SMatthew Knepley B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 3501b9cda22cSBarry Smith } else if (dof == 10) { 3502b9cda22cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_10; 3503b9cda22cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_10; 3504b9cda22cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 3505b9cda22cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 3506dbdd7285SBarry Smith } else if (dof == 11) { 3507dbdd7285SBarry Smith B->ops->mult = MatMult_SeqMAIJ_11; 3508dbdd7285SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_11; 3509dbdd7285SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 3510dbdd7285SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 35112f7816d4SBarry Smith } else if (dof == 16) { 35122f7816d4SBarry Smith B->ops->mult = MatMult_SeqMAIJ_16; 35132f7816d4SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_16; 35142f7816d4SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 35152f7816d4SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 3516ed1c418cSBarry Smith } else if (dof == 18) { 3517ed1c418cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_18; 3518ed1c418cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_18; 3519ed1c418cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 3520ed1c418cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 352182b1193eSBarry Smith } else { 35226861f193SBarry Smith B->ops->mult = MatMult_SeqMAIJ_N; 35236861f193SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_N; 35246861f193SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 35256861f193SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 352682b1193eSBarry Smith } 3527bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 3528bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaij_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr); 3529f4a53059SBarry Smith } else { 3530f4a53059SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)A->data; 3531feb9fe23SJed Brown Mat_MPIMAIJ *b; 3532f4a53059SBarry Smith IS from,to; 3533f4a53059SBarry Smith Vec gvec; 3534f4a53059SBarry Smith 3535b9b97703SBarry Smith ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 353626fbe8dcSKarl Rupp 35370298fd71SBarry Smith B->ops->setup = NULL; 3538d72c5c08SBarry Smith B->ops->destroy = MatDestroy_MPIMAIJ; 35390fd73130SBarry Smith B->ops->view = MatView_MPIMAIJ; 354026fbe8dcSKarl Rupp 3541b9b97703SBarry Smith b = (Mat_MPIMAIJ*)B->data; 3542b9b97703SBarry Smith b->dof = dof; 3543b9b97703SBarry Smith b->A = A; 354426fbe8dcSKarl Rupp 35454cb9d9b8SBarry Smith ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 35464cb9d9b8SBarry Smith ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 35474cb9d9b8SBarry Smith 3548f4a53059SBarry Smith ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 3549a34bdc0bSBarry Smith ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr); 3550a34bdc0bSBarry Smith ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr); 355113288a74SBarry Smith ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 3552a34bdc0bSBarry Smith ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr); 3553f4a53059SBarry Smith 3554f4a53059SBarry Smith /* create two temporary Index sets for build scatter gather */ 3555ce94432eSBarry Smith ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 3556f4a53059SBarry Smith ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 3557f4a53059SBarry Smith 3558f4a53059SBarry Smith /* create temporary global vector to generate scatter context */ 3559ce94432eSBarry Smith ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);CHKERRQ(ierr); 3560f4a53059SBarry Smith 3561f4a53059SBarry Smith /* generate the scatter context */ 3562f4a53059SBarry Smith ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 3563f4a53059SBarry Smith 35646bf464f9SBarry Smith ierr = ISDestroy(&from);CHKERRQ(ierr); 35656bf464f9SBarry Smith ierr = ISDestroy(&to);CHKERRQ(ierr); 35666bf464f9SBarry Smith ierr = VecDestroy(&gvec);CHKERRQ(ierr); 3567f4a53059SBarry Smith 3568f4a53059SBarry Smith B->ops->mult = MatMult_MPIMAIJ_dof; 35694cb9d9b8SBarry Smith B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 35704cb9d9b8SBarry Smith B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 35714cb9d9b8SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 357226fbe8dcSKarl Rupp 3573bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 3574bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_mpiaij_mpimaij_C",MatPtAP_MPIAIJ_MPIMAIJ);CHKERRQ(ierr); 3575f4a53059SBarry Smith } 35769e516c8fSBarry Smith B->ops->getsubmatrix = MatGetSubMatrix_MAIJ; 35774994cf47SJed Brown ierr = MatSetUp(B);CHKERRQ(ierr); 3578cd3bbe55SBarry Smith *maij = B; 3579146574abSBarry Smith ierr = MatViewFromOptions(B,NULL,"-mat_view");CHKERRQ(ierr); 3580d72c5c08SBarry Smith } 358182b1193eSBarry Smith PetscFunctionReturn(0); 358282b1193eSBarry Smith } 3583