182b1193eSBarry Smith /* 2cd3bbe55SBarry Smith Defines the basic matrix operations for the MAIJ matrix storage format. 3cd3bbe55SBarry Smith This format is used for restriction and interpolation operations for 4cd3bbe55SBarry Smith multicomponent problems. It interpolates each component the same way 5cd3bbe55SBarry Smith independently. 6cd3bbe55SBarry Smith 7cd3bbe55SBarry Smith We provide: 8cd3bbe55SBarry Smith MatMult() 9cd3bbe55SBarry Smith MatMultTranspose() 10cd3bbe55SBarry Smith MatMultTransposeAdd() 11cd3bbe55SBarry Smith MatMultAdd() 12cd3bbe55SBarry Smith and 13cd3bbe55SBarry Smith MatCreateMAIJ(Mat,dof,Mat*) 14f4a53059SBarry Smith 15f4a53059SBarry Smith This single directory handles both the sequential and parallel codes 1682b1193eSBarry Smith */ 1782b1193eSBarry Smith 18be911618SKris Buschelman #include "src/mat/impls/maij/maij.h" 193c94ec11SBarry Smith #include "vecimpl.h" 2082b1193eSBarry Smith 214a2ae208SSatish Balay #undef __FUNCT__ 224a2ae208SSatish Balay #define __FUNCT__ "MatMAIJGetAIJ" 23dfbe8321SBarry Smith PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B) 24b9b97703SBarry Smith { 25dfbe8321SBarry Smith PetscErrorCode ierr; 26b9b97703SBarry Smith PetscTruth ismpimaij,isseqmaij; 27b9b97703SBarry Smith 28b9b97703SBarry Smith PetscFunctionBegin; 29b9b97703SBarry Smith ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 30b9b97703SBarry Smith ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 31b9b97703SBarry Smith if (ismpimaij) { 32b9b97703SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 33b9b97703SBarry Smith 34b9b97703SBarry Smith *B = b->A; 35b9b97703SBarry Smith } else if (isseqmaij) { 36b9b97703SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 37b9b97703SBarry Smith 38b9b97703SBarry Smith *B = b->AIJ; 39b9b97703SBarry Smith } else { 40b9b97703SBarry Smith *B = A; 41b9b97703SBarry Smith } 42b9b97703SBarry Smith PetscFunctionReturn(0); 43b9b97703SBarry Smith } 44b9b97703SBarry Smith 454a2ae208SSatish Balay #undef __FUNCT__ 464a2ae208SSatish Balay #define __FUNCT__ "MatMAIJRedimension" 47*b24ad042SBarry Smith PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 48b9b97703SBarry Smith { 49dfbe8321SBarry Smith PetscErrorCode ierr; 50b9b97703SBarry Smith Mat Aij; 51b9b97703SBarry Smith 52b9b97703SBarry Smith PetscFunctionBegin; 53b9b97703SBarry Smith ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 54b9b97703SBarry Smith ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 55b9b97703SBarry Smith PetscFunctionReturn(0); 56b9b97703SBarry Smith } 57b9b97703SBarry Smith 584a2ae208SSatish Balay #undef __FUNCT__ 594a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_SeqMAIJ" 60dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 6182b1193eSBarry Smith { 62dfbe8321SBarry Smith PetscErrorCode ierr; 634cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 6482b1193eSBarry Smith 6582b1193eSBarry Smith PetscFunctionBegin; 66cd3bbe55SBarry Smith if (b->AIJ) { 67cd3bbe55SBarry Smith ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 6882b1193eSBarry Smith } 694cb9d9b8SBarry Smith ierr = PetscFree(b);CHKERRQ(ierr); 704cb9d9b8SBarry Smith PetscFunctionReturn(0); 714cb9d9b8SBarry Smith } 724cb9d9b8SBarry Smith 734a2ae208SSatish Balay #undef __FUNCT__ 744a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIMAIJ" 75dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 764cb9d9b8SBarry Smith { 77dfbe8321SBarry Smith PetscErrorCode ierr; 784cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 794cb9d9b8SBarry Smith 804cb9d9b8SBarry Smith PetscFunctionBegin; 814cb9d9b8SBarry Smith if (b->AIJ) { 824cb9d9b8SBarry Smith ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 834cb9d9b8SBarry Smith } 84f4a53059SBarry Smith if (b->OAIJ) { 85f4a53059SBarry Smith ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr); 86f4a53059SBarry Smith } 87b9b97703SBarry Smith if (b->A) { 88b9b97703SBarry Smith ierr = MatDestroy(b->A);CHKERRQ(ierr); 89b9b97703SBarry Smith } 90f4a53059SBarry Smith if (b->ctx) { 91f4a53059SBarry Smith ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr); 92f4a53059SBarry Smith } 93f4a53059SBarry Smith if (b->w) { 94f4a53059SBarry Smith ierr = VecDestroy(b->w);CHKERRQ(ierr); 95f4a53059SBarry Smith } 96cd3bbe55SBarry Smith ierr = PetscFree(b);CHKERRQ(ierr); 97b9b97703SBarry Smith PetscFunctionReturn(0); 98b9b97703SBarry Smith } 99b9b97703SBarry Smith 1000bad9183SKris Buschelman /*MC 101fafad747SKris Buschelman MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 1020bad9183SKris Buschelman multicomponent problems, interpolating or restricting each component the same way independently. 1030bad9183SKris Buschelman The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 1040bad9183SKris Buschelman 1050bad9183SKris Buschelman Operations provided: 1060bad9183SKris Buschelman . MatMult 1070bad9183SKris Buschelman . MatMultTranspose 1080bad9183SKris Buschelman . MatMultAdd 1090bad9183SKris Buschelman . MatMultTransposeAdd 1100bad9183SKris Buschelman 1110bad9183SKris Buschelman Level: advanced 1120bad9183SKris Buschelman 1130bad9183SKris Buschelman .seealso: MatCreateSeqDense 1140bad9183SKris Buschelman M*/ 1150bad9183SKris Buschelman 11682b1193eSBarry Smith EXTERN_C_BEGIN 1174a2ae208SSatish Balay #undef __FUNCT__ 1184a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MAIJ" 119dfbe8321SBarry Smith PetscErrorCode MatCreate_MAIJ(Mat A) 12082b1193eSBarry Smith { 121dfbe8321SBarry Smith PetscErrorCode ierr; 1224cb9d9b8SBarry Smith Mat_MPIMAIJ *b; 12382b1193eSBarry Smith 12482b1193eSBarry Smith PetscFunctionBegin; 125b0a32e0cSBarry Smith ierr = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr); 126b0a32e0cSBarry Smith A->data = (void*)b; 1274cb9d9b8SBarry Smith ierr = PetscMemzero(b,sizeof(Mat_MPIMAIJ));CHKERRQ(ierr); 128cd3bbe55SBarry Smith ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 129cd3bbe55SBarry Smith A->factor = 0; 130cd3bbe55SBarry Smith A->mapping = 0; 131f4a53059SBarry Smith 132cd3bbe55SBarry Smith b->AIJ = 0; 133cd3bbe55SBarry Smith b->dof = 0; 134f4a53059SBarry Smith b->OAIJ = 0; 135f4a53059SBarry Smith b->ctx = 0; 136f4a53059SBarry Smith b->w = 0; 13782b1193eSBarry Smith PetscFunctionReturn(0); 13882b1193eSBarry Smith } 13982b1193eSBarry Smith EXTERN_C_END 14082b1193eSBarry Smith 141cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 1424a2ae208SSatish Balay #undef __FUNCT__ 143b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_2" 144dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 14582b1193eSBarry Smith { 1464cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 147bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 14887828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2; 149dfbe8321SBarry Smith PetscErrorCode ierr; 150*b24ad042SBarry Smith PetscInt m = b->AIJ->m,*idx,*ii; 151*b24ad042SBarry Smith PetscInt n,i,jrow,j; 15282b1193eSBarry Smith 153bcc973b7SBarry Smith PetscFunctionBegin; 1541ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1551ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 156bcc973b7SBarry Smith idx = a->j; 157bcc973b7SBarry Smith v = a->a; 158bcc973b7SBarry Smith ii = a->i; 159bcc973b7SBarry Smith 160bcc973b7SBarry Smith for (i=0; i<m; i++) { 161bcc973b7SBarry Smith jrow = ii[i]; 162bcc973b7SBarry Smith n = ii[i+1] - jrow; 163bcc973b7SBarry Smith sum1 = 0.0; 164bcc973b7SBarry Smith sum2 = 0.0; 165bcc973b7SBarry Smith for (j=0; j<n; j++) { 166bcc973b7SBarry Smith sum1 += v[jrow]*x[2*idx[jrow]]; 167bcc973b7SBarry Smith sum2 += v[jrow]*x[2*idx[jrow]+1]; 168bcc973b7SBarry Smith jrow++; 169bcc973b7SBarry Smith } 170bcc973b7SBarry Smith y[2*i] = sum1; 171bcc973b7SBarry Smith y[2*i+1] = sum2; 172bcc973b7SBarry Smith } 173bcc973b7SBarry Smith 174b0a32e0cSBarry Smith PetscLogFlops(4*a->nz - 2*m); 1751ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1761ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 17782b1193eSBarry Smith PetscFunctionReturn(0); 17882b1193eSBarry Smith } 179bcc973b7SBarry Smith 1804a2ae208SSatish Balay #undef __FUNCT__ 181b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 182dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 18382b1193eSBarry Smith { 1844cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 185bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 18687828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0; 187dfbe8321SBarry Smith PetscErrorCode ierr; 188*b24ad042SBarry Smith PetscInt m = b->AIJ->m,n,i,*idx; 18982b1193eSBarry Smith 190bcc973b7SBarry Smith PetscFunctionBegin; 191bcc973b7SBarry Smith ierr = VecSet(&zero,yy);CHKERRQ(ierr); 1921ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1931ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 194bfec09a0SHong Zhang 195bcc973b7SBarry Smith for (i=0; i<m; i++) { 196bfec09a0SHong Zhang idx = a->j + a->i[i] ; 197bfec09a0SHong Zhang v = a->a + a->i[i] ; 198bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 199bcc973b7SBarry Smith alpha1 = x[2*i]; 200bcc973b7SBarry Smith alpha2 = x[2*i+1]; 201bcc973b7SBarry Smith while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 202bcc973b7SBarry Smith } 203b0a32e0cSBarry Smith PetscLogFlops(4*a->nz - 2*b->AIJ->n); 2041ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2051ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 20682b1193eSBarry Smith PetscFunctionReturn(0); 20782b1193eSBarry Smith } 208bcc973b7SBarry Smith 2094a2ae208SSatish Balay #undef __FUNCT__ 210b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 211dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 21282b1193eSBarry Smith { 2134cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 214bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 21587828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2; 216dfbe8321SBarry Smith PetscErrorCode ierr; 217*b24ad042SBarry Smith PetscInt m = b->AIJ->m,*idx,*ii; 218*b24ad042SBarry Smith PetscInt n,i,jrow,j; 21982b1193eSBarry Smith 220bcc973b7SBarry Smith PetscFunctionBegin; 221f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2221ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2231ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 224bcc973b7SBarry Smith idx = a->j; 225bcc973b7SBarry Smith v = a->a; 226bcc973b7SBarry Smith ii = a->i; 227bcc973b7SBarry Smith 228bcc973b7SBarry Smith for (i=0; i<m; i++) { 229bcc973b7SBarry Smith jrow = ii[i]; 230bcc973b7SBarry Smith n = ii[i+1] - jrow; 231bcc973b7SBarry Smith sum1 = 0.0; 232bcc973b7SBarry Smith sum2 = 0.0; 233bcc973b7SBarry Smith for (j=0; j<n; j++) { 234bcc973b7SBarry Smith sum1 += v[jrow]*x[2*idx[jrow]]; 235bcc973b7SBarry Smith sum2 += v[jrow]*x[2*idx[jrow]+1]; 236bcc973b7SBarry Smith jrow++; 237bcc973b7SBarry Smith } 238bcc973b7SBarry Smith y[2*i] += sum1; 239bcc973b7SBarry Smith y[2*i+1] += sum2; 240bcc973b7SBarry Smith } 241bcc973b7SBarry Smith 242b0a32e0cSBarry Smith PetscLogFlops(4*a->nz - 2*m); 2431ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2441ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 245bcc973b7SBarry Smith PetscFunctionReturn(0); 24682b1193eSBarry Smith } 2474a2ae208SSatish Balay #undef __FUNCT__ 248b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 249dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 25082b1193eSBarry Smith { 2514cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 252bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 25387828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2; 254dfbe8321SBarry Smith PetscErrorCode ierr; 255*b24ad042SBarry Smith PetscInt m = b->AIJ->m,n,i,*idx; 25682b1193eSBarry Smith 257bcc973b7SBarry Smith PetscFunctionBegin; 258f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2591ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2601ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 261bfec09a0SHong Zhang 262bcc973b7SBarry Smith for (i=0; i<m; i++) { 263bfec09a0SHong Zhang idx = a->j + a->i[i] ; 264bfec09a0SHong Zhang v = a->a + a->i[i] ; 265bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 266bcc973b7SBarry Smith alpha1 = x[2*i]; 267bcc973b7SBarry Smith alpha2 = x[2*i+1]; 268bcc973b7SBarry Smith while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 269bcc973b7SBarry Smith } 270b0a32e0cSBarry Smith PetscLogFlops(4*a->nz - 2*b->AIJ->n); 2711ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2721ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 273bcc973b7SBarry Smith PetscFunctionReturn(0); 27482b1193eSBarry Smith } 275cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 2764a2ae208SSatish Balay #undef __FUNCT__ 277b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_3" 278dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 279bcc973b7SBarry Smith { 2804cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 281bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 28287828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3; 283dfbe8321SBarry Smith PetscErrorCode ierr; 284*b24ad042SBarry Smith PetscInt m = b->AIJ->m,*idx,*ii; 285*b24ad042SBarry Smith PetscInt n,i,jrow,j; 28682b1193eSBarry Smith 287bcc973b7SBarry Smith PetscFunctionBegin; 2881ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2891ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 290bcc973b7SBarry Smith idx = a->j; 291bcc973b7SBarry Smith v = a->a; 292bcc973b7SBarry Smith ii = a->i; 293bcc973b7SBarry Smith 294bcc973b7SBarry Smith for (i=0; i<m; i++) { 295bcc973b7SBarry Smith jrow = ii[i]; 296bcc973b7SBarry Smith n = ii[i+1] - jrow; 297bcc973b7SBarry Smith sum1 = 0.0; 298bcc973b7SBarry Smith sum2 = 0.0; 299bcc973b7SBarry Smith sum3 = 0.0; 300bcc973b7SBarry Smith for (j=0; j<n; j++) { 301bcc973b7SBarry Smith sum1 += v[jrow]*x[3*idx[jrow]]; 302bcc973b7SBarry Smith sum2 += v[jrow]*x[3*idx[jrow]+1]; 303bcc973b7SBarry Smith sum3 += v[jrow]*x[3*idx[jrow]+2]; 304bcc973b7SBarry Smith jrow++; 305bcc973b7SBarry Smith } 306bcc973b7SBarry Smith y[3*i] = sum1; 307bcc973b7SBarry Smith y[3*i+1] = sum2; 308bcc973b7SBarry Smith y[3*i+2] = sum3; 309bcc973b7SBarry Smith } 310bcc973b7SBarry Smith 311b0a32e0cSBarry Smith PetscLogFlops(6*a->nz - 3*m); 3121ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3131ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 314bcc973b7SBarry Smith PetscFunctionReturn(0); 315bcc973b7SBarry Smith } 316bcc973b7SBarry Smith 3174a2ae208SSatish Balay #undef __FUNCT__ 318b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 319dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 320bcc973b7SBarry Smith { 3214cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 322bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 32387828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0; 324dfbe8321SBarry Smith PetscErrorCode ierr; 325*b24ad042SBarry Smith PetscInt m = b->AIJ->m,n,i,*idx; 326bcc973b7SBarry Smith 327bcc973b7SBarry Smith PetscFunctionBegin; 328bcc973b7SBarry Smith ierr = VecSet(&zero,yy);CHKERRQ(ierr); 3291ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3301ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 331bfec09a0SHong Zhang 332bcc973b7SBarry Smith for (i=0; i<m; i++) { 333bfec09a0SHong Zhang idx = a->j + a->i[i]; 334bfec09a0SHong Zhang v = a->a + a->i[i]; 335bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 336bcc973b7SBarry Smith alpha1 = x[3*i]; 337bcc973b7SBarry Smith alpha2 = x[3*i+1]; 338bcc973b7SBarry Smith alpha3 = x[3*i+2]; 339bcc973b7SBarry Smith while (n-->0) { 340bcc973b7SBarry Smith y[3*(*idx)] += alpha1*(*v); 341bcc973b7SBarry Smith y[3*(*idx)+1] += alpha2*(*v); 342bcc973b7SBarry Smith y[3*(*idx)+2] += alpha3*(*v); 343bcc973b7SBarry Smith idx++; v++; 344bcc973b7SBarry Smith } 345bcc973b7SBarry Smith } 346b0a32e0cSBarry Smith PetscLogFlops(6*a->nz - 3*b->AIJ->n); 3471ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3481ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 349bcc973b7SBarry Smith PetscFunctionReturn(0); 350bcc973b7SBarry Smith } 351bcc973b7SBarry Smith 3524a2ae208SSatish Balay #undef __FUNCT__ 353b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 354dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 355bcc973b7SBarry Smith { 3564cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 357bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 35887828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3; 359dfbe8321SBarry Smith PetscErrorCode ierr; 360*b24ad042SBarry Smith PetscInt m = b->AIJ->m,*idx,*ii; 361*b24ad042SBarry Smith PetscInt n,i,jrow,j; 362bcc973b7SBarry Smith 363bcc973b7SBarry Smith PetscFunctionBegin; 364f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 3651ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3661ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 367bcc973b7SBarry Smith idx = a->j; 368bcc973b7SBarry Smith v = a->a; 369bcc973b7SBarry Smith ii = a->i; 370bcc973b7SBarry Smith 371bcc973b7SBarry Smith for (i=0; i<m; i++) { 372bcc973b7SBarry Smith jrow = ii[i]; 373bcc973b7SBarry Smith n = ii[i+1] - jrow; 374bcc973b7SBarry Smith sum1 = 0.0; 375bcc973b7SBarry Smith sum2 = 0.0; 376bcc973b7SBarry Smith sum3 = 0.0; 377bcc973b7SBarry Smith for (j=0; j<n; j++) { 378bcc973b7SBarry Smith sum1 += v[jrow]*x[3*idx[jrow]]; 379bcc973b7SBarry Smith sum2 += v[jrow]*x[3*idx[jrow]+1]; 380bcc973b7SBarry Smith sum3 += v[jrow]*x[3*idx[jrow]+2]; 381bcc973b7SBarry Smith jrow++; 382bcc973b7SBarry Smith } 383bcc973b7SBarry Smith y[3*i] += sum1; 384bcc973b7SBarry Smith y[3*i+1] += sum2; 385bcc973b7SBarry Smith y[3*i+2] += sum3; 386bcc973b7SBarry Smith } 387bcc973b7SBarry Smith 388b0a32e0cSBarry Smith PetscLogFlops(6*a->nz); 3891ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3901ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 391bcc973b7SBarry Smith PetscFunctionReturn(0); 392bcc973b7SBarry Smith } 3934a2ae208SSatish Balay #undef __FUNCT__ 394b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 395dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 396bcc973b7SBarry Smith { 3974cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 398bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 39987828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3; 400dfbe8321SBarry Smith PetscErrorCode ierr; 401*b24ad042SBarry Smith PetscInt m = b->AIJ->m,n,i,*idx; 402bcc973b7SBarry Smith 403bcc973b7SBarry Smith PetscFunctionBegin; 404f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 4051ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 4061ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 407bcc973b7SBarry Smith for (i=0; i<m; i++) { 408bfec09a0SHong Zhang idx = a->j + a->i[i] ; 409bfec09a0SHong Zhang v = a->a + a->i[i] ; 410bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 411bcc973b7SBarry Smith alpha1 = x[3*i]; 412bcc973b7SBarry Smith alpha2 = x[3*i+1]; 413bcc973b7SBarry Smith alpha3 = x[3*i+2]; 414bcc973b7SBarry Smith while (n-->0) { 415bcc973b7SBarry Smith y[3*(*idx)] += alpha1*(*v); 416bcc973b7SBarry Smith y[3*(*idx)+1] += alpha2*(*v); 417bcc973b7SBarry Smith y[3*(*idx)+2] += alpha3*(*v); 418bcc973b7SBarry Smith idx++; v++; 419bcc973b7SBarry Smith } 420bcc973b7SBarry Smith } 421b0a32e0cSBarry Smith PetscLogFlops(6*a->nz); 4221ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 4231ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 424bcc973b7SBarry Smith PetscFunctionReturn(0); 425bcc973b7SBarry Smith } 426bcc973b7SBarry Smith 427bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/ 4284a2ae208SSatish Balay #undef __FUNCT__ 429b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_4" 430dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 431bcc973b7SBarry Smith { 4324cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 433bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 43487828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 435dfbe8321SBarry Smith PetscErrorCode ierr; 436*b24ad042SBarry Smith PetscInt m = b->AIJ->m,*idx,*ii; 437*b24ad042SBarry Smith PetscInt n,i,jrow,j; 438bcc973b7SBarry Smith 439bcc973b7SBarry Smith PetscFunctionBegin; 4401ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 4411ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 442bcc973b7SBarry Smith idx = a->j; 443bcc973b7SBarry Smith v = a->a; 444bcc973b7SBarry Smith ii = a->i; 445bcc973b7SBarry Smith 446bcc973b7SBarry Smith for (i=0; i<m; i++) { 447bcc973b7SBarry Smith jrow = ii[i]; 448bcc973b7SBarry Smith n = ii[i+1] - jrow; 449bcc973b7SBarry Smith sum1 = 0.0; 450bcc973b7SBarry Smith sum2 = 0.0; 451bcc973b7SBarry Smith sum3 = 0.0; 452bcc973b7SBarry Smith sum4 = 0.0; 453bcc973b7SBarry Smith for (j=0; j<n; j++) { 454bcc973b7SBarry Smith sum1 += v[jrow]*x[4*idx[jrow]]; 455bcc973b7SBarry Smith sum2 += v[jrow]*x[4*idx[jrow]+1]; 456bcc973b7SBarry Smith sum3 += v[jrow]*x[4*idx[jrow]+2]; 457bcc973b7SBarry Smith sum4 += v[jrow]*x[4*idx[jrow]+3]; 458bcc973b7SBarry Smith jrow++; 459bcc973b7SBarry Smith } 460bcc973b7SBarry Smith y[4*i] = sum1; 461bcc973b7SBarry Smith y[4*i+1] = sum2; 462bcc973b7SBarry Smith y[4*i+2] = sum3; 463bcc973b7SBarry Smith y[4*i+3] = sum4; 464bcc973b7SBarry Smith } 465bcc973b7SBarry Smith 466b0a32e0cSBarry Smith PetscLogFlops(8*a->nz - 4*m); 4671ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 4681ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 469bcc973b7SBarry Smith PetscFunctionReturn(0); 470bcc973b7SBarry Smith } 471bcc973b7SBarry Smith 4724a2ae208SSatish Balay #undef __FUNCT__ 473b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 474dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 475bcc973b7SBarry Smith { 4764cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 477bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 47887828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0; 479dfbe8321SBarry Smith PetscErrorCode ierr; 480*b24ad042SBarry Smith PetscInt m = b->AIJ->m,n,i,*idx; 481bcc973b7SBarry Smith 482bcc973b7SBarry Smith PetscFunctionBegin; 483bcc973b7SBarry Smith ierr = VecSet(&zero,yy);CHKERRQ(ierr); 4841ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 4851ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 486bcc973b7SBarry Smith for (i=0; i<m; i++) { 487bfec09a0SHong Zhang idx = a->j + a->i[i] ; 488bfec09a0SHong Zhang v = a->a + a->i[i] ; 489bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 490bcc973b7SBarry Smith alpha1 = x[4*i]; 491bcc973b7SBarry Smith alpha2 = x[4*i+1]; 492bcc973b7SBarry Smith alpha3 = x[4*i+2]; 493bcc973b7SBarry Smith alpha4 = x[4*i+3]; 494bcc973b7SBarry Smith while (n-->0) { 495bcc973b7SBarry Smith y[4*(*idx)] += alpha1*(*v); 496bcc973b7SBarry Smith y[4*(*idx)+1] += alpha2*(*v); 497bcc973b7SBarry Smith y[4*(*idx)+2] += alpha3*(*v); 498bcc973b7SBarry Smith y[4*(*idx)+3] += alpha4*(*v); 499bcc973b7SBarry Smith idx++; v++; 500bcc973b7SBarry Smith } 501bcc973b7SBarry Smith } 502b0a32e0cSBarry Smith PetscLogFlops(8*a->nz - 4*b->AIJ->n); 5031ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 5041ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 505bcc973b7SBarry Smith PetscFunctionReturn(0); 506bcc973b7SBarry Smith } 507bcc973b7SBarry Smith 5084a2ae208SSatish Balay #undef __FUNCT__ 509b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 510dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 511bcc973b7SBarry Smith { 5124cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 513f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 51487828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 515dfbe8321SBarry Smith PetscErrorCode ierr; 516*b24ad042SBarry Smith PetscInt m = b->AIJ->m,*idx,*ii; 517*b24ad042SBarry Smith PetscInt n,i,jrow,j; 518f9fae5adSBarry Smith 519f9fae5adSBarry Smith PetscFunctionBegin; 520f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 5211ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 5221ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 523f9fae5adSBarry Smith idx = a->j; 524f9fae5adSBarry Smith v = a->a; 525f9fae5adSBarry Smith ii = a->i; 526f9fae5adSBarry Smith 527f9fae5adSBarry Smith for (i=0; i<m; i++) { 528f9fae5adSBarry Smith jrow = ii[i]; 529f9fae5adSBarry Smith n = ii[i+1] - jrow; 530f9fae5adSBarry Smith sum1 = 0.0; 531f9fae5adSBarry Smith sum2 = 0.0; 532f9fae5adSBarry Smith sum3 = 0.0; 533f9fae5adSBarry Smith sum4 = 0.0; 534f9fae5adSBarry Smith for (j=0; j<n; j++) { 535f9fae5adSBarry Smith sum1 += v[jrow]*x[4*idx[jrow]]; 536f9fae5adSBarry Smith sum2 += v[jrow]*x[4*idx[jrow]+1]; 537f9fae5adSBarry Smith sum3 += v[jrow]*x[4*idx[jrow]+2]; 538f9fae5adSBarry Smith sum4 += v[jrow]*x[4*idx[jrow]+3]; 539f9fae5adSBarry Smith jrow++; 540f9fae5adSBarry Smith } 541f9fae5adSBarry Smith y[4*i] += sum1; 542f9fae5adSBarry Smith y[4*i+1] += sum2; 543f9fae5adSBarry Smith y[4*i+2] += sum3; 544f9fae5adSBarry Smith y[4*i+3] += sum4; 545f9fae5adSBarry Smith } 546f9fae5adSBarry Smith 547b0a32e0cSBarry Smith PetscLogFlops(8*a->nz - 4*m); 5481ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 5491ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 550f9fae5adSBarry Smith PetscFunctionReturn(0); 551bcc973b7SBarry Smith } 5524a2ae208SSatish Balay #undef __FUNCT__ 553b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 554dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 555bcc973b7SBarry Smith { 5564cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 557f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 55887828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4; 559dfbe8321SBarry Smith PetscErrorCode ierr; 560*b24ad042SBarry Smith PetscInt m = b->AIJ->m,n,i,*idx; 561f9fae5adSBarry Smith 562f9fae5adSBarry Smith PetscFunctionBegin; 563f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 5641ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 5651ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 566bfec09a0SHong Zhang 567f9fae5adSBarry Smith for (i=0; i<m; i++) { 568bfec09a0SHong Zhang idx = a->j + a->i[i] ; 569bfec09a0SHong Zhang v = a->a + a->i[i] ; 570f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 571f9fae5adSBarry Smith alpha1 = x[4*i]; 572f9fae5adSBarry Smith alpha2 = x[4*i+1]; 573f9fae5adSBarry Smith alpha3 = x[4*i+2]; 574f9fae5adSBarry Smith alpha4 = x[4*i+3]; 575f9fae5adSBarry Smith while (n-->0) { 576f9fae5adSBarry Smith y[4*(*idx)] += alpha1*(*v); 577f9fae5adSBarry Smith y[4*(*idx)+1] += alpha2*(*v); 578f9fae5adSBarry Smith y[4*(*idx)+2] += alpha3*(*v); 579f9fae5adSBarry Smith y[4*(*idx)+3] += alpha4*(*v); 580f9fae5adSBarry Smith idx++; v++; 581f9fae5adSBarry Smith } 582f9fae5adSBarry Smith } 583b0a32e0cSBarry Smith PetscLogFlops(8*a->nz - 4*b->AIJ->n); 5841ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 5851ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 586f9fae5adSBarry Smith PetscFunctionReturn(0); 587f9fae5adSBarry Smith } 588f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/ 5896cd98798SBarry Smith 5904a2ae208SSatish Balay #undef __FUNCT__ 591b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_5" 592dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 593f9fae5adSBarry Smith { 5944cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 595f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 59687828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 597dfbe8321SBarry Smith PetscErrorCode ierr; 598*b24ad042SBarry Smith PetscInt m = b->AIJ->m,*idx,*ii; 599*b24ad042SBarry Smith PetscInt n,i,jrow,j; 600f9fae5adSBarry Smith 601f9fae5adSBarry Smith PetscFunctionBegin; 6021ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 6031ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 604f9fae5adSBarry Smith idx = a->j; 605f9fae5adSBarry Smith v = a->a; 606f9fae5adSBarry Smith ii = a->i; 607f9fae5adSBarry Smith 608f9fae5adSBarry Smith for (i=0; i<m; i++) { 609f9fae5adSBarry Smith jrow = ii[i]; 610f9fae5adSBarry Smith n = ii[i+1] - jrow; 611f9fae5adSBarry Smith sum1 = 0.0; 612f9fae5adSBarry Smith sum2 = 0.0; 613f9fae5adSBarry Smith sum3 = 0.0; 614f9fae5adSBarry Smith sum4 = 0.0; 615f9fae5adSBarry Smith sum5 = 0.0; 616f9fae5adSBarry Smith for (j=0; j<n; j++) { 617f9fae5adSBarry Smith sum1 += v[jrow]*x[5*idx[jrow]]; 618f9fae5adSBarry Smith sum2 += v[jrow]*x[5*idx[jrow]+1]; 619f9fae5adSBarry Smith sum3 += v[jrow]*x[5*idx[jrow]+2]; 620f9fae5adSBarry Smith sum4 += v[jrow]*x[5*idx[jrow]+3]; 621f9fae5adSBarry Smith sum5 += v[jrow]*x[5*idx[jrow]+4]; 622f9fae5adSBarry Smith jrow++; 623f9fae5adSBarry Smith } 624f9fae5adSBarry Smith y[5*i] = sum1; 625f9fae5adSBarry Smith y[5*i+1] = sum2; 626f9fae5adSBarry Smith y[5*i+2] = sum3; 627f9fae5adSBarry Smith y[5*i+3] = sum4; 628f9fae5adSBarry Smith y[5*i+4] = sum5; 629f9fae5adSBarry Smith } 630f9fae5adSBarry Smith 631b0a32e0cSBarry Smith PetscLogFlops(10*a->nz - 5*m); 6321ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 6331ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 634f9fae5adSBarry Smith PetscFunctionReturn(0); 635f9fae5adSBarry Smith } 636f9fae5adSBarry Smith 6374a2ae208SSatish Balay #undef __FUNCT__ 638b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 639dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 640f9fae5adSBarry Smith { 6414cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 642f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 64387828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0; 644dfbe8321SBarry Smith PetscErrorCode ierr; 645*b24ad042SBarry Smith PetscInt m = b->AIJ->m,n,i,*idx; 646f9fae5adSBarry Smith 647f9fae5adSBarry Smith PetscFunctionBegin; 648f9fae5adSBarry Smith ierr = VecSet(&zero,yy);CHKERRQ(ierr); 6491ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 6501ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 651bfec09a0SHong Zhang 652f9fae5adSBarry Smith for (i=0; i<m; i++) { 653bfec09a0SHong Zhang idx = a->j + a->i[i] ; 654bfec09a0SHong Zhang v = a->a + a->i[i] ; 655f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 656f9fae5adSBarry Smith alpha1 = x[5*i]; 657f9fae5adSBarry Smith alpha2 = x[5*i+1]; 658f9fae5adSBarry Smith alpha3 = x[5*i+2]; 659f9fae5adSBarry Smith alpha4 = x[5*i+3]; 660f9fae5adSBarry Smith alpha5 = x[5*i+4]; 661f9fae5adSBarry Smith while (n-->0) { 662f9fae5adSBarry Smith y[5*(*idx)] += alpha1*(*v); 663f9fae5adSBarry Smith y[5*(*idx)+1] += alpha2*(*v); 664f9fae5adSBarry Smith y[5*(*idx)+2] += alpha3*(*v); 665f9fae5adSBarry Smith y[5*(*idx)+3] += alpha4*(*v); 666f9fae5adSBarry Smith y[5*(*idx)+4] += alpha5*(*v); 667f9fae5adSBarry Smith idx++; v++; 668f9fae5adSBarry Smith } 669f9fae5adSBarry Smith } 670b0a32e0cSBarry Smith PetscLogFlops(10*a->nz - 5*b->AIJ->n); 6711ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 6721ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 673f9fae5adSBarry Smith PetscFunctionReturn(0); 674f9fae5adSBarry Smith } 675f9fae5adSBarry Smith 6764a2ae208SSatish Balay #undef __FUNCT__ 677b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 678dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 679f9fae5adSBarry Smith { 6804cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 681f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 68287828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 683dfbe8321SBarry Smith PetscErrorCode ierr; 684*b24ad042SBarry Smith PetscInt m = b->AIJ->m,*idx,*ii; 685*b24ad042SBarry Smith PetscInt n,i,jrow,j; 686f9fae5adSBarry Smith 687f9fae5adSBarry Smith PetscFunctionBegin; 688f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 6891ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 6901ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 691f9fae5adSBarry Smith idx = a->j; 692f9fae5adSBarry Smith v = a->a; 693f9fae5adSBarry Smith ii = a->i; 694f9fae5adSBarry Smith 695f9fae5adSBarry Smith for (i=0; i<m; i++) { 696f9fae5adSBarry Smith jrow = ii[i]; 697f9fae5adSBarry Smith n = ii[i+1] - jrow; 698f9fae5adSBarry Smith sum1 = 0.0; 699f9fae5adSBarry Smith sum2 = 0.0; 700f9fae5adSBarry Smith sum3 = 0.0; 701f9fae5adSBarry Smith sum4 = 0.0; 702f9fae5adSBarry Smith sum5 = 0.0; 703f9fae5adSBarry Smith for (j=0; j<n; j++) { 704f9fae5adSBarry Smith sum1 += v[jrow]*x[5*idx[jrow]]; 705f9fae5adSBarry Smith sum2 += v[jrow]*x[5*idx[jrow]+1]; 706f9fae5adSBarry Smith sum3 += v[jrow]*x[5*idx[jrow]+2]; 707f9fae5adSBarry Smith sum4 += v[jrow]*x[5*idx[jrow]+3]; 708f9fae5adSBarry Smith sum5 += v[jrow]*x[5*idx[jrow]+4]; 709f9fae5adSBarry Smith jrow++; 710f9fae5adSBarry Smith } 711f9fae5adSBarry Smith y[5*i] += sum1; 712f9fae5adSBarry Smith y[5*i+1] += sum2; 713f9fae5adSBarry Smith y[5*i+2] += sum3; 714f9fae5adSBarry Smith y[5*i+3] += sum4; 715f9fae5adSBarry Smith y[5*i+4] += sum5; 716f9fae5adSBarry Smith } 717f9fae5adSBarry Smith 718b0a32e0cSBarry Smith PetscLogFlops(10*a->nz); 7191ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 7201ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 721f9fae5adSBarry Smith PetscFunctionReturn(0); 722f9fae5adSBarry Smith } 723f9fae5adSBarry Smith 7244a2ae208SSatish Balay #undef __FUNCT__ 725b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 726dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 727f9fae5adSBarry Smith { 7284cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 729f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 73087828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5; 731dfbe8321SBarry Smith PetscErrorCode ierr; 732*b24ad042SBarry Smith PetscInt m = b->AIJ->m,n,i,*idx; 733f9fae5adSBarry Smith 734f9fae5adSBarry Smith PetscFunctionBegin; 735f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 7361ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 7371ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 738bfec09a0SHong Zhang 739f9fae5adSBarry Smith for (i=0; i<m; i++) { 740bfec09a0SHong Zhang idx = a->j + a->i[i] ; 741bfec09a0SHong Zhang v = a->a + a->i[i] ; 742f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 743f9fae5adSBarry Smith alpha1 = x[5*i]; 744f9fae5adSBarry Smith alpha2 = x[5*i+1]; 745f9fae5adSBarry Smith alpha3 = x[5*i+2]; 746f9fae5adSBarry Smith alpha4 = x[5*i+3]; 747f9fae5adSBarry Smith alpha5 = x[5*i+4]; 748f9fae5adSBarry Smith while (n-->0) { 749f9fae5adSBarry Smith y[5*(*idx)] += alpha1*(*v); 750f9fae5adSBarry Smith y[5*(*idx)+1] += alpha2*(*v); 751f9fae5adSBarry Smith y[5*(*idx)+2] += alpha3*(*v); 752f9fae5adSBarry Smith y[5*(*idx)+3] += alpha4*(*v); 753f9fae5adSBarry Smith y[5*(*idx)+4] += alpha5*(*v); 754f9fae5adSBarry Smith idx++; v++; 755f9fae5adSBarry Smith } 756f9fae5adSBarry Smith } 757b0a32e0cSBarry Smith PetscLogFlops(10*a->nz); 7581ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 7591ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 760f9fae5adSBarry Smith PetscFunctionReturn(0); 761bcc973b7SBarry Smith } 762bcc973b7SBarry Smith 7636cd98798SBarry Smith /* ------------------------------------------------------------------------------*/ 7644a2ae208SSatish Balay #undef __FUNCT__ 765b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_6" 766dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 7676cd98798SBarry Smith { 7686cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 7696cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 77087828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 771dfbe8321SBarry Smith PetscErrorCode ierr; 772*b24ad042SBarry Smith PetscInt m = b->AIJ->m,*idx,*ii; 773*b24ad042SBarry Smith PetscInt n,i,jrow,j; 7746cd98798SBarry Smith 7756cd98798SBarry Smith PetscFunctionBegin; 7761ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 7771ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 7786cd98798SBarry Smith idx = a->j; 7796cd98798SBarry Smith v = a->a; 7806cd98798SBarry Smith ii = a->i; 7816cd98798SBarry Smith 7826cd98798SBarry Smith for (i=0; i<m; i++) { 7836cd98798SBarry Smith jrow = ii[i]; 7846cd98798SBarry Smith n = ii[i+1] - jrow; 7856cd98798SBarry Smith sum1 = 0.0; 7866cd98798SBarry Smith sum2 = 0.0; 7876cd98798SBarry Smith sum3 = 0.0; 7886cd98798SBarry Smith sum4 = 0.0; 7896cd98798SBarry Smith sum5 = 0.0; 7906cd98798SBarry Smith sum6 = 0.0; 7916cd98798SBarry Smith for (j=0; j<n; j++) { 7926cd98798SBarry Smith sum1 += v[jrow]*x[6*idx[jrow]]; 7936cd98798SBarry Smith sum2 += v[jrow]*x[6*idx[jrow]+1]; 7946cd98798SBarry Smith sum3 += v[jrow]*x[6*idx[jrow]+2]; 7956cd98798SBarry Smith sum4 += v[jrow]*x[6*idx[jrow]+3]; 7966cd98798SBarry Smith sum5 += v[jrow]*x[6*idx[jrow]+4]; 7976cd98798SBarry Smith sum6 += v[jrow]*x[6*idx[jrow]+5]; 7986cd98798SBarry Smith jrow++; 7996cd98798SBarry Smith } 8006cd98798SBarry Smith y[6*i] = sum1; 8016cd98798SBarry Smith y[6*i+1] = sum2; 8026cd98798SBarry Smith y[6*i+2] = sum3; 8036cd98798SBarry Smith y[6*i+3] = sum4; 8046cd98798SBarry Smith y[6*i+4] = sum5; 8056cd98798SBarry Smith y[6*i+5] = sum6; 8066cd98798SBarry Smith } 8076cd98798SBarry Smith 808b0a32e0cSBarry Smith PetscLogFlops(12*a->nz - 6*m); 8091ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 8101ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 8116cd98798SBarry Smith PetscFunctionReturn(0); 8126cd98798SBarry Smith } 8136cd98798SBarry Smith 8144a2ae208SSatish Balay #undef __FUNCT__ 815b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 816dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 8176cd98798SBarry Smith { 8186cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 8196cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 82087828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0; 821dfbe8321SBarry Smith PetscErrorCode ierr; 822*b24ad042SBarry Smith PetscInt m = b->AIJ->m,n,i,*idx; 8236cd98798SBarry Smith 8246cd98798SBarry Smith PetscFunctionBegin; 8256cd98798SBarry Smith ierr = VecSet(&zero,yy);CHKERRQ(ierr); 8261ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 8271ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 828bfec09a0SHong Zhang 8296cd98798SBarry Smith for (i=0; i<m; i++) { 830bfec09a0SHong Zhang idx = a->j + a->i[i] ; 831bfec09a0SHong Zhang v = a->a + a->i[i] ; 8326cd98798SBarry Smith n = a->i[i+1] - a->i[i]; 8336cd98798SBarry Smith alpha1 = x[6*i]; 8346cd98798SBarry Smith alpha2 = x[6*i+1]; 8356cd98798SBarry Smith alpha3 = x[6*i+2]; 8366cd98798SBarry Smith alpha4 = x[6*i+3]; 8376cd98798SBarry Smith alpha5 = x[6*i+4]; 8386cd98798SBarry Smith alpha6 = x[6*i+5]; 8396cd98798SBarry Smith while (n-->0) { 8406cd98798SBarry Smith y[6*(*idx)] += alpha1*(*v); 8416cd98798SBarry Smith y[6*(*idx)+1] += alpha2*(*v); 8426cd98798SBarry Smith y[6*(*idx)+2] += alpha3*(*v); 8436cd98798SBarry Smith y[6*(*idx)+3] += alpha4*(*v); 8446cd98798SBarry Smith y[6*(*idx)+4] += alpha5*(*v); 8456cd98798SBarry Smith y[6*(*idx)+5] += alpha6*(*v); 8466cd98798SBarry Smith idx++; v++; 8476cd98798SBarry Smith } 8486cd98798SBarry Smith } 849b0a32e0cSBarry Smith PetscLogFlops(12*a->nz - 6*b->AIJ->n); 8501ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 8511ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 8526cd98798SBarry Smith PetscFunctionReturn(0); 8536cd98798SBarry Smith } 8546cd98798SBarry Smith 8554a2ae208SSatish Balay #undef __FUNCT__ 856b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 857dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 8586cd98798SBarry Smith { 8596cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 8606cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 86187828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 862dfbe8321SBarry Smith PetscErrorCode ierr; 863*b24ad042SBarry Smith PetscInt m = b->AIJ->m,*idx,*ii; 864*b24ad042SBarry Smith PetscInt n,i,jrow,j; 8656cd98798SBarry Smith 8666cd98798SBarry Smith PetscFunctionBegin; 8676cd98798SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 8681ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 8691ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 8706cd98798SBarry Smith idx = a->j; 8716cd98798SBarry Smith v = a->a; 8726cd98798SBarry Smith ii = a->i; 8736cd98798SBarry Smith 8746cd98798SBarry Smith for (i=0; i<m; i++) { 8756cd98798SBarry Smith jrow = ii[i]; 8766cd98798SBarry Smith n = ii[i+1] - jrow; 8776cd98798SBarry Smith sum1 = 0.0; 8786cd98798SBarry Smith sum2 = 0.0; 8796cd98798SBarry Smith sum3 = 0.0; 8806cd98798SBarry Smith sum4 = 0.0; 8816cd98798SBarry Smith sum5 = 0.0; 8826cd98798SBarry Smith sum6 = 0.0; 8836cd98798SBarry Smith for (j=0; j<n; j++) { 8846cd98798SBarry Smith sum1 += v[jrow]*x[6*idx[jrow]]; 8856cd98798SBarry Smith sum2 += v[jrow]*x[6*idx[jrow]+1]; 8866cd98798SBarry Smith sum3 += v[jrow]*x[6*idx[jrow]+2]; 8876cd98798SBarry Smith sum4 += v[jrow]*x[6*idx[jrow]+3]; 8886cd98798SBarry Smith sum5 += v[jrow]*x[6*idx[jrow]+4]; 8896cd98798SBarry Smith sum6 += v[jrow]*x[6*idx[jrow]+5]; 8906cd98798SBarry Smith jrow++; 8916cd98798SBarry Smith } 8926cd98798SBarry Smith y[6*i] += sum1; 8936cd98798SBarry Smith y[6*i+1] += sum2; 8946cd98798SBarry Smith y[6*i+2] += sum3; 8956cd98798SBarry Smith y[6*i+3] += sum4; 8966cd98798SBarry Smith y[6*i+4] += sum5; 8976cd98798SBarry Smith y[6*i+5] += sum6; 8986cd98798SBarry Smith } 8996cd98798SBarry Smith 900b0a32e0cSBarry Smith PetscLogFlops(12*a->nz); 9011ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 9021ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 9036cd98798SBarry Smith PetscFunctionReturn(0); 9046cd98798SBarry Smith } 9056cd98798SBarry Smith 9064a2ae208SSatish Balay #undef __FUNCT__ 907b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 908dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 9096cd98798SBarry Smith { 9106cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 9116cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 91287828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 913dfbe8321SBarry Smith PetscErrorCode ierr; 914*b24ad042SBarry Smith PetscInt m = b->AIJ->m,n,i,*idx; 9156cd98798SBarry Smith 9166cd98798SBarry Smith PetscFunctionBegin; 9176cd98798SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 9181ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 9191ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 920bfec09a0SHong Zhang 9216cd98798SBarry Smith for (i=0; i<m; i++) { 922bfec09a0SHong Zhang idx = a->j + a->i[i] ; 923bfec09a0SHong Zhang v = a->a + a->i[i] ; 9246cd98798SBarry Smith n = a->i[i+1] - a->i[i]; 9256cd98798SBarry Smith alpha1 = x[6*i]; 9266cd98798SBarry Smith alpha2 = x[6*i+1]; 9276cd98798SBarry Smith alpha3 = x[6*i+2]; 9286cd98798SBarry Smith alpha4 = x[6*i+3]; 9296cd98798SBarry Smith alpha5 = x[6*i+4]; 9306cd98798SBarry Smith alpha6 = x[6*i+5]; 9316cd98798SBarry Smith while (n-->0) { 9326cd98798SBarry Smith y[6*(*idx)] += alpha1*(*v); 9336cd98798SBarry Smith y[6*(*idx)+1] += alpha2*(*v); 9346cd98798SBarry Smith y[6*(*idx)+2] += alpha3*(*v); 9356cd98798SBarry Smith y[6*(*idx)+3] += alpha4*(*v); 9366cd98798SBarry Smith y[6*(*idx)+4] += alpha5*(*v); 9376cd98798SBarry Smith y[6*(*idx)+5] += alpha6*(*v); 9386cd98798SBarry Smith idx++; v++; 9396cd98798SBarry Smith } 9406cd98798SBarry Smith } 941b0a32e0cSBarry Smith PetscLogFlops(12*a->nz); 9421ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 9431ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 9446cd98798SBarry Smith PetscFunctionReturn(0); 9456cd98798SBarry Smith } 9466cd98798SBarry Smith 94766ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/ 94866ed3db0SBarry Smith #undef __FUNCT__ 94966ed3db0SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_8" 950dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 95166ed3db0SBarry Smith { 95266ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 95366ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 95487828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 955dfbe8321SBarry Smith PetscErrorCode ierr; 956*b24ad042SBarry Smith PetscInt m = b->AIJ->m,*idx,*ii; 957*b24ad042SBarry Smith PetscInt n,i,jrow,j; 95866ed3db0SBarry Smith 95966ed3db0SBarry Smith PetscFunctionBegin; 9601ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 9611ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 96266ed3db0SBarry Smith idx = a->j; 96366ed3db0SBarry Smith v = a->a; 96466ed3db0SBarry Smith ii = a->i; 96566ed3db0SBarry Smith 96666ed3db0SBarry Smith for (i=0; i<m; i++) { 96766ed3db0SBarry Smith jrow = ii[i]; 96866ed3db0SBarry Smith n = ii[i+1] - jrow; 96966ed3db0SBarry Smith sum1 = 0.0; 97066ed3db0SBarry Smith sum2 = 0.0; 97166ed3db0SBarry Smith sum3 = 0.0; 97266ed3db0SBarry Smith sum4 = 0.0; 97366ed3db0SBarry Smith sum5 = 0.0; 97466ed3db0SBarry Smith sum6 = 0.0; 97566ed3db0SBarry Smith sum7 = 0.0; 97666ed3db0SBarry Smith sum8 = 0.0; 97766ed3db0SBarry Smith for (j=0; j<n; j++) { 97866ed3db0SBarry Smith sum1 += v[jrow]*x[8*idx[jrow]]; 97966ed3db0SBarry Smith sum2 += v[jrow]*x[8*idx[jrow]+1]; 98066ed3db0SBarry Smith sum3 += v[jrow]*x[8*idx[jrow]+2]; 98166ed3db0SBarry Smith sum4 += v[jrow]*x[8*idx[jrow]+3]; 98266ed3db0SBarry Smith sum5 += v[jrow]*x[8*idx[jrow]+4]; 98366ed3db0SBarry Smith sum6 += v[jrow]*x[8*idx[jrow]+5]; 98466ed3db0SBarry Smith sum7 += v[jrow]*x[8*idx[jrow]+6]; 98566ed3db0SBarry Smith sum8 += v[jrow]*x[8*idx[jrow]+7]; 98666ed3db0SBarry Smith jrow++; 98766ed3db0SBarry Smith } 98866ed3db0SBarry Smith y[8*i] = sum1; 98966ed3db0SBarry Smith y[8*i+1] = sum2; 99066ed3db0SBarry Smith y[8*i+2] = sum3; 99166ed3db0SBarry Smith y[8*i+3] = sum4; 99266ed3db0SBarry Smith y[8*i+4] = sum5; 99366ed3db0SBarry Smith y[8*i+5] = sum6; 99466ed3db0SBarry Smith y[8*i+6] = sum7; 99566ed3db0SBarry Smith y[8*i+7] = sum8; 99666ed3db0SBarry Smith } 99766ed3db0SBarry Smith 99866ed3db0SBarry Smith PetscLogFlops(16*a->nz - 8*m); 9991ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 10001ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 100166ed3db0SBarry Smith PetscFunctionReturn(0); 100266ed3db0SBarry Smith } 100366ed3db0SBarry Smith 100466ed3db0SBarry Smith #undef __FUNCT__ 100566ed3db0SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 1006dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 100766ed3db0SBarry Smith { 100866ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 100966ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 101087828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1011dfbe8321SBarry Smith PetscErrorCode ierr; 1012*b24ad042SBarry Smith PetscInt m = b->AIJ->m,n,i,*idx; 101366ed3db0SBarry Smith 101466ed3db0SBarry Smith PetscFunctionBegin; 101566ed3db0SBarry Smith ierr = VecSet(&zero,yy);CHKERRQ(ierr); 10161ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 10171ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1018bfec09a0SHong Zhang 101966ed3db0SBarry Smith for (i=0; i<m; i++) { 1020bfec09a0SHong Zhang idx = a->j + a->i[i] ; 1021bfec09a0SHong Zhang v = a->a + a->i[i] ; 102266ed3db0SBarry Smith n = a->i[i+1] - a->i[i]; 102366ed3db0SBarry Smith alpha1 = x[8*i]; 102466ed3db0SBarry Smith alpha2 = x[8*i+1]; 102566ed3db0SBarry Smith alpha3 = x[8*i+2]; 102666ed3db0SBarry Smith alpha4 = x[8*i+3]; 102766ed3db0SBarry Smith alpha5 = x[8*i+4]; 102866ed3db0SBarry Smith alpha6 = x[8*i+5]; 102966ed3db0SBarry Smith alpha7 = x[8*i+6]; 103066ed3db0SBarry Smith alpha8 = x[8*i+7]; 103166ed3db0SBarry Smith while (n-->0) { 103266ed3db0SBarry Smith y[8*(*idx)] += alpha1*(*v); 103366ed3db0SBarry Smith y[8*(*idx)+1] += alpha2*(*v); 103466ed3db0SBarry Smith y[8*(*idx)+2] += alpha3*(*v); 103566ed3db0SBarry Smith y[8*(*idx)+3] += alpha4*(*v); 103666ed3db0SBarry Smith y[8*(*idx)+4] += alpha5*(*v); 103766ed3db0SBarry Smith y[8*(*idx)+5] += alpha6*(*v); 103866ed3db0SBarry Smith y[8*(*idx)+6] += alpha7*(*v); 103966ed3db0SBarry Smith y[8*(*idx)+7] += alpha8*(*v); 104066ed3db0SBarry Smith idx++; v++; 104166ed3db0SBarry Smith } 104266ed3db0SBarry Smith } 104366ed3db0SBarry Smith PetscLogFlops(16*a->nz - 8*b->AIJ->n); 10441ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 10451ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 104666ed3db0SBarry Smith PetscFunctionReturn(0); 104766ed3db0SBarry Smith } 104866ed3db0SBarry Smith 104966ed3db0SBarry Smith #undef __FUNCT__ 105066ed3db0SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1051dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 105266ed3db0SBarry Smith { 105366ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 105466ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 105587828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1056dfbe8321SBarry Smith PetscErrorCode ierr; 1057*b24ad042SBarry Smith PetscInt m = b->AIJ->m,*idx,*ii; 1058*b24ad042SBarry Smith PetscInt n,i,jrow,j; 105966ed3db0SBarry Smith 106066ed3db0SBarry Smith PetscFunctionBegin; 106166ed3db0SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 10621ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 10631ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 106466ed3db0SBarry Smith idx = a->j; 106566ed3db0SBarry Smith v = a->a; 106666ed3db0SBarry Smith ii = a->i; 106766ed3db0SBarry Smith 106866ed3db0SBarry Smith for (i=0; i<m; i++) { 106966ed3db0SBarry Smith jrow = ii[i]; 107066ed3db0SBarry Smith n = ii[i+1] - jrow; 107166ed3db0SBarry Smith sum1 = 0.0; 107266ed3db0SBarry Smith sum2 = 0.0; 107366ed3db0SBarry Smith sum3 = 0.0; 107466ed3db0SBarry Smith sum4 = 0.0; 107566ed3db0SBarry Smith sum5 = 0.0; 107666ed3db0SBarry Smith sum6 = 0.0; 107766ed3db0SBarry Smith sum7 = 0.0; 107866ed3db0SBarry Smith sum8 = 0.0; 107966ed3db0SBarry Smith for (j=0; j<n; j++) { 108066ed3db0SBarry Smith sum1 += v[jrow]*x[8*idx[jrow]]; 108166ed3db0SBarry Smith sum2 += v[jrow]*x[8*idx[jrow]+1]; 108266ed3db0SBarry Smith sum3 += v[jrow]*x[8*idx[jrow]+2]; 108366ed3db0SBarry Smith sum4 += v[jrow]*x[8*idx[jrow]+3]; 108466ed3db0SBarry Smith sum5 += v[jrow]*x[8*idx[jrow]+4]; 108566ed3db0SBarry Smith sum6 += v[jrow]*x[8*idx[jrow]+5]; 108666ed3db0SBarry Smith sum7 += v[jrow]*x[8*idx[jrow]+6]; 108766ed3db0SBarry Smith sum8 += v[jrow]*x[8*idx[jrow]+7]; 108866ed3db0SBarry Smith jrow++; 108966ed3db0SBarry Smith } 109066ed3db0SBarry Smith y[8*i] += sum1; 109166ed3db0SBarry Smith y[8*i+1] += sum2; 109266ed3db0SBarry Smith y[8*i+2] += sum3; 109366ed3db0SBarry Smith y[8*i+3] += sum4; 109466ed3db0SBarry Smith y[8*i+4] += sum5; 109566ed3db0SBarry Smith y[8*i+5] += sum6; 109666ed3db0SBarry Smith y[8*i+6] += sum7; 109766ed3db0SBarry Smith y[8*i+7] += sum8; 109866ed3db0SBarry Smith } 109966ed3db0SBarry Smith 110066ed3db0SBarry Smith PetscLogFlops(16*a->nz); 11011ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 11021ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 110366ed3db0SBarry Smith PetscFunctionReturn(0); 110466ed3db0SBarry Smith } 110566ed3db0SBarry Smith 110666ed3db0SBarry Smith #undef __FUNCT__ 110766ed3db0SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1108dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 110966ed3db0SBarry Smith { 111066ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 111166ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 111287828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1113dfbe8321SBarry Smith PetscErrorCode ierr; 1114*b24ad042SBarry Smith PetscInt m = b->AIJ->m,n,i,*idx; 111566ed3db0SBarry Smith 111666ed3db0SBarry Smith PetscFunctionBegin; 111766ed3db0SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 11181ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 11191ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 112066ed3db0SBarry Smith for (i=0; i<m; i++) { 1121bfec09a0SHong Zhang idx = a->j + a->i[i] ; 1122bfec09a0SHong Zhang v = a->a + a->i[i] ; 112366ed3db0SBarry Smith n = a->i[i+1] - a->i[i]; 112466ed3db0SBarry Smith alpha1 = x[8*i]; 112566ed3db0SBarry Smith alpha2 = x[8*i+1]; 112666ed3db0SBarry Smith alpha3 = x[8*i+2]; 112766ed3db0SBarry Smith alpha4 = x[8*i+3]; 112866ed3db0SBarry Smith alpha5 = x[8*i+4]; 112966ed3db0SBarry Smith alpha6 = x[8*i+5]; 113066ed3db0SBarry Smith alpha7 = x[8*i+6]; 113166ed3db0SBarry Smith alpha8 = x[8*i+7]; 113266ed3db0SBarry Smith while (n-->0) { 113366ed3db0SBarry Smith y[8*(*idx)] += alpha1*(*v); 113466ed3db0SBarry Smith y[8*(*idx)+1] += alpha2*(*v); 113566ed3db0SBarry Smith y[8*(*idx)+2] += alpha3*(*v); 113666ed3db0SBarry Smith y[8*(*idx)+3] += alpha4*(*v); 113766ed3db0SBarry Smith y[8*(*idx)+4] += alpha5*(*v); 113866ed3db0SBarry Smith y[8*(*idx)+5] += alpha6*(*v); 113966ed3db0SBarry Smith y[8*(*idx)+6] += alpha7*(*v); 114066ed3db0SBarry Smith y[8*(*idx)+7] += alpha8*(*v); 114166ed3db0SBarry Smith idx++; v++; 114266ed3db0SBarry Smith } 114366ed3db0SBarry Smith } 114466ed3db0SBarry Smith PetscLogFlops(16*a->nz); 11451ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 11461ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 11472f7816d4SBarry Smith PetscFunctionReturn(0); 11482f7816d4SBarry Smith } 11492f7816d4SBarry Smith 11502b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/ 11512b692628SMatthew Knepley #undef __FUNCT__ 11522b692628SMatthew Knepley #define __FUNCT__ "MatMult_SeqMAIJ_9" 1153dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 11542b692628SMatthew Knepley { 11552b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 11562b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 11572b692628SMatthew Knepley PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1158dfbe8321SBarry Smith PetscErrorCode ierr; 1159*b24ad042SBarry Smith PetscInt m = b->AIJ->m,*idx,*ii; 1160*b24ad042SBarry Smith PetscInt n,i,jrow,j; 11612b692628SMatthew Knepley 11622b692628SMatthew Knepley PetscFunctionBegin; 11631ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 11641ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 11652b692628SMatthew Knepley idx = a->j; 11662b692628SMatthew Knepley v = a->a; 11672b692628SMatthew Knepley ii = a->i; 11682b692628SMatthew Knepley 11692b692628SMatthew Knepley for (i=0; i<m; i++) { 11702b692628SMatthew Knepley jrow = ii[i]; 11712b692628SMatthew Knepley n = ii[i+1] - jrow; 11722b692628SMatthew Knepley sum1 = 0.0; 11732b692628SMatthew Knepley sum2 = 0.0; 11742b692628SMatthew Knepley sum3 = 0.0; 11752b692628SMatthew Knepley sum4 = 0.0; 11762b692628SMatthew Knepley sum5 = 0.0; 11772b692628SMatthew Knepley sum6 = 0.0; 11782b692628SMatthew Knepley sum7 = 0.0; 11792b692628SMatthew Knepley sum8 = 0.0; 11802b692628SMatthew Knepley sum9 = 0.0; 11812b692628SMatthew Knepley for (j=0; j<n; j++) { 11822b692628SMatthew Knepley sum1 += v[jrow]*x[9*idx[jrow]]; 11832b692628SMatthew Knepley sum2 += v[jrow]*x[9*idx[jrow]+1]; 11842b692628SMatthew Knepley sum3 += v[jrow]*x[9*idx[jrow]+2]; 11852b692628SMatthew Knepley sum4 += v[jrow]*x[9*idx[jrow]+3]; 11862b692628SMatthew Knepley sum5 += v[jrow]*x[9*idx[jrow]+4]; 11872b692628SMatthew Knepley sum6 += v[jrow]*x[9*idx[jrow]+5]; 11882b692628SMatthew Knepley sum7 += v[jrow]*x[9*idx[jrow]+6]; 11892b692628SMatthew Knepley sum8 += v[jrow]*x[9*idx[jrow]+7]; 11902b692628SMatthew Knepley sum9 += v[jrow]*x[9*idx[jrow]+8]; 11912b692628SMatthew Knepley jrow++; 11922b692628SMatthew Knepley } 11932b692628SMatthew Knepley y[9*i] = sum1; 11942b692628SMatthew Knepley y[9*i+1] = sum2; 11952b692628SMatthew Knepley y[9*i+2] = sum3; 11962b692628SMatthew Knepley y[9*i+3] = sum4; 11972b692628SMatthew Knepley y[9*i+4] = sum5; 11982b692628SMatthew Knepley y[9*i+5] = sum6; 11992b692628SMatthew Knepley y[9*i+6] = sum7; 12002b692628SMatthew Knepley y[9*i+7] = sum8; 12012b692628SMatthew Knepley y[9*i+8] = sum9; 12022b692628SMatthew Knepley } 12032b692628SMatthew Knepley 12042b692628SMatthew Knepley PetscLogFlops(18*a->nz - 9*m); 12051ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 12061ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 12072b692628SMatthew Knepley PetscFunctionReturn(0); 12082b692628SMatthew Knepley } 12092b692628SMatthew Knepley 12102b692628SMatthew Knepley #undef __FUNCT__ 12112b692628SMatthew Knepley #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9" 1212dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 12132b692628SMatthew Knepley { 12142b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 12152b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 12162b692628SMatthew Knepley PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0; 1217dfbe8321SBarry Smith PetscErrorCode ierr; 1218*b24ad042SBarry Smith PetscInt m = b->AIJ->m,n,i,*idx; 12192b692628SMatthew Knepley 12202b692628SMatthew Knepley PetscFunctionBegin; 12212b692628SMatthew Knepley ierr = VecSet(&zero,yy);CHKERRQ(ierr); 12221ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 12231ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 12242b692628SMatthew Knepley 12252b692628SMatthew Knepley for (i=0; i<m; i++) { 12262b692628SMatthew Knepley idx = a->j + a->i[i] ; 12272b692628SMatthew Knepley v = a->a + a->i[i] ; 12282b692628SMatthew Knepley n = a->i[i+1] - a->i[i]; 12292b692628SMatthew Knepley alpha1 = x[9*i]; 12302b692628SMatthew Knepley alpha2 = x[9*i+1]; 12312b692628SMatthew Knepley alpha3 = x[9*i+2]; 12322b692628SMatthew Knepley alpha4 = x[9*i+3]; 12332b692628SMatthew Knepley alpha5 = x[9*i+4]; 12342b692628SMatthew Knepley alpha6 = x[9*i+5]; 12352b692628SMatthew Knepley alpha7 = x[9*i+6]; 12362b692628SMatthew Knepley alpha8 = x[9*i+7]; 12372f6bd0c6SMatthew Knepley alpha9 = x[9*i+8]; 12382b692628SMatthew Knepley while (n-->0) { 12392b692628SMatthew Knepley y[9*(*idx)] += alpha1*(*v); 12402b692628SMatthew Knepley y[9*(*idx)+1] += alpha2*(*v); 12412b692628SMatthew Knepley y[9*(*idx)+2] += alpha3*(*v); 12422b692628SMatthew Knepley y[9*(*idx)+3] += alpha4*(*v); 12432b692628SMatthew Knepley y[9*(*idx)+4] += alpha5*(*v); 12442b692628SMatthew Knepley y[9*(*idx)+5] += alpha6*(*v); 12452b692628SMatthew Knepley y[9*(*idx)+6] += alpha7*(*v); 12462b692628SMatthew Knepley y[9*(*idx)+7] += alpha8*(*v); 12472b692628SMatthew Knepley y[9*(*idx)+8] += alpha9*(*v); 12482b692628SMatthew Knepley idx++; v++; 12492b692628SMatthew Knepley } 12502b692628SMatthew Knepley } 12512b692628SMatthew Knepley PetscLogFlops(18*a->nz - 9*b->AIJ->n); 12521ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 12531ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 12542b692628SMatthew Knepley PetscFunctionReturn(0); 12552b692628SMatthew Knepley } 12562b692628SMatthew Knepley 12572b692628SMatthew Knepley #undef __FUNCT__ 12582b692628SMatthew Knepley #define __FUNCT__ "MatMultAdd_SeqMAIJ_9" 1259dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 12602b692628SMatthew Knepley { 12612b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 12622b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 12632b692628SMatthew Knepley PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1264dfbe8321SBarry Smith PetscErrorCode ierr; 1265*b24ad042SBarry Smith PetscInt m = b->AIJ->m,*idx,*ii; 1266*b24ad042SBarry Smith PetscInt n,i,jrow,j; 12672b692628SMatthew Knepley 12682b692628SMatthew Knepley PetscFunctionBegin; 12692b692628SMatthew Knepley if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 12701ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 12711ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 12722b692628SMatthew Knepley idx = a->j; 12732b692628SMatthew Knepley v = a->a; 12742b692628SMatthew Knepley ii = a->i; 12752b692628SMatthew Knepley 12762b692628SMatthew Knepley for (i=0; i<m; i++) { 12772b692628SMatthew Knepley jrow = ii[i]; 12782b692628SMatthew Knepley n = ii[i+1] - jrow; 12792b692628SMatthew Knepley sum1 = 0.0; 12802b692628SMatthew Knepley sum2 = 0.0; 12812b692628SMatthew Knepley sum3 = 0.0; 12822b692628SMatthew Knepley sum4 = 0.0; 12832b692628SMatthew Knepley sum5 = 0.0; 12842b692628SMatthew Knepley sum6 = 0.0; 12852b692628SMatthew Knepley sum7 = 0.0; 12862b692628SMatthew Knepley sum8 = 0.0; 12872b692628SMatthew Knepley sum9 = 0.0; 12882b692628SMatthew Knepley for (j=0; j<n; j++) { 12892b692628SMatthew Knepley sum1 += v[jrow]*x[9*idx[jrow]]; 12902b692628SMatthew Knepley sum2 += v[jrow]*x[9*idx[jrow]+1]; 12912b692628SMatthew Knepley sum3 += v[jrow]*x[9*idx[jrow]+2]; 12922b692628SMatthew Knepley sum4 += v[jrow]*x[9*idx[jrow]+3]; 12932b692628SMatthew Knepley sum5 += v[jrow]*x[9*idx[jrow]+4]; 12942b692628SMatthew Knepley sum6 += v[jrow]*x[9*idx[jrow]+5]; 12952b692628SMatthew Knepley sum7 += v[jrow]*x[9*idx[jrow]+6]; 12962b692628SMatthew Knepley sum8 += v[jrow]*x[9*idx[jrow]+7]; 12972b692628SMatthew Knepley sum9 += v[jrow]*x[9*idx[jrow]+8]; 12982b692628SMatthew Knepley jrow++; 12992b692628SMatthew Knepley } 13002b692628SMatthew Knepley y[9*i] += sum1; 13012b692628SMatthew Knepley y[9*i+1] += sum2; 13022b692628SMatthew Knepley y[9*i+2] += sum3; 13032b692628SMatthew Knepley y[9*i+3] += sum4; 13042b692628SMatthew Knepley y[9*i+4] += sum5; 13052b692628SMatthew Knepley y[9*i+5] += sum6; 13062b692628SMatthew Knepley y[9*i+6] += sum7; 13072b692628SMatthew Knepley y[9*i+7] += sum8; 13082b692628SMatthew Knepley y[9*i+8] += sum9; 13092b692628SMatthew Knepley } 13102b692628SMatthew Knepley 13112b692628SMatthew Knepley PetscLogFlops(18*a->nz); 13121ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 13131ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 13142b692628SMatthew Knepley PetscFunctionReturn(0); 13152b692628SMatthew Knepley } 13162b692628SMatthew Knepley 13172b692628SMatthew Knepley #undef __FUNCT__ 13182b692628SMatthew Knepley #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9" 1319dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 13202b692628SMatthew Knepley { 13212b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 13222b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 13232b692628SMatthew Knepley PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1324dfbe8321SBarry Smith PetscErrorCode ierr; 1325*b24ad042SBarry Smith PetscInt m = b->AIJ->m,n,i,*idx; 13262b692628SMatthew Knepley 13272b692628SMatthew Knepley PetscFunctionBegin; 13282b692628SMatthew Knepley if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 13291ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 13301ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 13312b692628SMatthew Knepley for (i=0; i<m; i++) { 13322b692628SMatthew Knepley idx = a->j + a->i[i] ; 13332b692628SMatthew Knepley v = a->a + a->i[i] ; 13342b692628SMatthew Knepley n = a->i[i+1] - a->i[i]; 13352b692628SMatthew Knepley alpha1 = x[9*i]; 13362b692628SMatthew Knepley alpha2 = x[9*i+1]; 13372b692628SMatthew Knepley alpha3 = x[9*i+2]; 13382b692628SMatthew Knepley alpha4 = x[9*i+3]; 13392b692628SMatthew Knepley alpha5 = x[9*i+4]; 13402b692628SMatthew Knepley alpha6 = x[9*i+5]; 13412b692628SMatthew Knepley alpha7 = x[9*i+6]; 13422b692628SMatthew Knepley alpha8 = x[9*i+7]; 13432b692628SMatthew Knepley alpha9 = x[9*i+8]; 13442b692628SMatthew Knepley while (n-->0) { 13452b692628SMatthew Knepley y[9*(*idx)] += alpha1*(*v); 13462b692628SMatthew Knepley y[9*(*idx)+1] += alpha2*(*v); 13472b692628SMatthew Knepley y[9*(*idx)+2] += alpha3*(*v); 13482b692628SMatthew Knepley y[9*(*idx)+3] += alpha4*(*v); 13492b692628SMatthew Knepley y[9*(*idx)+4] += alpha5*(*v); 13502b692628SMatthew Knepley y[9*(*idx)+5] += alpha6*(*v); 13512b692628SMatthew Knepley y[9*(*idx)+6] += alpha7*(*v); 13522b692628SMatthew Knepley y[9*(*idx)+7] += alpha8*(*v); 13532b692628SMatthew Knepley y[9*(*idx)+8] += alpha9*(*v); 13542b692628SMatthew Knepley idx++; v++; 13552b692628SMatthew Knepley } 13562b692628SMatthew Knepley } 13572b692628SMatthew Knepley PetscLogFlops(18*a->nz); 13581ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 13591ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 13602b692628SMatthew Knepley PetscFunctionReturn(0); 13612b692628SMatthew Knepley } 13622b692628SMatthew Knepley 13632f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/ 13642f7816d4SBarry Smith #undef __FUNCT__ 13652f7816d4SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_16" 1366dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 13672f7816d4SBarry Smith { 13682f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 13692f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 13702f7816d4SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 13712f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1372dfbe8321SBarry Smith PetscErrorCode ierr; 1373*b24ad042SBarry Smith PetscInt m = b->AIJ->m,*idx,*ii; 1374*b24ad042SBarry Smith PetscInt n,i,jrow,j; 13752f7816d4SBarry Smith 13762f7816d4SBarry Smith PetscFunctionBegin; 13771ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 13781ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 13792f7816d4SBarry Smith idx = a->j; 13802f7816d4SBarry Smith v = a->a; 13812f7816d4SBarry Smith ii = a->i; 13822f7816d4SBarry Smith 13832f7816d4SBarry Smith for (i=0; i<m; i++) { 13842f7816d4SBarry Smith jrow = ii[i]; 13852f7816d4SBarry Smith n = ii[i+1] - jrow; 13862f7816d4SBarry Smith sum1 = 0.0; 13872f7816d4SBarry Smith sum2 = 0.0; 13882f7816d4SBarry Smith sum3 = 0.0; 13892f7816d4SBarry Smith sum4 = 0.0; 13902f7816d4SBarry Smith sum5 = 0.0; 13912f7816d4SBarry Smith sum6 = 0.0; 13922f7816d4SBarry Smith sum7 = 0.0; 13932f7816d4SBarry Smith sum8 = 0.0; 13942f7816d4SBarry Smith sum9 = 0.0; 13952f7816d4SBarry Smith sum10 = 0.0; 13962f7816d4SBarry Smith sum11 = 0.0; 13972f7816d4SBarry Smith sum12 = 0.0; 13982f7816d4SBarry Smith sum13 = 0.0; 13992f7816d4SBarry Smith sum14 = 0.0; 14002f7816d4SBarry Smith sum15 = 0.0; 14012f7816d4SBarry Smith sum16 = 0.0; 14022f7816d4SBarry Smith for (j=0; j<n; j++) { 14032f7816d4SBarry Smith sum1 += v[jrow]*x[16*idx[jrow]]; 14042f7816d4SBarry Smith sum2 += v[jrow]*x[16*idx[jrow]+1]; 14052f7816d4SBarry Smith sum3 += v[jrow]*x[16*idx[jrow]+2]; 14062f7816d4SBarry Smith sum4 += v[jrow]*x[16*idx[jrow]+3]; 14072f7816d4SBarry Smith sum5 += v[jrow]*x[16*idx[jrow]+4]; 14082f7816d4SBarry Smith sum6 += v[jrow]*x[16*idx[jrow]+5]; 14092f7816d4SBarry Smith sum7 += v[jrow]*x[16*idx[jrow]+6]; 14102f7816d4SBarry Smith sum8 += v[jrow]*x[16*idx[jrow]+7]; 14112f7816d4SBarry Smith sum9 += v[jrow]*x[16*idx[jrow]+8]; 14122f7816d4SBarry Smith sum10 += v[jrow]*x[16*idx[jrow]+9]; 14132f7816d4SBarry Smith sum11 += v[jrow]*x[16*idx[jrow]+10]; 14142f7816d4SBarry Smith sum12 += v[jrow]*x[16*idx[jrow]+11]; 14152f7816d4SBarry Smith sum13 += v[jrow]*x[16*idx[jrow]+12]; 14162f7816d4SBarry Smith sum14 += v[jrow]*x[16*idx[jrow]+13]; 14172f7816d4SBarry Smith sum15 += v[jrow]*x[16*idx[jrow]+14]; 14182f7816d4SBarry Smith sum16 += v[jrow]*x[16*idx[jrow]+15]; 14192f7816d4SBarry Smith jrow++; 14202f7816d4SBarry Smith } 14212f7816d4SBarry Smith y[16*i] = sum1; 14222f7816d4SBarry Smith y[16*i+1] = sum2; 14232f7816d4SBarry Smith y[16*i+2] = sum3; 14242f7816d4SBarry Smith y[16*i+3] = sum4; 14252f7816d4SBarry Smith y[16*i+4] = sum5; 14262f7816d4SBarry Smith y[16*i+5] = sum6; 14272f7816d4SBarry Smith y[16*i+6] = sum7; 14282f7816d4SBarry Smith y[16*i+7] = sum8; 14292f7816d4SBarry Smith y[16*i+8] = sum9; 14302f7816d4SBarry Smith y[16*i+9] = sum10; 14312f7816d4SBarry Smith y[16*i+10] = sum11; 14322f7816d4SBarry Smith y[16*i+11] = sum12; 14332f7816d4SBarry Smith y[16*i+12] = sum13; 14342f7816d4SBarry Smith y[16*i+13] = sum14; 14352f7816d4SBarry Smith y[16*i+14] = sum15; 14362f7816d4SBarry Smith y[16*i+15] = sum16; 14372f7816d4SBarry Smith } 14382f7816d4SBarry Smith 14392f7816d4SBarry Smith PetscLogFlops(32*a->nz - 16*m); 14401ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 14411ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 14422f7816d4SBarry Smith PetscFunctionReturn(0); 14432f7816d4SBarry Smith } 14442f7816d4SBarry Smith 14452f7816d4SBarry Smith #undef __FUNCT__ 14462f7816d4SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 1447dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 14482f7816d4SBarry Smith { 14492f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 14502f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 14512f7816d4SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 14522f7816d4SBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1453dfbe8321SBarry Smith PetscErrorCode ierr; 1454*b24ad042SBarry Smith PetscInt m = b->AIJ->m,n,i,*idx; 14552f7816d4SBarry Smith 14562f7816d4SBarry Smith PetscFunctionBegin; 14572f7816d4SBarry Smith ierr = VecSet(&zero,yy);CHKERRQ(ierr); 14581ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 14591ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1460bfec09a0SHong Zhang 14612f7816d4SBarry Smith for (i=0; i<m; i++) { 1462bfec09a0SHong Zhang idx = a->j + a->i[i] ; 1463bfec09a0SHong Zhang v = a->a + a->i[i] ; 14642f7816d4SBarry Smith n = a->i[i+1] - a->i[i]; 14652f7816d4SBarry Smith alpha1 = x[16*i]; 14662f7816d4SBarry Smith alpha2 = x[16*i+1]; 14672f7816d4SBarry Smith alpha3 = x[16*i+2]; 14682f7816d4SBarry Smith alpha4 = x[16*i+3]; 14692f7816d4SBarry Smith alpha5 = x[16*i+4]; 14702f7816d4SBarry Smith alpha6 = x[16*i+5]; 14712f7816d4SBarry Smith alpha7 = x[16*i+6]; 14722f7816d4SBarry Smith alpha8 = x[16*i+7]; 14732f7816d4SBarry Smith alpha9 = x[16*i+8]; 14742f7816d4SBarry Smith alpha10 = x[16*i+9]; 14752f7816d4SBarry Smith alpha11 = x[16*i+10]; 14762f7816d4SBarry Smith alpha12 = x[16*i+11]; 14772f7816d4SBarry Smith alpha13 = x[16*i+12]; 14782f7816d4SBarry Smith alpha14 = x[16*i+13]; 14792f7816d4SBarry Smith alpha15 = x[16*i+14]; 14802f7816d4SBarry Smith alpha16 = x[16*i+15]; 14812f7816d4SBarry Smith while (n-->0) { 14822f7816d4SBarry Smith y[16*(*idx)] += alpha1*(*v); 14832f7816d4SBarry Smith y[16*(*idx)+1] += alpha2*(*v); 14842f7816d4SBarry Smith y[16*(*idx)+2] += alpha3*(*v); 14852f7816d4SBarry Smith y[16*(*idx)+3] += alpha4*(*v); 14862f7816d4SBarry Smith y[16*(*idx)+4] += alpha5*(*v); 14872f7816d4SBarry Smith y[16*(*idx)+5] += alpha6*(*v); 14882f7816d4SBarry Smith y[16*(*idx)+6] += alpha7*(*v); 14892f7816d4SBarry Smith y[16*(*idx)+7] += alpha8*(*v); 14902f7816d4SBarry Smith y[16*(*idx)+8] += alpha9*(*v); 14912f7816d4SBarry Smith y[16*(*idx)+9] += alpha10*(*v); 14922f7816d4SBarry Smith y[16*(*idx)+10] += alpha11*(*v); 14932f7816d4SBarry Smith y[16*(*idx)+11] += alpha12*(*v); 14942f7816d4SBarry Smith y[16*(*idx)+12] += alpha13*(*v); 14952f7816d4SBarry Smith y[16*(*idx)+13] += alpha14*(*v); 14962f7816d4SBarry Smith y[16*(*idx)+14] += alpha15*(*v); 14972f7816d4SBarry Smith y[16*(*idx)+15] += alpha16*(*v); 14982f7816d4SBarry Smith idx++; v++; 14992f7816d4SBarry Smith } 15002f7816d4SBarry Smith } 15012f7816d4SBarry Smith PetscLogFlops(32*a->nz - 16*b->AIJ->n); 15021ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 15031ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 15042f7816d4SBarry Smith PetscFunctionReturn(0); 15052f7816d4SBarry Smith } 15062f7816d4SBarry Smith 15072f7816d4SBarry Smith #undef __FUNCT__ 15082f7816d4SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 1509dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 15102f7816d4SBarry Smith { 15112f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 15122f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 15132f7816d4SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 15142f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1515dfbe8321SBarry Smith PetscErrorCode ierr; 1516*b24ad042SBarry Smith PetscInt m = b->AIJ->m,*idx,*ii; 1517*b24ad042SBarry Smith PetscInt n,i,jrow,j; 15182f7816d4SBarry Smith 15192f7816d4SBarry Smith PetscFunctionBegin; 15202f7816d4SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 15211ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 15221ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 15232f7816d4SBarry Smith idx = a->j; 15242f7816d4SBarry Smith v = a->a; 15252f7816d4SBarry Smith ii = a->i; 15262f7816d4SBarry Smith 15272f7816d4SBarry Smith for (i=0; i<m; i++) { 15282f7816d4SBarry Smith jrow = ii[i]; 15292f7816d4SBarry Smith n = ii[i+1] - jrow; 15302f7816d4SBarry Smith sum1 = 0.0; 15312f7816d4SBarry Smith sum2 = 0.0; 15322f7816d4SBarry Smith sum3 = 0.0; 15332f7816d4SBarry Smith sum4 = 0.0; 15342f7816d4SBarry Smith sum5 = 0.0; 15352f7816d4SBarry Smith sum6 = 0.0; 15362f7816d4SBarry Smith sum7 = 0.0; 15372f7816d4SBarry Smith sum8 = 0.0; 15382f7816d4SBarry Smith sum9 = 0.0; 15392f7816d4SBarry Smith sum10 = 0.0; 15402f7816d4SBarry Smith sum11 = 0.0; 15412f7816d4SBarry Smith sum12 = 0.0; 15422f7816d4SBarry Smith sum13 = 0.0; 15432f7816d4SBarry Smith sum14 = 0.0; 15442f7816d4SBarry Smith sum15 = 0.0; 15452f7816d4SBarry Smith sum16 = 0.0; 15462f7816d4SBarry Smith for (j=0; j<n; j++) { 15472f7816d4SBarry Smith sum1 += v[jrow]*x[16*idx[jrow]]; 15482f7816d4SBarry Smith sum2 += v[jrow]*x[16*idx[jrow]+1]; 15492f7816d4SBarry Smith sum3 += v[jrow]*x[16*idx[jrow]+2]; 15502f7816d4SBarry Smith sum4 += v[jrow]*x[16*idx[jrow]+3]; 15512f7816d4SBarry Smith sum5 += v[jrow]*x[16*idx[jrow]+4]; 15522f7816d4SBarry Smith sum6 += v[jrow]*x[16*idx[jrow]+5]; 15532f7816d4SBarry Smith sum7 += v[jrow]*x[16*idx[jrow]+6]; 15542f7816d4SBarry Smith sum8 += v[jrow]*x[16*idx[jrow]+7]; 15552f7816d4SBarry Smith sum9 += v[jrow]*x[16*idx[jrow]+8]; 15562f7816d4SBarry Smith sum10 += v[jrow]*x[16*idx[jrow]+9]; 15572f7816d4SBarry Smith sum11 += v[jrow]*x[16*idx[jrow]+10]; 15582f7816d4SBarry Smith sum12 += v[jrow]*x[16*idx[jrow]+11]; 15592f7816d4SBarry Smith sum13 += v[jrow]*x[16*idx[jrow]+12]; 15602f7816d4SBarry Smith sum14 += v[jrow]*x[16*idx[jrow]+13]; 15612f7816d4SBarry Smith sum15 += v[jrow]*x[16*idx[jrow]+14]; 15622f7816d4SBarry Smith sum16 += v[jrow]*x[16*idx[jrow]+15]; 15632f7816d4SBarry Smith jrow++; 15642f7816d4SBarry Smith } 15652f7816d4SBarry Smith y[16*i] += sum1; 15662f7816d4SBarry Smith y[16*i+1] += sum2; 15672f7816d4SBarry Smith y[16*i+2] += sum3; 15682f7816d4SBarry Smith y[16*i+3] += sum4; 15692f7816d4SBarry Smith y[16*i+4] += sum5; 15702f7816d4SBarry Smith y[16*i+5] += sum6; 15712f7816d4SBarry Smith y[16*i+6] += sum7; 15722f7816d4SBarry Smith y[16*i+7] += sum8; 15732f7816d4SBarry Smith y[16*i+8] += sum9; 15742f7816d4SBarry Smith y[16*i+9] += sum10; 15752f7816d4SBarry Smith y[16*i+10] += sum11; 15762f7816d4SBarry Smith y[16*i+11] += sum12; 15772f7816d4SBarry Smith y[16*i+12] += sum13; 15782f7816d4SBarry Smith y[16*i+13] += sum14; 15792f7816d4SBarry Smith y[16*i+14] += sum15; 15802f7816d4SBarry Smith y[16*i+15] += sum16; 15812f7816d4SBarry Smith } 15822f7816d4SBarry Smith 15832f7816d4SBarry Smith PetscLogFlops(32*a->nz); 15841ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 15851ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 15862f7816d4SBarry Smith PetscFunctionReturn(0); 15872f7816d4SBarry Smith } 15882f7816d4SBarry Smith 15892f7816d4SBarry Smith #undef __FUNCT__ 15902f7816d4SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 1591dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 15922f7816d4SBarry Smith { 15932f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 15942f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 15952f7816d4SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 15962f7816d4SBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1597dfbe8321SBarry Smith PetscErrorCode ierr; 1598*b24ad042SBarry Smith PetscInt m = b->AIJ->m,n,i,*idx; 15992f7816d4SBarry Smith 16002f7816d4SBarry Smith PetscFunctionBegin; 16012f7816d4SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 16021ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 16031ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 16042f7816d4SBarry Smith for (i=0; i<m; i++) { 1605bfec09a0SHong Zhang idx = a->j + a->i[i] ; 1606bfec09a0SHong Zhang v = a->a + a->i[i] ; 16072f7816d4SBarry Smith n = a->i[i+1] - a->i[i]; 16082f7816d4SBarry Smith alpha1 = x[16*i]; 16092f7816d4SBarry Smith alpha2 = x[16*i+1]; 16102f7816d4SBarry Smith alpha3 = x[16*i+2]; 16112f7816d4SBarry Smith alpha4 = x[16*i+3]; 16122f7816d4SBarry Smith alpha5 = x[16*i+4]; 16132f7816d4SBarry Smith alpha6 = x[16*i+5]; 16142f7816d4SBarry Smith alpha7 = x[16*i+6]; 16152f7816d4SBarry Smith alpha8 = x[16*i+7]; 16162f7816d4SBarry Smith alpha9 = x[16*i+8]; 16172f7816d4SBarry Smith alpha10 = x[16*i+9]; 16182f7816d4SBarry Smith alpha11 = x[16*i+10]; 16192f7816d4SBarry Smith alpha12 = x[16*i+11]; 16202f7816d4SBarry Smith alpha13 = x[16*i+12]; 16212f7816d4SBarry Smith alpha14 = x[16*i+13]; 16222f7816d4SBarry Smith alpha15 = x[16*i+14]; 16232f7816d4SBarry Smith alpha16 = x[16*i+15]; 16242f7816d4SBarry Smith while (n-->0) { 16252f7816d4SBarry Smith y[16*(*idx)] += alpha1*(*v); 16262f7816d4SBarry Smith y[16*(*idx)+1] += alpha2*(*v); 16272f7816d4SBarry Smith y[16*(*idx)+2] += alpha3*(*v); 16282f7816d4SBarry Smith y[16*(*idx)+3] += alpha4*(*v); 16292f7816d4SBarry Smith y[16*(*idx)+4] += alpha5*(*v); 16302f7816d4SBarry Smith y[16*(*idx)+5] += alpha6*(*v); 16312f7816d4SBarry Smith y[16*(*idx)+6] += alpha7*(*v); 16322f7816d4SBarry Smith y[16*(*idx)+7] += alpha8*(*v); 16332f7816d4SBarry Smith y[16*(*idx)+8] += alpha9*(*v); 16342f7816d4SBarry Smith y[16*(*idx)+9] += alpha10*(*v); 16352f7816d4SBarry Smith y[16*(*idx)+10] += alpha11*(*v); 16362f7816d4SBarry Smith y[16*(*idx)+11] += alpha12*(*v); 16372f7816d4SBarry Smith y[16*(*idx)+12] += alpha13*(*v); 16382f7816d4SBarry Smith y[16*(*idx)+13] += alpha14*(*v); 16392f7816d4SBarry Smith y[16*(*idx)+14] += alpha15*(*v); 16402f7816d4SBarry Smith y[16*(*idx)+15] += alpha16*(*v); 16412f7816d4SBarry Smith idx++; v++; 16422f7816d4SBarry Smith } 16432f7816d4SBarry Smith } 16442f7816d4SBarry Smith PetscLogFlops(32*a->nz); 16451ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 16461ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 164766ed3db0SBarry Smith PetscFunctionReturn(0); 164866ed3db0SBarry Smith } 164966ed3db0SBarry Smith 1650f4a53059SBarry Smith /*===================================================================================*/ 16514a2ae208SSatish Balay #undef __FUNCT__ 16524a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIMAIJ_dof" 1653dfbe8321SBarry Smith PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 1654f4a53059SBarry Smith { 16554cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1656dfbe8321SBarry Smith PetscErrorCode ierr; 1657f4a53059SBarry Smith 1658*b24ad042SBarry Smith PetscFunctionBegin; 1659f4a53059SBarry Smith /* start the scatter */ 1660f4a53059SBarry Smith ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 16614cb9d9b8SBarry Smith ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 1662f4a53059SBarry Smith ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 16634cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 1664f4a53059SBarry Smith PetscFunctionReturn(0); 1665f4a53059SBarry Smith } 1666f4a53059SBarry Smith 16674a2ae208SSatish Balay #undef __FUNCT__ 16684a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 1669dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 16704cb9d9b8SBarry Smith { 16714cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1672dfbe8321SBarry Smith PetscErrorCode ierr; 1673*b24ad042SBarry Smith 16744cb9d9b8SBarry Smith PetscFunctionBegin; 16754cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 16764cb9d9b8SBarry Smith ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 16774cb9d9b8SBarry Smith ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 16784cb9d9b8SBarry Smith ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 16794cb9d9b8SBarry Smith PetscFunctionReturn(0); 16804cb9d9b8SBarry Smith } 16814cb9d9b8SBarry Smith 16824a2ae208SSatish Balay #undef __FUNCT__ 16834a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 1684dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 16854cb9d9b8SBarry Smith { 16864cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1687dfbe8321SBarry Smith PetscErrorCode ierr; 16884cb9d9b8SBarry Smith 1689*b24ad042SBarry Smith PetscFunctionBegin; 16904cb9d9b8SBarry Smith /* start the scatter */ 16914cb9d9b8SBarry Smith ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1692d72c5c08SBarry Smith ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 16934cb9d9b8SBarry Smith ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1694d72c5c08SBarry Smith ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,zz);CHKERRQ(ierr); 16954cb9d9b8SBarry Smith PetscFunctionReturn(0); 16964cb9d9b8SBarry Smith } 16974cb9d9b8SBarry Smith 16984a2ae208SSatish Balay #undef __FUNCT__ 16994a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 1700dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 17014cb9d9b8SBarry Smith { 17024cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1703dfbe8321SBarry Smith PetscErrorCode ierr; 1704*b24ad042SBarry Smith 17054cb9d9b8SBarry Smith PetscFunctionBegin; 17064cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 1707d72c5c08SBarry Smith ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1708d72c5c08SBarry Smith ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 1709d72c5c08SBarry Smith ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 17104cb9d9b8SBarry Smith PetscFunctionReturn(0); 17114cb9d9b8SBarry Smith } 17124cb9d9b8SBarry Smith 1713bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */ 17145983afb6SSatish Balay /*MC 17150bad9183SKris Buschelman MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 17160bad9183SKris Buschelman operations for multicomponent problems. It interpolates each component the same 17170bad9183SKris Buschelman way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 17180bad9183SKris Buschelman and MATMPIAIJ for distributed matrices. 17190bad9183SKris Buschelman 17200bad9183SKris Buschelman Operations provided: 17210bad9183SKris Buschelman . MatMult 17220bad9183SKris Buschelman . MatMultTranspose 17230bad9183SKris Buschelman . MatMultAdd 17240bad9183SKris Buschelman . MatMultTransposeAdd 17250bad9183SKris Buschelman 17260bad9183SKris Buschelman Level: advanced 17270bad9183SKris Buschelman 17280bad9183SKris Buschelman M*/ 17294a2ae208SSatish Balay #undef __FUNCT__ 17304a2ae208SSatish Balay #define __FUNCT__ "MatCreateMAIJ" 1731*b24ad042SBarry Smith PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 173282b1193eSBarry Smith { 1733dfbe8321SBarry Smith PetscErrorCode ierr; 1734*b24ad042SBarry Smith PetscMPIInt size; 1735*b24ad042SBarry Smith PetscInt n; 17364cb9d9b8SBarry Smith Mat_MPIMAIJ *b; 173782b1193eSBarry Smith Mat B; 173882b1193eSBarry Smith 173982b1193eSBarry Smith PetscFunctionBegin; 1740d72c5c08SBarry Smith ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 1741d72c5c08SBarry Smith 1742d72c5c08SBarry Smith if (dof == 1) { 1743d72c5c08SBarry Smith *maij = A; 1744d72c5c08SBarry Smith } else { 174583903688SBarry Smith ierr = MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);CHKERRQ(ierr); 1746cd3bbe55SBarry Smith B->assembled = PETSC_TRUE; 1747d72c5c08SBarry Smith 1748f4a53059SBarry Smith ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1749f4a53059SBarry Smith if (size == 1) { 1750b9b97703SBarry Smith ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 17514cb9d9b8SBarry Smith B->ops->destroy = MatDestroy_SeqMAIJ; 1752b9b97703SBarry Smith b = (Mat_MPIMAIJ*)B->data; 1753b9b97703SBarry Smith b->dof = dof; 17544cb9d9b8SBarry Smith b->AIJ = A; 1755d72c5c08SBarry Smith if (dof == 2) { 1756cd3bbe55SBarry Smith B->ops->mult = MatMult_SeqMAIJ_2; 1757cd3bbe55SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_2; 1758cd3bbe55SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 1759cd3bbe55SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 1760bcc973b7SBarry Smith } else if (dof == 3) { 1761bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_3; 1762bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_3; 1763bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 1764bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 1765bcc973b7SBarry Smith } else if (dof == 4) { 1766bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_4; 1767bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_4; 1768bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 1769bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 1770f9fae5adSBarry Smith } else if (dof == 5) { 1771f9fae5adSBarry Smith B->ops->mult = MatMult_SeqMAIJ_5; 1772f9fae5adSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_5; 1773f9fae5adSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 1774f9fae5adSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 17756cd98798SBarry Smith } else if (dof == 6) { 17766cd98798SBarry Smith B->ops->mult = MatMult_SeqMAIJ_6; 17776cd98798SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_6; 17786cd98798SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 17796cd98798SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 178066ed3db0SBarry Smith } else if (dof == 8) { 178166ed3db0SBarry Smith B->ops->mult = MatMult_SeqMAIJ_8; 178266ed3db0SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_8; 178366ed3db0SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 178466ed3db0SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 17852b692628SMatthew Knepley } else if (dof == 9) { 17862b692628SMatthew Knepley B->ops->mult = MatMult_SeqMAIJ_9; 17872b692628SMatthew Knepley B->ops->multadd = MatMultAdd_SeqMAIJ_9; 17882b692628SMatthew Knepley B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 17892b692628SMatthew Knepley B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 17902f7816d4SBarry Smith } else if (dof == 16) { 17912f7816d4SBarry Smith B->ops->mult = MatMult_SeqMAIJ_16; 17922f7816d4SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_16; 17932f7816d4SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 17942f7816d4SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 179582b1193eSBarry Smith } else { 179679a5c55eSBarry Smith SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %d. Send request for code to petsc-maint@mcs.anl.gov\n",dof); 179782b1193eSBarry Smith } 1798f4a53059SBarry Smith } else { 1799f4a53059SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 1800f4a53059SBarry Smith IS from,to; 1801f4a53059SBarry Smith Vec gvec; 1802*b24ad042SBarry Smith PetscInt *garray,i; 1803f4a53059SBarry Smith 1804b9b97703SBarry Smith ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 1805d72c5c08SBarry Smith B->ops->destroy = MatDestroy_MPIMAIJ; 1806b9b97703SBarry Smith b = (Mat_MPIMAIJ*)B->data; 1807b9b97703SBarry Smith b->dof = dof; 1808b9b97703SBarry Smith b->A = A; 18094cb9d9b8SBarry Smith ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 18104cb9d9b8SBarry Smith ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 18114cb9d9b8SBarry Smith 1812f4a53059SBarry Smith ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 1813f4a53059SBarry Smith ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr); 1814f4a53059SBarry Smith 1815f4a53059SBarry Smith /* create two temporary Index sets for build scatter gather */ 1816*b24ad042SBarry Smith ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 1817f4a53059SBarry Smith for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i]; 1818f4a53059SBarry Smith ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr); 1819f4a53059SBarry Smith ierr = PetscFree(garray);CHKERRQ(ierr); 1820f4a53059SBarry Smith ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 1821f4a53059SBarry Smith 1822f4a53059SBarry Smith /* create temporary global vector to generate scatter context */ 1823f4a53059SBarry Smith ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr); 1824f4a53059SBarry Smith 1825f4a53059SBarry Smith /* generate the scatter context */ 1826f4a53059SBarry Smith ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 1827f4a53059SBarry Smith 1828f4a53059SBarry Smith ierr = ISDestroy(from);CHKERRQ(ierr); 1829f4a53059SBarry Smith ierr = ISDestroy(to);CHKERRQ(ierr); 1830f4a53059SBarry Smith ierr = VecDestroy(gvec);CHKERRQ(ierr); 1831f4a53059SBarry Smith 1832f4a53059SBarry Smith B->ops->mult = MatMult_MPIMAIJ_dof; 18334cb9d9b8SBarry Smith B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 18344cb9d9b8SBarry Smith B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 18354cb9d9b8SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 1836f4a53059SBarry Smith } 1837cd3bbe55SBarry Smith *maij = B; 1838d72c5c08SBarry Smith } 183982b1193eSBarry Smith PetscFunctionReturn(0); 184082b1193eSBarry Smith } 184182b1193eSBarry Smith 184282b1193eSBarry Smith 184382b1193eSBarry Smith 184482b1193eSBarry Smith 184582b1193eSBarry Smith 184682b1193eSBarry Smith 184782b1193eSBarry Smith 184882b1193eSBarry Smith 184982b1193eSBarry Smith 185082b1193eSBarry Smith 185182b1193eSBarry Smith 185282b1193eSBarry Smith 1853