1be1d678aSKris Buschelman #define PETSCMAT_DLL 2be1d678aSKris Buschelman 382b1193eSBarry Smith /* 4cd3bbe55SBarry Smith Defines the basic matrix operations for the MAIJ matrix storage format. 5cd3bbe55SBarry Smith This format is used for restriction and interpolation operations for 6cd3bbe55SBarry Smith multicomponent problems. It interpolates each component the same way 7cd3bbe55SBarry Smith independently. 8cd3bbe55SBarry Smith 9cd3bbe55SBarry Smith We provide: 10cd3bbe55SBarry Smith MatMult() 11cd3bbe55SBarry Smith MatMultTranspose() 12cd3bbe55SBarry Smith MatMultTransposeAdd() 13cd3bbe55SBarry Smith MatMultAdd() 14cd3bbe55SBarry Smith and 15cd3bbe55SBarry Smith MatCreateMAIJ(Mat,dof,Mat*) 16f4a53059SBarry Smith 17f4a53059SBarry Smith This single directory handles both the sequential and parallel codes 1882b1193eSBarry Smith */ 1982b1193eSBarry Smith 20be911618SKris Buschelman #include "src/mat/impls/maij/maij.h" 217ba1a0bfSKris Buschelman #include "src/mat/utils/freespace.h" 221d8d5f9aSSatish Balay #include "private/vecimpl.h" 2382b1193eSBarry Smith 244a2ae208SSatish Balay #undef __FUNCT__ 254a2ae208SSatish Balay #define __FUNCT__ "MatMAIJGetAIJ" 26be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJGetAIJ(Mat A,Mat *B) 27b9b97703SBarry Smith { 28dfbe8321SBarry Smith PetscErrorCode ierr; 29b9b97703SBarry Smith PetscTruth ismpimaij,isseqmaij; 30b9b97703SBarry Smith 31b9b97703SBarry Smith PetscFunctionBegin; 32b9b97703SBarry Smith ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 33b9b97703SBarry Smith ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 34b9b97703SBarry Smith if (ismpimaij) { 35b9b97703SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 36b9b97703SBarry Smith 37b9b97703SBarry Smith *B = b->A; 38b9b97703SBarry Smith } else if (isseqmaij) { 39b9b97703SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 40b9b97703SBarry Smith 41b9b97703SBarry Smith *B = b->AIJ; 42b9b97703SBarry Smith } else { 43b9b97703SBarry Smith *B = A; 44b9b97703SBarry Smith } 45b9b97703SBarry Smith PetscFunctionReturn(0); 46b9b97703SBarry Smith } 47b9b97703SBarry Smith 484a2ae208SSatish Balay #undef __FUNCT__ 494a2ae208SSatish Balay #define __FUNCT__ "MatMAIJRedimension" 50be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 51b9b97703SBarry Smith { 52dfbe8321SBarry Smith PetscErrorCode ierr; 53b9b97703SBarry Smith Mat Aij; 54b9b97703SBarry Smith 55b9b97703SBarry Smith PetscFunctionBegin; 56b9b97703SBarry Smith ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 57b9b97703SBarry Smith ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 58b9b97703SBarry Smith PetscFunctionReturn(0); 59b9b97703SBarry Smith } 60b9b97703SBarry Smith 614a2ae208SSatish Balay #undef __FUNCT__ 624a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_SeqMAIJ" 63dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 6482b1193eSBarry Smith { 65dfbe8321SBarry Smith PetscErrorCode ierr; 664cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 6782b1193eSBarry Smith 6882b1193eSBarry Smith PetscFunctionBegin; 69cd3bbe55SBarry Smith if (b->AIJ) { 70cd3bbe55SBarry Smith ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 7182b1193eSBarry Smith } 724cb9d9b8SBarry Smith ierr = PetscFree(b);CHKERRQ(ierr); 734cb9d9b8SBarry Smith PetscFunctionReturn(0); 744cb9d9b8SBarry Smith } 754cb9d9b8SBarry Smith 764a2ae208SSatish Balay #undef __FUNCT__ 770fd73130SBarry Smith #define __FUNCT__ "MatView_SeqMAIJ" 780fd73130SBarry Smith PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 790fd73130SBarry Smith { 800fd73130SBarry Smith PetscErrorCode ierr; 810fd73130SBarry Smith Mat B; 820fd73130SBarry Smith 830fd73130SBarry Smith PetscFunctionBegin; 84ceb03754SKris Buschelman ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B); 850fd73130SBarry Smith ierr = MatView(B,viewer);CHKERRQ(ierr); 860fd73130SBarry Smith ierr = MatDestroy(B);CHKERRQ(ierr); 870fd73130SBarry Smith PetscFunctionReturn(0); 880fd73130SBarry Smith } 890fd73130SBarry Smith 900fd73130SBarry Smith #undef __FUNCT__ 910fd73130SBarry Smith #define __FUNCT__ "MatView_MPIMAIJ" 920fd73130SBarry Smith PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 930fd73130SBarry Smith { 940fd73130SBarry Smith PetscErrorCode ierr; 950fd73130SBarry Smith Mat B; 960fd73130SBarry Smith 970fd73130SBarry Smith PetscFunctionBegin; 98ceb03754SKris Buschelman ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B); 990fd73130SBarry Smith ierr = MatView(B,viewer);CHKERRQ(ierr); 1000fd73130SBarry Smith ierr = MatDestroy(B);CHKERRQ(ierr); 1010fd73130SBarry Smith PetscFunctionReturn(0); 1020fd73130SBarry Smith } 1030fd73130SBarry Smith 1040fd73130SBarry Smith #undef __FUNCT__ 1054a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIMAIJ" 106dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 1074cb9d9b8SBarry Smith { 108dfbe8321SBarry Smith PetscErrorCode ierr; 1094cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1104cb9d9b8SBarry Smith 1114cb9d9b8SBarry Smith PetscFunctionBegin; 1124cb9d9b8SBarry Smith if (b->AIJ) { 1134cb9d9b8SBarry Smith ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 1144cb9d9b8SBarry Smith } 115f4a53059SBarry Smith if (b->OAIJ) { 116f4a53059SBarry Smith ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr); 117f4a53059SBarry Smith } 118b9b97703SBarry Smith if (b->A) { 119b9b97703SBarry Smith ierr = MatDestroy(b->A);CHKERRQ(ierr); 120b9b97703SBarry Smith } 121f4a53059SBarry Smith if (b->ctx) { 122f4a53059SBarry Smith ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr); 123f4a53059SBarry Smith } 124f4a53059SBarry Smith if (b->w) { 125f4a53059SBarry Smith ierr = VecDestroy(b->w);CHKERRQ(ierr); 126f4a53059SBarry Smith } 127cd3bbe55SBarry Smith ierr = PetscFree(b);CHKERRQ(ierr); 128b9b97703SBarry Smith PetscFunctionReturn(0); 129b9b97703SBarry Smith } 130b9b97703SBarry Smith 1310bad9183SKris Buschelman /*MC 132fafad747SKris Buschelman MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 1330bad9183SKris Buschelman multicomponent problems, interpolating or restricting each component the same way independently. 1340bad9183SKris Buschelman The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 1350bad9183SKris Buschelman 1360bad9183SKris Buschelman Operations provided: 1370bad9183SKris Buschelman . MatMult 1380bad9183SKris Buschelman . MatMultTranspose 1390bad9183SKris Buschelman . MatMultAdd 1400bad9183SKris Buschelman . MatMultTransposeAdd 1410bad9183SKris Buschelman 1420bad9183SKris Buschelman Level: advanced 1430bad9183SKris Buschelman 1440bad9183SKris Buschelman .seealso: MatCreateSeqDense 1450bad9183SKris Buschelman M*/ 1460bad9183SKris Buschelman 14782b1193eSBarry Smith EXTERN_C_BEGIN 1484a2ae208SSatish Balay #undef __FUNCT__ 1494a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MAIJ" 150be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MAIJ(Mat A) 15182b1193eSBarry Smith { 152dfbe8321SBarry Smith PetscErrorCode ierr; 1534cb9d9b8SBarry Smith Mat_MPIMAIJ *b; 15482b1193eSBarry Smith 15582b1193eSBarry Smith PetscFunctionBegin; 156b0a32e0cSBarry Smith ierr = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr); 157b0a32e0cSBarry Smith A->data = (void*)b; 158cd3bbe55SBarry Smith ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 159cd3bbe55SBarry Smith A->factor = 0; 160cd3bbe55SBarry Smith A->mapping = 0; 161f4a53059SBarry Smith 162cd3bbe55SBarry Smith b->AIJ = 0; 163cd3bbe55SBarry Smith b->dof = 0; 164f4a53059SBarry Smith b->OAIJ = 0; 165f4a53059SBarry Smith b->ctx = 0; 166f4a53059SBarry Smith b->w = 0; 16782b1193eSBarry Smith PetscFunctionReturn(0); 16882b1193eSBarry Smith } 16982b1193eSBarry Smith EXTERN_C_END 17082b1193eSBarry Smith 171cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 1724a2ae208SSatish Balay #undef __FUNCT__ 173b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_2" 174dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 17582b1193eSBarry Smith { 1764cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 177bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 17887828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2; 179dfbe8321SBarry Smith PetscErrorCode ierr; 180899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 181b24ad042SBarry Smith PetscInt n,i,jrow,j; 18282b1193eSBarry Smith 183bcc973b7SBarry Smith PetscFunctionBegin; 1841ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1851ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 186bcc973b7SBarry Smith idx = a->j; 187bcc973b7SBarry Smith v = a->a; 188bcc973b7SBarry Smith ii = a->i; 189bcc973b7SBarry Smith 190bcc973b7SBarry Smith for (i=0; i<m; i++) { 191bcc973b7SBarry Smith jrow = ii[i]; 192bcc973b7SBarry Smith n = ii[i+1] - jrow; 193bcc973b7SBarry Smith sum1 = 0.0; 194bcc973b7SBarry Smith sum2 = 0.0; 195bcc973b7SBarry Smith for (j=0; j<n; j++) { 196bcc973b7SBarry Smith sum1 += v[jrow]*x[2*idx[jrow]]; 197bcc973b7SBarry Smith sum2 += v[jrow]*x[2*idx[jrow]+1]; 198bcc973b7SBarry Smith jrow++; 199bcc973b7SBarry Smith } 200bcc973b7SBarry Smith y[2*i] = sum1; 201bcc973b7SBarry Smith y[2*i+1] = sum2; 202bcc973b7SBarry Smith } 203bcc973b7SBarry Smith 204efee365bSSatish Balay ierr = PetscLogFlops(4*a->nz - 2*m);CHKERRQ(ierr); 2051ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2061ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 20782b1193eSBarry Smith PetscFunctionReturn(0); 20882b1193eSBarry Smith } 209bcc973b7SBarry Smith 2104a2ae208SSatish Balay #undef __FUNCT__ 211b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 212dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 21382b1193eSBarry Smith { 2144cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 215bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 21687828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0; 217dfbe8321SBarry Smith PetscErrorCode ierr; 218899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 21982b1193eSBarry Smith 220bcc973b7SBarry Smith PetscFunctionBegin; 2212dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 2221ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2231ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 224bfec09a0SHong Zhang 225bcc973b7SBarry Smith for (i=0; i<m; i++) { 226bfec09a0SHong Zhang idx = a->j + a->i[i] ; 227bfec09a0SHong Zhang v = a->a + a->i[i] ; 228bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 229bcc973b7SBarry Smith alpha1 = x[2*i]; 230bcc973b7SBarry Smith alpha2 = x[2*i+1]; 231bcc973b7SBarry Smith while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 232bcc973b7SBarry Smith } 233899cda47SBarry Smith ierr = PetscLogFlops(4*a->nz - 2*b->AIJ->cmap.n);CHKERRQ(ierr); 2341ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2351ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 23682b1193eSBarry Smith PetscFunctionReturn(0); 23782b1193eSBarry Smith } 238bcc973b7SBarry Smith 2394a2ae208SSatish Balay #undef __FUNCT__ 240b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 241dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 24282b1193eSBarry Smith { 2434cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 244bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 24587828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2; 246dfbe8321SBarry Smith PetscErrorCode ierr; 247899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 248b24ad042SBarry Smith PetscInt n,i,jrow,j; 24982b1193eSBarry Smith 250bcc973b7SBarry Smith PetscFunctionBegin; 251f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2521ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2531ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 254bcc973b7SBarry Smith idx = a->j; 255bcc973b7SBarry Smith v = a->a; 256bcc973b7SBarry Smith ii = a->i; 257bcc973b7SBarry Smith 258bcc973b7SBarry Smith for (i=0; i<m; i++) { 259bcc973b7SBarry Smith jrow = ii[i]; 260bcc973b7SBarry Smith n = ii[i+1] - jrow; 261bcc973b7SBarry Smith sum1 = 0.0; 262bcc973b7SBarry Smith sum2 = 0.0; 263bcc973b7SBarry Smith for (j=0; j<n; j++) { 264bcc973b7SBarry Smith sum1 += v[jrow]*x[2*idx[jrow]]; 265bcc973b7SBarry Smith sum2 += v[jrow]*x[2*idx[jrow]+1]; 266bcc973b7SBarry Smith jrow++; 267bcc973b7SBarry Smith } 268bcc973b7SBarry Smith y[2*i] += sum1; 269bcc973b7SBarry Smith y[2*i+1] += sum2; 270bcc973b7SBarry Smith } 271bcc973b7SBarry Smith 272efee365bSSatish Balay ierr = PetscLogFlops(4*a->nz - 2*m);CHKERRQ(ierr); 2731ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2741ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 275bcc973b7SBarry Smith PetscFunctionReturn(0); 27682b1193eSBarry Smith } 2774a2ae208SSatish Balay #undef __FUNCT__ 278b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 279dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 28082b1193eSBarry Smith { 2814cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 282bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 28387828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2; 284dfbe8321SBarry Smith PetscErrorCode ierr; 285899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 28682b1193eSBarry Smith 287bcc973b7SBarry Smith PetscFunctionBegin; 288f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2891ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2901ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 291bfec09a0SHong Zhang 292bcc973b7SBarry Smith for (i=0; i<m; i++) { 293bfec09a0SHong Zhang idx = a->j + a->i[i] ; 294bfec09a0SHong Zhang v = a->a + a->i[i] ; 295bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 296bcc973b7SBarry Smith alpha1 = x[2*i]; 297bcc973b7SBarry Smith alpha2 = x[2*i+1]; 298bcc973b7SBarry Smith while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 299bcc973b7SBarry Smith } 300899cda47SBarry Smith ierr = PetscLogFlops(4*a->nz - 2*b->AIJ->cmap.n);CHKERRQ(ierr); 3011ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3021ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 303bcc973b7SBarry Smith PetscFunctionReturn(0); 30482b1193eSBarry Smith } 305cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 3064a2ae208SSatish Balay #undef __FUNCT__ 307b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_3" 308dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 309bcc973b7SBarry Smith { 3104cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 311bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 31287828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3; 313dfbe8321SBarry Smith PetscErrorCode ierr; 314899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 315b24ad042SBarry Smith PetscInt n,i,jrow,j; 31682b1193eSBarry Smith 317bcc973b7SBarry Smith PetscFunctionBegin; 3181ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3191ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 320bcc973b7SBarry Smith idx = a->j; 321bcc973b7SBarry Smith v = a->a; 322bcc973b7SBarry Smith ii = a->i; 323bcc973b7SBarry Smith 324bcc973b7SBarry Smith for (i=0; i<m; i++) { 325bcc973b7SBarry Smith jrow = ii[i]; 326bcc973b7SBarry Smith n = ii[i+1] - jrow; 327bcc973b7SBarry Smith sum1 = 0.0; 328bcc973b7SBarry Smith sum2 = 0.0; 329bcc973b7SBarry Smith sum3 = 0.0; 330bcc973b7SBarry Smith for (j=0; j<n; j++) { 331bcc973b7SBarry Smith sum1 += v[jrow]*x[3*idx[jrow]]; 332bcc973b7SBarry Smith sum2 += v[jrow]*x[3*idx[jrow]+1]; 333bcc973b7SBarry Smith sum3 += v[jrow]*x[3*idx[jrow]+2]; 334bcc973b7SBarry Smith jrow++; 335bcc973b7SBarry Smith } 336bcc973b7SBarry Smith y[3*i] = sum1; 337bcc973b7SBarry Smith y[3*i+1] = sum2; 338bcc973b7SBarry Smith y[3*i+2] = sum3; 339bcc973b7SBarry Smith } 340bcc973b7SBarry Smith 341efee365bSSatish Balay ierr = PetscLogFlops(6*a->nz - 3*m);CHKERRQ(ierr); 3421ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3431ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 344bcc973b7SBarry Smith PetscFunctionReturn(0); 345bcc973b7SBarry Smith } 346bcc973b7SBarry Smith 3474a2ae208SSatish Balay #undef __FUNCT__ 348b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 349dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 350bcc973b7SBarry Smith { 3514cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 352bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 35387828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0; 354dfbe8321SBarry Smith PetscErrorCode ierr; 355899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 356bcc973b7SBarry Smith 357bcc973b7SBarry Smith PetscFunctionBegin; 3582dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 3591ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3601ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 361bfec09a0SHong Zhang 362bcc973b7SBarry Smith for (i=0; i<m; i++) { 363bfec09a0SHong Zhang idx = a->j + a->i[i]; 364bfec09a0SHong Zhang v = a->a + a->i[i]; 365bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 366bcc973b7SBarry Smith alpha1 = x[3*i]; 367bcc973b7SBarry Smith alpha2 = x[3*i+1]; 368bcc973b7SBarry Smith alpha3 = x[3*i+2]; 369bcc973b7SBarry Smith while (n-->0) { 370bcc973b7SBarry Smith y[3*(*idx)] += alpha1*(*v); 371bcc973b7SBarry Smith y[3*(*idx)+1] += alpha2*(*v); 372bcc973b7SBarry Smith y[3*(*idx)+2] += alpha3*(*v); 373bcc973b7SBarry Smith idx++; v++; 374bcc973b7SBarry Smith } 375bcc973b7SBarry Smith } 376899cda47SBarry Smith ierr = PetscLogFlops(6*a->nz - 3*b->AIJ->cmap.n);CHKERRQ(ierr); 3771ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3781ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 379bcc973b7SBarry Smith PetscFunctionReturn(0); 380bcc973b7SBarry Smith } 381bcc973b7SBarry Smith 3824a2ae208SSatish Balay #undef __FUNCT__ 383b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 384dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 385bcc973b7SBarry Smith { 3864cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 387bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 38887828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3; 389dfbe8321SBarry Smith PetscErrorCode ierr; 390899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 391b24ad042SBarry Smith PetscInt n,i,jrow,j; 392bcc973b7SBarry Smith 393bcc973b7SBarry Smith PetscFunctionBegin; 394f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 3951ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3961ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 397bcc973b7SBarry Smith idx = a->j; 398bcc973b7SBarry Smith v = a->a; 399bcc973b7SBarry Smith ii = a->i; 400bcc973b7SBarry Smith 401bcc973b7SBarry Smith for (i=0; i<m; i++) { 402bcc973b7SBarry Smith jrow = ii[i]; 403bcc973b7SBarry Smith n = ii[i+1] - jrow; 404bcc973b7SBarry Smith sum1 = 0.0; 405bcc973b7SBarry Smith sum2 = 0.0; 406bcc973b7SBarry Smith sum3 = 0.0; 407bcc973b7SBarry Smith for (j=0; j<n; j++) { 408bcc973b7SBarry Smith sum1 += v[jrow]*x[3*idx[jrow]]; 409bcc973b7SBarry Smith sum2 += v[jrow]*x[3*idx[jrow]+1]; 410bcc973b7SBarry Smith sum3 += v[jrow]*x[3*idx[jrow]+2]; 411bcc973b7SBarry Smith jrow++; 412bcc973b7SBarry Smith } 413bcc973b7SBarry Smith y[3*i] += sum1; 414bcc973b7SBarry Smith y[3*i+1] += sum2; 415bcc973b7SBarry Smith y[3*i+2] += sum3; 416bcc973b7SBarry Smith } 417bcc973b7SBarry Smith 418efee365bSSatish Balay ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr); 4191ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 4201ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 421bcc973b7SBarry Smith PetscFunctionReturn(0); 422bcc973b7SBarry Smith } 4234a2ae208SSatish Balay #undef __FUNCT__ 424b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 425dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 426bcc973b7SBarry Smith { 4274cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 428bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 42987828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3; 430dfbe8321SBarry Smith PetscErrorCode ierr; 431899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 432bcc973b7SBarry Smith 433bcc973b7SBarry Smith PetscFunctionBegin; 434f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 4351ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 4361ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 437bcc973b7SBarry Smith for (i=0; i<m; i++) { 438bfec09a0SHong Zhang idx = a->j + a->i[i] ; 439bfec09a0SHong Zhang v = a->a + a->i[i] ; 440bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 441bcc973b7SBarry Smith alpha1 = x[3*i]; 442bcc973b7SBarry Smith alpha2 = x[3*i+1]; 443bcc973b7SBarry Smith alpha3 = x[3*i+2]; 444bcc973b7SBarry Smith while (n-->0) { 445bcc973b7SBarry Smith y[3*(*idx)] += alpha1*(*v); 446bcc973b7SBarry Smith y[3*(*idx)+1] += alpha2*(*v); 447bcc973b7SBarry Smith y[3*(*idx)+2] += alpha3*(*v); 448bcc973b7SBarry Smith idx++; v++; 449bcc973b7SBarry Smith } 450bcc973b7SBarry Smith } 451efee365bSSatish Balay ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr); 4521ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 4531ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 454bcc973b7SBarry Smith PetscFunctionReturn(0); 455bcc973b7SBarry Smith } 456bcc973b7SBarry Smith 457bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/ 4584a2ae208SSatish Balay #undef __FUNCT__ 459b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_4" 460dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 461bcc973b7SBarry Smith { 4624cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 463bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 46487828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 465dfbe8321SBarry Smith PetscErrorCode ierr; 466899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 467b24ad042SBarry Smith PetscInt n,i,jrow,j; 468bcc973b7SBarry Smith 469bcc973b7SBarry Smith PetscFunctionBegin; 4701ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 4711ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 472bcc973b7SBarry Smith idx = a->j; 473bcc973b7SBarry Smith v = a->a; 474bcc973b7SBarry Smith ii = a->i; 475bcc973b7SBarry Smith 476bcc973b7SBarry Smith for (i=0; i<m; i++) { 477bcc973b7SBarry Smith jrow = ii[i]; 478bcc973b7SBarry Smith n = ii[i+1] - jrow; 479bcc973b7SBarry Smith sum1 = 0.0; 480bcc973b7SBarry Smith sum2 = 0.0; 481bcc973b7SBarry Smith sum3 = 0.0; 482bcc973b7SBarry Smith sum4 = 0.0; 483bcc973b7SBarry Smith for (j=0; j<n; j++) { 484bcc973b7SBarry Smith sum1 += v[jrow]*x[4*idx[jrow]]; 485bcc973b7SBarry Smith sum2 += v[jrow]*x[4*idx[jrow]+1]; 486bcc973b7SBarry Smith sum3 += v[jrow]*x[4*idx[jrow]+2]; 487bcc973b7SBarry Smith sum4 += v[jrow]*x[4*idx[jrow]+3]; 488bcc973b7SBarry Smith jrow++; 489bcc973b7SBarry Smith } 490bcc973b7SBarry Smith y[4*i] = sum1; 491bcc973b7SBarry Smith y[4*i+1] = sum2; 492bcc973b7SBarry Smith y[4*i+2] = sum3; 493bcc973b7SBarry Smith y[4*i+3] = sum4; 494bcc973b7SBarry Smith } 495bcc973b7SBarry Smith 496efee365bSSatish Balay ierr = PetscLogFlops(8*a->nz - 4*m);CHKERRQ(ierr); 4971ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 4981ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 499bcc973b7SBarry Smith PetscFunctionReturn(0); 500bcc973b7SBarry Smith } 501bcc973b7SBarry Smith 5024a2ae208SSatish Balay #undef __FUNCT__ 503b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 504dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 505bcc973b7SBarry Smith { 5064cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 507bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 50887828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0; 509dfbe8321SBarry Smith PetscErrorCode ierr; 510899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 511bcc973b7SBarry Smith 512bcc973b7SBarry Smith PetscFunctionBegin; 5132dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 5141ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 5151ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 516bcc973b7SBarry Smith for (i=0; i<m; i++) { 517bfec09a0SHong Zhang idx = a->j + a->i[i] ; 518bfec09a0SHong Zhang v = a->a + a->i[i] ; 519bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 520bcc973b7SBarry Smith alpha1 = x[4*i]; 521bcc973b7SBarry Smith alpha2 = x[4*i+1]; 522bcc973b7SBarry Smith alpha3 = x[4*i+2]; 523bcc973b7SBarry Smith alpha4 = x[4*i+3]; 524bcc973b7SBarry Smith while (n-->0) { 525bcc973b7SBarry Smith y[4*(*idx)] += alpha1*(*v); 526bcc973b7SBarry Smith y[4*(*idx)+1] += alpha2*(*v); 527bcc973b7SBarry Smith y[4*(*idx)+2] += alpha3*(*v); 528bcc973b7SBarry Smith y[4*(*idx)+3] += alpha4*(*v); 529bcc973b7SBarry Smith idx++; v++; 530bcc973b7SBarry Smith } 531bcc973b7SBarry Smith } 532899cda47SBarry Smith ierr = PetscLogFlops(8*a->nz - 4*b->AIJ->cmap.n);CHKERRQ(ierr); 5331ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 5341ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 535bcc973b7SBarry Smith PetscFunctionReturn(0); 536bcc973b7SBarry Smith } 537bcc973b7SBarry Smith 5384a2ae208SSatish Balay #undef __FUNCT__ 539b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 540dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 541bcc973b7SBarry Smith { 5424cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 543f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 54487828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 545dfbe8321SBarry Smith PetscErrorCode ierr; 546899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 547b24ad042SBarry Smith PetscInt n,i,jrow,j; 548f9fae5adSBarry Smith 549f9fae5adSBarry Smith PetscFunctionBegin; 550f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 5511ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 5521ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 553f9fae5adSBarry Smith idx = a->j; 554f9fae5adSBarry Smith v = a->a; 555f9fae5adSBarry Smith ii = a->i; 556f9fae5adSBarry Smith 557f9fae5adSBarry Smith for (i=0; i<m; i++) { 558f9fae5adSBarry Smith jrow = ii[i]; 559f9fae5adSBarry Smith n = ii[i+1] - jrow; 560f9fae5adSBarry Smith sum1 = 0.0; 561f9fae5adSBarry Smith sum2 = 0.0; 562f9fae5adSBarry Smith sum3 = 0.0; 563f9fae5adSBarry Smith sum4 = 0.0; 564f9fae5adSBarry Smith for (j=0; j<n; j++) { 565f9fae5adSBarry Smith sum1 += v[jrow]*x[4*idx[jrow]]; 566f9fae5adSBarry Smith sum2 += v[jrow]*x[4*idx[jrow]+1]; 567f9fae5adSBarry Smith sum3 += v[jrow]*x[4*idx[jrow]+2]; 568f9fae5adSBarry Smith sum4 += v[jrow]*x[4*idx[jrow]+3]; 569f9fae5adSBarry Smith jrow++; 570f9fae5adSBarry Smith } 571f9fae5adSBarry Smith y[4*i] += sum1; 572f9fae5adSBarry Smith y[4*i+1] += sum2; 573f9fae5adSBarry Smith y[4*i+2] += sum3; 574f9fae5adSBarry Smith y[4*i+3] += sum4; 575f9fae5adSBarry Smith } 576f9fae5adSBarry Smith 577efee365bSSatish Balay ierr = PetscLogFlops(8*a->nz - 4*m);CHKERRQ(ierr); 5781ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 5791ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 580f9fae5adSBarry Smith PetscFunctionReturn(0); 581bcc973b7SBarry Smith } 5824a2ae208SSatish Balay #undef __FUNCT__ 583b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 584dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 585bcc973b7SBarry Smith { 5864cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 587f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 58887828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4; 589dfbe8321SBarry Smith PetscErrorCode ierr; 590899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 591f9fae5adSBarry Smith 592f9fae5adSBarry Smith PetscFunctionBegin; 593f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 5941ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 5951ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 596bfec09a0SHong Zhang 597f9fae5adSBarry Smith for (i=0; i<m; i++) { 598bfec09a0SHong Zhang idx = a->j + a->i[i] ; 599bfec09a0SHong Zhang v = a->a + a->i[i] ; 600f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 601f9fae5adSBarry Smith alpha1 = x[4*i]; 602f9fae5adSBarry Smith alpha2 = x[4*i+1]; 603f9fae5adSBarry Smith alpha3 = x[4*i+2]; 604f9fae5adSBarry Smith alpha4 = x[4*i+3]; 605f9fae5adSBarry Smith while (n-->0) { 606f9fae5adSBarry Smith y[4*(*idx)] += alpha1*(*v); 607f9fae5adSBarry Smith y[4*(*idx)+1] += alpha2*(*v); 608f9fae5adSBarry Smith y[4*(*idx)+2] += alpha3*(*v); 609f9fae5adSBarry Smith y[4*(*idx)+3] += alpha4*(*v); 610f9fae5adSBarry Smith idx++; v++; 611f9fae5adSBarry Smith } 612f9fae5adSBarry Smith } 613899cda47SBarry Smith ierr = PetscLogFlops(8*a->nz - 4*b->AIJ->cmap.n);CHKERRQ(ierr); 6141ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 6151ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 616f9fae5adSBarry Smith PetscFunctionReturn(0); 617f9fae5adSBarry Smith } 618f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/ 6196cd98798SBarry Smith 6204a2ae208SSatish Balay #undef __FUNCT__ 621b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_5" 622dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 623f9fae5adSBarry Smith { 6244cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 625f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 62687828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 627dfbe8321SBarry Smith PetscErrorCode ierr; 628899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 629b24ad042SBarry Smith PetscInt n,i,jrow,j; 630f9fae5adSBarry Smith 631f9fae5adSBarry Smith PetscFunctionBegin; 6321ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 6331ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 634f9fae5adSBarry Smith idx = a->j; 635f9fae5adSBarry Smith v = a->a; 636f9fae5adSBarry Smith ii = a->i; 637f9fae5adSBarry Smith 638f9fae5adSBarry Smith for (i=0; i<m; i++) { 639f9fae5adSBarry Smith jrow = ii[i]; 640f9fae5adSBarry Smith n = ii[i+1] - jrow; 641f9fae5adSBarry Smith sum1 = 0.0; 642f9fae5adSBarry Smith sum2 = 0.0; 643f9fae5adSBarry Smith sum3 = 0.0; 644f9fae5adSBarry Smith sum4 = 0.0; 645f9fae5adSBarry Smith sum5 = 0.0; 646f9fae5adSBarry Smith for (j=0; j<n; j++) { 647f9fae5adSBarry Smith sum1 += v[jrow]*x[5*idx[jrow]]; 648f9fae5adSBarry Smith sum2 += v[jrow]*x[5*idx[jrow]+1]; 649f9fae5adSBarry Smith sum3 += v[jrow]*x[5*idx[jrow]+2]; 650f9fae5adSBarry Smith sum4 += v[jrow]*x[5*idx[jrow]+3]; 651f9fae5adSBarry Smith sum5 += v[jrow]*x[5*idx[jrow]+4]; 652f9fae5adSBarry Smith jrow++; 653f9fae5adSBarry Smith } 654f9fae5adSBarry Smith y[5*i] = sum1; 655f9fae5adSBarry Smith y[5*i+1] = sum2; 656f9fae5adSBarry Smith y[5*i+2] = sum3; 657f9fae5adSBarry Smith y[5*i+3] = sum4; 658f9fae5adSBarry Smith y[5*i+4] = sum5; 659f9fae5adSBarry Smith } 660f9fae5adSBarry Smith 661efee365bSSatish Balay ierr = PetscLogFlops(10*a->nz - 5*m);CHKERRQ(ierr); 6621ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 6631ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 664f9fae5adSBarry Smith PetscFunctionReturn(0); 665f9fae5adSBarry Smith } 666f9fae5adSBarry Smith 6674a2ae208SSatish Balay #undef __FUNCT__ 668b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 669dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 670f9fae5adSBarry Smith { 6714cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 672f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 67387828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0; 674dfbe8321SBarry Smith PetscErrorCode ierr; 675899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 676f9fae5adSBarry Smith 677f9fae5adSBarry Smith PetscFunctionBegin; 6782dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 6791ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 6801ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 681bfec09a0SHong Zhang 682f9fae5adSBarry Smith for (i=0; i<m; i++) { 683bfec09a0SHong Zhang idx = a->j + a->i[i] ; 684bfec09a0SHong Zhang v = a->a + a->i[i] ; 685f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 686f9fae5adSBarry Smith alpha1 = x[5*i]; 687f9fae5adSBarry Smith alpha2 = x[5*i+1]; 688f9fae5adSBarry Smith alpha3 = x[5*i+2]; 689f9fae5adSBarry Smith alpha4 = x[5*i+3]; 690f9fae5adSBarry Smith alpha5 = x[5*i+4]; 691f9fae5adSBarry Smith while (n-->0) { 692f9fae5adSBarry Smith y[5*(*idx)] += alpha1*(*v); 693f9fae5adSBarry Smith y[5*(*idx)+1] += alpha2*(*v); 694f9fae5adSBarry Smith y[5*(*idx)+2] += alpha3*(*v); 695f9fae5adSBarry Smith y[5*(*idx)+3] += alpha4*(*v); 696f9fae5adSBarry Smith y[5*(*idx)+4] += alpha5*(*v); 697f9fae5adSBarry Smith idx++; v++; 698f9fae5adSBarry Smith } 699f9fae5adSBarry Smith } 700899cda47SBarry Smith ierr = PetscLogFlops(10*a->nz - 5*b->AIJ->cmap.n);CHKERRQ(ierr); 7011ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 7021ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 703f9fae5adSBarry Smith PetscFunctionReturn(0); 704f9fae5adSBarry Smith } 705f9fae5adSBarry Smith 7064a2ae208SSatish Balay #undef __FUNCT__ 707b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 708dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 709f9fae5adSBarry Smith { 7104cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 711f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 71287828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 713dfbe8321SBarry Smith PetscErrorCode ierr; 714899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 715b24ad042SBarry Smith PetscInt n,i,jrow,j; 716f9fae5adSBarry Smith 717f9fae5adSBarry Smith PetscFunctionBegin; 718f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 7191ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 7201ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 721f9fae5adSBarry Smith idx = a->j; 722f9fae5adSBarry Smith v = a->a; 723f9fae5adSBarry Smith ii = a->i; 724f9fae5adSBarry Smith 725f9fae5adSBarry Smith for (i=0; i<m; i++) { 726f9fae5adSBarry Smith jrow = ii[i]; 727f9fae5adSBarry Smith n = ii[i+1] - jrow; 728f9fae5adSBarry Smith sum1 = 0.0; 729f9fae5adSBarry Smith sum2 = 0.0; 730f9fae5adSBarry Smith sum3 = 0.0; 731f9fae5adSBarry Smith sum4 = 0.0; 732f9fae5adSBarry Smith sum5 = 0.0; 733f9fae5adSBarry Smith for (j=0; j<n; j++) { 734f9fae5adSBarry Smith sum1 += v[jrow]*x[5*idx[jrow]]; 735f9fae5adSBarry Smith sum2 += v[jrow]*x[5*idx[jrow]+1]; 736f9fae5adSBarry Smith sum3 += v[jrow]*x[5*idx[jrow]+2]; 737f9fae5adSBarry Smith sum4 += v[jrow]*x[5*idx[jrow]+3]; 738f9fae5adSBarry Smith sum5 += v[jrow]*x[5*idx[jrow]+4]; 739f9fae5adSBarry Smith jrow++; 740f9fae5adSBarry Smith } 741f9fae5adSBarry Smith y[5*i] += sum1; 742f9fae5adSBarry Smith y[5*i+1] += sum2; 743f9fae5adSBarry Smith y[5*i+2] += sum3; 744f9fae5adSBarry Smith y[5*i+3] += sum4; 745f9fae5adSBarry Smith y[5*i+4] += sum5; 746f9fae5adSBarry Smith } 747f9fae5adSBarry Smith 748efee365bSSatish Balay ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr); 7491ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 7501ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 751f9fae5adSBarry Smith PetscFunctionReturn(0); 752f9fae5adSBarry Smith } 753f9fae5adSBarry Smith 7544a2ae208SSatish Balay #undef __FUNCT__ 755b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 756dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 757f9fae5adSBarry Smith { 7584cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 759f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 76087828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5; 761dfbe8321SBarry Smith PetscErrorCode ierr; 762899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 763f9fae5adSBarry Smith 764f9fae5adSBarry Smith PetscFunctionBegin; 765f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 7661ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 7671ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 768bfec09a0SHong Zhang 769f9fae5adSBarry Smith for (i=0; i<m; i++) { 770bfec09a0SHong Zhang idx = a->j + a->i[i] ; 771bfec09a0SHong Zhang v = a->a + a->i[i] ; 772f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 773f9fae5adSBarry Smith alpha1 = x[5*i]; 774f9fae5adSBarry Smith alpha2 = x[5*i+1]; 775f9fae5adSBarry Smith alpha3 = x[5*i+2]; 776f9fae5adSBarry Smith alpha4 = x[5*i+3]; 777f9fae5adSBarry Smith alpha5 = x[5*i+4]; 778f9fae5adSBarry Smith while (n-->0) { 779f9fae5adSBarry Smith y[5*(*idx)] += alpha1*(*v); 780f9fae5adSBarry Smith y[5*(*idx)+1] += alpha2*(*v); 781f9fae5adSBarry Smith y[5*(*idx)+2] += alpha3*(*v); 782f9fae5adSBarry Smith y[5*(*idx)+3] += alpha4*(*v); 783f9fae5adSBarry Smith y[5*(*idx)+4] += alpha5*(*v); 784f9fae5adSBarry Smith idx++; v++; 785f9fae5adSBarry Smith } 786f9fae5adSBarry Smith } 787efee365bSSatish Balay ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr); 7881ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 7891ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 790f9fae5adSBarry Smith PetscFunctionReturn(0); 791bcc973b7SBarry Smith } 792bcc973b7SBarry Smith 7936cd98798SBarry Smith /* ------------------------------------------------------------------------------*/ 7944a2ae208SSatish Balay #undef __FUNCT__ 795b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_6" 796dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 7976cd98798SBarry Smith { 7986cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 7996cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 80087828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 801dfbe8321SBarry Smith PetscErrorCode ierr; 802899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 803b24ad042SBarry Smith PetscInt n,i,jrow,j; 8046cd98798SBarry Smith 8056cd98798SBarry Smith PetscFunctionBegin; 8061ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 8071ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 8086cd98798SBarry Smith idx = a->j; 8096cd98798SBarry Smith v = a->a; 8106cd98798SBarry Smith ii = a->i; 8116cd98798SBarry Smith 8126cd98798SBarry Smith for (i=0; i<m; i++) { 8136cd98798SBarry Smith jrow = ii[i]; 8146cd98798SBarry Smith n = ii[i+1] - jrow; 8156cd98798SBarry Smith sum1 = 0.0; 8166cd98798SBarry Smith sum2 = 0.0; 8176cd98798SBarry Smith sum3 = 0.0; 8186cd98798SBarry Smith sum4 = 0.0; 8196cd98798SBarry Smith sum5 = 0.0; 8206cd98798SBarry Smith sum6 = 0.0; 8216cd98798SBarry Smith for (j=0; j<n; j++) { 8226cd98798SBarry Smith sum1 += v[jrow]*x[6*idx[jrow]]; 8236cd98798SBarry Smith sum2 += v[jrow]*x[6*idx[jrow]+1]; 8246cd98798SBarry Smith sum3 += v[jrow]*x[6*idx[jrow]+2]; 8256cd98798SBarry Smith sum4 += v[jrow]*x[6*idx[jrow]+3]; 8266cd98798SBarry Smith sum5 += v[jrow]*x[6*idx[jrow]+4]; 8276cd98798SBarry Smith sum6 += v[jrow]*x[6*idx[jrow]+5]; 8286cd98798SBarry Smith jrow++; 8296cd98798SBarry Smith } 8306cd98798SBarry Smith y[6*i] = sum1; 8316cd98798SBarry Smith y[6*i+1] = sum2; 8326cd98798SBarry Smith y[6*i+2] = sum3; 8336cd98798SBarry Smith y[6*i+3] = sum4; 8346cd98798SBarry Smith y[6*i+4] = sum5; 8356cd98798SBarry Smith y[6*i+5] = sum6; 8366cd98798SBarry Smith } 8376cd98798SBarry Smith 838efee365bSSatish Balay ierr = PetscLogFlops(12*a->nz - 6*m);CHKERRQ(ierr); 8391ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 8401ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 8416cd98798SBarry Smith PetscFunctionReturn(0); 8426cd98798SBarry Smith } 8436cd98798SBarry Smith 8444a2ae208SSatish Balay #undef __FUNCT__ 845b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 846dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 8476cd98798SBarry Smith { 8486cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 8496cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 85087828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0; 851dfbe8321SBarry Smith PetscErrorCode ierr; 852899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 8536cd98798SBarry Smith 8546cd98798SBarry Smith PetscFunctionBegin; 8552dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 8561ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 8571ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 858bfec09a0SHong Zhang 8596cd98798SBarry Smith for (i=0; i<m; i++) { 860bfec09a0SHong Zhang idx = a->j + a->i[i] ; 861bfec09a0SHong Zhang v = a->a + a->i[i] ; 8626cd98798SBarry Smith n = a->i[i+1] - a->i[i]; 8636cd98798SBarry Smith alpha1 = x[6*i]; 8646cd98798SBarry Smith alpha2 = x[6*i+1]; 8656cd98798SBarry Smith alpha3 = x[6*i+2]; 8666cd98798SBarry Smith alpha4 = x[6*i+3]; 8676cd98798SBarry Smith alpha5 = x[6*i+4]; 8686cd98798SBarry Smith alpha6 = x[6*i+5]; 8696cd98798SBarry Smith while (n-->0) { 8706cd98798SBarry Smith y[6*(*idx)] += alpha1*(*v); 8716cd98798SBarry Smith y[6*(*idx)+1] += alpha2*(*v); 8726cd98798SBarry Smith y[6*(*idx)+2] += alpha3*(*v); 8736cd98798SBarry Smith y[6*(*idx)+3] += alpha4*(*v); 8746cd98798SBarry Smith y[6*(*idx)+4] += alpha5*(*v); 8756cd98798SBarry Smith y[6*(*idx)+5] += alpha6*(*v); 8766cd98798SBarry Smith idx++; v++; 8776cd98798SBarry Smith } 8786cd98798SBarry Smith } 879899cda47SBarry Smith ierr = PetscLogFlops(12*a->nz - 6*b->AIJ->cmap.n);CHKERRQ(ierr); 8801ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 8811ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 8826cd98798SBarry Smith PetscFunctionReturn(0); 8836cd98798SBarry Smith } 8846cd98798SBarry Smith 8854a2ae208SSatish Balay #undef __FUNCT__ 886b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 887dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 8886cd98798SBarry Smith { 8896cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 8906cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 89187828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 892dfbe8321SBarry Smith PetscErrorCode ierr; 893899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 894b24ad042SBarry Smith PetscInt n,i,jrow,j; 8956cd98798SBarry Smith 8966cd98798SBarry Smith PetscFunctionBegin; 8976cd98798SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 8981ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 8991ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 9006cd98798SBarry Smith idx = a->j; 9016cd98798SBarry Smith v = a->a; 9026cd98798SBarry Smith ii = a->i; 9036cd98798SBarry Smith 9046cd98798SBarry Smith for (i=0; i<m; i++) { 9056cd98798SBarry Smith jrow = ii[i]; 9066cd98798SBarry Smith n = ii[i+1] - jrow; 9076cd98798SBarry Smith sum1 = 0.0; 9086cd98798SBarry Smith sum2 = 0.0; 9096cd98798SBarry Smith sum3 = 0.0; 9106cd98798SBarry Smith sum4 = 0.0; 9116cd98798SBarry Smith sum5 = 0.0; 9126cd98798SBarry Smith sum6 = 0.0; 9136cd98798SBarry Smith for (j=0; j<n; j++) { 9146cd98798SBarry Smith sum1 += v[jrow]*x[6*idx[jrow]]; 9156cd98798SBarry Smith sum2 += v[jrow]*x[6*idx[jrow]+1]; 9166cd98798SBarry Smith sum3 += v[jrow]*x[6*idx[jrow]+2]; 9176cd98798SBarry Smith sum4 += v[jrow]*x[6*idx[jrow]+3]; 9186cd98798SBarry Smith sum5 += v[jrow]*x[6*idx[jrow]+4]; 9196cd98798SBarry Smith sum6 += v[jrow]*x[6*idx[jrow]+5]; 9206cd98798SBarry Smith jrow++; 9216cd98798SBarry Smith } 9226cd98798SBarry Smith y[6*i] += sum1; 9236cd98798SBarry Smith y[6*i+1] += sum2; 9246cd98798SBarry Smith y[6*i+2] += sum3; 9256cd98798SBarry Smith y[6*i+3] += sum4; 9266cd98798SBarry Smith y[6*i+4] += sum5; 9276cd98798SBarry Smith y[6*i+5] += sum6; 9286cd98798SBarry Smith } 9296cd98798SBarry Smith 930efee365bSSatish Balay ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr); 9311ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 9321ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 9336cd98798SBarry Smith PetscFunctionReturn(0); 9346cd98798SBarry Smith } 9356cd98798SBarry Smith 9364a2ae208SSatish Balay #undef __FUNCT__ 937b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 938dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 9396cd98798SBarry Smith { 9406cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 9416cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 94287828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 943dfbe8321SBarry Smith PetscErrorCode ierr; 944899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 9456cd98798SBarry Smith 9466cd98798SBarry Smith PetscFunctionBegin; 9476cd98798SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 9481ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 9491ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 950bfec09a0SHong Zhang 9516cd98798SBarry Smith for (i=0; i<m; i++) { 952bfec09a0SHong Zhang idx = a->j + a->i[i] ; 953bfec09a0SHong Zhang v = a->a + a->i[i] ; 9546cd98798SBarry Smith n = a->i[i+1] - a->i[i]; 9556cd98798SBarry Smith alpha1 = x[6*i]; 9566cd98798SBarry Smith alpha2 = x[6*i+1]; 9576cd98798SBarry Smith alpha3 = x[6*i+2]; 9586cd98798SBarry Smith alpha4 = x[6*i+3]; 9596cd98798SBarry Smith alpha5 = x[6*i+4]; 9606cd98798SBarry Smith alpha6 = x[6*i+5]; 9616cd98798SBarry Smith while (n-->0) { 9626cd98798SBarry Smith y[6*(*idx)] += alpha1*(*v); 9636cd98798SBarry Smith y[6*(*idx)+1] += alpha2*(*v); 9646cd98798SBarry Smith y[6*(*idx)+2] += alpha3*(*v); 9656cd98798SBarry Smith y[6*(*idx)+3] += alpha4*(*v); 9666cd98798SBarry Smith y[6*(*idx)+4] += alpha5*(*v); 9676cd98798SBarry Smith y[6*(*idx)+5] += alpha6*(*v); 9686cd98798SBarry Smith idx++; v++; 9696cd98798SBarry Smith } 9706cd98798SBarry Smith } 971efee365bSSatish Balay ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr); 9721ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 9731ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 9746cd98798SBarry Smith PetscFunctionReturn(0); 9756cd98798SBarry Smith } 9766cd98798SBarry Smith 97766ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/ 97866ed3db0SBarry Smith #undef __FUNCT__ 979ed8eea03SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_7" 980ed8eea03SBarry Smith PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 981ed8eea03SBarry Smith { 982ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 983ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 984ed8eea03SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 985ed8eea03SBarry Smith PetscErrorCode ierr; 986899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 987ed8eea03SBarry Smith PetscInt n,i,jrow,j; 988ed8eea03SBarry Smith 989ed8eea03SBarry Smith PetscFunctionBegin; 990ed8eea03SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 991ed8eea03SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 992ed8eea03SBarry Smith idx = a->j; 993ed8eea03SBarry Smith v = a->a; 994ed8eea03SBarry Smith ii = a->i; 995ed8eea03SBarry Smith 996ed8eea03SBarry Smith for (i=0; i<m; i++) { 997ed8eea03SBarry Smith jrow = ii[i]; 998ed8eea03SBarry Smith n = ii[i+1] - jrow; 999ed8eea03SBarry Smith sum1 = 0.0; 1000ed8eea03SBarry Smith sum2 = 0.0; 1001ed8eea03SBarry Smith sum3 = 0.0; 1002ed8eea03SBarry Smith sum4 = 0.0; 1003ed8eea03SBarry Smith sum5 = 0.0; 1004ed8eea03SBarry Smith sum6 = 0.0; 1005ed8eea03SBarry Smith sum7 = 0.0; 1006ed8eea03SBarry Smith for (j=0; j<n; j++) { 1007ed8eea03SBarry Smith sum1 += v[jrow]*x[7*idx[jrow]]; 1008ed8eea03SBarry Smith sum2 += v[jrow]*x[7*idx[jrow]+1]; 1009ed8eea03SBarry Smith sum3 += v[jrow]*x[7*idx[jrow]+2]; 1010ed8eea03SBarry Smith sum4 += v[jrow]*x[7*idx[jrow]+3]; 1011ed8eea03SBarry Smith sum5 += v[jrow]*x[7*idx[jrow]+4]; 1012ed8eea03SBarry Smith sum6 += v[jrow]*x[7*idx[jrow]+5]; 1013ed8eea03SBarry Smith sum7 += v[jrow]*x[7*idx[jrow]+6]; 1014ed8eea03SBarry Smith jrow++; 1015ed8eea03SBarry Smith } 1016ed8eea03SBarry Smith y[7*i] = sum1; 1017ed8eea03SBarry Smith y[7*i+1] = sum2; 1018ed8eea03SBarry Smith y[7*i+2] = sum3; 1019ed8eea03SBarry Smith y[7*i+3] = sum4; 1020ed8eea03SBarry Smith y[7*i+4] = sum5; 1021ed8eea03SBarry Smith y[7*i+5] = sum6; 1022ed8eea03SBarry Smith y[7*i+6] = sum7; 1023ed8eea03SBarry Smith } 1024ed8eea03SBarry Smith 1025efee365bSSatish Balay ierr = PetscLogFlops(14*a->nz - 7*m);CHKERRQ(ierr); 1026ed8eea03SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1027ed8eea03SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1028ed8eea03SBarry Smith PetscFunctionReturn(0); 1029ed8eea03SBarry Smith } 1030ed8eea03SBarry Smith 1031ed8eea03SBarry Smith #undef __FUNCT__ 1032ed8eea03SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7" 1033ed8eea03SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1034ed8eea03SBarry Smith { 1035ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1036ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1037ed8eea03SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0; 1038ed8eea03SBarry Smith PetscErrorCode ierr; 1039899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1040ed8eea03SBarry Smith 1041ed8eea03SBarry Smith PetscFunctionBegin; 10422dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 1043ed8eea03SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1044ed8eea03SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1045ed8eea03SBarry Smith 1046ed8eea03SBarry Smith for (i=0; i<m; i++) { 1047ed8eea03SBarry Smith idx = a->j + a->i[i] ; 1048ed8eea03SBarry Smith v = a->a + a->i[i] ; 1049ed8eea03SBarry Smith n = a->i[i+1] - a->i[i]; 1050ed8eea03SBarry Smith alpha1 = x[7*i]; 1051ed8eea03SBarry Smith alpha2 = x[7*i+1]; 1052ed8eea03SBarry Smith alpha3 = x[7*i+2]; 1053ed8eea03SBarry Smith alpha4 = x[7*i+3]; 1054ed8eea03SBarry Smith alpha5 = x[7*i+4]; 1055ed8eea03SBarry Smith alpha6 = x[7*i+5]; 1056ed8eea03SBarry Smith alpha7 = x[7*i+6]; 1057ed8eea03SBarry Smith while (n-->0) { 1058ed8eea03SBarry Smith y[7*(*idx)] += alpha1*(*v); 1059ed8eea03SBarry Smith y[7*(*idx)+1] += alpha2*(*v); 1060ed8eea03SBarry Smith y[7*(*idx)+2] += alpha3*(*v); 1061ed8eea03SBarry Smith y[7*(*idx)+3] += alpha4*(*v); 1062ed8eea03SBarry Smith y[7*(*idx)+4] += alpha5*(*v); 1063ed8eea03SBarry Smith y[7*(*idx)+5] += alpha6*(*v); 1064ed8eea03SBarry Smith y[7*(*idx)+6] += alpha7*(*v); 1065ed8eea03SBarry Smith idx++; v++; 1066ed8eea03SBarry Smith } 1067ed8eea03SBarry Smith } 1068899cda47SBarry Smith ierr = PetscLogFlops(14*a->nz - 7*b->AIJ->cmap.n);CHKERRQ(ierr); 1069ed8eea03SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1070ed8eea03SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1071ed8eea03SBarry Smith PetscFunctionReturn(0); 1072ed8eea03SBarry Smith } 1073ed8eea03SBarry Smith 1074ed8eea03SBarry Smith #undef __FUNCT__ 1075ed8eea03SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_7" 1076ed8eea03SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1077ed8eea03SBarry Smith { 1078ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1079ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1080ed8eea03SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1081ed8eea03SBarry Smith PetscErrorCode ierr; 1082899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1083ed8eea03SBarry Smith PetscInt n,i,jrow,j; 1084ed8eea03SBarry Smith 1085ed8eea03SBarry Smith PetscFunctionBegin; 1086ed8eea03SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1087ed8eea03SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1088ed8eea03SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1089ed8eea03SBarry Smith idx = a->j; 1090ed8eea03SBarry Smith v = a->a; 1091ed8eea03SBarry Smith ii = a->i; 1092ed8eea03SBarry Smith 1093ed8eea03SBarry Smith for (i=0; i<m; i++) { 1094ed8eea03SBarry Smith jrow = ii[i]; 1095ed8eea03SBarry Smith n = ii[i+1] - jrow; 1096ed8eea03SBarry Smith sum1 = 0.0; 1097ed8eea03SBarry Smith sum2 = 0.0; 1098ed8eea03SBarry Smith sum3 = 0.0; 1099ed8eea03SBarry Smith sum4 = 0.0; 1100ed8eea03SBarry Smith sum5 = 0.0; 1101ed8eea03SBarry Smith sum6 = 0.0; 1102ed8eea03SBarry Smith sum7 = 0.0; 1103ed8eea03SBarry Smith for (j=0; j<n; j++) { 1104ed8eea03SBarry Smith sum1 += v[jrow]*x[7*idx[jrow]]; 1105ed8eea03SBarry Smith sum2 += v[jrow]*x[7*idx[jrow]+1]; 1106ed8eea03SBarry Smith sum3 += v[jrow]*x[7*idx[jrow]+2]; 1107ed8eea03SBarry Smith sum4 += v[jrow]*x[7*idx[jrow]+3]; 1108ed8eea03SBarry Smith sum5 += v[jrow]*x[7*idx[jrow]+4]; 1109ed8eea03SBarry Smith sum6 += v[jrow]*x[7*idx[jrow]+5]; 1110ed8eea03SBarry Smith sum7 += v[jrow]*x[7*idx[jrow]+6]; 1111ed8eea03SBarry Smith jrow++; 1112ed8eea03SBarry Smith } 1113ed8eea03SBarry Smith y[7*i] += sum1; 1114ed8eea03SBarry Smith y[7*i+1] += sum2; 1115ed8eea03SBarry Smith y[7*i+2] += sum3; 1116ed8eea03SBarry Smith y[7*i+3] += sum4; 1117ed8eea03SBarry Smith y[7*i+4] += sum5; 1118ed8eea03SBarry Smith y[7*i+5] += sum6; 1119ed8eea03SBarry Smith y[7*i+6] += sum7; 1120ed8eea03SBarry Smith } 1121ed8eea03SBarry Smith 1122efee365bSSatish Balay ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr); 1123ed8eea03SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1124ed8eea03SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1125ed8eea03SBarry Smith PetscFunctionReturn(0); 1126ed8eea03SBarry Smith } 1127ed8eea03SBarry Smith 1128ed8eea03SBarry Smith #undef __FUNCT__ 1129ed8eea03SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7" 1130ed8eea03SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1131ed8eea03SBarry Smith { 1132ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1133ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1134ed8eea03SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1135ed8eea03SBarry Smith PetscErrorCode ierr; 1136899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1137ed8eea03SBarry Smith 1138ed8eea03SBarry Smith PetscFunctionBegin; 1139ed8eea03SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1140ed8eea03SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1141ed8eea03SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1142ed8eea03SBarry Smith for (i=0; i<m; i++) { 1143ed8eea03SBarry Smith idx = a->j + a->i[i] ; 1144ed8eea03SBarry Smith v = a->a + a->i[i] ; 1145ed8eea03SBarry Smith n = a->i[i+1] - a->i[i]; 1146ed8eea03SBarry Smith alpha1 = x[7*i]; 1147ed8eea03SBarry Smith alpha2 = x[7*i+1]; 1148ed8eea03SBarry Smith alpha3 = x[7*i+2]; 1149ed8eea03SBarry Smith alpha4 = x[7*i+3]; 1150ed8eea03SBarry Smith alpha5 = x[7*i+4]; 1151ed8eea03SBarry Smith alpha6 = x[7*i+5]; 1152ed8eea03SBarry Smith alpha7 = x[7*i+6]; 1153ed8eea03SBarry Smith while (n-->0) { 1154ed8eea03SBarry Smith y[7*(*idx)] += alpha1*(*v); 1155ed8eea03SBarry Smith y[7*(*idx)+1] += alpha2*(*v); 1156ed8eea03SBarry Smith y[7*(*idx)+2] += alpha3*(*v); 1157ed8eea03SBarry Smith y[7*(*idx)+3] += alpha4*(*v); 1158ed8eea03SBarry Smith y[7*(*idx)+4] += alpha5*(*v); 1159ed8eea03SBarry Smith y[7*(*idx)+5] += alpha6*(*v); 1160ed8eea03SBarry Smith y[7*(*idx)+6] += alpha7*(*v); 1161ed8eea03SBarry Smith idx++; v++; 1162ed8eea03SBarry Smith } 1163ed8eea03SBarry Smith } 1164efee365bSSatish Balay ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr); 1165ed8eea03SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1166ed8eea03SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1167ed8eea03SBarry Smith PetscFunctionReturn(0); 1168ed8eea03SBarry Smith } 1169ed8eea03SBarry Smith 1170ed8eea03SBarry Smith #undef __FUNCT__ 117166ed3db0SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_8" 1172dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 117366ed3db0SBarry Smith { 117466ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 117566ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 117687828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1177dfbe8321SBarry Smith PetscErrorCode ierr; 1178899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1179b24ad042SBarry Smith PetscInt n,i,jrow,j; 118066ed3db0SBarry Smith 118166ed3db0SBarry Smith PetscFunctionBegin; 11821ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 11831ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 118466ed3db0SBarry Smith idx = a->j; 118566ed3db0SBarry Smith v = a->a; 118666ed3db0SBarry Smith ii = a->i; 118766ed3db0SBarry Smith 118866ed3db0SBarry Smith for (i=0; i<m; i++) { 118966ed3db0SBarry Smith jrow = ii[i]; 119066ed3db0SBarry Smith n = ii[i+1] - jrow; 119166ed3db0SBarry Smith sum1 = 0.0; 119266ed3db0SBarry Smith sum2 = 0.0; 119366ed3db0SBarry Smith sum3 = 0.0; 119466ed3db0SBarry Smith sum4 = 0.0; 119566ed3db0SBarry Smith sum5 = 0.0; 119666ed3db0SBarry Smith sum6 = 0.0; 119766ed3db0SBarry Smith sum7 = 0.0; 119866ed3db0SBarry Smith sum8 = 0.0; 119966ed3db0SBarry Smith for (j=0; j<n; j++) { 120066ed3db0SBarry Smith sum1 += v[jrow]*x[8*idx[jrow]]; 120166ed3db0SBarry Smith sum2 += v[jrow]*x[8*idx[jrow]+1]; 120266ed3db0SBarry Smith sum3 += v[jrow]*x[8*idx[jrow]+2]; 120366ed3db0SBarry Smith sum4 += v[jrow]*x[8*idx[jrow]+3]; 120466ed3db0SBarry Smith sum5 += v[jrow]*x[8*idx[jrow]+4]; 120566ed3db0SBarry Smith sum6 += v[jrow]*x[8*idx[jrow]+5]; 120666ed3db0SBarry Smith sum7 += v[jrow]*x[8*idx[jrow]+6]; 120766ed3db0SBarry Smith sum8 += v[jrow]*x[8*idx[jrow]+7]; 120866ed3db0SBarry Smith jrow++; 120966ed3db0SBarry Smith } 121066ed3db0SBarry Smith y[8*i] = sum1; 121166ed3db0SBarry Smith y[8*i+1] = sum2; 121266ed3db0SBarry Smith y[8*i+2] = sum3; 121366ed3db0SBarry Smith y[8*i+3] = sum4; 121466ed3db0SBarry Smith y[8*i+4] = sum5; 121566ed3db0SBarry Smith y[8*i+5] = sum6; 121666ed3db0SBarry Smith y[8*i+6] = sum7; 121766ed3db0SBarry Smith y[8*i+7] = sum8; 121866ed3db0SBarry Smith } 121966ed3db0SBarry Smith 1220efee365bSSatish Balay ierr = PetscLogFlops(16*a->nz - 8*m);CHKERRQ(ierr); 12211ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 12221ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 122366ed3db0SBarry Smith PetscFunctionReturn(0); 122466ed3db0SBarry Smith } 122566ed3db0SBarry Smith 122666ed3db0SBarry Smith #undef __FUNCT__ 122766ed3db0SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 1228dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 122966ed3db0SBarry Smith { 123066ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 123166ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 123287828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1233dfbe8321SBarry Smith PetscErrorCode ierr; 1234899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 123566ed3db0SBarry Smith 123666ed3db0SBarry Smith PetscFunctionBegin; 12372dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 12381ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 12391ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1240bfec09a0SHong Zhang 124166ed3db0SBarry Smith for (i=0; i<m; i++) { 1242bfec09a0SHong Zhang idx = a->j + a->i[i] ; 1243bfec09a0SHong Zhang v = a->a + a->i[i] ; 124466ed3db0SBarry Smith n = a->i[i+1] - a->i[i]; 124566ed3db0SBarry Smith alpha1 = x[8*i]; 124666ed3db0SBarry Smith alpha2 = x[8*i+1]; 124766ed3db0SBarry Smith alpha3 = x[8*i+2]; 124866ed3db0SBarry Smith alpha4 = x[8*i+3]; 124966ed3db0SBarry Smith alpha5 = x[8*i+4]; 125066ed3db0SBarry Smith alpha6 = x[8*i+5]; 125166ed3db0SBarry Smith alpha7 = x[8*i+6]; 125266ed3db0SBarry Smith alpha8 = x[8*i+7]; 125366ed3db0SBarry Smith while (n-->0) { 125466ed3db0SBarry Smith y[8*(*idx)] += alpha1*(*v); 125566ed3db0SBarry Smith y[8*(*idx)+1] += alpha2*(*v); 125666ed3db0SBarry Smith y[8*(*idx)+2] += alpha3*(*v); 125766ed3db0SBarry Smith y[8*(*idx)+3] += alpha4*(*v); 125866ed3db0SBarry Smith y[8*(*idx)+4] += alpha5*(*v); 125966ed3db0SBarry Smith y[8*(*idx)+5] += alpha6*(*v); 126066ed3db0SBarry Smith y[8*(*idx)+6] += alpha7*(*v); 126166ed3db0SBarry Smith y[8*(*idx)+7] += alpha8*(*v); 126266ed3db0SBarry Smith idx++; v++; 126366ed3db0SBarry Smith } 126466ed3db0SBarry Smith } 1265899cda47SBarry Smith ierr = PetscLogFlops(16*a->nz - 8*b->AIJ->cmap.n);CHKERRQ(ierr); 12661ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 12671ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 126866ed3db0SBarry Smith PetscFunctionReturn(0); 126966ed3db0SBarry Smith } 127066ed3db0SBarry Smith 127166ed3db0SBarry Smith #undef __FUNCT__ 127266ed3db0SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1273dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 127466ed3db0SBarry Smith { 127566ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 127666ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 127787828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1278dfbe8321SBarry Smith PetscErrorCode ierr; 1279899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1280b24ad042SBarry Smith PetscInt n,i,jrow,j; 128166ed3db0SBarry Smith 128266ed3db0SBarry Smith PetscFunctionBegin; 128366ed3db0SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 12841ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 12851ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 128666ed3db0SBarry Smith idx = a->j; 128766ed3db0SBarry Smith v = a->a; 128866ed3db0SBarry Smith ii = a->i; 128966ed3db0SBarry Smith 129066ed3db0SBarry Smith for (i=0; i<m; i++) { 129166ed3db0SBarry Smith jrow = ii[i]; 129266ed3db0SBarry Smith n = ii[i+1] - jrow; 129366ed3db0SBarry Smith sum1 = 0.0; 129466ed3db0SBarry Smith sum2 = 0.0; 129566ed3db0SBarry Smith sum3 = 0.0; 129666ed3db0SBarry Smith sum4 = 0.0; 129766ed3db0SBarry Smith sum5 = 0.0; 129866ed3db0SBarry Smith sum6 = 0.0; 129966ed3db0SBarry Smith sum7 = 0.0; 130066ed3db0SBarry Smith sum8 = 0.0; 130166ed3db0SBarry Smith for (j=0; j<n; j++) { 130266ed3db0SBarry Smith sum1 += v[jrow]*x[8*idx[jrow]]; 130366ed3db0SBarry Smith sum2 += v[jrow]*x[8*idx[jrow]+1]; 130466ed3db0SBarry Smith sum3 += v[jrow]*x[8*idx[jrow]+2]; 130566ed3db0SBarry Smith sum4 += v[jrow]*x[8*idx[jrow]+3]; 130666ed3db0SBarry Smith sum5 += v[jrow]*x[8*idx[jrow]+4]; 130766ed3db0SBarry Smith sum6 += v[jrow]*x[8*idx[jrow]+5]; 130866ed3db0SBarry Smith sum7 += v[jrow]*x[8*idx[jrow]+6]; 130966ed3db0SBarry Smith sum8 += v[jrow]*x[8*idx[jrow]+7]; 131066ed3db0SBarry Smith jrow++; 131166ed3db0SBarry Smith } 131266ed3db0SBarry Smith y[8*i] += sum1; 131366ed3db0SBarry Smith y[8*i+1] += sum2; 131466ed3db0SBarry Smith y[8*i+2] += sum3; 131566ed3db0SBarry Smith y[8*i+3] += sum4; 131666ed3db0SBarry Smith y[8*i+4] += sum5; 131766ed3db0SBarry Smith y[8*i+5] += sum6; 131866ed3db0SBarry Smith y[8*i+6] += sum7; 131966ed3db0SBarry Smith y[8*i+7] += sum8; 132066ed3db0SBarry Smith } 132166ed3db0SBarry Smith 1322efee365bSSatish Balay ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr); 13231ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 13241ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 132566ed3db0SBarry Smith PetscFunctionReturn(0); 132666ed3db0SBarry Smith } 132766ed3db0SBarry Smith 132866ed3db0SBarry Smith #undef __FUNCT__ 132966ed3db0SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1330dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 133166ed3db0SBarry Smith { 133266ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 133366ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 133487828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1335dfbe8321SBarry Smith PetscErrorCode ierr; 1336899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 133766ed3db0SBarry Smith 133866ed3db0SBarry Smith PetscFunctionBegin; 133966ed3db0SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 13401ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 13411ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 134266ed3db0SBarry Smith for (i=0; i<m; i++) { 1343bfec09a0SHong Zhang idx = a->j + a->i[i] ; 1344bfec09a0SHong Zhang v = a->a + a->i[i] ; 134566ed3db0SBarry Smith n = a->i[i+1] - a->i[i]; 134666ed3db0SBarry Smith alpha1 = x[8*i]; 134766ed3db0SBarry Smith alpha2 = x[8*i+1]; 134866ed3db0SBarry Smith alpha3 = x[8*i+2]; 134966ed3db0SBarry Smith alpha4 = x[8*i+3]; 135066ed3db0SBarry Smith alpha5 = x[8*i+4]; 135166ed3db0SBarry Smith alpha6 = x[8*i+5]; 135266ed3db0SBarry Smith alpha7 = x[8*i+6]; 135366ed3db0SBarry Smith alpha8 = x[8*i+7]; 135466ed3db0SBarry Smith while (n-->0) { 135566ed3db0SBarry Smith y[8*(*idx)] += alpha1*(*v); 135666ed3db0SBarry Smith y[8*(*idx)+1] += alpha2*(*v); 135766ed3db0SBarry Smith y[8*(*idx)+2] += alpha3*(*v); 135866ed3db0SBarry Smith y[8*(*idx)+3] += alpha4*(*v); 135966ed3db0SBarry Smith y[8*(*idx)+4] += alpha5*(*v); 136066ed3db0SBarry Smith y[8*(*idx)+5] += alpha6*(*v); 136166ed3db0SBarry Smith y[8*(*idx)+6] += alpha7*(*v); 136266ed3db0SBarry Smith y[8*(*idx)+7] += alpha8*(*v); 136366ed3db0SBarry Smith idx++; v++; 136466ed3db0SBarry Smith } 136566ed3db0SBarry Smith } 1366efee365bSSatish Balay ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr); 13671ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 13681ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 13692f7816d4SBarry Smith PetscFunctionReturn(0); 13702f7816d4SBarry Smith } 13712f7816d4SBarry Smith 13722b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/ 13732b692628SMatthew Knepley #undef __FUNCT__ 13742b692628SMatthew Knepley #define __FUNCT__ "MatMult_SeqMAIJ_9" 1375dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 13762b692628SMatthew Knepley { 13772b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 13782b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 13792b692628SMatthew Knepley PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1380dfbe8321SBarry Smith PetscErrorCode ierr; 1381899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1382b24ad042SBarry Smith PetscInt n,i,jrow,j; 13832b692628SMatthew Knepley 13842b692628SMatthew Knepley PetscFunctionBegin; 13851ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 13861ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 13872b692628SMatthew Knepley idx = a->j; 13882b692628SMatthew Knepley v = a->a; 13892b692628SMatthew Knepley ii = a->i; 13902b692628SMatthew Knepley 13912b692628SMatthew Knepley for (i=0; i<m; i++) { 13922b692628SMatthew Knepley jrow = ii[i]; 13932b692628SMatthew Knepley n = ii[i+1] - jrow; 13942b692628SMatthew Knepley sum1 = 0.0; 13952b692628SMatthew Knepley sum2 = 0.0; 13962b692628SMatthew Knepley sum3 = 0.0; 13972b692628SMatthew Knepley sum4 = 0.0; 13982b692628SMatthew Knepley sum5 = 0.0; 13992b692628SMatthew Knepley sum6 = 0.0; 14002b692628SMatthew Knepley sum7 = 0.0; 14012b692628SMatthew Knepley sum8 = 0.0; 14022b692628SMatthew Knepley sum9 = 0.0; 14032b692628SMatthew Knepley for (j=0; j<n; j++) { 14042b692628SMatthew Knepley sum1 += v[jrow]*x[9*idx[jrow]]; 14052b692628SMatthew Knepley sum2 += v[jrow]*x[9*idx[jrow]+1]; 14062b692628SMatthew Knepley sum3 += v[jrow]*x[9*idx[jrow]+2]; 14072b692628SMatthew Knepley sum4 += v[jrow]*x[9*idx[jrow]+3]; 14082b692628SMatthew Knepley sum5 += v[jrow]*x[9*idx[jrow]+4]; 14092b692628SMatthew Knepley sum6 += v[jrow]*x[9*idx[jrow]+5]; 14102b692628SMatthew Knepley sum7 += v[jrow]*x[9*idx[jrow]+6]; 14112b692628SMatthew Knepley sum8 += v[jrow]*x[9*idx[jrow]+7]; 14122b692628SMatthew Knepley sum9 += v[jrow]*x[9*idx[jrow]+8]; 14132b692628SMatthew Knepley jrow++; 14142b692628SMatthew Knepley } 14152b692628SMatthew Knepley y[9*i] = sum1; 14162b692628SMatthew Knepley y[9*i+1] = sum2; 14172b692628SMatthew Knepley y[9*i+2] = sum3; 14182b692628SMatthew Knepley y[9*i+3] = sum4; 14192b692628SMatthew Knepley y[9*i+4] = sum5; 14202b692628SMatthew Knepley y[9*i+5] = sum6; 14212b692628SMatthew Knepley y[9*i+6] = sum7; 14222b692628SMatthew Knepley y[9*i+7] = sum8; 14232b692628SMatthew Knepley y[9*i+8] = sum9; 14242b692628SMatthew Knepley } 14252b692628SMatthew Knepley 1426efee365bSSatish Balay ierr = PetscLogFlops(18*a->nz - 9*m);CHKERRQ(ierr); 14271ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 14281ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 14292b692628SMatthew Knepley PetscFunctionReturn(0); 14302b692628SMatthew Knepley } 14312b692628SMatthew Knepley 1432b9cda22cSBarry Smith /* ------------------------------------------------------------------------------*/ 1433b9cda22cSBarry Smith 14342b692628SMatthew Knepley #undef __FUNCT__ 14352b692628SMatthew Knepley #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9" 1436dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 14372b692628SMatthew Knepley { 14382b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 14392b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 14402b692628SMatthew Knepley PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0; 1441dfbe8321SBarry Smith PetscErrorCode ierr; 1442899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 14432b692628SMatthew Knepley 14442b692628SMatthew Knepley PetscFunctionBegin; 14452dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 14461ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 14471ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 14482b692628SMatthew Knepley 14492b692628SMatthew Knepley for (i=0; i<m; i++) { 14502b692628SMatthew Knepley idx = a->j + a->i[i] ; 14512b692628SMatthew Knepley v = a->a + a->i[i] ; 14522b692628SMatthew Knepley n = a->i[i+1] - a->i[i]; 14532b692628SMatthew Knepley alpha1 = x[9*i]; 14542b692628SMatthew Knepley alpha2 = x[9*i+1]; 14552b692628SMatthew Knepley alpha3 = x[9*i+2]; 14562b692628SMatthew Knepley alpha4 = x[9*i+3]; 14572b692628SMatthew Knepley alpha5 = x[9*i+4]; 14582b692628SMatthew Knepley alpha6 = x[9*i+5]; 14592b692628SMatthew Knepley alpha7 = x[9*i+6]; 14602b692628SMatthew Knepley alpha8 = x[9*i+7]; 14612f6bd0c6SMatthew Knepley alpha9 = x[9*i+8]; 14622b692628SMatthew Knepley while (n-->0) { 14632b692628SMatthew Knepley y[9*(*idx)] += alpha1*(*v); 14642b692628SMatthew Knepley y[9*(*idx)+1] += alpha2*(*v); 14652b692628SMatthew Knepley y[9*(*idx)+2] += alpha3*(*v); 14662b692628SMatthew Knepley y[9*(*idx)+3] += alpha4*(*v); 14672b692628SMatthew Knepley y[9*(*idx)+4] += alpha5*(*v); 14682b692628SMatthew Knepley y[9*(*idx)+5] += alpha6*(*v); 14692b692628SMatthew Knepley y[9*(*idx)+6] += alpha7*(*v); 14702b692628SMatthew Knepley y[9*(*idx)+7] += alpha8*(*v); 14712b692628SMatthew Knepley y[9*(*idx)+8] += alpha9*(*v); 14722b692628SMatthew Knepley idx++; v++; 14732b692628SMatthew Knepley } 14742b692628SMatthew Knepley } 1475899cda47SBarry Smith ierr = PetscLogFlops(18*a->nz - 9*b->AIJ->cmap.n);CHKERRQ(ierr); 14761ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 14771ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 14782b692628SMatthew Knepley PetscFunctionReturn(0); 14792b692628SMatthew Knepley } 14802b692628SMatthew Knepley 14812b692628SMatthew Knepley #undef __FUNCT__ 14822b692628SMatthew Knepley #define __FUNCT__ "MatMultAdd_SeqMAIJ_9" 1483dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 14842b692628SMatthew Knepley { 14852b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 14862b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 14872b692628SMatthew Knepley PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1488dfbe8321SBarry Smith PetscErrorCode ierr; 1489899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1490b24ad042SBarry Smith PetscInt n,i,jrow,j; 14912b692628SMatthew Knepley 14922b692628SMatthew Knepley PetscFunctionBegin; 14932b692628SMatthew Knepley if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 14941ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 14951ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 14962b692628SMatthew Knepley idx = a->j; 14972b692628SMatthew Knepley v = a->a; 14982b692628SMatthew Knepley ii = a->i; 14992b692628SMatthew Knepley 15002b692628SMatthew Knepley for (i=0; i<m; i++) { 15012b692628SMatthew Knepley jrow = ii[i]; 15022b692628SMatthew Knepley n = ii[i+1] - jrow; 15032b692628SMatthew Knepley sum1 = 0.0; 15042b692628SMatthew Knepley sum2 = 0.0; 15052b692628SMatthew Knepley sum3 = 0.0; 15062b692628SMatthew Knepley sum4 = 0.0; 15072b692628SMatthew Knepley sum5 = 0.0; 15082b692628SMatthew Knepley sum6 = 0.0; 15092b692628SMatthew Knepley sum7 = 0.0; 15102b692628SMatthew Knepley sum8 = 0.0; 15112b692628SMatthew Knepley sum9 = 0.0; 15122b692628SMatthew Knepley for (j=0; j<n; j++) { 15132b692628SMatthew Knepley sum1 += v[jrow]*x[9*idx[jrow]]; 15142b692628SMatthew Knepley sum2 += v[jrow]*x[9*idx[jrow]+1]; 15152b692628SMatthew Knepley sum3 += v[jrow]*x[9*idx[jrow]+2]; 15162b692628SMatthew Knepley sum4 += v[jrow]*x[9*idx[jrow]+3]; 15172b692628SMatthew Knepley sum5 += v[jrow]*x[9*idx[jrow]+4]; 15182b692628SMatthew Knepley sum6 += v[jrow]*x[9*idx[jrow]+5]; 15192b692628SMatthew Knepley sum7 += v[jrow]*x[9*idx[jrow]+6]; 15202b692628SMatthew Knepley sum8 += v[jrow]*x[9*idx[jrow]+7]; 15212b692628SMatthew Knepley sum9 += v[jrow]*x[9*idx[jrow]+8]; 15222b692628SMatthew Knepley jrow++; 15232b692628SMatthew Knepley } 15242b692628SMatthew Knepley y[9*i] += sum1; 15252b692628SMatthew Knepley y[9*i+1] += sum2; 15262b692628SMatthew Knepley y[9*i+2] += sum3; 15272b692628SMatthew Knepley y[9*i+3] += sum4; 15282b692628SMatthew Knepley y[9*i+4] += sum5; 15292b692628SMatthew Knepley y[9*i+5] += sum6; 15302b692628SMatthew Knepley y[9*i+6] += sum7; 15312b692628SMatthew Knepley y[9*i+7] += sum8; 15322b692628SMatthew Knepley y[9*i+8] += sum9; 15332b692628SMatthew Knepley } 15342b692628SMatthew Knepley 1535efee365bSSatish Balay ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 15361ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 15371ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 15382b692628SMatthew Knepley PetscFunctionReturn(0); 15392b692628SMatthew Knepley } 15402b692628SMatthew Knepley 15412b692628SMatthew Knepley #undef __FUNCT__ 15422b692628SMatthew Knepley #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9" 1543dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 15442b692628SMatthew Knepley { 15452b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 15462b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 15472b692628SMatthew Knepley PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1548dfbe8321SBarry Smith PetscErrorCode ierr; 1549899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 15502b692628SMatthew Knepley 15512b692628SMatthew Knepley PetscFunctionBegin; 15522b692628SMatthew Knepley if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 15531ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 15541ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 15552b692628SMatthew Knepley for (i=0; i<m; i++) { 15562b692628SMatthew Knepley idx = a->j + a->i[i] ; 15572b692628SMatthew Knepley v = a->a + a->i[i] ; 15582b692628SMatthew Knepley n = a->i[i+1] - a->i[i]; 15592b692628SMatthew Knepley alpha1 = x[9*i]; 15602b692628SMatthew Knepley alpha2 = x[9*i+1]; 15612b692628SMatthew Knepley alpha3 = x[9*i+2]; 15622b692628SMatthew Knepley alpha4 = x[9*i+3]; 15632b692628SMatthew Knepley alpha5 = x[9*i+4]; 15642b692628SMatthew Knepley alpha6 = x[9*i+5]; 15652b692628SMatthew Knepley alpha7 = x[9*i+6]; 15662b692628SMatthew Knepley alpha8 = x[9*i+7]; 15672b692628SMatthew Knepley alpha9 = x[9*i+8]; 15682b692628SMatthew Knepley while (n-->0) { 15692b692628SMatthew Knepley y[9*(*idx)] += alpha1*(*v); 15702b692628SMatthew Knepley y[9*(*idx)+1] += alpha2*(*v); 15712b692628SMatthew Knepley y[9*(*idx)+2] += alpha3*(*v); 15722b692628SMatthew Knepley y[9*(*idx)+3] += alpha4*(*v); 15732b692628SMatthew Knepley y[9*(*idx)+4] += alpha5*(*v); 15742b692628SMatthew Knepley y[9*(*idx)+5] += alpha6*(*v); 15752b692628SMatthew Knepley y[9*(*idx)+6] += alpha7*(*v); 15762b692628SMatthew Knepley y[9*(*idx)+7] += alpha8*(*v); 15772b692628SMatthew Knepley y[9*(*idx)+8] += alpha9*(*v); 15782b692628SMatthew Knepley idx++; v++; 15792b692628SMatthew Knepley } 15802b692628SMatthew Knepley } 1581efee365bSSatish Balay ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 15821ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 15831ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 15842b692628SMatthew Knepley PetscFunctionReturn(0); 15852b692628SMatthew Knepley } 1586b9cda22cSBarry Smith /*--------------------------------------------------------------------------------------------*/ 1587b9cda22cSBarry Smith #undef __FUNCT__ 1588b9cda22cSBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_10" 1589b9cda22cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1590b9cda22cSBarry Smith { 1591b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1592b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1593b9cda22cSBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1594b9cda22cSBarry Smith PetscErrorCode ierr; 1595899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1596b9cda22cSBarry Smith PetscInt n,i,jrow,j; 1597b9cda22cSBarry Smith 1598b9cda22cSBarry Smith PetscFunctionBegin; 1599b9cda22cSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1600b9cda22cSBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1601b9cda22cSBarry Smith idx = a->j; 1602b9cda22cSBarry Smith v = a->a; 1603b9cda22cSBarry Smith ii = a->i; 1604b9cda22cSBarry Smith 1605b9cda22cSBarry Smith for (i=0; i<m; i++) { 1606b9cda22cSBarry Smith jrow = ii[i]; 1607b9cda22cSBarry Smith n = ii[i+1] - jrow; 1608b9cda22cSBarry Smith sum1 = 0.0; 1609b9cda22cSBarry Smith sum2 = 0.0; 1610b9cda22cSBarry Smith sum3 = 0.0; 1611b9cda22cSBarry Smith sum4 = 0.0; 1612b9cda22cSBarry Smith sum5 = 0.0; 1613b9cda22cSBarry Smith sum6 = 0.0; 1614b9cda22cSBarry Smith sum7 = 0.0; 1615b9cda22cSBarry Smith sum8 = 0.0; 1616b9cda22cSBarry Smith sum9 = 0.0; 1617b9cda22cSBarry Smith sum10 = 0.0; 1618b9cda22cSBarry Smith for (j=0; j<n; j++) { 1619b9cda22cSBarry Smith sum1 += v[jrow]*x[10*idx[jrow]]; 1620b9cda22cSBarry Smith sum2 += v[jrow]*x[10*idx[jrow]+1]; 1621b9cda22cSBarry Smith sum3 += v[jrow]*x[10*idx[jrow]+2]; 1622b9cda22cSBarry Smith sum4 += v[jrow]*x[10*idx[jrow]+3]; 1623b9cda22cSBarry Smith sum5 += v[jrow]*x[10*idx[jrow]+4]; 1624b9cda22cSBarry Smith sum6 += v[jrow]*x[10*idx[jrow]+5]; 1625b9cda22cSBarry Smith sum7 += v[jrow]*x[10*idx[jrow]+6]; 1626b9cda22cSBarry Smith sum8 += v[jrow]*x[10*idx[jrow]+7]; 1627b9cda22cSBarry Smith sum9 += v[jrow]*x[10*idx[jrow]+8]; 1628b9cda22cSBarry Smith sum10 += v[jrow]*x[10*idx[jrow]+9]; 1629b9cda22cSBarry Smith jrow++; 1630b9cda22cSBarry Smith } 1631b9cda22cSBarry Smith y[10*i] = sum1; 1632b9cda22cSBarry Smith y[10*i+1] = sum2; 1633b9cda22cSBarry Smith y[10*i+2] = sum3; 1634b9cda22cSBarry Smith y[10*i+3] = sum4; 1635b9cda22cSBarry Smith y[10*i+4] = sum5; 1636b9cda22cSBarry Smith y[10*i+5] = sum6; 1637b9cda22cSBarry Smith y[10*i+6] = sum7; 1638b9cda22cSBarry Smith y[10*i+7] = sum8; 1639b9cda22cSBarry Smith y[10*i+8] = sum9; 1640b9cda22cSBarry Smith y[10*i+9] = sum10; 1641b9cda22cSBarry Smith } 1642b9cda22cSBarry Smith 1643b9cda22cSBarry Smith ierr = PetscLogFlops(20*a->nz - 10*m);CHKERRQ(ierr); 1644b9cda22cSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1645b9cda22cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1646b9cda22cSBarry Smith PetscFunctionReturn(0); 1647b9cda22cSBarry Smith } 1648b9cda22cSBarry Smith 1649b9cda22cSBarry Smith #undef __FUNCT__ 1650b9cda22cSBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_10" 1651b9cda22cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1652b9cda22cSBarry Smith { 1653b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1654b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1655b9cda22cSBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1656b9cda22cSBarry Smith PetscErrorCode ierr; 1657899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1658b9cda22cSBarry Smith PetscInt n,i,jrow,j; 1659b9cda22cSBarry Smith 1660b9cda22cSBarry Smith PetscFunctionBegin; 1661b9cda22cSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1662b9cda22cSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1663b9cda22cSBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1664b9cda22cSBarry Smith idx = a->j; 1665b9cda22cSBarry Smith v = a->a; 1666b9cda22cSBarry Smith ii = a->i; 1667b9cda22cSBarry Smith 1668b9cda22cSBarry Smith for (i=0; i<m; i++) { 1669b9cda22cSBarry Smith jrow = ii[i]; 1670b9cda22cSBarry Smith n = ii[i+1] - jrow; 1671b9cda22cSBarry Smith sum1 = 0.0; 1672b9cda22cSBarry Smith sum2 = 0.0; 1673b9cda22cSBarry Smith sum3 = 0.0; 1674b9cda22cSBarry Smith sum4 = 0.0; 1675b9cda22cSBarry Smith sum5 = 0.0; 1676b9cda22cSBarry Smith sum6 = 0.0; 1677b9cda22cSBarry Smith sum7 = 0.0; 1678b9cda22cSBarry Smith sum8 = 0.0; 1679b9cda22cSBarry Smith sum9 = 0.0; 1680b9cda22cSBarry Smith sum10 = 0.0; 1681b9cda22cSBarry Smith for (j=0; j<n; j++) { 1682b9cda22cSBarry Smith sum1 += v[jrow]*x[10*idx[jrow]]; 1683b9cda22cSBarry Smith sum2 += v[jrow]*x[10*idx[jrow]+1]; 1684b9cda22cSBarry Smith sum3 += v[jrow]*x[10*idx[jrow]+2]; 1685b9cda22cSBarry Smith sum4 += v[jrow]*x[10*idx[jrow]+3]; 1686b9cda22cSBarry Smith sum5 += v[jrow]*x[10*idx[jrow]+4]; 1687b9cda22cSBarry Smith sum6 += v[jrow]*x[10*idx[jrow]+5]; 1688b9cda22cSBarry Smith sum7 += v[jrow]*x[10*idx[jrow]+6]; 1689b9cda22cSBarry Smith sum8 += v[jrow]*x[10*idx[jrow]+7]; 1690b9cda22cSBarry Smith sum9 += v[jrow]*x[10*idx[jrow]+8]; 1691b9cda22cSBarry Smith sum10 += v[jrow]*x[10*idx[jrow]+9]; 1692b9cda22cSBarry Smith jrow++; 1693b9cda22cSBarry Smith } 1694b9cda22cSBarry Smith y[10*i] += sum1; 1695b9cda22cSBarry Smith y[10*i+1] += sum2; 1696b9cda22cSBarry Smith y[10*i+2] += sum3; 1697b9cda22cSBarry Smith y[10*i+3] += sum4; 1698b9cda22cSBarry Smith y[10*i+4] += sum5; 1699b9cda22cSBarry Smith y[10*i+5] += sum6; 1700b9cda22cSBarry Smith y[10*i+6] += sum7; 1701b9cda22cSBarry Smith y[10*i+7] += sum8; 1702b9cda22cSBarry Smith y[10*i+8] += sum9; 1703b9cda22cSBarry Smith y[10*i+9] += sum10; 1704b9cda22cSBarry Smith } 1705b9cda22cSBarry Smith 1706b9cda22cSBarry Smith ierr = PetscLogFlops(20*a->nz - 10*m);CHKERRQ(ierr); 1707b9cda22cSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1708b9cda22cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1709b9cda22cSBarry Smith PetscFunctionReturn(0); 1710b9cda22cSBarry Smith } 1711b9cda22cSBarry Smith 1712b9cda22cSBarry Smith #undef __FUNCT__ 1713b9cda22cSBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10" 1714b9cda22cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1715b9cda22cSBarry Smith { 1716b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1717b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1718b9cda22cSBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,zero = 0.0; 1719b9cda22cSBarry Smith PetscErrorCode ierr; 1720899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1721b9cda22cSBarry Smith 1722b9cda22cSBarry Smith PetscFunctionBegin; 1723b9cda22cSBarry Smith ierr = VecSet(yy,zero);CHKERRQ(ierr); 1724b9cda22cSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1725b9cda22cSBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1726b9cda22cSBarry Smith 1727b9cda22cSBarry Smith for (i=0; i<m; i++) { 1728b9cda22cSBarry Smith idx = a->j + a->i[i] ; 1729b9cda22cSBarry Smith v = a->a + a->i[i] ; 1730b9cda22cSBarry Smith n = a->i[i+1] - a->i[i]; 1731e29fdc22SBarry Smith alpha1 = x[10*i]; 1732e29fdc22SBarry Smith alpha2 = x[10*i+1]; 1733e29fdc22SBarry Smith alpha3 = x[10*i+2]; 1734e29fdc22SBarry Smith alpha4 = x[10*i+3]; 1735e29fdc22SBarry Smith alpha5 = x[10*i+4]; 1736e29fdc22SBarry Smith alpha6 = x[10*i+5]; 1737e29fdc22SBarry Smith alpha7 = x[10*i+6]; 1738e29fdc22SBarry Smith alpha8 = x[10*i+7]; 1739e29fdc22SBarry Smith alpha9 = x[10*i+8]; 1740b9cda22cSBarry Smith alpha10 = x[10*i+9]; 1741b9cda22cSBarry Smith while (n-->0) { 1742e29fdc22SBarry Smith y[10*(*idx)] += alpha1*(*v); 1743e29fdc22SBarry Smith y[10*(*idx)+1] += alpha2*(*v); 1744e29fdc22SBarry Smith y[10*(*idx)+2] += alpha3*(*v); 1745e29fdc22SBarry Smith y[10*(*idx)+3] += alpha4*(*v); 1746e29fdc22SBarry Smith y[10*(*idx)+4] += alpha5*(*v); 1747e29fdc22SBarry Smith y[10*(*idx)+5] += alpha6*(*v); 1748e29fdc22SBarry Smith y[10*(*idx)+6] += alpha7*(*v); 1749e29fdc22SBarry Smith y[10*(*idx)+7] += alpha8*(*v); 1750e29fdc22SBarry Smith y[10*(*idx)+8] += alpha9*(*v); 1751b9cda22cSBarry Smith y[10*(*idx)+9] += alpha10*(*v); 1752b9cda22cSBarry Smith idx++; v++; 1753b9cda22cSBarry Smith } 1754b9cda22cSBarry Smith } 1755899cda47SBarry Smith ierr = PetscLogFlops(20*a->nz - 10*b->AIJ->cmap.n);CHKERRQ(ierr); 1756b9cda22cSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1757b9cda22cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1758b9cda22cSBarry Smith PetscFunctionReturn(0); 1759b9cda22cSBarry Smith } 1760b9cda22cSBarry Smith 1761b9cda22cSBarry Smith #undef __FUNCT__ 1762b9cda22cSBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10" 1763b9cda22cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1764b9cda22cSBarry Smith { 1765b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1766b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1767b9cda22cSBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1768b9cda22cSBarry Smith PetscErrorCode ierr; 1769899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1770b9cda22cSBarry Smith 1771b9cda22cSBarry Smith PetscFunctionBegin; 1772b9cda22cSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1773b9cda22cSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1774b9cda22cSBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1775b9cda22cSBarry Smith for (i=0; i<m; i++) { 1776b9cda22cSBarry Smith idx = a->j + a->i[i] ; 1777b9cda22cSBarry Smith v = a->a + a->i[i] ; 1778b9cda22cSBarry Smith n = a->i[i+1] - a->i[i]; 1779b9cda22cSBarry Smith alpha1 = x[10*i]; 1780b9cda22cSBarry Smith alpha2 = x[10*i+1]; 1781b9cda22cSBarry Smith alpha3 = x[10*i+2]; 1782b9cda22cSBarry Smith alpha4 = x[10*i+3]; 1783b9cda22cSBarry Smith alpha5 = x[10*i+4]; 1784b9cda22cSBarry Smith alpha6 = x[10*i+5]; 1785b9cda22cSBarry Smith alpha7 = x[10*i+6]; 1786b9cda22cSBarry Smith alpha8 = x[10*i+7]; 1787b9cda22cSBarry Smith alpha9 = x[10*i+8]; 1788b9cda22cSBarry Smith alpha10 = x[10*i+9]; 1789b9cda22cSBarry Smith while (n-->0) { 1790b9cda22cSBarry Smith y[10*(*idx)] += alpha1*(*v); 1791b9cda22cSBarry Smith y[10*(*idx)+1] += alpha2*(*v); 1792b9cda22cSBarry Smith y[10*(*idx)+2] += alpha3*(*v); 1793b9cda22cSBarry Smith y[10*(*idx)+3] += alpha4*(*v); 1794b9cda22cSBarry Smith y[10*(*idx)+4] += alpha5*(*v); 1795b9cda22cSBarry Smith y[10*(*idx)+5] += alpha6*(*v); 1796b9cda22cSBarry Smith y[10*(*idx)+6] += alpha7*(*v); 1797b9cda22cSBarry Smith y[10*(*idx)+7] += alpha8*(*v); 1798b9cda22cSBarry Smith y[10*(*idx)+8] += alpha9*(*v); 1799b9cda22cSBarry Smith y[10*(*idx)+9] += alpha10*(*v); 1800b9cda22cSBarry Smith idx++; v++; 1801b9cda22cSBarry Smith } 1802b9cda22cSBarry Smith } 1803b9cda22cSBarry Smith ierr = PetscLogFlops(20*a->nz);CHKERRQ(ierr); 1804b9cda22cSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1805b9cda22cSBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1806b9cda22cSBarry Smith PetscFunctionReturn(0); 1807b9cda22cSBarry Smith } 1808b9cda22cSBarry Smith 18092b692628SMatthew Knepley 18102f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/ 18112f7816d4SBarry Smith #undef __FUNCT__ 18122f7816d4SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_16" 1813dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 18142f7816d4SBarry Smith { 18152f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 18162f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 18172f7816d4SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 18182f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1819dfbe8321SBarry Smith PetscErrorCode ierr; 1820899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1821b24ad042SBarry Smith PetscInt n,i,jrow,j; 18222f7816d4SBarry Smith 18232f7816d4SBarry Smith PetscFunctionBegin; 18241ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 18251ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 18262f7816d4SBarry Smith idx = a->j; 18272f7816d4SBarry Smith v = a->a; 18282f7816d4SBarry Smith ii = a->i; 18292f7816d4SBarry Smith 18302f7816d4SBarry Smith for (i=0; i<m; i++) { 18312f7816d4SBarry Smith jrow = ii[i]; 18322f7816d4SBarry Smith n = ii[i+1] - jrow; 18332f7816d4SBarry Smith sum1 = 0.0; 18342f7816d4SBarry Smith sum2 = 0.0; 18352f7816d4SBarry Smith sum3 = 0.0; 18362f7816d4SBarry Smith sum4 = 0.0; 18372f7816d4SBarry Smith sum5 = 0.0; 18382f7816d4SBarry Smith sum6 = 0.0; 18392f7816d4SBarry Smith sum7 = 0.0; 18402f7816d4SBarry Smith sum8 = 0.0; 18412f7816d4SBarry Smith sum9 = 0.0; 18422f7816d4SBarry Smith sum10 = 0.0; 18432f7816d4SBarry Smith sum11 = 0.0; 18442f7816d4SBarry Smith sum12 = 0.0; 18452f7816d4SBarry Smith sum13 = 0.0; 18462f7816d4SBarry Smith sum14 = 0.0; 18472f7816d4SBarry Smith sum15 = 0.0; 18482f7816d4SBarry Smith sum16 = 0.0; 18492f7816d4SBarry Smith for (j=0; j<n; j++) { 18502f7816d4SBarry Smith sum1 += v[jrow]*x[16*idx[jrow]]; 18512f7816d4SBarry Smith sum2 += v[jrow]*x[16*idx[jrow]+1]; 18522f7816d4SBarry Smith sum3 += v[jrow]*x[16*idx[jrow]+2]; 18532f7816d4SBarry Smith sum4 += v[jrow]*x[16*idx[jrow]+3]; 18542f7816d4SBarry Smith sum5 += v[jrow]*x[16*idx[jrow]+4]; 18552f7816d4SBarry Smith sum6 += v[jrow]*x[16*idx[jrow]+5]; 18562f7816d4SBarry Smith sum7 += v[jrow]*x[16*idx[jrow]+6]; 18572f7816d4SBarry Smith sum8 += v[jrow]*x[16*idx[jrow]+7]; 18582f7816d4SBarry Smith sum9 += v[jrow]*x[16*idx[jrow]+8]; 18592f7816d4SBarry Smith sum10 += v[jrow]*x[16*idx[jrow]+9]; 18602f7816d4SBarry Smith sum11 += v[jrow]*x[16*idx[jrow]+10]; 18612f7816d4SBarry Smith sum12 += v[jrow]*x[16*idx[jrow]+11]; 18622f7816d4SBarry Smith sum13 += v[jrow]*x[16*idx[jrow]+12]; 18632f7816d4SBarry Smith sum14 += v[jrow]*x[16*idx[jrow]+13]; 18642f7816d4SBarry Smith sum15 += v[jrow]*x[16*idx[jrow]+14]; 18652f7816d4SBarry Smith sum16 += v[jrow]*x[16*idx[jrow]+15]; 18662f7816d4SBarry Smith jrow++; 18672f7816d4SBarry Smith } 18682f7816d4SBarry Smith y[16*i] = sum1; 18692f7816d4SBarry Smith y[16*i+1] = sum2; 18702f7816d4SBarry Smith y[16*i+2] = sum3; 18712f7816d4SBarry Smith y[16*i+3] = sum4; 18722f7816d4SBarry Smith y[16*i+4] = sum5; 18732f7816d4SBarry Smith y[16*i+5] = sum6; 18742f7816d4SBarry Smith y[16*i+6] = sum7; 18752f7816d4SBarry Smith y[16*i+7] = sum8; 18762f7816d4SBarry Smith y[16*i+8] = sum9; 18772f7816d4SBarry Smith y[16*i+9] = sum10; 18782f7816d4SBarry Smith y[16*i+10] = sum11; 18792f7816d4SBarry Smith y[16*i+11] = sum12; 18802f7816d4SBarry Smith y[16*i+12] = sum13; 18812f7816d4SBarry Smith y[16*i+13] = sum14; 18822f7816d4SBarry Smith y[16*i+14] = sum15; 18832f7816d4SBarry Smith y[16*i+15] = sum16; 18842f7816d4SBarry Smith } 18852f7816d4SBarry Smith 1886efee365bSSatish Balay ierr = PetscLogFlops(32*a->nz - 16*m);CHKERRQ(ierr); 18871ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 18881ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 18892f7816d4SBarry Smith PetscFunctionReturn(0); 18902f7816d4SBarry Smith } 18912f7816d4SBarry Smith 18922f7816d4SBarry Smith #undef __FUNCT__ 18932f7816d4SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 1894dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 18952f7816d4SBarry Smith { 18962f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 18972f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 18982f7816d4SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 18992f7816d4SBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1900dfbe8321SBarry Smith PetscErrorCode ierr; 1901899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 19022f7816d4SBarry Smith 19032f7816d4SBarry Smith PetscFunctionBegin; 19042dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 19051ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 19061ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1907bfec09a0SHong Zhang 19082f7816d4SBarry Smith for (i=0; i<m; i++) { 1909bfec09a0SHong Zhang idx = a->j + a->i[i] ; 1910bfec09a0SHong Zhang v = a->a + a->i[i] ; 19112f7816d4SBarry Smith n = a->i[i+1] - a->i[i]; 19122f7816d4SBarry Smith alpha1 = x[16*i]; 19132f7816d4SBarry Smith alpha2 = x[16*i+1]; 19142f7816d4SBarry Smith alpha3 = x[16*i+2]; 19152f7816d4SBarry Smith alpha4 = x[16*i+3]; 19162f7816d4SBarry Smith alpha5 = x[16*i+4]; 19172f7816d4SBarry Smith alpha6 = x[16*i+5]; 19182f7816d4SBarry Smith alpha7 = x[16*i+6]; 19192f7816d4SBarry Smith alpha8 = x[16*i+7]; 19202f7816d4SBarry Smith alpha9 = x[16*i+8]; 19212f7816d4SBarry Smith alpha10 = x[16*i+9]; 19222f7816d4SBarry Smith alpha11 = x[16*i+10]; 19232f7816d4SBarry Smith alpha12 = x[16*i+11]; 19242f7816d4SBarry Smith alpha13 = x[16*i+12]; 19252f7816d4SBarry Smith alpha14 = x[16*i+13]; 19262f7816d4SBarry Smith alpha15 = x[16*i+14]; 19272f7816d4SBarry Smith alpha16 = x[16*i+15]; 19282f7816d4SBarry Smith while (n-->0) { 19292f7816d4SBarry Smith y[16*(*idx)] += alpha1*(*v); 19302f7816d4SBarry Smith y[16*(*idx)+1] += alpha2*(*v); 19312f7816d4SBarry Smith y[16*(*idx)+2] += alpha3*(*v); 19322f7816d4SBarry Smith y[16*(*idx)+3] += alpha4*(*v); 19332f7816d4SBarry Smith y[16*(*idx)+4] += alpha5*(*v); 19342f7816d4SBarry Smith y[16*(*idx)+5] += alpha6*(*v); 19352f7816d4SBarry Smith y[16*(*idx)+6] += alpha7*(*v); 19362f7816d4SBarry Smith y[16*(*idx)+7] += alpha8*(*v); 19372f7816d4SBarry Smith y[16*(*idx)+8] += alpha9*(*v); 19382f7816d4SBarry Smith y[16*(*idx)+9] += alpha10*(*v); 19392f7816d4SBarry Smith y[16*(*idx)+10] += alpha11*(*v); 19402f7816d4SBarry Smith y[16*(*idx)+11] += alpha12*(*v); 19412f7816d4SBarry Smith y[16*(*idx)+12] += alpha13*(*v); 19422f7816d4SBarry Smith y[16*(*idx)+13] += alpha14*(*v); 19432f7816d4SBarry Smith y[16*(*idx)+14] += alpha15*(*v); 19442f7816d4SBarry Smith y[16*(*idx)+15] += alpha16*(*v); 19452f7816d4SBarry Smith idx++; v++; 19462f7816d4SBarry Smith } 19472f7816d4SBarry Smith } 1948899cda47SBarry Smith ierr = PetscLogFlops(32*a->nz - 16*b->AIJ->cmap.n);CHKERRQ(ierr); 19491ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 19501ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 19512f7816d4SBarry Smith PetscFunctionReturn(0); 19522f7816d4SBarry Smith } 19532f7816d4SBarry Smith 19542f7816d4SBarry Smith #undef __FUNCT__ 19552f7816d4SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 1956dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 19572f7816d4SBarry Smith { 19582f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 19592f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 19602f7816d4SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 19612f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1962dfbe8321SBarry Smith PetscErrorCode ierr; 1963899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1964b24ad042SBarry Smith PetscInt n,i,jrow,j; 19652f7816d4SBarry Smith 19662f7816d4SBarry Smith PetscFunctionBegin; 19672f7816d4SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 19681ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 19691ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 19702f7816d4SBarry Smith idx = a->j; 19712f7816d4SBarry Smith v = a->a; 19722f7816d4SBarry Smith ii = a->i; 19732f7816d4SBarry Smith 19742f7816d4SBarry Smith for (i=0; i<m; i++) { 19752f7816d4SBarry Smith jrow = ii[i]; 19762f7816d4SBarry Smith n = ii[i+1] - jrow; 19772f7816d4SBarry Smith sum1 = 0.0; 19782f7816d4SBarry Smith sum2 = 0.0; 19792f7816d4SBarry Smith sum3 = 0.0; 19802f7816d4SBarry Smith sum4 = 0.0; 19812f7816d4SBarry Smith sum5 = 0.0; 19822f7816d4SBarry Smith sum6 = 0.0; 19832f7816d4SBarry Smith sum7 = 0.0; 19842f7816d4SBarry Smith sum8 = 0.0; 19852f7816d4SBarry Smith sum9 = 0.0; 19862f7816d4SBarry Smith sum10 = 0.0; 19872f7816d4SBarry Smith sum11 = 0.0; 19882f7816d4SBarry Smith sum12 = 0.0; 19892f7816d4SBarry Smith sum13 = 0.0; 19902f7816d4SBarry Smith sum14 = 0.0; 19912f7816d4SBarry Smith sum15 = 0.0; 19922f7816d4SBarry Smith sum16 = 0.0; 19932f7816d4SBarry Smith for (j=0; j<n; j++) { 19942f7816d4SBarry Smith sum1 += v[jrow]*x[16*idx[jrow]]; 19952f7816d4SBarry Smith sum2 += v[jrow]*x[16*idx[jrow]+1]; 19962f7816d4SBarry Smith sum3 += v[jrow]*x[16*idx[jrow]+2]; 19972f7816d4SBarry Smith sum4 += v[jrow]*x[16*idx[jrow]+3]; 19982f7816d4SBarry Smith sum5 += v[jrow]*x[16*idx[jrow]+4]; 19992f7816d4SBarry Smith sum6 += v[jrow]*x[16*idx[jrow]+5]; 20002f7816d4SBarry Smith sum7 += v[jrow]*x[16*idx[jrow]+6]; 20012f7816d4SBarry Smith sum8 += v[jrow]*x[16*idx[jrow]+7]; 20022f7816d4SBarry Smith sum9 += v[jrow]*x[16*idx[jrow]+8]; 20032f7816d4SBarry Smith sum10 += v[jrow]*x[16*idx[jrow]+9]; 20042f7816d4SBarry Smith sum11 += v[jrow]*x[16*idx[jrow]+10]; 20052f7816d4SBarry Smith sum12 += v[jrow]*x[16*idx[jrow]+11]; 20062f7816d4SBarry Smith sum13 += v[jrow]*x[16*idx[jrow]+12]; 20072f7816d4SBarry Smith sum14 += v[jrow]*x[16*idx[jrow]+13]; 20082f7816d4SBarry Smith sum15 += v[jrow]*x[16*idx[jrow]+14]; 20092f7816d4SBarry Smith sum16 += v[jrow]*x[16*idx[jrow]+15]; 20102f7816d4SBarry Smith jrow++; 20112f7816d4SBarry Smith } 20122f7816d4SBarry Smith y[16*i] += sum1; 20132f7816d4SBarry Smith y[16*i+1] += sum2; 20142f7816d4SBarry Smith y[16*i+2] += sum3; 20152f7816d4SBarry Smith y[16*i+3] += sum4; 20162f7816d4SBarry Smith y[16*i+4] += sum5; 20172f7816d4SBarry Smith y[16*i+5] += sum6; 20182f7816d4SBarry Smith y[16*i+6] += sum7; 20192f7816d4SBarry Smith y[16*i+7] += sum8; 20202f7816d4SBarry Smith y[16*i+8] += sum9; 20212f7816d4SBarry Smith y[16*i+9] += sum10; 20222f7816d4SBarry Smith y[16*i+10] += sum11; 20232f7816d4SBarry Smith y[16*i+11] += sum12; 20242f7816d4SBarry Smith y[16*i+12] += sum13; 20252f7816d4SBarry Smith y[16*i+13] += sum14; 20262f7816d4SBarry Smith y[16*i+14] += sum15; 20272f7816d4SBarry Smith y[16*i+15] += sum16; 20282f7816d4SBarry Smith } 20292f7816d4SBarry Smith 2030efee365bSSatish Balay ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr); 20311ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 20321ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 20332f7816d4SBarry Smith PetscFunctionReturn(0); 20342f7816d4SBarry Smith } 20352f7816d4SBarry Smith 20362f7816d4SBarry Smith #undef __FUNCT__ 20372f7816d4SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 2038dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 20392f7816d4SBarry Smith { 20402f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 20412f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 20422f7816d4SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 20432f7816d4SBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2044dfbe8321SBarry Smith PetscErrorCode ierr; 2045899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 20462f7816d4SBarry Smith 20472f7816d4SBarry Smith PetscFunctionBegin; 20482f7816d4SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 20491ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 20501ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 20512f7816d4SBarry Smith for (i=0; i<m; i++) { 2052bfec09a0SHong Zhang idx = a->j + a->i[i] ; 2053bfec09a0SHong Zhang v = a->a + a->i[i] ; 20542f7816d4SBarry Smith n = a->i[i+1] - a->i[i]; 20552f7816d4SBarry Smith alpha1 = x[16*i]; 20562f7816d4SBarry Smith alpha2 = x[16*i+1]; 20572f7816d4SBarry Smith alpha3 = x[16*i+2]; 20582f7816d4SBarry Smith alpha4 = x[16*i+3]; 20592f7816d4SBarry Smith alpha5 = x[16*i+4]; 20602f7816d4SBarry Smith alpha6 = x[16*i+5]; 20612f7816d4SBarry Smith alpha7 = x[16*i+6]; 20622f7816d4SBarry Smith alpha8 = x[16*i+7]; 20632f7816d4SBarry Smith alpha9 = x[16*i+8]; 20642f7816d4SBarry Smith alpha10 = x[16*i+9]; 20652f7816d4SBarry Smith alpha11 = x[16*i+10]; 20662f7816d4SBarry Smith alpha12 = x[16*i+11]; 20672f7816d4SBarry Smith alpha13 = x[16*i+12]; 20682f7816d4SBarry Smith alpha14 = x[16*i+13]; 20692f7816d4SBarry Smith alpha15 = x[16*i+14]; 20702f7816d4SBarry Smith alpha16 = x[16*i+15]; 20712f7816d4SBarry Smith while (n-->0) { 20722f7816d4SBarry Smith y[16*(*idx)] += alpha1*(*v); 20732f7816d4SBarry Smith y[16*(*idx)+1] += alpha2*(*v); 20742f7816d4SBarry Smith y[16*(*idx)+2] += alpha3*(*v); 20752f7816d4SBarry Smith y[16*(*idx)+3] += alpha4*(*v); 20762f7816d4SBarry Smith y[16*(*idx)+4] += alpha5*(*v); 20772f7816d4SBarry Smith y[16*(*idx)+5] += alpha6*(*v); 20782f7816d4SBarry Smith y[16*(*idx)+6] += alpha7*(*v); 20792f7816d4SBarry Smith y[16*(*idx)+7] += alpha8*(*v); 20802f7816d4SBarry Smith y[16*(*idx)+8] += alpha9*(*v); 20812f7816d4SBarry Smith y[16*(*idx)+9] += alpha10*(*v); 20822f7816d4SBarry Smith y[16*(*idx)+10] += alpha11*(*v); 20832f7816d4SBarry Smith y[16*(*idx)+11] += alpha12*(*v); 20842f7816d4SBarry Smith y[16*(*idx)+12] += alpha13*(*v); 20852f7816d4SBarry Smith y[16*(*idx)+13] += alpha14*(*v); 20862f7816d4SBarry Smith y[16*(*idx)+14] += alpha15*(*v); 20872f7816d4SBarry Smith y[16*(*idx)+15] += alpha16*(*v); 20882f7816d4SBarry Smith idx++; v++; 20892f7816d4SBarry Smith } 20902f7816d4SBarry Smith } 2091efee365bSSatish Balay ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr); 20921ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 20931ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 209466ed3db0SBarry Smith PetscFunctionReturn(0); 209566ed3db0SBarry Smith } 209666ed3db0SBarry Smith 2097f4a53059SBarry Smith /*===================================================================================*/ 20984a2ae208SSatish Balay #undef __FUNCT__ 20994a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIMAIJ_dof" 2100dfbe8321SBarry Smith PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2101f4a53059SBarry Smith { 21024cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2103dfbe8321SBarry Smith PetscErrorCode ierr; 2104f4a53059SBarry Smith 2105b24ad042SBarry Smith PetscFunctionBegin; 2106f4a53059SBarry Smith /* start the scatter */ 2107f4a53059SBarry Smith ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 21084cb9d9b8SBarry Smith ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 2109f4a53059SBarry Smith ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 21104cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 2111f4a53059SBarry Smith PetscFunctionReturn(0); 2112f4a53059SBarry Smith } 2113f4a53059SBarry Smith 21144a2ae208SSatish Balay #undef __FUNCT__ 21154a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 2116dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 21174cb9d9b8SBarry Smith { 21184cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2119dfbe8321SBarry Smith PetscErrorCode ierr; 2120b24ad042SBarry Smith 21214cb9d9b8SBarry Smith PetscFunctionBegin; 21224cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 21234cb9d9b8SBarry Smith ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 2124a5ff213dSBarry Smith ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 21254cb9d9b8SBarry Smith ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 21264cb9d9b8SBarry Smith PetscFunctionReturn(0); 21274cb9d9b8SBarry Smith } 21284cb9d9b8SBarry Smith 21294a2ae208SSatish Balay #undef __FUNCT__ 21304a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 2131dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 21324cb9d9b8SBarry Smith { 21334cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2134dfbe8321SBarry Smith PetscErrorCode ierr; 21354cb9d9b8SBarry Smith 2136b24ad042SBarry Smith PetscFunctionBegin; 21374cb9d9b8SBarry Smith /* start the scatter */ 21384cb9d9b8SBarry Smith ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 2139d72c5c08SBarry Smith ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 21404cb9d9b8SBarry Smith ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 2141717f2ec8SHong Zhang ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 21424cb9d9b8SBarry Smith PetscFunctionReturn(0); 21434cb9d9b8SBarry Smith } 21444cb9d9b8SBarry Smith 21454a2ae208SSatish Balay #undef __FUNCT__ 21464a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 2147dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 21484cb9d9b8SBarry Smith { 21494cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2150dfbe8321SBarry Smith PetscErrorCode ierr; 2151b24ad042SBarry Smith 21524cb9d9b8SBarry Smith PetscFunctionBegin; 21534cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2154d72c5c08SBarry Smith ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 2155d72c5c08SBarry Smith ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2156d72c5c08SBarry Smith ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 21574cb9d9b8SBarry Smith PetscFunctionReturn(0); 21584cb9d9b8SBarry Smith } 21594cb9d9b8SBarry Smith 21609c4fc4c7SBarry Smith #undef __FUNCT__ 21617ba1a0bfSKris Buschelman #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 21627ba1a0bfSKris Buschelman PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 21637ba1a0bfSKris Buschelman { 21647ba1a0bfSKris Buschelman /* This routine requires testing -- but it's getting better. */ 21657ba1a0bfSKris Buschelman PetscErrorCode ierr; 2166a1a86e44SBarry Smith PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 21677ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 21687ba1a0bfSKris Buschelman Mat P=pp->AIJ; 21697ba1a0bfSKris Buschelman Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 21707ba1a0bfSKris Buschelman PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 21717ba1a0bfSKris Buschelman PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 2172899cda47SBarry Smith PetscInt an=A->cmap.N,am=A->rmap.N,pn=P->cmap.N,pm=P->rmap.N,ppdof=pp->dof,cn; 21737ba1a0bfSKris Buschelman PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 21747ba1a0bfSKris Buschelman MatScalar *ca; 21757ba1a0bfSKris Buschelman 21767ba1a0bfSKris Buschelman PetscFunctionBegin; 21777ba1a0bfSKris Buschelman /* Start timer */ 21787ba1a0bfSKris Buschelman ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 21797ba1a0bfSKris Buschelman 21807ba1a0bfSKris Buschelman /* Get ij structure of P^T */ 21817ba1a0bfSKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 21827ba1a0bfSKris Buschelman 21837ba1a0bfSKris Buschelman cn = pn*ppdof; 21847ba1a0bfSKris Buschelman /* Allocate ci array, arrays for fill computation and */ 21857ba1a0bfSKris Buschelman /* free space for accumulating nonzero column info */ 21867ba1a0bfSKris Buschelman ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 21877ba1a0bfSKris Buschelman ci[0] = 0; 21887ba1a0bfSKris Buschelman 21897ba1a0bfSKris Buschelman /* Work arrays for rows of P^T*A */ 21907ba1a0bfSKris Buschelman ierr = PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 21917ba1a0bfSKris Buschelman ierr = PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 21927ba1a0bfSKris Buschelman ptasparserow = ptadenserow + an; 21937ba1a0bfSKris Buschelman denserow = ptasparserow + an; 21947ba1a0bfSKris Buschelman sparserow = denserow + cn; 21957ba1a0bfSKris Buschelman 21967ba1a0bfSKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 21977ba1a0bfSKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 21987ba1a0bfSKris Buschelman /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 2199a1a86e44SBarry Smith ierr = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space); 22007ba1a0bfSKris Buschelman current_space = free_space; 22017ba1a0bfSKris Buschelman 22027ba1a0bfSKris Buschelman /* Determine symbolic info for each row of C: */ 22037ba1a0bfSKris Buschelman for (i=0;i<pn;i++) { 22047ba1a0bfSKris Buschelman ptnzi = pti[i+1] - pti[i]; 22057ba1a0bfSKris Buschelman ptJ = ptj + pti[i]; 22067ba1a0bfSKris Buschelman for (dof=0;dof<ppdof;dof++) { 22077ba1a0bfSKris Buschelman ptanzi = 0; 22087ba1a0bfSKris Buschelman /* Determine symbolic row of PtA: */ 22097ba1a0bfSKris Buschelman for (j=0;j<ptnzi;j++) { 22107ba1a0bfSKris Buschelman /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 22117ba1a0bfSKris Buschelman arow = ptJ[j]*ppdof + dof; 22127ba1a0bfSKris Buschelman /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 22137ba1a0bfSKris Buschelman anzj = ai[arow+1] - ai[arow]; 22147ba1a0bfSKris Buschelman ajj = aj + ai[arow]; 22157ba1a0bfSKris Buschelman for (k=0;k<anzj;k++) { 22167ba1a0bfSKris Buschelman if (!ptadenserow[ajj[k]]) { 22177ba1a0bfSKris Buschelman ptadenserow[ajj[k]] = -1; 22187ba1a0bfSKris Buschelman ptasparserow[ptanzi++] = ajj[k]; 22197ba1a0bfSKris Buschelman } 22207ba1a0bfSKris Buschelman } 22217ba1a0bfSKris Buschelman } 22227ba1a0bfSKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 22237ba1a0bfSKris Buschelman ptaj = ptasparserow; 22247ba1a0bfSKris Buschelman cnzi = 0; 22257ba1a0bfSKris Buschelman for (j=0;j<ptanzi;j++) { 22267ba1a0bfSKris Buschelman /* Get offset within block of P */ 22277ba1a0bfSKris Buschelman pshift = *ptaj%ppdof; 22287ba1a0bfSKris Buschelman /* Get block row of P */ 22297ba1a0bfSKris Buschelman prow = (*ptaj++)/ppdof; /* integer division */ 22307ba1a0bfSKris Buschelman /* P has same number of nonzeros per row as the compressed form */ 22317ba1a0bfSKris Buschelman pnzj = pi[prow+1] - pi[prow]; 22327ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 22337ba1a0bfSKris Buschelman for (k=0;k<pnzj;k++) { 22347ba1a0bfSKris Buschelman /* Locations in C are shifted by the offset within the block */ 22357ba1a0bfSKris Buschelman /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 22367ba1a0bfSKris Buschelman if (!denserow[pjj[k]*ppdof+pshift]) { 22377ba1a0bfSKris Buschelman denserow[pjj[k]*ppdof+pshift] = -1; 22387ba1a0bfSKris Buschelman sparserow[cnzi++] = pjj[k]*ppdof+pshift; 22397ba1a0bfSKris Buschelman } 22407ba1a0bfSKris Buschelman } 22417ba1a0bfSKris Buschelman } 22427ba1a0bfSKris Buschelman 22437ba1a0bfSKris Buschelman /* sort sparserow */ 22447ba1a0bfSKris Buschelman ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 22457ba1a0bfSKris Buschelman 22467ba1a0bfSKris Buschelman /* If free space is not available, make more free space */ 22477ba1a0bfSKris Buschelman /* Double the amount of total space in the list */ 22487ba1a0bfSKris Buschelman if (current_space->local_remaining<cnzi) { 2249a1a86e44SBarry Smith ierr = PetscFreeSpaceGet(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 22507ba1a0bfSKris Buschelman } 22517ba1a0bfSKris Buschelman 22527ba1a0bfSKris Buschelman /* Copy data into free space, and zero out denserows */ 22537ba1a0bfSKris Buschelman ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 22547ba1a0bfSKris Buschelman current_space->array += cnzi; 22557ba1a0bfSKris Buschelman current_space->local_used += cnzi; 22567ba1a0bfSKris Buschelman current_space->local_remaining -= cnzi; 22577ba1a0bfSKris Buschelman 22587ba1a0bfSKris Buschelman for (j=0;j<ptanzi;j++) { 22597ba1a0bfSKris Buschelman ptadenserow[ptasparserow[j]] = 0; 22607ba1a0bfSKris Buschelman } 22617ba1a0bfSKris Buschelman for (j=0;j<cnzi;j++) { 22627ba1a0bfSKris Buschelman denserow[sparserow[j]] = 0; 22637ba1a0bfSKris Buschelman } 22647ba1a0bfSKris Buschelman /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 22657ba1a0bfSKris Buschelman /* For now, we will recompute what is needed. */ 22667ba1a0bfSKris Buschelman ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 22677ba1a0bfSKris Buschelman } 22687ba1a0bfSKris Buschelman } 22697ba1a0bfSKris Buschelman /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 22707ba1a0bfSKris Buschelman /* Allocate space for cj, initialize cj, and */ 22717ba1a0bfSKris Buschelman /* destroy list of free space and other temporary array(s) */ 22727ba1a0bfSKris Buschelman ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 2273a1a86e44SBarry Smith ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 22747ba1a0bfSKris Buschelman ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 22757ba1a0bfSKris Buschelman 22767ba1a0bfSKris Buschelman /* Allocate space for ca */ 22777ba1a0bfSKris Buschelman ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 22787ba1a0bfSKris Buschelman ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 22797ba1a0bfSKris Buschelman 22807ba1a0bfSKris Buschelman /* put together the new matrix */ 22817ba1a0bfSKris Buschelman ierr = MatCreateSeqAIJWithArrays(A->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr); 22827ba1a0bfSKris Buschelman 22837ba1a0bfSKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 22847ba1a0bfSKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 22857ba1a0bfSKris Buschelman c = (Mat_SeqAIJ *)((*C)->data); 22867ba1a0bfSKris Buschelman c->freedata = PETSC_TRUE; 22877ba1a0bfSKris Buschelman c->nonew = 0; 22887ba1a0bfSKris Buschelman 22897ba1a0bfSKris Buschelman /* Clean up. */ 22907ba1a0bfSKris Buschelman ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 22917ba1a0bfSKris Buschelman 22927ba1a0bfSKris Buschelman ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 22937ba1a0bfSKris Buschelman PetscFunctionReturn(0); 22947ba1a0bfSKris Buschelman } 22957ba1a0bfSKris Buschelman 22967ba1a0bfSKris Buschelman #undef __FUNCT__ 22977ba1a0bfSKris Buschelman #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ" 22987ba1a0bfSKris Buschelman PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 22997ba1a0bfSKris Buschelman { 23007ba1a0bfSKris Buschelman /* This routine requires testing -- first draft only */ 23017ba1a0bfSKris Buschelman PetscErrorCode ierr; 23027ba1a0bfSKris Buschelman PetscInt flops=0; 23037ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 23047ba1a0bfSKris Buschelman Mat P=pp->AIJ; 23057ba1a0bfSKris Buschelman Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 23067ba1a0bfSKris Buschelman Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 23077ba1a0bfSKris Buschelman Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 23087ba1a0bfSKris Buschelman PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 23097ba1a0bfSKris Buschelman PetscInt *ci=c->i,*cj=c->j,*cjj; 2310899cda47SBarry Smith PetscInt am=A->rmap.N,cn=C->cmap.N,cm=C->rmap.N,ppdof=pp->dof; 23117ba1a0bfSKris Buschelman PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 23127ba1a0bfSKris Buschelman MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 23137ba1a0bfSKris Buschelman 23147ba1a0bfSKris Buschelman PetscFunctionBegin; 23157ba1a0bfSKris Buschelman /* Allocate temporary array for storage of one row of A*P */ 23167ba1a0bfSKris Buschelman ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 23177ba1a0bfSKris Buschelman ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 23187ba1a0bfSKris Buschelman 23197ba1a0bfSKris Buschelman apj = (PetscInt *)(apa + cn); 23207ba1a0bfSKris Buschelman apjdense = apj + cn; 23217ba1a0bfSKris Buschelman 23227ba1a0bfSKris Buschelman /* Clear old values in C */ 23237ba1a0bfSKris Buschelman ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 23247ba1a0bfSKris Buschelman 23257ba1a0bfSKris Buschelman for (i=0;i<am;i++) { 23267ba1a0bfSKris Buschelman /* Form sparse row of A*P */ 23277ba1a0bfSKris Buschelman anzi = ai[i+1] - ai[i]; 23287ba1a0bfSKris Buschelman apnzj = 0; 23297ba1a0bfSKris Buschelman for (j=0;j<anzi;j++) { 23307ba1a0bfSKris Buschelman /* Get offset within block of P */ 23317ba1a0bfSKris Buschelman pshift = *aj%ppdof; 23327ba1a0bfSKris Buschelman /* Get block row of P */ 23337ba1a0bfSKris Buschelman prow = *aj++/ppdof; /* integer division */ 23347ba1a0bfSKris Buschelman pnzj = pi[prow+1] - pi[prow]; 23357ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 23367ba1a0bfSKris Buschelman paj = pa + pi[prow]; 23377ba1a0bfSKris Buschelman for (k=0;k<pnzj;k++) { 23387ba1a0bfSKris Buschelman poffset = pjj[k]*ppdof+pshift; 23397ba1a0bfSKris Buschelman if (!apjdense[poffset]) { 23407ba1a0bfSKris Buschelman apjdense[poffset] = -1; 23417ba1a0bfSKris Buschelman apj[apnzj++] = poffset; 23427ba1a0bfSKris Buschelman } 23437ba1a0bfSKris Buschelman apa[poffset] += (*aa)*paj[k]; 23447ba1a0bfSKris Buschelman } 23457ba1a0bfSKris Buschelman flops += 2*pnzj; 23467ba1a0bfSKris Buschelman aa++; 23477ba1a0bfSKris Buschelman } 23487ba1a0bfSKris Buschelman 23497ba1a0bfSKris Buschelman /* Sort the j index array for quick sparse axpy. */ 23507ba1a0bfSKris Buschelman /* Note: a array does not need sorting as it is in dense storage locations. */ 23517ba1a0bfSKris Buschelman ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 23527ba1a0bfSKris Buschelman 23537ba1a0bfSKris Buschelman /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 23547ba1a0bfSKris Buschelman prow = i/ppdof; /* integer division */ 23557ba1a0bfSKris Buschelman pshift = i%ppdof; 23567ba1a0bfSKris Buschelman poffset = pi[prow]; 23577ba1a0bfSKris Buschelman pnzi = pi[prow+1] - poffset; 23587ba1a0bfSKris Buschelman /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 23597ba1a0bfSKris Buschelman pJ = pj+poffset; 23607ba1a0bfSKris Buschelman pA = pa+poffset; 23617ba1a0bfSKris Buschelman for (j=0;j<pnzi;j++) { 23627ba1a0bfSKris Buschelman crow = (*pJ)*ppdof+pshift; 23637ba1a0bfSKris Buschelman cjj = cj + ci[crow]; 23647ba1a0bfSKris Buschelman caj = ca + ci[crow]; 23657ba1a0bfSKris Buschelman pJ++; 23667ba1a0bfSKris Buschelman /* Perform sparse axpy operation. Note cjj includes apj. */ 23677ba1a0bfSKris Buschelman for (k=0,nextap=0;nextap<apnzj;k++) { 23687ba1a0bfSKris Buschelman if (cjj[k]==apj[nextap]) { 23697ba1a0bfSKris Buschelman caj[k] += (*pA)*apa[apj[nextap++]]; 23707ba1a0bfSKris Buschelman } 23717ba1a0bfSKris Buschelman } 23727ba1a0bfSKris Buschelman flops += 2*apnzj; 23737ba1a0bfSKris Buschelman pA++; 23747ba1a0bfSKris Buschelman } 23757ba1a0bfSKris Buschelman 23767ba1a0bfSKris Buschelman /* Zero the current row info for A*P */ 23777ba1a0bfSKris Buschelman for (j=0;j<apnzj;j++) { 23787ba1a0bfSKris Buschelman apa[apj[j]] = 0.; 23797ba1a0bfSKris Buschelman apjdense[apj[j]] = 0; 23807ba1a0bfSKris Buschelman } 23817ba1a0bfSKris Buschelman } 23827ba1a0bfSKris Buschelman 23837ba1a0bfSKris Buschelman /* Assemble the final matrix and clean up */ 23847ba1a0bfSKris Buschelman ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 23857ba1a0bfSKris Buschelman ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 23867ba1a0bfSKris Buschelman ierr = PetscFree(apa);CHKERRQ(ierr); 23877ba1a0bfSKris Buschelman ierr = PetscLogFlops(flops);CHKERRQ(ierr); 23887ba1a0bfSKris Buschelman 23897ba1a0bfSKris Buschelman PetscFunctionReturn(0); 23907ba1a0bfSKris Buschelman } 23917ba1a0bfSKris Buschelman 2392be1d678aSKris Buschelman EXTERN_C_BEGIN 23937ba1a0bfSKris Buschelman #undef __FUNCT__ 23940fd73130SBarry Smith #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ" 2395f69a0ea3SMatthew Knepley PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 23969c4fc4c7SBarry Smith { 23979c4fc4c7SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2398ceb03754SKris Buschelman Mat a = b->AIJ,B; 23999c4fc4c7SBarry Smith Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 24009c4fc4c7SBarry Smith PetscErrorCode ierr; 24010fd73130SBarry Smith PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 2402ba8c8a56SBarry Smith PetscInt *cols; 2403ba8c8a56SBarry Smith PetscScalar *vals; 24049c4fc4c7SBarry Smith 24059c4fc4c7SBarry Smith PetscFunctionBegin; 24069c4fc4c7SBarry Smith ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 24077c307921SBarry Smith ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr); 24089c4fc4c7SBarry Smith for (i=0; i<m; i++) { 24099c4fc4c7SBarry Smith nmax = PetscMax(nmax,aij->ilen[i]); 24100fd73130SBarry Smith for (j=0; j<dof; j++) { 24110fd73130SBarry Smith ilen[dof*i+j] = aij->ilen[i]; 24129c4fc4c7SBarry Smith } 24139c4fc4c7SBarry Smith } 2414ceb03754SKris Buschelman ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr); 2415ceb03754SKris Buschelman ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 24169c4fc4c7SBarry Smith ierr = PetscFree(ilen);CHKERRQ(ierr); 24179c4fc4c7SBarry Smith ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr); 24189c4fc4c7SBarry Smith ii = 0; 24199c4fc4c7SBarry Smith for (i=0; i<m; i++) { 2420ba8c8a56SBarry Smith ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 24210fd73130SBarry Smith for (j=0; j<dof; j++) { 24229c4fc4c7SBarry Smith for (k=0; k<ncols; k++) { 24230fd73130SBarry Smith icols[k] = dof*cols[k]+j; 24249c4fc4c7SBarry Smith } 2425ceb03754SKris Buschelman ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 24269c4fc4c7SBarry Smith ii++; 24279c4fc4c7SBarry Smith } 2428ba8c8a56SBarry Smith ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 24299c4fc4c7SBarry Smith } 24309c4fc4c7SBarry Smith ierr = PetscFree(icols);CHKERRQ(ierr); 2431ceb03754SKris Buschelman ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2432ceb03754SKris Buschelman ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2433ceb03754SKris Buschelman 2434ceb03754SKris Buschelman if (reuse == MAT_REUSE_MATRIX) { 24358ab5b326SKris Buschelman ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 2436c3d102feSKris Buschelman } else { 2437ceb03754SKris Buschelman *newmat = B; 2438c3d102feSKris Buschelman } 24399c4fc4c7SBarry Smith PetscFunctionReturn(0); 24409c4fc4c7SBarry Smith } 2441be1d678aSKris Buschelman EXTERN_C_END 24429c4fc4c7SBarry Smith 24430fd73130SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h" 2444be1d678aSKris Buschelman 2445be1d678aSKris Buschelman EXTERN_C_BEGIN 24460fd73130SBarry Smith #undef __FUNCT__ 24470fd73130SBarry Smith #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ" 2448f69a0ea3SMatthew Knepley PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 24490fd73130SBarry Smith { 24500fd73130SBarry Smith Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 2451ceb03754SKris Buschelman Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 24520fd73130SBarry Smith Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 24530fd73130SBarry Smith Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 24540fd73130SBarry Smith Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 24550fd73130SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 24567a1a7badSBarry Smith PetscInt dof = maij->dof,i,j,*dnz,*onz,nmax = 0,onmax = 0,*oicols,*icols,ncols,*cols,oncols,*ocols; 24570fd73130SBarry Smith PetscInt rstart,cstart,*garray,ii,k; 24580fd73130SBarry Smith PetscErrorCode ierr; 24590fd73130SBarry Smith PetscScalar *vals,*ovals; 24600fd73130SBarry Smith 24610fd73130SBarry Smith PetscFunctionBegin; 2462899cda47SBarry Smith ierr = PetscMalloc2(A->rmap.n,PetscInt,&dnz,A->rmap.n,PetscInt,&onz);CHKERRQ(ierr); 2463899cda47SBarry Smith for (i=0; i<A->rmap.n/dof; i++) { 24640fd73130SBarry Smith nmax = PetscMax(nmax,AIJ->ilen[i]); 24650fd73130SBarry Smith onmax = PetscMax(onmax,OAIJ->ilen[i]); 24660fd73130SBarry Smith for (j=0; j<dof; j++) { 24670fd73130SBarry Smith dnz[dof*i+j] = AIJ->ilen[i]; 24680fd73130SBarry Smith onz[dof*i+j] = OAIJ->ilen[i]; 24690fd73130SBarry Smith } 24700fd73130SBarry Smith } 2471899cda47SBarry Smith ierr = MatCreateMPIAIJ(A->comm,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N,0,dnz,0,onz,&B);CHKERRQ(ierr); 2472ceb03754SKris Buschelman ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 24730fd73130SBarry Smith ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 24740fd73130SBarry Smith 24757a1a7badSBarry Smith ierr = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr); 2476*9b5a856fSSatish Balay rstart = dof*maij->A->rmap.rstart; 2477*9b5a856fSSatish Balay cstart = dof*maij->A->cmap.rstart; 24780fd73130SBarry Smith garray = mpiaij->garray; 24790fd73130SBarry Smith 24800fd73130SBarry Smith ii = rstart; 2481899cda47SBarry Smith for (i=0; i<A->rmap.n/dof; i++) { 24820fd73130SBarry Smith ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 24830fd73130SBarry Smith ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 24840fd73130SBarry Smith for (j=0; j<dof; j++) { 24850fd73130SBarry Smith for (k=0; k<ncols; k++) { 24860fd73130SBarry Smith icols[k] = cstart + dof*cols[k]+j; 24870fd73130SBarry Smith } 24880fd73130SBarry Smith for (k=0; k<oncols; k++) { 24890fd73130SBarry Smith oicols[k] = dof*garray[ocols[k]]+j; 24900fd73130SBarry Smith } 2491ceb03754SKris Buschelman ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 2492ceb03754SKris Buschelman ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 24930fd73130SBarry Smith ii++; 24940fd73130SBarry Smith } 24950fd73130SBarry Smith ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 24960fd73130SBarry Smith ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 24970fd73130SBarry Smith } 24980fd73130SBarry Smith ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 24990fd73130SBarry Smith 2500ceb03754SKris Buschelman ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2501ceb03754SKris Buschelman ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2502ceb03754SKris Buschelman 2503ceb03754SKris Buschelman if (reuse == MAT_REUSE_MATRIX) { 25048ab5b326SKris Buschelman ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 2505c3d102feSKris Buschelman } else { 2506ceb03754SKris Buschelman *newmat = B; 2507c3d102feSKris Buschelman } 25080fd73130SBarry Smith PetscFunctionReturn(0); 25090fd73130SBarry Smith } 2510be1d678aSKris Buschelman EXTERN_C_END 25110fd73130SBarry Smith 25120fd73130SBarry Smith 2513bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */ 25145983afb6SSatish Balay /*MC 25150bad9183SKris Buschelman MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 25160bad9183SKris Buschelman operations for multicomponent problems. It interpolates each component the same 25170bad9183SKris Buschelman way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 25180bad9183SKris Buschelman and MATMPIAIJ for distributed matrices. 25190bad9183SKris Buschelman 25200bad9183SKris Buschelman Operations provided: 25210fd73130SBarry Smith + MatMult 25220bad9183SKris Buschelman . MatMultTranspose 25230bad9183SKris Buschelman . MatMultAdd 25240bad9183SKris Buschelman . MatMultTransposeAdd 25250fd73130SBarry Smith - MatView 25260bad9183SKris Buschelman 25270bad9183SKris Buschelman Level: advanced 25280bad9183SKris Buschelman 25290bad9183SKris Buschelman M*/ 25304a2ae208SSatish Balay #undef __FUNCT__ 25314a2ae208SSatish Balay #define __FUNCT__ "MatCreateMAIJ" 2532be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 253382b1193eSBarry Smith { 2534dfbe8321SBarry Smith PetscErrorCode ierr; 2535b24ad042SBarry Smith PetscMPIInt size; 2536b24ad042SBarry Smith PetscInt n; 25374cb9d9b8SBarry Smith Mat_MPIMAIJ *b; 253882b1193eSBarry Smith Mat B; 253982b1193eSBarry Smith 254082b1193eSBarry Smith PetscFunctionBegin; 2541d72c5c08SBarry Smith ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2542d72c5c08SBarry Smith 2543d72c5c08SBarry Smith if (dof == 1) { 2544d72c5c08SBarry Smith *maij = A; 2545d72c5c08SBarry Smith } else { 2546f69a0ea3SMatthew Knepley ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 2547899cda47SBarry Smith ierr = MatSetSizes(B,dof*A->rmap.n,dof*A->cmap.n,dof*A->rmap.N,dof*A->cmap.N);CHKERRQ(ierr); 2548cd3bbe55SBarry Smith B->assembled = PETSC_TRUE; 2549d72c5c08SBarry Smith 2550f4a53059SBarry Smith ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2551f4a53059SBarry Smith if (size == 1) { 2552b9b97703SBarry Smith ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 25534cb9d9b8SBarry Smith B->ops->destroy = MatDestroy_SeqMAIJ; 25540fd73130SBarry Smith B->ops->view = MatView_SeqMAIJ; 2555b9b97703SBarry Smith b = (Mat_MPIMAIJ*)B->data; 2556b9b97703SBarry Smith b->dof = dof; 25574cb9d9b8SBarry Smith b->AIJ = A; 2558d72c5c08SBarry Smith if (dof == 2) { 2559cd3bbe55SBarry Smith B->ops->mult = MatMult_SeqMAIJ_2; 2560cd3bbe55SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_2; 2561cd3bbe55SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 2562cd3bbe55SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 2563bcc973b7SBarry Smith } else if (dof == 3) { 2564bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_3; 2565bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_3; 2566bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 2567bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 2568bcc973b7SBarry Smith } else if (dof == 4) { 2569bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_4; 2570bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_4; 2571bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 2572bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 2573f9fae5adSBarry Smith } else if (dof == 5) { 2574f9fae5adSBarry Smith B->ops->mult = MatMult_SeqMAIJ_5; 2575f9fae5adSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_5; 2576f9fae5adSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 2577f9fae5adSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 25786cd98798SBarry Smith } else if (dof == 6) { 25796cd98798SBarry Smith B->ops->mult = MatMult_SeqMAIJ_6; 25806cd98798SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_6; 25816cd98798SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 25826cd98798SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 2583ed8eea03SBarry Smith } else if (dof == 7) { 2584ed8eea03SBarry Smith B->ops->mult = MatMult_SeqMAIJ_7; 2585ed8eea03SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_7; 2586ed8eea03SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 2587ed8eea03SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 258866ed3db0SBarry Smith } else if (dof == 8) { 258966ed3db0SBarry Smith B->ops->mult = MatMult_SeqMAIJ_8; 259066ed3db0SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_8; 259166ed3db0SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 259266ed3db0SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 25932b692628SMatthew Knepley } else if (dof == 9) { 25942b692628SMatthew Knepley B->ops->mult = MatMult_SeqMAIJ_9; 25952b692628SMatthew Knepley B->ops->multadd = MatMultAdd_SeqMAIJ_9; 25962b692628SMatthew Knepley B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 25972b692628SMatthew Knepley B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 2598b9cda22cSBarry Smith } else if (dof == 10) { 2599b9cda22cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_10; 2600b9cda22cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_10; 2601b9cda22cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 2602b9cda22cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 26032f7816d4SBarry Smith } else if (dof == 16) { 26042f7816d4SBarry Smith B->ops->mult = MatMult_SeqMAIJ_16; 26052f7816d4SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_16; 26062f7816d4SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 26072f7816d4SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 260882b1193eSBarry Smith } else { 260977431f27SBarry Smith SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof); 261082b1193eSBarry Smith } 26117ba1a0bfSKris Buschelman B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ; 26127ba1a0bfSKris Buschelman B->ops->ptapnumeric_seqaij = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 26139c4fc4c7SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 2614f4a53059SBarry Smith } else { 2615f4a53059SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 2616f4a53059SBarry Smith IS from,to; 2617f4a53059SBarry Smith Vec gvec; 2618b24ad042SBarry Smith PetscInt *garray,i; 2619f4a53059SBarry Smith 2620b9b97703SBarry Smith ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 2621d72c5c08SBarry Smith B->ops->destroy = MatDestroy_MPIMAIJ; 26220fd73130SBarry Smith B->ops->view = MatView_MPIMAIJ; 2623b9b97703SBarry Smith b = (Mat_MPIMAIJ*)B->data; 2624b9b97703SBarry Smith b->dof = dof; 2625b9b97703SBarry Smith b->A = A; 26264cb9d9b8SBarry Smith ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 26274cb9d9b8SBarry Smith ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 26284cb9d9b8SBarry Smith 2629f4a53059SBarry Smith ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 2630f4a53059SBarry Smith ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr); 263113288a74SBarry Smith ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 2632f4a53059SBarry Smith 2633f4a53059SBarry Smith /* create two temporary Index sets for build scatter gather */ 2634b24ad042SBarry Smith ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2635f4a53059SBarry Smith for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i]; 2636f4a53059SBarry Smith ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr); 2637f4a53059SBarry Smith ierr = PetscFree(garray);CHKERRQ(ierr); 2638f4a53059SBarry Smith ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 2639f4a53059SBarry Smith 2640f4a53059SBarry Smith /* create temporary global vector to generate scatter context */ 2641899cda47SBarry Smith ierr = VecCreateMPI(A->comm,dof*A->cmap.n,dof*A->cmap.N,&gvec);CHKERRQ(ierr); 264213288a74SBarry Smith ierr = VecSetBlockSize(gvec,dof);CHKERRQ(ierr); 2643f4a53059SBarry Smith 2644f4a53059SBarry Smith /* generate the scatter context */ 2645f4a53059SBarry Smith ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 2646f4a53059SBarry Smith 2647f4a53059SBarry Smith ierr = ISDestroy(from);CHKERRQ(ierr); 2648f4a53059SBarry Smith ierr = ISDestroy(to);CHKERRQ(ierr); 2649f4a53059SBarry Smith ierr = VecDestroy(gvec);CHKERRQ(ierr); 2650f4a53059SBarry Smith 2651f4a53059SBarry Smith B->ops->mult = MatMult_MPIMAIJ_dof; 26524cb9d9b8SBarry Smith B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 26534cb9d9b8SBarry Smith B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 26544cb9d9b8SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 26550fd73130SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 2656f4a53059SBarry Smith } 2657cd3bbe55SBarry Smith *maij = B; 26580fd73130SBarry Smith ierr = MatView_Private(B);CHKERRQ(ierr); 2659d72c5c08SBarry Smith } 266082b1193eSBarry Smith PetscFunctionReturn(0); 266182b1193eSBarry Smith } 266282b1193eSBarry Smith 266382b1193eSBarry Smith 266482b1193eSBarry Smith 266582b1193eSBarry Smith 266682b1193eSBarry Smith 266782b1193eSBarry Smith 266882b1193eSBarry Smith 266982b1193eSBarry Smith 267082b1193eSBarry Smith 267182b1193eSBarry Smith 267282b1193eSBarry Smith 267382b1193eSBarry Smith 2674