1*be1d678aSKris Buschelman #define PETSCMAT_DLL 2*be1d678aSKris Buschelman 382b1193eSBarry Smith /* 4cd3bbe55SBarry Smith Defines the basic matrix operations for the MAIJ matrix storage format. 5cd3bbe55SBarry Smith This format is used for restriction and interpolation operations for 6cd3bbe55SBarry Smith multicomponent problems. It interpolates each component the same way 7cd3bbe55SBarry Smith independently. 8cd3bbe55SBarry Smith 9cd3bbe55SBarry Smith We provide: 10cd3bbe55SBarry Smith MatMult() 11cd3bbe55SBarry Smith MatMultTranspose() 12cd3bbe55SBarry Smith MatMultTransposeAdd() 13cd3bbe55SBarry Smith MatMultAdd() 14cd3bbe55SBarry Smith and 15cd3bbe55SBarry Smith MatCreateMAIJ(Mat,dof,Mat*) 16f4a53059SBarry Smith 17f4a53059SBarry Smith This single directory handles both the sequential and parallel codes 1882b1193eSBarry Smith */ 1982b1193eSBarry Smith 20be911618SKris Buschelman #include "src/mat/impls/maij/maij.h" 217ba1a0bfSKris Buschelman #include "src/mat/utils/freespace.h" 223c94ec11SBarry Smith #include "vecimpl.h" 2382b1193eSBarry Smith 244a2ae208SSatish Balay #undef __FUNCT__ 254a2ae208SSatish Balay #define __FUNCT__ "MatMAIJGetAIJ" 26*be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJGetAIJ(Mat A,Mat *B) 27b9b97703SBarry Smith { 28dfbe8321SBarry Smith PetscErrorCode ierr; 29b9b97703SBarry Smith PetscTruth ismpimaij,isseqmaij; 30b9b97703SBarry Smith 31b9b97703SBarry Smith PetscFunctionBegin; 32b9b97703SBarry Smith ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 33b9b97703SBarry Smith ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 34b9b97703SBarry Smith if (ismpimaij) { 35b9b97703SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 36b9b97703SBarry Smith 37b9b97703SBarry Smith *B = b->A; 38b9b97703SBarry Smith } else if (isseqmaij) { 39b9b97703SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 40b9b97703SBarry Smith 41b9b97703SBarry Smith *B = b->AIJ; 42b9b97703SBarry Smith } else { 43b9b97703SBarry Smith *B = A; 44b9b97703SBarry Smith } 45b9b97703SBarry Smith PetscFunctionReturn(0); 46b9b97703SBarry Smith } 47b9b97703SBarry Smith 484a2ae208SSatish Balay #undef __FUNCT__ 494a2ae208SSatish Balay #define __FUNCT__ "MatMAIJRedimension" 50*be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 51b9b97703SBarry Smith { 52dfbe8321SBarry Smith PetscErrorCode ierr; 53b9b97703SBarry Smith Mat Aij; 54b9b97703SBarry Smith 55b9b97703SBarry Smith PetscFunctionBegin; 56b9b97703SBarry Smith ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 57b9b97703SBarry Smith ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 58b9b97703SBarry Smith PetscFunctionReturn(0); 59b9b97703SBarry Smith } 60b9b97703SBarry Smith 614a2ae208SSatish Balay #undef __FUNCT__ 624a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_SeqMAIJ" 63dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 6482b1193eSBarry Smith { 65dfbe8321SBarry Smith PetscErrorCode ierr; 664cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 6782b1193eSBarry Smith 6882b1193eSBarry Smith PetscFunctionBegin; 69cd3bbe55SBarry Smith if (b->AIJ) { 70cd3bbe55SBarry Smith ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 7182b1193eSBarry Smith } 724cb9d9b8SBarry Smith ierr = PetscFree(b);CHKERRQ(ierr); 734cb9d9b8SBarry Smith PetscFunctionReturn(0); 744cb9d9b8SBarry Smith } 754cb9d9b8SBarry Smith 764a2ae208SSatish Balay #undef __FUNCT__ 770fd73130SBarry Smith #define __FUNCT__ "MatView_SeqMAIJ" 780fd73130SBarry Smith PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 790fd73130SBarry Smith { 800fd73130SBarry Smith PetscErrorCode ierr; 810fd73130SBarry Smith Mat B; 820fd73130SBarry Smith 830fd73130SBarry Smith PetscFunctionBegin; 84ceb03754SKris Buschelman ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B); 850fd73130SBarry Smith ierr = MatView(B,viewer);CHKERRQ(ierr); 860fd73130SBarry Smith ierr = MatDestroy(B);CHKERRQ(ierr); 870fd73130SBarry Smith PetscFunctionReturn(0); 880fd73130SBarry Smith } 890fd73130SBarry Smith 900fd73130SBarry Smith #undef __FUNCT__ 910fd73130SBarry Smith #define __FUNCT__ "MatView_MPIMAIJ" 920fd73130SBarry Smith PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 930fd73130SBarry Smith { 940fd73130SBarry Smith PetscErrorCode ierr; 950fd73130SBarry Smith Mat B; 960fd73130SBarry Smith 970fd73130SBarry Smith PetscFunctionBegin; 98ceb03754SKris Buschelman ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B); 990fd73130SBarry Smith ierr = MatView(B,viewer);CHKERRQ(ierr); 1000fd73130SBarry Smith ierr = MatDestroy(B);CHKERRQ(ierr); 1010fd73130SBarry Smith PetscFunctionReturn(0); 1020fd73130SBarry Smith } 1030fd73130SBarry Smith 1040fd73130SBarry Smith #undef __FUNCT__ 1054a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIMAIJ" 106dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 1074cb9d9b8SBarry Smith { 108dfbe8321SBarry Smith PetscErrorCode ierr; 1094cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1104cb9d9b8SBarry Smith 1114cb9d9b8SBarry Smith PetscFunctionBegin; 1124cb9d9b8SBarry Smith if (b->AIJ) { 1134cb9d9b8SBarry Smith ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 1144cb9d9b8SBarry Smith } 115f4a53059SBarry Smith if (b->OAIJ) { 116f4a53059SBarry Smith ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr); 117f4a53059SBarry Smith } 118b9b97703SBarry Smith if (b->A) { 119b9b97703SBarry Smith ierr = MatDestroy(b->A);CHKERRQ(ierr); 120b9b97703SBarry Smith } 121f4a53059SBarry Smith if (b->ctx) { 122f4a53059SBarry Smith ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr); 123f4a53059SBarry Smith } 124f4a53059SBarry Smith if (b->w) { 125f4a53059SBarry Smith ierr = VecDestroy(b->w);CHKERRQ(ierr); 126f4a53059SBarry Smith } 127cd3bbe55SBarry Smith ierr = PetscFree(b);CHKERRQ(ierr); 128b9b97703SBarry Smith PetscFunctionReturn(0); 129b9b97703SBarry Smith } 130b9b97703SBarry Smith 1310bad9183SKris Buschelman /*MC 132fafad747SKris Buschelman MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 1330bad9183SKris Buschelman multicomponent problems, interpolating or restricting each component the same way independently. 1340bad9183SKris Buschelman The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 1350bad9183SKris Buschelman 1360bad9183SKris Buschelman Operations provided: 1370bad9183SKris Buschelman . MatMult 1380bad9183SKris Buschelman . MatMultTranspose 1390bad9183SKris Buschelman . MatMultAdd 1400bad9183SKris Buschelman . MatMultTransposeAdd 1410bad9183SKris Buschelman 1420bad9183SKris Buschelman Level: advanced 1430bad9183SKris Buschelman 1440bad9183SKris Buschelman .seealso: MatCreateSeqDense 1450bad9183SKris Buschelman M*/ 1460bad9183SKris Buschelman 14782b1193eSBarry Smith EXTERN_C_BEGIN 1484a2ae208SSatish Balay #undef __FUNCT__ 1494a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MAIJ" 150*be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MAIJ(Mat A) 15182b1193eSBarry Smith { 152dfbe8321SBarry Smith PetscErrorCode ierr; 1534cb9d9b8SBarry Smith Mat_MPIMAIJ *b; 15482b1193eSBarry Smith 15582b1193eSBarry Smith PetscFunctionBegin; 156b0a32e0cSBarry Smith ierr = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr); 157b0a32e0cSBarry Smith A->data = (void*)b; 158cd3bbe55SBarry Smith ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 159cd3bbe55SBarry Smith A->factor = 0; 160cd3bbe55SBarry Smith A->mapping = 0; 161f4a53059SBarry Smith 162cd3bbe55SBarry Smith b->AIJ = 0; 163cd3bbe55SBarry Smith b->dof = 0; 164f4a53059SBarry Smith b->OAIJ = 0; 165f4a53059SBarry Smith b->ctx = 0; 166f4a53059SBarry Smith b->w = 0; 16782b1193eSBarry Smith PetscFunctionReturn(0); 16882b1193eSBarry Smith } 16982b1193eSBarry Smith EXTERN_C_END 17082b1193eSBarry Smith 171cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 1724a2ae208SSatish Balay #undef __FUNCT__ 173b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_2" 174dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 17582b1193eSBarry Smith { 1764cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 177bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 17887828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2; 179dfbe8321SBarry Smith PetscErrorCode ierr; 180b24ad042SBarry Smith PetscInt m = b->AIJ->m,*idx,*ii; 181b24ad042SBarry Smith PetscInt n,i,jrow,j; 18282b1193eSBarry Smith 183bcc973b7SBarry Smith PetscFunctionBegin; 1841ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1851ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 186bcc973b7SBarry Smith idx = a->j; 187bcc973b7SBarry Smith v = a->a; 188bcc973b7SBarry Smith ii = a->i; 189bcc973b7SBarry Smith 190bcc973b7SBarry Smith for (i=0; i<m; i++) { 191bcc973b7SBarry Smith jrow = ii[i]; 192bcc973b7SBarry Smith n = ii[i+1] - jrow; 193bcc973b7SBarry Smith sum1 = 0.0; 194bcc973b7SBarry Smith sum2 = 0.0; 195bcc973b7SBarry Smith for (j=0; j<n; j++) { 196bcc973b7SBarry Smith sum1 += v[jrow]*x[2*idx[jrow]]; 197bcc973b7SBarry Smith sum2 += v[jrow]*x[2*idx[jrow]+1]; 198bcc973b7SBarry Smith jrow++; 199bcc973b7SBarry Smith } 200bcc973b7SBarry Smith y[2*i] = sum1; 201bcc973b7SBarry Smith y[2*i+1] = sum2; 202bcc973b7SBarry Smith } 203bcc973b7SBarry Smith 204efee365bSSatish Balay ierr = PetscLogFlops(4*a->nz - 2*m);CHKERRQ(ierr); 2051ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2061ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 20782b1193eSBarry Smith PetscFunctionReturn(0); 20882b1193eSBarry Smith } 209bcc973b7SBarry Smith 2104a2ae208SSatish Balay #undef __FUNCT__ 211b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 212dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 21382b1193eSBarry Smith { 2144cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 215bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 21687828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0; 217dfbe8321SBarry Smith PetscErrorCode ierr; 218b24ad042SBarry Smith PetscInt m = b->AIJ->m,n,i,*idx; 21982b1193eSBarry Smith 220bcc973b7SBarry Smith PetscFunctionBegin; 221bcc973b7SBarry Smith ierr = VecSet(&zero,yy);CHKERRQ(ierr); 2221ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2231ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 224bfec09a0SHong Zhang 225bcc973b7SBarry Smith for (i=0; i<m; i++) { 226bfec09a0SHong Zhang idx = a->j + a->i[i] ; 227bfec09a0SHong Zhang v = a->a + a->i[i] ; 228bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 229bcc973b7SBarry Smith alpha1 = x[2*i]; 230bcc973b7SBarry Smith alpha2 = x[2*i+1]; 231bcc973b7SBarry Smith while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 232bcc973b7SBarry Smith } 233efee365bSSatish Balay ierr = PetscLogFlops(4*a->nz - 2*b->AIJ->n);CHKERRQ(ierr); 2341ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2351ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 23682b1193eSBarry Smith PetscFunctionReturn(0); 23782b1193eSBarry Smith } 238bcc973b7SBarry Smith 2394a2ae208SSatish Balay #undef __FUNCT__ 240b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 241dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 24282b1193eSBarry Smith { 2434cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 244bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 24587828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2; 246dfbe8321SBarry Smith PetscErrorCode ierr; 247b24ad042SBarry Smith PetscInt m = b->AIJ->m,*idx,*ii; 248b24ad042SBarry Smith PetscInt n,i,jrow,j; 24982b1193eSBarry Smith 250bcc973b7SBarry Smith PetscFunctionBegin; 251f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2521ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2531ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 254bcc973b7SBarry Smith idx = a->j; 255bcc973b7SBarry Smith v = a->a; 256bcc973b7SBarry Smith ii = a->i; 257bcc973b7SBarry Smith 258bcc973b7SBarry Smith for (i=0; i<m; i++) { 259bcc973b7SBarry Smith jrow = ii[i]; 260bcc973b7SBarry Smith n = ii[i+1] - jrow; 261bcc973b7SBarry Smith sum1 = 0.0; 262bcc973b7SBarry Smith sum2 = 0.0; 263bcc973b7SBarry Smith for (j=0; j<n; j++) { 264bcc973b7SBarry Smith sum1 += v[jrow]*x[2*idx[jrow]]; 265bcc973b7SBarry Smith sum2 += v[jrow]*x[2*idx[jrow]+1]; 266bcc973b7SBarry Smith jrow++; 267bcc973b7SBarry Smith } 268bcc973b7SBarry Smith y[2*i] += sum1; 269bcc973b7SBarry Smith y[2*i+1] += sum2; 270bcc973b7SBarry Smith } 271bcc973b7SBarry Smith 272efee365bSSatish Balay ierr = PetscLogFlops(4*a->nz - 2*m);CHKERRQ(ierr); 2731ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2741ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 275bcc973b7SBarry Smith PetscFunctionReturn(0); 27682b1193eSBarry Smith } 2774a2ae208SSatish Balay #undef __FUNCT__ 278b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 279dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 28082b1193eSBarry Smith { 2814cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 282bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 28387828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2; 284dfbe8321SBarry Smith PetscErrorCode ierr; 285b24ad042SBarry Smith PetscInt m = b->AIJ->m,n,i,*idx; 28682b1193eSBarry Smith 287bcc973b7SBarry Smith PetscFunctionBegin; 288f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2891ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2901ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 291bfec09a0SHong Zhang 292bcc973b7SBarry Smith for (i=0; i<m; i++) { 293bfec09a0SHong Zhang idx = a->j + a->i[i] ; 294bfec09a0SHong Zhang v = a->a + a->i[i] ; 295bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 296bcc973b7SBarry Smith alpha1 = x[2*i]; 297bcc973b7SBarry Smith alpha2 = x[2*i+1]; 298bcc973b7SBarry Smith while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 299bcc973b7SBarry Smith } 300efee365bSSatish Balay ierr = PetscLogFlops(4*a->nz - 2*b->AIJ->n);CHKERRQ(ierr); 3011ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3021ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 303bcc973b7SBarry Smith PetscFunctionReturn(0); 30482b1193eSBarry Smith } 305cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 3064a2ae208SSatish Balay #undef __FUNCT__ 307b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_3" 308dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 309bcc973b7SBarry Smith { 3104cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 311bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 31287828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3; 313dfbe8321SBarry Smith PetscErrorCode ierr; 314b24ad042SBarry Smith PetscInt m = b->AIJ->m,*idx,*ii; 315b24ad042SBarry Smith PetscInt n,i,jrow,j; 31682b1193eSBarry Smith 317bcc973b7SBarry Smith PetscFunctionBegin; 3181ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3191ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 320bcc973b7SBarry Smith idx = a->j; 321bcc973b7SBarry Smith v = a->a; 322bcc973b7SBarry Smith ii = a->i; 323bcc973b7SBarry Smith 324bcc973b7SBarry Smith for (i=0; i<m; i++) { 325bcc973b7SBarry Smith jrow = ii[i]; 326bcc973b7SBarry Smith n = ii[i+1] - jrow; 327bcc973b7SBarry Smith sum1 = 0.0; 328bcc973b7SBarry Smith sum2 = 0.0; 329bcc973b7SBarry Smith sum3 = 0.0; 330bcc973b7SBarry Smith for (j=0; j<n; j++) { 331bcc973b7SBarry Smith sum1 += v[jrow]*x[3*idx[jrow]]; 332bcc973b7SBarry Smith sum2 += v[jrow]*x[3*idx[jrow]+1]; 333bcc973b7SBarry Smith sum3 += v[jrow]*x[3*idx[jrow]+2]; 334bcc973b7SBarry Smith jrow++; 335bcc973b7SBarry Smith } 336bcc973b7SBarry Smith y[3*i] = sum1; 337bcc973b7SBarry Smith y[3*i+1] = sum2; 338bcc973b7SBarry Smith y[3*i+2] = sum3; 339bcc973b7SBarry Smith } 340bcc973b7SBarry Smith 341efee365bSSatish Balay ierr = PetscLogFlops(6*a->nz - 3*m);CHKERRQ(ierr); 3421ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3431ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 344bcc973b7SBarry Smith PetscFunctionReturn(0); 345bcc973b7SBarry Smith } 346bcc973b7SBarry Smith 3474a2ae208SSatish Balay #undef __FUNCT__ 348b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 349dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 350bcc973b7SBarry Smith { 3514cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 352bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 35387828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0; 354dfbe8321SBarry Smith PetscErrorCode ierr; 355b24ad042SBarry Smith PetscInt m = b->AIJ->m,n,i,*idx; 356bcc973b7SBarry Smith 357bcc973b7SBarry Smith PetscFunctionBegin; 358bcc973b7SBarry Smith ierr = VecSet(&zero,yy);CHKERRQ(ierr); 3591ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3601ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 361bfec09a0SHong Zhang 362bcc973b7SBarry Smith for (i=0; i<m; i++) { 363bfec09a0SHong Zhang idx = a->j + a->i[i]; 364bfec09a0SHong Zhang v = a->a + a->i[i]; 365bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 366bcc973b7SBarry Smith alpha1 = x[3*i]; 367bcc973b7SBarry Smith alpha2 = x[3*i+1]; 368bcc973b7SBarry Smith alpha3 = x[3*i+2]; 369bcc973b7SBarry Smith while (n-->0) { 370bcc973b7SBarry Smith y[3*(*idx)] += alpha1*(*v); 371bcc973b7SBarry Smith y[3*(*idx)+1] += alpha2*(*v); 372bcc973b7SBarry Smith y[3*(*idx)+2] += alpha3*(*v); 373bcc973b7SBarry Smith idx++; v++; 374bcc973b7SBarry Smith } 375bcc973b7SBarry Smith } 376efee365bSSatish Balay ierr = PetscLogFlops(6*a->nz - 3*b->AIJ->n);CHKERRQ(ierr); 3771ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3781ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 379bcc973b7SBarry Smith PetscFunctionReturn(0); 380bcc973b7SBarry Smith } 381bcc973b7SBarry Smith 3824a2ae208SSatish Balay #undef __FUNCT__ 383b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 384dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 385bcc973b7SBarry Smith { 3864cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 387bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 38887828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3; 389dfbe8321SBarry Smith PetscErrorCode ierr; 390b24ad042SBarry Smith PetscInt m = b->AIJ->m,*idx,*ii; 391b24ad042SBarry Smith PetscInt n,i,jrow,j; 392bcc973b7SBarry Smith 393bcc973b7SBarry Smith PetscFunctionBegin; 394f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 3951ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3961ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 397bcc973b7SBarry Smith idx = a->j; 398bcc973b7SBarry Smith v = a->a; 399bcc973b7SBarry Smith ii = a->i; 400bcc973b7SBarry Smith 401bcc973b7SBarry Smith for (i=0; i<m; i++) { 402bcc973b7SBarry Smith jrow = ii[i]; 403bcc973b7SBarry Smith n = ii[i+1] - jrow; 404bcc973b7SBarry Smith sum1 = 0.0; 405bcc973b7SBarry Smith sum2 = 0.0; 406bcc973b7SBarry Smith sum3 = 0.0; 407bcc973b7SBarry Smith for (j=0; j<n; j++) { 408bcc973b7SBarry Smith sum1 += v[jrow]*x[3*idx[jrow]]; 409bcc973b7SBarry Smith sum2 += v[jrow]*x[3*idx[jrow]+1]; 410bcc973b7SBarry Smith sum3 += v[jrow]*x[3*idx[jrow]+2]; 411bcc973b7SBarry Smith jrow++; 412bcc973b7SBarry Smith } 413bcc973b7SBarry Smith y[3*i] += sum1; 414bcc973b7SBarry Smith y[3*i+1] += sum2; 415bcc973b7SBarry Smith y[3*i+2] += sum3; 416bcc973b7SBarry Smith } 417bcc973b7SBarry Smith 418efee365bSSatish Balay ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr); 4191ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 4201ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 421bcc973b7SBarry Smith PetscFunctionReturn(0); 422bcc973b7SBarry Smith } 4234a2ae208SSatish Balay #undef __FUNCT__ 424b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 425dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 426bcc973b7SBarry Smith { 4274cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 428bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 42987828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3; 430dfbe8321SBarry Smith PetscErrorCode ierr; 431b24ad042SBarry Smith PetscInt m = b->AIJ->m,n,i,*idx; 432bcc973b7SBarry Smith 433bcc973b7SBarry Smith PetscFunctionBegin; 434f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 4351ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 4361ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 437bcc973b7SBarry Smith for (i=0; i<m; i++) { 438bfec09a0SHong Zhang idx = a->j + a->i[i] ; 439bfec09a0SHong Zhang v = a->a + a->i[i] ; 440bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 441bcc973b7SBarry Smith alpha1 = x[3*i]; 442bcc973b7SBarry Smith alpha2 = x[3*i+1]; 443bcc973b7SBarry Smith alpha3 = x[3*i+2]; 444bcc973b7SBarry Smith while (n-->0) { 445bcc973b7SBarry Smith y[3*(*idx)] += alpha1*(*v); 446bcc973b7SBarry Smith y[3*(*idx)+1] += alpha2*(*v); 447bcc973b7SBarry Smith y[3*(*idx)+2] += alpha3*(*v); 448bcc973b7SBarry Smith idx++; v++; 449bcc973b7SBarry Smith } 450bcc973b7SBarry Smith } 451efee365bSSatish Balay ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr); 4521ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 4531ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 454bcc973b7SBarry Smith PetscFunctionReturn(0); 455bcc973b7SBarry Smith } 456bcc973b7SBarry Smith 457bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/ 4584a2ae208SSatish Balay #undef __FUNCT__ 459b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_4" 460dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 461bcc973b7SBarry Smith { 4624cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 463bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 46487828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 465dfbe8321SBarry Smith PetscErrorCode ierr; 466b24ad042SBarry Smith PetscInt m = b->AIJ->m,*idx,*ii; 467b24ad042SBarry Smith PetscInt n,i,jrow,j; 468bcc973b7SBarry Smith 469bcc973b7SBarry Smith PetscFunctionBegin; 4701ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 4711ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 472bcc973b7SBarry Smith idx = a->j; 473bcc973b7SBarry Smith v = a->a; 474bcc973b7SBarry Smith ii = a->i; 475bcc973b7SBarry Smith 476bcc973b7SBarry Smith for (i=0; i<m; i++) { 477bcc973b7SBarry Smith jrow = ii[i]; 478bcc973b7SBarry Smith n = ii[i+1] - jrow; 479bcc973b7SBarry Smith sum1 = 0.0; 480bcc973b7SBarry Smith sum2 = 0.0; 481bcc973b7SBarry Smith sum3 = 0.0; 482bcc973b7SBarry Smith sum4 = 0.0; 483bcc973b7SBarry Smith for (j=0; j<n; j++) { 484bcc973b7SBarry Smith sum1 += v[jrow]*x[4*idx[jrow]]; 485bcc973b7SBarry Smith sum2 += v[jrow]*x[4*idx[jrow]+1]; 486bcc973b7SBarry Smith sum3 += v[jrow]*x[4*idx[jrow]+2]; 487bcc973b7SBarry Smith sum4 += v[jrow]*x[4*idx[jrow]+3]; 488bcc973b7SBarry Smith jrow++; 489bcc973b7SBarry Smith } 490bcc973b7SBarry Smith y[4*i] = sum1; 491bcc973b7SBarry Smith y[4*i+1] = sum2; 492bcc973b7SBarry Smith y[4*i+2] = sum3; 493bcc973b7SBarry Smith y[4*i+3] = sum4; 494bcc973b7SBarry Smith } 495bcc973b7SBarry Smith 496efee365bSSatish Balay ierr = PetscLogFlops(8*a->nz - 4*m);CHKERRQ(ierr); 4971ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 4981ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 499bcc973b7SBarry Smith PetscFunctionReturn(0); 500bcc973b7SBarry Smith } 501bcc973b7SBarry Smith 5024a2ae208SSatish Balay #undef __FUNCT__ 503b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 504dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 505bcc973b7SBarry Smith { 5064cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 507bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 50887828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0; 509dfbe8321SBarry Smith PetscErrorCode ierr; 510b24ad042SBarry Smith PetscInt m = b->AIJ->m,n,i,*idx; 511bcc973b7SBarry Smith 512bcc973b7SBarry Smith PetscFunctionBegin; 513bcc973b7SBarry Smith ierr = VecSet(&zero,yy);CHKERRQ(ierr); 5141ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 5151ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 516bcc973b7SBarry Smith for (i=0; i<m; i++) { 517bfec09a0SHong Zhang idx = a->j + a->i[i] ; 518bfec09a0SHong Zhang v = a->a + a->i[i] ; 519bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 520bcc973b7SBarry Smith alpha1 = x[4*i]; 521bcc973b7SBarry Smith alpha2 = x[4*i+1]; 522bcc973b7SBarry Smith alpha3 = x[4*i+2]; 523bcc973b7SBarry Smith alpha4 = x[4*i+3]; 524bcc973b7SBarry Smith while (n-->0) { 525bcc973b7SBarry Smith y[4*(*idx)] += alpha1*(*v); 526bcc973b7SBarry Smith y[4*(*idx)+1] += alpha2*(*v); 527bcc973b7SBarry Smith y[4*(*idx)+2] += alpha3*(*v); 528bcc973b7SBarry Smith y[4*(*idx)+3] += alpha4*(*v); 529bcc973b7SBarry Smith idx++; v++; 530bcc973b7SBarry Smith } 531bcc973b7SBarry Smith } 532efee365bSSatish Balay ierr = PetscLogFlops(8*a->nz - 4*b->AIJ->n);CHKERRQ(ierr); 5331ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 5341ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 535bcc973b7SBarry Smith PetscFunctionReturn(0); 536bcc973b7SBarry Smith } 537bcc973b7SBarry Smith 5384a2ae208SSatish Balay #undef __FUNCT__ 539b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 540dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 541bcc973b7SBarry Smith { 5424cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 543f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 54487828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 545dfbe8321SBarry Smith PetscErrorCode ierr; 546b24ad042SBarry Smith PetscInt m = b->AIJ->m,*idx,*ii; 547b24ad042SBarry Smith PetscInt n,i,jrow,j; 548f9fae5adSBarry Smith 549f9fae5adSBarry Smith PetscFunctionBegin; 550f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 5511ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 5521ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 553f9fae5adSBarry Smith idx = a->j; 554f9fae5adSBarry Smith v = a->a; 555f9fae5adSBarry Smith ii = a->i; 556f9fae5adSBarry Smith 557f9fae5adSBarry Smith for (i=0; i<m; i++) { 558f9fae5adSBarry Smith jrow = ii[i]; 559f9fae5adSBarry Smith n = ii[i+1] - jrow; 560f9fae5adSBarry Smith sum1 = 0.0; 561f9fae5adSBarry Smith sum2 = 0.0; 562f9fae5adSBarry Smith sum3 = 0.0; 563f9fae5adSBarry Smith sum4 = 0.0; 564f9fae5adSBarry Smith for (j=0; j<n; j++) { 565f9fae5adSBarry Smith sum1 += v[jrow]*x[4*idx[jrow]]; 566f9fae5adSBarry Smith sum2 += v[jrow]*x[4*idx[jrow]+1]; 567f9fae5adSBarry Smith sum3 += v[jrow]*x[4*idx[jrow]+2]; 568f9fae5adSBarry Smith sum4 += v[jrow]*x[4*idx[jrow]+3]; 569f9fae5adSBarry Smith jrow++; 570f9fae5adSBarry Smith } 571f9fae5adSBarry Smith y[4*i] += sum1; 572f9fae5adSBarry Smith y[4*i+1] += sum2; 573f9fae5adSBarry Smith y[4*i+2] += sum3; 574f9fae5adSBarry Smith y[4*i+3] += sum4; 575f9fae5adSBarry Smith } 576f9fae5adSBarry Smith 577efee365bSSatish Balay ierr = PetscLogFlops(8*a->nz - 4*m);CHKERRQ(ierr); 5781ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 5791ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 580f9fae5adSBarry Smith PetscFunctionReturn(0); 581bcc973b7SBarry Smith } 5824a2ae208SSatish Balay #undef __FUNCT__ 583b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 584dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 585bcc973b7SBarry Smith { 5864cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 587f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 58887828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4; 589dfbe8321SBarry Smith PetscErrorCode ierr; 590b24ad042SBarry Smith PetscInt m = b->AIJ->m,n,i,*idx; 591f9fae5adSBarry Smith 592f9fae5adSBarry Smith PetscFunctionBegin; 593f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 5941ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 5951ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 596bfec09a0SHong Zhang 597f9fae5adSBarry Smith for (i=0; i<m; i++) { 598bfec09a0SHong Zhang idx = a->j + a->i[i] ; 599bfec09a0SHong Zhang v = a->a + a->i[i] ; 600f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 601f9fae5adSBarry Smith alpha1 = x[4*i]; 602f9fae5adSBarry Smith alpha2 = x[4*i+1]; 603f9fae5adSBarry Smith alpha3 = x[4*i+2]; 604f9fae5adSBarry Smith alpha4 = x[4*i+3]; 605f9fae5adSBarry Smith while (n-->0) { 606f9fae5adSBarry Smith y[4*(*idx)] += alpha1*(*v); 607f9fae5adSBarry Smith y[4*(*idx)+1] += alpha2*(*v); 608f9fae5adSBarry Smith y[4*(*idx)+2] += alpha3*(*v); 609f9fae5adSBarry Smith y[4*(*idx)+3] += alpha4*(*v); 610f9fae5adSBarry Smith idx++; v++; 611f9fae5adSBarry Smith } 612f9fae5adSBarry Smith } 613efee365bSSatish Balay ierr = PetscLogFlops(8*a->nz - 4*b->AIJ->n);CHKERRQ(ierr); 6141ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 6151ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 616f9fae5adSBarry Smith PetscFunctionReturn(0); 617f9fae5adSBarry Smith } 618f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/ 6196cd98798SBarry Smith 6204a2ae208SSatish Balay #undef __FUNCT__ 621b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_5" 622dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 623f9fae5adSBarry Smith { 6244cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 625f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 62687828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 627dfbe8321SBarry Smith PetscErrorCode ierr; 628b24ad042SBarry Smith PetscInt m = b->AIJ->m,*idx,*ii; 629b24ad042SBarry Smith PetscInt n,i,jrow,j; 630f9fae5adSBarry Smith 631f9fae5adSBarry Smith PetscFunctionBegin; 6321ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 6331ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 634f9fae5adSBarry Smith idx = a->j; 635f9fae5adSBarry Smith v = a->a; 636f9fae5adSBarry Smith ii = a->i; 637f9fae5adSBarry Smith 638f9fae5adSBarry Smith for (i=0; i<m; i++) { 639f9fae5adSBarry Smith jrow = ii[i]; 640f9fae5adSBarry Smith n = ii[i+1] - jrow; 641f9fae5adSBarry Smith sum1 = 0.0; 642f9fae5adSBarry Smith sum2 = 0.0; 643f9fae5adSBarry Smith sum3 = 0.0; 644f9fae5adSBarry Smith sum4 = 0.0; 645f9fae5adSBarry Smith sum5 = 0.0; 646f9fae5adSBarry Smith for (j=0; j<n; j++) { 647f9fae5adSBarry Smith sum1 += v[jrow]*x[5*idx[jrow]]; 648f9fae5adSBarry Smith sum2 += v[jrow]*x[5*idx[jrow]+1]; 649f9fae5adSBarry Smith sum3 += v[jrow]*x[5*idx[jrow]+2]; 650f9fae5adSBarry Smith sum4 += v[jrow]*x[5*idx[jrow]+3]; 651f9fae5adSBarry Smith sum5 += v[jrow]*x[5*idx[jrow]+4]; 652f9fae5adSBarry Smith jrow++; 653f9fae5adSBarry Smith } 654f9fae5adSBarry Smith y[5*i] = sum1; 655f9fae5adSBarry Smith y[5*i+1] = sum2; 656f9fae5adSBarry Smith y[5*i+2] = sum3; 657f9fae5adSBarry Smith y[5*i+3] = sum4; 658f9fae5adSBarry Smith y[5*i+4] = sum5; 659f9fae5adSBarry Smith } 660f9fae5adSBarry Smith 661efee365bSSatish Balay ierr = PetscLogFlops(10*a->nz - 5*m);CHKERRQ(ierr); 6621ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 6631ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 664f9fae5adSBarry Smith PetscFunctionReturn(0); 665f9fae5adSBarry Smith } 666f9fae5adSBarry Smith 6674a2ae208SSatish Balay #undef __FUNCT__ 668b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 669dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 670f9fae5adSBarry Smith { 6714cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 672f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 67387828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0; 674dfbe8321SBarry Smith PetscErrorCode ierr; 675b24ad042SBarry Smith PetscInt m = b->AIJ->m,n,i,*idx; 676f9fae5adSBarry Smith 677f9fae5adSBarry Smith PetscFunctionBegin; 678f9fae5adSBarry Smith ierr = VecSet(&zero,yy);CHKERRQ(ierr); 6791ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 6801ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 681bfec09a0SHong Zhang 682f9fae5adSBarry Smith for (i=0; i<m; i++) { 683bfec09a0SHong Zhang idx = a->j + a->i[i] ; 684bfec09a0SHong Zhang v = a->a + a->i[i] ; 685f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 686f9fae5adSBarry Smith alpha1 = x[5*i]; 687f9fae5adSBarry Smith alpha2 = x[5*i+1]; 688f9fae5adSBarry Smith alpha3 = x[5*i+2]; 689f9fae5adSBarry Smith alpha4 = x[5*i+3]; 690f9fae5adSBarry Smith alpha5 = x[5*i+4]; 691f9fae5adSBarry Smith while (n-->0) { 692f9fae5adSBarry Smith y[5*(*idx)] += alpha1*(*v); 693f9fae5adSBarry Smith y[5*(*idx)+1] += alpha2*(*v); 694f9fae5adSBarry Smith y[5*(*idx)+2] += alpha3*(*v); 695f9fae5adSBarry Smith y[5*(*idx)+3] += alpha4*(*v); 696f9fae5adSBarry Smith y[5*(*idx)+4] += alpha5*(*v); 697f9fae5adSBarry Smith idx++; v++; 698f9fae5adSBarry Smith } 699f9fae5adSBarry Smith } 700efee365bSSatish Balay ierr = PetscLogFlops(10*a->nz - 5*b->AIJ->n);CHKERRQ(ierr); 7011ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 7021ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 703f9fae5adSBarry Smith PetscFunctionReturn(0); 704f9fae5adSBarry Smith } 705f9fae5adSBarry Smith 7064a2ae208SSatish Balay #undef __FUNCT__ 707b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 708dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 709f9fae5adSBarry Smith { 7104cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 711f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 71287828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 713dfbe8321SBarry Smith PetscErrorCode ierr; 714b24ad042SBarry Smith PetscInt m = b->AIJ->m,*idx,*ii; 715b24ad042SBarry Smith PetscInt n,i,jrow,j; 716f9fae5adSBarry Smith 717f9fae5adSBarry Smith PetscFunctionBegin; 718f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 7191ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 7201ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 721f9fae5adSBarry Smith idx = a->j; 722f9fae5adSBarry Smith v = a->a; 723f9fae5adSBarry Smith ii = a->i; 724f9fae5adSBarry Smith 725f9fae5adSBarry Smith for (i=0; i<m; i++) { 726f9fae5adSBarry Smith jrow = ii[i]; 727f9fae5adSBarry Smith n = ii[i+1] - jrow; 728f9fae5adSBarry Smith sum1 = 0.0; 729f9fae5adSBarry Smith sum2 = 0.0; 730f9fae5adSBarry Smith sum3 = 0.0; 731f9fae5adSBarry Smith sum4 = 0.0; 732f9fae5adSBarry Smith sum5 = 0.0; 733f9fae5adSBarry Smith for (j=0; j<n; j++) { 734f9fae5adSBarry Smith sum1 += v[jrow]*x[5*idx[jrow]]; 735f9fae5adSBarry Smith sum2 += v[jrow]*x[5*idx[jrow]+1]; 736f9fae5adSBarry Smith sum3 += v[jrow]*x[5*idx[jrow]+2]; 737f9fae5adSBarry Smith sum4 += v[jrow]*x[5*idx[jrow]+3]; 738f9fae5adSBarry Smith sum5 += v[jrow]*x[5*idx[jrow]+4]; 739f9fae5adSBarry Smith jrow++; 740f9fae5adSBarry Smith } 741f9fae5adSBarry Smith y[5*i] += sum1; 742f9fae5adSBarry Smith y[5*i+1] += sum2; 743f9fae5adSBarry Smith y[5*i+2] += sum3; 744f9fae5adSBarry Smith y[5*i+3] += sum4; 745f9fae5adSBarry Smith y[5*i+4] += sum5; 746f9fae5adSBarry Smith } 747f9fae5adSBarry Smith 748efee365bSSatish Balay ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr); 7491ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 7501ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 751f9fae5adSBarry Smith PetscFunctionReturn(0); 752f9fae5adSBarry Smith } 753f9fae5adSBarry Smith 7544a2ae208SSatish Balay #undef __FUNCT__ 755b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 756dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 757f9fae5adSBarry Smith { 7584cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 759f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 76087828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5; 761dfbe8321SBarry Smith PetscErrorCode ierr; 762b24ad042SBarry Smith PetscInt m = b->AIJ->m,n,i,*idx; 763f9fae5adSBarry Smith 764f9fae5adSBarry Smith PetscFunctionBegin; 765f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 7661ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 7671ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 768bfec09a0SHong Zhang 769f9fae5adSBarry Smith for (i=0; i<m; i++) { 770bfec09a0SHong Zhang idx = a->j + a->i[i] ; 771bfec09a0SHong Zhang v = a->a + a->i[i] ; 772f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 773f9fae5adSBarry Smith alpha1 = x[5*i]; 774f9fae5adSBarry Smith alpha2 = x[5*i+1]; 775f9fae5adSBarry Smith alpha3 = x[5*i+2]; 776f9fae5adSBarry Smith alpha4 = x[5*i+3]; 777f9fae5adSBarry Smith alpha5 = x[5*i+4]; 778f9fae5adSBarry Smith while (n-->0) { 779f9fae5adSBarry Smith y[5*(*idx)] += alpha1*(*v); 780f9fae5adSBarry Smith y[5*(*idx)+1] += alpha2*(*v); 781f9fae5adSBarry Smith y[5*(*idx)+2] += alpha3*(*v); 782f9fae5adSBarry Smith y[5*(*idx)+3] += alpha4*(*v); 783f9fae5adSBarry Smith y[5*(*idx)+4] += alpha5*(*v); 784f9fae5adSBarry Smith idx++; v++; 785f9fae5adSBarry Smith } 786f9fae5adSBarry Smith } 787efee365bSSatish Balay ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr); 7881ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 7891ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 790f9fae5adSBarry Smith PetscFunctionReturn(0); 791bcc973b7SBarry Smith } 792bcc973b7SBarry Smith 7936cd98798SBarry Smith /* ------------------------------------------------------------------------------*/ 7944a2ae208SSatish Balay #undef __FUNCT__ 795b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_6" 796dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 7976cd98798SBarry Smith { 7986cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 7996cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 80087828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 801dfbe8321SBarry Smith PetscErrorCode ierr; 802b24ad042SBarry Smith PetscInt m = b->AIJ->m,*idx,*ii; 803b24ad042SBarry Smith PetscInt n,i,jrow,j; 8046cd98798SBarry Smith 8056cd98798SBarry Smith PetscFunctionBegin; 8061ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 8071ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 8086cd98798SBarry Smith idx = a->j; 8096cd98798SBarry Smith v = a->a; 8106cd98798SBarry Smith ii = a->i; 8116cd98798SBarry Smith 8126cd98798SBarry Smith for (i=0; i<m; i++) { 8136cd98798SBarry Smith jrow = ii[i]; 8146cd98798SBarry Smith n = ii[i+1] - jrow; 8156cd98798SBarry Smith sum1 = 0.0; 8166cd98798SBarry Smith sum2 = 0.0; 8176cd98798SBarry Smith sum3 = 0.0; 8186cd98798SBarry Smith sum4 = 0.0; 8196cd98798SBarry Smith sum5 = 0.0; 8206cd98798SBarry Smith sum6 = 0.0; 8216cd98798SBarry Smith for (j=0; j<n; j++) { 8226cd98798SBarry Smith sum1 += v[jrow]*x[6*idx[jrow]]; 8236cd98798SBarry Smith sum2 += v[jrow]*x[6*idx[jrow]+1]; 8246cd98798SBarry Smith sum3 += v[jrow]*x[6*idx[jrow]+2]; 8256cd98798SBarry Smith sum4 += v[jrow]*x[6*idx[jrow]+3]; 8266cd98798SBarry Smith sum5 += v[jrow]*x[6*idx[jrow]+4]; 8276cd98798SBarry Smith sum6 += v[jrow]*x[6*idx[jrow]+5]; 8286cd98798SBarry Smith jrow++; 8296cd98798SBarry Smith } 8306cd98798SBarry Smith y[6*i] = sum1; 8316cd98798SBarry Smith y[6*i+1] = sum2; 8326cd98798SBarry Smith y[6*i+2] = sum3; 8336cd98798SBarry Smith y[6*i+3] = sum4; 8346cd98798SBarry Smith y[6*i+4] = sum5; 8356cd98798SBarry Smith y[6*i+5] = sum6; 8366cd98798SBarry Smith } 8376cd98798SBarry Smith 838efee365bSSatish Balay ierr = PetscLogFlops(12*a->nz - 6*m);CHKERRQ(ierr); 8391ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 8401ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 8416cd98798SBarry Smith PetscFunctionReturn(0); 8426cd98798SBarry Smith } 8436cd98798SBarry Smith 8444a2ae208SSatish Balay #undef __FUNCT__ 845b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 846dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 8476cd98798SBarry Smith { 8486cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 8496cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 85087828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0; 851dfbe8321SBarry Smith PetscErrorCode ierr; 852b24ad042SBarry Smith PetscInt m = b->AIJ->m,n,i,*idx; 8536cd98798SBarry Smith 8546cd98798SBarry Smith PetscFunctionBegin; 8556cd98798SBarry Smith ierr = VecSet(&zero,yy);CHKERRQ(ierr); 8561ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 8571ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 858bfec09a0SHong Zhang 8596cd98798SBarry Smith for (i=0; i<m; i++) { 860bfec09a0SHong Zhang idx = a->j + a->i[i] ; 861bfec09a0SHong Zhang v = a->a + a->i[i] ; 8626cd98798SBarry Smith n = a->i[i+1] - a->i[i]; 8636cd98798SBarry Smith alpha1 = x[6*i]; 8646cd98798SBarry Smith alpha2 = x[6*i+1]; 8656cd98798SBarry Smith alpha3 = x[6*i+2]; 8666cd98798SBarry Smith alpha4 = x[6*i+3]; 8676cd98798SBarry Smith alpha5 = x[6*i+4]; 8686cd98798SBarry Smith alpha6 = x[6*i+5]; 8696cd98798SBarry Smith while (n-->0) { 8706cd98798SBarry Smith y[6*(*idx)] += alpha1*(*v); 8716cd98798SBarry Smith y[6*(*idx)+1] += alpha2*(*v); 8726cd98798SBarry Smith y[6*(*idx)+2] += alpha3*(*v); 8736cd98798SBarry Smith y[6*(*idx)+3] += alpha4*(*v); 8746cd98798SBarry Smith y[6*(*idx)+4] += alpha5*(*v); 8756cd98798SBarry Smith y[6*(*idx)+5] += alpha6*(*v); 8766cd98798SBarry Smith idx++; v++; 8776cd98798SBarry Smith } 8786cd98798SBarry Smith } 879efee365bSSatish Balay ierr = PetscLogFlops(12*a->nz - 6*b->AIJ->n);CHKERRQ(ierr); 8801ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 8811ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 8826cd98798SBarry Smith PetscFunctionReturn(0); 8836cd98798SBarry Smith } 8846cd98798SBarry Smith 8854a2ae208SSatish Balay #undef __FUNCT__ 886b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 887dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 8886cd98798SBarry Smith { 8896cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 8906cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 89187828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 892dfbe8321SBarry Smith PetscErrorCode ierr; 893b24ad042SBarry Smith PetscInt m = b->AIJ->m,*idx,*ii; 894b24ad042SBarry Smith PetscInt n,i,jrow,j; 8956cd98798SBarry Smith 8966cd98798SBarry Smith PetscFunctionBegin; 8976cd98798SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 8981ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 8991ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 9006cd98798SBarry Smith idx = a->j; 9016cd98798SBarry Smith v = a->a; 9026cd98798SBarry Smith ii = a->i; 9036cd98798SBarry Smith 9046cd98798SBarry Smith for (i=0; i<m; i++) { 9056cd98798SBarry Smith jrow = ii[i]; 9066cd98798SBarry Smith n = ii[i+1] - jrow; 9076cd98798SBarry Smith sum1 = 0.0; 9086cd98798SBarry Smith sum2 = 0.0; 9096cd98798SBarry Smith sum3 = 0.0; 9106cd98798SBarry Smith sum4 = 0.0; 9116cd98798SBarry Smith sum5 = 0.0; 9126cd98798SBarry Smith sum6 = 0.0; 9136cd98798SBarry Smith for (j=0; j<n; j++) { 9146cd98798SBarry Smith sum1 += v[jrow]*x[6*idx[jrow]]; 9156cd98798SBarry Smith sum2 += v[jrow]*x[6*idx[jrow]+1]; 9166cd98798SBarry Smith sum3 += v[jrow]*x[6*idx[jrow]+2]; 9176cd98798SBarry Smith sum4 += v[jrow]*x[6*idx[jrow]+3]; 9186cd98798SBarry Smith sum5 += v[jrow]*x[6*idx[jrow]+4]; 9196cd98798SBarry Smith sum6 += v[jrow]*x[6*idx[jrow]+5]; 9206cd98798SBarry Smith jrow++; 9216cd98798SBarry Smith } 9226cd98798SBarry Smith y[6*i] += sum1; 9236cd98798SBarry Smith y[6*i+1] += sum2; 9246cd98798SBarry Smith y[6*i+2] += sum3; 9256cd98798SBarry Smith y[6*i+3] += sum4; 9266cd98798SBarry Smith y[6*i+4] += sum5; 9276cd98798SBarry Smith y[6*i+5] += sum6; 9286cd98798SBarry Smith } 9296cd98798SBarry Smith 930efee365bSSatish Balay ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr); 9311ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 9321ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 9336cd98798SBarry Smith PetscFunctionReturn(0); 9346cd98798SBarry Smith } 9356cd98798SBarry Smith 9364a2ae208SSatish Balay #undef __FUNCT__ 937b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 938dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 9396cd98798SBarry Smith { 9406cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 9416cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 94287828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 943dfbe8321SBarry Smith PetscErrorCode ierr; 944b24ad042SBarry Smith PetscInt m = b->AIJ->m,n,i,*idx; 9456cd98798SBarry Smith 9466cd98798SBarry Smith PetscFunctionBegin; 9476cd98798SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 9481ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 9491ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 950bfec09a0SHong Zhang 9516cd98798SBarry Smith for (i=0; i<m; i++) { 952bfec09a0SHong Zhang idx = a->j + a->i[i] ; 953bfec09a0SHong Zhang v = a->a + a->i[i] ; 9546cd98798SBarry Smith n = a->i[i+1] - a->i[i]; 9556cd98798SBarry Smith alpha1 = x[6*i]; 9566cd98798SBarry Smith alpha2 = x[6*i+1]; 9576cd98798SBarry Smith alpha3 = x[6*i+2]; 9586cd98798SBarry Smith alpha4 = x[6*i+3]; 9596cd98798SBarry Smith alpha5 = x[6*i+4]; 9606cd98798SBarry Smith alpha6 = x[6*i+5]; 9616cd98798SBarry Smith while (n-->0) { 9626cd98798SBarry Smith y[6*(*idx)] += alpha1*(*v); 9636cd98798SBarry Smith y[6*(*idx)+1] += alpha2*(*v); 9646cd98798SBarry Smith y[6*(*idx)+2] += alpha3*(*v); 9656cd98798SBarry Smith y[6*(*idx)+3] += alpha4*(*v); 9666cd98798SBarry Smith y[6*(*idx)+4] += alpha5*(*v); 9676cd98798SBarry Smith y[6*(*idx)+5] += alpha6*(*v); 9686cd98798SBarry Smith idx++; v++; 9696cd98798SBarry Smith } 9706cd98798SBarry Smith } 971efee365bSSatish Balay ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr); 9721ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 9731ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 9746cd98798SBarry Smith PetscFunctionReturn(0); 9756cd98798SBarry Smith } 9766cd98798SBarry Smith 97766ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/ 97866ed3db0SBarry Smith #undef __FUNCT__ 979ed8eea03SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_7" 980ed8eea03SBarry Smith PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 981ed8eea03SBarry Smith { 982ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 983ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 984ed8eea03SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 985ed8eea03SBarry Smith PetscErrorCode ierr; 986ed8eea03SBarry Smith PetscInt m = b->AIJ->m,*idx,*ii; 987ed8eea03SBarry Smith PetscInt n,i,jrow,j; 988ed8eea03SBarry Smith 989ed8eea03SBarry Smith PetscFunctionBegin; 990ed8eea03SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 991ed8eea03SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 992ed8eea03SBarry Smith idx = a->j; 993ed8eea03SBarry Smith v = a->a; 994ed8eea03SBarry Smith ii = a->i; 995ed8eea03SBarry Smith 996ed8eea03SBarry Smith for (i=0; i<m; i++) { 997ed8eea03SBarry Smith jrow = ii[i]; 998ed8eea03SBarry Smith n = ii[i+1] - jrow; 999ed8eea03SBarry Smith sum1 = 0.0; 1000ed8eea03SBarry Smith sum2 = 0.0; 1001ed8eea03SBarry Smith sum3 = 0.0; 1002ed8eea03SBarry Smith sum4 = 0.0; 1003ed8eea03SBarry Smith sum5 = 0.0; 1004ed8eea03SBarry Smith sum6 = 0.0; 1005ed8eea03SBarry Smith sum7 = 0.0; 1006ed8eea03SBarry Smith for (j=0; j<n; j++) { 1007ed8eea03SBarry Smith sum1 += v[jrow]*x[7*idx[jrow]]; 1008ed8eea03SBarry Smith sum2 += v[jrow]*x[7*idx[jrow]+1]; 1009ed8eea03SBarry Smith sum3 += v[jrow]*x[7*idx[jrow]+2]; 1010ed8eea03SBarry Smith sum4 += v[jrow]*x[7*idx[jrow]+3]; 1011ed8eea03SBarry Smith sum5 += v[jrow]*x[7*idx[jrow]+4]; 1012ed8eea03SBarry Smith sum6 += v[jrow]*x[7*idx[jrow]+5]; 1013ed8eea03SBarry Smith sum7 += v[jrow]*x[7*idx[jrow]+6]; 1014ed8eea03SBarry Smith jrow++; 1015ed8eea03SBarry Smith } 1016ed8eea03SBarry Smith y[7*i] = sum1; 1017ed8eea03SBarry Smith y[7*i+1] = sum2; 1018ed8eea03SBarry Smith y[7*i+2] = sum3; 1019ed8eea03SBarry Smith y[7*i+3] = sum4; 1020ed8eea03SBarry Smith y[7*i+4] = sum5; 1021ed8eea03SBarry Smith y[7*i+5] = sum6; 1022ed8eea03SBarry Smith y[7*i+6] = sum7; 1023ed8eea03SBarry Smith } 1024ed8eea03SBarry Smith 1025efee365bSSatish Balay ierr = PetscLogFlops(14*a->nz - 7*m);CHKERRQ(ierr); 1026ed8eea03SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1027ed8eea03SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1028ed8eea03SBarry Smith PetscFunctionReturn(0); 1029ed8eea03SBarry Smith } 1030ed8eea03SBarry Smith 1031ed8eea03SBarry Smith #undef __FUNCT__ 1032ed8eea03SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7" 1033ed8eea03SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1034ed8eea03SBarry Smith { 1035ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1036ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1037ed8eea03SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0; 1038ed8eea03SBarry Smith PetscErrorCode ierr; 1039ed8eea03SBarry Smith PetscInt m = b->AIJ->m,n,i,*idx; 1040ed8eea03SBarry Smith 1041ed8eea03SBarry Smith PetscFunctionBegin; 1042ed8eea03SBarry Smith ierr = VecSet(&zero,yy);CHKERRQ(ierr); 1043ed8eea03SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1044ed8eea03SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1045ed8eea03SBarry Smith 1046ed8eea03SBarry Smith for (i=0; i<m; i++) { 1047ed8eea03SBarry Smith idx = a->j + a->i[i] ; 1048ed8eea03SBarry Smith v = a->a + a->i[i] ; 1049ed8eea03SBarry Smith n = a->i[i+1] - a->i[i]; 1050ed8eea03SBarry Smith alpha1 = x[7*i]; 1051ed8eea03SBarry Smith alpha2 = x[7*i+1]; 1052ed8eea03SBarry Smith alpha3 = x[7*i+2]; 1053ed8eea03SBarry Smith alpha4 = x[7*i+3]; 1054ed8eea03SBarry Smith alpha5 = x[7*i+4]; 1055ed8eea03SBarry Smith alpha6 = x[7*i+5]; 1056ed8eea03SBarry Smith alpha7 = x[7*i+6]; 1057ed8eea03SBarry Smith while (n-->0) { 1058ed8eea03SBarry Smith y[7*(*idx)] += alpha1*(*v); 1059ed8eea03SBarry Smith y[7*(*idx)+1] += alpha2*(*v); 1060ed8eea03SBarry Smith y[7*(*idx)+2] += alpha3*(*v); 1061ed8eea03SBarry Smith y[7*(*idx)+3] += alpha4*(*v); 1062ed8eea03SBarry Smith y[7*(*idx)+4] += alpha5*(*v); 1063ed8eea03SBarry Smith y[7*(*idx)+5] += alpha6*(*v); 1064ed8eea03SBarry Smith y[7*(*idx)+6] += alpha7*(*v); 1065ed8eea03SBarry Smith idx++; v++; 1066ed8eea03SBarry Smith } 1067ed8eea03SBarry Smith } 1068efee365bSSatish Balay ierr = PetscLogFlops(14*a->nz - 7*b->AIJ->n);CHKERRQ(ierr); 1069ed8eea03SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1070ed8eea03SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1071ed8eea03SBarry Smith PetscFunctionReturn(0); 1072ed8eea03SBarry Smith } 1073ed8eea03SBarry Smith 1074ed8eea03SBarry Smith #undef __FUNCT__ 1075ed8eea03SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_7" 1076ed8eea03SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1077ed8eea03SBarry Smith { 1078ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1079ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1080ed8eea03SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1081ed8eea03SBarry Smith PetscErrorCode ierr; 1082ed8eea03SBarry Smith PetscInt m = b->AIJ->m,*idx,*ii; 1083ed8eea03SBarry Smith PetscInt n,i,jrow,j; 1084ed8eea03SBarry Smith 1085ed8eea03SBarry Smith PetscFunctionBegin; 1086ed8eea03SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1087ed8eea03SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1088ed8eea03SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1089ed8eea03SBarry Smith idx = a->j; 1090ed8eea03SBarry Smith v = a->a; 1091ed8eea03SBarry Smith ii = a->i; 1092ed8eea03SBarry Smith 1093ed8eea03SBarry Smith for (i=0; i<m; i++) { 1094ed8eea03SBarry Smith jrow = ii[i]; 1095ed8eea03SBarry Smith n = ii[i+1] - jrow; 1096ed8eea03SBarry Smith sum1 = 0.0; 1097ed8eea03SBarry Smith sum2 = 0.0; 1098ed8eea03SBarry Smith sum3 = 0.0; 1099ed8eea03SBarry Smith sum4 = 0.0; 1100ed8eea03SBarry Smith sum5 = 0.0; 1101ed8eea03SBarry Smith sum6 = 0.0; 1102ed8eea03SBarry Smith sum7 = 0.0; 1103ed8eea03SBarry Smith for (j=0; j<n; j++) { 1104ed8eea03SBarry Smith sum1 += v[jrow]*x[7*idx[jrow]]; 1105ed8eea03SBarry Smith sum2 += v[jrow]*x[7*idx[jrow]+1]; 1106ed8eea03SBarry Smith sum3 += v[jrow]*x[7*idx[jrow]+2]; 1107ed8eea03SBarry Smith sum4 += v[jrow]*x[7*idx[jrow]+3]; 1108ed8eea03SBarry Smith sum5 += v[jrow]*x[7*idx[jrow]+4]; 1109ed8eea03SBarry Smith sum6 += v[jrow]*x[7*idx[jrow]+5]; 1110ed8eea03SBarry Smith sum7 += v[jrow]*x[7*idx[jrow]+6]; 1111ed8eea03SBarry Smith jrow++; 1112ed8eea03SBarry Smith } 1113ed8eea03SBarry Smith y[7*i] += sum1; 1114ed8eea03SBarry Smith y[7*i+1] += sum2; 1115ed8eea03SBarry Smith y[7*i+2] += sum3; 1116ed8eea03SBarry Smith y[7*i+3] += sum4; 1117ed8eea03SBarry Smith y[7*i+4] += sum5; 1118ed8eea03SBarry Smith y[7*i+5] += sum6; 1119ed8eea03SBarry Smith y[7*i+6] += sum7; 1120ed8eea03SBarry Smith } 1121ed8eea03SBarry Smith 1122efee365bSSatish Balay ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr); 1123ed8eea03SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1124ed8eea03SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1125ed8eea03SBarry Smith PetscFunctionReturn(0); 1126ed8eea03SBarry Smith } 1127ed8eea03SBarry Smith 1128ed8eea03SBarry Smith #undef __FUNCT__ 1129ed8eea03SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7" 1130ed8eea03SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1131ed8eea03SBarry Smith { 1132ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1133ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1134ed8eea03SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1135ed8eea03SBarry Smith PetscErrorCode ierr; 1136ed8eea03SBarry Smith PetscInt m = b->AIJ->m,n,i,*idx; 1137ed8eea03SBarry Smith 1138ed8eea03SBarry Smith PetscFunctionBegin; 1139ed8eea03SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1140ed8eea03SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1141ed8eea03SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1142ed8eea03SBarry Smith for (i=0; i<m; i++) { 1143ed8eea03SBarry Smith idx = a->j + a->i[i] ; 1144ed8eea03SBarry Smith v = a->a + a->i[i] ; 1145ed8eea03SBarry Smith n = a->i[i+1] - a->i[i]; 1146ed8eea03SBarry Smith alpha1 = x[7*i]; 1147ed8eea03SBarry Smith alpha2 = x[7*i+1]; 1148ed8eea03SBarry Smith alpha3 = x[7*i+2]; 1149ed8eea03SBarry Smith alpha4 = x[7*i+3]; 1150ed8eea03SBarry Smith alpha5 = x[7*i+4]; 1151ed8eea03SBarry Smith alpha6 = x[7*i+5]; 1152ed8eea03SBarry Smith alpha7 = x[7*i+6]; 1153ed8eea03SBarry Smith while (n-->0) { 1154ed8eea03SBarry Smith y[7*(*idx)] += alpha1*(*v); 1155ed8eea03SBarry Smith y[7*(*idx)+1] += alpha2*(*v); 1156ed8eea03SBarry Smith y[7*(*idx)+2] += alpha3*(*v); 1157ed8eea03SBarry Smith y[7*(*idx)+3] += alpha4*(*v); 1158ed8eea03SBarry Smith y[7*(*idx)+4] += alpha5*(*v); 1159ed8eea03SBarry Smith y[7*(*idx)+5] += alpha6*(*v); 1160ed8eea03SBarry Smith y[7*(*idx)+6] += alpha7*(*v); 1161ed8eea03SBarry Smith idx++; v++; 1162ed8eea03SBarry Smith } 1163ed8eea03SBarry Smith } 1164efee365bSSatish Balay ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr); 1165ed8eea03SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1166ed8eea03SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1167ed8eea03SBarry Smith PetscFunctionReturn(0); 1168ed8eea03SBarry Smith } 1169ed8eea03SBarry Smith 1170ed8eea03SBarry Smith #undef __FUNCT__ 117166ed3db0SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_8" 1172dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 117366ed3db0SBarry Smith { 117466ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 117566ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 117687828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1177dfbe8321SBarry Smith PetscErrorCode ierr; 1178b24ad042SBarry Smith PetscInt m = b->AIJ->m,*idx,*ii; 1179b24ad042SBarry Smith PetscInt n,i,jrow,j; 118066ed3db0SBarry Smith 118166ed3db0SBarry Smith PetscFunctionBegin; 11821ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 11831ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 118466ed3db0SBarry Smith idx = a->j; 118566ed3db0SBarry Smith v = a->a; 118666ed3db0SBarry Smith ii = a->i; 118766ed3db0SBarry Smith 118866ed3db0SBarry Smith for (i=0; i<m; i++) { 118966ed3db0SBarry Smith jrow = ii[i]; 119066ed3db0SBarry Smith n = ii[i+1] - jrow; 119166ed3db0SBarry Smith sum1 = 0.0; 119266ed3db0SBarry Smith sum2 = 0.0; 119366ed3db0SBarry Smith sum3 = 0.0; 119466ed3db0SBarry Smith sum4 = 0.0; 119566ed3db0SBarry Smith sum5 = 0.0; 119666ed3db0SBarry Smith sum6 = 0.0; 119766ed3db0SBarry Smith sum7 = 0.0; 119866ed3db0SBarry Smith sum8 = 0.0; 119966ed3db0SBarry Smith for (j=0; j<n; j++) { 120066ed3db0SBarry Smith sum1 += v[jrow]*x[8*idx[jrow]]; 120166ed3db0SBarry Smith sum2 += v[jrow]*x[8*idx[jrow]+1]; 120266ed3db0SBarry Smith sum3 += v[jrow]*x[8*idx[jrow]+2]; 120366ed3db0SBarry Smith sum4 += v[jrow]*x[8*idx[jrow]+3]; 120466ed3db0SBarry Smith sum5 += v[jrow]*x[8*idx[jrow]+4]; 120566ed3db0SBarry Smith sum6 += v[jrow]*x[8*idx[jrow]+5]; 120666ed3db0SBarry Smith sum7 += v[jrow]*x[8*idx[jrow]+6]; 120766ed3db0SBarry Smith sum8 += v[jrow]*x[8*idx[jrow]+7]; 120866ed3db0SBarry Smith jrow++; 120966ed3db0SBarry Smith } 121066ed3db0SBarry Smith y[8*i] = sum1; 121166ed3db0SBarry Smith y[8*i+1] = sum2; 121266ed3db0SBarry Smith y[8*i+2] = sum3; 121366ed3db0SBarry Smith y[8*i+3] = sum4; 121466ed3db0SBarry Smith y[8*i+4] = sum5; 121566ed3db0SBarry Smith y[8*i+5] = sum6; 121666ed3db0SBarry Smith y[8*i+6] = sum7; 121766ed3db0SBarry Smith y[8*i+7] = sum8; 121866ed3db0SBarry Smith } 121966ed3db0SBarry Smith 1220efee365bSSatish Balay ierr = PetscLogFlops(16*a->nz - 8*m);CHKERRQ(ierr); 12211ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 12221ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 122366ed3db0SBarry Smith PetscFunctionReturn(0); 122466ed3db0SBarry Smith } 122566ed3db0SBarry Smith 122666ed3db0SBarry Smith #undef __FUNCT__ 122766ed3db0SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 1228dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 122966ed3db0SBarry Smith { 123066ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 123166ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 123287828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1233dfbe8321SBarry Smith PetscErrorCode ierr; 1234b24ad042SBarry Smith PetscInt m = b->AIJ->m,n,i,*idx; 123566ed3db0SBarry Smith 123666ed3db0SBarry Smith PetscFunctionBegin; 123766ed3db0SBarry Smith ierr = VecSet(&zero,yy);CHKERRQ(ierr); 12381ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 12391ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1240bfec09a0SHong Zhang 124166ed3db0SBarry Smith for (i=0; i<m; i++) { 1242bfec09a0SHong Zhang idx = a->j + a->i[i] ; 1243bfec09a0SHong Zhang v = a->a + a->i[i] ; 124466ed3db0SBarry Smith n = a->i[i+1] - a->i[i]; 124566ed3db0SBarry Smith alpha1 = x[8*i]; 124666ed3db0SBarry Smith alpha2 = x[8*i+1]; 124766ed3db0SBarry Smith alpha3 = x[8*i+2]; 124866ed3db0SBarry Smith alpha4 = x[8*i+3]; 124966ed3db0SBarry Smith alpha5 = x[8*i+4]; 125066ed3db0SBarry Smith alpha6 = x[8*i+5]; 125166ed3db0SBarry Smith alpha7 = x[8*i+6]; 125266ed3db0SBarry Smith alpha8 = x[8*i+7]; 125366ed3db0SBarry Smith while (n-->0) { 125466ed3db0SBarry Smith y[8*(*idx)] += alpha1*(*v); 125566ed3db0SBarry Smith y[8*(*idx)+1] += alpha2*(*v); 125666ed3db0SBarry Smith y[8*(*idx)+2] += alpha3*(*v); 125766ed3db0SBarry Smith y[8*(*idx)+3] += alpha4*(*v); 125866ed3db0SBarry Smith y[8*(*idx)+4] += alpha5*(*v); 125966ed3db0SBarry Smith y[8*(*idx)+5] += alpha6*(*v); 126066ed3db0SBarry Smith y[8*(*idx)+6] += alpha7*(*v); 126166ed3db0SBarry Smith y[8*(*idx)+7] += alpha8*(*v); 126266ed3db0SBarry Smith idx++; v++; 126366ed3db0SBarry Smith } 126466ed3db0SBarry Smith } 1265efee365bSSatish Balay ierr = PetscLogFlops(16*a->nz - 8*b->AIJ->n);CHKERRQ(ierr); 12661ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 12671ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 126866ed3db0SBarry Smith PetscFunctionReturn(0); 126966ed3db0SBarry Smith } 127066ed3db0SBarry Smith 127166ed3db0SBarry Smith #undef __FUNCT__ 127266ed3db0SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1273dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 127466ed3db0SBarry Smith { 127566ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 127666ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 127787828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1278dfbe8321SBarry Smith PetscErrorCode ierr; 1279b24ad042SBarry Smith PetscInt m = b->AIJ->m,*idx,*ii; 1280b24ad042SBarry Smith PetscInt n,i,jrow,j; 128166ed3db0SBarry Smith 128266ed3db0SBarry Smith PetscFunctionBegin; 128366ed3db0SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 12841ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 12851ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 128666ed3db0SBarry Smith idx = a->j; 128766ed3db0SBarry Smith v = a->a; 128866ed3db0SBarry Smith ii = a->i; 128966ed3db0SBarry Smith 129066ed3db0SBarry Smith for (i=0; i<m; i++) { 129166ed3db0SBarry Smith jrow = ii[i]; 129266ed3db0SBarry Smith n = ii[i+1] - jrow; 129366ed3db0SBarry Smith sum1 = 0.0; 129466ed3db0SBarry Smith sum2 = 0.0; 129566ed3db0SBarry Smith sum3 = 0.0; 129666ed3db0SBarry Smith sum4 = 0.0; 129766ed3db0SBarry Smith sum5 = 0.0; 129866ed3db0SBarry Smith sum6 = 0.0; 129966ed3db0SBarry Smith sum7 = 0.0; 130066ed3db0SBarry Smith sum8 = 0.0; 130166ed3db0SBarry Smith for (j=0; j<n; j++) { 130266ed3db0SBarry Smith sum1 += v[jrow]*x[8*idx[jrow]]; 130366ed3db0SBarry Smith sum2 += v[jrow]*x[8*idx[jrow]+1]; 130466ed3db0SBarry Smith sum3 += v[jrow]*x[8*idx[jrow]+2]; 130566ed3db0SBarry Smith sum4 += v[jrow]*x[8*idx[jrow]+3]; 130666ed3db0SBarry Smith sum5 += v[jrow]*x[8*idx[jrow]+4]; 130766ed3db0SBarry Smith sum6 += v[jrow]*x[8*idx[jrow]+5]; 130866ed3db0SBarry Smith sum7 += v[jrow]*x[8*idx[jrow]+6]; 130966ed3db0SBarry Smith sum8 += v[jrow]*x[8*idx[jrow]+7]; 131066ed3db0SBarry Smith jrow++; 131166ed3db0SBarry Smith } 131266ed3db0SBarry Smith y[8*i] += sum1; 131366ed3db0SBarry Smith y[8*i+1] += sum2; 131466ed3db0SBarry Smith y[8*i+2] += sum3; 131566ed3db0SBarry Smith y[8*i+3] += sum4; 131666ed3db0SBarry Smith y[8*i+4] += sum5; 131766ed3db0SBarry Smith y[8*i+5] += sum6; 131866ed3db0SBarry Smith y[8*i+6] += sum7; 131966ed3db0SBarry Smith y[8*i+7] += sum8; 132066ed3db0SBarry Smith } 132166ed3db0SBarry Smith 1322efee365bSSatish Balay ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr); 13231ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 13241ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 132566ed3db0SBarry Smith PetscFunctionReturn(0); 132666ed3db0SBarry Smith } 132766ed3db0SBarry Smith 132866ed3db0SBarry Smith #undef __FUNCT__ 132966ed3db0SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1330dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 133166ed3db0SBarry Smith { 133266ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 133366ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 133487828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1335dfbe8321SBarry Smith PetscErrorCode ierr; 1336b24ad042SBarry Smith PetscInt m = b->AIJ->m,n,i,*idx; 133766ed3db0SBarry Smith 133866ed3db0SBarry Smith PetscFunctionBegin; 133966ed3db0SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 13401ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 13411ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 134266ed3db0SBarry Smith for (i=0; i<m; i++) { 1343bfec09a0SHong Zhang idx = a->j + a->i[i] ; 1344bfec09a0SHong Zhang v = a->a + a->i[i] ; 134566ed3db0SBarry Smith n = a->i[i+1] - a->i[i]; 134666ed3db0SBarry Smith alpha1 = x[8*i]; 134766ed3db0SBarry Smith alpha2 = x[8*i+1]; 134866ed3db0SBarry Smith alpha3 = x[8*i+2]; 134966ed3db0SBarry Smith alpha4 = x[8*i+3]; 135066ed3db0SBarry Smith alpha5 = x[8*i+4]; 135166ed3db0SBarry Smith alpha6 = x[8*i+5]; 135266ed3db0SBarry Smith alpha7 = x[8*i+6]; 135366ed3db0SBarry Smith alpha8 = x[8*i+7]; 135466ed3db0SBarry Smith while (n-->0) { 135566ed3db0SBarry Smith y[8*(*idx)] += alpha1*(*v); 135666ed3db0SBarry Smith y[8*(*idx)+1] += alpha2*(*v); 135766ed3db0SBarry Smith y[8*(*idx)+2] += alpha3*(*v); 135866ed3db0SBarry Smith y[8*(*idx)+3] += alpha4*(*v); 135966ed3db0SBarry Smith y[8*(*idx)+4] += alpha5*(*v); 136066ed3db0SBarry Smith y[8*(*idx)+5] += alpha6*(*v); 136166ed3db0SBarry Smith y[8*(*idx)+6] += alpha7*(*v); 136266ed3db0SBarry Smith y[8*(*idx)+7] += alpha8*(*v); 136366ed3db0SBarry Smith idx++; v++; 136466ed3db0SBarry Smith } 136566ed3db0SBarry Smith } 1366efee365bSSatish Balay ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr); 13671ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 13681ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 13692f7816d4SBarry Smith PetscFunctionReturn(0); 13702f7816d4SBarry Smith } 13712f7816d4SBarry Smith 1372ed8eea03SBarry Smith 13732b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/ 13742b692628SMatthew Knepley #undef __FUNCT__ 13752b692628SMatthew Knepley #define __FUNCT__ "MatMult_SeqMAIJ_9" 1376dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 13772b692628SMatthew Knepley { 13782b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 13792b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 13802b692628SMatthew Knepley PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1381dfbe8321SBarry Smith PetscErrorCode ierr; 1382b24ad042SBarry Smith PetscInt m = b->AIJ->m,*idx,*ii; 1383b24ad042SBarry Smith PetscInt n,i,jrow,j; 13842b692628SMatthew Knepley 13852b692628SMatthew Knepley PetscFunctionBegin; 13861ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 13871ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 13882b692628SMatthew Knepley idx = a->j; 13892b692628SMatthew Knepley v = a->a; 13902b692628SMatthew Knepley ii = a->i; 13912b692628SMatthew Knepley 13922b692628SMatthew Knepley for (i=0; i<m; i++) { 13932b692628SMatthew Knepley jrow = ii[i]; 13942b692628SMatthew Knepley n = ii[i+1] - jrow; 13952b692628SMatthew Knepley sum1 = 0.0; 13962b692628SMatthew Knepley sum2 = 0.0; 13972b692628SMatthew Knepley sum3 = 0.0; 13982b692628SMatthew Knepley sum4 = 0.0; 13992b692628SMatthew Knepley sum5 = 0.0; 14002b692628SMatthew Knepley sum6 = 0.0; 14012b692628SMatthew Knepley sum7 = 0.0; 14022b692628SMatthew Knepley sum8 = 0.0; 14032b692628SMatthew Knepley sum9 = 0.0; 14042b692628SMatthew Knepley for (j=0; j<n; j++) { 14052b692628SMatthew Knepley sum1 += v[jrow]*x[9*idx[jrow]]; 14062b692628SMatthew Knepley sum2 += v[jrow]*x[9*idx[jrow]+1]; 14072b692628SMatthew Knepley sum3 += v[jrow]*x[9*idx[jrow]+2]; 14082b692628SMatthew Knepley sum4 += v[jrow]*x[9*idx[jrow]+3]; 14092b692628SMatthew Knepley sum5 += v[jrow]*x[9*idx[jrow]+4]; 14102b692628SMatthew Knepley sum6 += v[jrow]*x[9*idx[jrow]+5]; 14112b692628SMatthew Knepley sum7 += v[jrow]*x[9*idx[jrow]+6]; 14122b692628SMatthew Knepley sum8 += v[jrow]*x[9*idx[jrow]+7]; 14132b692628SMatthew Knepley sum9 += v[jrow]*x[9*idx[jrow]+8]; 14142b692628SMatthew Knepley jrow++; 14152b692628SMatthew Knepley } 14162b692628SMatthew Knepley y[9*i] = sum1; 14172b692628SMatthew Knepley y[9*i+1] = sum2; 14182b692628SMatthew Knepley y[9*i+2] = sum3; 14192b692628SMatthew Knepley y[9*i+3] = sum4; 14202b692628SMatthew Knepley y[9*i+4] = sum5; 14212b692628SMatthew Knepley y[9*i+5] = sum6; 14222b692628SMatthew Knepley y[9*i+6] = sum7; 14232b692628SMatthew Knepley y[9*i+7] = sum8; 14242b692628SMatthew Knepley y[9*i+8] = sum9; 14252b692628SMatthew Knepley } 14262b692628SMatthew Knepley 1427efee365bSSatish Balay ierr = PetscLogFlops(18*a->nz - 9*m);CHKERRQ(ierr); 14281ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 14291ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 14302b692628SMatthew Knepley PetscFunctionReturn(0); 14312b692628SMatthew Knepley } 14322b692628SMatthew Knepley 14332b692628SMatthew Knepley #undef __FUNCT__ 14342b692628SMatthew Knepley #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9" 1435dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 14362b692628SMatthew Knepley { 14372b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 14382b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 14392b692628SMatthew Knepley PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0; 1440dfbe8321SBarry Smith PetscErrorCode ierr; 1441b24ad042SBarry Smith PetscInt m = b->AIJ->m,n,i,*idx; 14422b692628SMatthew Knepley 14432b692628SMatthew Knepley PetscFunctionBegin; 14442b692628SMatthew Knepley ierr = VecSet(&zero,yy);CHKERRQ(ierr); 14451ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 14461ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 14472b692628SMatthew Knepley 14482b692628SMatthew Knepley for (i=0; i<m; i++) { 14492b692628SMatthew Knepley idx = a->j + a->i[i] ; 14502b692628SMatthew Knepley v = a->a + a->i[i] ; 14512b692628SMatthew Knepley n = a->i[i+1] - a->i[i]; 14522b692628SMatthew Knepley alpha1 = x[9*i]; 14532b692628SMatthew Knepley alpha2 = x[9*i+1]; 14542b692628SMatthew Knepley alpha3 = x[9*i+2]; 14552b692628SMatthew Knepley alpha4 = x[9*i+3]; 14562b692628SMatthew Knepley alpha5 = x[9*i+4]; 14572b692628SMatthew Knepley alpha6 = x[9*i+5]; 14582b692628SMatthew Knepley alpha7 = x[9*i+6]; 14592b692628SMatthew Knepley alpha8 = x[9*i+7]; 14602f6bd0c6SMatthew Knepley alpha9 = x[9*i+8]; 14612b692628SMatthew Knepley while (n-->0) { 14622b692628SMatthew Knepley y[9*(*idx)] += alpha1*(*v); 14632b692628SMatthew Knepley y[9*(*idx)+1] += alpha2*(*v); 14642b692628SMatthew Knepley y[9*(*idx)+2] += alpha3*(*v); 14652b692628SMatthew Knepley y[9*(*idx)+3] += alpha4*(*v); 14662b692628SMatthew Knepley y[9*(*idx)+4] += alpha5*(*v); 14672b692628SMatthew Knepley y[9*(*idx)+5] += alpha6*(*v); 14682b692628SMatthew Knepley y[9*(*idx)+6] += alpha7*(*v); 14692b692628SMatthew Knepley y[9*(*idx)+7] += alpha8*(*v); 14702b692628SMatthew Knepley y[9*(*idx)+8] += alpha9*(*v); 14712b692628SMatthew Knepley idx++; v++; 14722b692628SMatthew Knepley } 14732b692628SMatthew Knepley } 1474efee365bSSatish Balay ierr = PetscLogFlops(18*a->nz - 9*b->AIJ->n);CHKERRQ(ierr); 14751ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 14761ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 14772b692628SMatthew Knepley PetscFunctionReturn(0); 14782b692628SMatthew Knepley } 14792b692628SMatthew Knepley 14802b692628SMatthew Knepley #undef __FUNCT__ 14812b692628SMatthew Knepley #define __FUNCT__ "MatMultAdd_SeqMAIJ_9" 1482dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 14832b692628SMatthew Knepley { 14842b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 14852b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 14862b692628SMatthew Knepley PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1487dfbe8321SBarry Smith PetscErrorCode ierr; 1488b24ad042SBarry Smith PetscInt m = b->AIJ->m,*idx,*ii; 1489b24ad042SBarry Smith PetscInt n,i,jrow,j; 14902b692628SMatthew Knepley 14912b692628SMatthew Knepley PetscFunctionBegin; 14922b692628SMatthew Knepley if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 14931ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 14941ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 14952b692628SMatthew Knepley idx = a->j; 14962b692628SMatthew Knepley v = a->a; 14972b692628SMatthew Knepley ii = a->i; 14982b692628SMatthew Knepley 14992b692628SMatthew Knepley for (i=0; i<m; i++) { 15002b692628SMatthew Knepley jrow = ii[i]; 15012b692628SMatthew Knepley n = ii[i+1] - jrow; 15022b692628SMatthew Knepley sum1 = 0.0; 15032b692628SMatthew Knepley sum2 = 0.0; 15042b692628SMatthew Knepley sum3 = 0.0; 15052b692628SMatthew Knepley sum4 = 0.0; 15062b692628SMatthew Knepley sum5 = 0.0; 15072b692628SMatthew Knepley sum6 = 0.0; 15082b692628SMatthew Knepley sum7 = 0.0; 15092b692628SMatthew Knepley sum8 = 0.0; 15102b692628SMatthew Knepley sum9 = 0.0; 15112b692628SMatthew Knepley for (j=0; j<n; j++) { 15122b692628SMatthew Knepley sum1 += v[jrow]*x[9*idx[jrow]]; 15132b692628SMatthew Knepley sum2 += v[jrow]*x[9*idx[jrow]+1]; 15142b692628SMatthew Knepley sum3 += v[jrow]*x[9*idx[jrow]+2]; 15152b692628SMatthew Knepley sum4 += v[jrow]*x[9*idx[jrow]+3]; 15162b692628SMatthew Knepley sum5 += v[jrow]*x[9*idx[jrow]+4]; 15172b692628SMatthew Knepley sum6 += v[jrow]*x[9*idx[jrow]+5]; 15182b692628SMatthew Knepley sum7 += v[jrow]*x[9*idx[jrow]+6]; 15192b692628SMatthew Knepley sum8 += v[jrow]*x[9*idx[jrow]+7]; 15202b692628SMatthew Knepley sum9 += v[jrow]*x[9*idx[jrow]+8]; 15212b692628SMatthew Knepley jrow++; 15222b692628SMatthew Knepley } 15232b692628SMatthew Knepley y[9*i] += sum1; 15242b692628SMatthew Knepley y[9*i+1] += sum2; 15252b692628SMatthew Knepley y[9*i+2] += sum3; 15262b692628SMatthew Knepley y[9*i+3] += sum4; 15272b692628SMatthew Knepley y[9*i+4] += sum5; 15282b692628SMatthew Knepley y[9*i+5] += sum6; 15292b692628SMatthew Knepley y[9*i+6] += sum7; 15302b692628SMatthew Knepley y[9*i+7] += sum8; 15312b692628SMatthew Knepley y[9*i+8] += sum9; 15322b692628SMatthew Knepley } 15332b692628SMatthew Knepley 1534efee365bSSatish Balay ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 15351ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 15361ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 15372b692628SMatthew Knepley PetscFunctionReturn(0); 15382b692628SMatthew Knepley } 15392b692628SMatthew Knepley 15402b692628SMatthew Knepley #undef __FUNCT__ 15412b692628SMatthew Knepley #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9" 1542dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 15432b692628SMatthew Knepley { 15442b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 15452b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 15462b692628SMatthew Knepley PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1547dfbe8321SBarry Smith PetscErrorCode ierr; 1548b24ad042SBarry Smith PetscInt m = b->AIJ->m,n,i,*idx; 15492b692628SMatthew Knepley 15502b692628SMatthew Knepley PetscFunctionBegin; 15512b692628SMatthew Knepley if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 15521ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 15531ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 15542b692628SMatthew Knepley for (i=0; i<m; i++) { 15552b692628SMatthew Knepley idx = a->j + a->i[i] ; 15562b692628SMatthew Knepley v = a->a + a->i[i] ; 15572b692628SMatthew Knepley n = a->i[i+1] - a->i[i]; 15582b692628SMatthew Knepley alpha1 = x[9*i]; 15592b692628SMatthew Knepley alpha2 = x[9*i+1]; 15602b692628SMatthew Knepley alpha3 = x[9*i+2]; 15612b692628SMatthew Knepley alpha4 = x[9*i+3]; 15622b692628SMatthew Knepley alpha5 = x[9*i+4]; 15632b692628SMatthew Knepley alpha6 = x[9*i+5]; 15642b692628SMatthew Knepley alpha7 = x[9*i+6]; 15652b692628SMatthew Knepley alpha8 = x[9*i+7]; 15662b692628SMatthew Knepley alpha9 = x[9*i+8]; 15672b692628SMatthew Knepley while (n-->0) { 15682b692628SMatthew Knepley y[9*(*idx)] += alpha1*(*v); 15692b692628SMatthew Knepley y[9*(*idx)+1] += alpha2*(*v); 15702b692628SMatthew Knepley y[9*(*idx)+2] += alpha3*(*v); 15712b692628SMatthew Knepley y[9*(*idx)+3] += alpha4*(*v); 15722b692628SMatthew Knepley y[9*(*idx)+4] += alpha5*(*v); 15732b692628SMatthew Knepley y[9*(*idx)+5] += alpha6*(*v); 15742b692628SMatthew Knepley y[9*(*idx)+6] += alpha7*(*v); 15752b692628SMatthew Knepley y[9*(*idx)+7] += alpha8*(*v); 15762b692628SMatthew Knepley y[9*(*idx)+8] += alpha9*(*v); 15772b692628SMatthew Knepley idx++; v++; 15782b692628SMatthew Knepley } 15792b692628SMatthew Knepley } 1580efee365bSSatish Balay ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 15811ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 15821ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 15832b692628SMatthew Knepley PetscFunctionReturn(0); 15842b692628SMatthew Knepley } 15852b692628SMatthew Knepley 15862f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/ 15872f7816d4SBarry Smith #undef __FUNCT__ 15882f7816d4SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_16" 1589dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 15902f7816d4SBarry Smith { 15912f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 15922f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 15932f7816d4SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 15942f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1595dfbe8321SBarry Smith PetscErrorCode ierr; 1596b24ad042SBarry Smith PetscInt m = b->AIJ->m,*idx,*ii; 1597b24ad042SBarry Smith PetscInt n,i,jrow,j; 15982f7816d4SBarry Smith 15992f7816d4SBarry Smith PetscFunctionBegin; 16001ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 16011ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 16022f7816d4SBarry Smith idx = a->j; 16032f7816d4SBarry Smith v = a->a; 16042f7816d4SBarry Smith ii = a->i; 16052f7816d4SBarry Smith 16062f7816d4SBarry Smith for (i=0; i<m; i++) { 16072f7816d4SBarry Smith jrow = ii[i]; 16082f7816d4SBarry Smith n = ii[i+1] - jrow; 16092f7816d4SBarry Smith sum1 = 0.0; 16102f7816d4SBarry Smith sum2 = 0.0; 16112f7816d4SBarry Smith sum3 = 0.0; 16122f7816d4SBarry Smith sum4 = 0.0; 16132f7816d4SBarry Smith sum5 = 0.0; 16142f7816d4SBarry Smith sum6 = 0.0; 16152f7816d4SBarry Smith sum7 = 0.0; 16162f7816d4SBarry Smith sum8 = 0.0; 16172f7816d4SBarry Smith sum9 = 0.0; 16182f7816d4SBarry Smith sum10 = 0.0; 16192f7816d4SBarry Smith sum11 = 0.0; 16202f7816d4SBarry Smith sum12 = 0.0; 16212f7816d4SBarry Smith sum13 = 0.0; 16222f7816d4SBarry Smith sum14 = 0.0; 16232f7816d4SBarry Smith sum15 = 0.0; 16242f7816d4SBarry Smith sum16 = 0.0; 16252f7816d4SBarry Smith for (j=0; j<n; j++) { 16262f7816d4SBarry Smith sum1 += v[jrow]*x[16*idx[jrow]]; 16272f7816d4SBarry Smith sum2 += v[jrow]*x[16*idx[jrow]+1]; 16282f7816d4SBarry Smith sum3 += v[jrow]*x[16*idx[jrow]+2]; 16292f7816d4SBarry Smith sum4 += v[jrow]*x[16*idx[jrow]+3]; 16302f7816d4SBarry Smith sum5 += v[jrow]*x[16*idx[jrow]+4]; 16312f7816d4SBarry Smith sum6 += v[jrow]*x[16*idx[jrow]+5]; 16322f7816d4SBarry Smith sum7 += v[jrow]*x[16*idx[jrow]+6]; 16332f7816d4SBarry Smith sum8 += v[jrow]*x[16*idx[jrow]+7]; 16342f7816d4SBarry Smith sum9 += v[jrow]*x[16*idx[jrow]+8]; 16352f7816d4SBarry Smith sum10 += v[jrow]*x[16*idx[jrow]+9]; 16362f7816d4SBarry Smith sum11 += v[jrow]*x[16*idx[jrow]+10]; 16372f7816d4SBarry Smith sum12 += v[jrow]*x[16*idx[jrow]+11]; 16382f7816d4SBarry Smith sum13 += v[jrow]*x[16*idx[jrow]+12]; 16392f7816d4SBarry Smith sum14 += v[jrow]*x[16*idx[jrow]+13]; 16402f7816d4SBarry Smith sum15 += v[jrow]*x[16*idx[jrow]+14]; 16412f7816d4SBarry Smith sum16 += v[jrow]*x[16*idx[jrow]+15]; 16422f7816d4SBarry Smith jrow++; 16432f7816d4SBarry Smith } 16442f7816d4SBarry Smith y[16*i] = sum1; 16452f7816d4SBarry Smith y[16*i+1] = sum2; 16462f7816d4SBarry Smith y[16*i+2] = sum3; 16472f7816d4SBarry Smith y[16*i+3] = sum4; 16482f7816d4SBarry Smith y[16*i+4] = sum5; 16492f7816d4SBarry Smith y[16*i+5] = sum6; 16502f7816d4SBarry Smith y[16*i+6] = sum7; 16512f7816d4SBarry Smith y[16*i+7] = sum8; 16522f7816d4SBarry Smith y[16*i+8] = sum9; 16532f7816d4SBarry Smith y[16*i+9] = sum10; 16542f7816d4SBarry Smith y[16*i+10] = sum11; 16552f7816d4SBarry Smith y[16*i+11] = sum12; 16562f7816d4SBarry Smith y[16*i+12] = sum13; 16572f7816d4SBarry Smith y[16*i+13] = sum14; 16582f7816d4SBarry Smith y[16*i+14] = sum15; 16592f7816d4SBarry Smith y[16*i+15] = sum16; 16602f7816d4SBarry Smith } 16612f7816d4SBarry Smith 1662efee365bSSatish Balay ierr = PetscLogFlops(32*a->nz - 16*m);CHKERRQ(ierr); 16631ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 16641ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 16652f7816d4SBarry Smith PetscFunctionReturn(0); 16662f7816d4SBarry Smith } 16672f7816d4SBarry Smith 16682f7816d4SBarry Smith #undef __FUNCT__ 16692f7816d4SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 1670dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 16712f7816d4SBarry Smith { 16722f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 16732f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 16742f7816d4SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 16752f7816d4SBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1676dfbe8321SBarry Smith PetscErrorCode ierr; 1677b24ad042SBarry Smith PetscInt m = b->AIJ->m,n,i,*idx; 16782f7816d4SBarry Smith 16792f7816d4SBarry Smith PetscFunctionBegin; 16802f7816d4SBarry Smith ierr = VecSet(&zero,yy);CHKERRQ(ierr); 16811ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 16821ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1683bfec09a0SHong Zhang 16842f7816d4SBarry Smith for (i=0; i<m; i++) { 1685bfec09a0SHong Zhang idx = a->j + a->i[i] ; 1686bfec09a0SHong Zhang v = a->a + a->i[i] ; 16872f7816d4SBarry Smith n = a->i[i+1] - a->i[i]; 16882f7816d4SBarry Smith alpha1 = x[16*i]; 16892f7816d4SBarry Smith alpha2 = x[16*i+1]; 16902f7816d4SBarry Smith alpha3 = x[16*i+2]; 16912f7816d4SBarry Smith alpha4 = x[16*i+3]; 16922f7816d4SBarry Smith alpha5 = x[16*i+4]; 16932f7816d4SBarry Smith alpha6 = x[16*i+5]; 16942f7816d4SBarry Smith alpha7 = x[16*i+6]; 16952f7816d4SBarry Smith alpha8 = x[16*i+7]; 16962f7816d4SBarry Smith alpha9 = x[16*i+8]; 16972f7816d4SBarry Smith alpha10 = x[16*i+9]; 16982f7816d4SBarry Smith alpha11 = x[16*i+10]; 16992f7816d4SBarry Smith alpha12 = x[16*i+11]; 17002f7816d4SBarry Smith alpha13 = x[16*i+12]; 17012f7816d4SBarry Smith alpha14 = x[16*i+13]; 17022f7816d4SBarry Smith alpha15 = x[16*i+14]; 17032f7816d4SBarry Smith alpha16 = x[16*i+15]; 17042f7816d4SBarry Smith while (n-->0) { 17052f7816d4SBarry Smith y[16*(*idx)] += alpha1*(*v); 17062f7816d4SBarry Smith y[16*(*idx)+1] += alpha2*(*v); 17072f7816d4SBarry Smith y[16*(*idx)+2] += alpha3*(*v); 17082f7816d4SBarry Smith y[16*(*idx)+3] += alpha4*(*v); 17092f7816d4SBarry Smith y[16*(*idx)+4] += alpha5*(*v); 17102f7816d4SBarry Smith y[16*(*idx)+5] += alpha6*(*v); 17112f7816d4SBarry Smith y[16*(*idx)+6] += alpha7*(*v); 17122f7816d4SBarry Smith y[16*(*idx)+7] += alpha8*(*v); 17132f7816d4SBarry Smith y[16*(*idx)+8] += alpha9*(*v); 17142f7816d4SBarry Smith y[16*(*idx)+9] += alpha10*(*v); 17152f7816d4SBarry Smith y[16*(*idx)+10] += alpha11*(*v); 17162f7816d4SBarry Smith y[16*(*idx)+11] += alpha12*(*v); 17172f7816d4SBarry Smith y[16*(*idx)+12] += alpha13*(*v); 17182f7816d4SBarry Smith y[16*(*idx)+13] += alpha14*(*v); 17192f7816d4SBarry Smith y[16*(*idx)+14] += alpha15*(*v); 17202f7816d4SBarry Smith y[16*(*idx)+15] += alpha16*(*v); 17212f7816d4SBarry Smith idx++; v++; 17222f7816d4SBarry Smith } 17232f7816d4SBarry Smith } 1724efee365bSSatish Balay ierr = PetscLogFlops(32*a->nz - 16*b->AIJ->n);CHKERRQ(ierr); 17251ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 17261ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 17272f7816d4SBarry Smith PetscFunctionReturn(0); 17282f7816d4SBarry Smith } 17292f7816d4SBarry Smith 17302f7816d4SBarry Smith #undef __FUNCT__ 17312f7816d4SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 1732dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 17332f7816d4SBarry Smith { 17342f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 17352f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 17362f7816d4SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 17372f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1738dfbe8321SBarry Smith PetscErrorCode ierr; 1739b24ad042SBarry Smith PetscInt m = b->AIJ->m,*idx,*ii; 1740b24ad042SBarry Smith PetscInt n,i,jrow,j; 17412f7816d4SBarry Smith 17422f7816d4SBarry Smith PetscFunctionBegin; 17432f7816d4SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 17441ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 17451ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 17462f7816d4SBarry Smith idx = a->j; 17472f7816d4SBarry Smith v = a->a; 17482f7816d4SBarry Smith ii = a->i; 17492f7816d4SBarry Smith 17502f7816d4SBarry Smith for (i=0; i<m; i++) { 17512f7816d4SBarry Smith jrow = ii[i]; 17522f7816d4SBarry Smith n = ii[i+1] - jrow; 17532f7816d4SBarry Smith sum1 = 0.0; 17542f7816d4SBarry Smith sum2 = 0.0; 17552f7816d4SBarry Smith sum3 = 0.0; 17562f7816d4SBarry Smith sum4 = 0.0; 17572f7816d4SBarry Smith sum5 = 0.0; 17582f7816d4SBarry Smith sum6 = 0.0; 17592f7816d4SBarry Smith sum7 = 0.0; 17602f7816d4SBarry Smith sum8 = 0.0; 17612f7816d4SBarry Smith sum9 = 0.0; 17622f7816d4SBarry Smith sum10 = 0.0; 17632f7816d4SBarry Smith sum11 = 0.0; 17642f7816d4SBarry Smith sum12 = 0.0; 17652f7816d4SBarry Smith sum13 = 0.0; 17662f7816d4SBarry Smith sum14 = 0.0; 17672f7816d4SBarry Smith sum15 = 0.0; 17682f7816d4SBarry Smith sum16 = 0.0; 17692f7816d4SBarry Smith for (j=0; j<n; j++) { 17702f7816d4SBarry Smith sum1 += v[jrow]*x[16*idx[jrow]]; 17712f7816d4SBarry Smith sum2 += v[jrow]*x[16*idx[jrow]+1]; 17722f7816d4SBarry Smith sum3 += v[jrow]*x[16*idx[jrow]+2]; 17732f7816d4SBarry Smith sum4 += v[jrow]*x[16*idx[jrow]+3]; 17742f7816d4SBarry Smith sum5 += v[jrow]*x[16*idx[jrow]+4]; 17752f7816d4SBarry Smith sum6 += v[jrow]*x[16*idx[jrow]+5]; 17762f7816d4SBarry Smith sum7 += v[jrow]*x[16*idx[jrow]+6]; 17772f7816d4SBarry Smith sum8 += v[jrow]*x[16*idx[jrow]+7]; 17782f7816d4SBarry Smith sum9 += v[jrow]*x[16*idx[jrow]+8]; 17792f7816d4SBarry Smith sum10 += v[jrow]*x[16*idx[jrow]+9]; 17802f7816d4SBarry Smith sum11 += v[jrow]*x[16*idx[jrow]+10]; 17812f7816d4SBarry Smith sum12 += v[jrow]*x[16*idx[jrow]+11]; 17822f7816d4SBarry Smith sum13 += v[jrow]*x[16*idx[jrow]+12]; 17832f7816d4SBarry Smith sum14 += v[jrow]*x[16*idx[jrow]+13]; 17842f7816d4SBarry Smith sum15 += v[jrow]*x[16*idx[jrow]+14]; 17852f7816d4SBarry Smith sum16 += v[jrow]*x[16*idx[jrow]+15]; 17862f7816d4SBarry Smith jrow++; 17872f7816d4SBarry Smith } 17882f7816d4SBarry Smith y[16*i] += sum1; 17892f7816d4SBarry Smith y[16*i+1] += sum2; 17902f7816d4SBarry Smith y[16*i+2] += sum3; 17912f7816d4SBarry Smith y[16*i+3] += sum4; 17922f7816d4SBarry Smith y[16*i+4] += sum5; 17932f7816d4SBarry Smith y[16*i+5] += sum6; 17942f7816d4SBarry Smith y[16*i+6] += sum7; 17952f7816d4SBarry Smith y[16*i+7] += sum8; 17962f7816d4SBarry Smith y[16*i+8] += sum9; 17972f7816d4SBarry Smith y[16*i+9] += sum10; 17982f7816d4SBarry Smith y[16*i+10] += sum11; 17992f7816d4SBarry Smith y[16*i+11] += sum12; 18002f7816d4SBarry Smith y[16*i+12] += sum13; 18012f7816d4SBarry Smith y[16*i+13] += sum14; 18022f7816d4SBarry Smith y[16*i+14] += sum15; 18032f7816d4SBarry Smith y[16*i+15] += sum16; 18042f7816d4SBarry Smith } 18052f7816d4SBarry Smith 1806efee365bSSatish Balay ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr); 18071ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 18081ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 18092f7816d4SBarry Smith PetscFunctionReturn(0); 18102f7816d4SBarry Smith } 18112f7816d4SBarry Smith 18122f7816d4SBarry Smith #undef __FUNCT__ 18132f7816d4SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 1814dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 18152f7816d4SBarry Smith { 18162f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 18172f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 18182f7816d4SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 18192f7816d4SBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1820dfbe8321SBarry Smith PetscErrorCode ierr; 1821b24ad042SBarry Smith PetscInt m = b->AIJ->m,n,i,*idx; 18222f7816d4SBarry Smith 18232f7816d4SBarry Smith PetscFunctionBegin; 18242f7816d4SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 18251ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 18261ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 18272f7816d4SBarry Smith for (i=0; i<m; i++) { 1828bfec09a0SHong Zhang idx = a->j + a->i[i] ; 1829bfec09a0SHong Zhang v = a->a + a->i[i] ; 18302f7816d4SBarry Smith n = a->i[i+1] - a->i[i]; 18312f7816d4SBarry Smith alpha1 = x[16*i]; 18322f7816d4SBarry Smith alpha2 = x[16*i+1]; 18332f7816d4SBarry Smith alpha3 = x[16*i+2]; 18342f7816d4SBarry Smith alpha4 = x[16*i+3]; 18352f7816d4SBarry Smith alpha5 = x[16*i+4]; 18362f7816d4SBarry Smith alpha6 = x[16*i+5]; 18372f7816d4SBarry Smith alpha7 = x[16*i+6]; 18382f7816d4SBarry Smith alpha8 = x[16*i+7]; 18392f7816d4SBarry Smith alpha9 = x[16*i+8]; 18402f7816d4SBarry Smith alpha10 = x[16*i+9]; 18412f7816d4SBarry Smith alpha11 = x[16*i+10]; 18422f7816d4SBarry Smith alpha12 = x[16*i+11]; 18432f7816d4SBarry Smith alpha13 = x[16*i+12]; 18442f7816d4SBarry Smith alpha14 = x[16*i+13]; 18452f7816d4SBarry Smith alpha15 = x[16*i+14]; 18462f7816d4SBarry Smith alpha16 = x[16*i+15]; 18472f7816d4SBarry Smith while (n-->0) { 18482f7816d4SBarry Smith y[16*(*idx)] += alpha1*(*v); 18492f7816d4SBarry Smith y[16*(*idx)+1] += alpha2*(*v); 18502f7816d4SBarry Smith y[16*(*idx)+2] += alpha3*(*v); 18512f7816d4SBarry Smith y[16*(*idx)+3] += alpha4*(*v); 18522f7816d4SBarry Smith y[16*(*idx)+4] += alpha5*(*v); 18532f7816d4SBarry Smith y[16*(*idx)+5] += alpha6*(*v); 18542f7816d4SBarry Smith y[16*(*idx)+6] += alpha7*(*v); 18552f7816d4SBarry Smith y[16*(*idx)+7] += alpha8*(*v); 18562f7816d4SBarry Smith y[16*(*idx)+8] += alpha9*(*v); 18572f7816d4SBarry Smith y[16*(*idx)+9] += alpha10*(*v); 18582f7816d4SBarry Smith y[16*(*idx)+10] += alpha11*(*v); 18592f7816d4SBarry Smith y[16*(*idx)+11] += alpha12*(*v); 18602f7816d4SBarry Smith y[16*(*idx)+12] += alpha13*(*v); 18612f7816d4SBarry Smith y[16*(*idx)+13] += alpha14*(*v); 18622f7816d4SBarry Smith y[16*(*idx)+14] += alpha15*(*v); 18632f7816d4SBarry Smith y[16*(*idx)+15] += alpha16*(*v); 18642f7816d4SBarry Smith idx++; v++; 18652f7816d4SBarry Smith } 18662f7816d4SBarry Smith } 1867efee365bSSatish Balay ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr); 18681ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 18691ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 187066ed3db0SBarry Smith PetscFunctionReturn(0); 187166ed3db0SBarry Smith } 187266ed3db0SBarry Smith 1873f4a53059SBarry Smith /*===================================================================================*/ 18744a2ae208SSatish Balay #undef __FUNCT__ 18754a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIMAIJ_dof" 1876dfbe8321SBarry Smith PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 1877f4a53059SBarry Smith { 18784cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1879dfbe8321SBarry Smith PetscErrorCode ierr; 1880f4a53059SBarry Smith 1881b24ad042SBarry Smith PetscFunctionBegin; 1882f4a53059SBarry Smith /* start the scatter */ 1883f4a53059SBarry Smith ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 18844cb9d9b8SBarry Smith ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 1885f4a53059SBarry Smith ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 18864cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 1887f4a53059SBarry Smith PetscFunctionReturn(0); 1888f4a53059SBarry Smith } 1889f4a53059SBarry Smith 18904a2ae208SSatish Balay #undef __FUNCT__ 18914a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 1892dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 18934cb9d9b8SBarry Smith { 18944cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1895dfbe8321SBarry Smith PetscErrorCode ierr; 1896b24ad042SBarry Smith 18974cb9d9b8SBarry Smith PetscFunctionBegin; 18984cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 18994cb9d9b8SBarry Smith ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 1900a5ff213dSBarry Smith ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 19014cb9d9b8SBarry Smith ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 19024cb9d9b8SBarry Smith PetscFunctionReturn(0); 19034cb9d9b8SBarry Smith } 19044cb9d9b8SBarry Smith 19054a2ae208SSatish Balay #undef __FUNCT__ 19064a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 1907dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 19084cb9d9b8SBarry Smith { 19094cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1910dfbe8321SBarry Smith PetscErrorCode ierr; 19114cb9d9b8SBarry Smith 1912b24ad042SBarry Smith PetscFunctionBegin; 19134cb9d9b8SBarry Smith /* start the scatter */ 19144cb9d9b8SBarry Smith ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1915d72c5c08SBarry Smith ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 19164cb9d9b8SBarry Smith ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1917717f2ec8SHong Zhang ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 19184cb9d9b8SBarry Smith PetscFunctionReturn(0); 19194cb9d9b8SBarry Smith } 19204cb9d9b8SBarry Smith 19214a2ae208SSatish Balay #undef __FUNCT__ 19224a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 1923dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 19244cb9d9b8SBarry Smith { 19254cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1926dfbe8321SBarry Smith PetscErrorCode ierr; 1927b24ad042SBarry Smith 19284cb9d9b8SBarry Smith PetscFunctionBegin; 19294cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 1930d72c5c08SBarry Smith ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1931d72c5c08SBarry Smith ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 1932d72c5c08SBarry Smith ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 19334cb9d9b8SBarry Smith PetscFunctionReturn(0); 19344cb9d9b8SBarry Smith } 19354cb9d9b8SBarry Smith 19369c4fc4c7SBarry Smith #undef __FUNCT__ 19377ba1a0bfSKris Buschelman #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 19387ba1a0bfSKris Buschelman PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 19397ba1a0bfSKris Buschelman { 19407ba1a0bfSKris Buschelman /* This routine requires testing -- but it's getting better. */ 19417ba1a0bfSKris Buschelman PetscErrorCode ierr; 19427ba1a0bfSKris Buschelman FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 19437ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 19447ba1a0bfSKris Buschelman Mat P=pp->AIJ; 19457ba1a0bfSKris Buschelman Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 19467ba1a0bfSKris Buschelman PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 19477ba1a0bfSKris Buschelman PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 19487ba1a0bfSKris Buschelman PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof,cn; 19497ba1a0bfSKris Buschelman PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 19507ba1a0bfSKris Buschelman MatScalar *ca; 19517ba1a0bfSKris Buschelman 19527ba1a0bfSKris Buschelman PetscFunctionBegin; 19537ba1a0bfSKris Buschelman /* Start timer */ 19547ba1a0bfSKris Buschelman ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 19557ba1a0bfSKris Buschelman 19567ba1a0bfSKris Buschelman /* Get ij structure of P^T */ 19577ba1a0bfSKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 19587ba1a0bfSKris Buschelman 19597ba1a0bfSKris Buschelman cn = pn*ppdof; 19607ba1a0bfSKris Buschelman /* Allocate ci array, arrays for fill computation and */ 19617ba1a0bfSKris Buschelman /* free space for accumulating nonzero column info */ 19627ba1a0bfSKris Buschelman ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 19637ba1a0bfSKris Buschelman ci[0] = 0; 19647ba1a0bfSKris Buschelman 19657ba1a0bfSKris Buschelman /* Work arrays for rows of P^T*A */ 19667ba1a0bfSKris Buschelman ierr = PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 19677ba1a0bfSKris Buschelman ierr = PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 19687ba1a0bfSKris Buschelman ptasparserow = ptadenserow + an; 19697ba1a0bfSKris Buschelman denserow = ptasparserow + an; 19707ba1a0bfSKris Buschelman sparserow = denserow + cn; 19717ba1a0bfSKris Buschelman 19727ba1a0bfSKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 19737ba1a0bfSKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 19747ba1a0bfSKris Buschelman /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 19757ba1a0bfSKris Buschelman ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 19767ba1a0bfSKris Buschelman current_space = free_space; 19777ba1a0bfSKris Buschelman 19787ba1a0bfSKris Buschelman /* Determine symbolic info for each row of C: */ 19797ba1a0bfSKris Buschelman for (i=0;i<pn;i++) { 19807ba1a0bfSKris Buschelman ptnzi = pti[i+1] - pti[i]; 19817ba1a0bfSKris Buschelman ptJ = ptj + pti[i]; 19827ba1a0bfSKris Buschelman for (dof=0;dof<ppdof;dof++) { 19837ba1a0bfSKris Buschelman ptanzi = 0; 19847ba1a0bfSKris Buschelman /* Determine symbolic row of PtA: */ 19857ba1a0bfSKris Buschelman for (j=0;j<ptnzi;j++) { 19867ba1a0bfSKris Buschelman /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 19877ba1a0bfSKris Buschelman arow = ptJ[j]*ppdof + dof; 19887ba1a0bfSKris Buschelman /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 19897ba1a0bfSKris Buschelman anzj = ai[arow+1] - ai[arow]; 19907ba1a0bfSKris Buschelman ajj = aj + ai[arow]; 19917ba1a0bfSKris Buschelman for (k=0;k<anzj;k++) { 19927ba1a0bfSKris Buschelman if (!ptadenserow[ajj[k]]) { 19937ba1a0bfSKris Buschelman ptadenserow[ajj[k]] = -1; 19947ba1a0bfSKris Buschelman ptasparserow[ptanzi++] = ajj[k]; 19957ba1a0bfSKris Buschelman } 19967ba1a0bfSKris Buschelman } 19977ba1a0bfSKris Buschelman } 19987ba1a0bfSKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 19997ba1a0bfSKris Buschelman ptaj = ptasparserow; 20007ba1a0bfSKris Buschelman cnzi = 0; 20017ba1a0bfSKris Buschelman for (j=0;j<ptanzi;j++) { 20027ba1a0bfSKris Buschelman /* Get offset within block of P */ 20037ba1a0bfSKris Buschelman pshift = *ptaj%ppdof; 20047ba1a0bfSKris Buschelman /* Get block row of P */ 20057ba1a0bfSKris Buschelman prow = (*ptaj++)/ppdof; /* integer division */ 20067ba1a0bfSKris Buschelman /* P has same number of nonzeros per row as the compressed form */ 20077ba1a0bfSKris Buschelman pnzj = pi[prow+1] - pi[prow]; 20087ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 20097ba1a0bfSKris Buschelman for (k=0;k<pnzj;k++) { 20107ba1a0bfSKris Buschelman /* Locations in C are shifted by the offset within the block */ 20117ba1a0bfSKris Buschelman /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 20127ba1a0bfSKris Buschelman if (!denserow[pjj[k]*ppdof+pshift]) { 20137ba1a0bfSKris Buschelman denserow[pjj[k]*ppdof+pshift] = -1; 20147ba1a0bfSKris Buschelman sparserow[cnzi++] = pjj[k]*ppdof+pshift; 20157ba1a0bfSKris Buschelman } 20167ba1a0bfSKris Buschelman } 20177ba1a0bfSKris Buschelman } 20187ba1a0bfSKris Buschelman 20197ba1a0bfSKris Buschelman /* sort sparserow */ 20207ba1a0bfSKris Buschelman ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 20217ba1a0bfSKris Buschelman 20227ba1a0bfSKris Buschelman /* If free space is not available, make more free space */ 20237ba1a0bfSKris Buschelman /* Double the amount of total space in the list */ 20247ba1a0bfSKris Buschelman if (current_space->local_remaining<cnzi) { 20257ba1a0bfSKris Buschelman ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 20267ba1a0bfSKris Buschelman } 20277ba1a0bfSKris Buschelman 20287ba1a0bfSKris Buschelman /* Copy data into free space, and zero out denserows */ 20297ba1a0bfSKris Buschelman ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 20307ba1a0bfSKris Buschelman current_space->array += cnzi; 20317ba1a0bfSKris Buschelman current_space->local_used += cnzi; 20327ba1a0bfSKris Buschelman current_space->local_remaining -= cnzi; 20337ba1a0bfSKris Buschelman 20347ba1a0bfSKris Buschelman for (j=0;j<ptanzi;j++) { 20357ba1a0bfSKris Buschelman ptadenserow[ptasparserow[j]] = 0; 20367ba1a0bfSKris Buschelman } 20377ba1a0bfSKris Buschelman for (j=0;j<cnzi;j++) { 20387ba1a0bfSKris Buschelman denserow[sparserow[j]] = 0; 20397ba1a0bfSKris Buschelman } 20407ba1a0bfSKris Buschelman /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 20417ba1a0bfSKris Buschelman /* For now, we will recompute what is needed. */ 20427ba1a0bfSKris Buschelman ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 20437ba1a0bfSKris Buschelman } 20447ba1a0bfSKris Buschelman } 20457ba1a0bfSKris Buschelman /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 20467ba1a0bfSKris Buschelman /* Allocate space for cj, initialize cj, and */ 20477ba1a0bfSKris Buschelman /* destroy list of free space and other temporary array(s) */ 20487ba1a0bfSKris Buschelman ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 20497ba1a0bfSKris Buschelman ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 20507ba1a0bfSKris Buschelman ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 20517ba1a0bfSKris Buschelman 20527ba1a0bfSKris Buschelman /* Allocate space for ca */ 20537ba1a0bfSKris Buschelman ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 20547ba1a0bfSKris Buschelman ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 20557ba1a0bfSKris Buschelman 20567ba1a0bfSKris Buschelman /* put together the new matrix */ 20577ba1a0bfSKris Buschelman ierr = MatCreateSeqAIJWithArrays(A->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr); 20587ba1a0bfSKris Buschelman 20597ba1a0bfSKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 20607ba1a0bfSKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 20617ba1a0bfSKris Buschelman c = (Mat_SeqAIJ *)((*C)->data); 20627ba1a0bfSKris Buschelman c->freedata = PETSC_TRUE; 20637ba1a0bfSKris Buschelman c->nonew = 0; 20647ba1a0bfSKris Buschelman 20657ba1a0bfSKris Buschelman /* Clean up. */ 20667ba1a0bfSKris Buschelman ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 20677ba1a0bfSKris Buschelman 20687ba1a0bfSKris Buschelman ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 20697ba1a0bfSKris Buschelman PetscFunctionReturn(0); 20707ba1a0bfSKris Buschelman } 20717ba1a0bfSKris Buschelman 20727ba1a0bfSKris Buschelman #undef __FUNCT__ 20737ba1a0bfSKris Buschelman #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ" 20747ba1a0bfSKris Buschelman PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 20757ba1a0bfSKris Buschelman { 20767ba1a0bfSKris Buschelman /* This routine requires testing -- first draft only */ 20777ba1a0bfSKris Buschelman PetscErrorCode ierr; 20787ba1a0bfSKris Buschelman PetscInt flops=0; 20797ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 20807ba1a0bfSKris Buschelman Mat P=pp->AIJ; 20817ba1a0bfSKris Buschelman Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 20827ba1a0bfSKris Buschelman Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 20837ba1a0bfSKris Buschelman Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 20847ba1a0bfSKris Buschelman PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 20857ba1a0bfSKris Buschelman PetscInt *ci=c->i,*cj=c->j,*cjj; 20867ba1a0bfSKris Buschelman PetscInt am=A->M,cn=C->N,cm=C->M,ppdof=pp->dof; 20877ba1a0bfSKris Buschelman PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 20887ba1a0bfSKris Buschelman MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 20897ba1a0bfSKris Buschelman 20907ba1a0bfSKris Buschelman PetscFunctionBegin; 20917ba1a0bfSKris Buschelman /* Allocate temporary array for storage of one row of A*P */ 20927ba1a0bfSKris Buschelman ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 20937ba1a0bfSKris Buschelman ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 20947ba1a0bfSKris Buschelman 20957ba1a0bfSKris Buschelman apj = (PetscInt *)(apa + cn); 20967ba1a0bfSKris Buschelman apjdense = apj + cn; 20977ba1a0bfSKris Buschelman 20987ba1a0bfSKris Buschelman /* Clear old values in C */ 20997ba1a0bfSKris Buschelman ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 21007ba1a0bfSKris Buschelman 21017ba1a0bfSKris Buschelman for (i=0;i<am;i++) { 21027ba1a0bfSKris Buschelman /* Form sparse row of A*P */ 21037ba1a0bfSKris Buschelman anzi = ai[i+1] - ai[i]; 21047ba1a0bfSKris Buschelman apnzj = 0; 21057ba1a0bfSKris Buschelman for (j=0;j<anzi;j++) { 21067ba1a0bfSKris Buschelman /* Get offset within block of P */ 21077ba1a0bfSKris Buschelman pshift = *aj%ppdof; 21087ba1a0bfSKris Buschelman /* Get block row of P */ 21097ba1a0bfSKris Buschelman prow = *aj++/ppdof; /* integer division */ 21107ba1a0bfSKris Buschelman pnzj = pi[prow+1] - pi[prow]; 21117ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 21127ba1a0bfSKris Buschelman paj = pa + pi[prow]; 21137ba1a0bfSKris Buschelman for (k=0;k<pnzj;k++) { 21147ba1a0bfSKris Buschelman poffset = pjj[k]*ppdof+pshift; 21157ba1a0bfSKris Buschelman if (!apjdense[poffset]) { 21167ba1a0bfSKris Buschelman apjdense[poffset] = -1; 21177ba1a0bfSKris Buschelman apj[apnzj++] = poffset; 21187ba1a0bfSKris Buschelman } 21197ba1a0bfSKris Buschelman apa[poffset] += (*aa)*paj[k]; 21207ba1a0bfSKris Buschelman } 21217ba1a0bfSKris Buschelman flops += 2*pnzj; 21227ba1a0bfSKris Buschelman aa++; 21237ba1a0bfSKris Buschelman } 21247ba1a0bfSKris Buschelman 21257ba1a0bfSKris Buschelman /* Sort the j index array for quick sparse axpy. */ 21267ba1a0bfSKris Buschelman /* Note: a array does not need sorting as it is in dense storage locations. */ 21277ba1a0bfSKris Buschelman ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 21287ba1a0bfSKris Buschelman 21297ba1a0bfSKris Buschelman /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 21307ba1a0bfSKris Buschelman prow = i/ppdof; /* integer division */ 21317ba1a0bfSKris Buschelman pshift = i%ppdof; 21327ba1a0bfSKris Buschelman poffset = pi[prow]; 21337ba1a0bfSKris Buschelman pnzi = pi[prow+1] - poffset; 21347ba1a0bfSKris Buschelman /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 21357ba1a0bfSKris Buschelman pJ = pj+poffset; 21367ba1a0bfSKris Buschelman pA = pa+poffset; 21377ba1a0bfSKris Buschelman for (j=0;j<pnzi;j++) { 21387ba1a0bfSKris Buschelman crow = (*pJ)*ppdof+pshift; 21397ba1a0bfSKris Buschelman cjj = cj + ci[crow]; 21407ba1a0bfSKris Buschelman caj = ca + ci[crow]; 21417ba1a0bfSKris Buschelman pJ++; 21427ba1a0bfSKris Buschelman /* Perform sparse axpy operation. Note cjj includes apj. */ 21437ba1a0bfSKris Buschelman for (k=0,nextap=0;nextap<apnzj;k++) { 21447ba1a0bfSKris Buschelman if (cjj[k]==apj[nextap]) { 21457ba1a0bfSKris Buschelman caj[k] += (*pA)*apa[apj[nextap++]]; 21467ba1a0bfSKris Buschelman } 21477ba1a0bfSKris Buschelman } 21487ba1a0bfSKris Buschelman flops += 2*apnzj; 21497ba1a0bfSKris Buschelman pA++; 21507ba1a0bfSKris Buschelman } 21517ba1a0bfSKris Buschelman 21527ba1a0bfSKris Buschelman /* Zero the current row info for A*P */ 21537ba1a0bfSKris Buschelman for (j=0;j<apnzj;j++) { 21547ba1a0bfSKris Buschelman apa[apj[j]] = 0.; 21557ba1a0bfSKris Buschelman apjdense[apj[j]] = 0; 21567ba1a0bfSKris Buschelman } 21577ba1a0bfSKris Buschelman } 21587ba1a0bfSKris Buschelman 21597ba1a0bfSKris Buschelman /* Assemble the final matrix and clean up */ 21607ba1a0bfSKris Buschelman ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 21617ba1a0bfSKris Buschelman ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 21627ba1a0bfSKris Buschelman ierr = PetscFree(apa);CHKERRQ(ierr); 21637ba1a0bfSKris Buschelman ierr = PetscLogFlops(flops);CHKERRQ(ierr); 21647ba1a0bfSKris Buschelman 21657ba1a0bfSKris Buschelman PetscFunctionReturn(0); 21667ba1a0bfSKris Buschelman } 21677ba1a0bfSKris Buschelman 2168*be1d678aSKris Buschelman EXTERN_C_BEGIN 21697ba1a0bfSKris Buschelman #undef __FUNCT__ 21700fd73130SBarry Smith #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ" 2171*be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqMAIJ_SeqAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat) 21729c4fc4c7SBarry Smith { 21739c4fc4c7SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2174ceb03754SKris Buschelman Mat a = b->AIJ,B; 21759c4fc4c7SBarry Smith Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 21769c4fc4c7SBarry Smith PetscErrorCode ierr; 21770fd73130SBarry Smith PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 2178ba8c8a56SBarry Smith PetscInt *cols; 2179ba8c8a56SBarry Smith PetscScalar *vals; 21809c4fc4c7SBarry Smith 21819c4fc4c7SBarry Smith PetscFunctionBegin; 21829c4fc4c7SBarry Smith ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 21830fd73130SBarry Smith ierr = PetscMalloc(dof*m*sizeof(int),&ilen);CHKERRQ(ierr); 21849c4fc4c7SBarry Smith for (i=0; i<m; i++) { 21859c4fc4c7SBarry Smith nmax = PetscMax(nmax,aij->ilen[i]); 21860fd73130SBarry Smith for (j=0; j<dof; j++) { 21870fd73130SBarry Smith ilen[dof*i+j] = aij->ilen[i]; 21889c4fc4c7SBarry Smith } 21899c4fc4c7SBarry Smith } 2190ceb03754SKris Buschelman ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr); 2191ceb03754SKris Buschelman ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 21929c4fc4c7SBarry Smith ierr = PetscFree(ilen);CHKERRQ(ierr); 21939c4fc4c7SBarry Smith ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr); 21949c4fc4c7SBarry Smith ii = 0; 21959c4fc4c7SBarry Smith for (i=0; i<m; i++) { 2196ba8c8a56SBarry Smith ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 21970fd73130SBarry Smith for (j=0; j<dof; j++) { 21989c4fc4c7SBarry Smith for (k=0; k<ncols; k++) { 21990fd73130SBarry Smith icols[k] = dof*cols[k]+j; 22009c4fc4c7SBarry Smith } 2201ceb03754SKris Buschelman ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 22029c4fc4c7SBarry Smith ii++; 22039c4fc4c7SBarry Smith } 2204ba8c8a56SBarry Smith ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 22059c4fc4c7SBarry Smith } 22069c4fc4c7SBarry Smith ierr = PetscFree(icols);CHKERRQ(ierr); 2207ceb03754SKris Buschelman ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2208ceb03754SKris Buschelman ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2209ceb03754SKris Buschelman 2210ceb03754SKris Buschelman if (reuse == MAT_REUSE_MATRIX) { 22118ab5b326SKris Buschelman ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 2212c3d102feSKris Buschelman } else { 2213ceb03754SKris Buschelman *newmat = B; 2214c3d102feSKris Buschelman } 22159c4fc4c7SBarry Smith PetscFunctionReturn(0); 22169c4fc4c7SBarry Smith } 2217*be1d678aSKris Buschelman EXTERN_C_END 22189c4fc4c7SBarry Smith 22190fd73130SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h" 2220*be1d678aSKris Buschelman 2221*be1d678aSKris Buschelman EXTERN_C_BEGIN 22220fd73130SBarry Smith #undef __FUNCT__ 22230fd73130SBarry Smith #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ" 2224*be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIMAIJ_MPIAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat) 22250fd73130SBarry Smith { 22260fd73130SBarry Smith Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 2227ceb03754SKris Buschelman Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 22280fd73130SBarry Smith Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 22290fd73130SBarry Smith Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 22300fd73130SBarry Smith Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 22310fd73130SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 22327a1a7badSBarry Smith PetscInt dof = maij->dof,i,j,*dnz,*onz,nmax = 0,onmax = 0,*oicols,*icols,ncols,*cols,oncols,*ocols; 22330fd73130SBarry Smith PetscInt rstart,cstart,*garray,ii,k; 22340fd73130SBarry Smith PetscErrorCode ierr; 22350fd73130SBarry Smith PetscScalar *vals,*ovals; 22360fd73130SBarry Smith 22370fd73130SBarry Smith PetscFunctionBegin; 22387a1a7badSBarry Smith ierr = PetscMalloc2(A->m,PetscInt,&dnz,A->m,PetscInt,&onz);CHKERRQ(ierr); 22390fd73130SBarry Smith for (i=0; i<A->m/dof; i++) { 22400fd73130SBarry Smith nmax = PetscMax(nmax,AIJ->ilen[i]); 22410fd73130SBarry Smith onmax = PetscMax(onmax,OAIJ->ilen[i]); 22420fd73130SBarry Smith for (j=0; j<dof; j++) { 22430fd73130SBarry Smith dnz[dof*i+j] = AIJ->ilen[i]; 22440fd73130SBarry Smith onz[dof*i+j] = OAIJ->ilen[i]; 22450fd73130SBarry Smith } 22460fd73130SBarry Smith } 2247ceb03754SKris Buschelman ierr = MatCreateMPIAIJ(A->comm,A->m,A->n,A->M,A->N,0,dnz,0,onz,&B);CHKERRQ(ierr); 2248ceb03754SKris Buschelman ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 22490fd73130SBarry Smith ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 22500fd73130SBarry Smith 22517a1a7badSBarry Smith ierr = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr); 22520fd73130SBarry Smith rstart = dof*mpiaij->rstart; 22530fd73130SBarry Smith cstart = dof*mpiaij->cstart; 22540fd73130SBarry Smith garray = mpiaij->garray; 22550fd73130SBarry Smith 22560fd73130SBarry Smith ii = rstart; 22570fd73130SBarry Smith for (i=0; i<A->m/dof; i++) { 22580fd73130SBarry Smith ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 22590fd73130SBarry Smith ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 22600fd73130SBarry Smith for (j=0; j<dof; j++) { 22610fd73130SBarry Smith for (k=0; k<ncols; k++) { 22620fd73130SBarry Smith icols[k] = cstart + dof*cols[k]+j; 22630fd73130SBarry Smith } 22640fd73130SBarry Smith for (k=0; k<oncols; k++) { 22650fd73130SBarry Smith oicols[k] = dof*garray[ocols[k]]+j; 22660fd73130SBarry Smith } 2267ceb03754SKris Buschelman ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 2268ceb03754SKris Buschelman ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 22690fd73130SBarry Smith ii++; 22700fd73130SBarry Smith } 22710fd73130SBarry Smith ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 22720fd73130SBarry Smith ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 22730fd73130SBarry Smith } 22740fd73130SBarry Smith ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 22750fd73130SBarry Smith 2276ceb03754SKris Buschelman ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2277ceb03754SKris Buschelman ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2278ceb03754SKris Buschelman 2279ceb03754SKris Buschelman if (reuse == MAT_REUSE_MATRIX) { 22808ab5b326SKris Buschelman ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 2281c3d102feSKris Buschelman } else { 2282ceb03754SKris Buschelman *newmat = B; 2283c3d102feSKris Buschelman } 22840fd73130SBarry Smith PetscFunctionReturn(0); 22850fd73130SBarry Smith } 2286*be1d678aSKris Buschelman EXTERN_C_END 22870fd73130SBarry Smith 22880fd73130SBarry Smith 2289bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */ 22905983afb6SSatish Balay /*MC 22910bad9183SKris Buschelman MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 22920bad9183SKris Buschelman operations for multicomponent problems. It interpolates each component the same 22930bad9183SKris Buschelman way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 22940bad9183SKris Buschelman and MATMPIAIJ for distributed matrices. 22950bad9183SKris Buschelman 22960bad9183SKris Buschelman Operations provided: 22970fd73130SBarry Smith + MatMult 22980bad9183SKris Buschelman . MatMultTranspose 22990bad9183SKris Buschelman . MatMultAdd 23000bad9183SKris Buschelman . MatMultTransposeAdd 23010fd73130SBarry Smith - MatView 23020bad9183SKris Buschelman 23030bad9183SKris Buschelman Level: advanced 23040bad9183SKris Buschelman 23050bad9183SKris Buschelman M*/ 23064a2ae208SSatish Balay #undef __FUNCT__ 23074a2ae208SSatish Balay #define __FUNCT__ "MatCreateMAIJ" 2308*be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 230982b1193eSBarry Smith { 2310dfbe8321SBarry Smith PetscErrorCode ierr; 2311b24ad042SBarry Smith PetscMPIInt size; 2312b24ad042SBarry Smith PetscInt n; 23134cb9d9b8SBarry Smith Mat_MPIMAIJ *b; 231482b1193eSBarry Smith Mat B; 231582b1193eSBarry Smith 231682b1193eSBarry Smith PetscFunctionBegin; 2317d72c5c08SBarry Smith ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2318d72c5c08SBarry Smith 2319d72c5c08SBarry Smith if (dof == 1) { 2320d72c5c08SBarry Smith *maij = A; 2321d72c5c08SBarry Smith } else { 232283903688SBarry Smith ierr = MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);CHKERRQ(ierr); 2323cd3bbe55SBarry Smith B->assembled = PETSC_TRUE; 2324d72c5c08SBarry Smith 2325f4a53059SBarry Smith ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2326f4a53059SBarry Smith if (size == 1) { 2327b9b97703SBarry Smith ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 23284cb9d9b8SBarry Smith B->ops->destroy = MatDestroy_SeqMAIJ; 23290fd73130SBarry Smith B->ops->view = MatView_SeqMAIJ; 2330b9b97703SBarry Smith b = (Mat_MPIMAIJ*)B->data; 2331b9b97703SBarry Smith b->dof = dof; 23324cb9d9b8SBarry Smith b->AIJ = A; 2333d72c5c08SBarry Smith if (dof == 2) { 2334cd3bbe55SBarry Smith B->ops->mult = MatMult_SeqMAIJ_2; 2335cd3bbe55SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_2; 2336cd3bbe55SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 2337cd3bbe55SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 2338bcc973b7SBarry Smith } else if (dof == 3) { 2339bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_3; 2340bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_3; 2341bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 2342bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 2343bcc973b7SBarry Smith } else if (dof == 4) { 2344bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_4; 2345bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_4; 2346bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 2347bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 2348f9fae5adSBarry Smith } else if (dof == 5) { 2349f9fae5adSBarry Smith B->ops->mult = MatMult_SeqMAIJ_5; 2350f9fae5adSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_5; 2351f9fae5adSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 2352f9fae5adSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 23536cd98798SBarry Smith } else if (dof == 6) { 23546cd98798SBarry Smith B->ops->mult = MatMult_SeqMAIJ_6; 23556cd98798SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_6; 23566cd98798SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 23576cd98798SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 2358ed8eea03SBarry Smith } else if (dof == 7) { 2359ed8eea03SBarry Smith B->ops->mult = MatMult_SeqMAIJ_7; 2360ed8eea03SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_7; 2361ed8eea03SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 2362ed8eea03SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 236366ed3db0SBarry Smith } else if (dof == 8) { 236466ed3db0SBarry Smith B->ops->mult = MatMult_SeqMAIJ_8; 236566ed3db0SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_8; 236666ed3db0SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 236766ed3db0SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 23682b692628SMatthew Knepley } else if (dof == 9) { 23692b692628SMatthew Knepley B->ops->mult = MatMult_SeqMAIJ_9; 23702b692628SMatthew Knepley B->ops->multadd = MatMultAdd_SeqMAIJ_9; 23712b692628SMatthew Knepley B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 23722b692628SMatthew Knepley B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 23732f7816d4SBarry Smith } else if (dof == 16) { 23742f7816d4SBarry Smith B->ops->mult = MatMult_SeqMAIJ_16; 23752f7816d4SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_16; 23762f7816d4SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 23772f7816d4SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 237882b1193eSBarry Smith } else { 237977431f27SBarry Smith SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof); 238082b1193eSBarry Smith } 23817ba1a0bfSKris Buschelman B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ; 23827ba1a0bfSKris Buschelman B->ops->ptapnumeric_seqaij = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 23839c4fc4c7SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 2384f4a53059SBarry Smith } else { 2385f4a53059SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 2386f4a53059SBarry Smith IS from,to; 2387f4a53059SBarry Smith Vec gvec; 2388b24ad042SBarry Smith PetscInt *garray,i; 2389f4a53059SBarry Smith 2390b9b97703SBarry Smith ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 2391d72c5c08SBarry Smith B->ops->destroy = MatDestroy_MPIMAIJ; 23920fd73130SBarry Smith B->ops->view = MatView_MPIMAIJ; 2393b9b97703SBarry Smith b = (Mat_MPIMAIJ*)B->data; 2394b9b97703SBarry Smith b->dof = dof; 2395b9b97703SBarry Smith b->A = A; 23964cb9d9b8SBarry Smith ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 23974cb9d9b8SBarry Smith ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 23984cb9d9b8SBarry Smith 2399f4a53059SBarry Smith ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 2400f4a53059SBarry Smith ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr); 240113288a74SBarry Smith ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 2402f4a53059SBarry Smith 2403f4a53059SBarry Smith /* create two temporary Index sets for build scatter gather */ 2404b24ad042SBarry Smith ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2405f4a53059SBarry Smith for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i]; 2406f4a53059SBarry Smith ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr); 2407f4a53059SBarry Smith ierr = PetscFree(garray);CHKERRQ(ierr); 2408f4a53059SBarry Smith ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 2409f4a53059SBarry Smith 2410f4a53059SBarry Smith /* create temporary global vector to generate scatter context */ 2411f4a53059SBarry Smith ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr); 241213288a74SBarry Smith ierr = VecSetBlockSize(gvec,dof);CHKERRQ(ierr); 2413f4a53059SBarry Smith 2414f4a53059SBarry Smith /* generate the scatter context */ 2415f4a53059SBarry Smith ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 2416f4a53059SBarry Smith 2417f4a53059SBarry Smith ierr = ISDestroy(from);CHKERRQ(ierr); 2418f4a53059SBarry Smith ierr = ISDestroy(to);CHKERRQ(ierr); 2419f4a53059SBarry Smith ierr = VecDestroy(gvec);CHKERRQ(ierr); 2420f4a53059SBarry Smith 2421f4a53059SBarry Smith B->ops->mult = MatMult_MPIMAIJ_dof; 24224cb9d9b8SBarry Smith B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 24234cb9d9b8SBarry Smith B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 24244cb9d9b8SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 24250fd73130SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 2426f4a53059SBarry Smith } 2427cd3bbe55SBarry Smith *maij = B; 24280fd73130SBarry Smith ierr = MatView_Private(B);CHKERRQ(ierr); 2429d72c5c08SBarry Smith } 243082b1193eSBarry Smith PetscFunctionReturn(0); 243182b1193eSBarry Smith } 243282b1193eSBarry Smith 243382b1193eSBarry Smith 243482b1193eSBarry Smith 243582b1193eSBarry Smith 243682b1193eSBarry Smith 243782b1193eSBarry Smith 243882b1193eSBarry Smith 243982b1193eSBarry Smith 244082b1193eSBarry Smith 244182b1193eSBarry Smith 244282b1193eSBarry Smith 244382b1193eSBarry Smith 2444