1be1d678aSKris Buschelman #define PETSCMAT_DLL 2be1d678aSKris Buschelman 382b1193eSBarry Smith /* 4cd3bbe55SBarry Smith Defines the basic matrix operations for the MAIJ matrix storage format. 5cd3bbe55SBarry Smith This format is used for restriction and interpolation operations for 6cd3bbe55SBarry Smith multicomponent problems. It interpolates each component the same way 7cd3bbe55SBarry Smith independently. 8cd3bbe55SBarry Smith 9cd3bbe55SBarry Smith We provide: 10cd3bbe55SBarry Smith MatMult() 11cd3bbe55SBarry Smith MatMultTranspose() 12cd3bbe55SBarry Smith MatMultTransposeAdd() 13cd3bbe55SBarry Smith MatMultAdd() 14cd3bbe55SBarry Smith and 15cd3bbe55SBarry Smith MatCreateMAIJ(Mat,dof,Mat*) 16f4a53059SBarry Smith 17f4a53059SBarry Smith This single directory handles both the sequential and parallel codes 1882b1193eSBarry Smith */ 1982b1193eSBarry Smith 20*ff585edeSJed Brown #include "../src/mat/impls/maij/maij.h" /*I "petscmat.h" I*/ 217c4f633dSBarry Smith #include "../src/mat/utils/freespace.h" 221d8d5f9aSSatish Balay #include "private/vecimpl.h" 2382b1193eSBarry Smith 244a2ae208SSatish Balay #undef __FUNCT__ 254a2ae208SSatish Balay #define __FUNCT__ "MatMAIJGetAIJ" 26*ff585edeSJed Brown /*@C 27*ff585edeSJed Brown MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix 28*ff585edeSJed Brown 29*ff585edeSJed Brown Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel 30*ff585edeSJed Brown 31*ff585edeSJed Brown Input Parameter: 32*ff585edeSJed Brown . A - the MAIJ matrix 33*ff585edeSJed Brown 34*ff585edeSJed Brown Output Parameter: 35*ff585edeSJed Brown . B - the AIJ matrix 36*ff585edeSJed Brown 37*ff585edeSJed Brown Level: advanced 38*ff585edeSJed Brown 39*ff585edeSJed Brown Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 40*ff585edeSJed Brown 41*ff585edeSJed Brown .seealso: MatCreateMAIJ() 42*ff585edeSJed Brown @*/ 43be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJGetAIJ(Mat A,Mat *B) 44b9b97703SBarry Smith { 45dfbe8321SBarry Smith PetscErrorCode ierr; 46b9b97703SBarry Smith PetscTruth ismpimaij,isseqmaij; 47b9b97703SBarry Smith 48b9b97703SBarry Smith PetscFunctionBegin; 49b9b97703SBarry Smith ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 50b9b97703SBarry Smith ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 51b9b97703SBarry Smith if (ismpimaij) { 52b9b97703SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 53b9b97703SBarry Smith 54b9b97703SBarry Smith *B = b->A; 55b9b97703SBarry Smith } else if (isseqmaij) { 56b9b97703SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 57b9b97703SBarry Smith 58b9b97703SBarry Smith *B = b->AIJ; 59b9b97703SBarry Smith } else { 60b9b97703SBarry Smith *B = A; 61b9b97703SBarry Smith } 62b9b97703SBarry Smith PetscFunctionReturn(0); 63b9b97703SBarry Smith } 64b9b97703SBarry Smith 654a2ae208SSatish Balay #undef __FUNCT__ 664a2ae208SSatish Balay #define __FUNCT__ "MatMAIJRedimension" 67*ff585edeSJed Brown /*@C 68*ff585edeSJed Brown MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size 69*ff585edeSJed Brown 70*ff585edeSJed Brown Collective 71*ff585edeSJed Brown 72*ff585edeSJed Brown Input Parameter: 73*ff585edeSJed Brown + A - the MAIJ matrix 74*ff585edeSJed Brown - dof - the block size for the new matrix 75*ff585edeSJed Brown 76*ff585edeSJed Brown Output Parameter: 77*ff585edeSJed Brown . B - the new MAIJ matrix 78*ff585edeSJed Brown 79*ff585edeSJed Brown Level: advanced 80*ff585edeSJed Brown 81*ff585edeSJed Brown .seealso: MatCreateMAIJ() 82*ff585edeSJed Brown @*/ 83be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 84b9b97703SBarry Smith { 85dfbe8321SBarry Smith PetscErrorCode ierr; 86b9b97703SBarry Smith Mat Aij; 87b9b97703SBarry Smith 88b9b97703SBarry Smith PetscFunctionBegin; 89b9b97703SBarry Smith ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 90b9b97703SBarry Smith ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 91b9b97703SBarry Smith PetscFunctionReturn(0); 92b9b97703SBarry Smith } 93b9b97703SBarry Smith 944a2ae208SSatish Balay #undef __FUNCT__ 954a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_SeqMAIJ" 96dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 9782b1193eSBarry Smith { 98dfbe8321SBarry Smith PetscErrorCode ierr; 994cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 10082b1193eSBarry Smith 10182b1193eSBarry Smith PetscFunctionBegin; 102cd3bbe55SBarry Smith if (b->AIJ) { 103cd3bbe55SBarry Smith ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 10482b1193eSBarry Smith } 1054cb9d9b8SBarry Smith ierr = PetscFree(b);CHKERRQ(ierr); 1064cb9d9b8SBarry Smith PetscFunctionReturn(0); 1074cb9d9b8SBarry Smith } 1084cb9d9b8SBarry Smith 1094a2ae208SSatish Balay #undef __FUNCT__ 1100fd73130SBarry Smith #define __FUNCT__ "MatView_SeqMAIJ" 1110fd73130SBarry Smith PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 1120fd73130SBarry Smith { 1130fd73130SBarry Smith PetscErrorCode ierr; 1140fd73130SBarry Smith Mat B; 1150fd73130SBarry Smith 1160fd73130SBarry Smith PetscFunctionBegin; 117ceb03754SKris Buschelman ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B); 1180fd73130SBarry Smith ierr = MatView(B,viewer);CHKERRQ(ierr); 1190fd73130SBarry Smith ierr = MatDestroy(B);CHKERRQ(ierr); 1200fd73130SBarry Smith PetscFunctionReturn(0); 1210fd73130SBarry Smith } 1220fd73130SBarry Smith 1230fd73130SBarry Smith #undef __FUNCT__ 1240fd73130SBarry Smith #define __FUNCT__ "MatView_MPIMAIJ" 1250fd73130SBarry Smith PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 1260fd73130SBarry Smith { 1270fd73130SBarry Smith PetscErrorCode ierr; 1280fd73130SBarry Smith Mat B; 1290fd73130SBarry Smith 1300fd73130SBarry Smith PetscFunctionBegin; 131ceb03754SKris Buschelman ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B); 1320fd73130SBarry Smith ierr = MatView(B,viewer);CHKERRQ(ierr); 1330fd73130SBarry Smith ierr = MatDestroy(B);CHKERRQ(ierr); 1340fd73130SBarry Smith PetscFunctionReturn(0); 1350fd73130SBarry Smith } 1360fd73130SBarry Smith 1370fd73130SBarry Smith #undef __FUNCT__ 1384a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIMAIJ" 139dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 1404cb9d9b8SBarry Smith { 141dfbe8321SBarry Smith PetscErrorCode ierr; 1424cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1434cb9d9b8SBarry Smith 1444cb9d9b8SBarry Smith PetscFunctionBegin; 1454cb9d9b8SBarry Smith if (b->AIJ) { 1464cb9d9b8SBarry Smith ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 1474cb9d9b8SBarry Smith } 148f4a53059SBarry Smith if (b->OAIJ) { 149f4a53059SBarry Smith ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr); 150f4a53059SBarry Smith } 151b9b97703SBarry Smith if (b->A) { 152b9b97703SBarry Smith ierr = MatDestroy(b->A);CHKERRQ(ierr); 153b9b97703SBarry Smith } 154f4a53059SBarry Smith if (b->ctx) { 155f4a53059SBarry Smith ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr); 156f4a53059SBarry Smith } 157f4a53059SBarry Smith if (b->w) { 158f4a53059SBarry Smith ierr = VecDestroy(b->w);CHKERRQ(ierr); 159f4a53059SBarry Smith } 160cd3bbe55SBarry Smith ierr = PetscFree(b);CHKERRQ(ierr); 161dbd8c25aSHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 162b9b97703SBarry Smith PetscFunctionReturn(0); 163b9b97703SBarry Smith } 164b9b97703SBarry Smith 1650bad9183SKris Buschelman /*MC 166fafad747SKris Buschelman MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 1670bad9183SKris Buschelman multicomponent problems, interpolating or restricting each component the same way independently. 1680bad9183SKris Buschelman The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 1690bad9183SKris Buschelman 1700bad9183SKris Buschelman Operations provided: 1710bad9183SKris Buschelman . MatMult 1720bad9183SKris Buschelman . MatMultTranspose 1730bad9183SKris Buschelman . MatMultAdd 1740bad9183SKris Buschelman . MatMultTransposeAdd 1750bad9183SKris Buschelman 1760bad9183SKris Buschelman Level: advanced 1770bad9183SKris Buschelman 1780bad9183SKris Buschelman .seealso: MatCreateSeqDense 1790bad9183SKris Buschelman M*/ 1800bad9183SKris Buschelman 18182b1193eSBarry Smith EXTERN_C_BEGIN 1824a2ae208SSatish Balay #undef __FUNCT__ 1834a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MAIJ" 184be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MAIJ(Mat A) 18582b1193eSBarry Smith { 186dfbe8321SBarry Smith PetscErrorCode ierr; 1874cb9d9b8SBarry Smith Mat_MPIMAIJ *b; 18864b52464SHong Zhang PetscMPIInt size; 18982b1193eSBarry Smith 19082b1193eSBarry Smith PetscFunctionBegin; 19138f2d2fdSLisandro Dalcin ierr = PetscNewLog(A,Mat_MPIMAIJ,&b);CHKERRQ(ierr); 192b0a32e0cSBarry Smith A->data = (void*)b; 193cd3bbe55SBarry Smith ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 194cd3bbe55SBarry Smith A->mapping = 0; 195f4a53059SBarry Smith 196cd3bbe55SBarry Smith b->AIJ = 0; 197cd3bbe55SBarry Smith b->dof = 0; 198f4a53059SBarry Smith b->OAIJ = 0; 199f4a53059SBarry Smith b->ctx = 0; 200f4a53059SBarry Smith b->w = 0; 2017adad957SLisandro Dalcin ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 20264b52464SHong Zhang if (size == 1){ 20364b52464SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr); 20464b52464SHong Zhang } else { 20564b52464SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr); 20664b52464SHong Zhang } 20782b1193eSBarry Smith PetscFunctionReturn(0); 20882b1193eSBarry Smith } 20982b1193eSBarry Smith EXTERN_C_END 21082b1193eSBarry Smith 211cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 2124a2ae208SSatish Balay #undef __FUNCT__ 213b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_2" 214dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 21582b1193eSBarry Smith { 2164cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 217bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 21887828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2; 219dfbe8321SBarry Smith PetscErrorCode ierr; 220d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 221b24ad042SBarry Smith PetscInt n,i,jrow,j; 22282b1193eSBarry Smith 223bcc973b7SBarry Smith PetscFunctionBegin; 2241ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2251ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 226bcc973b7SBarry Smith idx = a->j; 227bcc973b7SBarry Smith v = a->a; 228bcc973b7SBarry Smith ii = a->i; 229bcc973b7SBarry Smith 230bcc973b7SBarry Smith for (i=0; i<m; i++) { 231bcc973b7SBarry Smith jrow = ii[i]; 232bcc973b7SBarry Smith n = ii[i+1] - jrow; 233bcc973b7SBarry Smith sum1 = 0.0; 234bcc973b7SBarry Smith sum2 = 0.0; 235b3c51c66SMatthew Knepley nonzerorow += (n>0); 236bcc973b7SBarry Smith for (j=0; j<n; j++) { 237bcc973b7SBarry Smith sum1 += v[jrow]*x[2*idx[jrow]]; 238bcc973b7SBarry Smith sum2 += v[jrow]*x[2*idx[jrow]+1]; 239bcc973b7SBarry Smith jrow++; 240bcc973b7SBarry Smith } 241bcc973b7SBarry Smith y[2*i] = sum1; 242bcc973b7SBarry Smith y[2*i+1] = sum2; 243bcc973b7SBarry Smith } 244bcc973b7SBarry Smith 245b3c51c66SMatthew Knepley ierr = PetscLogFlops(4*a->nz - 2*nonzerorow);CHKERRQ(ierr); 2461ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2471ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 24882b1193eSBarry Smith PetscFunctionReturn(0); 24982b1193eSBarry Smith } 250bcc973b7SBarry Smith 2514a2ae208SSatish Balay #undef __FUNCT__ 252b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 253dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 25482b1193eSBarry Smith { 2554cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 256bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 25787828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0; 258dfbe8321SBarry Smith PetscErrorCode ierr; 259d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 26082b1193eSBarry Smith 261bcc973b7SBarry Smith PetscFunctionBegin; 2622dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 2631ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2641ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 265bfec09a0SHong Zhang 266bcc973b7SBarry Smith for (i=0; i<m; i++) { 267bfec09a0SHong Zhang idx = a->j + a->i[i] ; 268bfec09a0SHong Zhang v = a->a + a->i[i] ; 269bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 270bcc973b7SBarry Smith alpha1 = x[2*i]; 271bcc973b7SBarry Smith alpha2 = x[2*i+1]; 272bcc973b7SBarry Smith while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 273bcc973b7SBarry Smith } 274b3c51c66SMatthew Knepley ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr); 2751ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2761ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 27782b1193eSBarry Smith PetscFunctionReturn(0); 27882b1193eSBarry Smith } 279bcc973b7SBarry Smith 2804a2ae208SSatish Balay #undef __FUNCT__ 281b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 282dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 28382b1193eSBarry Smith { 2844cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 285bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 28687828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2; 287dfbe8321SBarry Smith PetscErrorCode ierr; 288d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,*idx,*ii; 289b24ad042SBarry Smith PetscInt n,i,jrow,j; 29082b1193eSBarry Smith 291bcc973b7SBarry Smith PetscFunctionBegin; 292f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2931ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2941ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 295bcc973b7SBarry Smith idx = a->j; 296bcc973b7SBarry Smith v = a->a; 297bcc973b7SBarry Smith ii = a->i; 298bcc973b7SBarry Smith 299bcc973b7SBarry Smith for (i=0; i<m; i++) { 300bcc973b7SBarry Smith jrow = ii[i]; 301bcc973b7SBarry Smith n = ii[i+1] - jrow; 302bcc973b7SBarry Smith sum1 = 0.0; 303bcc973b7SBarry Smith sum2 = 0.0; 304bcc973b7SBarry Smith for (j=0; j<n; j++) { 305bcc973b7SBarry Smith sum1 += v[jrow]*x[2*idx[jrow]]; 306bcc973b7SBarry Smith sum2 += v[jrow]*x[2*idx[jrow]+1]; 307bcc973b7SBarry Smith jrow++; 308bcc973b7SBarry Smith } 309bcc973b7SBarry Smith y[2*i] += sum1; 310bcc973b7SBarry Smith y[2*i+1] += sum2; 311bcc973b7SBarry Smith } 312bcc973b7SBarry Smith 313b3c51c66SMatthew Knepley ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr); 3141ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3151ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 316bcc973b7SBarry Smith PetscFunctionReturn(0); 31782b1193eSBarry Smith } 3184a2ae208SSatish Balay #undef __FUNCT__ 319b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 320dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 32182b1193eSBarry Smith { 3224cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 323bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 32487828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2; 325dfbe8321SBarry Smith PetscErrorCode ierr; 326d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 32782b1193eSBarry Smith 328bcc973b7SBarry Smith PetscFunctionBegin; 329f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 3301ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3311ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 332bfec09a0SHong Zhang 333bcc973b7SBarry Smith for (i=0; i<m; i++) { 334bfec09a0SHong Zhang idx = a->j + a->i[i] ; 335bfec09a0SHong Zhang v = a->a + a->i[i] ; 336bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 337bcc973b7SBarry Smith alpha1 = x[2*i]; 338bcc973b7SBarry Smith alpha2 = x[2*i+1]; 339bcc973b7SBarry Smith while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 340bcc973b7SBarry Smith } 341b3c51c66SMatthew Knepley ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr); 3421ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3431ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 344bcc973b7SBarry Smith PetscFunctionReturn(0); 34582b1193eSBarry Smith } 346cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 3474a2ae208SSatish Balay #undef __FUNCT__ 348b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_3" 349dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 350bcc973b7SBarry Smith { 3514cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 352bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 35387828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3; 354dfbe8321SBarry Smith PetscErrorCode ierr; 355d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 356b24ad042SBarry Smith PetscInt n,i,jrow,j; 35782b1193eSBarry Smith 358bcc973b7SBarry Smith PetscFunctionBegin; 3591ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3601ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 361bcc973b7SBarry Smith idx = a->j; 362bcc973b7SBarry Smith v = a->a; 363bcc973b7SBarry Smith ii = a->i; 364bcc973b7SBarry Smith 365bcc973b7SBarry Smith for (i=0; i<m; i++) { 366bcc973b7SBarry Smith jrow = ii[i]; 367bcc973b7SBarry Smith n = ii[i+1] - jrow; 368bcc973b7SBarry Smith sum1 = 0.0; 369bcc973b7SBarry Smith sum2 = 0.0; 370bcc973b7SBarry Smith sum3 = 0.0; 371b3c51c66SMatthew Knepley nonzerorow += (n>0); 372bcc973b7SBarry Smith for (j=0; j<n; j++) { 373bcc973b7SBarry Smith sum1 += v[jrow]*x[3*idx[jrow]]; 374bcc973b7SBarry Smith sum2 += v[jrow]*x[3*idx[jrow]+1]; 375bcc973b7SBarry Smith sum3 += v[jrow]*x[3*idx[jrow]+2]; 376bcc973b7SBarry Smith jrow++; 377bcc973b7SBarry Smith } 378bcc973b7SBarry Smith y[3*i] = sum1; 379bcc973b7SBarry Smith y[3*i+1] = sum2; 380bcc973b7SBarry Smith y[3*i+2] = sum3; 381bcc973b7SBarry Smith } 382bcc973b7SBarry Smith 383b3c51c66SMatthew Knepley ierr = PetscLogFlops(6*a->nz - 3*nonzerorow);CHKERRQ(ierr); 3841ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3851ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 386bcc973b7SBarry Smith PetscFunctionReturn(0); 387bcc973b7SBarry Smith } 388bcc973b7SBarry Smith 3894a2ae208SSatish Balay #undef __FUNCT__ 390b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 391dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 392bcc973b7SBarry Smith { 3934cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 394bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 39587828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0; 396dfbe8321SBarry Smith PetscErrorCode ierr; 397d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 398bcc973b7SBarry Smith 399bcc973b7SBarry Smith PetscFunctionBegin; 4002dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 4011ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 4021ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 403bfec09a0SHong Zhang 404bcc973b7SBarry Smith for (i=0; i<m; i++) { 405bfec09a0SHong Zhang idx = a->j + a->i[i]; 406bfec09a0SHong Zhang v = a->a + a->i[i]; 407bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 408bcc973b7SBarry Smith alpha1 = x[3*i]; 409bcc973b7SBarry Smith alpha2 = x[3*i+1]; 410bcc973b7SBarry Smith alpha3 = x[3*i+2]; 411bcc973b7SBarry Smith while (n-->0) { 412bcc973b7SBarry Smith y[3*(*idx)] += alpha1*(*v); 413bcc973b7SBarry Smith y[3*(*idx)+1] += alpha2*(*v); 414bcc973b7SBarry Smith y[3*(*idx)+2] += alpha3*(*v); 415bcc973b7SBarry Smith idx++; v++; 416bcc973b7SBarry Smith } 417bcc973b7SBarry Smith } 418b3c51c66SMatthew Knepley ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr); 4191ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 4201ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 421bcc973b7SBarry Smith PetscFunctionReturn(0); 422bcc973b7SBarry Smith } 423bcc973b7SBarry Smith 4244a2ae208SSatish Balay #undef __FUNCT__ 425b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 426dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 427bcc973b7SBarry Smith { 4284cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 429bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 43087828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3; 431dfbe8321SBarry Smith PetscErrorCode ierr; 432d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,*idx,*ii; 433b24ad042SBarry Smith PetscInt n,i,jrow,j; 434bcc973b7SBarry Smith 435bcc973b7SBarry Smith PetscFunctionBegin; 436f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 4371ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 4381ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 439bcc973b7SBarry Smith idx = a->j; 440bcc973b7SBarry Smith v = a->a; 441bcc973b7SBarry Smith ii = a->i; 442bcc973b7SBarry Smith 443bcc973b7SBarry Smith for (i=0; i<m; i++) { 444bcc973b7SBarry Smith jrow = ii[i]; 445bcc973b7SBarry Smith n = ii[i+1] - jrow; 446bcc973b7SBarry Smith sum1 = 0.0; 447bcc973b7SBarry Smith sum2 = 0.0; 448bcc973b7SBarry Smith sum3 = 0.0; 449bcc973b7SBarry Smith for (j=0; j<n; j++) { 450bcc973b7SBarry Smith sum1 += v[jrow]*x[3*idx[jrow]]; 451bcc973b7SBarry Smith sum2 += v[jrow]*x[3*idx[jrow]+1]; 452bcc973b7SBarry Smith sum3 += v[jrow]*x[3*idx[jrow]+2]; 453bcc973b7SBarry Smith jrow++; 454bcc973b7SBarry Smith } 455bcc973b7SBarry Smith y[3*i] += sum1; 456bcc973b7SBarry Smith y[3*i+1] += sum2; 457bcc973b7SBarry Smith y[3*i+2] += sum3; 458bcc973b7SBarry Smith } 459bcc973b7SBarry Smith 460efee365bSSatish Balay ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr); 4611ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 4621ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 463bcc973b7SBarry Smith PetscFunctionReturn(0); 464bcc973b7SBarry Smith } 4654a2ae208SSatish Balay #undef __FUNCT__ 466b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 467dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 468bcc973b7SBarry Smith { 4694cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 470bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 47187828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3; 472dfbe8321SBarry Smith PetscErrorCode ierr; 473d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 474bcc973b7SBarry Smith 475bcc973b7SBarry Smith PetscFunctionBegin; 476f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 4771ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 4781ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 479bcc973b7SBarry Smith for (i=0; i<m; i++) { 480bfec09a0SHong Zhang idx = a->j + a->i[i] ; 481bfec09a0SHong Zhang v = a->a + a->i[i] ; 482bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 483bcc973b7SBarry Smith alpha1 = x[3*i]; 484bcc973b7SBarry Smith alpha2 = x[3*i+1]; 485bcc973b7SBarry Smith alpha3 = x[3*i+2]; 486bcc973b7SBarry Smith while (n-->0) { 487bcc973b7SBarry Smith y[3*(*idx)] += alpha1*(*v); 488bcc973b7SBarry Smith y[3*(*idx)+1] += alpha2*(*v); 489bcc973b7SBarry Smith y[3*(*idx)+2] += alpha3*(*v); 490bcc973b7SBarry Smith idx++; v++; 491bcc973b7SBarry Smith } 492bcc973b7SBarry Smith } 493efee365bSSatish Balay ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr); 4941ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 4951ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 496bcc973b7SBarry Smith PetscFunctionReturn(0); 497bcc973b7SBarry Smith } 498bcc973b7SBarry Smith 499bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/ 5004a2ae208SSatish Balay #undef __FUNCT__ 501b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_4" 502dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 503bcc973b7SBarry Smith { 5044cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 505bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 50687828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 507dfbe8321SBarry Smith PetscErrorCode ierr; 508d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 509b24ad042SBarry Smith PetscInt n,i,jrow,j; 510bcc973b7SBarry Smith 511bcc973b7SBarry Smith PetscFunctionBegin; 5121ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 5131ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 514bcc973b7SBarry Smith idx = a->j; 515bcc973b7SBarry Smith v = a->a; 516bcc973b7SBarry Smith ii = a->i; 517bcc973b7SBarry Smith 518bcc973b7SBarry Smith for (i=0; i<m; i++) { 519bcc973b7SBarry Smith jrow = ii[i]; 520bcc973b7SBarry Smith n = ii[i+1] - jrow; 521bcc973b7SBarry Smith sum1 = 0.0; 522bcc973b7SBarry Smith sum2 = 0.0; 523bcc973b7SBarry Smith sum3 = 0.0; 524bcc973b7SBarry Smith sum4 = 0.0; 525b3c51c66SMatthew Knepley nonzerorow += (n>0); 526bcc973b7SBarry Smith for (j=0; j<n; j++) { 527bcc973b7SBarry Smith sum1 += v[jrow]*x[4*idx[jrow]]; 528bcc973b7SBarry Smith sum2 += v[jrow]*x[4*idx[jrow]+1]; 529bcc973b7SBarry Smith sum3 += v[jrow]*x[4*idx[jrow]+2]; 530bcc973b7SBarry Smith sum4 += v[jrow]*x[4*idx[jrow]+3]; 531bcc973b7SBarry Smith jrow++; 532bcc973b7SBarry Smith } 533bcc973b7SBarry Smith y[4*i] = sum1; 534bcc973b7SBarry Smith y[4*i+1] = sum2; 535bcc973b7SBarry Smith y[4*i+2] = sum3; 536bcc973b7SBarry Smith y[4*i+3] = sum4; 537bcc973b7SBarry Smith } 538bcc973b7SBarry Smith 539b3c51c66SMatthew Knepley ierr = PetscLogFlops(8*a->nz - 4*nonzerorow);CHKERRQ(ierr); 5401ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 5411ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 542bcc973b7SBarry Smith PetscFunctionReturn(0); 543bcc973b7SBarry Smith } 544bcc973b7SBarry Smith 5454a2ae208SSatish Balay #undef __FUNCT__ 546b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 547dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 548bcc973b7SBarry Smith { 5494cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 550bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 55187828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0; 552dfbe8321SBarry Smith PetscErrorCode ierr; 553d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 554bcc973b7SBarry Smith 555bcc973b7SBarry Smith PetscFunctionBegin; 5562dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 5571ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 5581ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 559bcc973b7SBarry Smith for (i=0; i<m; i++) { 560bfec09a0SHong Zhang idx = a->j + a->i[i] ; 561bfec09a0SHong Zhang v = a->a + a->i[i] ; 562bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 563bcc973b7SBarry Smith alpha1 = x[4*i]; 564bcc973b7SBarry Smith alpha2 = x[4*i+1]; 565bcc973b7SBarry Smith alpha3 = x[4*i+2]; 566bcc973b7SBarry Smith alpha4 = x[4*i+3]; 567bcc973b7SBarry Smith while (n-->0) { 568bcc973b7SBarry Smith y[4*(*idx)] += alpha1*(*v); 569bcc973b7SBarry Smith y[4*(*idx)+1] += alpha2*(*v); 570bcc973b7SBarry Smith y[4*(*idx)+2] += alpha3*(*v); 571bcc973b7SBarry Smith y[4*(*idx)+3] += alpha4*(*v); 572bcc973b7SBarry Smith idx++; v++; 573bcc973b7SBarry Smith } 574bcc973b7SBarry Smith } 575b3c51c66SMatthew Knepley ierr = PetscLogFlops(8*a->nz);CHKERRQ(ierr); 5761ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 5771ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 578bcc973b7SBarry Smith PetscFunctionReturn(0); 579bcc973b7SBarry Smith } 580bcc973b7SBarry Smith 5814a2ae208SSatish Balay #undef __FUNCT__ 582b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 583dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 584bcc973b7SBarry Smith { 5854cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 586f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 58787828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 588dfbe8321SBarry Smith PetscErrorCode ierr; 589d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,*idx,*ii; 590b24ad042SBarry Smith PetscInt n,i,jrow,j; 591f9fae5adSBarry Smith 592f9fae5adSBarry Smith PetscFunctionBegin; 593f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 5941ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 5951ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 596f9fae5adSBarry Smith idx = a->j; 597f9fae5adSBarry Smith v = a->a; 598f9fae5adSBarry Smith ii = a->i; 599f9fae5adSBarry Smith 600f9fae5adSBarry Smith for (i=0; i<m; i++) { 601f9fae5adSBarry Smith jrow = ii[i]; 602f9fae5adSBarry Smith n = ii[i+1] - jrow; 603f9fae5adSBarry Smith sum1 = 0.0; 604f9fae5adSBarry Smith sum2 = 0.0; 605f9fae5adSBarry Smith sum3 = 0.0; 606f9fae5adSBarry Smith sum4 = 0.0; 607f9fae5adSBarry Smith for (j=0; j<n; j++) { 608f9fae5adSBarry Smith sum1 += v[jrow]*x[4*idx[jrow]]; 609f9fae5adSBarry Smith sum2 += v[jrow]*x[4*idx[jrow]+1]; 610f9fae5adSBarry Smith sum3 += v[jrow]*x[4*idx[jrow]+2]; 611f9fae5adSBarry Smith sum4 += v[jrow]*x[4*idx[jrow]+3]; 612f9fae5adSBarry Smith jrow++; 613f9fae5adSBarry Smith } 614f9fae5adSBarry Smith y[4*i] += sum1; 615f9fae5adSBarry Smith y[4*i+1] += sum2; 616f9fae5adSBarry Smith y[4*i+2] += sum3; 617f9fae5adSBarry Smith y[4*i+3] += sum4; 618f9fae5adSBarry Smith } 619f9fae5adSBarry Smith 620b3c51c66SMatthew Knepley ierr = PetscLogFlops(8*a->nz);CHKERRQ(ierr); 6211ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 6221ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 623f9fae5adSBarry Smith PetscFunctionReturn(0); 624bcc973b7SBarry Smith } 6254a2ae208SSatish Balay #undef __FUNCT__ 626b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 627dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 628bcc973b7SBarry Smith { 6294cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 630f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 63187828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4; 632dfbe8321SBarry Smith PetscErrorCode ierr; 633d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 634f9fae5adSBarry Smith 635f9fae5adSBarry Smith PetscFunctionBegin; 636f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 6371ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 6381ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 639bfec09a0SHong Zhang 640f9fae5adSBarry Smith for (i=0; i<m; i++) { 641bfec09a0SHong Zhang idx = a->j + a->i[i] ; 642bfec09a0SHong Zhang v = a->a + a->i[i] ; 643f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 644f9fae5adSBarry Smith alpha1 = x[4*i]; 645f9fae5adSBarry Smith alpha2 = x[4*i+1]; 646f9fae5adSBarry Smith alpha3 = x[4*i+2]; 647f9fae5adSBarry Smith alpha4 = x[4*i+3]; 648f9fae5adSBarry Smith while (n-->0) { 649f9fae5adSBarry Smith y[4*(*idx)] += alpha1*(*v); 650f9fae5adSBarry Smith y[4*(*idx)+1] += alpha2*(*v); 651f9fae5adSBarry Smith y[4*(*idx)+2] += alpha3*(*v); 652f9fae5adSBarry Smith y[4*(*idx)+3] += alpha4*(*v); 653f9fae5adSBarry Smith idx++; v++; 654f9fae5adSBarry Smith } 655f9fae5adSBarry Smith } 656b3c51c66SMatthew Knepley ierr = PetscLogFlops(8*a->nz);CHKERRQ(ierr); 6571ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 6581ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 659f9fae5adSBarry Smith PetscFunctionReturn(0); 660f9fae5adSBarry Smith } 661f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/ 6626cd98798SBarry Smith 6634a2ae208SSatish Balay #undef __FUNCT__ 664b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_5" 665dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 666f9fae5adSBarry Smith { 6674cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 668f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 66987828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 670dfbe8321SBarry Smith PetscErrorCode ierr; 671d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 672b24ad042SBarry Smith PetscInt n,i,jrow,j; 673f9fae5adSBarry Smith 674f9fae5adSBarry Smith PetscFunctionBegin; 6751ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 6761ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 677f9fae5adSBarry Smith idx = a->j; 678f9fae5adSBarry Smith v = a->a; 679f9fae5adSBarry Smith ii = a->i; 680f9fae5adSBarry Smith 681f9fae5adSBarry Smith for (i=0; i<m; i++) { 682f9fae5adSBarry Smith jrow = ii[i]; 683f9fae5adSBarry Smith n = ii[i+1] - jrow; 684f9fae5adSBarry Smith sum1 = 0.0; 685f9fae5adSBarry Smith sum2 = 0.0; 686f9fae5adSBarry Smith sum3 = 0.0; 687f9fae5adSBarry Smith sum4 = 0.0; 688f9fae5adSBarry Smith sum5 = 0.0; 689b3c51c66SMatthew Knepley nonzerorow += (n>0); 690f9fae5adSBarry Smith for (j=0; j<n; j++) { 691f9fae5adSBarry Smith sum1 += v[jrow]*x[5*idx[jrow]]; 692f9fae5adSBarry Smith sum2 += v[jrow]*x[5*idx[jrow]+1]; 693f9fae5adSBarry Smith sum3 += v[jrow]*x[5*idx[jrow]+2]; 694f9fae5adSBarry Smith sum4 += v[jrow]*x[5*idx[jrow]+3]; 695f9fae5adSBarry Smith sum5 += v[jrow]*x[5*idx[jrow]+4]; 696f9fae5adSBarry Smith jrow++; 697f9fae5adSBarry Smith } 698f9fae5adSBarry Smith y[5*i] = sum1; 699f9fae5adSBarry Smith y[5*i+1] = sum2; 700f9fae5adSBarry Smith y[5*i+2] = sum3; 701f9fae5adSBarry Smith y[5*i+3] = sum4; 702f9fae5adSBarry Smith y[5*i+4] = sum5; 703f9fae5adSBarry Smith } 704f9fae5adSBarry Smith 705b3c51c66SMatthew Knepley ierr = PetscLogFlops(10*a->nz - 5*nonzerorow);CHKERRQ(ierr); 7061ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 7071ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 708f9fae5adSBarry Smith PetscFunctionReturn(0); 709f9fae5adSBarry Smith } 710f9fae5adSBarry Smith 7114a2ae208SSatish Balay #undef __FUNCT__ 712b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 713dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 714f9fae5adSBarry Smith { 7154cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 716f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 71787828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0; 718dfbe8321SBarry Smith PetscErrorCode ierr; 719d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 720f9fae5adSBarry Smith 721f9fae5adSBarry Smith PetscFunctionBegin; 7222dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 7231ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 7241ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 725bfec09a0SHong Zhang 726f9fae5adSBarry Smith for (i=0; i<m; i++) { 727bfec09a0SHong Zhang idx = a->j + a->i[i] ; 728bfec09a0SHong Zhang v = a->a + a->i[i] ; 729f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 730f9fae5adSBarry Smith alpha1 = x[5*i]; 731f9fae5adSBarry Smith alpha2 = x[5*i+1]; 732f9fae5adSBarry Smith alpha3 = x[5*i+2]; 733f9fae5adSBarry Smith alpha4 = x[5*i+3]; 734f9fae5adSBarry Smith alpha5 = x[5*i+4]; 735f9fae5adSBarry Smith while (n-->0) { 736f9fae5adSBarry Smith y[5*(*idx)] += alpha1*(*v); 737f9fae5adSBarry Smith y[5*(*idx)+1] += alpha2*(*v); 738f9fae5adSBarry Smith y[5*(*idx)+2] += alpha3*(*v); 739f9fae5adSBarry Smith y[5*(*idx)+3] += alpha4*(*v); 740f9fae5adSBarry Smith y[5*(*idx)+4] += alpha5*(*v); 741f9fae5adSBarry Smith idx++; v++; 742f9fae5adSBarry Smith } 743f9fae5adSBarry Smith } 744b3c51c66SMatthew Knepley ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr); 7451ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 7461ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 747f9fae5adSBarry Smith PetscFunctionReturn(0); 748f9fae5adSBarry Smith } 749f9fae5adSBarry Smith 7504a2ae208SSatish Balay #undef __FUNCT__ 751b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 752dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 753f9fae5adSBarry Smith { 7544cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 755f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 75687828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 757dfbe8321SBarry Smith PetscErrorCode ierr; 758d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,*idx,*ii; 759b24ad042SBarry Smith PetscInt n,i,jrow,j; 760f9fae5adSBarry Smith 761f9fae5adSBarry Smith PetscFunctionBegin; 762f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 7631ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 7641ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 765f9fae5adSBarry Smith idx = a->j; 766f9fae5adSBarry Smith v = a->a; 767f9fae5adSBarry Smith ii = a->i; 768f9fae5adSBarry Smith 769f9fae5adSBarry Smith for (i=0; i<m; i++) { 770f9fae5adSBarry Smith jrow = ii[i]; 771f9fae5adSBarry Smith n = ii[i+1] - jrow; 772f9fae5adSBarry Smith sum1 = 0.0; 773f9fae5adSBarry Smith sum2 = 0.0; 774f9fae5adSBarry Smith sum3 = 0.0; 775f9fae5adSBarry Smith sum4 = 0.0; 776f9fae5adSBarry Smith sum5 = 0.0; 777f9fae5adSBarry Smith for (j=0; j<n; j++) { 778f9fae5adSBarry Smith sum1 += v[jrow]*x[5*idx[jrow]]; 779f9fae5adSBarry Smith sum2 += v[jrow]*x[5*idx[jrow]+1]; 780f9fae5adSBarry Smith sum3 += v[jrow]*x[5*idx[jrow]+2]; 781f9fae5adSBarry Smith sum4 += v[jrow]*x[5*idx[jrow]+3]; 782f9fae5adSBarry Smith sum5 += v[jrow]*x[5*idx[jrow]+4]; 783f9fae5adSBarry Smith jrow++; 784f9fae5adSBarry Smith } 785f9fae5adSBarry Smith y[5*i] += sum1; 786f9fae5adSBarry Smith y[5*i+1] += sum2; 787f9fae5adSBarry Smith y[5*i+2] += sum3; 788f9fae5adSBarry Smith y[5*i+3] += sum4; 789f9fae5adSBarry Smith y[5*i+4] += sum5; 790f9fae5adSBarry Smith } 791f9fae5adSBarry Smith 792efee365bSSatish Balay ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr); 7931ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 7941ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 795f9fae5adSBarry Smith PetscFunctionReturn(0); 796f9fae5adSBarry Smith } 797f9fae5adSBarry Smith 7984a2ae208SSatish Balay #undef __FUNCT__ 799b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 800dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 801f9fae5adSBarry Smith { 8024cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 803f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 80487828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5; 805dfbe8321SBarry Smith PetscErrorCode ierr; 806d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 807f9fae5adSBarry Smith 808f9fae5adSBarry Smith PetscFunctionBegin; 809f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 8101ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 8111ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 812bfec09a0SHong Zhang 813f9fae5adSBarry Smith for (i=0; i<m; i++) { 814bfec09a0SHong Zhang idx = a->j + a->i[i] ; 815bfec09a0SHong Zhang v = a->a + a->i[i] ; 816f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 817f9fae5adSBarry Smith alpha1 = x[5*i]; 818f9fae5adSBarry Smith alpha2 = x[5*i+1]; 819f9fae5adSBarry Smith alpha3 = x[5*i+2]; 820f9fae5adSBarry Smith alpha4 = x[5*i+3]; 821f9fae5adSBarry Smith alpha5 = x[5*i+4]; 822f9fae5adSBarry Smith while (n-->0) { 823f9fae5adSBarry Smith y[5*(*idx)] += alpha1*(*v); 824f9fae5adSBarry Smith y[5*(*idx)+1] += alpha2*(*v); 825f9fae5adSBarry Smith y[5*(*idx)+2] += alpha3*(*v); 826f9fae5adSBarry Smith y[5*(*idx)+3] += alpha4*(*v); 827f9fae5adSBarry Smith y[5*(*idx)+4] += alpha5*(*v); 828f9fae5adSBarry Smith idx++; v++; 829f9fae5adSBarry Smith } 830f9fae5adSBarry Smith } 831efee365bSSatish Balay ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr); 8321ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 8331ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 834f9fae5adSBarry Smith PetscFunctionReturn(0); 835bcc973b7SBarry Smith } 836bcc973b7SBarry Smith 8376cd98798SBarry Smith /* ------------------------------------------------------------------------------*/ 8384a2ae208SSatish Balay #undef __FUNCT__ 839b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_6" 840dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 8416cd98798SBarry Smith { 8426cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 8436cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 84487828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 845dfbe8321SBarry Smith PetscErrorCode ierr; 846d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 847b24ad042SBarry Smith PetscInt n,i,jrow,j; 8486cd98798SBarry Smith 8496cd98798SBarry Smith PetscFunctionBegin; 8501ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 8511ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 8526cd98798SBarry Smith idx = a->j; 8536cd98798SBarry Smith v = a->a; 8546cd98798SBarry Smith ii = a->i; 8556cd98798SBarry Smith 8566cd98798SBarry Smith for (i=0; i<m; i++) { 8576cd98798SBarry Smith jrow = ii[i]; 8586cd98798SBarry Smith n = ii[i+1] - jrow; 8596cd98798SBarry Smith sum1 = 0.0; 8606cd98798SBarry Smith sum2 = 0.0; 8616cd98798SBarry Smith sum3 = 0.0; 8626cd98798SBarry Smith sum4 = 0.0; 8636cd98798SBarry Smith sum5 = 0.0; 8646cd98798SBarry Smith sum6 = 0.0; 865b3c51c66SMatthew Knepley nonzerorow += (n>0); 8666cd98798SBarry Smith for (j=0; j<n; j++) { 8676cd98798SBarry Smith sum1 += v[jrow]*x[6*idx[jrow]]; 8686cd98798SBarry Smith sum2 += v[jrow]*x[6*idx[jrow]+1]; 8696cd98798SBarry Smith sum3 += v[jrow]*x[6*idx[jrow]+2]; 8706cd98798SBarry Smith sum4 += v[jrow]*x[6*idx[jrow]+3]; 8716cd98798SBarry Smith sum5 += v[jrow]*x[6*idx[jrow]+4]; 8726cd98798SBarry Smith sum6 += v[jrow]*x[6*idx[jrow]+5]; 8736cd98798SBarry Smith jrow++; 8746cd98798SBarry Smith } 8756cd98798SBarry Smith y[6*i] = sum1; 8766cd98798SBarry Smith y[6*i+1] = sum2; 8776cd98798SBarry Smith y[6*i+2] = sum3; 8786cd98798SBarry Smith y[6*i+3] = sum4; 8796cd98798SBarry Smith y[6*i+4] = sum5; 8806cd98798SBarry Smith y[6*i+5] = sum6; 8816cd98798SBarry Smith } 8826cd98798SBarry Smith 883b3c51c66SMatthew Knepley ierr = PetscLogFlops(12*a->nz - 6*nonzerorow);CHKERRQ(ierr); 8841ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 8851ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 8866cd98798SBarry Smith PetscFunctionReturn(0); 8876cd98798SBarry Smith } 8886cd98798SBarry Smith 8894a2ae208SSatish Balay #undef __FUNCT__ 890b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 891dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 8926cd98798SBarry Smith { 8936cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 8946cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 89587828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0; 896dfbe8321SBarry Smith PetscErrorCode ierr; 897d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 8986cd98798SBarry Smith 8996cd98798SBarry Smith PetscFunctionBegin; 9002dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 9011ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 9021ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 903bfec09a0SHong Zhang 9046cd98798SBarry Smith for (i=0; i<m; i++) { 905bfec09a0SHong Zhang idx = a->j + a->i[i] ; 906bfec09a0SHong Zhang v = a->a + a->i[i] ; 9076cd98798SBarry Smith n = a->i[i+1] - a->i[i]; 9086cd98798SBarry Smith alpha1 = x[6*i]; 9096cd98798SBarry Smith alpha2 = x[6*i+1]; 9106cd98798SBarry Smith alpha3 = x[6*i+2]; 9116cd98798SBarry Smith alpha4 = x[6*i+3]; 9126cd98798SBarry Smith alpha5 = x[6*i+4]; 9136cd98798SBarry Smith alpha6 = x[6*i+5]; 9146cd98798SBarry Smith while (n-->0) { 9156cd98798SBarry Smith y[6*(*idx)] += alpha1*(*v); 9166cd98798SBarry Smith y[6*(*idx)+1] += alpha2*(*v); 9176cd98798SBarry Smith y[6*(*idx)+2] += alpha3*(*v); 9186cd98798SBarry Smith y[6*(*idx)+3] += alpha4*(*v); 9196cd98798SBarry Smith y[6*(*idx)+4] += alpha5*(*v); 9206cd98798SBarry Smith y[6*(*idx)+5] += alpha6*(*v); 9216cd98798SBarry Smith idx++; v++; 9226cd98798SBarry Smith } 9236cd98798SBarry Smith } 924b3c51c66SMatthew Knepley ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr); 9251ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 9261ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 9276cd98798SBarry Smith PetscFunctionReturn(0); 9286cd98798SBarry Smith } 9296cd98798SBarry Smith 9304a2ae208SSatish Balay #undef __FUNCT__ 931b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 932dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 9336cd98798SBarry Smith { 9346cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 9356cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 93687828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 937dfbe8321SBarry Smith PetscErrorCode ierr; 938d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,*idx,*ii; 939b24ad042SBarry Smith PetscInt n,i,jrow,j; 9406cd98798SBarry Smith 9416cd98798SBarry Smith PetscFunctionBegin; 9426cd98798SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 9431ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 9441ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 9456cd98798SBarry Smith idx = a->j; 9466cd98798SBarry Smith v = a->a; 9476cd98798SBarry Smith ii = a->i; 9486cd98798SBarry Smith 9496cd98798SBarry Smith for (i=0; i<m; i++) { 9506cd98798SBarry Smith jrow = ii[i]; 9516cd98798SBarry Smith n = ii[i+1] - jrow; 9526cd98798SBarry Smith sum1 = 0.0; 9536cd98798SBarry Smith sum2 = 0.0; 9546cd98798SBarry Smith sum3 = 0.0; 9556cd98798SBarry Smith sum4 = 0.0; 9566cd98798SBarry Smith sum5 = 0.0; 9576cd98798SBarry Smith sum6 = 0.0; 9586cd98798SBarry Smith for (j=0; j<n; j++) { 9596cd98798SBarry Smith sum1 += v[jrow]*x[6*idx[jrow]]; 9606cd98798SBarry Smith sum2 += v[jrow]*x[6*idx[jrow]+1]; 9616cd98798SBarry Smith sum3 += v[jrow]*x[6*idx[jrow]+2]; 9626cd98798SBarry Smith sum4 += v[jrow]*x[6*idx[jrow]+3]; 9636cd98798SBarry Smith sum5 += v[jrow]*x[6*idx[jrow]+4]; 9646cd98798SBarry Smith sum6 += v[jrow]*x[6*idx[jrow]+5]; 9656cd98798SBarry Smith jrow++; 9666cd98798SBarry Smith } 9676cd98798SBarry Smith y[6*i] += sum1; 9686cd98798SBarry Smith y[6*i+1] += sum2; 9696cd98798SBarry Smith y[6*i+2] += sum3; 9706cd98798SBarry Smith y[6*i+3] += sum4; 9716cd98798SBarry Smith y[6*i+4] += sum5; 9726cd98798SBarry Smith y[6*i+5] += sum6; 9736cd98798SBarry Smith } 9746cd98798SBarry Smith 975efee365bSSatish Balay ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr); 9761ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 9771ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 9786cd98798SBarry Smith PetscFunctionReturn(0); 9796cd98798SBarry Smith } 9806cd98798SBarry Smith 9814a2ae208SSatish Balay #undef __FUNCT__ 982b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 983dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 9846cd98798SBarry Smith { 9856cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 9866cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 98787828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 988dfbe8321SBarry Smith PetscErrorCode ierr; 989d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 9906cd98798SBarry Smith 9916cd98798SBarry Smith PetscFunctionBegin; 9926cd98798SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 9931ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 9941ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 995bfec09a0SHong Zhang 9966cd98798SBarry Smith for (i=0; i<m; i++) { 997bfec09a0SHong Zhang idx = a->j + a->i[i] ; 998bfec09a0SHong Zhang v = a->a + a->i[i] ; 9996cd98798SBarry Smith n = a->i[i+1] - a->i[i]; 10006cd98798SBarry Smith alpha1 = x[6*i]; 10016cd98798SBarry Smith alpha2 = x[6*i+1]; 10026cd98798SBarry Smith alpha3 = x[6*i+2]; 10036cd98798SBarry Smith alpha4 = x[6*i+3]; 10046cd98798SBarry Smith alpha5 = x[6*i+4]; 10056cd98798SBarry Smith alpha6 = x[6*i+5]; 10066cd98798SBarry Smith while (n-->0) { 10076cd98798SBarry Smith y[6*(*idx)] += alpha1*(*v); 10086cd98798SBarry Smith y[6*(*idx)+1] += alpha2*(*v); 10096cd98798SBarry Smith y[6*(*idx)+2] += alpha3*(*v); 10106cd98798SBarry Smith y[6*(*idx)+3] += alpha4*(*v); 10116cd98798SBarry Smith y[6*(*idx)+4] += alpha5*(*v); 10126cd98798SBarry Smith y[6*(*idx)+5] += alpha6*(*v); 10136cd98798SBarry Smith idx++; v++; 10146cd98798SBarry Smith } 10156cd98798SBarry Smith } 1016efee365bSSatish Balay ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr); 10171ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 10181ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 10196cd98798SBarry Smith PetscFunctionReturn(0); 10206cd98798SBarry Smith } 10216cd98798SBarry Smith 102266ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/ 102366ed3db0SBarry Smith #undef __FUNCT__ 1024ed8eea03SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_7" 1025ed8eea03SBarry Smith PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1026ed8eea03SBarry Smith { 1027ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1028ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1029ed8eea03SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1030ed8eea03SBarry Smith PetscErrorCode ierr; 1031d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 1032ed8eea03SBarry Smith PetscInt n,i,jrow,j; 1033ed8eea03SBarry Smith 1034ed8eea03SBarry Smith PetscFunctionBegin; 1035ed8eea03SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1036ed8eea03SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1037ed8eea03SBarry Smith idx = a->j; 1038ed8eea03SBarry Smith v = a->a; 1039ed8eea03SBarry Smith ii = a->i; 1040ed8eea03SBarry Smith 1041ed8eea03SBarry Smith for (i=0; i<m; i++) { 1042ed8eea03SBarry Smith jrow = ii[i]; 1043ed8eea03SBarry Smith n = ii[i+1] - jrow; 1044ed8eea03SBarry Smith sum1 = 0.0; 1045ed8eea03SBarry Smith sum2 = 0.0; 1046ed8eea03SBarry Smith sum3 = 0.0; 1047ed8eea03SBarry Smith sum4 = 0.0; 1048ed8eea03SBarry Smith sum5 = 0.0; 1049ed8eea03SBarry Smith sum6 = 0.0; 1050ed8eea03SBarry Smith sum7 = 0.0; 1051b3c51c66SMatthew Knepley nonzerorow += (n>0); 1052ed8eea03SBarry Smith for (j=0; j<n; j++) { 1053ed8eea03SBarry Smith sum1 += v[jrow]*x[7*idx[jrow]]; 1054ed8eea03SBarry Smith sum2 += v[jrow]*x[7*idx[jrow]+1]; 1055ed8eea03SBarry Smith sum3 += v[jrow]*x[7*idx[jrow]+2]; 1056ed8eea03SBarry Smith sum4 += v[jrow]*x[7*idx[jrow]+3]; 1057ed8eea03SBarry Smith sum5 += v[jrow]*x[7*idx[jrow]+4]; 1058ed8eea03SBarry Smith sum6 += v[jrow]*x[7*idx[jrow]+5]; 1059ed8eea03SBarry Smith sum7 += v[jrow]*x[7*idx[jrow]+6]; 1060ed8eea03SBarry Smith jrow++; 1061ed8eea03SBarry Smith } 1062ed8eea03SBarry Smith y[7*i] = sum1; 1063ed8eea03SBarry Smith y[7*i+1] = sum2; 1064ed8eea03SBarry Smith y[7*i+2] = sum3; 1065ed8eea03SBarry Smith y[7*i+3] = sum4; 1066ed8eea03SBarry Smith y[7*i+4] = sum5; 1067ed8eea03SBarry Smith y[7*i+5] = sum6; 1068ed8eea03SBarry Smith y[7*i+6] = sum7; 1069ed8eea03SBarry Smith } 1070ed8eea03SBarry Smith 1071b3c51c66SMatthew Knepley ierr = PetscLogFlops(14*a->nz - 7*nonzerorow);CHKERRQ(ierr); 1072ed8eea03SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1073ed8eea03SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1074ed8eea03SBarry Smith PetscFunctionReturn(0); 1075ed8eea03SBarry Smith } 1076ed8eea03SBarry Smith 1077ed8eea03SBarry Smith #undef __FUNCT__ 1078ed8eea03SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7" 1079ed8eea03SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1080ed8eea03SBarry Smith { 1081ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1082ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1083ed8eea03SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0; 1084ed8eea03SBarry Smith PetscErrorCode ierr; 1085d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 1086ed8eea03SBarry Smith 1087ed8eea03SBarry Smith PetscFunctionBegin; 10882dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 1089ed8eea03SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1090ed8eea03SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1091ed8eea03SBarry Smith 1092ed8eea03SBarry Smith for (i=0; i<m; i++) { 1093ed8eea03SBarry Smith idx = a->j + a->i[i] ; 1094ed8eea03SBarry Smith v = a->a + a->i[i] ; 1095ed8eea03SBarry Smith n = a->i[i+1] - a->i[i]; 1096ed8eea03SBarry Smith alpha1 = x[7*i]; 1097ed8eea03SBarry Smith alpha2 = x[7*i+1]; 1098ed8eea03SBarry Smith alpha3 = x[7*i+2]; 1099ed8eea03SBarry Smith alpha4 = x[7*i+3]; 1100ed8eea03SBarry Smith alpha5 = x[7*i+4]; 1101ed8eea03SBarry Smith alpha6 = x[7*i+5]; 1102ed8eea03SBarry Smith alpha7 = x[7*i+6]; 1103ed8eea03SBarry Smith while (n-->0) { 1104ed8eea03SBarry Smith y[7*(*idx)] += alpha1*(*v); 1105ed8eea03SBarry Smith y[7*(*idx)+1] += alpha2*(*v); 1106ed8eea03SBarry Smith y[7*(*idx)+2] += alpha3*(*v); 1107ed8eea03SBarry Smith y[7*(*idx)+3] += alpha4*(*v); 1108ed8eea03SBarry Smith y[7*(*idx)+4] += alpha5*(*v); 1109ed8eea03SBarry Smith y[7*(*idx)+5] += alpha6*(*v); 1110ed8eea03SBarry Smith y[7*(*idx)+6] += alpha7*(*v); 1111ed8eea03SBarry Smith idx++; v++; 1112ed8eea03SBarry Smith } 1113ed8eea03SBarry Smith } 1114b3c51c66SMatthew Knepley ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr); 1115ed8eea03SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1116ed8eea03SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1117ed8eea03SBarry Smith PetscFunctionReturn(0); 1118ed8eea03SBarry Smith } 1119ed8eea03SBarry Smith 1120ed8eea03SBarry Smith #undef __FUNCT__ 1121ed8eea03SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_7" 1122ed8eea03SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1123ed8eea03SBarry Smith { 1124ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1125ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1126ed8eea03SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1127ed8eea03SBarry Smith PetscErrorCode ierr; 1128d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1129ed8eea03SBarry Smith PetscInt n,i,jrow,j; 1130ed8eea03SBarry Smith 1131ed8eea03SBarry Smith PetscFunctionBegin; 1132ed8eea03SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1133ed8eea03SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1134ed8eea03SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1135ed8eea03SBarry Smith idx = a->j; 1136ed8eea03SBarry Smith v = a->a; 1137ed8eea03SBarry Smith ii = a->i; 1138ed8eea03SBarry Smith 1139ed8eea03SBarry Smith for (i=0; i<m; i++) { 1140ed8eea03SBarry Smith jrow = ii[i]; 1141ed8eea03SBarry Smith n = ii[i+1] - jrow; 1142ed8eea03SBarry Smith sum1 = 0.0; 1143ed8eea03SBarry Smith sum2 = 0.0; 1144ed8eea03SBarry Smith sum3 = 0.0; 1145ed8eea03SBarry Smith sum4 = 0.0; 1146ed8eea03SBarry Smith sum5 = 0.0; 1147ed8eea03SBarry Smith sum6 = 0.0; 1148ed8eea03SBarry Smith sum7 = 0.0; 1149ed8eea03SBarry Smith for (j=0; j<n; j++) { 1150ed8eea03SBarry Smith sum1 += v[jrow]*x[7*idx[jrow]]; 1151ed8eea03SBarry Smith sum2 += v[jrow]*x[7*idx[jrow]+1]; 1152ed8eea03SBarry Smith sum3 += v[jrow]*x[7*idx[jrow]+2]; 1153ed8eea03SBarry Smith sum4 += v[jrow]*x[7*idx[jrow]+3]; 1154ed8eea03SBarry Smith sum5 += v[jrow]*x[7*idx[jrow]+4]; 1155ed8eea03SBarry Smith sum6 += v[jrow]*x[7*idx[jrow]+5]; 1156ed8eea03SBarry Smith sum7 += v[jrow]*x[7*idx[jrow]+6]; 1157ed8eea03SBarry Smith jrow++; 1158ed8eea03SBarry Smith } 1159ed8eea03SBarry Smith y[7*i] += sum1; 1160ed8eea03SBarry Smith y[7*i+1] += sum2; 1161ed8eea03SBarry Smith y[7*i+2] += sum3; 1162ed8eea03SBarry Smith y[7*i+3] += sum4; 1163ed8eea03SBarry Smith y[7*i+4] += sum5; 1164ed8eea03SBarry Smith y[7*i+5] += sum6; 1165ed8eea03SBarry Smith y[7*i+6] += sum7; 1166ed8eea03SBarry Smith } 1167ed8eea03SBarry Smith 1168efee365bSSatish Balay ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr); 1169ed8eea03SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1170ed8eea03SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1171ed8eea03SBarry Smith PetscFunctionReturn(0); 1172ed8eea03SBarry Smith } 1173ed8eea03SBarry Smith 1174ed8eea03SBarry Smith #undef __FUNCT__ 1175ed8eea03SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7" 1176ed8eea03SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1177ed8eea03SBarry Smith { 1178ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1179ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1180ed8eea03SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1181ed8eea03SBarry Smith PetscErrorCode ierr; 1182d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 1183ed8eea03SBarry Smith 1184ed8eea03SBarry Smith PetscFunctionBegin; 1185ed8eea03SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1186ed8eea03SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1187ed8eea03SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1188ed8eea03SBarry Smith for (i=0; i<m; i++) { 1189ed8eea03SBarry Smith idx = a->j + a->i[i] ; 1190ed8eea03SBarry Smith v = a->a + a->i[i] ; 1191ed8eea03SBarry Smith n = a->i[i+1] - a->i[i]; 1192ed8eea03SBarry Smith alpha1 = x[7*i]; 1193ed8eea03SBarry Smith alpha2 = x[7*i+1]; 1194ed8eea03SBarry Smith alpha3 = x[7*i+2]; 1195ed8eea03SBarry Smith alpha4 = x[7*i+3]; 1196ed8eea03SBarry Smith alpha5 = x[7*i+4]; 1197ed8eea03SBarry Smith alpha6 = x[7*i+5]; 1198ed8eea03SBarry Smith alpha7 = x[7*i+6]; 1199ed8eea03SBarry Smith while (n-->0) { 1200ed8eea03SBarry Smith y[7*(*idx)] += alpha1*(*v); 1201ed8eea03SBarry Smith y[7*(*idx)+1] += alpha2*(*v); 1202ed8eea03SBarry Smith y[7*(*idx)+2] += alpha3*(*v); 1203ed8eea03SBarry Smith y[7*(*idx)+3] += alpha4*(*v); 1204ed8eea03SBarry Smith y[7*(*idx)+4] += alpha5*(*v); 1205ed8eea03SBarry Smith y[7*(*idx)+5] += alpha6*(*v); 1206ed8eea03SBarry Smith y[7*(*idx)+6] += alpha7*(*v); 1207ed8eea03SBarry Smith idx++; v++; 1208ed8eea03SBarry Smith } 1209ed8eea03SBarry Smith } 1210efee365bSSatish Balay ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr); 1211ed8eea03SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1212ed8eea03SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1213ed8eea03SBarry Smith PetscFunctionReturn(0); 1214ed8eea03SBarry Smith } 1215ed8eea03SBarry Smith 1216ed8eea03SBarry Smith #undef __FUNCT__ 121766ed3db0SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_8" 1218dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 121966ed3db0SBarry Smith { 122066ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 122166ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 122287828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1223dfbe8321SBarry Smith PetscErrorCode ierr; 1224d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 1225b24ad042SBarry Smith PetscInt n,i,jrow,j; 122666ed3db0SBarry Smith 122766ed3db0SBarry Smith PetscFunctionBegin; 12281ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 12291ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 123066ed3db0SBarry Smith idx = a->j; 123166ed3db0SBarry Smith v = a->a; 123266ed3db0SBarry Smith ii = a->i; 123366ed3db0SBarry Smith 123466ed3db0SBarry Smith for (i=0; i<m; i++) { 123566ed3db0SBarry Smith jrow = ii[i]; 123666ed3db0SBarry Smith n = ii[i+1] - jrow; 123766ed3db0SBarry Smith sum1 = 0.0; 123866ed3db0SBarry Smith sum2 = 0.0; 123966ed3db0SBarry Smith sum3 = 0.0; 124066ed3db0SBarry Smith sum4 = 0.0; 124166ed3db0SBarry Smith sum5 = 0.0; 124266ed3db0SBarry Smith sum6 = 0.0; 124366ed3db0SBarry Smith sum7 = 0.0; 124466ed3db0SBarry Smith sum8 = 0.0; 1245b3c51c66SMatthew Knepley nonzerorow += (n>0); 124666ed3db0SBarry Smith for (j=0; j<n; j++) { 124766ed3db0SBarry Smith sum1 += v[jrow]*x[8*idx[jrow]]; 124866ed3db0SBarry Smith sum2 += v[jrow]*x[8*idx[jrow]+1]; 124966ed3db0SBarry Smith sum3 += v[jrow]*x[8*idx[jrow]+2]; 125066ed3db0SBarry Smith sum4 += v[jrow]*x[8*idx[jrow]+3]; 125166ed3db0SBarry Smith sum5 += v[jrow]*x[8*idx[jrow]+4]; 125266ed3db0SBarry Smith sum6 += v[jrow]*x[8*idx[jrow]+5]; 125366ed3db0SBarry Smith sum7 += v[jrow]*x[8*idx[jrow]+6]; 125466ed3db0SBarry Smith sum8 += v[jrow]*x[8*idx[jrow]+7]; 125566ed3db0SBarry Smith jrow++; 125666ed3db0SBarry Smith } 125766ed3db0SBarry Smith y[8*i] = sum1; 125866ed3db0SBarry Smith y[8*i+1] = sum2; 125966ed3db0SBarry Smith y[8*i+2] = sum3; 126066ed3db0SBarry Smith y[8*i+3] = sum4; 126166ed3db0SBarry Smith y[8*i+4] = sum5; 126266ed3db0SBarry Smith y[8*i+5] = sum6; 126366ed3db0SBarry Smith y[8*i+6] = sum7; 126466ed3db0SBarry Smith y[8*i+7] = sum8; 126566ed3db0SBarry Smith } 126666ed3db0SBarry Smith 1267b3c51c66SMatthew Knepley ierr = PetscLogFlops(16*a->nz - 8*nonzerorow);CHKERRQ(ierr); 12681ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 12691ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 127066ed3db0SBarry Smith PetscFunctionReturn(0); 127166ed3db0SBarry Smith } 127266ed3db0SBarry Smith 127366ed3db0SBarry Smith #undef __FUNCT__ 127466ed3db0SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 1275dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 127666ed3db0SBarry Smith { 127766ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 127866ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 127987828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1280dfbe8321SBarry Smith PetscErrorCode ierr; 1281d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 128266ed3db0SBarry Smith 128366ed3db0SBarry Smith PetscFunctionBegin; 12842dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 12851ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 12861ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1287bfec09a0SHong Zhang 128866ed3db0SBarry Smith for (i=0; i<m; i++) { 1289bfec09a0SHong Zhang idx = a->j + a->i[i] ; 1290bfec09a0SHong Zhang v = a->a + a->i[i] ; 129166ed3db0SBarry Smith n = a->i[i+1] - a->i[i]; 129266ed3db0SBarry Smith alpha1 = x[8*i]; 129366ed3db0SBarry Smith alpha2 = x[8*i+1]; 129466ed3db0SBarry Smith alpha3 = x[8*i+2]; 129566ed3db0SBarry Smith alpha4 = x[8*i+3]; 129666ed3db0SBarry Smith alpha5 = x[8*i+4]; 129766ed3db0SBarry Smith alpha6 = x[8*i+5]; 129866ed3db0SBarry Smith alpha7 = x[8*i+6]; 129966ed3db0SBarry Smith alpha8 = x[8*i+7]; 130066ed3db0SBarry Smith while (n-->0) { 130166ed3db0SBarry Smith y[8*(*idx)] += alpha1*(*v); 130266ed3db0SBarry Smith y[8*(*idx)+1] += alpha2*(*v); 130366ed3db0SBarry Smith y[8*(*idx)+2] += alpha3*(*v); 130466ed3db0SBarry Smith y[8*(*idx)+3] += alpha4*(*v); 130566ed3db0SBarry Smith y[8*(*idx)+4] += alpha5*(*v); 130666ed3db0SBarry Smith y[8*(*idx)+5] += alpha6*(*v); 130766ed3db0SBarry Smith y[8*(*idx)+6] += alpha7*(*v); 130866ed3db0SBarry Smith y[8*(*idx)+7] += alpha8*(*v); 130966ed3db0SBarry Smith idx++; v++; 131066ed3db0SBarry Smith } 131166ed3db0SBarry Smith } 1312b3c51c66SMatthew Knepley ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr); 13131ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 13141ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 131566ed3db0SBarry Smith PetscFunctionReturn(0); 131666ed3db0SBarry Smith } 131766ed3db0SBarry Smith 131866ed3db0SBarry Smith #undef __FUNCT__ 131966ed3db0SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1320dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 132166ed3db0SBarry Smith { 132266ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 132366ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 132487828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1325dfbe8321SBarry Smith PetscErrorCode ierr; 1326d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1327b24ad042SBarry Smith PetscInt n,i,jrow,j; 132866ed3db0SBarry Smith 132966ed3db0SBarry Smith PetscFunctionBegin; 133066ed3db0SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 13311ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 13321ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 133366ed3db0SBarry Smith idx = a->j; 133466ed3db0SBarry Smith v = a->a; 133566ed3db0SBarry Smith ii = a->i; 133666ed3db0SBarry Smith 133766ed3db0SBarry Smith for (i=0; i<m; i++) { 133866ed3db0SBarry Smith jrow = ii[i]; 133966ed3db0SBarry Smith n = ii[i+1] - jrow; 134066ed3db0SBarry Smith sum1 = 0.0; 134166ed3db0SBarry Smith sum2 = 0.0; 134266ed3db0SBarry Smith sum3 = 0.0; 134366ed3db0SBarry Smith sum4 = 0.0; 134466ed3db0SBarry Smith sum5 = 0.0; 134566ed3db0SBarry Smith sum6 = 0.0; 134666ed3db0SBarry Smith sum7 = 0.0; 134766ed3db0SBarry Smith sum8 = 0.0; 134866ed3db0SBarry Smith for (j=0; j<n; j++) { 134966ed3db0SBarry Smith sum1 += v[jrow]*x[8*idx[jrow]]; 135066ed3db0SBarry Smith sum2 += v[jrow]*x[8*idx[jrow]+1]; 135166ed3db0SBarry Smith sum3 += v[jrow]*x[8*idx[jrow]+2]; 135266ed3db0SBarry Smith sum4 += v[jrow]*x[8*idx[jrow]+3]; 135366ed3db0SBarry Smith sum5 += v[jrow]*x[8*idx[jrow]+4]; 135466ed3db0SBarry Smith sum6 += v[jrow]*x[8*idx[jrow]+5]; 135566ed3db0SBarry Smith sum7 += v[jrow]*x[8*idx[jrow]+6]; 135666ed3db0SBarry Smith sum8 += v[jrow]*x[8*idx[jrow]+7]; 135766ed3db0SBarry Smith jrow++; 135866ed3db0SBarry Smith } 135966ed3db0SBarry Smith y[8*i] += sum1; 136066ed3db0SBarry Smith y[8*i+1] += sum2; 136166ed3db0SBarry Smith y[8*i+2] += sum3; 136266ed3db0SBarry Smith y[8*i+3] += sum4; 136366ed3db0SBarry Smith y[8*i+4] += sum5; 136466ed3db0SBarry Smith y[8*i+5] += sum6; 136566ed3db0SBarry Smith y[8*i+6] += sum7; 136666ed3db0SBarry Smith y[8*i+7] += sum8; 136766ed3db0SBarry Smith } 136866ed3db0SBarry Smith 1369efee365bSSatish Balay ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr); 13701ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 13711ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 137266ed3db0SBarry Smith PetscFunctionReturn(0); 137366ed3db0SBarry Smith } 137466ed3db0SBarry Smith 137566ed3db0SBarry Smith #undef __FUNCT__ 137666ed3db0SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1377dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 137866ed3db0SBarry Smith { 137966ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 138066ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 138187828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1382dfbe8321SBarry Smith PetscErrorCode ierr; 1383d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 138466ed3db0SBarry Smith 138566ed3db0SBarry Smith PetscFunctionBegin; 138666ed3db0SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 13871ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 13881ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 138966ed3db0SBarry Smith for (i=0; i<m; i++) { 1390bfec09a0SHong Zhang idx = a->j + a->i[i] ; 1391bfec09a0SHong Zhang v = a->a + a->i[i] ; 139266ed3db0SBarry Smith n = a->i[i+1] - a->i[i]; 139366ed3db0SBarry Smith alpha1 = x[8*i]; 139466ed3db0SBarry Smith alpha2 = x[8*i+1]; 139566ed3db0SBarry Smith alpha3 = x[8*i+2]; 139666ed3db0SBarry Smith alpha4 = x[8*i+3]; 139766ed3db0SBarry Smith alpha5 = x[8*i+4]; 139866ed3db0SBarry Smith alpha6 = x[8*i+5]; 139966ed3db0SBarry Smith alpha7 = x[8*i+6]; 140066ed3db0SBarry Smith alpha8 = x[8*i+7]; 140166ed3db0SBarry Smith while (n-->0) { 140266ed3db0SBarry Smith y[8*(*idx)] += alpha1*(*v); 140366ed3db0SBarry Smith y[8*(*idx)+1] += alpha2*(*v); 140466ed3db0SBarry Smith y[8*(*idx)+2] += alpha3*(*v); 140566ed3db0SBarry Smith y[8*(*idx)+3] += alpha4*(*v); 140666ed3db0SBarry Smith y[8*(*idx)+4] += alpha5*(*v); 140766ed3db0SBarry Smith y[8*(*idx)+5] += alpha6*(*v); 140866ed3db0SBarry Smith y[8*(*idx)+6] += alpha7*(*v); 140966ed3db0SBarry Smith y[8*(*idx)+7] += alpha8*(*v); 141066ed3db0SBarry Smith idx++; v++; 141166ed3db0SBarry Smith } 141266ed3db0SBarry Smith } 1413efee365bSSatish Balay ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr); 14141ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 14151ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 14162f7816d4SBarry Smith PetscFunctionReturn(0); 14172f7816d4SBarry Smith } 14182f7816d4SBarry Smith 14192b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/ 14202b692628SMatthew Knepley #undef __FUNCT__ 14212b692628SMatthew Knepley #define __FUNCT__ "MatMult_SeqMAIJ_9" 1422dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 14232b692628SMatthew Knepley { 14242b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 14252b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 14262b692628SMatthew Knepley PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1427dfbe8321SBarry Smith PetscErrorCode ierr; 1428d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 1429b24ad042SBarry Smith PetscInt n,i,jrow,j; 14302b692628SMatthew Knepley 14312b692628SMatthew Knepley PetscFunctionBegin; 14321ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 14331ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 14342b692628SMatthew Knepley idx = a->j; 14352b692628SMatthew Knepley v = a->a; 14362b692628SMatthew Knepley ii = a->i; 14372b692628SMatthew Knepley 14382b692628SMatthew Knepley for (i=0; i<m; i++) { 14392b692628SMatthew Knepley jrow = ii[i]; 14402b692628SMatthew Knepley n = ii[i+1] - jrow; 14412b692628SMatthew Knepley sum1 = 0.0; 14422b692628SMatthew Knepley sum2 = 0.0; 14432b692628SMatthew Knepley sum3 = 0.0; 14442b692628SMatthew Knepley sum4 = 0.0; 14452b692628SMatthew Knepley sum5 = 0.0; 14462b692628SMatthew Knepley sum6 = 0.0; 14472b692628SMatthew Knepley sum7 = 0.0; 14482b692628SMatthew Knepley sum8 = 0.0; 14492b692628SMatthew Knepley sum9 = 0.0; 1450b3c51c66SMatthew Knepley nonzerorow += (n>0); 14512b692628SMatthew Knepley for (j=0; j<n; j++) { 14522b692628SMatthew Knepley sum1 += v[jrow]*x[9*idx[jrow]]; 14532b692628SMatthew Knepley sum2 += v[jrow]*x[9*idx[jrow]+1]; 14542b692628SMatthew Knepley sum3 += v[jrow]*x[9*idx[jrow]+2]; 14552b692628SMatthew Knepley sum4 += v[jrow]*x[9*idx[jrow]+3]; 14562b692628SMatthew Knepley sum5 += v[jrow]*x[9*idx[jrow]+4]; 14572b692628SMatthew Knepley sum6 += v[jrow]*x[9*idx[jrow]+5]; 14582b692628SMatthew Knepley sum7 += v[jrow]*x[9*idx[jrow]+6]; 14592b692628SMatthew Knepley sum8 += v[jrow]*x[9*idx[jrow]+7]; 14602b692628SMatthew Knepley sum9 += v[jrow]*x[9*idx[jrow]+8]; 14612b692628SMatthew Knepley jrow++; 14622b692628SMatthew Knepley } 14632b692628SMatthew Knepley y[9*i] = sum1; 14642b692628SMatthew Knepley y[9*i+1] = sum2; 14652b692628SMatthew Knepley y[9*i+2] = sum3; 14662b692628SMatthew Knepley y[9*i+3] = sum4; 14672b692628SMatthew Knepley y[9*i+4] = sum5; 14682b692628SMatthew Knepley y[9*i+5] = sum6; 14692b692628SMatthew Knepley y[9*i+6] = sum7; 14702b692628SMatthew Knepley y[9*i+7] = sum8; 14712b692628SMatthew Knepley y[9*i+8] = sum9; 14722b692628SMatthew Knepley } 14732b692628SMatthew Knepley 1474b3c51c66SMatthew Knepley ierr = PetscLogFlops(18*a->nz - 9*nonzerorow);CHKERRQ(ierr); 14751ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 14761ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 14772b692628SMatthew Knepley PetscFunctionReturn(0); 14782b692628SMatthew Knepley } 14792b692628SMatthew Knepley 1480b9cda22cSBarry Smith /* ------------------------------------------------------------------------------*/ 1481b9cda22cSBarry Smith 14822b692628SMatthew Knepley #undef __FUNCT__ 14832b692628SMatthew Knepley #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9" 1484dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 14852b692628SMatthew Knepley { 14862b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 14872b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 14882b692628SMatthew Knepley PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0; 1489dfbe8321SBarry Smith PetscErrorCode ierr; 1490d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 14912b692628SMatthew Knepley 14922b692628SMatthew Knepley PetscFunctionBegin; 14932dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 14941ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 14951ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 14962b692628SMatthew Knepley 14972b692628SMatthew Knepley for (i=0; i<m; i++) { 14982b692628SMatthew Knepley idx = a->j + a->i[i] ; 14992b692628SMatthew Knepley v = a->a + a->i[i] ; 15002b692628SMatthew Knepley n = a->i[i+1] - a->i[i]; 15012b692628SMatthew Knepley alpha1 = x[9*i]; 15022b692628SMatthew Knepley alpha2 = x[9*i+1]; 15032b692628SMatthew Knepley alpha3 = x[9*i+2]; 15042b692628SMatthew Knepley alpha4 = x[9*i+3]; 15052b692628SMatthew Knepley alpha5 = x[9*i+4]; 15062b692628SMatthew Knepley alpha6 = x[9*i+5]; 15072b692628SMatthew Knepley alpha7 = x[9*i+6]; 15082b692628SMatthew Knepley alpha8 = x[9*i+7]; 15092f6bd0c6SMatthew Knepley alpha9 = x[9*i+8]; 15102b692628SMatthew Knepley while (n-->0) { 15112b692628SMatthew Knepley y[9*(*idx)] += alpha1*(*v); 15122b692628SMatthew Knepley y[9*(*idx)+1] += alpha2*(*v); 15132b692628SMatthew Knepley y[9*(*idx)+2] += alpha3*(*v); 15142b692628SMatthew Knepley y[9*(*idx)+3] += alpha4*(*v); 15152b692628SMatthew Knepley y[9*(*idx)+4] += alpha5*(*v); 15162b692628SMatthew Knepley y[9*(*idx)+5] += alpha6*(*v); 15172b692628SMatthew Knepley y[9*(*idx)+6] += alpha7*(*v); 15182b692628SMatthew Knepley y[9*(*idx)+7] += alpha8*(*v); 15192b692628SMatthew Knepley y[9*(*idx)+8] += alpha9*(*v); 15202b692628SMatthew Knepley idx++; v++; 15212b692628SMatthew Knepley } 15222b692628SMatthew Knepley } 1523b3c51c66SMatthew Knepley ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 15241ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 15251ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 15262b692628SMatthew Knepley PetscFunctionReturn(0); 15272b692628SMatthew Knepley } 15282b692628SMatthew Knepley 15292b692628SMatthew Knepley #undef __FUNCT__ 15302b692628SMatthew Knepley #define __FUNCT__ "MatMultAdd_SeqMAIJ_9" 1531dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 15322b692628SMatthew Knepley { 15332b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 15342b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 15352b692628SMatthew Knepley PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1536dfbe8321SBarry Smith PetscErrorCode ierr; 1537d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1538b24ad042SBarry Smith PetscInt n,i,jrow,j; 15392b692628SMatthew Knepley 15402b692628SMatthew Knepley PetscFunctionBegin; 15412b692628SMatthew Knepley if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 15421ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 15431ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 15442b692628SMatthew Knepley idx = a->j; 15452b692628SMatthew Knepley v = a->a; 15462b692628SMatthew Knepley ii = a->i; 15472b692628SMatthew Knepley 15482b692628SMatthew Knepley for (i=0; i<m; i++) { 15492b692628SMatthew Knepley jrow = ii[i]; 15502b692628SMatthew Knepley n = ii[i+1] - jrow; 15512b692628SMatthew Knepley sum1 = 0.0; 15522b692628SMatthew Knepley sum2 = 0.0; 15532b692628SMatthew Knepley sum3 = 0.0; 15542b692628SMatthew Knepley sum4 = 0.0; 15552b692628SMatthew Knepley sum5 = 0.0; 15562b692628SMatthew Knepley sum6 = 0.0; 15572b692628SMatthew Knepley sum7 = 0.0; 15582b692628SMatthew Knepley sum8 = 0.0; 15592b692628SMatthew Knepley sum9 = 0.0; 15602b692628SMatthew Knepley for (j=0; j<n; j++) { 15612b692628SMatthew Knepley sum1 += v[jrow]*x[9*idx[jrow]]; 15622b692628SMatthew Knepley sum2 += v[jrow]*x[9*idx[jrow]+1]; 15632b692628SMatthew Knepley sum3 += v[jrow]*x[9*idx[jrow]+2]; 15642b692628SMatthew Knepley sum4 += v[jrow]*x[9*idx[jrow]+3]; 15652b692628SMatthew Knepley sum5 += v[jrow]*x[9*idx[jrow]+4]; 15662b692628SMatthew Knepley sum6 += v[jrow]*x[9*idx[jrow]+5]; 15672b692628SMatthew Knepley sum7 += v[jrow]*x[9*idx[jrow]+6]; 15682b692628SMatthew Knepley sum8 += v[jrow]*x[9*idx[jrow]+7]; 15692b692628SMatthew Knepley sum9 += v[jrow]*x[9*idx[jrow]+8]; 15702b692628SMatthew Knepley jrow++; 15712b692628SMatthew Knepley } 15722b692628SMatthew Knepley y[9*i] += sum1; 15732b692628SMatthew Knepley y[9*i+1] += sum2; 15742b692628SMatthew Knepley y[9*i+2] += sum3; 15752b692628SMatthew Knepley y[9*i+3] += sum4; 15762b692628SMatthew Knepley y[9*i+4] += sum5; 15772b692628SMatthew Knepley y[9*i+5] += sum6; 15782b692628SMatthew Knepley y[9*i+6] += sum7; 15792b692628SMatthew Knepley y[9*i+7] += sum8; 15802b692628SMatthew Knepley y[9*i+8] += sum9; 15812b692628SMatthew Knepley } 15822b692628SMatthew Knepley 1583efee365bSSatish Balay ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 15841ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 15851ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 15862b692628SMatthew Knepley PetscFunctionReturn(0); 15872b692628SMatthew Knepley } 15882b692628SMatthew Knepley 15892b692628SMatthew Knepley #undef __FUNCT__ 15902b692628SMatthew Knepley #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9" 1591dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 15922b692628SMatthew Knepley { 15932b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 15942b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 15952b692628SMatthew Knepley PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1596dfbe8321SBarry Smith PetscErrorCode ierr; 1597d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 15982b692628SMatthew Knepley 15992b692628SMatthew Knepley PetscFunctionBegin; 16002b692628SMatthew Knepley if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 16011ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 16021ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 16032b692628SMatthew Knepley for (i=0; i<m; i++) { 16042b692628SMatthew Knepley idx = a->j + a->i[i] ; 16052b692628SMatthew Knepley v = a->a + a->i[i] ; 16062b692628SMatthew Knepley n = a->i[i+1] - a->i[i]; 16072b692628SMatthew Knepley alpha1 = x[9*i]; 16082b692628SMatthew Knepley alpha2 = x[9*i+1]; 16092b692628SMatthew Knepley alpha3 = x[9*i+2]; 16102b692628SMatthew Knepley alpha4 = x[9*i+3]; 16112b692628SMatthew Knepley alpha5 = x[9*i+4]; 16122b692628SMatthew Knepley alpha6 = x[9*i+5]; 16132b692628SMatthew Knepley alpha7 = x[9*i+6]; 16142b692628SMatthew Knepley alpha8 = x[9*i+7]; 16152b692628SMatthew Knepley alpha9 = x[9*i+8]; 16162b692628SMatthew Knepley while (n-->0) { 16172b692628SMatthew Knepley y[9*(*idx)] += alpha1*(*v); 16182b692628SMatthew Knepley y[9*(*idx)+1] += alpha2*(*v); 16192b692628SMatthew Knepley y[9*(*idx)+2] += alpha3*(*v); 16202b692628SMatthew Knepley y[9*(*idx)+3] += alpha4*(*v); 16212b692628SMatthew Knepley y[9*(*idx)+4] += alpha5*(*v); 16222b692628SMatthew Knepley y[9*(*idx)+5] += alpha6*(*v); 16232b692628SMatthew Knepley y[9*(*idx)+6] += alpha7*(*v); 16242b692628SMatthew Knepley y[9*(*idx)+7] += alpha8*(*v); 16252b692628SMatthew Knepley y[9*(*idx)+8] += alpha9*(*v); 16262b692628SMatthew Knepley idx++; v++; 16272b692628SMatthew Knepley } 16282b692628SMatthew Knepley } 1629efee365bSSatish Balay ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 16301ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 16311ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 16322b692628SMatthew Knepley PetscFunctionReturn(0); 16332b692628SMatthew Knepley } 1634b9cda22cSBarry Smith /*--------------------------------------------------------------------------------------------*/ 1635b9cda22cSBarry Smith #undef __FUNCT__ 1636b9cda22cSBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_10" 1637b9cda22cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1638b9cda22cSBarry Smith { 1639b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1640b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1641b9cda22cSBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1642b9cda22cSBarry Smith PetscErrorCode ierr; 1643d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 1644b9cda22cSBarry Smith PetscInt n,i,jrow,j; 1645b9cda22cSBarry Smith 1646b9cda22cSBarry Smith PetscFunctionBegin; 1647b9cda22cSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1648b9cda22cSBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1649b9cda22cSBarry Smith idx = a->j; 1650b9cda22cSBarry Smith v = a->a; 1651b9cda22cSBarry Smith ii = a->i; 1652b9cda22cSBarry Smith 1653b9cda22cSBarry Smith for (i=0; i<m; i++) { 1654b9cda22cSBarry Smith jrow = ii[i]; 1655b9cda22cSBarry Smith n = ii[i+1] - jrow; 1656b9cda22cSBarry Smith sum1 = 0.0; 1657b9cda22cSBarry Smith sum2 = 0.0; 1658b9cda22cSBarry Smith sum3 = 0.0; 1659b9cda22cSBarry Smith sum4 = 0.0; 1660b9cda22cSBarry Smith sum5 = 0.0; 1661b9cda22cSBarry Smith sum6 = 0.0; 1662b9cda22cSBarry Smith sum7 = 0.0; 1663b9cda22cSBarry Smith sum8 = 0.0; 1664b9cda22cSBarry Smith sum9 = 0.0; 1665b9cda22cSBarry Smith sum10 = 0.0; 1666b3c51c66SMatthew Knepley nonzerorow += (n>0); 1667b9cda22cSBarry Smith for (j=0; j<n; j++) { 1668b9cda22cSBarry Smith sum1 += v[jrow]*x[10*idx[jrow]]; 1669b9cda22cSBarry Smith sum2 += v[jrow]*x[10*idx[jrow]+1]; 1670b9cda22cSBarry Smith sum3 += v[jrow]*x[10*idx[jrow]+2]; 1671b9cda22cSBarry Smith sum4 += v[jrow]*x[10*idx[jrow]+3]; 1672b9cda22cSBarry Smith sum5 += v[jrow]*x[10*idx[jrow]+4]; 1673b9cda22cSBarry Smith sum6 += v[jrow]*x[10*idx[jrow]+5]; 1674b9cda22cSBarry Smith sum7 += v[jrow]*x[10*idx[jrow]+6]; 1675b9cda22cSBarry Smith sum8 += v[jrow]*x[10*idx[jrow]+7]; 1676b9cda22cSBarry Smith sum9 += v[jrow]*x[10*idx[jrow]+8]; 1677b9cda22cSBarry Smith sum10 += v[jrow]*x[10*idx[jrow]+9]; 1678b9cda22cSBarry Smith jrow++; 1679b9cda22cSBarry Smith } 1680b9cda22cSBarry Smith y[10*i] = sum1; 1681b9cda22cSBarry Smith y[10*i+1] = sum2; 1682b9cda22cSBarry Smith y[10*i+2] = sum3; 1683b9cda22cSBarry Smith y[10*i+3] = sum4; 1684b9cda22cSBarry Smith y[10*i+4] = sum5; 1685b9cda22cSBarry Smith y[10*i+5] = sum6; 1686b9cda22cSBarry Smith y[10*i+6] = sum7; 1687b9cda22cSBarry Smith y[10*i+7] = sum8; 1688b9cda22cSBarry Smith y[10*i+8] = sum9; 1689b9cda22cSBarry Smith y[10*i+9] = sum10; 1690b9cda22cSBarry Smith } 1691b9cda22cSBarry Smith 1692b3c51c66SMatthew Knepley ierr = PetscLogFlops(20*a->nz - 10*nonzerorow);CHKERRQ(ierr); 1693b9cda22cSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1694b9cda22cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1695b9cda22cSBarry Smith PetscFunctionReturn(0); 1696b9cda22cSBarry Smith } 1697b9cda22cSBarry Smith 1698b9cda22cSBarry Smith #undef __FUNCT__ 1699b9cda22cSBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_10" 1700b9cda22cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1701b9cda22cSBarry Smith { 1702b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1703b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1704b9cda22cSBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1705b9cda22cSBarry Smith PetscErrorCode ierr; 1706d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1707b9cda22cSBarry Smith PetscInt n,i,jrow,j; 1708b9cda22cSBarry Smith 1709b9cda22cSBarry Smith PetscFunctionBegin; 1710b9cda22cSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1711b9cda22cSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1712b9cda22cSBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1713b9cda22cSBarry Smith idx = a->j; 1714b9cda22cSBarry Smith v = a->a; 1715b9cda22cSBarry Smith ii = a->i; 1716b9cda22cSBarry Smith 1717b9cda22cSBarry Smith for (i=0; i<m; i++) { 1718b9cda22cSBarry Smith jrow = ii[i]; 1719b9cda22cSBarry Smith n = ii[i+1] - jrow; 1720b9cda22cSBarry Smith sum1 = 0.0; 1721b9cda22cSBarry Smith sum2 = 0.0; 1722b9cda22cSBarry Smith sum3 = 0.0; 1723b9cda22cSBarry Smith sum4 = 0.0; 1724b9cda22cSBarry Smith sum5 = 0.0; 1725b9cda22cSBarry Smith sum6 = 0.0; 1726b9cda22cSBarry Smith sum7 = 0.0; 1727b9cda22cSBarry Smith sum8 = 0.0; 1728b9cda22cSBarry Smith sum9 = 0.0; 1729b9cda22cSBarry Smith sum10 = 0.0; 1730b9cda22cSBarry Smith for (j=0; j<n; j++) { 1731b9cda22cSBarry Smith sum1 += v[jrow]*x[10*idx[jrow]]; 1732b9cda22cSBarry Smith sum2 += v[jrow]*x[10*idx[jrow]+1]; 1733b9cda22cSBarry Smith sum3 += v[jrow]*x[10*idx[jrow]+2]; 1734b9cda22cSBarry Smith sum4 += v[jrow]*x[10*idx[jrow]+3]; 1735b9cda22cSBarry Smith sum5 += v[jrow]*x[10*idx[jrow]+4]; 1736b9cda22cSBarry Smith sum6 += v[jrow]*x[10*idx[jrow]+5]; 1737b9cda22cSBarry Smith sum7 += v[jrow]*x[10*idx[jrow]+6]; 1738b9cda22cSBarry Smith sum8 += v[jrow]*x[10*idx[jrow]+7]; 1739b9cda22cSBarry Smith sum9 += v[jrow]*x[10*idx[jrow]+8]; 1740b9cda22cSBarry Smith sum10 += v[jrow]*x[10*idx[jrow]+9]; 1741b9cda22cSBarry Smith jrow++; 1742b9cda22cSBarry Smith } 1743b9cda22cSBarry Smith y[10*i] += sum1; 1744b9cda22cSBarry Smith y[10*i+1] += sum2; 1745b9cda22cSBarry Smith y[10*i+2] += sum3; 1746b9cda22cSBarry Smith y[10*i+3] += sum4; 1747b9cda22cSBarry Smith y[10*i+4] += sum5; 1748b9cda22cSBarry Smith y[10*i+5] += sum6; 1749b9cda22cSBarry Smith y[10*i+6] += sum7; 1750b9cda22cSBarry Smith y[10*i+7] += sum8; 1751b9cda22cSBarry Smith y[10*i+8] += sum9; 1752b9cda22cSBarry Smith y[10*i+9] += sum10; 1753b9cda22cSBarry Smith } 1754b9cda22cSBarry Smith 1755b3c51c66SMatthew Knepley ierr = PetscLogFlops(20*a->nz);CHKERRQ(ierr); 1756b9cda22cSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1757b9cda22cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1758b9cda22cSBarry Smith PetscFunctionReturn(0); 1759b9cda22cSBarry Smith } 1760b9cda22cSBarry Smith 1761b9cda22cSBarry Smith #undef __FUNCT__ 1762b9cda22cSBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10" 1763b9cda22cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1764b9cda22cSBarry Smith { 1765b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1766b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1767b9cda22cSBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,zero = 0.0; 1768b9cda22cSBarry Smith PetscErrorCode ierr; 1769d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 1770b9cda22cSBarry Smith 1771b9cda22cSBarry Smith PetscFunctionBegin; 1772b9cda22cSBarry Smith ierr = VecSet(yy,zero);CHKERRQ(ierr); 1773b9cda22cSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1774b9cda22cSBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1775b9cda22cSBarry Smith 1776b9cda22cSBarry Smith for (i=0; i<m; i++) { 1777b9cda22cSBarry Smith idx = a->j + a->i[i] ; 1778b9cda22cSBarry Smith v = a->a + a->i[i] ; 1779b9cda22cSBarry Smith n = a->i[i+1] - a->i[i]; 1780e29fdc22SBarry Smith alpha1 = x[10*i]; 1781e29fdc22SBarry Smith alpha2 = x[10*i+1]; 1782e29fdc22SBarry Smith alpha3 = x[10*i+2]; 1783e29fdc22SBarry Smith alpha4 = x[10*i+3]; 1784e29fdc22SBarry Smith alpha5 = x[10*i+4]; 1785e29fdc22SBarry Smith alpha6 = x[10*i+5]; 1786e29fdc22SBarry Smith alpha7 = x[10*i+6]; 1787e29fdc22SBarry Smith alpha8 = x[10*i+7]; 1788e29fdc22SBarry Smith alpha9 = x[10*i+8]; 1789b9cda22cSBarry Smith alpha10 = x[10*i+9]; 1790b9cda22cSBarry Smith while (n-->0) { 1791e29fdc22SBarry Smith y[10*(*idx)] += alpha1*(*v); 1792e29fdc22SBarry Smith y[10*(*idx)+1] += alpha2*(*v); 1793e29fdc22SBarry Smith y[10*(*idx)+2] += alpha3*(*v); 1794e29fdc22SBarry Smith y[10*(*idx)+3] += alpha4*(*v); 1795e29fdc22SBarry Smith y[10*(*idx)+4] += alpha5*(*v); 1796e29fdc22SBarry Smith y[10*(*idx)+5] += alpha6*(*v); 1797e29fdc22SBarry Smith y[10*(*idx)+6] += alpha7*(*v); 1798e29fdc22SBarry Smith y[10*(*idx)+7] += alpha8*(*v); 1799e29fdc22SBarry Smith y[10*(*idx)+8] += alpha9*(*v); 1800b9cda22cSBarry Smith y[10*(*idx)+9] += alpha10*(*v); 1801b9cda22cSBarry Smith idx++; v++; 1802b9cda22cSBarry Smith } 1803b9cda22cSBarry Smith } 1804b3c51c66SMatthew Knepley ierr = PetscLogFlops(20*a->nz);CHKERRQ(ierr); 1805b9cda22cSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1806b9cda22cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1807b9cda22cSBarry Smith PetscFunctionReturn(0); 1808b9cda22cSBarry Smith } 1809b9cda22cSBarry Smith 1810b9cda22cSBarry Smith #undef __FUNCT__ 1811b9cda22cSBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10" 1812b9cda22cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1813b9cda22cSBarry Smith { 1814b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1815b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1816b9cda22cSBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1817b9cda22cSBarry Smith PetscErrorCode ierr; 1818d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 1819b9cda22cSBarry Smith 1820b9cda22cSBarry Smith PetscFunctionBegin; 1821b9cda22cSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1822b9cda22cSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1823b9cda22cSBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1824b9cda22cSBarry Smith for (i=0; i<m; i++) { 1825b9cda22cSBarry Smith idx = a->j + a->i[i] ; 1826b9cda22cSBarry Smith v = a->a + a->i[i] ; 1827b9cda22cSBarry Smith n = a->i[i+1] - a->i[i]; 1828b9cda22cSBarry Smith alpha1 = x[10*i]; 1829b9cda22cSBarry Smith alpha2 = x[10*i+1]; 1830b9cda22cSBarry Smith alpha3 = x[10*i+2]; 1831b9cda22cSBarry Smith alpha4 = x[10*i+3]; 1832b9cda22cSBarry Smith alpha5 = x[10*i+4]; 1833b9cda22cSBarry Smith alpha6 = x[10*i+5]; 1834b9cda22cSBarry Smith alpha7 = x[10*i+6]; 1835b9cda22cSBarry Smith alpha8 = x[10*i+7]; 1836b9cda22cSBarry Smith alpha9 = x[10*i+8]; 1837b9cda22cSBarry Smith alpha10 = x[10*i+9]; 1838b9cda22cSBarry Smith while (n-->0) { 1839b9cda22cSBarry Smith y[10*(*idx)] += alpha1*(*v); 1840b9cda22cSBarry Smith y[10*(*idx)+1] += alpha2*(*v); 1841b9cda22cSBarry Smith y[10*(*idx)+2] += alpha3*(*v); 1842b9cda22cSBarry Smith y[10*(*idx)+3] += alpha4*(*v); 1843b9cda22cSBarry Smith y[10*(*idx)+4] += alpha5*(*v); 1844b9cda22cSBarry Smith y[10*(*idx)+5] += alpha6*(*v); 1845b9cda22cSBarry Smith y[10*(*idx)+6] += alpha7*(*v); 1846b9cda22cSBarry Smith y[10*(*idx)+7] += alpha8*(*v); 1847b9cda22cSBarry Smith y[10*(*idx)+8] += alpha9*(*v); 1848b9cda22cSBarry Smith y[10*(*idx)+9] += alpha10*(*v); 1849b9cda22cSBarry Smith idx++; v++; 1850b9cda22cSBarry Smith } 1851b9cda22cSBarry Smith } 1852b9cda22cSBarry Smith ierr = PetscLogFlops(20*a->nz);CHKERRQ(ierr); 1853b9cda22cSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1854b9cda22cSBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1855b9cda22cSBarry Smith PetscFunctionReturn(0); 1856b9cda22cSBarry Smith } 1857b9cda22cSBarry Smith 18582b692628SMatthew Knepley 18592f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/ 18602f7816d4SBarry Smith #undef __FUNCT__ 18612f7816d4SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_16" 1862dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 18632f7816d4SBarry Smith { 18642f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 18652f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 18662f7816d4SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 18672f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1868dfbe8321SBarry Smith PetscErrorCode ierr; 1869d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 1870b24ad042SBarry Smith PetscInt n,i,jrow,j; 18712f7816d4SBarry Smith 18722f7816d4SBarry Smith PetscFunctionBegin; 18731ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 18741ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 18752f7816d4SBarry Smith idx = a->j; 18762f7816d4SBarry Smith v = a->a; 18772f7816d4SBarry Smith ii = a->i; 18782f7816d4SBarry Smith 18792f7816d4SBarry Smith for (i=0; i<m; i++) { 18802f7816d4SBarry Smith jrow = ii[i]; 18812f7816d4SBarry Smith n = ii[i+1] - jrow; 18822f7816d4SBarry Smith sum1 = 0.0; 18832f7816d4SBarry Smith sum2 = 0.0; 18842f7816d4SBarry Smith sum3 = 0.0; 18852f7816d4SBarry Smith sum4 = 0.0; 18862f7816d4SBarry Smith sum5 = 0.0; 18872f7816d4SBarry Smith sum6 = 0.0; 18882f7816d4SBarry Smith sum7 = 0.0; 18892f7816d4SBarry Smith sum8 = 0.0; 18902f7816d4SBarry Smith sum9 = 0.0; 18912f7816d4SBarry Smith sum10 = 0.0; 18922f7816d4SBarry Smith sum11 = 0.0; 18932f7816d4SBarry Smith sum12 = 0.0; 18942f7816d4SBarry Smith sum13 = 0.0; 18952f7816d4SBarry Smith sum14 = 0.0; 18962f7816d4SBarry Smith sum15 = 0.0; 18972f7816d4SBarry Smith sum16 = 0.0; 1898b3c51c66SMatthew Knepley nonzerorow += (n>0); 18992f7816d4SBarry Smith for (j=0; j<n; j++) { 19002f7816d4SBarry Smith sum1 += v[jrow]*x[16*idx[jrow]]; 19012f7816d4SBarry Smith sum2 += v[jrow]*x[16*idx[jrow]+1]; 19022f7816d4SBarry Smith sum3 += v[jrow]*x[16*idx[jrow]+2]; 19032f7816d4SBarry Smith sum4 += v[jrow]*x[16*idx[jrow]+3]; 19042f7816d4SBarry Smith sum5 += v[jrow]*x[16*idx[jrow]+4]; 19052f7816d4SBarry Smith sum6 += v[jrow]*x[16*idx[jrow]+5]; 19062f7816d4SBarry Smith sum7 += v[jrow]*x[16*idx[jrow]+6]; 19072f7816d4SBarry Smith sum8 += v[jrow]*x[16*idx[jrow]+7]; 19082f7816d4SBarry Smith sum9 += v[jrow]*x[16*idx[jrow]+8]; 19092f7816d4SBarry Smith sum10 += v[jrow]*x[16*idx[jrow]+9]; 19102f7816d4SBarry Smith sum11 += v[jrow]*x[16*idx[jrow]+10]; 19112f7816d4SBarry Smith sum12 += v[jrow]*x[16*idx[jrow]+11]; 19122f7816d4SBarry Smith sum13 += v[jrow]*x[16*idx[jrow]+12]; 19132f7816d4SBarry Smith sum14 += v[jrow]*x[16*idx[jrow]+13]; 19142f7816d4SBarry Smith sum15 += v[jrow]*x[16*idx[jrow]+14]; 19152f7816d4SBarry Smith sum16 += v[jrow]*x[16*idx[jrow]+15]; 19162f7816d4SBarry Smith jrow++; 19172f7816d4SBarry Smith } 19182f7816d4SBarry Smith y[16*i] = sum1; 19192f7816d4SBarry Smith y[16*i+1] = sum2; 19202f7816d4SBarry Smith y[16*i+2] = sum3; 19212f7816d4SBarry Smith y[16*i+3] = sum4; 19222f7816d4SBarry Smith y[16*i+4] = sum5; 19232f7816d4SBarry Smith y[16*i+5] = sum6; 19242f7816d4SBarry Smith y[16*i+6] = sum7; 19252f7816d4SBarry Smith y[16*i+7] = sum8; 19262f7816d4SBarry Smith y[16*i+8] = sum9; 19272f7816d4SBarry Smith y[16*i+9] = sum10; 19282f7816d4SBarry Smith y[16*i+10] = sum11; 19292f7816d4SBarry Smith y[16*i+11] = sum12; 19302f7816d4SBarry Smith y[16*i+12] = sum13; 19312f7816d4SBarry Smith y[16*i+13] = sum14; 19322f7816d4SBarry Smith y[16*i+14] = sum15; 19332f7816d4SBarry Smith y[16*i+15] = sum16; 19342f7816d4SBarry Smith } 19352f7816d4SBarry Smith 1936b3c51c66SMatthew Knepley ierr = PetscLogFlops(32*a->nz - 16*nonzerorow);CHKERRQ(ierr); 19371ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 19381ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 19392f7816d4SBarry Smith PetscFunctionReturn(0); 19402f7816d4SBarry Smith } 19412f7816d4SBarry Smith 19422f7816d4SBarry Smith #undef __FUNCT__ 19432f7816d4SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 1944dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 19452f7816d4SBarry Smith { 19462f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 19472f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 19482f7816d4SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 19492f7816d4SBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1950dfbe8321SBarry Smith PetscErrorCode ierr; 1951d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 19522f7816d4SBarry Smith 19532f7816d4SBarry Smith PetscFunctionBegin; 19542dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 19551ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 19561ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1957bfec09a0SHong Zhang 19582f7816d4SBarry Smith for (i=0; i<m; i++) { 1959bfec09a0SHong Zhang idx = a->j + a->i[i] ; 1960bfec09a0SHong Zhang v = a->a + a->i[i] ; 19612f7816d4SBarry Smith n = a->i[i+1] - a->i[i]; 19622f7816d4SBarry Smith alpha1 = x[16*i]; 19632f7816d4SBarry Smith alpha2 = x[16*i+1]; 19642f7816d4SBarry Smith alpha3 = x[16*i+2]; 19652f7816d4SBarry Smith alpha4 = x[16*i+3]; 19662f7816d4SBarry Smith alpha5 = x[16*i+4]; 19672f7816d4SBarry Smith alpha6 = x[16*i+5]; 19682f7816d4SBarry Smith alpha7 = x[16*i+6]; 19692f7816d4SBarry Smith alpha8 = x[16*i+7]; 19702f7816d4SBarry Smith alpha9 = x[16*i+8]; 19712f7816d4SBarry Smith alpha10 = x[16*i+9]; 19722f7816d4SBarry Smith alpha11 = x[16*i+10]; 19732f7816d4SBarry Smith alpha12 = x[16*i+11]; 19742f7816d4SBarry Smith alpha13 = x[16*i+12]; 19752f7816d4SBarry Smith alpha14 = x[16*i+13]; 19762f7816d4SBarry Smith alpha15 = x[16*i+14]; 19772f7816d4SBarry Smith alpha16 = x[16*i+15]; 19782f7816d4SBarry Smith while (n-->0) { 19792f7816d4SBarry Smith y[16*(*idx)] += alpha1*(*v); 19802f7816d4SBarry Smith y[16*(*idx)+1] += alpha2*(*v); 19812f7816d4SBarry Smith y[16*(*idx)+2] += alpha3*(*v); 19822f7816d4SBarry Smith y[16*(*idx)+3] += alpha4*(*v); 19832f7816d4SBarry Smith y[16*(*idx)+4] += alpha5*(*v); 19842f7816d4SBarry Smith y[16*(*idx)+5] += alpha6*(*v); 19852f7816d4SBarry Smith y[16*(*idx)+6] += alpha7*(*v); 19862f7816d4SBarry Smith y[16*(*idx)+7] += alpha8*(*v); 19872f7816d4SBarry Smith y[16*(*idx)+8] += alpha9*(*v); 19882f7816d4SBarry Smith y[16*(*idx)+9] += alpha10*(*v); 19892f7816d4SBarry Smith y[16*(*idx)+10] += alpha11*(*v); 19902f7816d4SBarry Smith y[16*(*idx)+11] += alpha12*(*v); 19912f7816d4SBarry Smith y[16*(*idx)+12] += alpha13*(*v); 19922f7816d4SBarry Smith y[16*(*idx)+13] += alpha14*(*v); 19932f7816d4SBarry Smith y[16*(*idx)+14] += alpha15*(*v); 19942f7816d4SBarry Smith y[16*(*idx)+15] += alpha16*(*v); 19952f7816d4SBarry Smith idx++; v++; 19962f7816d4SBarry Smith } 19972f7816d4SBarry Smith } 1998b3c51c66SMatthew Knepley ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr); 19991ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 20001ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 20012f7816d4SBarry Smith PetscFunctionReturn(0); 20022f7816d4SBarry Smith } 20032f7816d4SBarry Smith 20042f7816d4SBarry Smith #undef __FUNCT__ 20052f7816d4SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 2006dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 20072f7816d4SBarry Smith { 20082f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 20092f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 20102f7816d4SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 20112f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2012dfbe8321SBarry Smith PetscErrorCode ierr; 2013d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2014b24ad042SBarry Smith PetscInt n,i,jrow,j; 20152f7816d4SBarry Smith 20162f7816d4SBarry Smith PetscFunctionBegin; 20172f7816d4SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 20181ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 20191ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 20202f7816d4SBarry Smith idx = a->j; 20212f7816d4SBarry Smith v = a->a; 20222f7816d4SBarry Smith ii = a->i; 20232f7816d4SBarry Smith 20242f7816d4SBarry Smith for (i=0; i<m; i++) { 20252f7816d4SBarry Smith jrow = ii[i]; 20262f7816d4SBarry Smith n = ii[i+1] - jrow; 20272f7816d4SBarry Smith sum1 = 0.0; 20282f7816d4SBarry Smith sum2 = 0.0; 20292f7816d4SBarry Smith sum3 = 0.0; 20302f7816d4SBarry Smith sum4 = 0.0; 20312f7816d4SBarry Smith sum5 = 0.0; 20322f7816d4SBarry Smith sum6 = 0.0; 20332f7816d4SBarry Smith sum7 = 0.0; 20342f7816d4SBarry Smith sum8 = 0.0; 20352f7816d4SBarry Smith sum9 = 0.0; 20362f7816d4SBarry Smith sum10 = 0.0; 20372f7816d4SBarry Smith sum11 = 0.0; 20382f7816d4SBarry Smith sum12 = 0.0; 20392f7816d4SBarry Smith sum13 = 0.0; 20402f7816d4SBarry Smith sum14 = 0.0; 20412f7816d4SBarry Smith sum15 = 0.0; 20422f7816d4SBarry Smith sum16 = 0.0; 20432f7816d4SBarry Smith for (j=0; j<n; j++) { 20442f7816d4SBarry Smith sum1 += v[jrow]*x[16*idx[jrow]]; 20452f7816d4SBarry Smith sum2 += v[jrow]*x[16*idx[jrow]+1]; 20462f7816d4SBarry Smith sum3 += v[jrow]*x[16*idx[jrow]+2]; 20472f7816d4SBarry Smith sum4 += v[jrow]*x[16*idx[jrow]+3]; 20482f7816d4SBarry Smith sum5 += v[jrow]*x[16*idx[jrow]+4]; 20492f7816d4SBarry Smith sum6 += v[jrow]*x[16*idx[jrow]+5]; 20502f7816d4SBarry Smith sum7 += v[jrow]*x[16*idx[jrow]+6]; 20512f7816d4SBarry Smith sum8 += v[jrow]*x[16*idx[jrow]+7]; 20522f7816d4SBarry Smith sum9 += v[jrow]*x[16*idx[jrow]+8]; 20532f7816d4SBarry Smith sum10 += v[jrow]*x[16*idx[jrow]+9]; 20542f7816d4SBarry Smith sum11 += v[jrow]*x[16*idx[jrow]+10]; 20552f7816d4SBarry Smith sum12 += v[jrow]*x[16*idx[jrow]+11]; 20562f7816d4SBarry Smith sum13 += v[jrow]*x[16*idx[jrow]+12]; 20572f7816d4SBarry Smith sum14 += v[jrow]*x[16*idx[jrow]+13]; 20582f7816d4SBarry Smith sum15 += v[jrow]*x[16*idx[jrow]+14]; 20592f7816d4SBarry Smith sum16 += v[jrow]*x[16*idx[jrow]+15]; 20602f7816d4SBarry Smith jrow++; 20612f7816d4SBarry Smith } 20622f7816d4SBarry Smith y[16*i] += sum1; 20632f7816d4SBarry Smith y[16*i+1] += sum2; 20642f7816d4SBarry Smith y[16*i+2] += sum3; 20652f7816d4SBarry Smith y[16*i+3] += sum4; 20662f7816d4SBarry Smith y[16*i+4] += sum5; 20672f7816d4SBarry Smith y[16*i+5] += sum6; 20682f7816d4SBarry Smith y[16*i+6] += sum7; 20692f7816d4SBarry Smith y[16*i+7] += sum8; 20702f7816d4SBarry Smith y[16*i+8] += sum9; 20712f7816d4SBarry Smith y[16*i+9] += sum10; 20722f7816d4SBarry Smith y[16*i+10] += sum11; 20732f7816d4SBarry Smith y[16*i+11] += sum12; 20742f7816d4SBarry Smith y[16*i+12] += sum13; 20752f7816d4SBarry Smith y[16*i+13] += sum14; 20762f7816d4SBarry Smith y[16*i+14] += sum15; 20772f7816d4SBarry Smith y[16*i+15] += sum16; 20782f7816d4SBarry Smith } 20792f7816d4SBarry Smith 2080efee365bSSatish Balay ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr); 20811ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 20821ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 20832f7816d4SBarry Smith PetscFunctionReturn(0); 20842f7816d4SBarry Smith } 20852f7816d4SBarry Smith 20862f7816d4SBarry Smith #undef __FUNCT__ 20872f7816d4SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 2088dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 20892f7816d4SBarry Smith { 20902f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 20912f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 20922f7816d4SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 20932f7816d4SBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2094dfbe8321SBarry Smith PetscErrorCode ierr; 2095d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 20962f7816d4SBarry Smith 20972f7816d4SBarry Smith PetscFunctionBegin; 20982f7816d4SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 20991ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 21001ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 21012f7816d4SBarry Smith for (i=0; i<m; i++) { 2102bfec09a0SHong Zhang idx = a->j + a->i[i] ; 2103bfec09a0SHong Zhang v = a->a + a->i[i] ; 21042f7816d4SBarry Smith n = a->i[i+1] - a->i[i]; 21052f7816d4SBarry Smith alpha1 = x[16*i]; 21062f7816d4SBarry Smith alpha2 = x[16*i+1]; 21072f7816d4SBarry Smith alpha3 = x[16*i+2]; 21082f7816d4SBarry Smith alpha4 = x[16*i+3]; 21092f7816d4SBarry Smith alpha5 = x[16*i+4]; 21102f7816d4SBarry Smith alpha6 = x[16*i+5]; 21112f7816d4SBarry Smith alpha7 = x[16*i+6]; 21122f7816d4SBarry Smith alpha8 = x[16*i+7]; 21132f7816d4SBarry Smith alpha9 = x[16*i+8]; 21142f7816d4SBarry Smith alpha10 = x[16*i+9]; 21152f7816d4SBarry Smith alpha11 = x[16*i+10]; 21162f7816d4SBarry Smith alpha12 = x[16*i+11]; 21172f7816d4SBarry Smith alpha13 = x[16*i+12]; 21182f7816d4SBarry Smith alpha14 = x[16*i+13]; 21192f7816d4SBarry Smith alpha15 = x[16*i+14]; 21202f7816d4SBarry Smith alpha16 = x[16*i+15]; 21212f7816d4SBarry Smith while (n-->0) { 21222f7816d4SBarry Smith y[16*(*idx)] += alpha1*(*v); 21232f7816d4SBarry Smith y[16*(*idx)+1] += alpha2*(*v); 21242f7816d4SBarry Smith y[16*(*idx)+2] += alpha3*(*v); 21252f7816d4SBarry Smith y[16*(*idx)+3] += alpha4*(*v); 21262f7816d4SBarry Smith y[16*(*idx)+4] += alpha5*(*v); 21272f7816d4SBarry Smith y[16*(*idx)+5] += alpha6*(*v); 21282f7816d4SBarry Smith y[16*(*idx)+6] += alpha7*(*v); 21292f7816d4SBarry Smith y[16*(*idx)+7] += alpha8*(*v); 21302f7816d4SBarry Smith y[16*(*idx)+8] += alpha9*(*v); 21312f7816d4SBarry Smith y[16*(*idx)+9] += alpha10*(*v); 21322f7816d4SBarry Smith y[16*(*idx)+10] += alpha11*(*v); 21332f7816d4SBarry Smith y[16*(*idx)+11] += alpha12*(*v); 21342f7816d4SBarry Smith y[16*(*idx)+12] += alpha13*(*v); 21352f7816d4SBarry Smith y[16*(*idx)+13] += alpha14*(*v); 21362f7816d4SBarry Smith y[16*(*idx)+14] += alpha15*(*v); 21372f7816d4SBarry Smith y[16*(*idx)+15] += alpha16*(*v); 21382f7816d4SBarry Smith idx++; v++; 21392f7816d4SBarry Smith } 21402f7816d4SBarry Smith } 2141efee365bSSatish Balay ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr); 21421ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 21431ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 214466ed3db0SBarry Smith PetscFunctionReturn(0); 214566ed3db0SBarry Smith } 214666ed3db0SBarry Smith 2147ed1c418cSBarry Smith /*--------------------------------------------------------------------------------------------*/ 2148ed1c418cSBarry Smith #undef __FUNCT__ 2149ed1c418cSBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_18" 2150ed1c418cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2151ed1c418cSBarry Smith { 2152ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2153ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2154ed1c418cSBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2155ed1c418cSBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2156ed1c418cSBarry Smith PetscErrorCode ierr; 2157d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 2158ed1c418cSBarry Smith PetscInt n,i,jrow,j; 2159ed1c418cSBarry Smith 2160ed1c418cSBarry Smith PetscFunctionBegin; 2161ed1c418cSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2162ed1c418cSBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2163ed1c418cSBarry Smith idx = a->j; 2164ed1c418cSBarry Smith v = a->a; 2165ed1c418cSBarry Smith ii = a->i; 2166ed1c418cSBarry Smith 2167ed1c418cSBarry Smith for (i=0; i<m; i++) { 2168ed1c418cSBarry Smith jrow = ii[i]; 2169ed1c418cSBarry Smith n = ii[i+1] - jrow; 2170ed1c418cSBarry Smith sum1 = 0.0; 2171ed1c418cSBarry Smith sum2 = 0.0; 2172ed1c418cSBarry Smith sum3 = 0.0; 2173ed1c418cSBarry Smith sum4 = 0.0; 2174ed1c418cSBarry Smith sum5 = 0.0; 2175ed1c418cSBarry Smith sum6 = 0.0; 2176ed1c418cSBarry Smith sum7 = 0.0; 2177ed1c418cSBarry Smith sum8 = 0.0; 2178ed1c418cSBarry Smith sum9 = 0.0; 2179ed1c418cSBarry Smith sum10 = 0.0; 2180ed1c418cSBarry Smith sum11 = 0.0; 2181ed1c418cSBarry Smith sum12 = 0.0; 2182ed1c418cSBarry Smith sum13 = 0.0; 2183ed1c418cSBarry Smith sum14 = 0.0; 2184ed1c418cSBarry Smith sum15 = 0.0; 2185ed1c418cSBarry Smith sum16 = 0.0; 2186ed1c418cSBarry Smith sum17 = 0.0; 2187ed1c418cSBarry Smith sum18 = 0.0; 2188ed1c418cSBarry Smith nonzerorow += (n>0); 2189ed1c418cSBarry Smith for (j=0; j<n; j++) { 2190ed1c418cSBarry Smith sum1 += v[jrow]*x[18*idx[jrow]]; 2191ed1c418cSBarry Smith sum2 += v[jrow]*x[18*idx[jrow]+1]; 2192ed1c418cSBarry Smith sum3 += v[jrow]*x[18*idx[jrow]+2]; 2193ed1c418cSBarry Smith sum4 += v[jrow]*x[18*idx[jrow]+3]; 2194ed1c418cSBarry Smith sum5 += v[jrow]*x[18*idx[jrow]+4]; 2195ed1c418cSBarry Smith sum6 += v[jrow]*x[18*idx[jrow]+5]; 2196ed1c418cSBarry Smith sum7 += v[jrow]*x[18*idx[jrow]+6]; 2197ed1c418cSBarry Smith sum8 += v[jrow]*x[18*idx[jrow]+7]; 2198ed1c418cSBarry Smith sum9 += v[jrow]*x[18*idx[jrow]+8]; 2199ed1c418cSBarry Smith sum10 += v[jrow]*x[18*idx[jrow]+9]; 2200ed1c418cSBarry Smith sum11 += v[jrow]*x[18*idx[jrow]+10]; 2201ed1c418cSBarry Smith sum12 += v[jrow]*x[18*idx[jrow]+11]; 2202ed1c418cSBarry Smith sum13 += v[jrow]*x[18*idx[jrow]+12]; 2203ed1c418cSBarry Smith sum14 += v[jrow]*x[18*idx[jrow]+13]; 2204ed1c418cSBarry Smith sum15 += v[jrow]*x[18*idx[jrow]+14]; 2205ed1c418cSBarry Smith sum16 += v[jrow]*x[18*idx[jrow]+15]; 2206ed1c418cSBarry Smith sum17 += v[jrow]*x[18*idx[jrow]+16]; 2207ed1c418cSBarry Smith sum18 += v[jrow]*x[18*idx[jrow]+17]; 2208ed1c418cSBarry Smith jrow++; 2209ed1c418cSBarry Smith } 2210ed1c418cSBarry Smith y[18*i] = sum1; 2211ed1c418cSBarry Smith y[18*i+1] = sum2; 2212ed1c418cSBarry Smith y[18*i+2] = sum3; 2213ed1c418cSBarry Smith y[18*i+3] = sum4; 2214ed1c418cSBarry Smith y[18*i+4] = sum5; 2215ed1c418cSBarry Smith y[18*i+5] = sum6; 2216ed1c418cSBarry Smith y[18*i+6] = sum7; 2217ed1c418cSBarry Smith y[18*i+7] = sum8; 2218ed1c418cSBarry Smith y[18*i+8] = sum9; 2219ed1c418cSBarry Smith y[18*i+9] = sum10; 2220ed1c418cSBarry Smith y[18*i+10] = sum11; 2221ed1c418cSBarry Smith y[18*i+11] = sum12; 2222ed1c418cSBarry Smith y[18*i+12] = sum13; 2223ed1c418cSBarry Smith y[18*i+13] = sum14; 2224ed1c418cSBarry Smith y[18*i+14] = sum15; 2225ed1c418cSBarry Smith y[18*i+15] = sum16; 2226ed1c418cSBarry Smith y[18*i+16] = sum17; 2227ed1c418cSBarry Smith y[18*i+17] = sum18; 2228ed1c418cSBarry Smith } 2229ed1c418cSBarry Smith 2230ed1c418cSBarry Smith ierr = PetscLogFlops(36*a->nz - 18*nonzerorow);CHKERRQ(ierr); 2231ed1c418cSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2232ed1c418cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2233ed1c418cSBarry Smith PetscFunctionReturn(0); 2234ed1c418cSBarry Smith } 2235ed1c418cSBarry Smith 2236ed1c418cSBarry Smith #undef __FUNCT__ 2237ed1c418cSBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_18" 2238ed1c418cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2239ed1c418cSBarry Smith { 2240ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2241ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2242ed1c418cSBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 2243ed1c418cSBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2244ed1c418cSBarry Smith PetscErrorCode ierr; 2245d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 2246ed1c418cSBarry Smith 2247ed1c418cSBarry Smith PetscFunctionBegin; 2248ed1c418cSBarry Smith ierr = VecSet(yy,zero);CHKERRQ(ierr); 2249ed1c418cSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2250ed1c418cSBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2251ed1c418cSBarry Smith 2252ed1c418cSBarry Smith for (i=0; i<m; i++) { 2253ed1c418cSBarry Smith idx = a->j + a->i[i] ; 2254ed1c418cSBarry Smith v = a->a + a->i[i] ; 2255ed1c418cSBarry Smith n = a->i[i+1] - a->i[i]; 2256ed1c418cSBarry Smith alpha1 = x[18*i]; 2257ed1c418cSBarry Smith alpha2 = x[18*i+1]; 2258ed1c418cSBarry Smith alpha3 = x[18*i+2]; 2259ed1c418cSBarry Smith alpha4 = x[18*i+3]; 2260ed1c418cSBarry Smith alpha5 = x[18*i+4]; 2261ed1c418cSBarry Smith alpha6 = x[18*i+5]; 2262ed1c418cSBarry Smith alpha7 = x[18*i+6]; 2263ed1c418cSBarry Smith alpha8 = x[18*i+7]; 2264ed1c418cSBarry Smith alpha9 = x[18*i+8]; 2265ed1c418cSBarry Smith alpha10 = x[18*i+9]; 2266ed1c418cSBarry Smith alpha11 = x[18*i+10]; 2267ed1c418cSBarry Smith alpha12 = x[18*i+11]; 2268ed1c418cSBarry Smith alpha13 = x[18*i+12]; 2269ed1c418cSBarry Smith alpha14 = x[18*i+13]; 2270ed1c418cSBarry Smith alpha15 = x[18*i+14]; 2271ed1c418cSBarry Smith alpha16 = x[18*i+15]; 2272ed1c418cSBarry Smith alpha17 = x[18*i+16]; 2273ed1c418cSBarry Smith alpha18 = x[18*i+17]; 2274ed1c418cSBarry Smith while (n-->0) { 2275ed1c418cSBarry Smith y[18*(*idx)] += alpha1*(*v); 2276ed1c418cSBarry Smith y[18*(*idx)+1] += alpha2*(*v); 2277ed1c418cSBarry Smith y[18*(*idx)+2] += alpha3*(*v); 2278ed1c418cSBarry Smith y[18*(*idx)+3] += alpha4*(*v); 2279ed1c418cSBarry Smith y[18*(*idx)+4] += alpha5*(*v); 2280ed1c418cSBarry Smith y[18*(*idx)+5] += alpha6*(*v); 2281ed1c418cSBarry Smith y[18*(*idx)+6] += alpha7*(*v); 2282ed1c418cSBarry Smith y[18*(*idx)+7] += alpha8*(*v); 2283ed1c418cSBarry Smith y[18*(*idx)+8] += alpha9*(*v); 2284ed1c418cSBarry Smith y[18*(*idx)+9] += alpha10*(*v); 2285ed1c418cSBarry Smith y[18*(*idx)+10] += alpha11*(*v); 2286ed1c418cSBarry Smith y[18*(*idx)+11] += alpha12*(*v); 2287ed1c418cSBarry Smith y[18*(*idx)+12] += alpha13*(*v); 2288ed1c418cSBarry Smith y[18*(*idx)+13] += alpha14*(*v); 2289ed1c418cSBarry Smith y[18*(*idx)+14] += alpha15*(*v); 2290ed1c418cSBarry Smith y[18*(*idx)+15] += alpha16*(*v); 2291ed1c418cSBarry Smith y[18*(*idx)+16] += alpha17*(*v); 2292ed1c418cSBarry Smith y[18*(*idx)+17] += alpha18*(*v); 2293ed1c418cSBarry Smith idx++; v++; 2294ed1c418cSBarry Smith } 2295ed1c418cSBarry Smith } 2296ed1c418cSBarry Smith ierr = PetscLogFlops(36*a->nz);CHKERRQ(ierr); 2297ed1c418cSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2298ed1c418cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2299ed1c418cSBarry Smith PetscFunctionReturn(0); 2300ed1c418cSBarry Smith } 2301ed1c418cSBarry Smith 2302ed1c418cSBarry Smith #undef __FUNCT__ 2303ed1c418cSBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_18" 2304ed1c418cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2305ed1c418cSBarry Smith { 2306ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2307ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2308ed1c418cSBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2309ed1c418cSBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2310ed1c418cSBarry Smith PetscErrorCode ierr; 2311d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2312ed1c418cSBarry Smith PetscInt n,i,jrow,j; 2313ed1c418cSBarry Smith 2314ed1c418cSBarry Smith PetscFunctionBegin; 2315ed1c418cSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2316ed1c418cSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2317ed1c418cSBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2318ed1c418cSBarry Smith idx = a->j; 2319ed1c418cSBarry Smith v = a->a; 2320ed1c418cSBarry Smith ii = a->i; 2321ed1c418cSBarry Smith 2322ed1c418cSBarry Smith for (i=0; i<m; i++) { 2323ed1c418cSBarry Smith jrow = ii[i]; 2324ed1c418cSBarry Smith n = ii[i+1] - jrow; 2325ed1c418cSBarry Smith sum1 = 0.0; 2326ed1c418cSBarry Smith sum2 = 0.0; 2327ed1c418cSBarry Smith sum3 = 0.0; 2328ed1c418cSBarry Smith sum4 = 0.0; 2329ed1c418cSBarry Smith sum5 = 0.0; 2330ed1c418cSBarry Smith sum6 = 0.0; 2331ed1c418cSBarry Smith sum7 = 0.0; 2332ed1c418cSBarry Smith sum8 = 0.0; 2333ed1c418cSBarry Smith sum9 = 0.0; 2334ed1c418cSBarry Smith sum10 = 0.0; 2335ed1c418cSBarry Smith sum11 = 0.0; 2336ed1c418cSBarry Smith sum12 = 0.0; 2337ed1c418cSBarry Smith sum13 = 0.0; 2338ed1c418cSBarry Smith sum14 = 0.0; 2339ed1c418cSBarry Smith sum15 = 0.0; 2340ed1c418cSBarry Smith sum16 = 0.0; 2341ed1c418cSBarry Smith sum17 = 0.0; 2342ed1c418cSBarry Smith sum18 = 0.0; 2343ed1c418cSBarry Smith for (j=0; j<n; j++) { 2344ed1c418cSBarry Smith sum1 += v[jrow]*x[18*idx[jrow]]; 2345ed1c418cSBarry Smith sum2 += v[jrow]*x[18*idx[jrow]+1]; 2346ed1c418cSBarry Smith sum3 += v[jrow]*x[18*idx[jrow]+2]; 2347ed1c418cSBarry Smith sum4 += v[jrow]*x[18*idx[jrow]+3]; 2348ed1c418cSBarry Smith sum5 += v[jrow]*x[18*idx[jrow]+4]; 2349ed1c418cSBarry Smith sum6 += v[jrow]*x[18*idx[jrow]+5]; 2350ed1c418cSBarry Smith sum7 += v[jrow]*x[18*idx[jrow]+6]; 2351ed1c418cSBarry Smith sum8 += v[jrow]*x[18*idx[jrow]+7]; 2352ed1c418cSBarry Smith sum9 += v[jrow]*x[18*idx[jrow]+8]; 2353ed1c418cSBarry Smith sum10 += v[jrow]*x[18*idx[jrow]+9]; 2354ed1c418cSBarry Smith sum11 += v[jrow]*x[18*idx[jrow]+10]; 2355ed1c418cSBarry Smith sum12 += v[jrow]*x[18*idx[jrow]+11]; 2356ed1c418cSBarry Smith sum13 += v[jrow]*x[18*idx[jrow]+12]; 2357ed1c418cSBarry Smith sum14 += v[jrow]*x[18*idx[jrow]+13]; 2358ed1c418cSBarry Smith sum15 += v[jrow]*x[18*idx[jrow]+14]; 2359ed1c418cSBarry Smith sum16 += v[jrow]*x[18*idx[jrow]+15]; 2360ed1c418cSBarry Smith sum17 += v[jrow]*x[18*idx[jrow]+16]; 2361ed1c418cSBarry Smith sum18 += v[jrow]*x[18*idx[jrow]+17]; 2362ed1c418cSBarry Smith jrow++; 2363ed1c418cSBarry Smith } 2364ed1c418cSBarry Smith y[18*i] += sum1; 2365ed1c418cSBarry Smith y[18*i+1] += sum2; 2366ed1c418cSBarry Smith y[18*i+2] += sum3; 2367ed1c418cSBarry Smith y[18*i+3] += sum4; 2368ed1c418cSBarry Smith y[18*i+4] += sum5; 2369ed1c418cSBarry Smith y[18*i+5] += sum6; 2370ed1c418cSBarry Smith y[18*i+6] += sum7; 2371ed1c418cSBarry Smith y[18*i+7] += sum8; 2372ed1c418cSBarry Smith y[18*i+8] += sum9; 2373ed1c418cSBarry Smith y[18*i+9] += sum10; 2374ed1c418cSBarry Smith y[18*i+10] += sum11; 2375ed1c418cSBarry Smith y[18*i+11] += sum12; 2376ed1c418cSBarry Smith y[18*i+12] += sum13; 2377ed1c418cSBarry Smith y[18*i+13] += sum14; 2378ed1c418cSBarry Smith y[18*i+14] += sum15; 2379ed1c418cSBarry Smith y[18*i+15] += sum16; 2380ed1c418cSBarry Smith y[18*i+16] += sum17; 2381ed1c418cSBarry Smith y[18*i+17] += sum18; 2382ed1c418cSBarry Smith } 2383ed1c418cSBarry Smith 2384ed1c418cSBarry Smith ierr = PetscLogFlops(36*a->nz);CHKERRQ(ierr); 2385ed1c418cSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2386ed1c418cSBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2387ed1c418cSBarry Smith PetscFunctionReturn(0); 2388ed1c418cSBarry Smith } 2389ed1c418cSBarry Smith 2390ed1c418cSBarry Smith #undef __FUNCT__ 2391ed1c418cSBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_18" 2392ed1c418cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2393ed1c418cSBarry Smith { 2394ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2395ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2396ed1c418cSBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2397ed1c418cSBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2398ed1c418cSBarry Smith PetscErrorCode ierr; 2399d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 2400ed1c418cSBarry Smith 2401ed1c418cSBarry Smith PetscFunctionBegin; 2402ed1c418cSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2403ed1c418cSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2404ed1c418cSBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2405ed1c418cSBarry Smith for (i=0; i<m; i++) { 2406ed1c418cSBarry Smith idx = a->j + a->i[i] ; 2407ed1c418cSBarry Smith v = a->a + a->i[i] ; 2408ed1c418cSBarry Smith n = a->i[i+1] - a->i[i]; 2409ed1c418cSBarry Smith alpha1 = x[18*i]; 2410ed1c418cSBarry Smith alpha2 = x[18*i+1]; 2411ed1c418cSBarry Smith alpha3 = x[18*i+2]; 2412ed1c418cSBarry Smith alpha4 = x[18*i+3]; 2413ed1c418cSBarry Smith alpha5 = x[18*i+4]; 2414ed1c418cSBarry Smith alpha6 = x[18*i+5]; 2415ed1c418cSBarry Smith alpha7 = x[18*i+6]; 2416ed1c418cSBarry Smith alpha8 = x[18*i+7]; 2417ed1c418cSBarry Smith alpha9 = x[18*i+8]; 2418ed1c418cSBarry Smith alpha10 = x[18*i+9]; 2419ed1c418cSBarry Smith alpha11 = x[18*i+10]; 2420ed1c418cSBarry Smith alpha12 = x[18*i+11]; 2421ed1c418cSBarry Smith alpha13 = x[18*i+12]; 2422ed1c418cSBarry Smith alpha14 = x[18*i+13]; 2423ed1c418cSBarry Smith alpha15 = x[18*i+14]; 2424ed1c418cSBarry Smith alpha16 = x[18*i+15]; 2425ed1c418cSBarry Smith alpha17 = x[18*i+16]; 2426ed1c418cSBarry Smith alpha18 = x[18*i+17]; 2427ed1c418cSBarry Smith while (n-->0) { 2428ed1c418cSBarry Smith y[18*(*idx)] += alpha1*(*v); 2429ed1c418cSBarry Smith y[18*(*idx)+1] += alpha2*(*v); 2430ed1c418cSBarry Smith y[18*(*idx)+2] += alpha3*(*v); 2431ed1c418cSBarry Smith y[18*(*idx)+3] += alpha4*(*v); 2432ed1c418cSBarry Smith y[18*(*idx)+4] += alpha5*(*v); 2433ed1c418cSBarry Smith y[18*(*idx)+5] += alpha6*(*v); 2434ed1c418cSBarry Smith y[18*(*idx)+6] += alpha7*(*v); 2435ed1c418cSBarry Smith y[18*(*idx)+7] += alpha8*(*v); 2436ed1c418cSBarry Smith y[18*(*idx)+8] += alpha9*(*v); 2437ed1c418cSBarry Smith y[18*(*idx)+9] += alpha10*(*v); 2438ed1c418cSBarry Smith y[18*(*idx)+10] += alpha11*(*v); 2439ed1c418cSBarry Smith y[18*(*idx)+11] += alpha12*(*v); 2440ed1c418cSBarry Smith y[18*(*idx)+12] += alpha13*(*v); 2441ed1c418cSBarry Smith y[18*(*idx)+13] += alpha14*(*v); 2442ed1c418cSBarry Smith y[18*(*idx)+14] += alpha15*(*v); 2443ed1c418cSBarry Smith y[18*(*idx)+15] += alpha16*(*v); 2444ed1c418cSBarry Smith y[18*(*idx)+16] += alpha17*(*v); 2445ed1c418cSBarry Smith y[18*(*idx)+17] += alpha18*(*v); 2446ed1c418cSBarry Smith idx++; v++; 2447ed1c418cSBarry Smith } 2448ed1c418cSBarry Smith } 2449ed1c418cSBarry Smith ierr = PetscLogFlops(36*a->nz);CHKERRQ(ierr); 2450ed1c418cSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2451ed1c418cSBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2452ed1c418cSBarry Smith PetscFunctionReturn(0); 2453ed1c418cSBarry Smith } 2454ed1c418cSBarry Smith 2455f4a53059SBarry Smith /*===================================================================================*/ 24564a2ae208SSatish Balay #undef __FUNCT__ 24574a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIMAIJ_dof" 2458dfbe8321SBarry Smith PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2459f4a53059SBarry Smith { 24604cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2461dfbe8321SBarry Smith PetscErrorCode ierr; 2462f4a53059SBarry Smith 2463b24ad042SBarry Smith PetscFunctionBegin; 2464f4a53059SBarry Smith /* start the scatter */ 2465ca9f406cSSatish Balay ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 24664cb9d9b8SBarry Smith ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 2467ca9f406cSSatish Balay ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 24684cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 2469f4a53059SBarry Smith PetscFunctionReturn(0); 2470f4a53059SBarry Smith } 2471f4a53059SBarry Smith 24724a2ae208SSatish Balay #undef __FUNCT__ 24734a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 2474dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 24754cb9d9b8SBarry Smith { 24764cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2477dfbe8321SBarry Smith PetscErrorCode ierr; 2478b24ad042SBarry Smith 24794cb9d9b8SBarry Smith PetscFunctionBegin; 24804cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 24814cb9d9b8SBarry Smith ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 2482ca9f406cSSatish Balay ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2483ca9f406cSSatish Balay ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 24844cb9d9b8SBarry Smith PetscFunctionReturn(0); 24854cb9d9b8SBarry Smith } 24864cb9d9b8SBarry Smith 24874a2ae208SSatish Balay #undef __FUNCT__ 24884a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 2489dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 24904cb9d9b8SBarry Smith { 24914cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2492dfbe8321SBarry Smith PetscErrorCode ierr; 24934cb9d9b8SBarry Smith 2494b24ad042SBarry Smith PetscFunctionBegin; 24954cb9d9b8SBarry Smith /* start the scatter */ 2496ca9f406cSSatish Balay ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2497d72c5c08SBarry Smith ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2498ca9f406cSSatish Balay ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2499717f2ec8SHong Zhang ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 25004cb9d9b8SBarry Smith PetscFunctionReturn(0); 25014cb9d9b8SBarry Smith } 25024cb9d9b8SBarry Smith 25034a2ae208SSatish Balay #undef __FUNCT__ 25044a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 2505dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 25064cb9d9b8SBarry Smith { 25074cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2508dfbe8321SBarry Smith PetscErrorCode ierr; 2509b24ad042SBarry Smith 25104cb9d9b8SBarry Smith PetscFunctionBegin; 25114cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2512ca9f406cSSatish Balay ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2513d72c5c08SBarry Smith ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2514ca9f406cSSatish Balay ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 25154cb9d9b8SBarry Smith PetscFunctionReturn(0); 25164cb9d9b8SBarry Smith } 25174cb9d9b8SBarry Smith 251895ddefa2SHong Zhang /* ----------------------------------------------------------------*/ 25199c4fc4c7SBarry Smith #undef __FUNCT__ 25207ba1a0bfSKris Buschelman #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 25217ba1a0bfSKris Buschelman PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 25227ba1a0bfSKris Buschelman { 25237ba1a0bfSKris Buschelman /* This routine requires testing -- but it's getting better. */ 25247ba1a0bfSKris Buschelman PetscErrorCode ierr; 2525a1a86e44SBarry Smith PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 25267ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 25277ba1a0bfSKris Buschelman Mat P=pp->AIJ; 25287ba1a0bfSKris Buschelman Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 25297ba1a0bfSKris Buschelman PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 25307ba1a0bfSKris Buschelman PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 2531d0f46423SBarry Smith PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof,cn; 25327ba1a0bfSKris Buschelman PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 25337ba1a0bfSKris Buschelman MatScalar *ca; 25347ba1a0bfSKris Buschelman 25357ba1a0bfSKris Buschelman PetscFunctionBegin; 25367ba1a0bfSKris Buschelman /* Start timer */ 25377ba1a0bfSKris Buschelman ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 25387ba1a0bfSKris Buschelman 25397ba1a0bfSKris Buschelman /* Get ij structure of P^T */ 25407ba1a0bfSKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 25417ba1a0bfSKris Buschelman 25427ba1a0bfSKris Buschelman cn = pn*ppdof; 25437ba1a0bfSKris Buschelman /* Allocate ci array, arrays for fill computation and */ 25447ba1a0bfSKris Buschelman /* free space for accumulating nonzero column info */ 25457ba1a0bfSKris Buschelman ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 25467ba1a0bfSKris Buschelman ci[0] = 0; 25477ba1a0bfSKris Buschelman 25487ba1a0bfSKris Buschelman /* Work arrays for rows of P^T*A */ 25497ba1a0bfSKris Buschelman ierr = PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 25507ba1a0bfSKris Buschelman ierr = PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 25517ba1a0bfSKris Buschelman ptasparserow = ptadenserow + an; 25527ba1a0bfSKris Buschelman denserow = ptasparserow + an; 25537ba1a0bfSKris Buschelman sparserow = denserow + cn; 25547ba1a0bfSKris Buschelman 25557ba1a0bfSKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 25567ba1a0bfSKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 25577ba1a0bfSKris Buschelman /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 2558a1a86e44SBarry Smith ierr = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space); 25597ba1a0bfSKris Buschelman current_space = free_space; 25607ba1a0bfSKris Buschelman 25617ba1a0bfSKris Buschelman /* Determine symbolic info for each row of C: */ 25627ba1a0bfSKris Buschelman for (i=0;i<pn;i++) { 25637ba1a0bfSKris Buschelman ptnzi = pti[i+1] - pti[i]; 25647ba1a0bfSKris Buschelman ptJ = ptj + pti[i]; 25657ba1a0bfSKris Buschelman for (dof=0;dof<ppdof;dof++) { 25667ba1a0bfSKris Buschelman ptanzi = 0; 25677ba1a0bfSKris Buschelman /* Determine symbolic row of PtA: */ 25687ba1a0bfSKris Buschelman for (j=0;j<ptnzi;j++) { 25697ba1a0bfSKris Buschelman /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 25707ba1a0bfSKris Buschelman arow = ptJ[j]*ppdof + dof; 25717ba1a0bfSKris Buschelman /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 25727ba1a0bfSKris Buschelman anzj = ai[arow+1] - ai[arow]; 25737ba1a0bfSKris Buschelman ajj = aj + ai[arow]; 25747ba1a0bfSKris Buschelman for (k=0;k<anzj;k++) { 25757ba1a0bfSKris Buschelman if (!ptadenserow[ajj[k]]) { 25767ba1a0bfSKris Buschelman ptadenserow[ajj[k]] = -1; 25777ba1a0bfSKris Buschelman ptasparserow[ptanzi++] = ajj[k]; 25787ba1a0bfSKris Buschelman } 25797ba1a0bfSKris Buschelman } 25807ba1a0bfSKris Buschelman } 25817ba1a0bfSKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 25827ba1a0bfSKris Buschelman ptaj = ptasparserow; 25837ba1a0bfSKris Buschelman cnzi = 0; 25847ba1a0bfSKris Buschelman for (j=0;j<ptanzi;j++) { 25857ba1a0bfSKris Buschelman /* Get offset within block of P */ 25867ba1a0bfSKris Buschelman pshift = *ptaj%ppdof; 25877ba1a0bfSKris Buschelman /* Get block row of P */ 25887ba1a0bfSKris Buschelman prow = (*ptaj++)/ppdof; /* integer division */ 25897ba1a0bfSKris Buschelman /* P has same number of nonzeros per row as the compressed form */ 25907ba1a0bfSKris Buschelman pnzj = pi[prow+1] - pi[prow]; 25917ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 25927ba1a0bfSKris Buschelman for (k=0;k<pnzj;k++) { 25937ba1a0bfSKris Buschelman /* Locations in C are shifted by the offset within the block */ 25947ba1a0bfSKris Buschelman /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 25957ba1a0bfSKris Buschelman if (!denserow[pjj[k]*ppdof+pshift]) { 25967ba1a0bfSKris Buschelman denserow[pjj[k]*ppdof+pshift] = -1; 25977ba1a0bfSKris Buschelman sparserow[cnzi++] = pjj[k]*ppdof+pshift; 25987ba1a0bfSKris Buschelman } 25997ba1a0bfSKris Buschelman } 26007ba1a0bfSKris Buschelman } 26017ba1a0bfSKris Buschelman 26027ba1a0bfSKris Buschelman /* sort sparserow */ 26037ba1a0bfSKris Buschelman ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 26047ba1a0bfSKris Buschelman 26057ba1a0bfSKris Buschelman /* If free space is not available, make more free space */ 26067ba1a0bfSKris Buschelman /* Double the amount of total space in the list */ 26077ba1a0bfSKris Buschelman if (current_space->local_remaining<cnzi) { 26084238b7adSHong Zhang ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 26097ba1a0bfSKris Buschelman } 26107ba1a0bfSKris Buschelman 26117ba1a0bfSKris Buschelman /* Copy data into free space, and zero out denserows */ 26127ba1a0bfSKris Buschelman ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 26137ba1a0bfSKris Buschelman current_space->array += cnzi; 26147ba1a0bfSKris Buschelman current_space->local_used += cnzi; 26157ba1a0bfSKris Buschelman current_space->local_remaining -= cnzi; 26167ba1a0bfSKris Buschelman 26177ba1a0bfSKris Buschelman for (j=0;j<ptanzi;j++) { 26187ba1a0bfSKris Buschelman ptadenserow[ptasparserow[j]] = 0; 26197ba1a0bfSKris Buschelman } 26207ba1a0bfSKris Buschelman for (j=0;j<cnzi;j++) { 26217ba1a0bfSKris Buschelman denserow[sparserow[j]] = 0; 26227ba1a0bfSKris Buschelman } 26237ba1a0bfSKris Buschelman /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 26247ba1a0bfSKris Buschelman /* For now, we will recompute what is needed. */ 26257ba1a0bfSKris Buschelman ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 26267ba1a0bfSKris Buschelman } 26277ba1a0bfSKris Buschelman } 26287ba1a0bfSKris Buschelman /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 26297ba1a0bfSKris Buschelman /* Allocate space for cj, initialize cj, and */ 26307ba1a0bfSKris Buschelman /* destroy list of free space and other temporary array(s) */ 26317ba1a0bfSKris Buschelman ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 2632a1a86e44SBarry Smith ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 26337ba1a0bfSKris Buschelman ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 26347ba1a0bfSKris Buschelman 26357ba1a0bfSKris Buschelman /* Allocate space for ca */ 26367ba1a0bfSKris Buschelman ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 26377ba1a0bfSKris Buschelman ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 26387ba1a0bfSKris Buschelman 26397ba1a0bfSKris Buschelman /* put together the new matrix */ 26407adad957SLisandro Dalcin ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr); 26417ba1a0bfSKris Buschelman 26427ba1a0bfSKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 26437ba1a0bfSKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 26447ba1a0bfSKris Buschelman c = (Mat_SeqAIJ *)((*C)->data); 2645e6b907acSBarry Smith c->free_a = PETSC_TRUE; 2646e6b907acSBarry Smith c->free_ij = PETSC_TRUE; 26477ba1a0bfSKris Buschelman c->nonew = 0; 26487ba1a0bfSKris Buschelman 26497ba1a0bfSKris Buschelman /* Clean up. */ 26507ba1a0bfSKris Buschelman ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 26517ba1a0bfSKris Buschelman 26527ba1a0bfSKris Buschelman ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 26537ba1a0bfSKris Buschelman PetscFunctionReturn(0); 26547ba1a0bfSKris Buschelman } 26557ba1a0bfSKris Buschelman 26567ba1a0bfSKris Buschelman #undef __FUNCT__ 26577ba1a0bfSKris Buschelman #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ" 26587ba1a0bfSKris Buschelman PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 26597ba1a0bfSKris Buschelman { 26607ba1a0bfSKris Buschelman /* This routine requires testing -- first draft only */ 26617ba1a0bfSKris Buschelman PetscErrorCode ierr; 26627ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 26637ba1a0bfSKris Buschelman Mat P=pp->AIJ; 26647ba1a0bfSKris Buschelman Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 26657ba1a0bfSKris Buschelman Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 26667ba1a0bfSKris Buschelman Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 26677ba1a0bfSKris Buschelman PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 26687ba1a0bfSKris Buschelman PetscInt *ci=c->i,*cj=c->j,*cjj; 2669d0f46423SBarry Smith PetscInt am=A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof; 26707ba1a0bfSKris Buschelman PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 26717ba1a0bfSKris Buschelman MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 26727ba1a0bfSKris Buschelman 26737ba1a0bfSKris Buschelman PetscFunctionBegin; 26747ba1a0bfSKris Buschelman /* Allocate temporary array for storage of one row of A*P */ 26757ba1a0bfSKris Buschelman ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 26767ba1a0bfSKris Buschelman ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 26777ba1a0bfSKris Buschelman 26787ba1a0bfSKris Buschelman apj = (PetscInt *)(apa + cn); 26797ba1a0bfSKris Buschelman apjdense = apj + cn; 26807ba1a0bfSKris Buschelman 26817ba1a0bfSKris Buschelman /* Clear old values in C */ 26827ba1a0bfSKris Buschelman ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 26837ba1a0bfSKris Buschelman 26847ba1a0bfSKris Buschelman for (i=0;i<am;i++) { 26857ba1a0bfSKris Buschelman /* Form sparse row of A*P */ 26867ba1a0bfSKris Buschelman anzi = ai[i+1] - ai[i]; 26877ba1a0bfSKris Buschelman apnzj = 0; 26887ba1a0bfSKris Buschelman for (j=0;j<anzi;j++) { 26897ba1a0bfSKris Buschelman /* Get offset within block of P */ 26907ba1a0bfSKris Buschelman pshift = *aj%ppdof; 26917ba1a0bfSKris Buschelman /* Get block row of P */ 26927ba1a0bfSKris Buschelman prow = *aj++/ppdof; /* integer division */ 26937ba1a0bfSKris Buschelman pnzj = pi[prow+1] - pi[prow]; 26947ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 26957ba1a0bfSKris Buschelman paj = pa + pi[prow]; 26967ba1a0bfSKris Buschelman for (k=0;k<pnzj;k++) { 26977ba1a0bfSKris Buschelman poffset = pjj[k]*ppdof+pshift; 26987ba1a0bfSKris Buschelman if (!apjdense[poffset]) { 26997ba1a0bfSKris Buschelman apjdense[poffset] = -1; 27007ba1a0bfSKris Buschelman apj[apnzj++] = poffset; 27017ba1a0bfSKris Buschelman } 27027ba1a0bfSKris Buschelman apa[poffset] += (*aa)*paj[k]; 27037ba1a0bfSKris Buschelman } 2704bf3909cdSBarry Smith ierr = PetscLogFlops(2*pnzj);CHKERRQ(ierr); 27057ba1a0bfSKris Buschelman aa++; 27067ba1a0bfSKris Buschelman } 27077ba1a0bfSKris Buschelman 27087ba1a0bfSKris Buschelman /* Sort the j index array for quick sparse axpy. */ 27097ba1a0bfSKris Buschelman /* Note: a array does not need sorting as it is in dense storage locations. */ 27107ba1a0bfSKris Buschelman ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 27117ba1a0bfSKris Buschelman 27127ba1a0bfSKris Buschelman /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 27137ba1a0bfSKris Buschelman prow = i/ppdof; /* integer division */ 27147ba1a0bfSKris Buschelman pshift = i%ppdof; 27157ba1a0bfSKris Buschelman poffset = pi[prow]; 27167ba1a0bfSKris Buschelman pnzi = pi[prow+1] - poffset; 27177ba1a0bfSKris Buschelman /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 27187ba1a0bfSKris Buschelman pJ = pj+poffset; 27197ba1a0bfSKris Buschelman pA = pa+poffset; 27207ba1a0bfSKris Buschelman for (j=0;j<pnzi;j++) { 27217ba1a0bfSKris Buschelman crow = (*pJ)*ppdof+pshift; 27227ba1a0bfSKris Buschelman cjj = cj + ci[crow]; 27237ba1a0bfSKris Buschelman caj = ca + ci[crow]; 27247ba1a0bfSKris Buschelman pJ++; 27257ba1a0bfSKris Buschelman /* Perform sparse axpy operation. Note cjj includes apj. */ 27267ba1a0bfSKris Buschelman for (k=0,nextap=0;nextap<apnzj;k++) { 27277ba1a0bfSKris Buschelman if (cjj[k]==apj[nextap]) { 27287ba1a0bfSKris Buschelman caj[k] += (*pA)*apa[apj[nextap++]]; 27297ba1a0bfSKris Buschelman } 27307ba1a0bfSKris Buschelman } 2731bf3909cdSBarry Smith ierr = PetscLogFlops(2*apnzj);CHKERRQ(ierr); 27327ba1a0bfSKris Buschelman pA++; 27337ba1a0bfSKris Buschelman } 27347ba1a0bfSKris Buschelman 27357ba1a0bfSKris Buschelman /* Zero the current row info for A*P */ 27367ba1a0bfSKris Buschelman for (j=0;j<apnzj;j++) { 27377ba1a0bfSKris Buschelman apa[apj[j]] = 0.; 27387ba1a0bfSKris Buschelman apjdense[apj[j]] = 0; 27397ba1a0bfSKris Buschelman } 27407ba1a0bfSKris Buschelman } 27417ba1a0bfSKris Buschelman 27427ba1a0bfSKris Buschelman /* Assemble the final matrix and clean up */ 27437ba1a0bfSKris Buschelman ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 27447ba1a0bfSKris Buschelman ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 27457ba1a0bfSKris Buschelman ierr = PetscFree(apa);CHKERRQ(ierr); 274695ddefa2SHong Zhang PetscFunctionReturn(0); 274795ddefa2SHong Zhang } 27487ba1a0bfSKris Buschelman 274995ddefa2SHong Zhang #undef __FUNCT__ 275095ddefa2SHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIMAIJ" 275195ddefa2SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 275295ddefa2SHong Zhang { 275395ddefa2SHong Zhang PetscErrorCode ierr; 275495ddefa2SHong Zhang 275595ddefa2SHong Zhang PetscFunctionBegin; 275695ddefa2SHong Zhang /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */ 275795ddefa2SHong Zhang ierr = MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);CHKERRQ(ierr); 275895ddefa2SHong Zhang ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr); 275995ddefa2SHong Zhang PetscFunctionReturn(0); 276095ddefa2SHong Zhang } 276195ddefa2SHong Zhang 276295ddefa2SHong Zhang #undef __FUNCT__ 276395ddefa2SHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIMAIJ" 276495ddefa2SHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C) 276595ddefa2SHong Zhang { 276695ddefa2SHong Zhang PetscFunctionBegin; 276795ddefa2SHong Zhang SETERRQ(PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 27687ba1a0bfSKris Buschelman PetscFunctionReturn(0); 27697ba1a0bfSKris Buschelman } 27707ba1a0bfSKris Buschelman 2771be1d678aSKris Buschelman EXTERN_C_BEGIN 27727ba1a0bfSKris Buschelman #undef __FUNCT__ 27730fd73130SBarry Smith #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ" 2774f69a0ea3SMatthew Knepley PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 27759c4fc4c7SBarry Smith { 27769c4fc4c7SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2777ceb03754SKris Buschelman Mat a = b->AIJ,B; 27789c4fc4c7SBarry Smith Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 27799c4fc4c7SBarry Smith PetscErrorCode ierr; 27800fd73130SBarry Smith PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 2781ba8c8a56SBarry Smith PetscInt *cols; 2782ba8c8a56SBarry Smith PetscScalar *vals; 27839c4fc4c7SBarry Smith 27849c4fc4c7SBarry Smith PetscFunctionBegin; 27859c4fc4c7SBarry Smith ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 27867c307921SBarry Smith ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr); 27879c4fc4c7SBarry Smith for (i=0; i<m; i++) { 27889c4fc4c7SBarry Smith nmax = PetscMax(nmax,aij->ilen[i]); 27890fd73130SBarry Smith for (j=0; j<dof; j++) { 27900fd73130SBarry Smith ilen[dof*i+j] = aij->ilen[i]; 27919c4fc4c7SBarry Smith } 27929c4fc4c7SBarry Smith } 2793ceb03754SKris Buschelman ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr); 27949c4fc4c7SBarry Smith ierr = PetscFree(ilen);CHKERRQ(ierr); 27959c4fc4c7SBarry Smith ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr); 27969c4fc4c7SBarry Smith ii = 0; 27979c4fc4c7SBarry Smith for (i=0; i<m; i++) { 2798ba8c8a56SBarry Smith ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 27990fd73130SBarry Smith for (j=0; j<dof; j++) { 28009c4fc4c7SBarry Smith for (k=0; k<ncols; k++) { 28010fd73130SBarry Smith icols[k] = dof*cols[k]+j; 28029c4fc4c7SBarry Smith } 2803ceb03754SKris Buschelman ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 28049c4fc4c7SBarry Smith ii++; 28059c4fc4c7SBarry Smith } 2806ba8c8a56SBarry Smith ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 28079c4fc4c7SBarry Smith } 28089c4fc4c7SBarry Smith ierr = PetscFree(icols);CHKERRQ(ierr); 2809ceb03754SKris Buschelman ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2810ceb03754SKris Buschelman ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2811ceb03754SKris Buschelman 2812ceb03754SKris Buschelman if (reuse == MAT_REUSE_MATRIX) { 28138ab5b326SKris Buschelman ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 2814c3d102feSKris Buschelman } else { 2815ceb03754SKris Buschelman *newmat = B; 2816c3d102feSKris Buschelman } 28179c4fc4c7SBarry Smith PetscFunctionReturn(0); 28189c4fc4c7SBarry Smith } 2819be1d678aSKris Buschelman EXTERN_C_END 28209c4fc4c7SBarry Smith 28217c4f633dSBarry Smith #include "../src/mat/impls/aij/mpi/mpiaij.h" 2822be1d678aSKris Buschelman 2823be1d678aSKris Buschelman EXTERN_C_BEGIN 28240fd73130SBarry Smith #undef __FUNCT__ 28250fd73130SBarry Smith #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ" 2826f69a0ea3SMatthew Knepley PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 28270fd73130SBarry Smith { 28280fd73130SBarry Smith Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 2829ceb03754SKris Buschelman Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 28300fd73130SBarry Smith Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 28310fd73130SBarry Smith Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 28320fd73130SBarry Smith Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 28330fd73130SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 2834910ba992SMatthew Knepley PetscInt dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0; 2835910ba992SMatthew Knepley PetscInt *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL; 28360fd73130SBarry Smith PetscInt rstart,cstart,*garray,ii,k; 28370fd73130SBarry Smith PetscErrorCode ierr; 28380fd73130SBarry Smith PetscScalar *vals,*ovals; 28390fd73130SBarry Smith 28400fd73130SBarry Smith PetscFunctionBegin; 2841d0f46423SBarry Smith ierr = PetscMalloc2(A->rmap->n,PetscInt,&dnz,A->rmap->n,PetscInt,&onz);CHKERRQ(ierr); 2842d0f46423SBarry Smith for (i=0; i<A->rmap->n/dof; i++) { 28430fd73130SBarry Smith nmax = PetscMax(nmax,AIJ->ilen[i]); 28440fd73130SBarry Smith onmax = PetscMax(onmax,OAIJ->ilen[i]); 28450fd73130SBarry Smith for (j=0; j<dof; j++) { 28460fd73130SBarry Smith dnz[dof*i+j] = AIJ->ilen[i]; 28470fd73130SBarry Smith onz[dof*i+j] = OAIJ->ilen[i]; 28480fd73130SBarry Smith } 28490fd73130SBarry Smith } 2850d0f46423SBarry Smith ierr = MatCreateMPIAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr); 28510fd73130SBarry Smith ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 28520fd73130SBarry Smith 28537a1a7badSBarry Smith ierr = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr); 2854d0f46423SBarry Smith rstart = dof*maij->A->rmap->rstart; 2855d0f46423SBarry Smith cstart = dof*maij->A->cmap->rstart; 28560fd73130SBarry Smith garray = mpiaij->garray; 28570fd73130SBarry Smith 28580fd73130SBarry Smith ii = rstart; 2859d0f46423SBarry Smith for (i=0; i<A->rmap->n/dof; i++) { 28600fd73130SBarry Smith ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 28610fd73130SBarry Smith ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 28620fd73130SBarry Smith for (j=0; j<dof; j++) { 28630fd73130SBarry Smith for (k=0; k<ncols; k++) { 28640fd73130SBarry Smith icols[k] = cstart + dof*cols[k]+j; 28650fd73130SBarry Smith } 28660fd73130SBarry Smith for (k=0; k<oncols; k++) { 28670fd73130SBarry Smith oicols[k] = dof*garray[ocols[k]]+j; 28680fd73130SBarry Smith } 2869ceb03754SKris Buschelman ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 2870ceb03754SKris Buschelman ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 28710fd73130SBarry Smith ii++; 28720fd73130SBarry Smith } 28730fd73130SBarry Smith ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 28740fd73130SBarry Smith ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 28750fd73130SBarry Smith } 28760fd73130SBarry Smith ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 28770fd73130SBarry Smith 2878ceb03754SKris Buschelman ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2879ceb03754SKris Buschelman ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2880ceb03754SKris Buschelman 2881ceb03754SKris Buschelman if (reuse == MAT_REUSE_MATRIX) { 28827adad957SLisandro Dalcin PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 28837adad957SLisandro Dalcin ((PetscObject)A)->refct = 1; 28848ab5b326SKris Buschelman ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 28857adad957SLisandro Dalcin ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 2886c3d102feSKris Buschelman } else { 2887ceb03754SKris Buschelman *newmat = B; 2888c3d102feSKris Buschelman } 28890fd73130SBarry Smith PetscFunctionReturn(0); 28900fd73130SBarry Smith } 2891be1d678aSKris Buschelman EXTERN_C_END 28920fd73130SBarry Smith 28930fd73130SBarry Smith 2894bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */ 2895*ff585edeSJed Brown #undef __FUNCT__ 2896*ff585edeSJed Brown #define __FUNCT__ "MatCreateMAIJ" 2897*ff585edeSJed Brown /*@C 28980bad9183SKris Buschelman MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 28990bad9183SKris Buschelman operations for multicomponent problems. It interpolates each component the same 29000bad9183SKris Buschelman way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 29010bad9183SKris Buschelman and MATMPIAIJ for distributed matrices. 29020bad9183SKris Buschelman 2903*ff585edeSJed Brown Collective 2904*ff585edeSJed Brown 2905*ff585edeSJed Brown Input Parameters: 2906*ff585edeSJed Brown + A - the AIJ matrix describing the action on blocks 2907*ff585edeSJed Brown - dof - the block size (number of components per node) 2908*ff585edeSJed Brown 2909*ff585edeSJed Brown Output Parameter: 2910*ff585edeSJed Brown . maij - the new MAIJ matrix 2911*ff585edeSJed Brown 29120bad9183SKris Buschelman Operations provided: 29130fd73130SBarry Smith + MatMult 29140bad9183SKris Buschelman . MatMultTranspose 29150bad9183SKris Buschelman . MatMultAdd 29160bad9183SKris Buschelman . MatMultTransposeAdd 29170fd73130SBarry Smith - MatView 29180bad9183SKris Buschelman 29190bad9183SKris Buschelman Level: advanced 29200bad9183SKris Buschelman 2921*ff585edeSJed Brown .seealso: MatMAIJGetAIJ(), MatMAIJRedimension() 2922*ff585edeSJed Brown @*/ 2923be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 292482b1193eSBarry Smith { 2925dfbe8321SBarry Smith PetscErrorCode ierr; 2926b24ad042SBarry Smith PetscMPIInt size; 2927b24ad042SBarry Smith PetscInt n; 29284cb9d9b8SBarry Smith Mat_MPIMAIJ *b; 292982b1193eSBarry Smith Mat B; 293082b1193eSBarry Smith 293182b1193eSBarry Smith PetscFunctionBegin; 2932d72c5c08SBarry Smith ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2933d72c5c08SBarry Smith 2934d72c5c08SBarry Smith if (dof == 1) { 2935d72c5c08SBarry Smith *maij = A; 2936d72c5c08SBarry Smith } else { 29377adad957SLisandro Dalcin ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2938d0f46423SBarry Smith ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr); 2939cd3bbe55SBarry Smith B->assembled = PETSC_TRUE; 2940d72c5c08SBarry Smith 29417adad957SLisandro Dalcin ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2942f4a53059SBarry Smith if (size == 1) { 2943b9b97703SBarry Smith ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 29444cb9d9b8SBarry Smith B->ops->destroy = MatDestroy_SeqMAIJ; 29450fd73130SBarry Smith B->ops->view = MatView_SeqMAIJ; 2946b9b97703SBarry Smith b = (Mat_MPIMAIJ*)B->data; 2947b9b97703SBarry Smith b->dof = dof; 29484cb9d9b8SBarry Smith b->AIJ = A; 2949d72c5c08SBarry Smith if (dof == 2) { 2950cd3bbe55SBarry Smith B->ops->mult = MatMult_SeqMAIJ_2; 2951cd3bbe55SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_2; 2952cd3bbe55SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 2953cd3bbe55SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 2954bcc973b7SBarry Smith } else if (dof == 3) { 2955bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_3; 2956bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_3; 2957bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 2958bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 2959bcc973b7SBarry Smith } else if (dof == 4) { 2960bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_4; 2961bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_4; 2962bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 2963bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 2964f9fae5adSBarry Smith } else if (dof == 5) { 2965f9fae5adSBarry Smith B->ops->mult = MatMult_SeqMAIJ_5; 2966f9fae5adSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_5; 2967f9fae5adSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 2968f9fae5adSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 29696cd98798SBarry Smith } else if (dof == 6) { 29706cd98798SBarry Smith B->ops->mult = MatMult_SeqMAIJ_6; 29716cd98798SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_6; 29726cd98798SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 29736cd98798SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 2974ed8eea03SBarry Smith } else if (dof == 7) { 2975ed8eea03SBarry Smith B->ops->mult = MatMult_SeqMAIJ_7; 2976ed8eea03SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_7; 2977ed8eea03SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 2978ed8eea03SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 297966ed3db0SBarry Smith } else if (dof == 8) { 298066ed3db0SBarry Smith B->ops->mult = MatMult_SeqMAIJ_8; 298166ed3db0SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_8; 298266ed3db0SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 298366ed3db0SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 29842b692628SMatthew Knepley } else if (dof == 9) { 29852b692628SMatthew Knepley B->ops->mult = MatMult_SeqMAIJ_9; 29862b692628SMatthew Knepley B->ops->multadd = MatMultAdd_SeqMAIJ_9; 29872b692628SMatthew Knepley B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 29882b692628SMatthew Knepley B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 2989b9cda22cSBarry Smith } else if (dof == 10) { 2990b9cda22cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_10; 2991b9cda22cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_10; 2992b9cda22cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 2993b9cda22cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 29942f7816d4SBarry Smith } else if (dof == 16) { 29952f7816d4SBarry Smith B->ops->mult = MatMult_SeqMAIJ_16; 29962f7816d4SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_16; 29972f7816d4SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 29982f7816d4SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 2999ed1c418cSBarry Smith } else if (dof == 18) { 3000ed1c418cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_18; 3001ed1c418cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_18; 3002ed1c418cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 3003ed1c418cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 300482b1193eSBarry Smith } else { 300577431f27SBarry Smith SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof); 300682b1193eSBarry Smith } 30077ba1a0bfSKris Buschelman B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ; 30087ba1a0bfSKris Buschelman B->ops->ptapnumeric_seqaij = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 30099c4fc4c7SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 3010f4a53059SBarry Smith } else { 3011f4a53059SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 3012f4a53059SBarry Smith IS from,to; 3013f4a53059SBarry Smith Vec gvec; 3014b24ad042SBarry Smith PetscInt *garray,i; 3015f4a53059SBarry Smith 3016b9b97703SBarry Smith ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 3017d72c5c08SBarry Smith B->ops->destroy = MatDestroy_MPIMAIJ; 30180fd73130SBarry Smith B->ops->view = MatView_MPIMAIJ; 3019b9b97703SBarry Smith b = (Mat_MPIMAIJ*)B->data; 3020b9b97703SBarry Smith b->dof = dof; 3021b9b97703SBarry Smith b->A = A; 30224cb9d9b8SBarry Smith ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 30234cb9d9b8SBarry Smith ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 30244cb9d9b8SBarry Smith 3025f4a53059SBarry Smith ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 3026f4a53059SBarry Smith ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr); 302713288a74SBarry Smith ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 3028f4a53059SBarry Smith 3029f4a53059SBarry Smith /* create two temporary Index sets for build scatter gather */ 3030b24ad042SBarry Smith ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 3031f4a53059SBarry Smith for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i]; 30327adad957SLisandro Dalcin ierr = ISCreateBlock(((PetscObject)A)->comm,dof,n,garray,&from);CHKERRQ(ierr); 3033f4a53059SBarry Smith ierr = PetscFree(garray);CHKERRQ(ierr); 3034f4a53059SBarry Smith ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 3035f4a53059SBarry Smith 3036f4a53059SBarry Smith /* create temporary global vector to generate scatter context */ 3037d0f46423SBarry Smith ierr = VecCreateMPIWithArray(((PetscObject)A)->comm,dof*A->cmap->n,dof*A->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); 303813288a74SBarry Smith ierr = VecSetBlockSize(gvec,dof);CHKERRQ(ierr); 3039f4a53059SBarry Smith 3040f4a53059SBarry Smith /* generate the scatter context */ 3041f4a53059SBarry Smith ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 3042f4a53059SBarry Smith 3043f4a53059SBarry Smith ierr = ISDestroy(from);CHKERRQ(ierr); 3044f4a53059SBarry Smith ierr = ISDestroy(to);CHKERRQ(ierr); 3045f4a53059SBarry Smith ierr = VecDestroy(gvec);CHKERRQ(ierr); 3046f4a53059SBarry Smith 3047f4a53059SBarry Smith B->ops->mult = MatMult_MPIMAIJ_dof; 30484cb9d9b8SBarry Smith B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 30494cb9d9b8SBarry Smith B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 30504cb9d9b8SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 305195ddefa2SHong Zhang B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ; 305295ddefa2SHong Zhang B->ops->ptapnumeric_mpiaij = MatPtAPNumeric_MPIAIJ_MPIMAIJ; 30530fd73130SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 3054f4a53059SBarry Smith } 3055cd3bbe55SBarry Smith *maij = B; 30560fd73130SBarry Smith ierr = MatView_Private(B);CHKERRQ(ierr); 3057d72c5c08SBarry Smith } 305882b1193eSBarry Smith PetscFunctionReturn(0); 305982b1193eSBarry Smith } 3060