1be1d678aSKris Buschelman #define PETSCMAT_DLL 2be1d678aSKris Buschelman 382b1193eSBarry Smith /* 4cd3bbe55SBarry Smith Defines the basic matrix operations for the MAIJ matrix storage format. 5cd3bbe55SBarry Smith This format is used for restriction and interpolation operations for 6cd3bbe55SBarry Smith multicomponent problems. It interpolates each component the same way 7cd3bbe55SBarry Smith independently. 8cd3bbe55SBarry Smith 9cd3bbe55SBarry Smith We provide: 10cd3bbe55SBarry Smith MatMult() 11cd3bbe55SBarry Smith MatMultTranspose() 12cd3bbe55SBarry Smith MatMultTransposeAdd() 13cd3bbe55SBarry Smith MatMultAdd() 14cd3bbe55SBarry Smith and 15cd3bbe55SBarry Smith MatCreateMAIJ(Mat,dof,Mat*) 16f4a53059SBarry Smith 17f4a53059SBarry Smith This single directory handles both the sequential and parallel codes 1882b1193eSBarry Smith */ 1982b1193eSBarry Smith 20ff585edeSJed Brown #include "../src/mat/impls/maij/maij.h" /*I "petscmat.h" I*/ 217c4f633dSBarry Smith #include "../src/mat/utils/freespace.h" 221d8d5f9aSSatish Balay #include "private/vecimpl.h" 2382b1193eSBarry Smith 244a2ae208SSatish Balay #undef __FUNCT__ 254a2ae208SSatish Balay #define __FUNCT__ "MatMAIJGetAIJ" 26ff585edeSJed Brown /*@C 27ff585edeSJed Brown MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix 28ff585edeSJed Brown 29ff585edeSJed Brown Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel 30ff585edeSJed Brown 31ff585edeSJed Brown Input Parameter: 32ff585edeSJed Brown . A - the MAIJ matrix 33ff585edeSJed Brown 34ff585edeSJed Brown Output Parameter: 35ff585edeSJed Brown . B - the AIJ matrix 36ff585edeSJed Brown 37ff585edeSJed Brown Level: advanced 38ff585edeSJed Brown 39ff585edeSJed Brown Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 40ff585edeSJed Brown 41ff585edeSJed Brown .seealso: MatCreateMAIJ() 42ff585edeSJed Brown @*/ 43be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJGetAIJ(Mat A,Mat *B) 44b9b97703SBarry Smith { 45dfbe8321SBarry Smith PetscErrorCode ierr; 46b9b97703SBarry Smith PetscTruth ismpimaij,isseqmaij; 47b9b97703SBarry Smith 48b9b97703SBarry Smith PetscFunctionBegin; 49b9b97703SBarry Smith ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 50b9b97703SBarry Smith ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 51b9b97703SBarry Smith if (ismpimaij) { 52b9b97703SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 53b9b97703SBarry Smith 54b9b97703SBarry Smith *B = b->A; 55b9b97703SBarry Smith } else if (isseqmaij) { 56b9b97703SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 57b9b97703SBarry Smith 58b9b97703SBarry Smith *B = b->AIJ; 59b9b97703SBarry Smith } else { 60b9b97703SBarry Smith *B = A; 61b9b97703SBarry Smith } 62b9b97703SBarry Smith PetscFunctionReturn(0); 63b9b97703SBarry Smith } 64b9b97703SBarry Smith 654a2ae208SSatish Balay #undef __FUNCT__ 664a2ae208SSatish Balay #define __FUNCT__ "MatMAIJRedimension" 67ff585edeSJed Brown /*@C 68ff585edeSJed Brown MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size 69ff585edeSJed Brown 703f9fe445SBarry Smith Logically Collective 71ff585edeSJed Brown 72ff585edeSJed Brown Input Parameter: 73ff585edeSJed Brown + A - the MAIJ matrix 74ff585edeSJed Brown - dof - the block size for the new matrix 75ff585edeSJed Brown 76ff585edeSJed Brown Output Parameter: 77ff585edeSJed Brown . B - the new MAIJ matrix 78ff585edeSJed Brown 79ff585edeSJed Brown Level: advanced 80ff585edeSJed Brown 81ff585edeSJed Brown .seealso: MatCreateMAIJ() 82ff585edeSJed Brown @*/ 83be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 84b9b97703SBarry Smith { 85dfbe8321SBarry Smith PetscErrorCode ierr; 863b98c0a2SBarry Smith Mat Aij = PETSC_NULL; 87b9b97703SBarry Smith 88b9b97703SBarry Smith PetscFunctionBegin; 89*c5eb9154SBarry Smith PetscValidLogicalCollectiveInt(A,dof,2); 90b9b97703SBarry Smith ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 91b9b97703SBarry Smith ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 92b9b97703SBarry Smith PetscFunctionReturn(0); 93b9b97703SBarry Smith } 94b9b97703SBarry Smith 954a2ae208SSatish Balay #undef __FUNCT__ 964a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_SeqMAIJ" 97dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 9882b1193eSBarry Smith { 99dfbe8321SBarry Smith PetscErrorCode ierr; 1004cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 10182b1193eSBarry Smith 10282b1193eSBarry Smith PetscFunctionBegin; 103cd3bbe55SBarry Smith if (b->AIJ) { 104cd3bbe55SBarry Smith ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 10582b1193eSBarry Smith } 1064cb9d9b8SBarry Smith ierr = PetscFree(b);CHKERRQ(ierr); 1074cb9d9b8SBarry Smith PetscFunctionReturn(0); 1084cb9d9b8SBarry Smith } 1094cb9d9b8SBarry Smith 1104a2ae208SSatish Balay #undef __FUNCT__ 1110fd73130SBarry Smith #define __FUNCT__ "MatView_SeqMAIJ" 1120fd73130SBarry Smith PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 1130fd73130SBarry Smith { 1140fd73130SBarry Smith PetscErrorCode ierr; 1150fd73130SBarry Smith Mat B; 1160fd73130SBarry Smith 1170fd73130SBarry Smith PetscFunctionBegin; 118ceb03754SKris Buschelman ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B); 1190fd73130SBarry Smith ierr = MatView(B,viewer);CHKERRQ(ierr); 1200fd73130SBarry Smith ierr = MatDestroy(B);CHKERRQ(ierr); 1210fd73130SBarry Smith PetscFunctionReturn(0); 1220fd73130SBarry Smith } 1230fd73130SBarry Smith 1240fd73130SBarry Smith #undef __FUNCT__ 1250fd73130SBarry Smith #define __FUNCT__ "MatView_MPIMAIJ" 1260fd73130SBarry Smith PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 1270fd73130SBarry Smith { 1280fd73130SBarry Smith PetscErrorCode ierr; 1290fd73130SBarry Smith Mat B; 1300fd73130SBarry Smith 1310fd73130SBarry Smith PetscFunctionBegin; 132ceb03754SKris Buschelman ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B); 1330fd73130SBarry Smith ierr = MatView(B,viewer);CHKERRQ(ierr); 1340fd73130SBarry Smith ierr = MatDestroy(B);CHKERRQ(ierr); 1350fd73130SBarry Smith PetscFunctionReturn(0); 1360fd73130SBarry Smith } 1370fd73130SBarry Smith 1380fd73130SBarry Smith #undef __FUNCT__ 1394a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIMAIJ" 140dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 1414cb9d9b8SBarry Smith { 142dfbe8321SBarry Smith PetscErrorCode ierr; 1434cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1444cb9d9b8SBarry Smith 1454cb9d9b8SBarry Smith PetscFunctionBegin; 1464cb9d9b8SBarry Smith if (b->AIJ) { 1474cb9d9b8SBarry Smith ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 1484cb9d9b8SBarry Smith } 149f4a53059SBarry Smith if (b->OAIJ) { 150f4a53059SBarry Smith ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr); 151f4a53059SBarry Smith } 152b9b97703SBarry Smith if (b->A) { 153b9b97703SBarry Smith ierr = MatDestroy(b->A);CHKERRQ(ierr); 154b9b97703SBarry Smith } 155f4a53059SBarry Smith if (b->ctx) { 156f4a53059SBarry Smith ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr); 157f4a53059SBarry Smith } 158f4a53059SBarry Smith if (b->w) { 159f4a53059SBarry Smith ierr = VecDestroy(b->w);CHKERRQ(ierr); 160f4a53059SBarry Smith } 161cd3bbe55SBarry Smith ierr = PetscFree(b);CHKERRQ(ierr); 162dbd8c25aSHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 163b9b97703SBarry Smith PetscFunctionReturn(0); 164b9b97703SBarry Smith } 165b9b97703SBarry Smith 1660bad9183SKris Buschelman /*MC 167fafad747SKris Buschelman MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 1680bad9183SKris Buschelman multicomponent problems, interpolating or restricting each component the same way independently. 1690bad9183SKris Buschelman The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 1700bad9183SKris Buschelman 1710bad9183SKris Buschelman Operations provided: 1720bad9183SKris Buschelman . MatMult 1730bad9183SKris Buschelman . MatMultTranspose 1740bad9183SKris Buschelman . MatMultAdd 1750bad9183SKris Buschelman . MatMultTransposeAdd 1760bad9183SKris Buschelman 1770bad9183SKris Buschelman Level: advanced 1780bad9183SKris Buschelman 1790bad9183SKris Buschelman .seealso: MatCreateSeqDense 1800bad9183SKris Buschelman M*/ 1810bad9183SKris Buschelman 18282b1193eSBarry Smith EXTERN_C_BEGIN 1834a2ae208SSatish Balay #undef __FUNCT__ 1844a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MAIJ" 185be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MAIJ(Mat A) 18682b1193eSBarry Smith { 187dfbe8321SBarry Smith PetscErrorCode ierr; 1884cb9d9b8SBarry Smith Mat_MPIMAIJ *b; 18964b52464SHong Zhang PetscMPIInt size; 19082b1193eSBarry Smith 19182b1193eSBarry Smith PetscFunctionBegin; 19238f2d2fdSLisandro Dalcin ierr = PetscNewLog(A,Mat_MPIMAIJ,&b);CHKERRQ(ierr); 193b0a32e0cSBarry Smith A->data = (void*)b; 194cd3bbe55SBarry Smith ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 195cd3bbe55SBarry Smith A->mapping = 0; 196f4a53059SBarry Smith 197cd3bbe55SBarry Smith b->AIJ = 0; 198cd3bbe55SBarry Smith b->dof = 0; 199f4a53059SBarry Smith b->OAIJ = 0; 200f4a53059SBarry Smith b->ctx = 0; 201f4a53059SBarry Smith b->w = 0; 2027adad957SLisandro Dalcin ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 20364b52464SHong Zhang if (size == 1){ 20464b52464SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr); 20564b52464SHong Zhang } else { 20664b52464SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr); 20764b52464SHong Zhang } 20882b1193eSBarry Smith PetscFunctionReturn(0); 20982b1193eSBarry Smith } 21082b1193eSBarry Smith EXTERN_C_END 21182b1193eSBarry Smith 212cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 2134a2ae208SSatish Balay #undef __FUNCT__ 214b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_2" 215dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 21682b1193eSBarry Smith { 2174cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 218bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 219d2840e09SBarry Smith const PetscScalar *x,*v; 220d2840e09SBarry Smith PetscScalar *y, sum1, sum2; 221dfbe8321SBarry Smith PetscErrorCode ierr; 222d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 223d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 22482b1193eSBarry Smith 225bcc973b7SBarry Smith PetscFunctionBegin; 2263649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2271ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 228bcc973b7SBarry Smith idx = a->j; 229bcc973b7SBarry Smith v = a->a; 230bcc973b7SBarry Smith ii = a->i; 231bcc973b7SBarry Smith 232bcc973b7SBarry Smith for (i=0; i<m; i++) { 233bcc973b7SBarry Smith jrow = ii[i]; 234bcc973b7SBarry Smith n = ii[i+1] - jrow; 235bcc973b7SBarry Smith sum1 = 0.0; 236bcc973b7SBarry Smith sum2 = 0.0; 237b3c51c66SMatthew Knepley nonzerorow += (n>0); 238bcc973b7SBarry Smith for (j=0; j<n; j++) { 239bcc973b7SBarry Smith sum1 += v[jrow]*x[2*idx[jrow]]; 240bcc973b7SBarry Smith sum2 += v[jrow]*x[2*idx[jrow]+1]; 241bcc973b7SBarry Smith jrow++; 242bcc973b7SBarry Smith } 243bcc973b7SBarry Smith y[2*i] = sum1; 244bcc973b7SBarry Smith y[2*i+1] = sum2; 245bcc973b7SBarry Smith } 246bcc973b7SBarry Smith 247dc0b31edSSatish Balay ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr); 2483649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2491ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 25082b1193eSBarry Smith PetscFunctionReturn(0); 25182b1193eSBarry Smith } 252bcc973b7SBarry Smith 2534a2ae208SSatish Balay #undef __FUNCT__ 254b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 255dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 25682b1193eSBarry Smith { 2574cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 258bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 259d2840e09SBarry Smith const PetscScalar *x,*v; 260d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2; 261dfbe8321SBarry Smith PetscErrorCode ierr; 262d2840e09SBarry Smith PetscInt n,i; 263d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 26482b1193eSBarry Smith 265bcc973b7SBarry Smith PetscFunctionBegin; 266d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2673649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2681ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 269bfec09a0SHong Zhang 270bcc973b7SBarry Smith for (i=0; i<m; i++) { 271bfec09a0SHong Zhang idx = a->j + a->i[i] ; 272bfec09a0SHong Zhang v = a->a + a->i[i] ; 273bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 274bcc973b7SBarry Smith alpha1 = x[2*i]; 275bcc973b7SBarry Smith alpha2 = x[2*i+1]; 276bcc973b7SBarry Smith while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 277bcc973b7SBarry Smith } 278dc0b31edSSatish Balay ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 2793649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2801ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 28182b1193eSBarry Smith PetscFunctionReturn(0); 28282b1193eSBarry Smith } 283bcc973b7SBarry Smith 2844a2ae208SSatish Balay #undef __FUNCT__ 285b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 286dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 28782b1193eSBarry Smith { 2884cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 289bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 290d2840e09SBarry Smith const PetscScalar *x,*v; 291d2840e09SBarry Smith PetscScalar *y,sum1, sum2; 292dfbe8321SBarry Smith PetscErrorCode ierr; 293b24ad042SBarry Smith PetscInt n,i,jrow,j; 294d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 29582b1193eSBarry Smith 296bcc973b7SBarry Smith PetscFunctionBegin; 297f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2983649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2991ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 300bcc973b7SBarry Smith idx = a->j; 301bcc973b7SBarry Smith v = a->a; 302bcc973b7SBarry Smith ii = a->i; 303bcc973b7SBarry Smith 304bcc973b7SBarry Smith for (i=0; i<m; i++) { 305bcc973b7SBarry Smith jrow = ii[i]; 306bcc973b7SBarry Smith n = ii[i+1] - jrow; 307bcc973b7SBarry Smith sum1 = 0.0; 308bcc973b7SBarry Smith sum2 = 0.0; 309bcc973b7SBarry Smith for (j=0; j<n; j++) { 310bcc973b7SBarry Smith sum1 += v[jrow]*x[2*idx[jrow]]; 311bcc973b7SBarry Smith sum2 += v[jrow]*x[2*idx[jrow]+1]; 312bcc973b7SBarry Smith jrow++; 313bcc973b7SBarry Smith } 314bcc973b7SBarry Smith y[2*i] += sum1; 315bcc973b7SBarry Smith y[2*i+1] += sum2; 316bcc973b7SBarry Smith } 317bcc973b7SBarry Smith 318dc0b31edSSatish Balay ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 3193649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 3201ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 321bcc973b7SBarry Smith PetscFunctionReturn(0); 32282b1193eSBarry Smith } 3234a2ae208SSatish Balay #undef __FUNCT__ 324b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 325dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 32682b1193eSBarry Smith { 3274cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 328bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 329d2840e09SBarry Smith const PetscScalar *x,*v; 330d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2; 331dfbe8321SBarry Smith PetscErrorCode ierr; 332d2840e09SBarry Smith PetscInt n,i; 333d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 33482b1193eSBarry Smith 335bcc973b7SBarry Smith PetscFunctionBegin; 336f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 3373649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 3381ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 339bfec09a0SHong Zhang 340bcc973b7SBarry Smith for (i=0; i<m; i++) { 341bfec09a0SHong Zhang idx = a->j + a->i[i] ; 342bfec09a0SHong Zhang v = a->a + a->i[i] ; 343bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 344bcc973b7SBarry Smith alpha1 = x[2*i]; 345bcc973b7SBarry Smith alpha2 = x[2*i+1]; 346bcc973b7SBarry Smith while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 347bcc973b7SBarry Smith } 348dc0b31edSSatish Balay ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 3493649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 3501ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 351bcc973b7SBarry Smith PetscFunctionReturn(0); 35282b1193eSBarry Smith } 353cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 3544a2ae208SSatish Balay #undef __FUNCT__ 355b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_3" 356dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 357bcc973b7SBarry Smith { 3584cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 359bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 360d2840e09SBarry Smith const PetscScalar *x,*v; 361d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3; 362dfbe8321SBarry Smith PetscErrorCode ierr; 363d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 364d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 36582b1193eSBarry Smith 366bcc973b7SBarry Smith PetscFunctionBegin; 3673649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 3681ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 369bcc973b7SBarry Smith idx = a->j; 370bcc973b7SBarry Smith v = a->a; 371bcc973b7SBarry Smith ii = a->i; 372bcc973b7SBarry Smith 373bcc973b7SBarry Smith for (i=0; i<m; i++) { 374bcc973b7SBarry Smith jrow = ii[i]; 375bcc973b7SBarry Smith n = ii[i+1] - jrow; 376bcc973b7SBarry Smith sum1 = 0.0; 377bcc973b7SBarry Smith sum2 = 0.0; 378bcc973b7SBarry Smith sum3 = 0.0; 379b3c51c66SMatthew Knepley nonzerorow += (n>0); 380bcc973b7SBarry Smith for (j=0; j<n; j++) { 381bcc973b7SBarry Smith sum1 += v[jrow]*x[3*idx[jrow]]; 382bcc973b7SBarry Smith sum2 += v[jrow]*x[3*idx[jrow]+1]; 383bcc973b7SBarry Smith sum3 += v[jrow]*x[3*idx[jrow]+2]; 384bcc973b7SBarry Smith jrow++; 385bcc973b7SBarry Smith } 386bcc973b7SBarry Smith y[3*i] = sum1; 387bcc973b7SBarry Smith y[3*i+1] = sum2; 388bcc973b7SBarry Smith y[3*i+2] = sum3; 389bcc973b7SBarry Smith } 390bcc973b7SBarry Smith 391dc0b31edSSatish Balay ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr); 3923649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 3931ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 394bcc973b7SBarry Smith PetscFunctionReturn(0); 395bcc973b7SBarry Smith } 396bcc973b7SBarry Smith 3974a2ae208SSatish Balay #undef __FUNCT__ 398b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 399dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 400bcc973b7SBarry Smith { 4014cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 402bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 403d2840e09SBarry Smith const PetscScalar *x,*v; 404d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3; 405dfbe8321SBarry Smith PetscErrorCode ierr; 406d2840e09SBarry Smith PetscInt n,i; 407d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 408bcc973b7SBarry Smith 409bcc973b7SBarry Smith PetscFunctionBegin; 410d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 4113649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 4121ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 413bfec09a0SHong Zhang 414bcc973b7SBarry Smith for (i=0; i<m; i++) { 415bfec09a0SHong Zhang idx = a->j + a->i[i]; 416bfec09a0SHong Zhang v = a->a + a->i[i]; 417bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 418bcc973b7SBarry Smith alpha1 = x[3*i]; 419bcc973b7SBarry Smith alpha2 = x[3*i+1]; 420bcc973b7SBarry Smith alpha3 = x[3*i+2]; 421bcc973b7SBarry Smith while (n-->0) { 422bcc973b7SBarry Smith y[3*(*idx)] += alpha1*(*v); 423bcc973b7SBarry Smith y[3*(*idx)+1] += alpha2*(*v); 424bcc973b7SBarry Smith y[3*(*idx)+2] += alpha3*(*v); 425bcc973b7SBarry Smith idx++; v++; 426bcc973b7SBarry Smith } 427bcc973b7SBarry Smith } 428dc0b31edSSatish Balay ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 4293649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 4301ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 431bcc973b7SBarry Smith PetscFunctionReturn(0); 432bcc973b7SBarry Smith } 433bcc973b7SBarry Smith 4344a2ae208SSatish Balay #undef __FUNCT__ 435b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 436dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 437bcc973b7SBarry Smith { 4384cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 439bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 440d2840e09SBarry Smith const PetscScalar *x,*v; 441d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3; 442dfbe8321SBarry Smith PetscErrorCode ierr; 443b24ad042SBarry Smith PetscInt n,i,jrow,j; 444d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 445bcc973b7SBarry Smith 446bcc973b7SBarry Smith PetscFunctionBegin; 447f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 4483649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 4491ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 450bcc973b7SBarry Smith idx = a->j; 451bcc973b7SBarry Smith v = a->a; 452bcc973b7SBarry Smith ii = a->i; 453bcc973b7SBarry Smith 454bcc973b7SBarry Smith for (i=0; i<m; i++) { 455bcc973b7SBarry Smith jrow = ii[i]; 456bcc973b7SBarry Smith n = ii[i+1] - jrow; 457bcc973b7SBarry Smith sum1 = 0.0; 458bcc973b7SBarry Smith sum2 = 0.0; 459bcc973b7SBarry Smith sum3 = 0.0; 460bcc973b7SBarry Smith for (j=0; j<n; j++) { 461bcc973b7SBarry Smith sum1 += v[jrow]*x[3*idx[jrow]]; 462bcc973b7SBarry Smith sum2 += v[jrow]*x[3*idx[jrow]+1]; 463bcc973b7SBarry Smith sum3 += v[jrow]*x[3*idx[jrow]+2]; 464bcc973b7SBarry Smith jrow++; 465bcc973b7SBarry Smith } 466bcc973b7SBarry Smith y[3*i] += sum1; 467bcc973b7SBarry Smith y[3*i+1] += sum2; 468bcc973b7SBarry Smith y[3*i+2] += sum3; 469bcc973b7SBarry Smith } 470bcc973b7SBarry Smith 471dc0b31edSSatish Balay ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 4723649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 4731ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 474bcc973b7SBarry Smith PetscFunctionReturn(0); 475bcc973b7SBarry Smith } 4764a2ae208SSatish Balay #undef __FUNCT__ 477b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 478dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 479bcc973b7SBarry Smith { 4804cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 481bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 482d2840e09SBarry Smith const PetscScalar *x,*v; 483d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3; 484dfbe8321SBarry Smith PetscErrorCode ierr; 485d2840e09SBarry Smith PetscInt n,i; 486d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 487bcc973b7SBarry Smith 488bcc973b7SBarry Smith PetscFunctionBegin; 489f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 4903649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 4911ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 492bcc973b7SBarry Smith for (i=0; i<m; i++) { 493bfec09a0SHong Zhang idx = a->j + a->i[i] ; 494bfec09a0SHong Zhang v = a->a + a->i[i] ; 495bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 496bcc973b7SBarry Smith alpha1 = x[3*i]; 497bcc973b7SBarry Smith alpha2 = x[3*i+1]; 498bcc973b7SBarry Smith alpha3 = x[3*i+2]; 499bcc973b7SBarry Smith while (n-->0) { 500bcc973b7SBarry Smith y[3*(*idx)] += alpha1*(*v); 501bcc973b7SBarry Smith y[3*(*idx)+1] += alpha2*(*v); 502bcc973b7SBarry Smith y[3*(*idx)+2] += alpha3*(*v); 503bcc973b7SBarry Smith idx++; v++; 504bcc973b7SBarry Smith } 505bcc973b7SBarry Smith } 506dc0b31edSSatish Balay ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 5073649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 5081ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 509bcc973b7SBarry Smith PetscFunctionReturn(0); 510bcc973b7SBarry Smith } 511bcc973b7SBarry Smith 512bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/ 5134a2ae208SSatish Balay #undef __FUNCT__ 514b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_4" 515dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 516bcc973b7SBarry Smith { 5174cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 518bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 519d2840e09SBarry Smith const PetscScalar *x,*v; 520d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4; 521dfbe8321SBarry Smith PetscErrorCode ierr; 522d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 523d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 524bcc973b7SBarry Smith 525bcc973b7SBarry Smith PetscFunctionBegin; 5263649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 5271ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 528bcc973b7SBarry Smith idx = a->j; 529bcc973b7SBarry Smith v = a->a; 530bcc973b7SBarry Smith ii = a->i; 531bcc973b7SBarry Smith 532bcc973b7SBarry Smith for (i=0; i<m; i++) { 533bcc973b7SBarry Smith jrow = ii[i]; 534bcc973b7SBarry Smith n = ii[i+1] - jrow; 535bcc973b7SBarry Smith sum1 = 0.0; 536bcc973b7SBarry Smith sum2 = 0.0; 537bcc973b7SBarry Smith sum3 = 0.0; 538bcc973b7SBarry Smith sum4 = 0.0; 539b3c51c66SMatthew Knepley nonzerorow += (n>0); 540bcc973b7SBarry Smith for (j=0; j<n; j++) { 541bcc973b7SBarry Smith sum1 += v[jrow]*x[4*idx[jrow]]; 542bcc973b7SBarry Smith sum2 += v[jrow]*x[4*idx[jrow]+1]; 543bcc973b7SBarry Smith sum3 += v[jrow]*x[4*idx[jrow]+2]; 544bcc973b7SBarry Smith sum4 += v[jrow]*x[4*idx[jrow]+3]; 545bcc973b7SBarry Smith jrow++; 546bcc973b7SBarry Smith } 547bcc973b7SBarry Smith y[4*i] = sum1; 548bcc973b7SBarry Smith y[4*i+1] = sum2; 549bcc973b7SBarry Smith y[4*i+2] = sum3; 550bcc973b7SBarry Smith y[4*i+3] = sum4; 551bcc973b7SBarry Smith } 552bcc973b7SBarry Smith 553dc0b31edSSatish Balay ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr); 5543649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 5551ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 556bcc973b7SBarry Smith PetscFunctionReturn(0); 557bcc973b7SBarry Smith } 558bcc973b7SBarry Smith 5594a2ae208SSatish Balay #undef __FUNCT__ 560b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 561dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 562bcc973b7SBarry Smith { 5634cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 564bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 565d2840e09SBarry Smith const PetscScalar *x,*v; 566d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 567dfbe8321SBarry Smith PetscErrorCode ierr; 568d2840e09SBarry Smith PetscInt n,i; 569d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 570bcc973b7SBarry Smith 571bcc973b7SBarry Smith PetscFunctionBegin; 572d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 5733649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 5741ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 575bcc973b7SBarry Smith for (i=0; i<m; i++) { 576bfec09a0SHong Zhang idx = a->j + a->i[i] ; 577bfec09a0SHong Zhang v = a->a + a->i[i] ; 578bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 579bcc973b7SBarry Smith alpha1 = x[4*i]; 580bcc973b7SBarry Smith alpha2 = x[4*i+1]; 581bcc973b7SBarry Smith alpha3 = x[4*i+2]; 582bcc973b7SBarry Smith alpha4 = x[4*i+3]; 583bcc973b7SBarry Smith while (n-->0) { 584bcc973b7SBarry Smith y[4*(*idx)] += alpha1*(*v); 585bcc973b7SBarry Smith y[4*(*idx)+1] += alpha2*(*v); 586bcc973b7SBarry Smith y[4*(*idx)+2] += alpha3*(*v); 587bcc973b7SBarry Smith y[4*(*idx)+3] += alpha4*(*v); 588bcc973b7SBarry Smith idx++; v++; 589bcc973b7SBarry Smith } 590bcc973b7SBarry Smith } 591dc0b31edSSatish Balay ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 5923649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 5931ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 594bcc973b7SBarry Smith PetscFunctionReturn(0); 595bcc973b7SBarry Smith } 596bcc973b7SBarry Smith 5974a2ae208SSatish Balay #undef __FUNCT__ 598b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 599dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 600bcc973b7SBarry Smith { 6014cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 602f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 603d2840e09SBarry Smith const PetscScalar *x,*v; 604d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4; 605dfbe8321SBarry Smith PetscErrorCode ierr; 606b24ad042SBarry Smith PetscInt n,i,jrow,j; 607d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 608f9fae5adSBarry Smith 609f9fae5adSBarry Smith PetscFunctionBegin; 610f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 6113649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 6121ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 613f9fae5adSBarry Smith idx = a->j; 614f9fae5adSBarry Smith v = a->a; 615f9fae5adSBarry Smith ii = a->i; 616f9fae5adSBarry Smith 617f9fae5adSBarry Smith for (i=0; i<m; i++) { 618f9fae5adSBarry Smith jrow = ii[i]; 619f9fae5adSBarry Smith n = ii[i+1] - jrow; 620f9fae5adSBarry Smith sum1 = 0.0; 621f9fae5adSBarry Smith sum2 = 0.0; 622f9fae5adSBarry Smith sum3 = 0.0; 623f9fae5adSBarry Smith sum4 = 0.0; 624f9fae5adSBarry Smith for (j=0; j<n; j++) { 625f9fae5adSBarry Smith sum1 += v[jrow]*x[4*idx[jrow]]; 626f9fae5adSBarry Smith sum2 += v[jrow]*x[4*idx[jrow]+1]; 627f9fae5adSBarry Smith sum3 += v[jrow]*x[4*idx[jrow]+2]; 628f9fae5adSBarry Smith sum4 += v[jrow]*x[4*idx[jrow]+3]; 629f9fae5adSBarry Smith jrow++; 630f9fae5adSBarry Smith } 631f9fae5adSBarry Smith y[4*i] += sum1; 632f9fae5adSBarry Smith y[4*i+1] += sum2; 633f9fae5adSBarry Smith y[4*i+2] += sum3; 634f9fae5adSBarry Smith y[4*i+3] += sum4; 635f9fae5adSBarry Smith } 636f9fae5adSBarry Smith 637dc0b31edSSatish Balay ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 6383649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 6391ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 640f9fae5adSBarry Smith PetscFunctionReturn(0); 641bcc973b7SBarry Smith } 6424a2ae208SSatish Balay #undef __FUNCT__ 643b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 644dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 645bcc973b7SBarry Smith { 6464cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 647f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 648d2840e09SBarry Smith const PetscScalar *x,*v; 649d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 650dfbe8321SBarry Smith PetscErrorCode ierr; 651d2840e09SBarry Smith PetscInt n,i; 652d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 653f9fae5adSBarry Smith 654f9fae5adSBarry Smith PetscFunctionBegin; 655f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 6563649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 6571ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 658bfec09a0SHong Zhang 659f9fae5adSBarry Smith for (i=0; i<m; i++) { 660bfec09a0SHong Zhang idx = a->j + a->i[i] ; 661bfec09a0SHong Zhang v = a->a + a->i[i] ; 662f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 663f9fae5adSBarry Smith alpha1 = x[4*i]; 664f9fae5adSBarry Smith alpha2 = x[4*i+1]; 665f9fae5adSBarry Smith alpha3 = x[4*i+2]; 666f9fae5adSBarry Smith alpha4 = x[4*i+3]; 667f9fae5adSBarry Smith while (n-->0) { 668f9fae5adSBarry Smith y[4*(*idx)] += alpha1*(*v); 669f9fae5adSBarry Smith y[4*(*idx)+1] += alpha2*(*v); 670f9fae5adSBarry Smith y[4*(*idx)+2] += alpha3*(*v); 671f9fae5adSBarry Smith y[4*(*idx)+3] += alpha4*(*v); 672f9fae5adSBarry Smith idx++; v++; 673f9fae5adSBarry Smith } 674f9fae5adSBarry Smith } 675dc0b31edSSatish Balay ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 6763649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 6771ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 678f9fae5adSBarry Smith PetscFunctionReturn(0); 679f9fae5adSBarry Smith } 680f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/ 6816cd98798SBarry Smith 6824a2ae208SSatish Balay #undef __FUNCT__ 683b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_5" 684dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 685f9fae5adSBarry Smith { 6864cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 687f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 688d2840e09SBarry Smith const PetscScalar *x,*v; 689d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 690dfbe8321SBarry Smith PetscErrorCode ierr; 691d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 692d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 693f9fae5adSBarry Smith 694f9fae5adSBarry Smith PetscFunctionBegin; 6953649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 6961ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 697f9fae5adSBarry Smith idx = a->j; 698f9fae5adSBarry Smith v = a->a; 699f9fae5adSBarry Smith ii = a->i; 700f9fae5adSBarry Smith 701f9fae5adSBarry Smith for (i=0; i<m; i++) { 702f9fae5adSBarry Smith jrow = ii[i]; 703f9fae5adSBarry Smith n = ii[i+1] - jrow; 704f9fae5adSBarry Smith sum1 = 0.0; 705f9fae5adSBarry Smith sum2 = 0.0; 706f9fae5adSBarry Smith sum3 = 0.0; 707f9fae5adSBarry Smith sum4 = 0.0; 708f9fae5adSBarry Smith sum5 = 0.0; 709b3c51c66SMatthew Knepley nonzerorow += (n>0); 710f9fae5adSBarry Smith for (j=0; j<n; j++) { 711f9fae5adSBarry Smith sum1 += v[jrow]*x[5*idx[jrow]]; 712f9fae5adSBarry Smith sum2 += v[jrow]*x[5*idx[jrow]+1]; 713f9fae5adSBarry Smith sum3 += v[jrow]*x[5*idx[jrow]+2]; 714f9fae5adSBarry Smith sum4 += v[jrow]*x[5*idx[jrow]+3]; 715f9fae5adSBarry Smith sum5 += v[jrow]*x[5*idx[jrow]+4]; 716f9fae5adSBarry Smith jrow++; 717f9fae5adSBarry Smith } 718f9fae5adSBarry Smith y[5*i] = sum1; 719f9fae5adSBarry Smith y[5*i+1] = sum2; 720f9fae5adSBarry Smith y[5*i+2] = sum3; 721f9fae5adSBarry Smith y[5*i+3] = sum4; 722f9fae5adSBarry Smith y[5*i+4] = sum5; 723f9fae5adSBarry Smith } 724f9fae5adSBarry Smith 725dc0b31edSSatish Balay ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr); 7263649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 7271ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 728f9fae5adSBarry Smith PetscFunctionReturn(0); 729f9fae5adSBarry Smith } 730f9fae5adSBarry Smith 7314a2ae208SSatish Balay #undef __FUNCT__ 732b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 733dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 734f9fae5adSBarry Smith { 7354cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 736f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 737d2840e09SBarry Smith const PetscScalar *x,*v; 738d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 739dfbe8321SBarry Smith PetscErrorCode ierr; 740d2840e09SBarry Smith PetscInt n,i; 741d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 742f9fae5adSBarry Smith 743f9fae5adSBarry Smith PetscFunctionBegin; 744d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 7453649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 7461ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 747bfec09a0SHong Zhang 748f9fae5adSBarry Smith for (i=0; i<m; i++) { 749bfec09a0SHong Zhang idx = a->j + a->i[i] ; 750bfec09a0SHong Zhang v = a->a + a->i[i] ; 751f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 752f9fae5adSBarry Smith alpha1 = x[5*i]; 753f9fae5adSBarry Smith alpha2 = x[5*i+1]; 754f9fae5adSBarry Smith alpha3 = x[5*i+2]; 755f9fae5adSBarry Smith alpha4 = x[5*i+3]; 756f9fae5adSBarry Smith alpha5 = x[5*i+4]; 757f9fae5adSBarry Smith while (n-->0) { 758f9fae5adSBarry Smith y[5*(*idx)] += alpha1*(*v); 759f9fae5adSBarry Smith y[5*(*idx)+1] += alpha2*(*v); 760f9fae5adSBarry Smith y[5*(*idx)+2] += alpha3*(*v); 761f9fae5adSBarry Smith y[5*(*idx)+3] += alpha4*(*v); 762f9fae5adSBarry Smith y[5*(*idx)+4] += alpha5*(*v); 763f9fae5adSBarry Smith idx++; v++; 764f9fae5adSBarry Smith } 765f9fae5adSBarry Smith } 766dc0b31edSSatish Balay ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 7673649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 7681ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 769f9fae5adSBarry Smith PetscFunctionReturn(0); 770f9fae5adSBarry Smith } 771f9fae5adSBarry Smith 7724a2ae208SSatish Balay #undef __FUNCT__ 773b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 774dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 775f9fae5adSBarry Smith { 7764cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 777f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 778d2840e09SBarry Smith const PetscScalar *x,*v; 779d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 780dfbe8321SBarry Smith PetscErrorCode ierr; 781b24ad042SBarry Smith PetscInt n,i,jrow,j; 782d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 783f9fae5adSBarry Smith 784f9fae5adSBarry Smith PetscFunctionBegin; 785f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 7863649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 7871ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 788f9fae5adSBarry Smith idx = a->j; 789f9fae5adSBarry Smith v = a->a; 790f9fae5adSBarry Smith ii = a->i; 791f9fae5adSBarry Smith 792f9fae5adSBarry Smith for (i=0; i<m; i++) { 793f9fae5adSBarry Smith jrow = ii[i]; 794f9fae5adSBarry Smith n = ii[i+1] - jrow; 795f9fae5adSBarry Smith sum1 = 0.0; 796f9fae5adSBarry Smith sum2 = 0.0; 797f9fae5adSBarry Smith sum3 = 0.0; 798f9fae5adSBarry Smith sum4 = 0.0; 799f9fae5adSBarry Smith sum5 = 0.0; 800f9fae5adSBarry Smith for (j=0; j<n; j++) { 801f9fae5adSBarry Smith sum1 += v[jrow]*x[5*idx[jrow]]; 802f9fae5adSBarry Smith sum2 += v[jrow]*x[5*idx[jrow]+1]; 803f9fae5adSBarry Smith sum3 += v[jrow]*x[5*idx[jrow]+2]; 804f9fae5adSBarry Smith sum4 += v[jrow]*x[5*idx[jrow]+3]; 805f9fae5adSBarry Smith sum5 += v[jrow]*x[5*idx[jrow]+4]; 806f9fae5adSBarry Smith jrow++; 807f9fae5adSBarry Smith } 808f9fae5adSBarry Smith y[5*i] += sum1; 809f9fae5adSBarry Smith y[5*i+1] += sum2; 810f9fae5adSBarry Smith y[5*i+2] += sum3; 811f9fae5adSBarry Smith y[5*i+3] += sum4; 812f9fae5adSBarry Smith y[5*i+4] += sum5; 813f9fae5adSBarry Smith } 814f9fae5adSBarry Smith 815dc0b31edSSatish Balay ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 8163649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 8171ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 818f9fae5adSBarry Smith PetscFunctionReturn(0); 819f9fae5adSBarry Smith } 820f9fae5adSBarry Smith 8214a2ae208SSatish Balay #undef __FUNCT__ 822b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 823dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 824f9fae5adSBarry Smith { 8254cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 826f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 827d2840e09SBarry Smith const PetscScalar *x,*v; 828d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 829dfbe8321SBarry Smith PetscErrorCode ierr; 830d2840e09SBarry Smith PetscInt n,i; 831d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 832f9fae5adSBarry Smith 833f9fae5adSBarry Smith PetscFunctionBegin; 834f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 8353649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 8361ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 837bfec09a0SHong Zhang 838f9fae5adSBarry Smith for (i=0; i<m; i++) { 839bfec09a0SHong Zhang idx = a->j + a->i[i] ; 840bfec09a0SHong Zhang v = a->a + a->i[i] ; 841f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 842f9fae5adSBarry Smith alpha1 = x[5*i]; 843f9fae5adSBarry Smith alpha2 = x[5*i+1]; 844f9fae5adSBarry Smith alpha3 = x[5*i+2]; 845f9fae5adSBarry Smith alpha4 = x[5*i+3]; 846f9fae5adSBarry Smith alpha5 = x[5*i+4]; 847f9fae5adSBarry Smith while (n-->0) { 848f9fae5adSBarry Smith y[5*(*idx)] += alpha1*(*v); 849f9fae5adSBarry Smith y[5*(*idx)+1] += alpha2*(*v); 850f9fae5adSBarry Smith y[5*(*idx)+2] += alpha3*(*v); 851f9fae5adSBarry Smith y[5*(*idx)+3] += alpha4*(*v); 852f9fae5adSBarry Smith y[5*(*idx)+4] += alpha5*(*v); 853f9fae5adSBarry Smith idx++; v++; 854f9fae5adSBarry Smith } 855f9fae5adSBarry Smith } 856dc0b31edSSatish Balay ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 8573649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 8581ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 859f9fae5adSBarry Smith PetscFunctionReturn(0); 860bcc973b7SBarry Smith } 861bcc973b7SBarry Smith 8626cd98798SBarry Smith /* ------------------------------------------------------------------------------*/ 8634a2ae208SSatish Balay #undef __FUNCT__ 864b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_6" 865dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 8666cd98798SBarry Smith { 8676cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 8686cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 869d2840e09SBarry Smith const PetscScalar *x,*v; 870d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 871dfbe8321SBarry Smith PetscErrorCode ierr; 872d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 873d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 8746cd98798SBarry Smith 8756cd98798SBarry Smith PetscFunctionBegin; 8763649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 8771ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 8786cd98798SBarry Smith idx = a->j; 8796cd98798SBarry Smith v = a->a; 8806cd98798SBarry Smith ii = a->i; 8816cd98798SBarry Smith 8826cd98798SBarry Smith for (i=0; i<m; i++) { 8836cd98798SBarry Smith jrow = ii[i]; 8846cd98798SBarry Smith n = ii[i+1] - jrow; 8856cd98798SBarry Smith sum1 = 0.0; 8866cd98798SBarry Smith sum2 = 0.0; 8876cd98798SBarry Smith sum3 = 0.0; 8886cd98798SBarry Smith sum4 = 0.0; 8896cd98798SBarry Smith sum5 = 0.0; 8906cd98798SBarry Smith sum6 = 0.0; 891b3c51c66SMatthew Knepley nonzerorow += (n>0); 8926cd98798SBarry Smith for (j=0; j<n; j++) { 8936cd98798SBarry Smith sum1 += v[jrow]*x[6*idx[jrow]]; 8946cd98798SBarry Smith sum2 += v[jrow]*x[6*idx[jrow]+1]; 8956cd98798SBarry Smith sum3 += v[jrow]*x[6*idx[jrow]+2]; 8966cd98798SBarry Smith sum4 += v[jrow]*x[6*idx[jrow]+3]; 8976cd98798SBarry Smith sum5 += v[jrow]*x[6*idx[jrow]+4]; 8986cd98798SBarry Smith sum6 += v[jrow]*x[6*idx[jrow]+5]; 8996cd98798SBarry Smith jrow++; 9006cd98798SBarry Smith } 9016cd98798SBarry Smith y[6*i] = sum1; 9026cd98798SBarry Smith y[6*i+1] = sum2; 9036cd98798SBarry Smith y[6*i+2] = sum3; 9046cd98798SBarry Smith y[6*i+3] = sum4; 9056cd98798SBarry Smith y[6*i+4] = sum5; 9066cd98798SBarry Smith y[6*i+5] = sum6; 9076cd98798SBarry Smith } 9086cd98798SBarry Smith 909dc0b31edSSatish Balay ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr); 9103649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 9111ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 9126cd98798SBarry Smith PetscFunctionReturn(0); 9136cd98798SBarry Smith } 9146cd98798SBarry Smith 9154a2ae208SSatish Balay #undef __FUNCT__ 916b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 917dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 9186cd98798SBarry Smith { 9196cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 9206cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 921d2840e09SBarry Smith const PetscScalar *x,*v; 922d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 923dfbe8321SBarry Smith PetscErrorCode ierr; 924d2840e09SBarry Smith PetscInt n,i; 925d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 9266cd98798SBarry Smith 9276cd98798SBarry Smith PetscFunctionBegin; 928d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 9293649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 9301ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 931bfec09a0SHong Zhang 9326cd98798SBarry Smith for (i=0; i<m; i++) { 933bfec09a0SHong Zhang idx = a->j + a->i[i] ; 934bfec09a0SHong Zhang v = a->a + a->i[i] ; 9356cd98798SBarry Smith n = a->i[i+1] - a->i[i]; 9366cd98798SBarry Smith alpha1 = x[6*i]; 9376cd98798SBarry Smith alpha2 = x[6*i+1]; 9386cd98798SBarry Smith alpha3 = x[6*i+2]; 9396cd98798SBarry Smith alpha4 = x[6*i+3]; 9406cd98798SBarry Smith alpha5 = x[6*i+4]; 9416cd98798SBarry Smith alpha6 = x[6*i+5]; 9426cd98798SBarry Smith while (n-->0) { 9436cd98798SBarry Smith y[6*(*idx)] += alpha1*(*v); 9446cd98798SBarry Smith y[6*(*idx)+1] += alpha2*(*v); 9456cd98798SBarry Smith y[6*(*idx)+2] += alpha3*(*v); 9466cd98798SBarry Smith y[6*(*idx)+3] += alpha4*(*v); 9476cd98798SBarry Smith y[6*(*idx)+4] += alpha5*(*v); 9486cd98798SBarry Smith y[6*(*idx)+5] += alpha6*(*v); 9496cd98798SBarry Smith idx++; v++; 9506cd98798SBarry Smith } 9516cd98798SBarry Smith } 952dc0b31edSSatish Balay ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 9533649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 9541ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 9556cd98798SBarry Smith PetscFunctionReturn(0); 9566cd98798SBarry Smith } 9576cd98798SBarry Smith 9584a2ae208SSatish Balay #undef __FUNCT__ 959b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 960dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 9616cd98798SBarry Smith { 9626cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 9636cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 964d2840e09SBarry Smith const PetscScalar *x,*v; 965d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 966dfbe8321SBarry Smith PetscErrorCode ierr; 967b24ad042SBarry Smith PetscInt n,i,jrow,j; 968d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 9696cd98798SBarry Smith 9706cd98798SBarry Smith PetscFunctionBegin; 9716cd98798SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 9723649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 9731ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 9746cd98798SBarry Smith idx = a->j; 9756cd98798SBarry Smith v = a->a; 9766cd98798SBarry Smith ii = a->i; 9776cd98798SBarry Smith 9786cd98798SBarry Smith for (i=0; i<m; i++) { 9796cd98798SBarry Smith jrow = ii[i]; 9806cd98798SBarry Smith n = ii[i+1] - jrow; 9816cd98798SBarry Smith sum1 = 0.0; 9826cd98798SBarry Smith sum2 = 0.0; 9836cd98798SBarry Smith sum3 = 0.0; 9846cd98798SBarry Smith sum4 = 0.0; 9856cd98798SBarry Smith sum5 = 0.0; 9866cd98798SBarry Smith sum6 = 0.0; 9876cd98798SBarry Smith for (j=0; j<n; j++) { 9886cd98798SBarry Smith sum1 += v[jrow]*x[6*idx[jrow]]; 9896cd98798SBarry Smith sum2 += v[jrow]*x[6*idx[jrow]+1]; 9906cd98798SBarry Smith sum3 += v[jrow]*x[6*idx[jrow]+2]; 9916cd98798SBarry Smith sum4 += v[jrow]*x[6*idx[jrow]+3]; 9926cd98798SBarry Smith sum5 += v[jrow]*x[6*idx[jrow]+4]; 9936cd98798SBarry Smith sum6 += v[jrow]*x[6*idx[jrow]+5]; 9946cd98798SBarry Smith jrow++; 9956cd98798SBarry Smith } 9966cd98798SBarry Smith y[6*i] += sum1; 9976cd98798SBarry Smith y[6*i+1] += sum2; 9986cd98798SBarry Smith y[6*i+2] += sum3; 9996cd98798SBarry Smith y[6*i+3] += sum4; 10006cd98798SBarry Smith y[6*i+4] += sum5; 10016cd98798SBarry Smith y[6*i+5] += sum6; 10026cd98798SBarry Smith } 10036cd98798SBarry Smith 1004dc0b31edSSatish Balay ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 10053649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 10061ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 10076cd98798SBarry Smith PetscFunctionReturn(0); 10086cd98798SBarry Smith } 10096cd98798SBarry Smith 10104a2ae208SSatish Balay #undef __FUNCT__ 1011b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 1012dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 10136cd98798SBarry Smith { 10146cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 10156cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1016d2840e09SBarry Smith const PetscScalar *x,*v; 1017d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 1018dfbe8321SBarry Smith PetscErrorCode ierr; 1019d2840e09SBarry Smith PetscInt n,i; 1020d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 10216cd98798SBarry Smith 10226cd98798SBarry Smith PetscFunctionBegin; 10236cd98798SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 10243649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 10251ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1026bfec09a0SHong Zhang 10276cd98798SBarry Smith for (i=0; i<m; i++) { 1028bfec09a0SHong Zhang idx = a->j + a->i[i] ; 1029bfec09a0SHong Zhang v = a->a + a->i[i] ; 10306cd98798SBarry Smith n = a->i[i+1] - a->i[i]; 10316cd98798SBarry Smith alpha1 = x[6*i]; 10326cd98798SBarry Smith alpha2 = x[6*i+1]; 10336cd98798SBarry Smith alpha3 = x[6*i+2]; 10346cd98798SBarry Smith alpha4 = x[6*i+3]; 10356cd98798SBarry Smith alpha5 = x[6*i+4]; 10366cd98798SBarry Smith alpha6 = x[6*i+5]; 10376cd98798SBarry Smith while (n-->0) { 10386cd98798SBarry Smith y[6*(*idx)] += alpha1*(*v); 10396cd98798SBarry Smith y[6*(*idx)+1] += alpha2*(*v); 10406cd98798SBarry Smith y[6*(*idx)+2] += alpha3*(*v); 10416cd98798SBarry Smith y[6*(*idx)+3] += alpha4*(*v); 10426cd98798SBarry Smith y[6*(*idx)+4] += alpha5*(*v); 10436cd98798SBarry Smith y[6*(*idx)+5] += alpha6*(*v); 10446cd98798SBarry Smith idx++; v++; 10456cd98798SBarry Smith } 10466cd98798SBarry Smith } 1047dc0b31edSSatish Balay ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 10483649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 10491ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 10506cd98798SBarry Smith PetscFunctionReturn(0); 10516cd98798SBarry Smith } 10526cd98798SBarry Smith 105366ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/ 105466ed3db0SBarry Smith #undef __FUNCT__ 1055ed8eea03SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_7" 1056ed8eea03SBarry Smith PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1057ed8eea03SBarry Smith { 1058ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1059ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1060d2840e09SBarry Smith const PetscScalar *x,*v; 1061d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1062ed8eea03SBarry Smith PetscErrorCode ierr; 1063d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1064d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 1065ed8eea03SBarry Smith 1066ed8eea03SBarry Smith PetscFunctionBegin; 10673649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1068ed8eea03SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1069ed8eea03SBarry Smith idx = a->j; 1070ed8eea03SBarry Smith v = a->a; 1071ed8eea03SBarry Smith ii = a->i; 1072ed8eea03SBarry Smith 1073ed8eea03SBarry Smith for (i=0; i<m; i++) { 1074ed8eea03SBarry Smith jrow = ii[i]; 1075ed8eea03SBarry Smith n = ii[i+1] - jrow; 1076ed8eea03SBarry Smith sum1 = 0.0; 1077ed8eea03SBarry Smith sum2 = 0.0; 1078ed8eea03SBarry Smith sum3 = 0.0; 1079ed8eea03SBarry Smith sum4 = 0.0; 1080ed8eea03SBarry Smith sum5 = 0.0; 1081ed8eea03SBarry Smith sum6 = 0.0; 1082ed8eea03SBarry Smith sum7 = 0.0; 1083b3c51c66SMatthew Knepley nonzerorow += (n>0); 1084ed8eea03SBarry Smith for (j=0; j<n; j++) { 1085ed8eea03SBarry Smith sum1 += v[jrow]*x[7*idx[jrow]]; 1086ed8eea03SBarry Smith sum2 += v[jrow]*x[7*idx[jrow]+1]; 1087ed8eea03SBarry Smith sum3 += v[jrow]*x[7*idx[jrow]+2]; 1088ed8eea03SBarry Smith sum4 += v[jrow]*x[7*idx[jrow]+3]; 1089ed8eea03SBarry Smith sum5 += v[jrow]*x[7*idx[jrow]+4]; 1090ed8eea03SBarry Smith sum6 += v[jrow]*x[7*idx[jrow]+5]; 1091ed8eea03SBarry Smith sum7 += v[jrow]*x[7*idx[jrow]+6]; 1092ed8eea03SBarry Smith jrow++; 1093ed8eea03SBarry Smith } 1094ed8eea03SBarry Smith y[7*i] = sum1; 1095ed8eea03SBarry Smith y[7*i+1] = sum2; 1096ed8eea03SBarry Smith y[7*i+2] = sum3; 1097ed8eea03SBarry Smith y[7*i+3] = sum4; 1098ed8eea03SBarry Smith y[7*i+4] = sum5; 1099ed8eea03SBarry Smith y[7*i+5] = sum6; 1100ed8eea03SBarry Smith y[7*i+6] = sum7; 1101ed8eea03SBarry Smith } 1102ed8eea03SBarry Smith 1103dc0b31edSSatish Balay ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr); 11043649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1105ed8eea03SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1106ed8eea03SBarry Smith PetscFunctionReturn(0); 1107ed8eea03SBarry Smith } 1108ed8eea03SBarry Smith 1109ed8eea03SBarry Smith #undef __FUNCT__ 1110ed8eea03SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7" 1111ed8eea03SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1112ed8eea03SBarry Smith { 1113ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1114ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1115d2840e09SBarry Smith const PetscScalar *x,*v; 1116d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1117ed8eea03SBarry Smith PetscErrorCode ierr; 1118d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1119d2840e09SBarry Smith PetscInt n,i; 1120ed8eea03SBarry Smith 1121ed8eea03SBarry Smith PetscFunctionBegin; 1122d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 11233649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1124ed8eea03SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1125ed8eea03SBarry Smith 1126ed8eea03SBarry Smith for (i=0; i<m; i++) { 1127ed8eea03SBarry Smith idx = a->j + a->i[i] ; 1128ed8eea03SBarry Smith v = a->a + a->i[i] ; 1129ed8eea03SBarry Smith n = a->i[i+1] - a->i[i]; 1130ed8eea03SBarry Smith alpha1 = x[7*i]; 1131ed8eea03SBarry Smith alpha2 = x[7*i+1]; 1132ed8eea03SBarry Smith alpha3 = x[7*i+2]; 1133ed8eea03SBarry Smith alpha4 = x[7*i+3]; 1134ed8eea03SBarry Smith alpha5 = x[7*i+4]; 1135ed8eea03SBarry Smith alpha6 = x[7*i+5]; 1136ed8eea03SBarry Smith alpha7 = x[7*i+6]; 1137ed8eea03SBarry Smith while (n-->0) { 1138ed8eea03SBarry Smith y[7*(*idx)] += alpha1*(*v); 1139ed8eea03SBarry Smith y[7*(*idx)+1] += alpha2*(*v); 1140ed8eea03SBarry Smith y[7*(*idx)+2] += alpha3*(*v); 1141ed8eea03SBarry Smith y[7*(*idx)+3] += alpha4*(*v); 1142ed8eea03SBarry Smith y[7*(*idx)+4] += alpha5*(*v); 1143ed8eea03SBarry Smith y[7*(*idx)+5] += alpha6*(*v); 1144ed8eea03SBarry Smith y[7*(*idx)+6] += alpha7*(*v); 1145ed8eea03SBarry Smith idx++; v++; 1146ed8eea03SBarry Smith } 1147ed8eea03SBarry Smith } 1148dc0b31edSSatish Balay ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 11493649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1150ed8eea03SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1151ed8eea03SBarry Smith PetscFunctionReturn(0); 1152ed8eea03SBarry Smith } 1153ed8eea03SBarry Smith 1154ed8eea03SBarry Smith #undef __FUNCT__ 1155ed8eea03SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_7" 1156ed8eea03SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1157ed8eea03SBarry Smith { 1158ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1159ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1160d2840e09SBarry Smith const PetscScalar *x,*v; 1161d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1162ed8eea03SBarry Smith PetscErrorCode ierr; 1163d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1164ed8eea03SBarry Smith PetscInt n,i,jrow,j; 1165ed8eea03SBarry Smith 1166ed8eea03SBarry Smith PetscFunctionBegin; 1167ed8eea03SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 11683649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1169ed8eea03SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1170ed8eea03SBarry Smith idx = a->j; 1171ed8eea03SBarry Smith v = a->a; 1172ed8eea03SBarry Smith ii = a->i; 1173ed8eea03SBarry Smith 1174ed8eea03SBarry Smith for (i=0; i<m; i++) { 1175ed8eea03SBarry Smith jrow = ii[i]; 1176ed8eea03SBarry Smith n = ii[i+1] - jrow; 1177ed8eea03SBarry Smith sum1 = 0.0; 1178ed8eea03SBarry Smith sum2 = 0.0; 1179ed8eea03SBarry Smith sum3 = 0.0; 1180ed8eea03SBarry Smith sum4 = 0.0; 1181ed8eea03SBarry Smith sum5 = 0.0; 1182ed8eea03SBarry Smith sum6 = 0.0; 1183ed8eea03SBarry Smith sum7 = 0.0; 1184ed8eea03SBarry Smith for (j=0; j<n; j++) { 1185ed8eea03SBarry Smith sum1 += v[jrow]*x[7*idx[jrow]]; 1186ed8eea03SBarry Smith sum2 += v[jrow]*x[7*idx[jrow]+1]; 1187ed8eea03SBarry Smith sum3 += v[jrow]*x[7*idx[jrow]+2]; 1188ed8eea03SBarry Smith sum4 += v[jrow]*x[7*idx[jrow]+3]; 1189ed8eea03SBarry Smith sum5 += v[jrow]*x[7*idx[jrow]+4]; 1190ed8eea03SBarry Smith sum6 += v[jrow]*x[7*idx[jrow]+5]; 1191ed8eea03SBarry Smith sum7 += v[jrow]*x[7*idx[jrow]+6]; 1192ed8eea03SBarry Smith jrow++; 1193ed8eea03SBarry Smith } 1194ed8eea03SBarry Smith y[7*i] += sum1; 1195ed8eea03SBarry Smith y[7*i+1] += sum2; 1196ed8eea03SBarry Smith y[7*i+2] += sum3; 1197ed8eea03SBarry Smith y[7*i+3] += sum4; 1198ed8eea03SBarry Smith y[7*i+4] += sum5; 1199ed8eea03SBarry Smith y[7*i+5] += sum6; 1200ed8eea03SBarry Smith y[7*i+6] += sum7; 1201ed8eea03SBarry Smith } 1202ed8eea03SBarry Smith 1203dc0b31edSSatish Balay ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 12043649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1205ed8eea03SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1206ed8eea03SBarry Smith PetscFunctionReturn(0); 1207ed8eea03SBarry Smith } 1208ed8eea03SBarry Smith 1209ed8eea03SBarry Smith #undef __FUNCT__ 1210ed8eea03SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7" 1211ed8eea03SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1212ed8eea03SBarry Smith { 1213ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1214ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1215d2840e09SBarry Smith const PetscScalar *x,*v; 1216d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1217ed8eea03SBarry Smith PetscErrorCode ierr; 1218d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1219d2840e09SBarry Smith PetscInt n,i; 1220ed8eea03SBarry Smith 1221ed8eea03SBarry Smith PetscFunctionBegin; 1222ed8eea03SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 12233649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1224ed8eea03SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1225ed8eea03SBarry Smith for (i=0; i<m; i++) { 1226ed8eea03SBarry Smith idx = a->j + a->i[i] ; 1227ed8eea03SBarry Smith v = a->a + a->i[i] ; 1228ed8eea03SBarry Smith n = a->i[i+1] - a->i[i]; 1229ed8eea03SBarry Smith alpha1 = x[7*i]; 1230ed8eea03SBarry Smith alpha2 = x[7*i+1]; 1231ed8eea03SBarry Smith alpha3 = x[7*i+2]; 1232ed8eea03SBarry Smith alpha4 = x[7*i+3]; 1233ed8eea03SBarry Smith alpha5 = x[7*i+4]; 1234ed8eea03SBarry Smith alpha6 = x[7*i+5]; 1235ed8eea03SBarry Smith alpha7 = x[7*i+6]; 1236ed8eea03SBarry Smith while (n-->0) { 1237ed8eea03SBarry Smith y[7*(*idx)] += alpha1*(*v); 1238ed8eea03SBarry Smith y[7*(*idx)+1] += alpha2*(*v); 1239ed8eea03SBarry Smith y[7*(*idx)+2] += alpha3*(*v); 1240ed8eea03SBarry Smith y[7*(*idx)+3] += alpha4*(*v); 1241ed8eea03SBarry Smith y[7*(*idx)+4] += alpha5*(*v); 1242ed8eea03SBarry Smith y[7*(*idx)+5] += alpha6*(*v); 1243ed8eea03SBarry Smith y[7*(*idx)+6] += alpha7*(*v); 1244ed8eea03SBarry Smith idx++; v++; 1245ed8eea03SBarry Smith } 1246ed8eea03SBarry Smith } 1247dc0b31edSSatish Balay ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 12483649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1249ed8eea03SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1250ed8eea03SBarry Smith PetscFunctionReturn(0); 1251ed8eea03SBarry Smith } 1252ed8eea03SBarry Smith 1253ed8eea03SBarry Smith #undef __FUNCT__ 125466ed3db0SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_8" 1255dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 125666ed3db0SBarry Smith { 125766ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 125866ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1259d2840e09SBarry Smith const PetscScalar *x,*v; 1260d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1261dfbe8321SBarry Smith PetscErrorCode ierr; 1262d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1263d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 126466ed3db0SBarry Smith 126566ed3db0SBarry Smith PetscFunctionBegin; 12663649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 12671ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 126866ed3db0SBarry Smith idx = a->j; 126966ed3db0SBarry Smith v = a->a; 127066ed3db0SBarry Smith ii = a->i; 127166ed3db0SBarry Smith 127266ed3db0SBarry Smith for (i=0; i<m; i++) { 127366ed3db0SBarry Smith jrow = ii[i]; 127466ed3db0SBarry Smith n = ii[i+1] - jrow; 127566ed3db0SBarry Smith sum1 = 0.0; 127666ed3db0SBarry Smith sum2 = 0.0; 127766ed3db0SBarry Smith sum3 = 0.0; 127866ed3db0SBarry Smith sum4 = 0.0; 127966ed3db0SBarry Smith sum5 = 0.0; 128066ed3db0SBarry Smith sum6 = 0.0; 128166ed3db0SBarry Smith sum7 = 0.0; 128266ed3db0SBarry Smith sum8 = 0.0; 1283b3c51c66SMatthew Knepley nonzerorow += (n>0); 128466ed3db0SBarry Smith for (j=0; j<n; j++) { 128566ed3db0SBarry Smith sum1 += v[jrow]*x[8*idx[jrow]]; 128666ed3db0SBarry Smith sum2 += v[jrow]*x[8*idx[jrow]+1]; 128766ed3db0SBarry Smith sum3 += v[jrow]*x[8*idx[jrow]+2]; 128866ed3db0SBarry Smith sum4 += v[jrow]*x[8*idx[jrow]+3]; 128966ed3db0SBarry Smith sum5 += v[jrow]*x[8*idx[jrow]+4]; 129066ed3db0SBarry Smith sum6 += v[jrow]*x[8*idx[jrow]+5]; 129166ed3db0SBarry Smith sum7 += v[jrow]*x[8*idx[jrow]+6]; 129266ed3db0SBarry Smith sum8 += v[jrow]*x[8*idx[jrow]+7]; 129366ed3db0SBarry Smith jrow++; 129466ed3db0SBarry Smith } 129566ed3db0SBarry Smith y[8*i] = sum1; 129666ed3db0SBarry Smith y[8*i+1] = sum2; 129766ed3db0SBarry Smith y[8*i+2] = sum3; 129866ed3db0SBarry Smith y[8*i+3] = sum4; 129966ed3db0SBarry Smith y[8*i+4] = sum5; 130066ed3db0SBarry Smith y[8*i+5] = sum6; 130166ed3db0SBarry Smith y[8*i+6] = sum7; 130266ed3db0SBarry Smith y[8*i+7] = sum8; 130366ed3db0SBarry Smith } 130466ed3db0SBarry Smith 1305dc0b31edSSatish Balay ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr); 13063649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 13071ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 130866ed3db0SBarry Smith PetscFunctionReturn(0); 130966ed3db0SBarry Smith } 131066ed3db0SBarry Smith 131166ed3db0SBarry Smith #undef __FUNCT__ 131266ed3db0SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 1313dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 131466ed3db0SBarry Smith { 131566ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 131666ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1317d2840e09SBarry Smith const PetscScalar *x,*v; 1318d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1319dfbe8321SBarry Smith PetscErrorCode ierr; 1320d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1321d2840e09SBarry Smith PetscInt n,i; 132266ed3db0SBarry Smith 132366ed3db0SBarry Smith PetscFunctionBegin; 1324d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 13253649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 13261ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1327bfec09a0SHong Zhang 132866ed3db0SBarry Smith for (i=0; i<m; i++) { 1329bfec09a0SHong Zhang idx = a->j + a->i[i] ; 1330bfec09a0SHong Zhang v = a->a + a->i[i] ; 133166ed3db0SBarry Smith n = a->i[i+1] - a->i[i]; 133266ed3db0SBarry Smith alpha1 = x[8*i]; 133366ed3db0SBarry Smith alpha2 = x[8*i+1]; 133466ed3db0SBarry Smith alpha3 = x[8*i+2]; 133566ed3db0SBarry Smith alpha4 = x[8*i+3]; 133666ed3db0SBarry Smith alpha5 = x[8*i+4]; 133766ed3db0SBarry Smith alpha6 = x[8*i+5]; 133866ed3db0SBarry Smith alpha7 = x[8*i+6]; 133966ed3db0SBarry Smith alpha8 = x[8*i+7]; 134066ed3db0SBarry Smith while (n-->0) { 134166ed3db0SBarry Smith y[8*(*idx)] += alpha1*(*v); 134266ed3db0SBarry Smith y[8*(*idx)+1] += alpha2*(*v); 134366ed3db0SBarry Smith y[8*(*idx)+2] += alpha3*(*v); 134466ed3db0SBarry Smith y[8*(*idx)+3] += alpha4*(*v); 134566ed3db0SBarry Smith y[8*(*idx)+4] += alpha5*(*v); 134666ed3db0SBarry Smith y[8*(*idx)+5] += alpha6*(*v); 134766ed3db0SBarry Smith y[8*(*idx)+6] += alpha7*(*v); 134866ed3db0SBarry Smith y[8*(*idx)+7] += alpha8*(*v); 134966ed3db0SBarry Smith idx++; v++; 135066ed3db0SBarry Smith } 135166ed3db0SBarry Smith } 1352dc0b31edSSatish Balay ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 13533649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 13541ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 135566ed3db0SBarry Smith PetscFunctionReturn(0); 135666ed3db0SBarry Smith } 135766ed3db0SBarry Smith 135866ed3db0SBarry Smith #undef __FUNCT__ 135966ed3db0SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1360dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 136166ed3db0SBarry Smith { 136266ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 136366ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1364d2840e09SBarry Smith const PetscScalar *x,*v; 1365d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1366dfbe8321SBarry Smith PetscErrorCode ierr; 1367d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1368b24ad042SBarry Smith PetscInt n,i,jrow,j; 136966ed3db0SBarry Smith 137066ed3db0SBarry Smith PetscFunctionBegin; 137166ed3db0SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 13723649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 13731ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 137466ed3db0SBarry Smith idx = a->j; 137566ed3db0SBarry Smith v = a->a; 137666ed3db0SBarry Smith ii = a->i; 137766ed3db0SBarry Smith 137866ed3db0SBarry Smith for (i=0; i<m; i++) { 137966ed3db0SBarry Smith jrow = ii[i]; 138066ed3db0SBarry Smith n = ii[i+1] - jrow; 138166ed3db0SBarry Smith sum1 = 0.0; 138266ed3db0SBarry Smith sum2 = 0.0; 138366ed3db0SBarry Smith sum3 = 0.0; 138466ed3db0SBarry Smith sum4 = 0.0; 138566ed3db0SBarry Smith sum5 = 0.0; 138666ed3db0SBarry Smith sum6 = 0.0; 138766ed3db0SBarry Smith sum7 = 0.0; 138866ed3db0SBarry Smith sum8 = 0.0; 138966ed3db0SBarry Smith for (j=0; j<n; j++) { 139066ed3db0SBarry Smith sum1 += v[jrow]*x[8*idx[jrow]]; 139166ed3db0SBarry Smith sum2 += v[jrow]*x[8*idx[jrow]+1]; 139266ed3db0SBarry Smith sum3 += v[jrow]*x[8*idx[jrow]+2]; 139366ed3db0SBarry Smith sum4 += v[jrow]*x[8*idx[jrow]+3]; 139466ed3db0SBarry Smith sum5 += v[jrow]*x[8*idx[jrow]+4]; 139566ed3db0SBarry Smith sum6 += v[jrow]*x[8*idx[jrow]+5]; 139666ed3db0SBarry Smith sum7 += v[jrow]*x[8*idx[jrow]+6]; 139766ed3db0SBarry Smith sum8 += v[jrow]*x[8*idx[jrow]+7]; 139866ed3db0SBarry Smith jrow++; 139966ed3db0SBarry Smith } 140066ed3db0SBarry Smith y[8*i] += sum1; 140166ed3db0SBarry Smith y[8*i+1] += sum2; 140266ed3db0SBarry Smith y[8*i+2] += sum3; 140366ed3db0SBarry Smith y[8*i+3] += sum4; 140466ed3db0SBarry Smith y[8*i+4] += sum5; 140566ed3db0SBarry Smith y[8*i+5] += sum6; 140666ed3db0SBarry Smith y[8*i+6] += sum7; 140766ed3db0SBarry Smith y[8*i+7] += sum8; 140866ed3db0SBarry Smith } 140966ed3db0SBarry Smith 1410dc0b31edSSatish Balay ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 14113649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 14121ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 141366ed3db0SBarry Smith PetscFunctionReturn(0); 141466ed3db0SBarry Smith } 141566ed3db0SBarry Smith 141666ed3db0SBarry Smith #undef __FUNCT__ 141766ed3db0SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1418dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 141966ed3db0SBarry Smith { 142066ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 142166ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1422d2840e09SBarry Smith const PetscScalar *x,*v; 1423d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1424dfbe8321SBarry Smith PetscErrorCode ierr; 1425d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1426d2840e09SBarry Smith PetscInt n,i; 142766ed3db0SBarry Smith 142866ed3db0SBarry Smith PetscFunctionBegin; 142966ed3db0SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 14303649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 14311ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 143266ed3db0SBarry Smith for (i=0; i<m; i++) { 1433bfec09a0SHong Zhang idx = a->j + a->i[i] ; 1434bfec09a0SHong Zhang v = a->a + a->i[i] ; 143566ed3db0SBarry Smith n = a->i[i+1] - a->i[i]; 143666ed3db0SBarry Smith alpha1 = x[8*i]; 143766ed3db0SBarry Smith alpha2 = x[8*i+1]; 143866ed3db0SBarry Smith alpha3 = x[8*i+2]; 143966ed3db0SBarry Smith alpha4 = x[8*i+3]; 144066ed3db0SBarry Smith alpha5 = x[8*i+4]; 144166ed3db0SBarry Smith alpha6 = x[8*i+5]; 144266ed3db0SBarry Smith alpha7 = x[8*i+6]; 144366ed3db0SBarry Smith alpha8 = x[8*i+7]; 144466ed3db0SBarry Smith while (n-->0) { 144566ed3db0SBarry Smith y[8*(*idx)] += alpha1*(*v); 144666ed3db0SBarry Smith y[8*(*idx)+1] += alpha2*(*v); 144766ed3db0SBarry Smith y[8*(*idx)+2] += alpha3*(*v); 144866ed3db0SBarry Smith y[8*(*idx)+3] += alpha4*(*v); 144966ed3db0SBarry Smith y[8*(*idx)+4] += alpha5*(*v); 145066ed3db0SBarry Smith y[8*(*idx)+5] += alpha6*(*v); 145166ed3db0SBarry Smith y[8*(*idx)+6] += alpha7*(*v); 145266ed3db0SBarry Smith y[8*(*idx)+7] += alpha8*(*v); 145366ed3db0SBarry Smith idx++; v++; 145466ed3db0SBarry Smith } 145566ed3db0SBarry Smith } 1456dc0b31edSSatish Balay ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 14573649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 14581ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 14592f7816d4SBarry Smith PetscFunctionReturn(0); 14602f7816d4SBarry Smith } 14612f7816d4SBarry Smith 14622b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/ 14632b692628SMatthew Knepley #undef __FUNCT__ 14642b692628SMatthew Knepley #define __FUNCT__ "MatMult_SeqMAIJ_9" 1465dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 14662b692628SMatthew Knepley { 14672b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 14682b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1469d2840e09SBarry Smith const PetscScalar *x,*v; 1470d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1471dfbe8321SBarry Smith PetscErrorCode ierr; 1472d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1473d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 14742b692628SMatthew Knepley 14752b692628SMatthew Knepley PetscFunctionBegin; 14763649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 14771ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 14782b692628SMatthew Knepley idx = a->j; 14792b692628SMatthew Knepley v = a->a; 14802b692628SMatthew Knepley ii = a->i; 14812b692628SMatthew Knepley 14822b692628SMatthew Knepley for (i=0; i<m; i++) { 14832b692628SMatthew Knepley jrow = ii[i]; 14842b692628SMatthew Knepley n = ii[i+1] - jrow; 14852b692628SMatthew Knepley sum1 = 0.0; 14862b692628SMatthew Knepley sum2 = 0.0; 14872b692628SMatthew Knepley sum3 = 0.0; 14882b692628SMatthew Knepley sum4 = 0.0; 14892b692628SMatthew Knepley sum5 = 0.0; 14902b692628SMatthew Knepley sum6 = 0.0; 14912b692628SMatthew Knepley sum7 = 0.0; 14922b692628SMatthew Knepley sum8 = 0.0; 14932b692628SMatthew Knepley sum9 = 0.0; 1494b3c51c66SMatthew Knepley nonzerorow += (n>0); 14952b692628SMatthew Knepley for (j=0; j<n; j++) { 14962b692628SMatthew Knepley sum1 += v[jrow]*x[9*idx[jrow]]; 14972b692628SMatthew Knepley sum2 += v[jrow]*x[9*idx[jrow]+1]; 14982b692628SMatthew Knepley sum3 += v[jrow]*x[9*idx[jrow]+2]; 14992b692628SMatthew Knepley sum4 += v[jrow]*x[9*idx[jrow]+3]; 15002b692628SMatthew Knepley sum5 += v[jrow]*x[9*idx[jrow]+4]; 15012b692628SMatthew Knepley sum6 += v[jrow]*x[9*idx[jrow]+5]; 15022b692628SMatthew Knepley sum7 += v[jrow]*x[9*idx[jrow]+6]; 15032b692628SMatthew Knepley sum8 += v[jrow]*x[9*idx[jrow]+7]; 15042b692628SMatthew Knepley sum9 += v[jrow]*x[9*idx[jrow]+8]; 15052b692628SMatthew Knepley jrow++; 15062b692628SMatthew Knepley } 15072b692628SMatthew Knepley y[9*i] = sum1; 15082b692628SMatthew Knepley y[9*i+1] = sum2; 15092b692628SMatthew Knepley y[9*i+2] = sum3; 15102b692628SMatthew Knepley y[9*i+3] = sum4; 15112b692628SMatthew Knepley y[9*i+4] = sum5; 15122b692628SMatthew Knepley y[9*i+5] = sum6; 15132b692628SMatthew Knepley y[9*i+6] = sum7; 15142b692628SMatthew Knepley y[9*i+7] = sum8; 15152b692628SMatthew Knepley y[9*i+8] = sum9; 15162b692628SMatthew Knepley } 15172b692628SMatthew Knepley 1518dc0b31edSSatish Balay ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr); 15193649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 15201ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 15212b692628SMatthew Knepley PetscFunctionReturn(0); 15222b692628SMatthew Knepley } 15232b692628SMatthew Knepley 1524b9cda22cSBarry Smith /* ------------------------------------------------------------------------------*/ 1525b9cda22cSBarry Smith 15262b692628SMatthew Knepley #undef __FUNCT__ 15272b692628SMatthew Knepley #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9" 1528dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 15292b692628SMatthew Knepley { 15302b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 15312b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1532d2840e09SBarry Smith const PetscScalar *x,*v; 1533d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1534dfbe8321SBarry Smith PetscErrorCode ierr; 1535d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1536d2840e09SBarry Smith PetscInt n,i; 15372b692628SMatthew Knepley 15382b692628SMatthew Knepley PetscFunctionBegin; 1539d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 15403649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 15411ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 15422b692628SMatthew Knepley 15432b692628SMatthew Knepley for (i=0; i<m; i++) { 15442b692628SMatthew Knepley idx = a->j + a->i[i] ; 15452b692628SMatthew Knepley v = a->a + a->i[i] ; 15462b692628SMatthew Knepley n = a->i[i+1] - a->i[i]; 15472b692628SMatthew Knepley alpha1 = x[9*i]; 15482b692628SMatthew Knepley alpha2 = x[9*i+1]; 15492b692628SMatthew Knepley alpha3 = x[9*i+2]; 15502b692628SMatthew Knepley alpha4 = x[9*i+3]; 15512b692628SMatthew Knepley alpha5 = x[9*i+4]; 15522b692628SMatthew Knepley alpha6 = x[9*i+5]; 15532b692628SMatthew Knepley alpha7 = x[9*i+6]; 15542b692628SMatthew Knepley alpha8 = x[9*i+7]; 15552f6bd0c6SMatthew Knepley alpha9 = x[9*i+8]; 15562b692628SMatthew Knepley while (n-->0) { 15572b692628SMatthew Knepley y[9*(*idx)] += alpha1*(*v); 15582b692628SMatthew Knepley y[9*(*idx)+1] += alpha2*(*v); 15592b692628SMatthew Knepley y[9*(*idx)+2] += alpha3*(*v); 15602b692628SMatthew Knepley y[9*(*idx)+3] += alpha4*(*v); 15612b692628SMatthew Knepley y[9*(*idx)+4] += alpha5*(*v); 15622b692628SMatthew Knepley y[9*(*idx)+5] += alpha6*(*v); 15632b692628SMatthew Knepley y[9*(*idx)+6] += alpha7*(*v); 15642b692628SMatthew Knepley y[9*(*idx)+7] += alpha8*(*v); 15652b692628SMatthew Knepley y[9*(*idx)+8] += alpha9*(*v); 15662b692628SMatthew Knepley idx++; v++; 15672b692628SMatthew Knepley } 15682b692628SMatthew Knepley } 1569dc0b31edSSatish Balay ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 15703649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 15711ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 15722b692628SMatthew Knepley PetscFunctionReturn(0); 15732b692628SMatthew Knepley } 15742b692628SMatthew Knepley 15752b692628SMatthew Knepley #undef __FUNCT__ 15762b692628SMatthew Knepley #define __FUNCT__ "MatMultAdd_SeqMAIJ_9" 1577dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 15782b692628SMatthew Knepley { 15792b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 15802b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1581d2840e09SBarry Smith const PetscScalar *x,*v; 1582d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1583dfbe8321SBarry Smith PetscErrorCode ierr; 1584d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1585b24ad042SBarry Smith PetscInt n,i,jrow,j; 15862b692628SMatthew Knepley 15872b692628SMatthew Knepley PetscFunctionBegin; 15882b692628SMatthew Knepley if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 15893649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 15901ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 15912b692628SMatthew Knepley idx = a->j; 15922b692628SMatthew Knepley v = a->a; 15932b692628SMatthew Knepley ii = a->i; 15942b692628SMatthew Knepley 15952b692628SMatthew Knepley for (i=0; i<m; i++) { 15962b692628SMatthew Knepley jrow = ii[i]; 15972b692628SMatthew Knepley n = ii[i+1] - jrow; 15982b692628SMatthew Knepley sum1 = 0.0; 15992b692628SMatthew Knepley sum2 = 0.0; 16002b692628SMatthew Knepley sum3 = 0.0; 16012b692628SMatthew Knepley sum4 = 0.0; 16022b692628SMatthew Knepley sum5 = 0.0; 16032b692628SMatthew Knepley sum6 = 0.0; 16042b692628SMatthew Knepley sum7 = 0.0; 16052b692628SMatthew Knepley sum8 = 0.0; 16062b692628SMatthew Knepley sum9 = 0.0; 16072b692628SMatthew Knepley for (j=0; j<n; j++) { 16082b692628SMatthew Knepley sum1 += v[jrow]*x[9*idx[jrow]]; 16092b692628SMatthew Knepley sum2 += v[jrow]*x[9*idx[jrow]+1]; 16102b692628SMatthew Knepley sum3 += v[jrow]*x[9*idx[jrow]+2]; 16112b692628SMatthew Knepley sum4 += v[jrow]*x[9*idx[jrow]+3]; 16122b692628SMatthew Knepley sum5 += v[jrow]*x[9*idx[jrow]+4]; 16132b692628SMatthew Knepley sum6 += v[jrow]*x[9*idx[jrow]+5]; 16142b692628SMatthew Knepley sum7 += v[jrow]*x[9*idx[jrow]+6]; 16152b692628SMatthew Knepley sum8 += v[jrow]*x[9*idx[jrow]+7]; 16162b692628SMatthew Knepley sum9 += v[jrow]*x[9*idx[jrow]+8]; 16172b692628SMatthew Knepley jrow++; 16182b692628SMatthew Knepley } 16192b692628SMatthew Knepley y[9*i] += sum1; 16202b692628SMatthew Knepley y[9*i+1] += sum2; 16212b692628SMatthew Knepley y[9*i+2] += sum3; 16222b692628SMatthew Knepley y[9*i+3] += sum4; 16232b692628SMatthew Knepley y[9*i+4] += sum5; 16242b692628SMatthew Knepley y[9*i+5] += sum6; 16252b692628SMatthew Knepley y[9*i+6] += sum7; 16262b692628SMatthew Knepley y[9*i+7] += sum8; 16272b692628SMatthew Knepley y[9*i+8] += sum9; 16282b692628SMatthew Knepley } 16292b692628SMatthew Knepley 1630efee365bSSatish Balay ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 16313649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 16321ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 16332b692628SMatthew Knepley PetscFunctionReturn(0); 16342b692628SMatthew Knepley } 16352b692628SMatthew Knepley 16362b692628SMatthew Knepley #undef __FUNCT__ 16372b692628SMatthew Knepley #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9" 1638dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 16392b692628SMatthew Knepley { 16402b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 16412b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1642d2840e09SBarry Smith const PetscScalar *x,*v; 1643d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1644dfbe8321SBarry Smith PetscErrorCode ierr; 1645d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1646d2840e09SBarry Smith PetscInt n,i; 16472b692628SMatthew Knepley 16482b692628SMatthew Knepley PetscFunctionBegin; 16492b692628SMatthew Knepley if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 16503649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 16511ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 16522b692628SMatthew Knepley for (i=0; i<m; i++) { 16532b692628SMatthew Knepley idx = a->j + a->i[i] ; 16542b692628SMatthew Knepley v = a->a + a->i[i] ; 16552b692628SMatthew Knepley n = a->i[i+1] - a->i[i]; 16562b692628SMatthew Knepley alpha1 = x[9*i]; 16572b692628SMatthew Knepley alpha2 = x[9*i+1]; 16582b692628SMatthew Knepley alpha3 = x[9*i+2]; 16592b692628SMatthew Knepley alpha4 = x[9*i+3]; 16602b692628SMatthew Knepley alpha5 = x[9*i+4]; 16612b692628SMatthew Knepley alpha6 = x[9*i+5]; 16622b692628SMatthew Knepley alpha7 = x[9*i+6]; 16632b692628SMatthew Knepley alpha8 = x[9*i+7]; 16642b692628SMatthew Knepley alpha9 = x[9*i+8]; 16652b692628SMatthew Knepley while (n-->0) { 16662b692628SMatthew Knepley y[9*(*idx)] += alpha1*(*v); 16672b692628SMatthew Knepley y[9*(*idx)+1] += alpha2*(*v); 16682b692628SMatthew Knepley y[9*(*idx)+2] += alpha3*(*v); 16692b692628SMatthew Knepley y[9*(*idx)+3] += alpha4*(*v); 16702b692628SMatthew Knepley y[9*(*idx)+4] += alpha5*(*v); 16712b692628SMatthew Knepley y[9*(*idx)+5] += alpha6*(*v); 16722b692628SMatthew Knepley y[9*(*idx)+6] += alpha7*(*v); 16732b692628SMatthew Knepley y[9*(*idx)+7] += alpha8*(*v); 16742b692628SMatthew Knepley y[9*(*idx)+8] += alpha9*(*v); 16752b692628SMatthew Knepley idx++; v++; 16762b692628SMatthew Knepley } 16772b692628SMatthew Knepley } 1678dc0b31edSSatish Balay ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 16793649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 16801ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 16812b692628SMatthew Knepley PetscFunctionReturn(0); 16822b692628SMatthew Knepley } 1683b9cda22cSBarry Smith #undef __FUNCT__ 1684b9cda22cSBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_10" 1685b9cda22cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1686b9cda22cSBarry Smith { 1687b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1688b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1689d2840e09SBarry Smith const PetscScalar *x,*v; 1690d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1691b9cda22cSBarry Smith PetscErrorCode ierr; 1692d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1693d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 1694b9cda22cSBarry Smith 1695b9cda22cSBarry Smith PetscFunctionBegin; 16963649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1697b9cda22cSBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1698b9cda22cSBarry Smith idx = a->j; 1699b9cda22cSBarry Smith v = a->a; 1700b9cda22cSBarry Smith ii = a->i; 1701b9cda22cSBarry Smith 1702b9cda22cSBarry Smith for (i=0; i<m; i++) { 1703b9cda22cSBarry Smith jrow = ii[i]; 1704b9cda22cSBarry Smith n = ii[i+1] - jrow; 1705b9cda22cSBarry Smith sum1 = 0.0; 1706b9cda22cSBarry Smith sum2 = 0.0; 1707b9cda22cSBarry Smith sum3 = 0.0; 1708b9cda22cSBarry Smith sum4 = 0.0; 1709b9cda22cSBarry Smith sum5 = 0.0; 1710b9cda22cSBarry Smith sum6 = 0.0; 1711b9cda22cSBarry Smith sum7 = 0.0; 1712b9cda22cSBarry Smith sum8 = 0.0; 1713b9cda22cSBarry Smith sum9 = 0.0; 1714b9cda22cSBarry Smith sum10 = 0.0; 1715b3c51c66SMatthew Knepley nonzerorow += (n>0); 1716b9cda22cSBarry Smith for (j=0; j<n; j++) { 1717b9cda22cSBarry Smith sum1 += v[jrow]*x[10*idx[jrow]]; 1718b9cda22cSBarry Smith sum2 += v[jrow]*x[10*idx[jrow]+1]; 1719b9cda22cSBarry Smith sum3 += v[jrow]*x[10*idx[jrow]+2]; 1720b9cda22cSBarry Smith sum4 += v[jrow]*x[10*idx[jrow]+3]; 1721b9cda22cSBarry Smith sum5 += v[jrow]*x[10*idx[jrow]+4]; 1722b9cda22cSBarry Smith sum6 += v[jrow]*x[10*idx[jrow]+5]; 1723b9cda22cSBarry Smith sum7 += v[jrow]*x[10*idx[jrow]+6]; 1724b9cda22cSBarry Smith sum8 += v[jrow]*x[10*idx[jrow]+7]; 1725b9cda22cSBarry Smith sum9 += v[jrow]*x[10*idx[jrow]+8]; 1726b9cda22cSBarry Smith sum10 += v[jrow]*x[10*idx[jrow]+9]; 1727b9cda22cSBarry Smith jrow++; 1728b9cda22cSBarry Smith } 1729b9cda22cSBarry Smith y[10*i] = sum1; 1730b9cda22cSBarry Smith y[10*i+1] = sum2; 1731b9cda22cSBarry Smith y[10*i+2] = sum3; 1732b9cda22cSBarry Smith y[10*i+3] = sum4; 1733b9cda22cSBarry Smith y[10*i+4] = sum5; 1734b9cda22cSBarry Smith y[10*i+5] = sum6; 1735b9cda22cSBarry Smith y[10*i+6] = sum7; 1736b9cda22cSBarry Smith y[10*i+7] = sum8; 1737b9cda22cSBarry Smith y[10*i+8] = sum9; 1738b9cda22cSBarry Smith y[10*i+9] = sum10; 1739b9cda22cSBarry Smith } 1740b9cda22cSBarry Smith 1741dc0b31edSSatish Balay ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr); 17423649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1743b9cda22cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1744b9cda22cSBarry Smith PetscFunctionReturn(0); 1745b9cda22cSBarry Smith } 1746b9cda22cSBarry Smith 1747b9cda22cSBarry Smith #undef __FUNCT__ 1748dbdd7285SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_10" 1749b9cda22cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1750b9cda22cSBarry Smith { 1751b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1752b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1753d2840e09SBarry Smith const PetscScalar *x,*v; 1754d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1755b9cda22cSBarry Smith PetscErrorCode ierr; 1756d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1757b9cda22cSBarry Smith PetscInt n,i,jrow,j; 1758b9cda22cSBarry Smith 1759b9cda22cSBarry Smith PetscFunctionBegin; 1760b9cda22cSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 17613649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1762b9cda22cSBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1763b9cda22cSBarry Smith idx = a->j; 1764b9cda22cSBarry Smith v = a->a; 1765b9cda22cSBarry Smith ii = a->i; 1766b9cda22cSBarry Smith 1767b9cda22cSBarry Smith for (i=0; i<m; i++) { 1768b9cda22cSBarry Smith jrow = ii[i]; 1769b9cda22cSBarry Smith n = ii[i+1] - jrow; 1770b9cda22cSBarry Smith sum1 = 0.0; 1771b9cda22cSBarry Smith sum2 = 0.0; 1772b9cda22cSBarry Smith sum3 = 0.0; 1773b9cda22cSBarry Smith sum4 = 0.0; 1774b9cda22cSBarry Smith sum5 = 0.0; 1775b9cda22cSBarry Smith sum6 = 0.0; 1776b9cda22cSBarry Smith sum7 = 0.0; 1777b9cda22cSBarry Smith sum8 = 0.0; 1778b9cda22cSBarry Smith sum9 = 0.0; 1779b9cda22cSBarry Smith sum10 = 0.0; 1780b9cda22cSBarry Smith for (j=0; j<n; j++) { 1781b9cda22cSBarry Smith sum1 += v[jrow]*x[10*idx[jrow]]; 1782b9cda22cSBarry Smith sum2 += v[jrow]*x[10*idx[jrow]+1]; 1783b9cda22cSBarry Smith sum3 += v[jrow]*x[10*idx[jrow]+2]; 1784b9cda22cSBarry Smith sum4 += v[jrow]*x[10*idx[jrow]+3]; 1785b9cda22cSBarry Smith sum5 += v[jrow]*x[10*idx[jrow]+4]; 1786b9cda22cSBarry Smith sum6 += v[jrow]*x[10*idx[jrow]+5]; 1787b9cda22cSBarry Smith sum7 += v[jrow]*x[10*idx[jrow]+6]; 1788b9cda22cSBarry Smith sum8 += v[jrow]*x[10*idx[jrow]+7]; 1789b9cda22cSBarry Smith sum9 += v[jrow]*x[10*idx[jrow]+8]; 1790b9cda22cSBarry Smith sum10 += v[jrow]*x[10*idx[jrow]+9]; 1791b9cda22cSBarry Smith jrow++; 1792b9cda22cSBarry Smith } 1793b9cda22cSBarry Smith y[10*i] += sum1; 1794b9cda22cSBarry Smith y[10*i+1] += sum2; 1795b9cda22cSBarry Smith y[10*i+2] += sum3; 1796b9cda22cSBarry Smith y[10*i+3] += sum4; 1797b9cda22cSBarry Smith y[10*i+4] += sum5; 1798b9cda22cSBarry Smith y[10*i+5] += sum6; 1799b9cda22cSBarry Smith y[10*i+6] += sum7; 1800b9cda22cSBarry Smith y[10*i+7] += sum8; 1801b9cda22cSBarry Smith y[10*i+8] += sum9; 1802b9cda22cSBarry Smith y[10*i+9] += sum10; 1803b9cda22cSBarry Smith } 1804b9cda22cSBarry Smith 1805dc0b31edSSatish Balay ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 18063649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1807b9cda22cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1808b9cda22cSBarry Smith PetscFunctionReturn(0); 1809b9cda22cSBarry Smith } 1810b9cda22cSBarry Smith 1811b9cda22cSBarry Smith #undef __FUNCT__ 1812b9cda22cSBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10" 1813b9cda22cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1814b9cda22cSBarry Smith { 1815b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1816b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1817d2840e09SBarry Smith const PetscScalar *x,*v; 1818d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1819b9cda22cSBarry Smith PetscErrorCode ierr; 1820d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1821d2840e09SBarry Smith PetscInt n,i; 1822b9cda22cSBarry Smith 1823b9cda22cSBarry Smith PetscFunctionBegin; 1824d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 18253649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1826b9cda22cSBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1827b9cda22cSBarry Smith 1828b9cda22cSBarry Smith for (i=0; i<m; i++) { 1829b9cda22cSBarry Smith idx = a->j + a->i[i] ; 1830b9cda22cSBarry Smith v = a->a + a->i[i] ; 1831b9cda22cSBarry Smith n = a->i[i+1] - a->i[i]; 1832e29fdc22SBarry Smith alpha1 = x[10*i]; 1833e29fdc22SBarry Smith alpha2 = x[10*i+1]; 1834e29fdc22SBarry Smith alpha3 = x[10*i+2]; 1835e29fdc22SBarry Smith alpha4 = x[10*i+3]; 1836e29fdc22SBarry Smith alpha5 = x[10*i+4]; 1837e29fdc22SBarry Smith alpha6 = x[10*i+5]; 1838e29fdc22SBarry Smith alpha7 = x[10*i+6]; 1839e29fdc22SBarry Smith alpha8 = x[10*i+7]; 1840e29fdc22SBarry Smith alpha9 = x[10*i+8]; 1841b9cda22cSBarry Smith alpha10 = x[10*i+9]; 1842b9cda22cSBarry Smith while (n-->0) { 1843e29fdc22SBarry Smith y[10*(*idx)] += alpha1*(*v); 1844e29fdc22SBarry Smith y[10*(*idx)+1] += alpha2*(*v); 1845e29fdc22SBarry Smith y[10*(*idx)+2] += alpha3*(*v); 1846e29fdc22SBarry Smith y[10*(*idx)+3] += alpha4*(*v); 1847e29fdc22SBarry Smith y[10*(*idx)+4] += alpha5*(*v); 1848e29fdc22SBarry Smith y[10*(*idx)+5] += alpha6*(*v); 1849e29fdc22SBarry Smith y[10*(*idx)+6] += alpha7*(*v); 1850e29fdc22SBarry Smith y[10*(*idx)+7] += alpha8*(*v); 1851e29fdc22SBarry Smith y[10*(*idx)+8] += alpha9*(*v); 1852b9cda22cSBarry Smith y[10*(*idx)+9] += alpha10*(*v); 1853b9cda22cSBarry Smith idx++; v++; 1854b9cda22cSBarry Smith } 1855b9cda22cSBarry Smith } 1856dc0b31edSSatish Balay ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 18573649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1858b9cda22cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1859b9cda22cSBarry Smith PetscFunctionReturn(0); 1860b9cda22cSBarry Smith } 1861b9cda22cSBarry Smith 1862b9cda22cSBarry Smith #undef __FUNCT__ 1863b9cda22cSBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10" 1864b9cda22cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1865b9cda22cSBarry Smith { 1866b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1867b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1868d2840e09SBarry Smith const PetscScalar *x,*v; 1869d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1870b9cda22cSBarry Smith PetscErrorCode ierr; 1871d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1872d2840e09SBarry Smith PetscInt n,i; 1873b9cda22cSBarry Smith 1874b9cda22cSBarry Smith PetscFunctionBegin; 1875b9cda22cSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 18763649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1877b9cda22cSBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1878b9cda22cSBarry Smith for (i=0; i<m; i++) { 1879b9cda22cSBarry Smith idx = a->j + a->i[i] ; 1880b9cda22cSBarry Smith v = a->a + a->i[i] ; 1881b9cda22cSBarry Smith n = a->i[i+1] - a->i[i]; 1882b9cda22cSBarry Smith alpha1 = x[10*i]; 1883b9cda22cSBarry Smith alpha2 = x[10*i+1]; 1884b9cda22cSBarry Smith alpha3 = x[10*i+2]; 1885b9cda22cSBarry Smith alpha4 = x[10*i+3]; 1886b9cda22cSBarry Smith alpha5 = x[10*i+4]; 1887b9cda22cSBarry Smith alpha6 = x[10*i+5]; 1888b9cda22cSBarry Smith alpha7 = x[10*i+6]; 1889b9cda22cSBarry Smith alpha8 = x[10*i+7]; 1890b9cda22cSBarry Smith alpha9 = x[10*i+8]; 1891b9cda22cSBarry Smith alpha10 = x[10*i+9]; 1892b9cda22cSBarry Smith while (n-->0) { 1893b9cda22cSBarry Smith y[10*(*idx)] += alpha1*(*v); 1894b9cda22cSBarry Smith y[10*(*idx)+1] += alpha2*(*v); 1895b9cda22cSBarry Smith y[10*(*idx)+2] += alpha3*(*v); 1896b9cda22cSBarry Smith y[10*(*idx)+3] += alpha4*(*v); 1897b9cda22cSBarry Smith y[10*(*idx)+4] += alpha5*(*v); 1898b9cda22cSBarry Smith y[10*(*idx)+5] += alpha6*(*v); 1899b9cda22cSBarry Smith y[10*(*idx)+6] += alpha7*(*v); 1900b9cda22cSBarry Smith y[10*(*idx)+7] += alpha8*(*v); 1901b9cda22cSBarry Smith y[10*(*idx)+8] += alpha9*(*v); 1902b9cda22cSBarry Smith y[10*(*idx)+9] += alpha10*(*v); 1903b9cda22cSBarry Smith idx++; v++; 1904b9cda22cSBarry Smith } 1905b9cda22cSBarry Smith } 1906dc0b31edSSatish Balay ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 19073649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1908b9cda22cSBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1909b9cda22cSBarry Smith PetscFunctionReturn(0); 1910b9cda22cSBarry Smith } 1911b9cda22cSBarry Smith 19122b692628SMatthew Knepley 19132f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/ 19142f7816d4SBarry Smith #undef __FUNCT__ 1915dbdd7285SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_11" 1916dbdd7285SBarry Smith PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1917dbdd7285SBarry Smith { 1918dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1919dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1920d2840e09SBarry Smith const PetscScalar *x,*v; 1921d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1922dbdd7285SBarry Smith PetscErrorCode ierr; 1923d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1924d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 1925dbdd7285SBarry Smith 1926dbdd7285SBarry Smith PetscFunctionBegin; 19273649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1928dbdd7285SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1929dbdd7285SBarry Smith idx = a->j; 1930dbdd7285SBarry Smith v = a->a; 1931dbdd7285SBarry Smith ii = a->i; 1932dbdd7285SBarry Smith 1933dbdd7285SBarry Smith for (i=0; i<m; i++) { 1934dbdd7285SBarry Smith jrow = ii[i]; 1935dbdd7285SBarry Smith n = ii[i+1] - jrow; 1936dbdd7285SBarry Smith sum1 = 0.0; 1937dbdd7285SBarry Smith sum2 = 0.0; 1938dbdd7285SBarry Smith sum3 = 0.0; 1939dbdd7285SBarry Smith sum4 = 0.0; 1940dbdd7285SBarry Smith sum5 = 0.0; 1941dbdd7285SBarry Smith sum6 = 0.0; 1942dbdd7285SBarry Smith sum7 = 0.0; 1943dbdd7285SBarry Smith sum8 = 0.0; 1944dbdd7285SBarry Smith sum9 = 0.0; 1945dbdd7285SBarry Smith sum10 = 0.0; 1946dbdd7285SBarry Smith sum11 = 0.0; 1947dbdd7285SBarry Smith nonzerorow += (n>0); 1948dbdd7285SBarry Smith for (j=0; j<n; j++) { 1949dbdd7285SBarry Smith sum1 += v[jrow]*x[11*idx[jrow]]; 1950dbdd7285SBarry Smith sum2 += v[jrow]*x[11*idx[jrow]+1]; 1951dbdd7285SBarry Smith sum3 += v[jrow]*x[11*idx[jrow]+2]; 1952dbdd7285SBarry Smith sum4 += v[jrow]*x[11*idx[jrow]+3]; 1953dbdd7285SBarry Smith sum5 += v[jrow]*x[11*idx[jrow]+4]; 1954dbdd7285SBarry Smith sum6 += v[jrow]*x[11*idx[jrow]+5]; 1955dbdd7285SBarry Smith sum7 += v[jrow]*x[11*idx[jrow]+6]; 1956dbdd7285SBarry Smith sum8 += v[jrow]*x[11*idx[jrow]+7]; 1957dbdd7285SBarry Smith sum9 += v[jrow]*x[11*idx[jrow]+8]; 1958dbdd7285SBarry Smith sum10 += v[jrow]*x[11*idx[jrow]+9]; 1959dbdd7285SBarry Smith sum11 += v[jrow]*x[11*idx[jrow]+10]; 1960dbdd7285SBarry Smith jrow++; 1961dbdd7285SBarry Smith } 1962dbdd7285SBarry Smith y[11*i] = sum1; 1963dbdd7285SBarry Smith y[11*i+1] = sum2; 1964dbdd7285SBarry Smith y[11*i+2] = sum3; 1965dbdd7285SBarry Smith y[11*i+3] = sum4; 1966dbdd7285SBarry Smith y[11*i+4] = sum5; 1967dbdd7285SBarry Smith y[11*i+5] = sum6; 1968dbdd7285SBarry Smith y[11*i+6] = sum7; 1969dbdd7285SBarry Smith y[11*i+7] = sum8; 1970dbdd7285SBarry Smith y[11*i+8] = sum9; 1971dbdd7285SBarry Smith y[11*i+9] = sum10; 1972dbdd7285SBarry Smith y[11*i+10] = sum11; 1973dbdd7285SBarry Smith } 1974dbdd7285SBarry Smith 1975dbdd7285SBarry Smith ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr); 19763649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1977dbdd7285SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1978dbdd7285SBarry Smith PetscFunctionReturn(0); 1979dbdd7285SBarry Smith } 1980dbdd7285SBarry Smith 1981dbdd7285SBarry Smith #undef __FUNCT__ 1982dbdd7285SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_11" 1983dbdd7285SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 1984dbdd7285SBarry Smith { 1985dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1986dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1987d2840e09SBarry Smith const PetscScalar *x,*v; 1988d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1989dbdd7285SBarry Smith PetscErrorCode ierr; 1990d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1991dbdd7285SBarry Smith PetscInt n,i,jrow,j; 1992dbdd7285SBarry Smith 1993dbdd7285SBarry Smith PetscFunctionBegin; 1994dbdd7285SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 19953649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1996dbdd7285SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1997dbdd7285SBarry Smith idx = a->j; 1998dbdd7285SBarry Smith v = a->a; 1999dbdd7285SBarry Smith ii = a->i; 2000dbdd7285SBarry Smith 2001dbdd7285SBarry Smith for (i=0; i<m; i++) { 2002dbdd7285SBarry Smith jrow = ii[i]; 2003dbdd7285SBarry Smith n = ii[i+1] - jrow; 2004dbdd7285SBarry Smith sum1 = 0.0; 2005dbdd7285SBarry Smith sum2 = 0.0; 2006dbdd7285SBarry Smith sum3 = 0.0; 2007dbdd7285SBarry Smith sum4 = 0.0; 2008dbdd7285SBarry Smith sum5 = 0.0; 2009dbdd7285SBarry Smith sum6 = 0.0; 2010dbdd7285SBarry Smith sum7 = 0.0; 2011dbdd7285SBarry Smith sum8 = 0.0; 2012dbdd7285SBarry Smith sum9 = 0.0; 2013dbdd7285SBarry Smith sum10 = 0.0; 2014dbdd7285SBarry Smith sum11 = 0.0; 2015dbdd7285SBarry Smith for (j=0; j<n; j++) { 2016dbdd7285SBarry Smith sum1 += v[jrow]*x[11*idx[jrow]]; 2017dbdd7285SBarry Smith sum2 += v[jrow]*x[11*idx[jrow]+1]; 2018dbdd7285SBarry Smith sum3 += v[jrow]*x[11*idx[jrow]+2]; 2019dbdd7285SBarry Smith sum4 += v[jrow]*x[11*idx[jrow]+3]; 2020dbdd7285SBarry Smith sum5 += v[jrow]*x[11*idx[jrow]+4]; 2021dbdd7285SBarry Smith sum6 += v[jrow]*x[11*idx[jrow]+5]; 2022dbdd7285SBarry Smith sum7 += v[jrow]*x[11*idx[jrow]+6]; 2023dbdd7285SBarry Smith sum8 += v[jrow]*x[11*idx[jrow]+7]; 2024dbdd7285SBarry Smith sum9 += v[jrow]*x[11*idx[jrow]+8]; 2025dbdd7285SBarry Smith sum10 += v[jrow]*x[11*idx[jrow]+9]; 2026dbdd7285SBarry Smith sum11 += v[jrow]*x[11*idx[jrow]+10]; 2027dbdd7285SBarry Smith jrow++; 2028dbdd7285SBarry Smith } 2029dbdd7285SBarry Smith y[11*i] += sum1; 2030dbdd7285SBarry Smith y[11*i+1] += sum2; 2031dbdd7285SBarry Smith y[11*i+2] += sum3; 2032dbdd7285SBarry Smith y[11*i+3] += sum4; 2033dbdd7285SBarry Smith y[11*i+4] += sum5; 2034dbdd7285SBarry Smith y[11*i+5] += sum6; 2035dbdd7285SBarry Smith y[11*i+6] += sum7; 2036dbdd7285SBarry Smith y[11*i+7] += sum8; 2037dbdd7285SBarry Smith y[11*i+8] += sum9; 2038dbdd7285SBarry Smith y[11*i+9] += sum10; 2039dbdd7285SBarry Smith y[11*i+10] += sum11; 2040dbdd7285SBarry Smith } 2041dbdd7285SBarry Smith 2042dbdd7285SBarry Smith ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 20433649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2044dbdd7285SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2045dbdd7285SBarry Smith PetscFunctionReturn(0); 2046dbdd7285SBarry Smith } 2047dbdd7285SBarry Smith 2048dbdd7285SBarry Smith #undef __FUNCT__ 2049dbdd7285SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_11" 2050dbdd7285SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 2051dbdd7285SBarry Smith { 2052dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2053dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2054d2840e09SBarry Smith const PetscScalar *x,*v; 2055d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 2056dbdd7285SBarry Smith PetscErrorCode ierr; 2057d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2058d2840e09SBarry Smith PetscInt n,i; 2059dbdd7285SBarry Smith 2060dbdd7285SBarry Smith PetscFunctionBegin; 2061d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 20623649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2063dbdd7285SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2064dbdd7285SBarry Smith 2065dbdd7285SBarry Smith for (i=0; i<m; i++) { 2066dbdd7285SBarry Smith idx = a->j + a->i[i] ; 2067dbdd7285SBarry Smith v = a->a + a->i[i] ; 2068dbdd7285SBarry Smith n = a->i[i+1] - a->i[i]; 2069dbdd7285SBarry Smith alpha1 = x[11*i]; 2070dbdd7285SBarry Smith alpha2 = x[11*i+1]; 2071dbdd7285SBarry Smith alpha3 = x[11*i+2]; 2072dbdd7285SBarry Smith alpha4 = x[11*i+3]; 2073dbdd7285SBarry Smith alpha5 = x[11*i+4]; 2074dbdd7285SBarry Smith alpha6 = x[11*i+5]; 2075dbdd7285SBarry Smith alpha7 = x[11*i+6]; 2076dbdd7285SBarry Smith alpha8 = x[11*i+7]; 2077dbdd7285SBarry Smith alpha9 = x[11*i+8]; 2078dbdd7285SBarry Smith alpha10 = x[11*i+9]; 2079dbdd7285SBarry Smith alpha11 = x[11*i+10]; 2080dbdd7285SBarry Smith while (n-->0) { 2081dbdd7285SBarry Smith y[11*(*idx)] += alpha1*(*v); 2082dbdd7285SBarry Smith y[11*(*idx)+1] += alpha2*(*v); 2083dbdd7285SBarry Smith y[11*(*idx)+2] += alpha3*(*v); 2084dbdd7285SBarry Smith y[11*(*idx)+3] += alpha4*(*v); 2085dbdd7285SBarry Smith y[11*(*idx)+4] += alpha5*(*v); 2086dbdd7285SBarry Smith y[11*(*idx)+5] += alpha6*(*v); 2087dbdd7285SBarry Smith y[11*(*idx)+6] += alpha7*(*v); 2088dbdd7285SBarry Smith y[11*(*idx)+7] += alpha8*(*v); 2089dbdd7285SBarry Smith y[11*(*idx)+8] += alpha9*(*v); 2090dbdd7285SBarry Smith y[11*(*idx)+9] += alpha10*(*v); 2091dbdd7285SBarry Smith y[11*(*idx)+10] += alpha11*(*v); 2092dbdd7285SBarry Smith idx++; v++; 2093dbdd7285SBarry Smith } 2094dbdd7285SBarry Smith } 2095dbdd7285SBarry Smith ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 20963649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2097dbdd7285SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2098dbdd7285SBarry Smith PetscFunctionReturn(0); 2099dbdd7285SBarry Smith } 2100dbdd7285SBarry Smith 2101dbdd7285SBarry Smith #undef __FUNCT__ 2102dbdd7285SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_11" 2103dbdd7285SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 2104dbdd7285SBarry Smith { 2105dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2106dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2107d2840e09SBarry Smith const PetscScalar *x,*v; 2108d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 2109dbdd7285SBarry Smith PetscErrorCode ierr; 2110d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2111d2840e09SBarry Smith PetscInt n,i; 2112dbdd7285SBarry Smith 2113dbdd7285SBarry Smith PetscFunctionBegin; 2114dbdd7285SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 21153649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2116dbdd7285SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2117dbdd7285SBarry Smith for (i=0; i<m; i++) { 2118dbdd7285SBarry Smith idx = a->j + a->i[i] ; 2119dbdd7285SBarry Smith v = a->a + a->i[i] ; 2120dbdd7285SBarry Smith n = a->i[i+1] - a->i[i]; 2121dbdd7285SBarry Smith alpha1 = x[11*i]; 2122dbdd7285SBarry Smith alpha2 = x[11*i+1]; 2123dbdd7285SBarry Smith alpha3 = x[11*i+2]; 2124dbdd7285SBarry Smith alpha4 = x[11*i+3]; 2125dbdd7285SBarry Smith alpha5 = x[11*i+4]; 2126dbdd7285SBarry Smith alpha6 = x[11*i+5]; 2127dbdd7285SBarry Smith alpha7 = x[11*i+6]; 2128dbdd7285SBarry Smith alpha8 = x[11*i+7]; 2129dbdd7285SBarry Smith alpha9 = x[11*i+8]; 2130dbdd7285SBarry Smith alpha10 = x[11*i+9]; 2131dbdd7285SBarry Smith alpha11 = x[11*i+10]; 2132dbdd7285SBarry Smith while (n-->0) { 2133dbdd7285SBarry Smith y[11*(*idx)] += alpha1*(*v); 2134dbdd7285SBarry Smith y[11*(*idx)+1] += alpha2*(*v); 2135dbdd7285SBarry Smith y[11*(*idx)+2] += alpha3*(*v); 2136dbdd7285SBarry Smith y[11*(*idx)+3] += alpha4*(*v); 2137dbdd7285SBarry Smith y[11*(*idx)+4] += alpha5*(*v); 2138dbdd7285SBarry Smith y[11*(*idx)+5] += alpha6*(*v); 2139dbdd7285SBarry Smith y[11*(*idx)+6] += alpha7*(*v); 2140dbdd7285SBarry Smith y[11*(*idx)+7] += alpha8*(*v); 2141dbdd7285SBarry Smith y[11*(*idx)+8] += alpha9*(*v); 2142dbdd7285SBarry Smith y[11*(*idx)+9] += alpha10*(*v); 2143dbdd7285SBarry Smith y[11*(*idx)+10] += alpha11*(*v); 2144dbdd7285SBarry Smith idx++; v++; 2145dbdd7285SBarry Smith } 2146dbdd7285SBarry Smith } 2147dbdd7285SBarry Smith ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 21483649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2149dbdd7285SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2150dbdd7285SBarry Smith PetscFunctionReturn(0); 2151dbdd7285SBarry Smith } 2152dbdd7285SBarry Smith 2153dbdd7285SBarry Smith 2154dbdd7285SBarry Smith /*--------------------------------------------------------------------------------------------*/ 2155dbdd7285SBarry Smith #undef __FUNCT__ 21562f7816d4SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_16" 2157dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 21582f7816d4SBarry Smith { 21592f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 21602f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2161d2840e09SBarry Smith const PetscScalar *x,*v; 2162d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 21632f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2164dfbe8321SBarry Smith PetscErrorCode ierr; 2165d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2166d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 21672f7816d4SBarry Smith 21682f7816d4SBarry Smith PetscFunctionBegin; 21693649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 21701ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 21712f7816d4SBarry Smith idx = a->j; 21722f7816d4SBarry Smith v = a->a; 21732f7816d4SBarry Smith ii = a->i; 21742f7816d4SBarry Smith 21752f7816d4SBarry Smith for (i=0; i<m; i++) { 21762f7816d4SBarry Smith jrow = ii[i]; 21772f7816d4SBarry Smith n = ii[i+1] - jrow; 21782f7816d4SBarry Smith sum1 = 0.0; 21792f7816d4SBarry Smith sum2 = 0.0; 21802f7816d4SBarry Smith sum3 = 0.0; 21812f7816d4SBarry Smith sum4 = 0.0; 21822f7816d4SBarry Smith sum5 = 0.0; 21832f7816d4SBarry Smith sum6 = 0.0; 21842f7816d4SBarry Smith sum7 = 0.0; 21852f7816d4SBarry Smith sum8 = 0.0; 21862f7816d4SBarry Smith sum9 = 0.0; 21872f7816d4SBarry Smith sum10 = 0.0; 21882f7816d4SBarry Smith sum11 = 0.0; 21892f7816d4SBarry Smith sum12 = 0.0; 21902f7816d4SBarry Smith sum13 = 0.0; 21912f7816d4SBarry Smith sum14 = 0.0; 21922f7816d4SBarry Smith sum15 = 0.0; 21932f7816d4SBarry Smith sum16 = 0.0; 2194b3c51c66SMatthew Knepley nonzerorow += (n>0); 21952f7816d4SBarry Smith for (j=0; j<n; j++) { 21962f7816d4SBarry Smith sum1 += v[jrow]*x[16*idx[jrow]]; 21972f7816d4SBarry Smith sum2 += v[jrow]*x[16*idx[jrow]+1]; 21982f7816d4SBarry Smith sum3 += v[jrow]*x[16*idx[jrow]+2]; 21992f7816d4SBarry Smith sum4 += v[jrow]*x[16*idx[jrow]+3]; 22002f7816d4SBarry Smith sum5 += v[jrow]*x[16*idx[jrow]+4]; 22012f7816d4SBarry Smith sum6 += v[jrow]*x[16*idx[jrow]+5]; 22022f7816d4SBarry Smith sum7 += v[jrow]*x[16*idx[jrow]+6]; 22032f7816d4SBarry Smith sum8 += v[jrow]*x[16*idx[jrow]+7]; 22042f7816d4SBarry Smith sum9 += v[jrow]*x[16*idx[jrow]+8]; 22052f7816d4SBarry Smith sum10 += v[jrow]*x[16*idx[jrow]+9]; 22062f7816d4SBarry Smith sum11 += v[jrow]*x[16*idx[jrow]+10]; 22072f7816d4SBarry Smith sum12 += v[jrow]*x[16*idx[jrow]+11]; 22082f7816d4SBarry Smith sum13 += v[jrow]*x[16*idx[jrow]+12]; 22092f7816d4SBarry Smith sum14 += v[jrow]*x[16*idx[jrow]+13]; 22102f7816d4SBarry Smith sum15 += v[jrow]*x[16*idx[jrow]+14]; 22112f7816d4SBarry Smith sum16 += v[jrow]*x[16*idx[jrow]+15]; 22122f7816d4SBarry Smith jrow++; 22132f7816d4SBarry Smith } 22142f7816d4SBarry Smith y[16*i] = sum1; 22152f7816d4SBarry Smith y[16*i+1] = sum2; 22162f7816d4SBarry Smith y[16*i+2] = sum3; 22172f7816d4SBarry Smith y[16*i+3] = sum4; 22182f7816d4SBarry Smith y[16*i+4] = sum5; 22192f7816d4SBarry Smith y[16*i+5] = sum6; 22202f7816d4SBarry Smith y[16*i+6] = sum7; 22212f7816d4SBarry Smith y[16*i+7] = sum8; 22222f7816d4SBarry Smith y[16*i+8] = sum9; 22232f7816d4SBarry Smith y[16*i+9] = sum10; 22242f7816d4SBarry Smith y[16*i+10] = sum11; 22252f7816d4SBarry Smith y[16*i+11] = sum12; 22262f7816d4SBarry Smith y[16*i+12] = sum13; 22272f7816d4SBarry Smith y[16*i+13] = sum14; 22282f7816d4SBarry Smith y[16*i+14] = sum15; 22292f7816d4SBarry Smith y[16*i+15] = sum16; 22302f7816d4SBarry Smith } 22312f7816d4SBarry Smith 2232dc0b31edSSatish Balay ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr); 22333649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 22341ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 22352f7816d4SBarry Smith PetscFunctionReturn(0); 22362f7816d4SBarry Smith } 22372f7816d4SBarry Smith 22382f7816d4SBarry Smith #undef __FUNCT__ 22392f7816d4SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 2240dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 22412f7816d4SBarry Smith { 22422f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 22432f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2244d2840e09SBarry Smith const PetscScalar *x,*v; 2245d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 22462f7816d4SBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2247dfbe8321SBarry Smith PetscErrorCode ierr; 2248d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2249d2840e09SBarry Smith PetscInt n,i; 22502f7816d4SBarry Smith 22512f7816d4SBarry Smith PetscFunctionBegin; 2252d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 22533649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 22541ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2255bfec09a0SHong Zhang 22562f7816d4SBarry Smith for (i=0; i<m; i++) { 2257bfec09a0SHong Zhang idx = a->j + a->i[i] ; 2258bfec09a0SHong Zhang v = a->a + a->i[i] ; 22592f7816d4SBarry Smith n = a->i[i+1] - a->i[i]; 22602f7816d4SBarry Smith alpha1 = x[16*i]; 22612f7816d4SBarry Smith alpha2 = x[16*i+1]; 22622f7816d4SBarry Smith alpha3 = x[16*i+2]; 22632f7816d4SBarry Smith alpha4 = x[16*i+3]; 22642f7816d4SBarry Smith alpha5 = x[16*i+4]; 22652f7816d4SBarry Smith alpha6 = x[16*i+5]; 22662f7816d4SBarry Smith alpha7 = x[16*i+6]; 22672f7816d4SBarry Smith alpha8 = x[16*i+7]; 22682f7816d4SBarry Smith alpha9 = x[16*i+8]; 22692f7816d4SBarry Smith alpha10 = x[16*i+9]; 22702f7816d4SBarry Smith alpha11 = x[16*i+10]; 22712f7816d4SBarry Smith alpha12 = x[16*i+11]; 22722f7816d4SBarry Smith alpha13 = x[16*i+12]; 22732f7816d4SBarry Smith alpha14 = x[16*i+13]; 22742f7816d4SBarry Smith alpha15 = x[16*i+14]; 22752f7816d4SBarry Smith alpha16 = x[16*i+15]; 22762f7816d4SBarry Smith while (n-->0) { 22772f7816d4SBarry Smith y[16*(*idx)] += alpha1*(*v); 22782f7816d4SBarry Smith y[16*(*idx)+1] += alpha2*(*v); 22792f7816d4SBarry Smith y[16*(*idx)+2] += alpha3*(*v); 22802f7816d4SBarry Smith y[16*(*idx)+3] += alpha4*(*v); 22812f7816d4SBarry Smith y[16*(*idx)+4] += alpha5*(*v); 22822f7816d4SBarry Smith y[16*(*idx)+5] += alpha6*(*v); 22832f7816d4SBarry Smith y[16*(*idx)+6] += alpha7*(*v); 22842f7816d4SBarry Smith y[16*(*idx)+7] += alpha8*(*v); 22852f7816d4SBarry Smith y[16*(*idx)+8] += alpha9*(*v); 22862f7816d4SBarry Smith y[16*(*idx)+9] += alpha10*(*v); 22872f7816d4SBarry Smith y[16*(*idx)+10] += alpha11*(*v); 22882f7816d4SBarry Smith y[16*(*idx)+11] += alpha12*(*v); 22892f7816d4SBarry Smith y[16*(*idx)+12] += alpha13*(*v); 22902f7816d4SBarry Smith y[16*(*idx)+13] += alpha14*(*v); 22912f7816d4SBarry Smith y[16*(*idx)+14] += alpha15*(*v); 22922f7816d4SBarry Smith y[16*(*idx)+15] += alpha16*(*v); 22932f7816d4SBarry Smith idx++; v++; 22942f7816d4SBarry Smith } 22952f7816d4SBarry Smith } 2296dc0b31edSSatish Balay ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 22973649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 22981ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 22992f7816d4SBarry Smith PetscFunctionReturn(0); 23002f7816d4SBarry Smith } 23012f7816d4SBarry Smith 23022f7816d4SBarry Smith #undef __FUNCT__ 23032f7816d4SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 2304dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 23052f7816d4SBarry Smith { 23062f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 23072f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2308d2840e09SBarry Smith const PetscScalar *x,*v; 2309d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 23102f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2311dfbe8321SBarry Smith PetscErrorCode ierr; 2312d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2313b24ad042SBarry Smith PetscInt n,i,jrow,j; 23142f7816d4SBarry Smith 23152f7816d4SBarry Smith PetscFunctionBegin; 23162f7816d4SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 23173649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 23181ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 23192f7816d4SBarry Smith idx = a->j; 23202f7816d4SBarry Smith v = a->a; 23212f7816d4SBarry Smith ii = a->i; 23222f7816d4SBarry Smith 23232f7816d4SBarry Smith for (i=0; i<m; i++) { 23242f7816d4SBarry Smith jrow = ii[i]; 23252f7816d4SBarry Smith n = ii[i+1] - jrow; 23262f7816d4SBarry Smith sum1 = 0.0; 23272f7816d4SBarry Smith sum2 = 0.0; 23282f7816d4SBarry Smith sum3 = 0.0; 23292f7816d4SBarry Smith sum4 = 0.0; 23302f7816d4SBarry Smith sum5 = 0.0; 23312f7816d4SBarry Smith sum6 = 0.0; 23322f7816d4SBarry Smith sum7 = 0.0; 23332f7816d4SBarry Smith sum8 = 0.0; 23342f7816d4SBarry Smith sum9 = 0.0; 23352f7816d4SBarry Smith sum10 = 0.0; 23362f7816d4SBarry Smith sum11 = 0.0; 23372f7816d4SBarry Smith sum12 = 0.0; 23382f7816d4SBarry Smith sum13 = 0.0; 23392f7816d4SBarry Smith sum14 = 0.0; 23402f7816d4SBarry Smith sum15 = 0.0; 23412f7816d4SBarry Smith sum16 = 0.0; 23422f7816d4SBarry Smith for (j=0; j<n; j++) { 23432f7816d4SBarry Smith sum1 += v[jrow]*x[16*idx[jrow]]; 23442f7816d4SBarry Smith sum2 += v[jrow]*x[16*idx[jrow]+1]; 23452f7816d4SBarry Smith sum3 += v[jrow]*x[16*idx[jrow]+2]; 23462f7816d4SBarry Smith sum4 += v[jrow]*x[16*idx[jrow]+3]; 23472f7816d4SBarry Smith sum5 += v[jrow]*x[16*idx[jrow]+4]; 23482f7816d4SBarry Smith sum6 += v[jrow]*x[16*idx[jrow]+5]; 23492f7816d4SBarry Smith sum7 += v[jrow]*x[16*idx[jrow]+6]; 23502f7816d4SBarry Smith sum8 += v[jrow]*x[16*idx[jrow]+7]; 23512f7816d4SBarry Smith sum9 += v[jrow]*x[16*idx[jrow]+8]; 23522f7816d4SBarry Smith sum10 += v[jrow]*x[16*idx[jrow]+9]; 23532f7816d4SBarry Smith sum11 += v[jrow]*x[16*idx[jrow]+10]; 23542f7816d4SBarry Smith sum12 += v[jrow]*x[16*idx[jrow]+11]; 23552f7816d4SBarry Smith sum13 += v[jrow]*x[16*idx[jrow]+12]; 23562f7816d4SBarry Smith sum14 += v[jrow]*x[16*idx[jrow]+13]; 23572f7816d4SBarry Smith sum15 += v[jrow]*x[16*idx[jrow]+14]; 23582f7816d4SBarry Smith sum16 += v[jrow]*x[16*idx[jrow]+15]; 23592f7816d4SBarry Smith jrow++; 23602f7816d4SBarry Smith } 23612f7816d4SBarry Smith y[16*i] += sum1; 23622f7816d4SBarry Smith y[16*i+1] += sum2; 23632f7816d4SBarry Smith y[16*i+2] += sum3; 23642f7816d4SBarry Smith y[16*i+3] += sum4; 23652f7816d4SBarry Smith y[16*i+4] += sum5; 23662f7816d4SBarry Smith y[16*i+5] += sum6; 23672f7816d4SBarry Smith y[16*i+6] += sum7; 23682f7816d4SBarry Smith y[16*i+7] += sum8; 23692f7816d4SBarry Smith y[16*i+8] += sum9; 23702f7816d4SBarry Smith y[16*i+9] += sum10; 23712f7816d4SBarry Smith y[16*i+10] += sum11; 23722f7816d4SBarry Smith y[16*i+11] += sum12; 23732f7816d4SBarry Smith y[16*i+12] += sum13; 23742f7816d4SBarry Smith y[16*i+13] += sum14; 23752f7816d4SBarry Smith y[16*i+14] += sum15; 23762f7816d4SBarry Smith y[16*i+15] += sum16; 23772f7816d4SBarry Smith } 23782f7816d4SBarry Smith 2379dc0b31edSSatish Balay ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 23803649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 23811ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 23822f7816d4SBarry Smith PetscFunctionReturn(0); 23832f7816d4SBarry Smith } 23842f7816d4SBarry Smith 23852f7816d4SBarry Smith #undef __FUNCT__ 23862f7816d4SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 2387dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 23882f7816d4SBarry Smith { 23892f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 23902f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2391d2840e09SBarry Smith const PetscScalar *x,*v; 2392d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 23932f7816d4SBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2394dfbe8321SBarry Smith PetscErrorCode ierr; 2395d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2396d2840e09SBarry Smith PetscInt n,i; 23972f7816d4SBarry Smith 23982f7816d4SBarry Smith PetscFunctionBegin; 23992f7816d4SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 24003649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 24011ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 24022f7816d4SBarry Smith for (i=0; i<m; i++) { 2403bfec09a0SHong Zhang idx = a->j + a->i[i] ; 2404bfec09a0SHong Zhang v = a->a + a->i[i] ; 24052f7816d4SBarry Smith n = a->i[i+1] - a->i[i]; 24062f7816d4SBarry Smith alpha1 = x[16*i]; 24072f7816d4SBarry Smith alpha2 = x[16*i+1]; 24082f7816d4SBarry Smith alpha3 = x[16*i+2]; 24092f7816d4SBarry Smith alpha4 = x[16*i+3]; 24102f7816d4SBarry Smith alpha5 = x[16*i+4]; 24112f7816d4SBarry Smith alpha6 = x[16*i+5]; 24122f7816d4SBarry Smith alpha7 = x[16*i+6]; 24132f7816d4SBarry Smith alpha8 = x[16*i+7]; 24142f7816d4SBarry Smith alpha9 = x[16*i+8]; 24152f7816d4SBarry Smith alpha10 = x[16*i+9]; 24162f7816d4SBarry Smith alpha11 = x[16*i+10]; 24172f7816d4SBarry Smith alpha12 = x[16*i+11]; 24182f7816d4SBarry Smith alpha13 = x[16*i+12]; 24192f7816d4SBarry Smith alpha14 = x[16*i+13]; 24202f7816d4SBarry Smith alpha15 = x[16*i+14]; 24212f7816d4SBarry Smith alpha16 = x[16*i+15]; 24222f7816d4SBarry Smith while (n-->0) { 24232f7816d4SBarry Smith y[16*(*idx)] += alpha1*(*v); 24242f7816d4SBarry Smith y[16*(*idx)+1] += alpha2*(*v); 24252f7816d4SBarry Smith y[16*(*idx)+2] += alpha3*(*v); 24262f7816d4SBarry Smith y[16*(*idx)+3] += alpha4*(*v); 24272f7816d4SBarry Smith y[16*(*idx)+4] += alpha5*(*v); 24282f7816d4SBarry Smith y[16*(*idx)+5] += alpha6*(*v); 24292f7816d4SBarry Smith y[16*(*idx)+6] += alpha7*(*v); 24302f7816d4SBarry Smith y[16*(*idx)+7] += alpha8*(*v); 24312f7816d4SBarry Smith y[16*(*idx)+8] += alpha9*(*v); 24322f7816d4SBarry Smith y[16*(*idx)+9] += alpha10*(*v); 24332f7816d4SBarry Smith y[16*(*idx)+10] += alpha11*(*v); 24342f7816d4SBarry Smith y[16*(*idx)+11] += alpha12*(*v); 24352f7816d4SBarry Smith y[16*(*idx)+12] += alpha13*(*v); 24362f7816d4SBarry Smith y[16*(*idx)+13] += alpha14*(*v); 24372f7816d4SBarry Smith y[16*(*idx)+14] += alpha15*(*v); 24382f7816d4SBarry Smith y[16*(*idx)+15] += alpha16*(*v); 24392f7816d4SBarry Smith idx++; v++; 24402f7816d4SBarry Smith } 24412f7816d4SBarry Smith } 2442dc0b31edSSatish Balay ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 24433649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 24441ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 244566ed3db0SBarry Smith PetscFunctionReturn(0); 244666ed3db0SBarry Smith } 244766ed3db0SBarry Smith 2448ed1c418cSBarry Smith /*--------------------------------------------------------------------------------------------*/ 2449ed1c418cSBarry Smith #undef __FUNCT__ 2450ed1c418cSBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_18" 2451ed1c418cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2452ed1c418cSBarry Smith { 2453ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2454ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2455d2840e09SBarry Smith const PetscScalar *x,*v; 2456d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2457ed1c418cSBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2458ed1c418cSBarry Smith PetscErrorCode ierr; 2459d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2460d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 2461ed1c418cSBarry Smith 2462ed1c418cSBarry Smith PetscFunctionBegin; 24633649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2464ed1c418cSBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2465ed1c418cSBarry Smith idx = a->j; 2466ed1c418cSBarry Smith v = a->a; 2467ed1c418cSBarry Smith ii = a->i; 2468ed1c418cSBarry Smith 2469ed1c418cSBarry Smith for (i=0; i<m; i++) { 2470ed1c418cSBarry Smith jrow = ii[i]; 2471ed1c418cSBarry Smith n = ii[i+1] - jrow; 2472ed1c418cSBarry Smith sum1 = 0.0; 2473ed1c418cSBarry Smith sum2 = 0.0; 2474ed1c418cSBarry Smith sum3 = 0.0; 2475ed1c418cSBarry Smith sum4 = 0.0; 2476ed1c418cSBarry Smith sum5 = 0.0; 2477ed1c418cSBarry Smith sum6 = 0.0; 2478ed1c418cSBarry Smith sum7 = 0.0; 2479ed1c418cSBarry Smith sum8 = 0.0; 2480ed1c418cSBarry Smith sum9 = 0.0; 2481ed1c418cSBarry Smith sum10 = 0.0; 2482ed1c418cSBarry Smith sum11 = 0.0; 2483ed1c418cSBarry Smith sum12 = 0.0; 2484ed1c418cSBarry Smith sum13 = 0.0; 2485ed1c418cSBarry Smith sum14 = 0.0; 2486ed1c418cSBarry Smith sum15 = 0.0; 2487ed1c418cSBarry Smith sum16 = 0.0; 2488ed1c418cSBarry Smith sum17 = 0.0; 2489ed1c418cSBarry Smith sum18 = 0.0; 2490ed1c418cSBarry Smith nonzerorow += (n>0); 2491ed1c418cSBarry Smith for (j=0; j<n; j++) { 2492ed1c418cSBarry Smith sum1 += v[jrow]*x[18*idx[jrow]]; 2493ed1c418cSBarry Smith sum2 += v[jrow]*x[18*idx[jrow]+1]; 2494ed1c418cSBarry Smith sum3 += v[jrow]*x[18*idx[jrow]+2]; 2495ed1c418cSBarry Smith sum4 += v[jrow]*x[18*idx[jrow]+3]; 2496ed1c418cSBarry Smith sum5 += v[jrow]*x[18*idx[jrow]+4]; 2497ed1c418cSBarry Smith sum6 += v[jrow]*x[18*idx[jrow]+5]; 2498ed1c418cSBarry Smith sum7 += v[jrow]*x[18*idx[jrow]+6]; 2499ed1c418cSBarry Smith sum8 += v[jrow]*x[18*idx[jrow]+7]; 2500ed1c418cSBarry Smith sum9 += v[jrow]*x[18*idx[jrow]+8]; 2501ed1c418cSBarry Smith sum10 += v[jrow]*x[18*idx[jrow]+9]; 2502ed1c418cSBarry Smith sum11 += v[jrow]*x[18*idx[jrow]+10]; 2503ed1c418cSBarry Smith sum12 += v[jrow]*x[18*idx[jrow]+11]; 2504ed1c418cSBarry Smith sum13 += v[jrow]*x[18*idx[jrow]+12]; 2505ed1c418cSBarry Smith sum14 += v[jrow]*x[18*idx[jrow]+13]; 2506ed1c418cSBarry Smith sum15 += v[jrow]*x[18*idx[jrow]+14]; 2507ed1c418cSBarry Smith sum16 += v[jrow]*x[18*idx[jrow]+15]; 2508ed1c418cSBarry Smith sum17 += v[jrow]*x[18*idx[jrow]+16]; 2509ed1c418cSBarry Smith sum18 += v[jrow]*x[18*idx[jrow]+17]; 2510ed1c418cSBarry Smith jrow++; 2511ed1c418cSBarry Smith } 2512ed1c418cSBarry Smith y[18*i] = sum1; 2513ed1c418cSBarry Smith y[18*i+1] = sum2; 2514ed1c418cSBarry Smith y[18*i+2] = sum3; 2515ed1c418cSBarry Smith y[18*i+3] = sum4; 2516ed1c418cSBarry Smith y[18*i+4] = sum5; 2517ed1c418cSBarry Smith y[18*i+5] = sum6; 2518ed1c418cSBarry Smith y[18*i+6] = sum7; 2519ed1c418cSBarry Smith y[18*i+7] = sum8; 2520ed1c418cSBarry Smith y[18*i+8] = sum9; 2521ed1c418cSBarry Smith y[18*i+9] = sum10; 2522ed1c418cSBarry Smith y[18*i+10] = sum11; 2523ed1c418cSBarry Smith y[18*i+11] = sum12; 2524ed1c418cSBarry Smith y[18*i+12] = sum13; 2525ed1c418cSBarry Smith y[18*i+13] = sum14; 2526ed1c418cSBarry Smith y[18*i+14] = sum15; 2527ed1c418cSBarry Smith y[18*i+15] = sum16; 2528ed1c418cSBarry Smith y[18*i+16] = sum17; 2529ed1c418cSBarry Smith y[18*i+17] = sum18; 2530ed1c418cSBarry Smith } 2531ed1c418cSBarry Smith 2532dc0b31edSSatish Balay ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr); 25333649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2534ed1c418cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2535ed1c418cSBarry Smith PetscFunctionReturn(0); 2536ed1c418cSBarry Smith } 2537ed1c418cSBarry Smith 2538ed1c418cSBarry Smith #undef __FUNCT__ 2539ed1c418cSBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_18" 2540ed1c418cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2541ed1c418cSBarry Smith { 2542ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2543ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2544d2840e09SBarry Smith const PetscScalar *x,*v; 2545d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2546ed1c418cSBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2547ed1c418cSBarry Smith PetscErrorCode ierr; 2548d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2549d2840e09SBarry Smith PetscInt n,i; 2550ed1c418cSBarry Smith 2551ed1c418cSBarry Smith PetscFunctionBegin; 2552d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 25533649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2554ed1c418cSBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2555ed1c418cSBarry Smith 2556ed1c418cSBarry Smith for (i=0; i<m; i++) { 2557ed1c418cSBarry Smith idx = a->j + a->i[i] ; 2558ed1c418cSBarry Smith v = a->a + a->i[i] ; 2559ed1c418cSBarry Smith n = a->i[i+1] - a->i[i]; 2560ed1c418cSBarry Smith alpha1 = x[18*i]; 2561ed1c418cSBarry Smith alpha2 = x[18*i+1]; 2562ed1c418cSBarry Smith alpha3 = x[18*i+2]; 2563ed1c418cSBarry Smith alpha4 = x[18*i+3]; 2564ed1c418cSBarry Smith alpha5 = x[18*i+4]; 2565ed1c418cSBarry Smith alpha6 = x[18*i+5]; 2566ed1c418cSBarry Smith alpha7 = x[18*i+6]; 2567ed1c418cSBarry Smith alpha8 = x[18*i+7]; 2568ed1c418cSBarry Smith alpha9 = x[18*i+8]; 2569ed1c418cSBarry Smith alpha10 = x[18*i+9]; 2570ed1c418cSBarry Smith alpha11 = x[18*i+10]; 2571ed1c418cSBarry Smith alpha12 = x[18*i+11]; 2572ed1c418cSBarry Smith alpha13 = x[18*i+12]; 2573ed1c418cSBarry Smith alpha14 = x[18*i+13]; 2574ed1c418cSBarry Smith alpha15 = x[18*i+14]; 2575ed1c418cSBarry Smith alpha16 = x[18*i+15]; 2576ed1c418cSBarry Smith alpha17 = x[18*i+16]; 2577ed1c418cSBarry Smith alpha18 = x[18*i+17]; 2578ed1c418cSBarry Smith while (n-->0) { 2579ed1c418cSBarry Smith y[18*(*idx)] += alpha1*(*v); 2580ed1c418cSBarry Smith y[18*(*idx)+1] += alpha2*(*v); 2581ed1c418cSBarry Smith y[18*(*idx)+2] += alpha3*(*v); 2582ed1c418cSBarry Smith y[18*(*idx)+3] += alpha4*(*v); 2583ed1c418cSBarry Smith y[18*(*idx)+4] += alpha5*(*v); 2584ed1c418cSBarry Smith y[18*(*idx)+5] += alpha6*(*v); 2585ed1c418cSBarry Smith y[18*(*idx)+6] += alpha7*(*v); 2586ed1c418cSBarry Smith y[18*(*idx)+7] += alpha8*(*v); 2587ed1c418cSBarry Smith y[18*(*idx)+8] += alpha9*(*v); 2588ed1c418cSBarry Smith y[18*(*idx)+9] += alpha10*(*v); 2589ed1c418cSBarry Smith y[18*(*idx)+10] += alpha11*(*v); 2590ed1c418cSBarry Smith y[18*(*idx)+11] += alpha12*(*v); 2591ed1c418cSBarry Smith y[18*(*idx)+12] += alpha13*(*v); 2592ed1c418cSBarry Smith y[18*(*idx)+13] += alpha14*(*v); 2593ed1c418cSBarry Smith y[18*(*idx)+14] += alpha15*(*v); 2594ed1c418cSBarry Smith y[18*(*idx)+15] += alpha16*(*v); 2595ed1c418cSBarry Smith y[18*(*idx)+16] += alpha17*(*v); 2596ed1c418cSBarry Smith y[18*(*idx)+17] += alpha18*(*v); 2597ed1c418cSBarry Smith idx++; v++; 2598ed1c418cSBarry Smith } 2599ed1c418cSBarry Smith } 2600dc0b31edSSatish Balay ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 26013649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2602ed1c418cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2603ed1c418cSBarry Smith PetscFunctionReturn(0); 2604ed1c418cSBarry Smith } 2605ed1c418cSBarry Smith 2606ed1c418cSBarry Smith #undef __FUNCT__ 2607ed1c418cSBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_18" 2608ed1c418cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2609ed1c418cSBarry Smith { 2610ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2611ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2612d2840e09SBarry Smith const PetscScalar *x,*v; 2613d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2614ed1c418cSBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2615ed1c418cSBarry Smith PetscErrorCode ierr; 2616d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2617ed1c418cSBarry Smith PetscInt n,i,jrow,j; 2618ed1c418cSBarry Smith 2619ed1c418cSBarry Smith PetscFunctionBegin; 2620ed1c418cSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 26213649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2622ed1c418cSBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2623ed1c418cSBarry Smith idx = a->j; 2624ed1c418cSBarry Smith v = a->a; 2625ed1c418cSBarry Smith ii = a->i; 2626ed1c418cSBarry Smith 2627ed1c418cSBarry Smith for (i=0; i<m; i++) { 2628ed1c418cSBarry Smith jrow = ii[i]; 2629ed1c418cSBarry Smith n = ii[i+1] - jrow; 2630ed1c418cSBarry Smith sum1 = 0.0; 2631ed1c418cSBarry Smith sum2 = 0.0; 2632ed1c418cSBarry Smith sum3 = 0.0; 2633ed1c418cSBarry Smith sum4 = 0.0; 2634ed1c418cSBarry Smith sum5 = 0.0; 2635ed1c418cSBarry Smith sum6 = 0.0; 2636ed1c418cSBarry Smith sum7 = 0.0; 2637ed1c418cSBarry Smith sum8 = 0.0; 2638ed1c418cSBarry Smith sum9 = 0.0; 2639ed1c418cSBarry Smith sum10 = 0.0; 2640ed1c418cSBarry Smith sum11 = 0.0; 2641ed1c418cSBarry Smith sum12 = 0.0; 2642ed1c418cSBarry Smith sum13 = 0.0; 2643ed1c418cSBarry Smith sum14 = 0.0; 2644ed1c418cSBarry Smith sum15 = 0.0; 2645ed1c418cSBarry Smith sum16 = 0.0; 2646ed1c418cSBarry Smith sum17 = 0.0; 2647ed1c418cSBarry Smith sum18 = 0.0; 2648ed1c418cSBarry Smith for (j=0; j<n; j++) { 2649ed1c418cSBarry Smith sum1 += v[jrow]*x[18*idx[jrow]]; 2650ed1c418cSBarry Smith sum2 += v[jrow]*x[18*idx[jrow]+1]; 2651ed1c418cSBarry Smith sum3 += v[jrow]*x[18*idx[jrow]+2]; 2652ed1c418cSBarry Smith sum4 += v[jrow]*x[18*idx[jrow]+3]; 2653ed1c418cSBarry Smith sum5 += v[jrow]*x[18*idx[jrow]+4]; 2654ed1c418cSBarry Smith sum6 += v[jrow]*x[18*idx[jrow]+5]; 2655ed1c418cSBarry Smith sum7 += v[jrow]*x[18*idx[jrow]+6]; 2656ed1c418cSBarry Smith sum8 += v[jrow]*x[18*idx[jrow]+7]; 2657ed1c418cSBarry Smith sum9 += v[jrow]*x[18*idx[jrow]+8]; 2658ed1c418cSBarry Smith sum10 += v[jrow]*x[18*idx[jrow]+9]; 2659ed1c418cSBarry Smith sum11 += v[jrow]*x[18*idx[jrow]+10]; 2660ed1c418cSBarry Smith sum12 += v[jrow]*x[18*idx[jrow]+11]; 2661ed1c418cSBarry Smith sum13 += v[jrow]*x[18*idx[jrow]+12]; 2662ed1c418cSBarry Smith sum14 += v[jrow]*x[18*idx[jrow]+13]; 2663ed1c418cSBarry Smith sum15 += v[jrow]*x[18*idx[jrow]+14]; 2664ed1c418cSBarry Smith sum16 += v[jrow]*x[18*idx[jrow]+15]; 2665ed1c418cSBarry Smith sum17 += v[jrow]*x[18*idx[jrow]+16]; 2666ed1c418cSBarry Smith sum18 += v[jrow]*x[18*idx[jrow]+17]; 2667ed1c418cSBarry Smith jrow++; 2668ed1c418cSBarry Smith } 2669ed1c418cSBarry Smith y[18*i] += sum1; 2670ed1c418cSBarry Smith y[18*i+1] += sum2; 2671ed1c418cSBarry Smith y[18*i+2] += sum3; 2672ed1c418cSBarry Smith y[18*i+3] += sum4; 2673ed1c418cSBarry Smith y[18*i+4] += sum5; 2674ed1c418cSBarry Smith y[18*i+5] += sum6; 2675ed1c418cSBarry Smith y[18*i+6] += sum7; 2676ed1c418cSBarry Smith y[18*i+7] += sum8; 2677ed1c418cSBarry Smith y[18*i+8] += sum9; 2678ed1c418cSBarry Smith y[18*i+9] += sum10; 2679ed1c418cSBarry Smith y[18*i+10] += sum11; 2680ed1c418cSBarry Smith y[18*i+11] += sum12; 2681ed1c418cSBarry Smith y[18*i+12] += sum13; 2682ed1c418cSBarry Smith y[18*i+13] += sum14; 2683ed1c418cSBarry Smith y[18*i+14] += sum15; 2684ed1c418cSBarry Smith y[18*i+15] += sum16; 2685ed1c418cSBarry Smith y[18*i+16] += sum17; 2686ed1c418cSBarry Smith y[18*i+17] += sum18; 2687ed1c418cSBarry Smith } 2688ed1c418cSBarry Smith 2689dc0b31edSSatish Balay ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 26903649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2691ed1c418cSBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2692ed1c418cSBarry Smith PetscFunctionReturn(0); 2693ed1c418cSBarry Smith } 2694ed1c418cSBarry Smith 2695ed1c418cSBarry Smith #undef __FUNCT__ 2696ed1c418cSBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_18" 2697ed1c418cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2698ed1c418cSBarry Smith { 2699ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2700ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2701d2840e09SBarry Smith const PetscScalar *x,*v; 2702d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2703ed1c418cSBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2704ed1c418cSBarry Smith PetscErrorCode ierr; 2705d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2706d2840e09SBarry Smith PetscInt n,i; 2707ed1c418cSBarry Smith 2708ed1c418cSBarry Smith PetscFunctionBegin; 2709ed1c418cSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 27103649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2711ed1c418cSBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2712ed1c418cSBarry Smith for (i=0; i<m; i++) { 2713ed1c418cSBarry Smith idx = a->j + a->i[i] ; 2714ed1c418cSBarry Smith v = a->a + a->i[i] ; 2715ed1c418cSBarry Smith n = a->i[i+1] - a->i[i]; 2716ed1c418cSBarry Smith alpha1 = x[18*i]; 2717ed1c418cSBarry Smith alpha2 = x[18*i+1]; 2718ed1c418cSBarry Smith alpha3 = x[18*i+2]; 2719ed1c418cSBarry Smith alpha4 = x[18*i+3]; 2720ed1c418cSBarry Smith alpha5 = x[18*i+4]; 2721ed1c418cSBarry Smith alpha6 = x[18*i+5]; 2722ed1c418cSBarry Smith alpha7 = x[18*i+6]; 2723ed1c418cSBarry Smith alpha8 = x[18*i+7]; 2724ed1c418cSBarry Smith alpha9 = x[18*i+8]; 2725ed1c418cSBarry Smith alpha10 = x[18*i+9]; 2726ed1c418cSBarry Smith alpha11 = x[18*i+10]; 2727ed1c418cSBarry Smith alpha12 = x[18*i+11]; 2728ed1c418cSBarry Smith alpha13 = x[18*i+12]; 2729ed1c418cSBarry Smith alpha14 = x[18*i+13]; 2730ed1c418cSBarry Smith alpha15 = x[18*i+14]; 2731ed1c418cSBarry Smith alpha16 = x[18*i+15]; 2732ed1c418cSBarry Smith alpha17 = x[18*i+16]; 2733ed1c418cSBarry Smith alpha18 = x[18*i+17]; 2734ed1c418cSBarry Smith while (n-->0) { 2735ed1c418cSBarry Smith y[18*(*idx)] += alpha1*(*v); 2736ed1c418cSBarry Smith y[18*(*idx)+1] += alpha2*(*v); 2737ed1c418cSBarry Smith y[18*(*idx)+2] += alpha3*(*v); 2738ed1c418cSBarry Smith y[18*(*idx)+3] += alpha4*(*v); 2739ed1c418cSBarry Smith y[18*(*idx)+4] += alpha5*(*v); 2740ed1c418cSBarry Smith y[18*(*idx)+5] += alpha6*(*v); 2741ed1c418cSBarry Smith y[18*(*idx)+6] += alpha7*(*v); 2742ed1c418cSBarry Smith y[18*(*idx)+7] += alpha8*(*v); 2743ed1c418cSBarry Smith y[18*(*idx)+8] += alpha9*(*v); 2744ed1c418cSBarry Smith y[18*(*idx)+9] += alpha10*(*v); 2745ed1c418cSBarry Smith y[18*(*idx)+10] += alpha11*(*v); 2746ed1c418cSBarry Smith y[18*(*idx)+11] += alpha12*(*v); 2747ed1c418cSBarry Smith y[18*(*idx)+12] += alpha13*(*v); 2748ed1c418cSBarry Smith y[18*(*idx)+13] += alpha14*(*v); 2749ed1c418cSBarry Smith y[18*(*idx)+14] += alpha15*(*v); 2750ed1c418cSBarry Smith y[18*(*idx)+15] += alpha16*(*v); 2751ed1c418cSBarry Smith y[18*(*idx)+16] += alpha17*(*v); 2752ed1c418cSBarry Smith y[18*(*idx)+17] += alpha18*(*v); 2753ed1c418cSBarry Smith idx++; v++; 2754ed1c418cSBarry Smith } 2755ed1c418cSBarry Smith } 2756dc0b31edSSatish Balay ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 27573649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2758ed1c418cSBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2759ed1c418cSBarry Smith PetscFunctionReturn(0); 2760ed1c418cSBarry Smith } 2761ed1c418cSBarry Smith 27626861f193SBarry Smith #undef __FUNCT__ 27636861f193SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_N" 27646861f193SBarry Smith PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 27656861f193SBarry Smith { 27666861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 27676861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 27686861f193SBarry Smith const PetscScalar *x,*v; 27696861f193SBarry Smith PetscScalar *y,*sums; 27706861f193SBarry Smith PetscErrorCode ierr; 27716861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 27726861f193SBarry Smith PetscInt n,i,jrow,j,dof = b->dof,k; 27736861f193SBarry Smith 27746861f193SBarry Smith PetscFunctionBegin; 27753649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 27766861f193SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 27776861f193SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 27786861f193SBarry Smith idx = a->j; 27796861f193SBarry Smith v = a->a; 27806861f193SBarry Smith ii = a->i; 27816861f193SBarry Smith 27826861f193SBarry Smith for (i=0; i<m; i++) { 27836861f193SBarry Smith jrow = ii[i]; 27846861f193SBarry Smith n = ii[i+1] - jrow; 27856861f193SBarry Smith sums = y + dof*i; 27866861f193SBarry Smith for (j=0; j<n; j++) { 27876861f193SBarry Smith for (k=0; k<dof; k++) { 27886861f193SBarry Smith sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 27896861f193SBarry Smith } 27906861f193SBarry Smith jrow++; 27916861f193SBarry Smith } 27926861f193SBarry Smith } 27936861f193SBarry Smith 27946861f193SBarry Smith ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 27953649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 27966861f193SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 27976861f193SBarry Smith PetscFunctionReturn(0); 27986861f193SBarry Smith } 27996861f193SBarry Smith 28006861f193SBarry Smith #undef __FUNCT__ 28016861f193SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_N" 28026861f193SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 28036861f193SBarry Smith { 28046861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 28056861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 28066861f193SBarry Smith const PetscScalar *x,*v; 28076861f193SBarry Smith PetscScalar *y,*sums; 28086861f193SBarry Smith PetscErrorCode ierr; 28096861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 28106861f193SBarry Smith PetscInt n,i,jrow,j,dof = b->dof,k; 28116861f193SBarry Smith 28126861f193SBarry Smith PetscFunctionBegin; 28136861f193SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 28143649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 28156861f193SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 28166861f193SBarry Smith idx = a->j; 28176861f193SBarry Smith v = a->a; 28186861f193SBarry Smith ii = a->i; 28196861f193SBarry Smith 28206861f193SBarry Smith for (i=0; i<m; i++) { 28216861f193SBarry Smith jrow = ii[i]; 28226861f193SBarry Smith n = ii[i+1] - jrow; 28236861f193SBarry Smith sums = y + dof*i; 28246861f193SBarry Smith for (j=0; j<n; j++) { 28256861f193SBarry Smith for (k=0; k<dof; k++) { 28266861f193SBarry Smith sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 28276861f193SBarry Smith } 28286861f193SBarry Smith jrow++; 28296861f193SBarry Smith } 28306861f193SBarry Smith } 28316861f193SBarry Smith 28326861f193SBarry Smith ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 28333649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 28346861f193SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 28356861f193SBarry Smith PetscFunctionReturn(0); 28366861f193SBarry Smith } 28376861f193SBarry Smith 28386861f193SBarry Smith #undef __FUNCT__ 28396861f193SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_N" 28406861f193SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 28416861f193SBarry Smith { 28426861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 28436861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 28446861f193SBarry Smith const PetscScalar *x,*v,*alpha; 28456861f193SBarry Smith PetscScalar *y; 28466861f193SBarry Smith PetscErrorCode ierr; 28476861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 28486861f193SBarry Smith PetscInt n,i,k; 28496861f193SBarry Smith 28506861f193SBarry Smith PetscFunctionBegin; 28513649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 28526861f193SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 28536861f193SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 28546861f193SBarry Smith for (i=0; i<m; i++) { 28556861f193SBarry Smith idx = a->j + a->i[i] ; 28566861f193SBarry Smith v = a->a + a->i[i] ; 28576861f193SBarry Smith n = a->i[i+1] - a->i[i]; 28586861f193SBarry Smith alpha = x + dof*i; 28596861f193SBarry Smith while (n-->0) { 28606861f193SBarry Smith for (k=0; k<dof; k++) { 28616861f193SBarry Smith y[dof*(*idx)+k] += alpha[k]*(*v); 28626861f193SBarry Smith } 286383ce7366SBarry Smith idx++; v++; 28646861f193SBarry Smith } 28656861f193SBarry Smith } 28666861f193SBarry Smith ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 28673649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 28686861f193SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 28696861f193SBarry Smith PetscFunctionReturn(0); 28706861f193SBarry Smith } 28716861f193SBarry Smith 28726861f193SBarry Smith #undef __FUNCT__ 28736861f193SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_N" 28746861f193SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 28756861f193SBarry Smith { 28766861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 28776861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 28786861f193SBarry Smith const PetscScalar *x,*v,*alpha; 28796861f193SBarry Smith PetscScalar *y; 28806861f193SBarry Smith PetscErrorCode ierr; 28816861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 28826861f193SBarry Smith PetscInt n,i,k; 28836861f193SBarry Smith 28846861f193SBarry Smith PetscFunctionBegin; 28856861f193SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 28863649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 28876861f193SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 28886861f193SBarry Smith for (i=0; i<m; i++) { 28896861f193SBarry Smith idx = a->j + a->i[i] ; 28906861f193SBarry Smith v = a->a + a->i[i] ; 28916861f193SBarry Smith n = a->i[i+1] - a->i[i]; 28926861f193SBarry Smith alpha = x + dof*i; 28936861f193SBarry Smith while (n-->0) { 28946861f193SBarry Smith for (k=0; k<dof; k++) { 28956861f193SBarry Smith y[dof*(*idx)+k] += alpha[k]*(*v); 28966861f193SBarry Smith } 289783ce7366SBarry Smith idx++; v++; 28986861f193SBarry Smith } 28996861f193SBarry Smith } 29006861f193SBarry Smith ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 29013649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 29026861f193SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 29036861f193SBarry Smith PetscFunctionReturn(0); 29046861f193SBarry Smith } 29056861f193SBarry Smith 2906f4a53059SBarry Smith /*===================================================================================*/ 29074a2ae208SSatish Balay #undef __FUNCT__ 29084a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIMAIJ_dof" 2909dfbe8321SBarry Smith PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2910f4a53059SBarry Smith { 29114cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2912dfbe8321SBarry Smith PetscErrorCode ierr; 2913f4a53059SBarry Smith 2914b24ad042SBarry Smith PetscFunctionBegin; 2915f4a53059SBarry Smith /* start the scatter */ 2916ca9f406cSSatish Balay ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 29174cb9d9b8SBarry Smith ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 2918ca9f406cSSatish Balay ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 29194cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 2920f4a53059SBarry Smith PetscFunctionReturn(0); 2921f4a53059SBarry Smith } 2922f4a53059SBarry Smith 29234a2ae208SSatish Balay #undef __FUNCT__ 29244a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 2925dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 29264cb9d9b8SBarry Smith { 29274cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2928dfbe8321SBarry Smith PetscErrorCode ierr; 2929b24ad042SBarry Smith 29304cb9d9b8SBarry Smith PetscFunctionBegin; 29314cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 29324cb9d9b8SBarry Smith ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 2933ca9f406cSSatish Balay ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2934ca9f406cSSatish Balay ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 29354cb9d9b8SBarry Smith PetscFunctionReturn(0); 29364cb9d9b8SBarry Smith } 29374cb9d9b8SBarry Smith 29384a2ae208SSatish Balay #undef __FUNCT__ 29394a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 2940dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 29414cb9d9b8SBarry Smith { 29424cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2943dfbe8321SBarry Smith PetscErrorCode ierr; 29444cb9d9b8SBarry Smith 2945b24ad042SBarry Smith PetscFunctionBegin; 29464cb9d9b8SBarry Smith /* start the scatter */ 2947ca9f406cSSatish Balay ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2948d72c5c08SBarry Smith ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2949ca9f406cSSatish Balay ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2950717f2ec8SHong Zhang ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 29514cb9d9b8SBarry Smith PetscFunctionReturn(0); 29524cb9d9b8SBarry Smith } 29534cb9d9b8SBarry Smith 29544a2ae208SSatish Balay #undef __FUNCT__ 29554a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 2956dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 29574cb9d9b8SBarry Smith { 29584cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2959dfbe8321SBarry Smith PetscErrorCode ierr; 2960b24ad042SBarry Smith 29614cb9d9b8SBarry Smith PetscFunctionBegin; 29624cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2963ca9f406cSSatish Balay ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2964d72c5c08SBarry Smith ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2965ca9f406cSSatish Balay ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 29664cb9d9b8SBarry Smith PetscFunctionReturn(0); 29674cb9d9b8SBarry Smith } 29684cb9d9b8SBarry Smith 296995ddefa2SHong Zhang /* ----------------------------------------------------------------*/ 29709c4fc4c7SBarry Smith #undef __FUNCT__ 29717ba1a0bfSKris Buschelman #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 29727ba1a0bfSKris Buschelman PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 29737ba1a0bfSKris Buschelman { 29747ba1a0bfSKris Buschelman /* This routine requires testing -- but it's getting better. */ 29757ba1a0bfSKris Buschelman PetscErrorCode ierr; 2976a1a86e44SBarry Smith PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 29777ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 29787ba1a0bfSKris Buschelman Mat P=pp->AIJ; 29797ba1a0bfSKris Buschelman Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 2980d2840e09SBarry Smith PetscInt *pti,*ptj,*ptJ; 29817ba1a0bfSKris Buschelman PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 2982d2840e09SBarry Smith const PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof; 2983d2840e09SBarry Smith PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn; 29847ba1a0bfSKris Buschelman MatScalar *ca; 2985d2840e09SBarry Smith const PetscInt *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj; 29867ba1a0bfSKris Buschelman 29877ba1a0bfSKris Buschelman PetscFunctionBegin; 29887ba1a0bfSKris Buschelman /* Start timer */ 29897ba1a0bfSKris Buschelman ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 29907ba1a0bfSKris Buschelman 29917ba1a0bfSKris Buschelman /* Get ij structure of P^T */ 29927ba1a0bfSKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 29937ba1a0bfSKris Buschelman 29947ba1a0bfSKris Buschelman cn = pn*ppdof; 29957ba1a0bfSKris Buschelman /* Allocate ci array, arrays for fill computation and */ 29967ba1a0bfSKris Buschelman /* free space for accumulating nonzero column info */ 29977ba1a0bfSKris Buschelman ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 29987ba1a0bfSKris Buschelman ci[0] = 0; 29997ba1a0bfSKris Buschelman 30007ba1a0bfSKris Buschelman /* Work arrays for rows of P^T*A */ 300174ed9c26SBarry Smith ierr = PetscMalloc4(an,PetscInt,&ptadenserow,an,PetscInt,&ptasparserow,cn,PetscInt,&denserow,cn,PetscInt,&sparserow);CHKERRQ(ierr); 3002c43dabc9SBarry Smith ierr = PetscMemzero(ptadenserow,an*sizeof(PetscInt));CHKERRQ(ierr); 3003c43dabc9SBarry Smith ierr = PetscMemzero(denserow,cn*sizeof(PetscInt));CHKERRQ(ierr); 30047ba1a0bfSKris Buschelman 30057ba1a0bfSKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 30067ba1a0bfSKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 30077ba1a0bfSKris Buschelman /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 3008a1a86e44SBarry Smith ierr = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space); 30097ba1a0bfSKris Buschelman current_space = free_space; 30107ba1a0bfSKris Buschelman 30117ba1a0bfSKris Buschelman /* Determine symbolic info for each row of C: */ 30127ba1a0bfSKris Buschelman for (i=0;i<pn;i++) { 30137ba1a0bfSKris Buschelman ptnzi = pti[i+1] - pti[i]; 30147ba1a0bfSKris Buschelman ptJ = ptj + pti[i]; 30157ba1a0bfSKris Buschelman for (dof=0;dof<ppdof;dof++) { 30167ba1a0bfSKris Buschelman ptanzi = 0; 30177ba1a0bfSKris Buschelman /* Determine symbolic row of PtA: */ 30187ba1a0bfSKris Buschelman for (j=0;j<ptnzi;j++) { 30197ba1a0bfSKris Buschelman /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 30207ba1a0bfSKris Buschelman arow = ptJ[j]*ppdof + dof; 30217ba1a0bfSKris Buschelman /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 30227ba1a0bfSKris Buschelman anzj = ai[arow+1] - ai[arow]; 30237ba1a0bfSKris Buschelman ajj = aj + ai[arow]; 30247ba1a0bfSKris Buschelman for (k=0;k<anzj;k++) { 30257ba1a0bfSKris Buschelman if (!ptadenserow[ajj[k]]) { 30267ba1a0bfSKris Buschelman ptadenserow[ajj[k]] = -1; 30277ba1a0bfSKris Buschelman ptasparserow[ptanzi++] = ajj[k]; 30287ba1a0bfSKris Buschelman } 30297ba1a0bfSKris Buschelman } 30307ba1a0bfSKris Buschelman } 30317ba1a0bfSKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 30327ba1a0bfSKris Buschelman ptaj = ptasparserow; 30337ba1a0bfSKris Buschelman cnzi = 0; 30347ba1a0bfSKris Buschelman for (j=0;j<ptanzi;j++) { 30357ba1a0bfSKris Buschelman /* Get offset within block of P */ 30367ba1a0bfSKris Buschelman pshift = *ptaj%ppdof; 30377ba1a0bfSKris Buschelman /* Get block row of P */ 30387ba1a0bfSKris Buschelman prow = (*ptaj++)/ppdof; /* integer division */ 30397ba1a0bfSKris Buschelman /* P has same number of nonzeros per row as the compressed form */ 30407ba1a0bfSKris Buschelman pnzj = pi[prow+1] - pi[prow]; 30417ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 30427ba1a0bfSKris Buschelman for (k=0;k<pnzj;k++) { 30437ba1a0bfSKris Buschelman /* Locations in C are shifted by the offset within the block */ 30447ba1a0bfSKris Buschelman /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 30457ba1a0bfSKris Buschelman if (!denserow[pjj[k]*ppdof+pshift]) { 30467ba1a0bfSKris Buschelman denserow[pjj[k]*ppdof+pshift] = -1; 30477ba1a0bfSKris Buschelman sparserow[cnzi++] = pjj[k]*ppdof+pshift; 30487ba1a0bfSKris Buschelman } 30497ba1a0bfSKris Buschelman } 30507ba1a0bfSKris Buschelman } 30517ba1a0bfSKris Buschelman 30527ba1a0bfSKris Buschelman /* sort sparserow */ 30537ba1a0bfSKris Buschelman ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 30547ba1a0bfSKris Buschelman 30557ba1a0bfSKris Buschelman /* If free space is not available, make more free space */ 30567ba1a0bfSKris Buschelman /* Double the amount of total space in the list */ 30577ba1a0bfSKris Buschelman if (current_space->local_remaining<cnzi) { 30584238b7adSHong Zhang ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 30597ba1a0bfSKris Buschelman } 30607ba1a0bfSKris Buschelman 30617ba1a0bfSKris Buschelman /* Copy data into free space, and zero out denserows */ 30627ba1a0bfSKris Buschelman ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 30637ba1a0bfSKris Buschelman current_space->array += cnzi; 30647ba1a0bfSKris Buschelman current_space->local_used += cnzi; 30657ba1a0bfSKris Buschelman current_space->local_remaining -= cnzi; 30667ba1a0bfSKris Buschelman 30677ba1a0bfSKris Buschelman for (j=0;j<ptanzi;j++) { 30687ba1a0bfSKris Buschelman ptadenserow[ptasparserow[j]] = 0; 30697ba1a0bfSKris Buschelman } 30707ba1a0bfSKris Buschelman for (j=0;j<cnzi;j++) { 30717ba1a0bfSKris Buschelman denserow[sparserow[j]] = 0; 30727ba1a0bfSKris Buschelman } 30737ba1a0bfSKris Buschelman /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 30747ba1a0bfSKris Buschelman /* For now, we will recompute what is needed. */ 30757ba1a0bfSKris Buschelman ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 30767ba1a0bfSKris Buschelman } 30777ba1a0bfSKris Buschelman } 30787ba1a0bfSKris Buschelman /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 30797ba1a0bfSKris Buschelman /* Allocate space for cj, initialize cj, and */ 30807ba1a0bfSKris Buschelman /* destroy list of free space and other temporary array(s) */ 30817ba1a0bfSKris Buschelman ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3082a1a86e44SBarry Smith ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 308374ed9c26SBarry Smith ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr); 30847ba1a0bfSKris Buschelman 30857ba1a0bfSKris Buschelman /* Allocate space for ca */ 30867ba1a0bfSKris Buschelman ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 30877ba1a0bfSKris Buschelman ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 30887ba1a0bfSKris Buschelman 30897ba1a0bfSKris Buschelman /* put together the new matrix */ 30907adad957SLisandro Dalcin ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr); 30917ba1a0bfSKris Buschelman 30927ba1a0bfSKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 30937ba1a0bfSKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 30947ba1a0bfSKris Buschelman c = (Mat_SeqAIJ *)((*C)->data); 3095e6b907acSBarry Smith c->free_a = PETSC_TRUE; 3096e6b907acSBarry Smith c->free_ij = PETSC_TRUE; 30977ba1a0bfSKris Buschelman c->nonew = 0; 30987ba1a0bfSKris Buschelman 30997ba1a0bfSKris Buschelman /* Clean up. */ 31007ba1a0bfSKris Buschelman ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 31017ba1a0bfSKris Buschelman 31027ba1a0bfSKris Buschelman ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 31037ba1a0bfSKris Buschelman PetscFunctionReturn(0); 31047ba1a0bfSKris Buschelman } 31057ba1a0bfSKris Buschelman 31067ba1a0bfSKris Buschelman #undef __FUNCT__ 31077ba1a0bfSKris Buschelman #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ" 31087ba1a0bfSKris Buschelman PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 31097ba1a0bfSKris Buschelman { 31107ba1a0bfSKris Buschelman /* This routine requires testing -- first draft only */ 31117ba1a0bfSKris Buschelman PetscErrorCode ierr; 31127ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 31137ba1a0bfSKris Buschelman Mat P=pp->AIJ; 31147ba1a0bfSKris Buschelman Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 31157ba1a0bfSKris Buschelman Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 31167ba1a0bfSKris Buschelman Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 3117d2840e09SBarry Smith const PetscInt *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 3118d2840e09SBarry Smith const PetscInt *ci=c->i,*cj=c->j,*cjj; 3119d2840e09SBarry Smith const PetscInt am=A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof; 3120d2840e09SBarry Smith PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense; 3121d2840e09SBarry Smith const MatScalar *aa=a->a,*pa=p->a,*pA=p->a,*paj; 3122d2840e09SBarry Smith MatScalar *ca=c->a,*caj,*apa; 31237ba1a0bfSKris Buschelman 31247ba1a0bfSKris Buschelman PetscFunctionBegin; 31257ba1a0bfSKris Buschelman /* Allocate temporary array for storage of one row of A*P */ 312674ed9c26SBarry Smith ierr = PetscMalloc3(cn,MatScalar,&apa,cn,PetscInt,&apj,cn,PetscInt,&apjdense);CHKERRQ(ierr); 312774ed9c26SBarry Smith ierr = PetscMemzero(apa,cn*sizeof(MatScalar));CHKERRQ(ierr); 312874ed9c26SBarry Smith ierr = PetscMemzero(apj,cn*sizeof(PetscInt));CHKERRQ(ierr); 312974ed9c26SBarry Smith ierr = PetscMemzero(apjdense,cn*sizeof(PetscInt));CHKERRQ(ierr); 31307ba1a0bfSKris Buschelman 31317ba1a0bfSKris Buschelman /* Clear old values in C */ 31327ba1a0bfSKris Buschelman ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 31337ba1a0bfSKris Buschelman 31347ba1a0bfSKris Buschelman for (i=0;i<am;i++) { 31357ba1a0bfSKris Buschelman /* Form sparse row of A*P */ 31367ba1a0bfSKris Buschelman anzi = ai[i+1] - ai[i]; 31377ba1a0bfSKris Buschelman apnzj = 0; 31387ba1a0bfSKris Buschelman for (j=0;j<anzi;j++) { 31397ba1a0bfSKris Buschelman /* Get offset within block of P */ 31407ba1a0bfSKris Buschelman pshift = *aj%ppdof; 31417ba1a0bfSKris Buschelman /* Get block row of P */ 31427ba1a0bfSKris Buschelman prow = *aj++/ppdof; /* integer division */ 31437ba1a0bfSKris Buschelman pnzj = pi[prow+1] - pi[prow]; 31447ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 31457ba1a0bfSKris Buschelman paj = pa + pi[prow]; 31467ba1a0bfSKris Buschelman for (k=0;k<pnzj;k++) { 31477ba1a0bfSKris Buschelman poffset = pjj[k]*ppdof+pshift; 31487ba1a0bfSKris Buschelman if (!apjdense[poffset]) { 31497ba1a0bfSKris Buschelman apjdense[poffset] = -1; 31507ba1a0bfSKris Buschelman apj[apnzj++] = poffset; 31517ba1a0bfSKris Buschelman } 31527ba1a0bfSKris Buschelman apa[poffset] += (*aa)*paj[k]; 31537ba1a0bfSKris Buschelman } 3154dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr); 31557ba1a0bfSKris Buschelman aa++; 31567ba1a0bfSKris Buschelman } 31577ba1a0bfSKris Buschelman 31587ba1a0bfSKris Buschelman /* Sort the j index array for quick sparse axpy. */ 31597ba1a0bfSKris Buschelman /* Note: a array does not need sorting as it is in dense storage locations. */ 31607ba1a0bfSKris Buschelman ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 31617ba1a0bfSKris Buschelman 31627ba1a0bfSKris Buschelman /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 31637ba1a0bfSKris Buschelman prow = i/ppdof; /* integer division */ 31647ba1a0bfSKris Buschelman pshift = i%ppdof; 31657ba1a0bfSKris Buschelman poffset = pi[prow]; 31667ba1a0bfSKris Buschelman pnzi = pi[prow+1] - poffset; 31677ba1a0bfSKris Buschelman /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 31687ba1a0bfSKris Buschelman pJ = pj+poffset; 31697ba1a0bfSKris Buschelman pA = pa+poffset; 31707ba1a0bfSKris Buschelman for (j=0;j<pnzi;j++) { 31717ba1a0bfSKris Buschelman crow = (*pJ)*ppdof+pshift; 31727ba1a0bfSKris Buschelman cjj = cj + ci[crow]; 31737ba1a0bfSKris Buschelman caj = ca + ci[crow]; 31747ba1a0bfSKris Buschelman pJ++; 31757ba1a0bfSKris Buschelman /* Perform sparse axpy operation. Note cjj includes apj. */ 31767ba1a0bfSKris Buschelman for (k=0,nextap=0;nextap<apnzj;k++) { 31777ba1a0bfSKris Buschelman if (cjj[k]==apj[nextap]) { 31787ba1a0bfSKris Buschelman caj[k] += (*pA)*apa[apj[nextap++]]; 31797ba1a0bfSKris Buschelman } 31807ba1a0bfSKris Buschelman } 3181dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr); 31827ba1a0bfSKris Buschelman pA++; 31837ba1a0bfSKris Buschelman } 31847ba1a0bfSKris Buschelman 31857ba1a0bfSKris Buschelman /* Zero the current row info for A*P */ 31867ba1a0bfSKris Buschelman for (j=0;j<apnzj;j++) { 31877ba1a0bfSKris Buschelman apa[apj[j]] = 0.; 31887ba1a0bfSKris Buschelman apjdense[apj[j]] = 0; 31897ba1a0bfSKris Buschelman } 31907ba1a0bfSKris Buschelman } 31917ba1a0bfSKris Buschelman 31927ba1a0bfSKris Buschelman /* Assemble the final matrix and clean up */ 31937ba1a0bfSKris Buschelman ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 31947ba1a0bfSKris Buschelman ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 319574ed9c26SBarry Smith ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr); 319695ddefa2SHong Zhang PetscFunctionReturn(0); 319795ddefa2SHong Zhang } 31987ba1a0bfSKris Buschelman 319995ddefa2SHong Zhang #undef __FUNCT__ 320095ddefa2SHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIMAIJ" 320195ddefa2SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 320295ddefa2SHong Zhang { 320395ddefa2SHong Zhang PetscErrorCode ierr; 320495ddefa2SHong Zhang 320595ddefa2SHong Zhang PetscFunctionBegin; 320695ddefa2SHong Zhang /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */ 320795ddefa2SHong Zhang ierr = MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);CHKERRQ(ierr); 320895ddefa2SHong Zhang ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr); 320995ddefa2SHong Zhang PetscFunctionReturn(0); 321095ddefa2SHong Zhang } 321195ddefa2SHong Zhang 321295ddefa2SHong Zhang #undef __FUNCT__ 321395ddefa2SHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIMAIJ" 321495ddefa2SHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C) 321595ddefa2SHong Zhang { 321695ddefa2SHong Zhang PetscFunctionBegin; 3217e32f2f54SBarry Smith SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 32187ba1a0bfSKris Buschelman PetscFunctionReturn(0); 32197ba1a0bfSKris Buschelman } 32207ba1a0bfSKris Buschelman 3221be1d678aSKris Buschelman EXTERN_C_BEGIN 32227ba1a0bfSKris Buschelman #undef __FUNCT__ 32230fd73130SBarry Smith #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ" 3224f69a0ea3SMatthew Knepley PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 32259c4fc4c7SBarry Smith { 32269c4fc4c7SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 3227ceb03754SKris Buschelman Mat a = b->AIJ,B; 32289c4fc4c7SBarry Smith Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 32299c4fc4c7SBarry Smith PetscErrorCode ierr; 32300fd73130SBarry Smith PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 3231ba8c8a56SBarry Smith PetscInt *cols; 3232ba8c8a56SBarry Smith PetscScalar *vals; 32339c4fc4c7SBarry Smith 32349c4fc4c7SBarry Smith PetscFunctionBegin; 32359c4fc4c7SBarry Smith ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 32367c307921SBarry Smith ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr); 32379c4fc4c7SBarry Smith for (i=0; i<m; i++) { 32389c4fc4c7SBarry Smith nmax = PetscMax(nmax,aij->ilen[i]); 32390fd73130SBarry Smith for (j=0; j<dof; j++) { 32400fd73130SBarry Smith ilen[dof*i+j] = aij->ilen[i]; 32419c4fc4c7SBarry Smith } 32429c4fc4c7SBarry Smith } 3243ceb03754SKris Buschelman ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr); 32449c4fc4c7SBarry Smith ierr = PetscFree(ilen);CHKERRQ(ierr); 32459c4fc4c7SBarry Smith ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr); 32469c4fc4c7SBarry Smith ii = 0; 32479c4fc4c7SBarry Smith for (i=0; i<m; i++) { 3248ba8c8a56SBarry Smith ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 32490fd73130SBarry Smith for (j=0; j<dof; j++) { 32509c4fc4c7SBarry Smith for (k=0; k<ncols; k++) { 32510fd73130SBarry Smith icols[k] = dof*cols[k]+j; 32529c4fc4c7SBarry Smith } 3253ceb03754SKris Buschelman ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 32549c4fc4c7SBarry Smith ii++; 32559c4fc4c7SBarry Smith } 3256ba8c8a56SBarry Smith ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 32579c4fc4c7SBarry Smith } 32589c4fc4c7SBarry Smith ierr = PetscFree(icols);CHKERRQ(ierr); 3259ceb03754SKris Buschelman ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3260ceb03754SKris Buschelman ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3261ceb03754SKris Buschelman 3262ceb03754SKris Buschelman if (reuse == MAT_REUSE_MATRIX) { 32638ab5b326SKris Buschelman ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3264c3d102feSKris Buschelman } else { 3265ceb03754SKris Buschelman *newmat = B; 3266c3d102feSKris Buschelman } 32679c4fc4c7SBarry Smith PetscFunctionReturn(0); 32689c4fc4c7SBarry Smith } 3269be1d678aSKris Buschelman EXTERN_C_END 32709c4fc4c7SBarry Smith 32717c4f633dSBarry Smith #include "../src/mat/impls/aij/mpi/mpiaij.h" 3272be1d678aSKris Buschelman 3273be1d678aSKris Buschelman EXTERN_C_BEGIN 32740fd73130SBarry Smith #undef __FUNCT__ 32750fd73130SBarry Smith #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ" 3276f69a0ea3SMatthew Knepley PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 32770fd73130SBarry Smith { 32780fd73130SBarry Smith Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 3279ceb03754SKris Buschelman Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 32800fd73130SBarry Smith Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 32810fd73130SBarry Smith Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 32820fd73130SBarry Smith Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 32830fd73130SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 3284910ba992SMatthew Knepley PetscInt dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0; 3285910ba992SMatthew Knepley PetscInt *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL; 32860fd73130SBarry Smith PetscInt rstart,cstart,*garray,ii,k; 32870fd73130SBarry Smith PetscErrorCode ierr; 32880fd73130SBarry Smith PetscScalar *vals,*ovals; 32890fd73130SBarry Smith 32900fd73130SBarry Smith PetscFunctionBegin; 3291d0f46423SBarry Smith ierr = PetscMalloc2(A->rmap->n,PetscInt,&dnz,A->rmap->n,PetscInt,&onz);CHKERRQ(ierr); 3292d0f46423SBarry Smith for (i=0; i<A->rmap->n/dof; i++) { 32930fd73130SBarry Smith nmax = PetscMax(nmax,AIJ->ilen[i]); 32940fd73130SBarry Smith onmax = PetscMax(onmax,OAIJ->ilen[i]); 32950fd73130SBarry Smith for (j=0; j<dof; j++) { 32960fd73130SBarry Smith dnz[dof*i+j] = AIJ->ilen[i]; 32970fd73130SBarry Smith onz[dof*i+j] = OAIJ->ilen[i]; 32980fd73130SBarry Smith } 32990fd73130SBarry Smith } 3300d0f46423SBarry Smith ierr = MatCreateMPIAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr); 33010fd73130SBarry Smith ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 33020fd73130SBarry Smith 33037a1a7badSBarry Smith ierr = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr); 3304d0f46423SBarry Smith rstart = dof*maij->A->rmap->rstart; 3305d0f46423SBarry Smith cstart = dof*maij->A->cmap->rstart; 33060fd73130SBarry Smith garray = mpiaij->garray; 33070fd73130SBarry Smith 33080fd73130SBarry Smith ii = rstart; 3309d0f46423SBarry Smith for (i=0; i<A->rmap->n/dof; i++) { 33100fd73130SBarry Smith ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 33110fd73130SBarry Smith ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 33120fd73130SBarry Smith for (j=0; j<dof; j++) { 33130fd73130SBarry Smith for (k=0; k<ncols; k++) { 33140fd73130SBarry Smith icols[k] = cstart + dof*cols[k]+j; 33150fd73130SBarry Smith } 33160fd73130SBarry Smith for (k=0; k<oncols; k++) { 33170fd73130SBarry Smith oicols[k] = dof*garray[ocols[k]]+j; 33180fd73130SBarry Smith } 3319ceb03754SKris Buschelman ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3320ceb03754SKris Buschelman ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 33210fd73130SBarry Smith ii++; 33220fd73130SBarry Smith } 33230fd73130SBarry Smith ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 33240fd73130SBarry Smith ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 33250fd73130SBarry Smith } 33260fd73130SBarry Smith ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 33270fd73130SBarry Smith 3328ceb03754SKris Buschelman ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3329ceb03754SKris Buschelman ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3330ceb03754SKris Buschelman 3331ceb03754SKris Buschelman if (reuse == MAT_REUSE_MATRIX) { 33327adad957SLisandro Dalcin PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 33337adad957SLisandro Dalcin ((PetscObject)A)->refct = 1; 33348ab5b326SKris Buschelman ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 33357adad957SLisandro Dalcin ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 3336c3d102feSKris Buschelman } else { 3337ceb03754SKris Buschelman *newmat = B; 3338c3d102feSKris Buschelman } 33390fd73130SBarry Smith PetscFunctionReturn(0); 33400fd73130SBarry Smith } 3341be1d678aSKris Buschelman EXTERN_C_END 33420fd73130SBarry Smith 33430fd73130SBarry Smith 3344bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */ 3345ff585edeSJed Brown #undef __FUNCT__ 3346ff585edeSJed Brown #define __FUNCT__ "MatCreateMAIJ" 3347ff585edeSJed Brown /*@C 33480bad9183SKris Buschelman MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 33490bad9183SKris Buschelman operations for multicomponent problems. It interpolates each component the same 33500bad9183SKris Buschelman way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 33510bad9183SKris Buschelman and MATMPIAIJ for distributed matrices. 33520bad9183SKris Buschelman 3353ff585edeSJed Brown Collective 3354ff585edeSJed Brown 3355ff585edeSJed Brown Input Parameters: 3356ff585edeSJed Brown + A - the AIJ matrix describing the action on blocks 3357ff585edeSJed Brown - dof - the block size (number of components per node) 3358ff585edeSJed Brown 3359ff585edeSJed Brown Output Parameter: 3360ff585edeSJed Brown . maij - the new MAIJ matrix 3361ff585edeSJed Brown 33620bad9183SKris Buschelman Operations provided: 33630fd73130SBarry Smith + MatMult 33640bad9183SKris Buschelman . MatMultTranspose 33650bad9183SKris Buschelman . MatMultAdd 33660bad9183SKris Buschelman . MatMultTransposeAdd 33670fd73130SBarry Smith - MatView 33680bad9183SKris Buschelman 33690bad9183SKris Buschelman Level: advanced 33700bad9183SKris Buschelman 3371ff585edeSJed Brown .seealso: MatMAIJGetAIJ(), MatMAIJRedimension() 3372ff585edeSJed Brown @*/ 3373be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 337482b1193eSBarry Smith { 3375dfbe8321SBarry Smith PetscErrorCode ierr; 3376b24ad042SBarry Smith PetscMPIInt size; 3377b24ad042SBarry Smith PetscInt n; 337882b1193eSBarry Smith Mat B; 337982b1193eSBarry Smith 338082b1193eSBarry Smith PetscFunctionBegin; 3381d72c5c08SBarry Smith ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3382d72c5c08SBarry Smith 3383d72c5c08SBarry Smith if (dof == 1) { 3384d72c5c08SBarry Smith *maij = A; 3385d72c5c08SBarry Smith } else { 33867adad957SLisandro Dalcin ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 3387d0f46423SBarry Smith ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr); 3388bccba955SJed Brown ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr); 3389bccba955SJed Brown ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr); 3390bccba955SJed Brown ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3391bccba955SJed Brown ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3392cd3bbe55SBarry Smith B->assembled = PETSC_TRUE; 3393d72c5c08SBarry Smith 33947adad957SLisandro Dalcin ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 3395f4a53059SBarry Smith if (size == 1) { 3396feb9fe23SJed Brown Mat_SeqMAIJ *b; 3397feb9fe23SJed Brown 3398b9b97703SBarry Smith ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 33994cb9d9b8SBarry Smith B->ops->destroy = MatDestroy_SeqMAIJ; 34000fd73130SBarry Smith B->ops->view = MatView_SeqMAIJ; 3401feb9fe23SJed Brown b = (Mat_SeqMAIJ*)B->data; 3402b9b97703SBarry Smith b->dof = dof; 34034cb9d9b8SBarry Smith b->AIJ = A; 3404d72c5c08SBarry Smith if (dof == 2) { 3405cd3bbe55SBarry Smith B->ops->mult = MatMult_SeqMAIJ_2; 3406cd3bbe55SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_2; 3407cd3bbe55SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 3408cd3bbe55SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 3409bcc973b7SBarry Smith } else if (dof == 3) { 3410bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_3; 3411bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_3; 3412bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 3413bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 3414bcc973b7SBarry Smith } else if (dof == 4) { 3415bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_4; 3416bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_4; 3417bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 3418bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 3419f9fae5adSBarry Smith } else if (dof == 5) { 3420f9fae5adSBarry Smith B->ops->mult = MatMult_SeqMAIJ_5; 3421f9fae5adSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_5; 3422f9fae5adSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 3423f9fae5adSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 34246cd98798SBarry Smith } else if (dof == 6) { 34256cd98798SBarry Smith B->ops->mult = MatMult_SeqMAIJ_6; 34266cd98798SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_6; 34276cd98798SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 34286cd98798SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 3429ed8eea03SBarry Smith } else if (dof == 7) { 3430ed8eea03SBarry Smith B->ops->mult = MatMult_SeqMAIJ_7; 3431ed8eea03SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_7; 3432ed8eea03SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 3433ed8eea03SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 343466ed3db0SBarry Smith } else if (dof == 8) { 343566ed3db0SBarry Smith B->ops->mult = MatMult_SeqMAIJ_8; 343666ed3db0SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_8; 343766ed3db0SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 343866ed3db0SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 34392b692628SMatthew Knepley } else if (dof == 9) { 34402b692628SMatthew Knepley B->ops->mult = MatMult_SeqMAIJ_9; 34412b692628SMatthew Knepley B->ops->multadd = MatMultAdd_SeqMAIJ_9; 34422b692628SMatthew Knepley B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 34432b692628SMatthew Knepley B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 3444b9cda22cSBarry Smith } else if (dof == 10) { 3445b9cda22cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_10; 3446b9cda22cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_10; 3447b9cda22cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 3448b9cda22cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 3449dbdd7285SBarry Smith } else if (dof == 11) { 3450dbdd7285SBarry Smith B->ops->mult = MatMult_SeqMAIJ_11; 3451dbdd7285SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_11; 3452dbdd7285SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 3453dbdd7285SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 34542f7816d4SBarry Smith } else if (dof == 16) { 34552f7816d4SBarry Smith B->ops->mult = MatMult_SeqMAIJ_16; 34562f7816d4SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_16; 34572f7816d4SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 34582f7816d4SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 3459ed1c418cSBarry Smith } else if (dof == 18) { 3460ed1c418cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_18; 3461ed1c418cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_18; 3462ed1c418cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 3463ed1c418cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 346482b1193eSBarry Smith } else { 34656861f193SBarry Smith B->ops->mult = MatMult_SeqMAIJ_N; 34666861f193SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_N; 34676861f193SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 34686861f193SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 346982b1193eSBarry Smith } 34707ba1a0bfSKris Buschelman B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ; 34717ba1a0bfSKris Buschelman B->ops->ptapnumeric_seqaij = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 34729c4fc4c7SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 3473f4a53059SBarry Smith } else { 3474f4a53059SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 3475feb9fe23SJed Brown Mat_MPIMAIJ *b; 3476f4a53059SBarry Smith IS from,to; 3477f4a53059SBarry Smith Vec gvec; 3478b24ad042SBarry Smith PetscInt *garray,i; 3479f4a53059SBarry Smith 3480b9b97703SBarry Smith ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 3481d72c5c08SBarry Smith B->ops->destroy = MatDestroy_MPIMAIJ; 34820fd73130SBarry Smith B->ops->view = MatView_MPIMAIJ; 3483b9b97703SBarry Smith b = (Mat_MPIMAIJ*)B->data; 3484b9b97703SBarry Smith b->dof = dof; 3485b9b97703SBarry Smith b->A = A; 34864cb9d9b8SBarry Smith ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 34874cb9d9b8SBarry Smith ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 34884cb9d9b8SBarry Smith 3489f4a53059SBarry Smith ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 3490f4a53059SBarry Smith ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr); 349113288a74SBarry Smith ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 3492f4a53059SBarry Smith 3493f4a53059SBarry Smith /* create two temporary Index sets for build scatter gather */ 3494b24ad042SBarry Smith ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 3495f4a53059SBarry Smith for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i]; 34967adad957SLisandro Dalcin ierr = ISCreateBlock(((PetscObject)A)->comm,dof,n,garray,&from);CHKERRQ(ierr); 3497f4a53059SBarry Smith ierr = PetscFree(garray);CHKERRQ(ierr); 3498f4a53059SBarry Smith ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 3499f4a53059SBarry Smith 3500f4a53059SBarry Smith /* create temporary global vector to generate scatter context */ 3501d0f46423SBarry Smith ierr = VecCreateMPIWithArray(((PetscObject)A)->comm,dof*A->cmap->n,dof*A->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); 350213288a74SBarry Smith ierr = VecSetBlockSize(gvec,dof);CHKERRQ(ierr); 3503f4a53059SBarry Smith 3504f4a53059SBarry Smith /* generate the scatter context */ 3505f4a53059SBarry Smith ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 3506f4a53059SBarry Smith 3507f4a53059SBarry Smith ierr = ISDestroy(from);CHKERRQ(ierr); 3508f4a53059SBarry Smith ierr = ISDestroy(to);CHKERRQ(ierr); 3509f4a53059SBarry Smith ierr = VecDestroy(gvec);CHKERRQ(ierr); 3510f4a53059SBarry Smith 3511f4a53059SBarry Smith B->ops->mult = MatMult_MPIMAIJ_dof; 35124cb9d9b8SBarry Smith B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 35134cb9d9b8SBarry Smith B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 35144cb9d9b8SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 351595ddefa2SHong Zhang B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ; 351695ddefa2SHong Zhang B->ops->ptapnumeric_mpiaij = MatPtAPNumeric_MPIAIJ_MPIMAIJ; 35170fd73130SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 3518f4a53059SBarry Smith } 3519cd3bbe55SBarry Smith *maij = B; 35200fd73130SBarry Smith ierr = MatView_Private(B);CHKERRQ(ierr); 3521d72c5c08SBarry Smith } 352282b1193eSBarry Smith PetscFunctionReturn(0); 352382b1193eSBarry Smith } 3524