1be1d678aSKris Buschelman 282b1193eSBarry Smith /* 3cd3bbe55SBarry Smith Defines the basic matrix operations for the MAIJ matrix storage format. 4cd3bbe55SBarry Smith This format is used for restriction and interpolation operations for 5cd3bbe55SBarry Smith multicomponent problems. It interpolates each component the same way 6cd3bbe55SBarry Smith independently. 7cd3bbe55SBarry Smith 8cd3bbe55SBarry Smith We provide: 9cd3bbe55SBarry Smith MatMult() 10cd3bbe55SBarry Smith MatMultTranspose() 11cd3bbe55SBarry Smith MatMultTransposeAdd() 12cd3bbe55SBarry Smith MatMultAdd() 13cd3bbe55SBarry Smith and 14cd3bbe55SBarry Smith MatCreateMAIJ(Mat,dof,Mat*) 15f4a53059SBarry Smith 16f4a53059SBarry Smith This single directory handles both the sequential and parallel codes 1782b1193eSBarry Smith */ 1882b1193eSBarry Smith 19c6db04a5SJed Brown #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 20c6db04a5SJed Brown #include <../src/mat/utils/freespace.h> 21b45d2f2cSJed Brown #include <petsc-private/vecimpl.h> 2282b1193eSBarry Smith 234a2ae208SSatish Balay #undef __FUNCT__ 244a2ae208SSatish Balay #define __FUNCT__ "MatMAIJGetAIJ" 25ff585edeSJed Brown /*@C 26ff585edeSJed Brown MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix 27ff585edeSJed Brown 28ff585edeSJed Brown Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel 29ff585edeSJed Brown 30ff585edeSJed Brown Input Parameter: 31ff585edeSJed Brown . A - the MAIJ matrix 32ff585edeSJed Brown 33ff585edeSJed Brown Output Parameter: 34ff585edeSJed Brown . B - the AIJ matrix 35ff585edeSJed Brown 36ff585edeSJed Brown Level: advanced 37ff585edeSJed Brown 38ff585edeSJed Brown Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 39ff585edeSJed Brown 40ff585edeSJed Brown .seealso: MatCreateMAIJ() 41ff585edeSJed Brown @*/ 427087cfbeSBarry Smith PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B) 43b9b97703SBarry Smith { 44dfbe8321SBarry Smith PetscErrorCode ierr; 45ace3abfcSBarry Smith PetscBool ismpimaij,isseqmaij; 46b9b97703SBarry Smith 47b9b97703SBarry Smith PetscFunctionBegin; 48b9b97703SBarry Smith ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 49b9b97703SBarry Smith ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 50b9b97703SBarry Smith if (ismpimaij) { 51b9b97703SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 52b9b97703SBarry Smith 53b9b97703SBarry Smith *B = b->A; 54b9b97703SBarry Smith } else if (isseqmaij) { 55b9b97703SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 56b9b97703SBarry Smith 57b9b97703SBarry Smith *B = b->AIJ; 58b9b97703SBarry Smith } else { 59b9b97703SBarry Smith *B = A; 60b9b97703SBarry Smith } 61b9b97703SBarry Smith PetscFunctionReturn(0); 62b9b97703SBarry Smith } 63b9b97703SBarry Smith 644a2ae208SSatish Balay #undef __FUNCT__ 654a2ae208SSatish Balay #define __FUNCT__ "MatMAIJRedimension" 66ff585edeSJed Brown /*@C 67ff585edeSJed Brown MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size 68ff585edeSJed Brown 693f9fe445SBarry Smith Logically Collective 70ff585edeSJed Brown 71ff585edeSJed Brown Input Parameter: 72ff585edeSJed Brown + A - the MAIJ matrix 73ff585edeSJed Brown - dof - the block size for the new matrix 74ff585edeSJed Brown 75ff585edeSJed Brown Output Parameter: 76ff585edeSJed Brown . B - the new MAIJ matrix 77ff585edeSJed Brown 78ff585edeSJed Brown Level: advanced 79ff585edeSJed Brown 80ff585edeSJed Brown .seealso: MatCreateMAIJ() 81ff585edeSJed Brown @*/ 827087cfbeSBarry Smith PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 83b9b97703SBarry Smith { 84dfbe8321SBarry Smith PetscErrorCode ierr; 853b98c0a2SBarry Smith Mat Aij = PETSC_NULL; 86b9b97703SBarry Smith 87b9b97703SBarry Smith PetscFunctionBegin; 88c5eb9154SBarry Smith PetscValidLogicalCollectiveInt(A,dof,2); 89b9b97703SBarry Smith ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 90b9b97703SBarry Smith ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 91b9b97703SBarry Smith PetscFunctionReturn(0); 92b9b97703SBarry Smith } 93b9b97703SBarry Smith 944a2ae208SSatish Balay #undef __FUNCT__ 954a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_SeqMAIJ" 96dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 9782b1193eSBarry Smith { 98dfbe8321SBarry Smith PetscErrorCode ierr; 994cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 10082b1193eSBarry Smith 10182b1193eSBarry Smith PetscFunctionBegin; 1026bf464f9SBarry Smith ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 103bf0cc555SLisandro Dalcin ierr = PetscFree(A->data);CHKERRQ(ierr); 1044cb9d9b8SBarry Smith PetscFunctionReturn(0); 1054cb9d9b8SBarry Smith } 1064cb9d9b8SBarry Smith 1074a2ae208SSatish Balay #undef __FUNCT__ 1080fd73130SBarry Smith #define __FUNCT__ "MatView_SeqMAIJ" 1090fd73130SBarry Smith PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 1100fd73130SBarry Smith { 1110fd73130SBarry Smith PetscErrorCode ierr; 1120fd73130SBarry Smith Mat B; 1130fd73130SBarry Smith 1140fd73130SBarry Smith PetscFunctionBegin; 115ceb03754SKris Buschelman ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B); 1160fd73130SBarry Smith ierr = MatView(B,viewer);CHKERRQ(ierr); 1176bf464f9SBarry Smith ierr = MatDestroy(&B);CHKERRQ(ierr); 1180fd73130SBarry Smith PetscFunctionReturn(0); 1190fd73130SBarry Smith } 1200fd73130SBarry Smith 1210fd73130SBarry Smith #undef __FUNCT__ 1220fd73130SBarry Smith #define __FUNCT__ "MatView_MPIMAIJ" 1230fd73130SBarry Smith PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 1240fd73130SBarry Smith { 1250fd73130SBarry Smith PetscErrorCode ierr; 1260fd73130SBarry Smith Mat B; 1270fd73130SBarry Smith 1280fd73130SBarry Smith PetscFunctionBegin; 129ceb03754SKris Buschelman ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B); 1300fd73130SBarry Smith ierr = MatView(B,viewer);CHKERRQ(ierr); 1316bf464f9SBarry Smith ierr = MatDestroy(&B);CHKERRQ(ierr); 1320fd73130SBarry Smith PetscFunctionReturn(0); 1330fd73130SBarry Smith } 1340fd73130SBarry Smith 1350fd73130SBarry Smith #undef __FUNCT__ 1364a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIMAIJ" 137dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 1384cb9d9b8SBarry Smith { 139dfbe8321SBarry Smith PetscErrorCode ierr; 1404cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1414cb9d9b8SBarry Smith 1424cb9d9b8SBarry Smith PetscFunctionBegin; 1436bf464f9SBarry Smith ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 1446bf464f9SBarry Smith ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 1456bf464f9SBarry Smith ierr = MatDestroy(&b->A);CHKERRQ(ierr); 1466bf464f9SBarry Smith ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 1476bf464f9SBarry Smith ierr = VecDestroy(&b->w);CHKERRQ(ierr); 148bf0cc555SLisandro Dalcin ierr = PetscFree(A->data);CHKERRQ(ierr); 149dbd8c25aSHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 150b9b97703SBarry Smith PetscFunctionReturn(0); 151b9b97703SBarry Smith } 152b9b97703SBarry Smith 1530bad9183SKris Buschelman /*MC 154fafad747SKris Buschelman MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 1550bad9183SKris Buschelman multicomponent problems, interpolating or restricting each component the same way independently. 1560bad9183SKris Buschelman The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 1570bad9183SKris Buschelman 1580bad9183SKris Buschelman Operations provided: 1590bad9183SKris Buschelman . MatMult 1600bad9183SKris Buschelman . MatMultTranspose 1610bad9183SKris Buschelman . MatMultAdd 1620bad9183SKris Buschelman . MatMultTransposeAdd 1630bad9183SKris Buschelman 1640bad9183SKris Buschelman Level: advanced 1650bad9183SKris Buschelman 166b409243cSBarry Smith .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ() 1670bad9183SKris Buschelman M*/ 1680bad9183SKris Buschelman 16982b1193eSBarry Smith EXTERN_C_BEGIN 1704a2ae208SSatish Balay #undef __FUNCT__ 1714a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MAIJ" 1727087cfbeSBarry Smith PetscErrorCode MatCreate_MAIJ(Mat A) 17382b1193eSBarry Smith { 174dfbe8321SBarry Smith PetscErrorCode ierr; 1754cb9d9b8SBarry Smith Mat_MPIMAIJ *b; 17664b52464SHong Zhang PetscMPIInt size; 17782b1193eSBarry Smith 17882b1193eSBarry Smith PetscFunctionBegin; 17938f2d2fdSLisandro Dalcin ierr = PetscNewLog(A,Mat_MPIMAIJ,&b);CHKERRQ(ierr); 180b0a32e0cSBarry Smith A->data = (void*)b; 181cd3bbe55SBarry Smith ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 182f4a53059SBarry Smith 183cd3bbe55SBarry Smith b->AIJ = 0; 184cd3bbe55SBarry Smith b->dof = 0; 185f4a53059SBarry Smith b->OAIJ = 0; 186f4a53059SBarry Smith b->ctx = 0; 187f4a53059SBarry Smith b->w = 0; 1887adad957SLisandro Dalcin ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 18964b52464SHong Zhang if (size == 1){ 19064b52464SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr); 19164b52464SHong Zhang } else { 19264b52464SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr); 19364b52464SHong Zhang } 19482b1193eSBarry Smith PetscFunctionReturn(0); 19582b1193eSBarry Smith } 19682b1193eSBarry Smith EXTERN_C_END 19782b1193eSBarry Smith 198cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 1994a2ae208SSatish Balay #undef __FUNCT__ 200b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_2" 201dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 20282b1193eSBarry Smith { 2034cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 204bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 205d2840e09SBarry Smith const PetscScalar *x,*v; 206d2840e09SBarry Smith PetscScalar *y, sum1, sum2; 207dfbe8321SBarry Smith PetscErrorCode ierr; 208d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 209d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 21082b1193eSBarry Smith 211bcc973b7SBarry Smith PetscFunctionBegin; 2123649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2131ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 214bcc973b7SBarry Smith idx = a->j; 215bcc973b7SBarry Smith v = a->a; 216bcc973b7SBarry Smith ii = a->i; 217bcc973b7SBarry Smith 218bcc973b7SBarry Smith for (i=0; i<m; i++) { 219bcc973b7SBarry Smith jrow = ii[i]; 220bcc973b7SBarry Smith n = ii[i+1] - jrow; 221bcc973b7SBarry Smith sum1 = 0.0; 222bcc973b7SBarry Smith sum2 = 0.0; 223b3c51c66SMatthew Knepley nonzerorow += (n>0); 224bcc973b7SBarry Smith for (j=0; j<n; j++) { 225bcc973b7SBarry Smith sum1 += v[jrow]*x[2*idx[jrow]]; 226bcc973b7SBarry Smith sum2 += v[jrow]*x[2*idx[jrow]+1]; 227bcc973b7SBarry Smith jrow++; 228bcc973b7SBarry Smith } 229bcc973b7SBarry Smith y[2*i] = sum1; 230bcc973b7SBarry Smith y[2*i+1] = sum2; 231bcc973b7SBarry Smith } 232bcc973b7SBarry Smith 233dc0b31edSSatish Balay ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr); 2343649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2351ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 23682b1193eSBarry Smith PetscFunctionReturn(0); 23782b1193eSBarry Smith } 238bcc973b7SBarry Smith 2394a2ae208SSatish Balay #undef __FUNCT__ 240b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 241dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 24282b1193eSBarry Smith { 2434cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 244bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 245d2840e09SBarry Smith const PetscScalar *x,*v; 246d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2; 247dfbe8321SBarry Smith PetscErrorCode ierr; 248d2840e09SBarry Smith PetscInt n,i; 249d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 25082b1193eSBarry Smith 251bcc973b7SBarry Smith PetscFunctionBegin; 252d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2533649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2541ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 255bfec09a0SHong Zhang 256bcc973b7SBarry Smith for (i=0; i<m; i++) { 257bfec09a0SHong Zhang idx = a->j + a->i[i] ; 258bfec09a0SHong Zhang v = a->a + a->i[i] ; 259bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 260bcc973b7SBarry Smith alpha1 = x[2*i]; 261bcc973b7SBarry Smith alpha2 = x[2*i+1]; 262bcc973b7SBarry Smith while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 263bcc973b7SBarry Smith } 264dc0b31edSSatish Balay ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 2653649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2661ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 26782b1193eSBarry Smith PetscFunctionReturn(0); 26882b1193eSBarry Smith } 269bcc973b7SBarry Smith 2704a2ae208SSatish Balay #undef __FUNCT__ 271b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 272dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 27382b1193eSBarry Smith { 2744cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 275bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 276d2840e09SBarry Smith const PetscScalar *x,*v; 277d2840e09SBarry Smith PetscScalar *y,sum1, sum2; 278dfbe8321SBarry Smith PetscErrorCode ierr; 279b24ad042SBarry Smith PetscInt n,i,jrow,j; 280d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 28182b1193eSBarry Smith 282bcc973b7SBarry Smith PetscFunctionBegin; 283f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2843649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2851ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 286bcc973b7SBarry Smith idx = a->j; 287bcc973b7SBarry Smith v = a->a; 288bcc973b7SBarry Smith ii = a->i; 289bcc973b7SBarry Smith 290bcc973b7SBarry Smith for (i=0; i<m; i++) { 291bcc973b7SBarry Smith jrow = ii[i]; 292bcc973b7SBarry Smith n = ii[i+1] - jrow; 293bcc973b7SBarry Smith sum1 = 0.0; 294bcc973b7SBarry Smith sum2 = 0.0; 295bcc973b7SBarry Smith for (j=0; j<n; j++) { 296bcc973b7SBarry Smith sum1 += v[jrow]*x[2*idx[jrow]]; 297bcc973b7SBarry Smith sum2 += v[jrow]*x[2*idx[jrow]+1]; 298bcc973b7SBarry Smith jrow++; 299bcc973b7SBarry Smith } 300bcc973b7SBarry Smith y[2*i] += sum1; 301bcc973b7SBarry Smith y[2*i+1] += sum2; 302bcc973b7SBarry Smith } 303bcc973b7SBarry Smith 304dc0b31edSSatish Balay ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 3053649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 3061ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 307bcc973b7SBarry Smith PetscFunctionReturn(0); 30882b1193eSBarry Smith } 3094a2ae208SSatish Balay #undef __FUNCT__ 310b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 311dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 31282b1193eSBarry Smith { 3134cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 314bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 315d2840e09SBarry Smith const PetscScalar *x,*v; 316d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2; 317dfbe8321SBarry Smith PetscErrorCode ierr; 318d2840e09SBarry Smith PetscInt n,i; 319d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 32082b1193eSBarry Smith 321bcc973b7SBarry Smith PetscFunctionBegin; 322f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 3233649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 3241ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 325bfec09a0SHong Zhang 326bcc973b7SBarry Smith for (i=0; i<m; i++) { 327bfec09a0SHong Zhang idx = a->j + a->i[i] ; 328bfec09a0SHong Zhang v = a->a + a->i[i] ; 329bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 330bcc973b7SBarry Smith alpha1 = x[2*i]; 331bcc973b7SBarry Smith alpha2 = x[2*i+1]; 332bcc973b7SBarry Smith while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 333bcc973b7SBarry Smith } 334dc0b31edSSatish Balay ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 3353649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 3361ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 337bcc973b7SBarry Smith PetscFunctionReturn(0); 33882b1193eSBarry Smith } 339cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 3404a2ae208SSatish Balay #undef __FUNCT__ 341b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_3" 342dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 343bcc973b7SBarry Smith { 3444cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 345bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 346d2840e09SBarry Smith const PetscScalar *x,*v; 347d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3; 348dfbe8321SBarry Smith PetscErrorCode ierr; 349d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 350d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 35182b1193eSBarry Smith 352bcc973b7SBarry Smith PetscFunctionBegin; 3533649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 3541ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 355bcc973b7SBarry Smith idx = a->j; 356bcc973b7SBarry Smith v = a->a; 357bcc973b7SBarry Smith ii = a->i; 358bcc973b7SBarry Smith 359bcc973b7SBarry Smith for (i=0; i<m; i++) { 360bcc973b7SBarry Smith jrow = ii[i]; 361bcc973b7SBarry Smith n = ii[i+1] - jrow; 362bcc973b7SBarry Smith sum1 = 0.0; 363bcc973b7SBarry Smith sum2 = 0.0; 364bcc973b7SBarry Smith sum3 = 0.0; 365b3c51c66SMatthew Knepley nonzerorow += (n>0); 366bcc973b7SBarry Smith for (j=0; j<n; j++) { 367bcc973b7SBarry Smith sum1 += v[jrow]*x[3*idx[jrow]]; 368bcc973b7SBarry Smith sum2 += v[jrow]*x[3*idx[jrow]+1]; 369bcc973b7SBarry Smith sum3 += v[jrow]*x[3*idx[jrow]+2]; 370bcc973b7SBarry Smith jrow++; 371bcc973b7SBarry Smith } 372bcc973b7SBarry Smith y[3*i] = sum1; 373bcc973b7SBarry Smith y[3*i+1] = sum2; 374bcc973b7SBarry Smith y[3*i+2] = sum3; 375bcc973b7SBarry Smith } 376bcc973b7SBarry Smith 377dc0b31edSSatish Balay ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr); 3783649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 3791ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 380bcc973b7SBarry Smith PetscFunctionReturn(0); 381bcc973b7SBarry Smith } 382bcc973b7SBarry Smith 3834a2ae208SSatish Balay #undef __FUNCT__ 384b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 385dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 386bcc973b7SBarry Smith { 3874cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 388bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 389d2840e09SBarry Smith const PetscScalar *x,*v; 390d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3; 391dfbe8321SBarry Smith PetscErrorCode ierr; 392d2840e09SBarry Smith PetscInt n,i; 393d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 394bcc973b7SBarry Smith 395bcc973b7SBarry Smith PetscFunctionBegin; 396d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 3973649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 3981ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 399bfec09a0SHong Zhang 400bcc973b7SBarry Smith for (i=0; i<m; i++) { 401bfec09a0SHong Zhang idx = a->j + a->i[i]; 402bfec09a0SHong Zhang v = a->a + a->i[i]; 403bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 404bcc973b7SBarry Smith alpha1 = x[3*i]; 405bcc973b7SBarry Smith alpha2 = x[3*i+1]; 406bcc973b7SBarry Smith alpha3 = x[3*i+2]; 407bcc973b7SBarry Smith while (n-->0) { 408bcc973b7SBarry Smith y[3*(*idx)] += alpha1*(*v); 409bcc973b7SBarry Smith y[3*(*idx)+1] += alpha2*(*v); 410bcc973b7SBarry Smith y[3*(*idx)+2] += alpha3*(*v); 411bcc973b7SBarry Smith idx++; v++; 412bcc973b7SBarry Smith } 413bcc973b7SBarry Smith } 414dc0b31edSSatish Balay ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 4153649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 4161ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 417bcc973b7SBarry Smith PetscFunctionReturn(0); 418bcc973b7SBarry Smith } 419bcc973b7SBarry Smith 4204a2ae208SSatish Balay #undef __FUNCT__ 421b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 422dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 423bcc973b7SBarry Smith { 4244cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 425bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 426d2840e09SBarry Smith const PetscScalar *x,*v; 427d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3; 428dfbe8321SBarry Smith PetscErrorCode ierr; 429b24ad042SBarry Smith PetscInt n,i,jrow,j; 430d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 431bcc973b7SBarry Smith 432bcc973b7SBarry Smith PetscFunctionBegin; 433f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 4343649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 4351ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 436bcc973b7SBarry Smith idx = a->j; 437bcc973b7SBarry Smith v = a->a; 438bcc973b7SBarry Smith ii = a->i; 439bcc973b7SBarry Smith 440bcc973b7SBarry Smith for (i=0; i<m; i++) { 441bcc973b7SBarry Smith jrow = ii[i]; 442bcc973b7SBarry Smith n = ii[i+1] - jrow; 443bcc973b7SBarry Smith sum1 = 0.0; 444bcc973b7SBarry Smith sum2 = 0.0; 445bcc973b7SBarry Smith sum3 = 0.0; 446bcc973b7SBarry Smith for (j=0; j<n; j++) { 447bcc973b7SBarry Smith sum1 += v[jrow]*x[3*idx[jrow]]; 448bcc973b7SBarry Smith sum2 += v[jrow]*x[3*idx[jrow]+1]; 449bcc973b7SBarry Smith sum3 += v[jrow]*x[3*idx[jrow]+2]; 450bcc973b7SBarry Smith jrow++; 451bcc973b7SBarry Smith } 452bcc973b7SBarry Smith y[3*i] += sum1; 453bcc973b7SBarry Smith y[3*i+1] += sum2; 454bcc973b7SBarry Smith y[3*i+2] += sum3; 455bcc973b7SBarry Smith } 456bcc973b7SBarry Smith 457dc0b31edSSatish Balay ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 4583649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 4591ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 460bcc973b7SBarry Smith PetscFunctionReturn(0); 461bcc973b7SBarry Smith } 4624a2ae208SSatish Balay #undef __FUNCT__ 463b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 464dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 465bcc973b7SBarry Smith { 4664cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 467bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 468d2840e09SBarry Smith const PetscScalar *x,*v; 469d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3; 470dfbe8321SBarry Smith PetscErrorCode ierr; 471d2840e09SBarry Smith PetscInt n,i; 472d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 473bcc973b7SBarry Smith 474bcc973b7SBarry Smith PetscFunctionBegin; 475f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 4763649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 4771ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 478bcc973b7SBarry Smith for (i=0; i<m; i++) { 479bfec09a0SHong Zhang idx = a->j + a->i[i] ; 480bfec09a0SHong Zhang v = a->a + a->i[i] ; 481bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 482bcc973b7SBarry Smith alpha1 = x[3*i]; 483bcc973b7SBarry Smith alpha2 = x[3*i+1]; 484bcc973b7SBarry Smith alpha3 = x[3*i+2]; 485bcc973b7SBarry Smith while (n-->0) { 486bcc973b7SBarry Smith y[3*(*idx)] += alpha1*(*v); 487bcc973b7SBarry Smith y[3*(*idx)+1] += alpha2*(*v); 488bcc973b7SBarry Smith y[3*(*idx)+2] += alpha3*(*v); 489bcc973b7SBarry Smith idx++; v++; 490bcc973b7SBarry Smith } 491bcc973b7SBarry Smith } 492dc0b31edSSatish Balay ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 4933649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 4941ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 495bcc973b7SBarry Smith PetscFunctionReturn(0); 496bcc973b7SBarry Smith } 497bcc973b7SBarry Smith 498bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/ 4994a2ae208SSatish Balay #undef __FUNCT__ 500b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_4" 501dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 502bcc973b7SBarry Smith { 5034cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 504bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 505d2840e09SBarry Smith const PetscScalar *x,*v; 506d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4; 507dfbe8321SBarry Smith PetscErrorCode ierr; 508d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 509d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 510bcc973b7SBarry Smith 511bcc973b7SBarry Smith PetscFunctionBegin; 5123649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 5131ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 514bcc973b7SBarry Smith idx = a->j; 515bcc973b7SBarry Smith v = a->a; 516bcc973b7SBarry Smith ii = a->i; 517bcc973b7SBarry Smith 518bcc973b7SBarry Smith for (i=0; i<m; i++) { 519bcc973b7SBarry Smith jrow = ii[i]; 520bcc973b7SBarry Smith n = ii[i+1] - jrow; 521bcc973b7SBarry Smith sum1 = 0.0; 522bcc973b7SBarry Smith sum2 = 0.0; 523bcc973b7SBarry Smith sum3 = 0.0; 524bcc973b7SBarry Smith sum4 = 0.0; 525b3c51c66SMatthew Knepley nonzerorow += (n>0); 526bcc973b7SBarry Smith for (j=0; j<n; j++) { 527bcc973b7SBarry Smith sum1 += v[jrow]*x[4*idx[jrow]]; 528bcc973b7SBarry Smith sum2 += v[jrow]*x[4*idx[jrow]+1]; 529bcc973b7SBarry Smith sum3 += v[jrow]*x[4*idx[jrow]+2]; 530bcc973b7SBarry Smith sum4 += v[jrow]*x[4*idx[jrow]+3]; 531bcc973b7SBarry Smith jrow++; 532bcc973b7SBarry Smith } 533bcc973b7SBarry Smith y[4*i] = sum1; 534bcc973b7SBarry Smith y[4*i+1] = sum2; 535bcc973b7SBarry Smith y[4*i+2] = sum3; 536bcc973b7SBarry Smith y[4*i+3] = sum4; 537bcc973b7SBarry Smith } 538bcc973b7SBarry Smith 539dc0b31edSSatish Balay ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr); 5403649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 5411ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 542bcc973b7SBarry Smith PetscFunctionReturn(0); 543bcc973b7SBarry Smith } 544bcc973b7SBarry Smith 5454a2ae208SSatish Balay #undef __FUNCT__ 546b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 547dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 548bcc973b7SBarry Smith { 5494cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 550bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 551d2840e09SBarry Smith const PetscScalar *x,*v; 552d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 553dfbe8321SBarry Smith PetscErrorCode ierr; 554d2840e09SBarry Smith PetscInt n,i; 555d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 556bcc973b7SBarry Smith 557bcc973b7SBarry Smith PetscFunctionBegin; 558d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 5593649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 5601ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 561bcc973b7SBarry Smith for (i=0; i<m; i++) { 562bfec09a0SHong Zhang idx = a->j + a->i[i] ; 563bfec09a0SHong Zhang v = a->a + a->i[i] ; 564bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 565bcc973b7SBarry Smith alpha1 = x[4*i]; 566bcc973b7SBarry Smith alpha2 = x[4*i+1]; 567bcc973b7SBarry Smith alpha3 = x[4*i+2]; 568bcc973b7SBarry Smith alpha4 = x[4*i+3]; 569bcc973b7SBarry Smith while (n-->0) { 570bcc973b7SBarry Smith y[4*(*idx)] += alpha1*(*v); 571bcc973b7SBarry Smith y[4*(*idx)+1] += alpha2*(*v); 572bcc973b7SBarry Smith y[4*(*idx)+2] += alpha3*(*v); 573bcc973b7SBarry Smith y[4*(*idx)+3] += alpha4*(*v); 574bcc973b7SBarry Smith idx++; v++; 575bcc973b7SBarry Smith } 576bcc973b7SBarry Smith } 577dc0b31edSSatish Balay ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 5783649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 5791ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 580bcc973b7SBarry Smith PetscFunctionReturn(0); 581bcc973b7SBarry Smith } 582bcc973b7SBarry Smith 5834a2ae208SSatish Balay #undef __FUNCT__ 584b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 585dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 586bcc973b7SBarry Smith { 5874cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 588f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 589d2840e09SBarry Smith const PetscScalar *x,*v; 590d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4; 591dfbe8321SBarry Smith PetscErrorCode ierr; 592b24ad042SBarry Smith PetscInt n,i,jrow,j; 593d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 594f9fae5adSBarry Smith 595f9fae5adSBarry Smith PetscFunctionBegin; 596f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 5973649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 5981ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 599f9fae5adSBarry Smith idx = a->j; 600f9fae5adSBarry Smith v = a->a; 601f9fae5adSBarry Smith ii = a->i; 602f9fae5adSBarry Smith 603f9fae5adSBarry Smith for (i=0; i<m; i++) { 604f9fae5adSBarry Smith jrow = ii[i]; 605f9fae5adSBarry Smith n = ii[i+1] - jrow; 606f9fae5adSBarry Smith sum1 = 0.0; 607f9fae5adSBarry Smith sum2 = 0.0; 608f9fae5adSBarry Smith sum3 = 0.0; 609f9fae5adSBarry Smith sum4 = 0.0; 610f9fae5adSBarry Smith for (j=0; j<n; j++) { 611f9fae5adSBarry Smith sum1 += v[jrow]*x[4*idx[jrow]]; 612f9fae5adSBarry Smith sum2 += v[jrow]*x[4*idx[jrow]+1]; 613f9fae5adSBarry Smith sum3 += v[jrow]*x[4*idx[jrow]+2]; 614f9fae5adSBarry Smith sum4 += v[jrow]*x[4*idx[jrow]+3]; 615f9fae5adSBarry Smith jrow++; 616f9fae5adSBarry Smith } 617f9fae5adSBarry Smith y[4*i] += sum1; 618f9fae5adSBarry Smith y[4*i+1] += sum2; 619f9fae5adSBarry Smith y[4*i+2] += sum3; 620f9fae5adSBarry Smith y[4*i+3] += sum4; 621f9fae5adSBarry Smith } 622f9fae5adSBarry Smith 623dc0b31edSSatish Balay ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 6243649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 6251ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 626f9fae5adSBarry Smith PetscFunctionReturn(0); 627bcc973b7SBarry Smith } 6284a2ae208SSatish Balay #undef __FUNCT__ 629b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 630dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 631bcc973b7SBarry Smith { 6324cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 633f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 634d2840e09SBarry Smith const PetscScalar *x,*v; 635d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 636dfbe8321SBarry Smith PetscErrorCode ierr; 637d2840e09SBarry Smith PetscInt n,i; 638d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 639f9fae5adSBarry Smith 640f9fae5adSBarry Smith PetscFunctionBegin; 641f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 6423649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 6431ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 644bfec09a0SHong Zhang 645f9fae5adSBarry Smith for (i=0; i<m; i++) { 646bfec09a0SHong Zhang idx = a->j + a->i[i] ; 647bfec09a0SHong Zhang v = a->a + a->i[i] ; 648f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 649f9fae5adSBarry Smith alpha1 = x[4*i]; 650f9fae5adSBarry Smith alpha2 = x[4*i+1]; 651f9fae5adSBarry Smith alpha3 = x[4*i+2]; 652f9fae5adSBarry Smith alpha4 = x[4*i+3]; 653f9fae5adSBarry Smith while (n-->0) { 654f9fae5adSBarry Smith y[4*(*idx)] += alpha1*(*v); 655f9fae5adSBarry Smith y[4*(*idx)+1] += alpha2*(*v); 656f9fae5adSBarry Smith y[4*(*idx)+2] += alpha3*(*v); 657f9fae5adSBarry Smith y[4*(*idx)+3] += alpha4*(*v); 658f9fae5adSBarry Smith idx++; v++; 659f9fae5adSBarry Smith } 660f9fae5adSBarry Smith } 661dc0b31edSSatish Balay ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 6623649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 6631ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 664f9fae5adSBarry Smith PetscFunctionReturn(0); 665f9fae5adSBarry Smith } 666f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/ 6676cd98798SBarry Smith 6684a2ae208SSatish Balay #undef __FUNCT__ 669b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_5" 670dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 671f9fae5adSBarry Smith { 6724cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 673f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 674d2840e09SBarry Smith const PetscScalar *x,*v; 675d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 676dfbe8321SBarry Smith PetscErrorCode ierr; 677d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 678d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 679f9fae5adSBarry Smith 680f9fae5adSBarry Smith PetscFunctionBegin; 6813649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 6821ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 683f9fae5adSBarry Smith idx = a->j; 684f9fae5adSBarry Smith v = a->a; 685f9fae5adSBarry Smith ii = a->i; 686f9fae5adSBarry Smith 687f9fae5adSBarry Smith for (i=0; i<m; i++) { 688f9fae5adSBarry Smith jrow = ii[i]; 689f9fae5adSBarry Smith n = ii[i+1] - jrow; 690f9fae5adSBarry Smith sum1 = 0.0; 691f9fae5adSBarry Smith sum2 = 0.0; 692f9fae5adSBarry Smith sum3 = 0.0; 693f9fae5adSBarry Smith sum4 = 0.0; 694f9fae5adSBarry Smith sum5 = 0.0; 695b3c51c66SMatthew Knepley nonzerorow += (n>0); 696f9fae5adSBarry Smith for (j=0; j<n; j++) { 697f9fae5adSBarry Smith sum1 += v[jrow]*x[5*idx[jrow]]; 698f9fae5adSBarry Smith sum2 += v[jrow]*x[5*idx[jrow]+1]; 699f9fae5adSBarry Smith sum3 += v[jrow]*x[5*idx[jrow]+2]; 700f9fae5adSBarry Smith sum4 += v[jrow]*x[5*idx[jrow]+3]; 701f9fae5adSBarry Smith sum5 += v[jrow]*x[5*idx[jrow]+4]; 702f9fae5adSBarry Smith jrow++; 703f9fae5adSBarry Smith } 704f9fae5adSBarry Smith y[5*i] = sum1; 705f9fae5adSBarry Smith y[5*i+1] = sum2; 706f9fae5adSBarry Smith y[5*i+2] = sum3; 707f9fae5adSBarry Smith y[5*i+3] = sum4; 708f9fae5adSBarry Smith y[5*i+4] = sum5; 709f9fae5adSBarry Smith } 710f9fae5adSBarry Smith 711dc0b31edSSatish Balay ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr); 7123649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 7131ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 714f9fae5adSBarry Smith PetscFunctionReturn(0); 715f9fae5adSBarry Smith } 716f9fae5adSBarry Smith 7174a2ae208SSatish Balay #undef __FUNCT__ 718b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 719dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 720f9fae5adSBarry Smith { 7214cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 722f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 723d2840e09SBarry Smith const PetscScalar *x,*v; 724d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 725dfbe8321SBarry Smith PetscErrorCode ierr; 726d2840e09SBarry Smith PetscInt n,i; 727d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 728f9fae5adSBarry Smith 729f9fae5adSBarry Smith PetscFunctionBegin; 730d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 7313649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 7321ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 733bfec09a0SHong Zhang 734f9fae5adSBarry Smith for (i=0; i<m; i++) { 735bfec09a0SHong Zhang idx = a->j + a->i[i] ; 736bfec09a0SHong Zhang v = a->a + a->i[i] ; 737f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 738f9fae5adSBarry Smith alpha1 = x[5*i]; 739f9fae5adSBarry Smith alpha2 = x[5*i+1]; 740f9fae5adSBarry Smith alpha3 = x[5*i+2]; 741f9fae5adSBarry Smith alpha4 = x[5*i+3]; 742f9fae5adSBarry Smith alpha5 = x[5*i+4]; 743f9fae5adSBarry Smith while (n-->0) { 744f9fae5adSBarry Smith y[5*(*idx)] += alpha1*(*v); 745f9fae5adSBarry Smith y[5*(*idx)+1] += alpha2*(*v); 746f9fae5adSBarry Smith y[5*(*idx)+2] += alpha3*(*v); 747f9fae5adSBarry Smith y[5*(*idx)+3] += alpha4*(*v); 748f9fae5adSBarry Smith y[5*(*idx)+4] += alpha5*(*v); 749f9fae5adSBarry Smith idx++; v++; 750f9fae5adSBarry Smith } 751f9fae5adSBarry Smith } 752dc0b31edSSatish Balay ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 7533649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 7541ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 755f9fae5adSBarry Smith PetscFunctionReturn(0); 756f9fae5adSBarry Smith } 757f9fae5adSBarry Smith 7584a2ae208SSatish Balay #undef __FUNCT__ 759b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 760dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 761f9fae5adSBarry Smith { 7624cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 763f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 764d2840e09SBarry Smith const PetscScalar *x,*v; 765d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 766dfbe8321SBarry Smith PetscErrorCode ierr; 767b24ad042SBarry Smith PetscInt n,i,jrow,j; 768d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 769f9fae5adSBarry Smith 770f9fae5adSBarry Smith PetscFunctionBegin; 771f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 7723649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 7731ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 774f9fae5adSBarry Smith idx = a->j; 775f9fae5adSBarry Smith v = a->a; 776f9fae5adSBarry Smith ii = a->i; 777f9fae5adSBarry Smith 778f9fae5adSBarry Smith for (i=0; i<m; i++) { 779f9fae5adSBarry Smith jrow = ii[i]; 780f9fae5adSBarry Smith n = ii[i+1] - jrow; 781f9fae5adSBarry Smith sum1 = 0.0; 782f9fae5adSBarry Smith sum2 = 0.0; 783f9fae5adSBarry Smith sum3 = 0.0; 784f9fae5adSBarry Smith sum4 = 0.0; 785f9fae5adSBarry Smith sum5 = 0.0; 786f9fae5adSBarry Smith for (j=0; j<n; j++) { 787f9fae5adSBarry Smith sum1 += v[jrow]*x[5*idx[jrow]]; 788f9fae5adSBarry Smith sum2 += v[jrow]*x[5*idx[jrow]+1]; 789f9fae5adSBarry Smith sum3 += v[jrow]*x[5*idx[jrow]+2]; 790f9fae5adSBarry Smith sum4 += v[jrow]*x[5*idx[jrow]+3]; 791f9fae5adSBarry Smith sum5 += v[jrow]*x[5*idx[jrow]+4]; 792f9fae5adSBarry Smith jrow++; 793f9fae5adSBarry Smith } 794f9fae5adSBarry Smith y[5*i] += sum1; 795f9fae5adSBarry Smith y[5*i+1] += sum2; 796f9fae5adSBarry Smith y[5*i+2] += sum3; 797f9fae5adSBarry Smith y[5*i+3] += sum4; 798f9fae5adSBarry Smith y[5*i+4] += sum5; 799f9fae5adSBarry Smith } 800f9fae5adSBarry Smith 801dc0b31edSSatish Balay ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 8023649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 8031ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 804f9fae5adSBarry Smith PetscFunctionReturn(0); 805f9fae5adSBarry Smith } 806f9fae5adSBarry Smith 8074a2ae208SSatish Balay #undef __FUNCT__ 808b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 809dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 810f9fae5adSBarry Smith { 8114cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 812f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 813d2840e09SBarry Smith const PetscScalar *x,*v; 814d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 815dfbe8321SBarry Smith PetscErrorCode ierr; 816d2840e09SBarry Smith PetscInt n,i; 817d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 818f9fae5adSBarry Smith 819f9fae5adSBarry Smith PetscFunctionBegin; 820f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 8213649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 8221ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 823bfec09a0SHong Zhang 824f9fae5adSBarry Smith for (i=0; i<m; i++) { 825bfec09a0SHong Zhang idx = a->j + a->i[i] ; 826bfec09a0SHong Zhang v = a->a + a->i[i] ; 827f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 828f9fae5adSBarry Smith alpha1 = x[5*i]; 829f9fae5adSBarry Smith alpha2 = x[5*i+1]; 830f9fae5adSBarry Smith alpha3 = x[5*i+2]; 831f9fae5adSBarry Smith alpha4 = x[5*i+3]; 832f9fae5adSBarry Smith alpha5 = x[5*i+4]; 833f9fae5adSBarry Smith while (n-->0) { 834f9fae5adSBarry Smith y[5*(*idx)] += alpha1*(*v); 835f9fae5adSBarry Smith y[5*(*idx)+1] += alpha2*(*v); 836f9fae5adSBarry Smith y[5*(*idx)+2] += alpha3*(*v); 837f9fae5adSBarry Smith y[5*(*idx)+3] += alpha4*(*v); 838f9fae5adSBarry Smith y[5*(*idx)+4] += alpha5*(*v); 839f9fae5adSBarry Smith idx++; v++; 840f9fae5adSBarry Smith } 841f9fae5adSBarry Smith } 842dc0b31edSSatish Balay ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 8433649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 8441ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 845f9fae5adSBarry Smith PetscFunctionReturn(0); 846bcc973b7SBarry Smith } 847bcc973b7SBarry Smith 8486cd98798SBarry Smith /* ------------------------------------------------------------------------------*/ 8494a2ae208SSatish Balay #undef __FUNCT__ 850b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_6" 851dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 8526cd98798SBarry Smith { 8536cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 8546cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 855d2840e09SBarry Smith const PetscScalar *x,*v; 856d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 857dfbe8321SBarry Smith PetscErrorCode ierr; 858d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 859d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 8606cd98798SBarry Smith 8616cd98798SBarry Smith PetscFunctionBegin; 8623649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 8631ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 8646cd98798SBarry Smith idx = a->j; 8656cd98798SBarry Smith v = a->a; 8666cd98798SBarry Smith ii = a->i; 8676cd98798SBarry Smith 8686cd98798SBarry Smith for (i=0; i<m; i++) { 8696cd98798SBarry Smith jrow = ii[i]; 8706cd98798SBarry Smith n = ii[i+1] - jrow; 8716cd98798SBarry Smith sum1 = 0.0; 8726cd98798SBarry Smith sum2 = 0.0; 8736cd98798SBarry Smith sum3 = 0.0; 8746cd98798SBarry Smith sum4 = 0.0; 8756cd98798SBarry Smith sum5 = 0.0; 8766cd98798SBarry Smith sum6 = 0.0; 877b3c51c66SMatthew Knepley nonzerorow += (n>0); 8786cd98798SBarry Smith for (j=0; j<n; j++) { 8796cd98798SBarry Smith sum1 += v[jrow]*x[6*idx[jrow]]; 8806cd98798SBarry Smith sum2 += v[jrow]*x[6*idx[jrow]+1]; 8816cd98798SBarry Smith sum3 += v[jrow]*x[6*idx[jrow]+2]; 8826cd98798SBarry Smith sum4 += v[jrow]*x[6*idx[jrow]+3]; 8836cd98798SBarry Smith sum5 += v[jrow]*x[6*idx[jrow]+4]; 8846cd98798SBarry Smith sum6 += v[jrow]*x[6*idx[jrow]+5]; 8856cd98798SBarry Smith jrow++; 8866cd98798SBarry Smith } 8876cd98798SBarry Smith y[6*i] = sum1; 8886cd98798SBarry Smith y[6*i+1] = sum2; 8896cd98798SBarry Smith y[6*i+2] = sum3; 8906cd98798SBarry Smith y[6*i+3] = sum4; 8916cd98798SBarry Smith y[6*i+4] = sum5; 8926cd98798SBarry Smith y[6*i+5] = sum6; 8936cd98798SBarry Smith } 8946cd98798SBarry Smith 895dc0b31edSSatish Balay ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr); 8963649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 8971ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 8986cd98798SBarry Smith PetscFunctionReturn(0); 8996cd98798SBarry Smith } 9006cd98798SBarry Smith 9014a2ae208SSatish Balay #undef __FUNCT__ 902b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 903dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 9046cd98798SBarry Smith { 9056cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 9066cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 907d2840e09SBarry Smith const PetscScalar *x,*v; 908d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 909dfbe8321SBarry Smith PetscErrorCode ierr; 910d2840e09SBarry Smith PetscInt n,i; 911d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 9126cd98798SBarry Smith 9136cd98798SBarry Smith PetscFunctionBegin; 914d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 9153649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 9161ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 917bfec09a0SHong Zhang 9186cd98798SBarry Smith for (i=0; i<m; i++) { 919bfec09a0SHong Zhang idx = a->j + a->i[i] ; 920bfec09a0SHong Zhang v = a->a + a->i[i] ; 9216cd98798SBarry Smith n = a->i[i+1] - a->i[i]; 9226cd98798SBarry Smith alpha1 = x[6*i]; 9236cd98798SBarry Smith alpha2 = x[6*i+1]; 9246cd98798SBarry Smith alpha3 = x[6*i+2]; 9256cd98798SBarry Smith alpha4 = x[6*i+3]; 9266cd98798SBarry Smith alpha5 = x[6*i+4]; 9276cd98798SBarry Smith alpha6 = x[6*i+5]; 9286cd98798SBarry Smith while (n-->0) { 9296cd98798SBarry Smith y[6*(*idx)] += alpha1*(*v); 9306cd98798SBarry Smith y[6*(*idx)+1] += alpha2*(*v); 9316cd98798SBarry Smith y[6*(*idx)+2] += alpha3*(*v); 9326cd98798SBarry Smith y[6*(*idx)+3] += alpha4*(*v); 9336cd98798SBarry Smith y[6*(*idx)+4] += alpha5*(*v); 9346cd98798SBarry Smith y[6*(*idx)+5] += alpha6*(*v); 9356cd98798SBarry Smith idx++; v++; 9366cd98798SBarry Smith } 9376cd98798SBarry Smith } 938dc0b31edSSatish Balay ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 9393649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 9401ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 9416cd98798SBarry Smith PetscFunctionReturn(0); 9426cd98798SBarry Smith } 9436cd98798SBarry Smith 9444a2ae208SSatish Balay #undef __FUNCT__ 945b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 946dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 9476cd98798SBarry Smith { 9486cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 9496cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 950d2840e09SBarry Smith const PetscScalar *x,*v; 951d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 952dfbe8321SBarry Smith PetscErrorCode ierr; 953b24ad042SBarry Smith PetscInt n,i,jrow,j; 954d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 9556cd98798SBarry Smith 9566cd98798SBarry Smith PetscFunctionBegin; 9576cd98798SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 9583649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 9591ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 9606cd98798SBarry Smith idx = a->j; 9616cd98798SBarry Smith v = a->a; 9626cd98798SBarry Smith ii = a->i; 9636cd98798SBarry Smith 9646cd98798SBarry Smith for (i=0; i<m; i++) { 9656cd98798SBarry Smith jrow = ii[i]; 9666cd98798SBarry Smith n = ii[i+1] - jrow; 9676cd98798SBarry Smith sum1 = 0.0; 9686cd98798SBarry Smith sum2 = 0.0; 9696cd98798SBarry Smith sum3 = 0.0; 9706cd98798SBarry Smith sum4 = 0.0; 9716cd98798SBarry Smith sum5 = 0.0; 9726cd98798SBarry Smith sum6 = 0.0; 9736cd98798SBarry Smith for (j=0; j<n; j++) { 9746cd98798SBarry Smith sum1 += v[jrow]*x[6*idx[jrow]]; 9756cd98798SBarry Smith sum2 += v[jrow]*x[6*idx[jrow]+1]; 9766cd98798SBarry Smith sum3 += v[jrow]*x[6*idx[jrow]+2]; 9776cd98798SBarry Smith sum4 += v[jrow]*x[6*idx[jrow]+3]; 9786cd98798SBarry Smith sum5 += v[jrow]*x[6*idx[jrow]+4]; 9796cd98798SBarry Smith sum6 += v[jrow]*x[6*idx[jrow]+5]; 9806cd98798SBarry Smith jrow++; 9816cd98798SBarry Smith } 9826cd98798SBarry Smith y[6*i] += sum1; 9836cd98798SBarry Smith y[6*i+1] += sum2; 9846cd98798SBarry Smith y[6*i+2] += sum3; 9856cd98798SBarry Smith y[6*i+3] += sum4; 9866cd98798SBarry Smith y[6*i+4] += sum5; 9876cd98798SBarry Smith y[6*i+5] += sum6; 9886cd98798SBarry Smith } 9896cd98798SBarry Smith 990dc0b31edSSatish Balay ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 9913649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 9921ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 9936cd98798SBarry Smith PetscFunctionReturn(0); 9946cd98798SBarry Smith } 9956cd98798SBarry Smith 9964a2ae208SSatish Balay #undef __FUNCT__ 997b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 998dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 9996cd98798SBarry Smith { 10006cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 10016cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1002d2840e09SBarry Smith const PetscScalar *x,*v; 1003d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 1004dfbe8321SBarry Smith PetscErrorCode ierr; 1005d2840e09SBarry Smith PetscInt n,i; 1006d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 10076cd98798SBarry Smith 10086cd98798SBarry Smith PetscFunctionBegin; 10096cd98798SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 10103649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 10111ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1012bfec09a0SHong Zhang 10136cd98798SBarry Smith for (i=0; i<m; i++) { 1014bfec09a0SHong Zhang idx = a->j + a->i[i] ; 1015bfec09a0SHong Zhang v = a->a + a->i[i] ; 10166cd98798SBarry Smith n = a->i[i+1] - a->i[i]; 10176cd98798SBarry Smith alpha1 = x[6*i]; 10186cd98798SBarry Smith alpha2 = x[6*i+1]; 10196cd98798SBarry Smith alpha3 = x[6*i+2]; 10206cd98798SBarry Smith alpha4 = x[6*i+3]; 10216cd98798SBarry Smith alpha5 = x[6*i+4]; 10226cd98798SBarry Smith alpha6 = x[6*i+5]; 10236cd98798SBarry Smith while (n-->0) { 10246cd98798SBarry Smith y[6*(*idx)] += alpha1*(*v); 10256cd98798SBarry Smith y[6*(*idx)+1] += alpha2*(*v); 10266cd98798SBarry Smith y[6*(*idx)+2] += alpha3*(*v); 10276cd98798SBarry Smith y[6*(*idx)+3] += alpha4*(*v); 10286cd98798SBarry Smith y[6*(*idx)+4] += alpha5*(*v); 10296cd98798SBarry Smith y[6*(*idx)+5] += alpha6*(*v); 10306cd98798SBarry Smith idx++; v++; 10316cd98798SBarry Smith } 10326cd98798SBarry Smith } 1033dc0b31edSSatish Balay ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 10343649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 10351ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 10366cd98798SBarry Smith PetscFunctionReturn(0); 10376cd98798SBarry Smith } 10386cd98798SBarry Smith 103966ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/ 104066ed3db0SBarry Smith #undef __FUNCT__ 1041ed8eea03SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_7" 1042ed8eea03SBarry Smith PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1043ed8eea03SBarry Smith { 1044ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1045ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1046d2840e09SBarry Smith const PetscScalar *x,*v; 1047d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1048ed8eea03SBarry Smith PetscErrorCode ierr; 1049d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1050d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 1051ed8eea03SBarry Smith 1052ed8eea03SBarry Smith PetscFunctionBegin; 10533649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1054ed8eea03SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1055ed8eea03SBarry Smith idx = a->j; 1056ed8eea03SBarry Smith v = a->a; 1057ed8eea03SBarry Smith ii = a->i; 1058ed8eea03SBarry Smith 1059ed8eea03SBarry Smith for (i=0; i<m; i++) { 1060ed8eea03SBarry Smith jrow = ii[i]; 1061ed8eea03SBarry Smith n = ii[i+1] - jrow; 1062ed8eea03SBarry Smith sum1 = 0.0; 1063ed8eea03SBarry Smith sum2 = 0.0; 1064ed8eea03SBarry Smith sum3 = 0.0; 1065ed8eea03SBarry Smith sum4 = 0.0; 1066ed8eea03SBarry Smith sum5 = 0.0; 1067ed8eea03SBarry Smith sum6 = 0.0; 1068ed8eea03SBarry Smith sum7 = 0.0; 1069b3c51c66SMatthew Knepley nonzerorow += (n>0); 1070ed8eea03SBarry Smith for (j=0; j<n; j++) { 1071ed8eea03SBarry Smith sum1 += v[jrow]*x[7*idx[jrow]]; 1072ed8eea03SBarry Smith sum2 += v[jrow]*x[7*idx[jrow]+1]; 1073ed8eea03SBarry Smith sum3 += v[jrow]*x[7*idx[jrow]+2]; 1074ed8eea03SBarry Smith sum4 += v[jrow]*x[7*idx[jrow]+3]; 1075ed8eea03SBarry Smith sum5 += v[jrow]*x[7*idx[jrow]+4]; 1076ed8eea03SBarry Smith sum6 += v[jrow]*x[7*idx[jrow]+5]; 1077ed8eea03SBarry Smith sum7 += v[jrow]*x[7*idx[jrow]+6]; 1078ed8eea03SBarry Smith jrow++; 1079ed8eea03SBarry Smith } 1080ed8eea03SBarry Smith y[7*i] = sum1; 1081ed8eea03SBarry Smith y[7*i+1] = sum2; 1082ed8eea03SBarry Smith y[7*i+2] = sum3; 1083ed8eea03SBarry Smith y[7*i+3] = sum4; 1084ed8eea03SBarry Smith y[7*i+4] = sum5; 1085ed8eea03SBarry Smith y[7*i+5] = sum6; 1086ed8eea03SBarry Smith y[7*i+6] = sum7; 1087ed8eea03SBarry Smith } 1088ed8eea03SBarry Smith 1089dc0b31edSSatish Balay ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr); 10903649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1091ed8eea03SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1092ed8eea03SBarry Smith PetscFunctionReturn(0); 1093ed8eea03SBarry Smith } 1094ed8eea03SBarry Smith 1095ed8eea03SBarry Smith #undef __FUNCT__ 1096ed8eea03SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7" 1097ed8eea03SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1098ed8eea03SBarry Smith { 1099ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1100ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1101d2840e09SBarry Smith const PetscScalar *x,*v; 1102d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1103ed8eea03SBarry Smith PetscErrorCode ierr; 1104d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1105d2840e09SBarry Smith PetscInt n,i; 1106ed8eea03SBarry Smith 1107ed8eea03SBarry Smith PetscFunctionBegin; 1108d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 11093649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1110ed8eea03SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1111ed8eea03SBarry Smith 1112ed8eea03SBarry Smith for (i=0; i<m; i++) { 1113ed8eea03SBarry Smith idx = a->j + a->i[i] ; 1114ed8eea03SBarry Smith v = a->a + a->i[i] ; 1115ed8eea03SBarry Smith n = a->i[i+1] - a->i[i]; 1116ed8eea03SBarry Smith alpha1 = x[7*i]; 1117ed8eea03SBarry Smith alpha2 = x[7*i+1]; 1118ed8eea03SBarry Smith alpha3 = x[7*i+2]; 1119ed8eea03SBarry Smith alpha4 = x[7*i+3]; 1120ed8eea03SBarry Smith alpha5 = x[7*i+4]; 1121ed8eea03SBarry Smith alpha6 = x[7*i+5]; 1122ed8eea03SBarry Smith alpha7 = x[7*i+6]; 1123ed8eea03SBarry Smith while (n-->0) { 1124ed8eea03SBarry Smith y[7*(*idx)] += alpha1*(*v); 1125ed8eea03SBarry Smith y[7*(*idx)+1] += alpha2*(*v); 1126ed8eea03SBarry Smith y[7*(*idx)+2] += alpha3*(*v); 1127ed8eea03SBarry Smith y[7*(*idx)+3] += alpha4*(*v); 1128ed8eea03SBarry Smith y[7*(*idx)+4] += alpha5*(*v); 1129ed8eea03SBarry Smith y[7*(*idx)+5] += alpha6*(*v); 1130ed8eea03SBarry Smith y[7*(*idx)+6] += alpha7*(*v); 1131ed8eea03SBarry Smith idx++; v++; 1132ed8eea03SBarry Smith } 1133ed8eea03SBarry Smith } 1134dc0b31edSSatish Balay ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 11353649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1136ed8eea03SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1137ed8eea03SBarry Smith PetscFunctionReturn(0); 1138ed8eea03SBarry Smith } 1139ed8eea03SBarry Smith 1140ed8eea03SBarry Smith #undef __FUNCT__ 1141ed8eea03SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_7" 1142ed8eea03SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1143ed8eea03SBarry Smith { 1144ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1145ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1146d2840e09SBarry Smith const PetscScalar *x,*v; 1147d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1148ed8eea03SBarry Smith PetscErrorCode ierr; 1149d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1150ed8eea03SBarry Smith PetscInt n,i,jrow,j; 1151ed8eea03SBarry Smith 1152ed8eea03SBarry Smith PetscFunctionBegin; 1153ed8eea03SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 11543649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1155ed8eea03SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1156ed8eea03SBarry Smith idx = a->j; 1157ed8eea03SBarry Smith v = a->a; 1158ed8eea03SBarry Smith ii = a->i; 1159ed8eea03SBarry Smith 1160ed8eea03SBarry Smith for (i=0; i<m; i++) { 1161ed8eea03SBarry Smith jrow = ii[i]; 1162ed8eea03SBarry Smith n = ii[i+1] - jrow; 1163ed8eea03SBarry Smith sum1 = 0.0; 1164ed8eea03SBarry Smith sum2 = 0.0; 1165ed8eea03SBarry Smith sum3 = 0.0; 1166ed8eea03SBarry Smith sum4 = 0.0; 1167ed8eea03SBarry Smith sum5 = 0.0; 1168ed8eea03SBarry Smith sum6 = 0.0; 1169ed8eea03SBarry Smith sum7 = 0.0; 1170ed8eea03SBarry Smith for (j=0; j<n; j++) { 1171ed8eea03SBarry Smith sum1 += v[jrow]*x[7*idx[jrow]]; 1172ed8eea03SBarry Smith sum2 += v[jrow]*x[7*idx[jrow]+1]; 1173ed8eea03SBarry Smith sum3 += v[jrow]*x[7*idx[jrow]+2]; 1174ed8eea03SBarry Smith sum4 += v[jrow]*x[7*idx[jrow]+3]; 1175ed8eea03SBarry Smith sum5 += v[jrow]*x[7*idx[jrow]+4]; 1176ed8eea03SBarry Smith sum6 += v[jrow]*x[7*idx[jrow]+5]; 1177ed8eea03SBarry Smith sum7 += v[jrow]*x[7*idx[jrow]+6]; 1178ed8eea03SBarry Smith jrow++; 1179ed8eea03SBarry Smith } 1180ed8eea03SBarry Smith y[7*i] += sum1; 1181ed8eea03SBarry Smith y[7*i+1] += sum2; 1182ed8eea03SBarry Smith y[7*i+2] += sum3; 1183ed8eea03SBarry Smith y[7*i+3] += sum4; 1184ed8eea03SBarry Smith y[7*i+4] += sum5; 1185ed8eea03SBarry Smith y[7*i+5] += sum6; 1186ed8eea03SBarry Smith y[7*i+6] += sum7; 1187ed8eea03SBarry Smith } 1188ed8eea03SBarry Smith 1189dc0b31edSSatish Balay ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 11903649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1191ed8eea03SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1192ed8eea03SBarry Smith PetscFunctionReturn(0); 1193ed8eea03SBarry Smith } 1194ed8eea03SBarry Smith 1195ed8eea03SBarry Smith #undef __FUNCT__ 1196ed8eea03SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7" 1197ed8eea03SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1198ed8eea03SBarry Smith { 1199ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1200ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1201d2840e09SBarry Smith const PetscScalar *x,*v; 1202d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1203ed8eea03SBarry Smith PetscErrorCode ierr; 1204d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1205d2840e09SBarry Smith PetscInt n,i; 1206ed8eea03SBarry Smith 1207ed8eea03SBarry Smith PetscFunctionBegin; 1208ed8eea03SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 12093649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1210ed8eea03SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1211ed8eea03SBarry Smith for (i=0; i<m; i++) { 1212ed8eea03SBarry Smith idx = a->j + a->i[i] ; 1213ed8eea03SBarry Smith v = a->a + a->i[i] ; 1214ed8eea03SBarry Smith n = a->i[i+1] - a->i[i]; 1215ed8eea03SBarry Smith alpha1 = x[7*i]; 1216ed8eea03SBarry Smith alpha2 = x[7*i+1]; 1217ed8eea03SBarry Smith alpha3 = x[7*i+2]; 1218ed8eea03SBarry Smith alpha4 = x[7*i+3]; 1219ed8eea03SBarry Smith alpha5 = x[7*i+4]; 1220ed8eea03SBarry Smith alpha6 = x[7*i+5]; 1221ed8eea03SBarry Smith alpha7 = x[7*i+6]; 1222ed8eea03SBarry Smith while (n-->0) { 1223ed8eea03SBarry Smith y[7*(*idx)] += alpha1*(*v); 1224ed8eea03SBarry Smith y[7*(*idx)+1] += alpha2*(*v); 1225ed8eea03SBarry Smith y[7*(*idx)+2] += alpha3*(*v); 1226ed8eea03SBarry Smith y[7*(*idx)+3] += alpha4*(*v); 1227ed8eea03SBarry Smith y[7*(*idx)+4] += alpha5*(*v); 1228ed8eea03SBarry Smith y[7*(*idx)+5] += alpha6*(*v); 1229ed8eea03SBarry Smith y[7*(*idx)+6] += alpha7*(*v); 1230ed8eea03SBarry Smith idx++; v++; 1231ed8eea03SBarry Smith } 1232ed8eea03SBarry Smith } 1233dc0b31edSSatish Balay ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 12343649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1235ed8eea03SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1236ed8eea03SBarry Smith PetscFunctionReturn(0); 1237ed8eea03SBarry Smith } 1238ed8eea03SBarry Smith 1239ed8eea03SBarry Smith #undef __FUNCT__ 124066ed3db0SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_8" 1241dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 124266ed3db0SBarry Smith { 124366ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 124466ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1245d2840e09SBarry Smith const PetscScalar *x,*v; 1246d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1247dfbe8321SBarry Smith PetscErrorCode ierr; 1248d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1249d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 125066ed3db0SBarry Smith 125166ed3db0SBarry Smith PetscFunctionBegin; 12523649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 12531ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 125466ed3db0SBarry Smith idx = a->j; 125566ed3db0SBarry Smith v = a->a; 125666ed3db0SBarry Smith ii = a->i; 125766ed3db0SBarry Smith 125866ed3db0SBarry Smith for (i=0; i<m; i++) { 125966ed3db0SBarry Smith jrow = ii[i]; 126066ed3db0SBarry Smith n = ii[i+1] - jrow; 126166ed3db0SBarry Smith sum1 = 0.0; 126266ed3db0SBarry Smith sum2 = 0.0; 126366ed3db0SBarry Smith sum3 = 0.0; 126466ed3db0SBarry Smith sum4 = 0.0; 126566ed3db0SBarry Smith sum5 = 0.0; 126666ed3db0SBarry Smith sum6 = 0.0; 126766ed3db0SBarry Smith sum7 = 0.0; 126866ed3db0SBarry Smith sum8 = 0.0; 1269b3c51c66SMatthew Knepley nonzerorow += (n>0); 127066ed3db0SBarry Smith for (j=0; j<n; j++) { 127166ed3db0SBarry Smith sum1 += v[jrow]*x[8*idx[jrow]]; 127266ed3db0SBarry Smith sum2 += v[jrow]*x[8*idx[jrow]+1]; 127366ed3db0SBarry Smith sum3 += v[jrow]*x[8*idx[jrow]+2]; 127466ed3db0SBarry Smith sum4 += v[jrow]*x[8*idx[jrow]+3]; 127566ed3db0SBarry Smith sum5 += v[jrow]*x[8*idx[jrow]+4]; 127666ed3db0SBarry Smith sum6 += v[jrow]*x[8*idx[jrow]+5]; 127766ed3db0SBarry Smith sum7 += v[jrow]*x[8*idx[jrow]+6]; 127866ed3db0SBarry Smith sum8 += v[jrow]*x[8*idx[jrow]+7]; 127966ed3db0SBarry Smith jrow++; 128066ed3db0SBarry Smith } 128166ed3db0SBarry Smith y[8*i] = sum1; 128266ed3db0SBarry Smith y[8*i+1] = sum2; 128366ed3db0SBarry Smith y[8*i+2] = sum3; 128466ed3db0SBarry Smith y[8*i+3] = sum4; 128566ed3db0SBarry Smith y[8*i+4] = sum5; 128666ed3db0SBarry Smith y[8*i+5] = sum6; 128766ed3db0SBarry Smith y[8*i+6] = sum7; 128866ed3db0SBarry Smith y[8*i+7] = sum8; 128966ed3db0SBarry Smith } 129066ed3db0SBarry Smith 1291dc0b31edSSatish Balay ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr); 12923649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 12931ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 129466ed3db0SBarry Smith PetscFunctionReturn(0); 129566ed3db0SBarry Smith } 129666ed3db0SBarry Smith 129766ed3db0SBarry Smith #undef __FUNCT__ 129866ed3db0SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 1299dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 130066ed3db0SBarry Smith { 130166ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 130266ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1303d2840e09SBarry Smith const PetscScalar *x,*v; 1304d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1305dfbe8321SBarry Smith PetscErrorCode ierr; 1306d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1307d2840e09SBarry Smith PetscInt n,i; 130866ed3db0SBarry Smith 130966ed3db0SBarry Smith PetscFunctionBegin; 1310d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 13113649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 13121ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1313bfec09a0SHong Zhang 131466ed3db0SBarry Smith for (i=0; i<m; i++) { 1315bfec09a0SHong Zhang idx = a->j + a->i[i] ; 1316bfec09a0SHong Zhang v = a->a + a->i[i] ; 131766ed3db0SBarry Smith n = a->i[i+1] - a->i[i]; 131866ed3db0SBarry Smith alpha1 = x[8*i]; 131966ed3db0SBarry Smith alpha2 = x[8*i+1]; 132066ed3db0SBarry Smith alpha3 = x[8*i+2]; 132166ed3db0SBarry Smith alpha4 = x[8*i+3]; 132266ed3db0SBarry Smith alpha5 = x[8*i+4]; 132366ed3db0SBarry Smith alpha6 = x[8*i+5]; 132466ed3db0SBarry Smith alpha7 = x[8*i+6]; 132566ed3db0SBarry Smith alpha8 = x[8*i+7]; 132666ed3db0SBarry Smith while (n-->0) { 132766ed3db0SBarry Smith y[8*(*idx)] += alpha1*(*v); 132866ed3db0SBarry Smith y[8*(*idx)+1] += alpha2*(*v); 132966ed3db0SBarry Smith y[8*(*idx)+2] += alpha3*(*v); 133066ed3db0SBarry Smith y[8*(*idx)+3] += alpha4*(*v); 133166ed3db0SBarry Smith y[8*(*idx)+4] += alpha5*(*v); 133266ed3db0SBarry Smith y[8*(*idx)+5] += alpha6*(*v); 133366ed3db0SBarry Smith y[8*(*idx)+6] += alpha7*(*v); 133466ed3db0SBarry Smith y[8*(*idx)+7] += alpha8*(*v); 133566ed3db0SBarry Smith idx++; v++; 133666ed3db0SBarry Smith } 133766ed3db0SBarry Smith } 1338dc0b31edSSatish Balay ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 13393649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 13401ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 134166ed3db0SBarry Smith PetscFunctionReturn(0); 134266ed3db0SBarry Smith } 134366ed3db0SBarry Smith 134466ed3db0SBarry Smith #undef __FUNCT__ 134566ed3db0SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1346dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 134766ed3db0SBarry Smith { 134866ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 134966ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1350d2840e09SBarry Smith const PetscScalar *x,*v; 1351d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1352dfbe8321SBarry Smith PetscErrorCode ierr; 1353d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1354b24ad042SBarry Smith PetscInt n,i,jrow,j; 135566ed3db0SBarry Smith 135666ed3db0SBarry Smith PetscFunctionBegin; 135766ed3db0SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 13583649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 13591ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 136066ed3db0SBarry Smith idx = a->j; 136166ed3db0SBarry Smith v = a->a; 136266ed3db0SBarry Smith ii = a->i; 136366ed3db0SBarry Smith 136466ed3db0SBarry Smith for (i=0; i<m; i++) { 136566ed3db0SBarry Smith jrow = ii[i]; 136666ed3db0SBarry Smith n = ii[i+1] - jrow; 136766ed3db0SBarry Smith sum1 = 0.0; 136866ed3db0SBarry Smith sum2 = 0.0; 136966ed3db0SBarry Smith sum3 = 0.0; 137066ed3db0SBarry Smith sum4 = 0.0; 137166ed3db0SBarry Smith sum5 = 0.0; 137266ed3db0SBarry Smith sum6 = 0.0; 137366ed3db0SBarry Smith sum7 = 0.0; 137466ed3db0SBarry Smith sum8 = 0.0; 137566ed3db0SBarry Smith for (j=0; j<n; j++) { 137666ed3db0SBarry Smith sum1 += v[jrow]*x[8*idx[jrow]]; 137766ed3db0SBarry Smith sum2 += v[jrow]*x[8*idx[jrow]+1]; 137866ed3db0SBarry Smith sum3 += v[jrow]*x[8*idx[jrow]+2]; 137966ed3db0SBarry Smith sum4 += v[jrow]*x[8*idx[jrow]+3]; 138066ed3db0SBarry Smith sum5 += v[jrow]*x[8*idx[jrow]+4]; 138166ed3db0SBarry Smith sum6 += v[jrow]*x[8*idx[jrow]+5]; 138266ed3db0SBarry Smith sum7 += v[jrow]*x[8*idx[jrow]+6]; 138366ed3db0SBarry Smith sum8 += v[jrow]*x[8*idx[jrow]+7]; 138466ed3db0SBarry Smith jrow++; 138566ed3db0SBarry Smith } 138666ed3db0SBarry Smith y[8*i] += sum1; 138766ed3db0SBarry Smith y[8*i+1] += sum2; 138866ed3db0SBarry Smith y[8*i+2] += sum3; 138966ed3db0SBarry Smith y[8*i+3] += sum4; 139066ed3db0SBarry Smith y[8*i+4] += sum5; 139166ed3db0SBarry Smith y[8*i+5] += sum6; 139266ed3db0SBarry Smith y[8*i+6] += sum7; 139366ed3db0SBarry Smith y[8*i+7] += sum8; 139466ed3db0SBarry Smith } 139566ed3db0SBarry Smith 1396dc0b31edSSatish Balay ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 13973649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 13981ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 139966ed3db0SBarry Smith PetscFunctionReturn(0); 140066ed3db0SBarry Smith } 140166ed3db0SBarry Smith 140266ed3db0SBarry Smith #undef __FUNCT__ 140366ed3db0SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1404dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 140566ed3db0SBarry Smith { 140666ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 140766ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1408d2840e09SBarry Smith const PetscScalar *x,*v; 1409d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1410dfbe8321SBarry Smith PetscErrorCode ierr; 1411d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1412d2840e09SBarry Smith PetscInt n,i; 141366ed3db0SBarry Smith 141466ed3db0SBarry Smith PetscFunctionBegin; 141566ed3db0SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 14163649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 14171ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 141866ed3db0SBarry Smith for (i=0; i<m; i++) { 1419bfec09a0SHong Zhang idx = a->j + a->i[i] ; 1420bfec09a0SHong Zhang v = a->a + a->i[i] ; 142166ed3db0SBarry Smith n = a->i[i+1] - a->i[i]; 142266ed3db0SBarry Smith alpha1 = x[8*i]; 142366ed3db0SBarry Smith alpha2 = x[8*i+1]; 142466ed3db0SBarry Smith alpha3 = x[8*i+2]; 142566ed3db0SBarry Smith alpha4 = x[8*i+3]; 142666ed3db0SBarry Smith alpha5 = x[8*i+4]; 142766ed3db0SBarry Smith alpha6 = x[8*i+5]; 142866ed3db0SBarry Smith alpha7 = x[8*i+6]; 142966ed3db0SBarry Smith alpha8 = x[8*i+7]; 143066ed3db0SBarry Smith while (n-->0) { 143166ed3db0SBarry Smith y[8*(*idx)] += alpha1*(*v); 143266ed3db0SBarry Smith y[8*(*idx)+1] += alpha2*(*v); 143366ed3db0SBarry Smith y[8*(*idx)+2] += alpha3*(*v); 143466ed3db0SBarry Smith y[8*(*idx)+3] += alpha4*(*v); 143566ed3db0SBarry Smith y[8*(*idx)+4] += alpha5*(*v); 143666ed3db0SBarry Smith y[8*(*idx)+5] += alpha6*(*v); 143766ed3db0SBarry Smith y[8*(*idx)+6] += alpha7*(*v); 143866ed3db0SBarry Smith y[8*(*idx)+7] += alpha8*(*v); 143966ed3db0SBarry Smith idx++; v++; 144066ed3db0SBarry Smith } 144166ed3db0SBarry Smith } 1442dc0b31edSSatish Balay ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 14433649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 14441ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 14452f7816d4SBarry Smith PetscFunctionReturn(0); 14462f7816d4SBarry Smith } 14472f7816d4SBarry Smith 14482b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/ 14492b692628SMatthew Knepley #undef __FUNCT__ 14502b692628SMatthew Knepley #define __FUNCT__ "MatMult_SeqMAIJ_9" 1451dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 14522b692628SMatthew Knepley { 14532b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 14542b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1455d2840e09SBarry Smith const PetscScalar *x,*v; 1456d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1457dfbe8321SBarry Smith PetscErrorCode ierr; 1458d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1459d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 14602b692628SMatthew Knepley 14612b692628SMatthew Knepley PetscFunctionBegin; 14623649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 14631ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 14642b692628SMatthew Knepley idx = a->j; 14652b692628SMatthew Knepley v = a->a; 14662b692628SMatthew Knepley ii = a->i; 14672b692628SMatthew Knepley 14682b692628SMatthew Knepley for (i=0; i<m; i++) { 14692b692628SMatthew Knepley jrow = ii[i]; 14702b692628SMatthew Knepley n = ii[i+1] - jrow; 14712b692628SMatthew Knepley sum1 = 0.0; 14722b692628SMatthew Knepley sum2 = 0.0; 14732b692628SMatthew Knepley sum3 = 0.0; 14742b692628SMatthew Knepley sum4 = 0.0; 14752b692628SMatthew Knepley sum5 = 0.0; 14762b692628SMatthew Knepley sum6 = 0.0; 14772b692628SMatthew Knepley sum7 = 0.0; 14782b692628SMatthew Knepley sum8 = 0.0; 14792b692628SMatthew Knepley sum9 = 0.0; 1480b3c51c66SMatthew Knepley nonzerorow += (n>0); 14812b692628SMatthew Knepley for (j=0; j<n; j++) { 14822b692628SMatthew Knepley sum1 += v[jrow]*x[9*idx[jrow]]; 14832b692628SMatthew Knepley sum2 += v[jrow]*x[9*idx[jrow]+1]; 14842b692628SMatthew Knepley sum3 += v[jrow]*x[9*idx[jrow]+2]; 14852b692628SMatthew Knepley sum4 += v[jrow]*x[9*idx[jrow]+3]; 14862b692628SMatthew Knepley sum5 += v[jrow]*x[9*idx[jrow]+4]; 14872b692628SMatthew Knepley sum6 += v[jrow]*x[9*idx[jrow]+5]; 14882b692628SMatthew Knepley sum7 += v[jrow]*x[9*idx[jrow]+6]; 14892b692628SMatthew Knepley sum8 += v[jrow]*x[9*idx[jrow]+7]; 14902b692628SMatthew Knepley sum9 += v[jrow]*x[9*idx[jrow]+8]; 14912b692628SMatthew Knepley jrow++; 14922b692628SMatthew Knepley } 14932b692628SMatthew Knepley y[9*i] = sum1; 14942b692628SMatthew Knepley y[9*i+1] = sum2; 14952b692628SMatthew Knepley y[9*i+2] = sum3; 14962b692628SMatthew Knepley y[9*i+3] = sum4; 14972b692628SMatthew Knepley y[9*i+4] = sum5; 14982b692628SMatthew Knepley y[9*i+5] = sum6; 14992b692628SMatthew Knepley y[9*i+6] = sum7; 15002b692628SMatthew Knepley y[9*i+7] = sum8; 15012b692628SMatthew Knepley y[9*i+8] = sum9; 15022b692628SMatthew Knepley } 15032b692628SMatthew Knepley 1504dc0b31edSSatish Balay ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr); 15053649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 15061ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 15072b692628SMatthew Knepley PetscFunctionReturn(0); 15082b692628SMatthew Knepley } 15092b692628SMatthew Knepley 1510b9cda22cSBarry Smith /* ------------------------------------------------------------------------------*/ 1511b9cda22cSBarry Smith 15122b692628SMatthew Knepley #undef __FUNCT__ 15132b692628SMatthew Knepley #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9" 1514dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 15152b692628SMatthew Knepley { 15162b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 15172b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1518d2840e09SBarry Smith const PetscScalar *x,*v; 1519d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1520dfbe8321SBarry Smith PetscErrorCode ierr; 1521d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1522d2840e09SBarry Smith PetscInt n,i; 15232b692628SMatthew Knepley 15242b692628SMatthew Knepley PetscFunctionBegin; 1525d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 15263649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 15271ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 15282b692628SMatthew Knepley 15292b692628SMatthew Knepley for (i=0; i<m; i++) { 15302b692628SMatthew Knepley idx = a->j + a->i[i] ; 15312b692628SMatthew Knepley v = a->a + a->i[i] ; 15322b692628SMatthew Knepley n = a->i[i+1] - a->i[i]; 15332b692628SMatthew Knepley alpha1 = x[9*i]; 15342b692628SMatthew Knepley alpha2 = x[9*i+1]; 15352b692628SMatthew Knepley alpha3 = x[9*i+2]; 15362b692628SMatthew Knepley alpha4 = x[9*i+3]; 15372b692628SMatthew Knepley alpha5 = x[9*i+4]; 15382b692628SMatthew Knepley alpha6 = x[9*i+5]; 15392b692628SMatthew Knepley alpha7 = x[9*i+6]; 15402b692628SMatthew Knepley alpha8 = x[9*i+7]; 15412f6bd0c6SMatthew Knepley alpha9 = x[9*i+8]; 15422b692628SMatthew Knepley while (n-->0) { 15432b692628SMatthew Knepley y[9*(*idx)] += alpha1*(*v); 15442b692628SMatthew Knepley y[9*(*idx)+1] += alpha2*(*v); 15452b692628SMatthew Knepley y[9*(*idx)+2] += alpha3*(*v); 15462b692628SMatthew Knepley y[9*(*idx)+3] += alpha4*(*v); 15472b692628SMatthew Knepley y[9*(*idx)+4] += alpha5*(*v); 15482b692628SMatthew Knepley y[9*(*idx)+5] += alpha6*(*v); 15492b692628SMatthew Knepley y[9*(*idx)+6] += alpha7*(*v); 15502b692628SMatthew Knepley y[9*(*idx)+7] += alpha8*(*v); 15512b692628SMatthew Knepley y[9*(*idx)+8] += alpha9*(*v); 15522b692628SMatthew Knepley idx++; v++; 15532b692628SMatthew Knepley } 15542b692628SMatthew Knepley } 1555dc0b31edSSatish Balay ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 15563649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 15571ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 15582b692628SMatthew Knepley PetscFunctionReturn(0); 15592b692628SMatthew Knepley } 15602b692628SMatthew Knepley 15612b692628SMatthew Knepley #undef __FUNCT__ 15622b692628SMatthew Knepley #define __FUNCT__ "MatMultAdd_SeqMAIJ_9" 1563dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 15642b692628SMatthew Knepley { 15652b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 15662b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1567d2840e09SBarry Smith const PetscScalar *x,*v; 1568d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1569dfbe8321SBarry Smith PetscErrorCode ierr; 1570d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1571b24ad042SBarry Smith PetscInt n,i,jrow,j; 15722b692628SMatthew Knepley 15732b692628SMatthew Knepley PetscFunctionBegin; 15742b692628SMatthew Knepley if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 15753649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 15761ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 15772b692628SMatthew Knepley idx = a->j; 15782b692628SMatthew Knepley v = a->a; 15792b692628SMatthew Knepley ii = a->i; 15802b692628SMatthew Knepley 15812b692628SMatthew Knepley for (i=0; i<m; i++) { 15822b692628SMatthew Knepley jrow = ii[i]; 15832b692628SMatthew Knepley n = ii[i+1] - jrow; 15842b692628SMatthew Knepley sum1 = 0.0; 15852b692628SMatthew Knepley sum2 = 0.0; 15862b692628SMatthew Knepley sum3 = 0.0; 15872b692628SMatthew Knepley sum4 = 0.0; 15882b692628SMatthew Knepley sum5 = 0.0; 15892b692628SMatthew Knepley sum6 = 0.0; 15902b692628SMatthew Knepley sum7 = 0.0; 15912b692628SMatthew Knepley sum8 = 0.0; 15922b692628SMatthew Knepley sum9 = 0.0; 15932b692628SMatthew Knepley for (j=0; j<n; j++) { 15942b692628SMatthew Knepley sum1 += v[jrow]*x[9*idx[jrow]]; 15952b692628SMatthew Knepley sum2 += v[jrow]*x[9*idx[jrow]+1]; 15962b692628SMatthew Knepley sum3 += v[jrow]*x[9*idx[jrow]+2]; 15972b692628SMatthew Knepley sum4 += v[jrow]*x[9*idx[jrow]+3]; 15982b692628SMatthew Knepley sum5 += v[jrow]*x[9*idx[jrow]+4]; 15992b692628SMatthew Knepley sum6 += v[jrow]*x[9*idx[jrow]+5]; 16002b692628SMatthew Knepley sum7 += v[jrow]*x[9*idx[jrow]+6]; 16012b692628SMatthew Knepley sum8 += v[jrow]*x[9*idx[jrow]+7]; 16022b692628SMatthew Knepley sum9 += v[jrow]*x[9*idx[jrow]+8]; 16032b692628SMatthew Knepley jrow++; 16042b692628SMatthew Knepley } 16052b692628SMatthew Knepley y[9*i] += sum1; 16062b692628SMatthew Knepley y[9*i+1] += sum2; 16072b692628SMatthew Knepley y[9*i+2] += sum3; 16082b692628SMatthew Knepley y[9*i+3] += sum4; 16092b692628SMatthew Knepley y[9*i+4] += sum5; 16102b692628SMatthew Knepley y[9*i+5] += sum6; 16112b692628SMatthew Knepley y[9*i+6] += sum7; 16122b692628SMatthew Knepley y[9*i+7] += sum8; 16132b692628SMatthew Knepley y[9*i+8] += sum9; 16142b692628SMatthew Knepley } 16152b692628SMatthew Knepley 1616efee365bSSatish Balay ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 16173649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 16181ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 16192b692628SMatthew Knepley PetscFunctionReturn(0); 16202b692628SMatthew Knepley } 16212b692628SMatthew Knepley 16222b692628SMatthew Knepley #undef __FUNCT__ 16232b692628SMatthew Knepley #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9" 1624dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 16252b692628SMatthew Knepley { 16262b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 16272b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1628d2840e09SBarry Smith const PetscScalar *x,*v; 1629d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1630dfbe8321SBarry Smith PetscErrorCode ierr; 1631d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1632d2840e09SBarry Smith PetscInt n,i; 16332b692628SMatthew Knepley 16342b692628SMatthew Knepley PetscFunctionBegin; 16352b692628SMatthew Knepley if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 16363649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 16371ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 16382b692628SMatthew Knepley for (i=0; i<m; i++) { 16392b692628SMatthew Knepley idx = a->j + a->i[i] ; 16402b692628SMatthew Knepley v = a->a + a->i[i] ; 16412b692628SMatthew Knepley n = a->i[i+1] - a->i[i]; 16422b692628SMatthew Knepley alpha1 = x[9*i]; 16432b692628SMatthew Knepley alpha2 = x[9*i+1]; 16442b692628SMatthew Knepley alpha3 = x[9*i+2]; 16452b692628SMatthew Knepley alpha4 = x[9*i+3]; 16462b692628SMatthew Knepley alpha5 = x[9*i+4]; 16472b692628SMatthew Knepley alpha6 = x[9*i+5]; 16482b692628SMatthew Knepley alpha7 = x[9*i+6]; 16492b692628SMatthew Knepley alpha8 = x[9*i+7]; 16502b692628SMatthew Knepley alpha9 = x[9*i+8]; 16512b692628SMatthew Knepley while (n-->0) { 16522b692628SMatthew Knepley y[9*(*idx)] += alpha1*(*v); 16532b692628SMatthew Knepley y[9*(*idx)+1] += alpha2*(*v); 16542b692628SMatthew Knepley y[9*(*idx)+2] += alpha3*(*v); 16552b692628SMatthew Knepley y[9*(*idx)+3] += alpha4*(*v); 16562b692628SMatthew Knepley y[9*(*idx)+4] += alpha5*(*v); 16572b692628SMatthew Knepley y[9*(*idx)+5] += alpha6*(*v); 16582b692628SMatthew Knepley y[9*(*idx)+6] += alpha7*(*v); 16592b692628SMatthew Knepley y[9*(*idx)+7] += alpha8*(*v); 16602b692628SMatthew Knepley y[9*(*idx)+8] += alpha9*(*v); 16612b692628SMatthew Knepley idx++; v++; 16622b692628SMatthew Knepley } 16632b692628SMatthew Knepley } 1664dc0b31edSSatish Balay ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 16653649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 16661ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 16672b692628SMatthew Knepley PetscFunctionReturn(0); 16682b692628SMatthew Knepley } 1669b9cda22cSBarry Smith #undef __FUNCT__ 1670b9cda22cSBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_10" 1671b9cda22cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1672b9cda22cSBarry Smith { 1673b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1674b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1675d2840e09SBarry Smith const PetscScalar *x,*v; 1676d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1677b9cda22cSBarry Smith PetscErrorCode ierr; 1678d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1679d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 1680b9cda22cSBarry Smith 1681b9cda22cSBarry Smith PetscFunctionBegin; 16823649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1683b9cda22cSBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1684b9cda22cSBarry Smith idx = a->j; 1685b9cda22cSBarry Smith v = a->a; 1686b9cda22cSBarry Smith ii = a->i; 1687b9cda22cSBarry Smith 1688b9cda22cSBarry Smith for (i=0; i<m; i++) { 1689b9cda22cSBarry Smith jrow = ii[i]; 1690b9cda22cSBarry Smith n = ii[i+1] - jrow; 1691b9cda22cSBarry Smith sum1 = 0.0; 1692b9cda22cSBarry Smith sum2 = 0.0; 1693b9cda22cSBarry Smith sum3 = 0.0; 1694b9cda22cSBarry Smith sum4 = 0.0; 1695b9cda22cSBarry Smith sum5 = 0.0; 1696b9cda22cSBarry Smith sum6 = 0.0; 1697b9cda22cSBarry Smith sum7 = 0.0; 1698b9cda22cSBarry Smith sum8 = 0.0; 1699b9cda22cSBarry Smith sum9 = 0.0; 1700b9cda22cSBarry Smith sum10 = 0.0; 1701b3c51c66SMatthew Knepley nonzerorow += (n>0); 1702b9cda22cSBarry Smith for (j=0; j<n; j++) { 1703b9cda22cSBarry Smith sum1 += v[jrow]*x[10*idx[jrow]]; 1704b9cda22cSBarry Smith sum2 += v[jrow]*x[10*idx[jrow]+1]; 1705b9cda22cSBarry Smith sum3 += v[jrow]*x[10*idx[jrow]+2]; 1706b9cda22cSBarry Smith sum4 += v[jrow]*x[10*idx[jrow]+3]; 1707b9cda22cSBarry Smith sum5 += v[jrow]*x[10*idx[jrow]+4]; 1708b9cda22cSBarry Smith sum6 += v[jrow]*x[10*idx[jrow]+5]; 1709b9cda22cSBarry Smith sum7 += v[jrow]*x[10*idx[jrow]+6]; 1710b9cda22cSBarry Smith sum8 += v[jrow]*x[10*idx[jrow]+7]; 1711b9cda22cSBarry Smith sum9 += v[jrow]*x[10*idx[jrow]+8]; 1712b9cda22cSBarry Smith sum10 += v[jrow]*x[10*idx[jrow]+9]; 1713b9cda22cSBarry Smith jrow++; 1714b9cda22cSBarry Smith } 1715b9cda22cSBarry Smith y[10*i] = sum1; 1716b9cda22cSBarry Smith y[10*i+1] = sum2; 1717b9cda22cSBarry Smith y[10*i+2] = sum3; 1718b9cda22cSBarry Smith y[10*i+3] = sum4; 1719b9cda22cSBarry Smith y[10*i+4] = sum5; 1720b9cda22cSBarry Smith y[10*i+5] = sum6; 1721b9cda22cSBarry Smith y[10*i+6] = sum7; 1722b9cda22cSBarry Smith y[10*i+7] = sum8; 1723b9cda22cSBarry Smith y[10*i+8] = sum9; 1724b9cda22cSBarry Smith y[10*i+9] = sum10; 1725b9cda22cSBarry Smith } 1726b9cda22cSBarry Smith 1727dc0b31edSSatish Balay ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr); 17283649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1729b9cda22cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1730b9cda22cSBarry Smith PetscFunctionReturn(0); 1731b9cda22cSBarry Smith } 1732b9cda22cSBarry Smith 1733b9cda22cSBarry Smith #undef __FUNCT__ 1734dbdd7285SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_10" 1735b9cda22cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1736b9cda22cSBarry Smith { 1737b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1738b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1739d2840e09SBarry Smith const PetscScalar *x,*v; 1740d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1741b9cda22cSBarry Smith PetscErrorCode ierr; 1742d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1743b9cda22cSBarry Smith PetscInt n,i,jrow,j; 1744b9cda22cSBarry Smith 1745b9cda22cSBarry Smith PetscFunctionBegin; 1746b9cda22cSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 17473649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1748b9cda22cSBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1749b9cda22cSBarry Smith idx = a->j; 1750b9cda22cSBarry Smith v = a->a; 1751b9cda22cSBarry Smith ii = a->i; 1752b9cda22cSBarry Smith 1753b9cda22cSBarry Smith for (i=0; i<m; i++) { 1754b9cda22cSBarry Smith jrow = ii[i]; 1755b9cda22cSBarry Smith n = ii[i+1] - jrow; 1756b9cda22cSBarry Smith sum1 = 0.0; 1757b9cda22cSBarry Smith sum2 = 0.0; 1758b9cda22cSBarry Smith sum3 = 0.0; 1759b9cda22cSBarry Smith sum4 = 0.0; 1760b9cda22cSBarry Smith sum5 = 0.0; 1761b9cda22cSBarry Smith sum6 = 0.0; 1762b9cda22cSBarry Smith sum7 = 0.0; 1763b9cda22cSBarry Smith sum8 = 0.0; 1764b9cda22cSBarry Smith sum9 = 0.0; 1765b9cda22cSBarry Smith sum10 = 0.0; 1766b9cda22cSBarry Smith for (j=0; j<n; j++) { 1767b9cda22cSBarry Smith sum1 += v[jrow]*x[10*idx[jrow]]; 1768b9cda22cSBarry Smith sum2 += v[jrow]*x[10*idx[jrow]+1]; 1769b9cda22cSBarry Smith sum3 += v[jrow]*x[10*idx[jrow]+2]; 1770b9cda22cSBarry Smith sum4 += v[jrow]*x[10*idx[jrow]+3]; 1771b9cda22cSBarry Smith sum5 += v[jrow]*x[10*idx[jrow]+4]; 1772b9cda22cSBarry Smith sum6 += v[jrow]*x[10*idx[jrow]+5]; 1773b9cda22cSBarry Smith sum7 += v[jrow]*x[10*idx[jrow]+6]; 1774b9cda22cSBarry Smith sum8 += v[jrow]*x[10*idx[jrow]+7]; 1775b9cda22cSBarry Smith sum9 += v[jrow]*x[10*idx[jrow]+8]; 1776b9cda22cSBarry Smith sum10 += v[jrow]*x[10*idx[jrow]+9]; 1777b9cda22cSBarry Smith jrow++; 1778b9cda22cSBarry Smith } 1779b9cda22cSBarry Smith y[10*i] += sum1; 1780b9cda22cSBarry Smith y[10*i+1] += sum2; 1781b9cda22cSBarry Smith y[10*i+2] += sum3; 1782b9cda22cSBarry Smith y[10*i+3] += sum4; 1783b9cda22cSBarry Smith y[10*i+4] += sum5; 1784b9cda22cSBarry Smith y[10*i+5] += sum6; 1785b9cda22cSBarry Smith y[10*i+6] += sum7; 1786b9cda22cSBarry Smith y[10*i+7] += sum8; 1787b9cda22cSBarry Smith y[10*i+8] += sum9; 1788b9cda22cSBarry Smith y[10*i+9] += sum10; 1789b9cda22cSBarry Smith } 1790b9cda22cSBarry Smith 1791dc0b31edSSatish Balay ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 17923649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1793b9cda22cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1794b9cda22cSBarry Smith PetscFunctionReturn(0); 1795b9cda22cSBarry Smith } 1796b9cda22cSBarry Smith 1797b9cda22cSBarry Smith #undef __FUNCT__ 1798b9cda22cSBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10" 1799b9cda22cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1800b9cda22cSBarry Smith { 1801b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1802b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1803d2840e09SBarry Smith const PetscScalar *x,*v; 1804d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1805b9cda22cSBarry Smith PetscErrorCode ierr; 1806d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1807d2840e09SBarry Smith PetscInt n,i; 1808b9cda22cSBarry Smith 1809b9cda22cSBarry Smith PetscFunctionBegin; 1810d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 18113649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1812b9cda22cSBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1813b9cda22cSBarry Smith 1814b9cda22cSBarry Smith for (i=0; i<m; i++) { 1815b9cda22cSBarry Smith idx = a->j + a->i[i] ; 1816b9cda22cSBarry Smith v = a->a + a->i[i] ; 1817b9cda22cSBarry Smith n = a->i[i+1] - a->i[i]; 1818e29fdc22SBarry Smith alpha1 = x[10*i]; 1819e29fdc22SBarry Smith alpha2 = x[10*i+1]; 1820e29fdc22SBarry Smith alpha3 = x[10*i+2]; 1821e29fdc22SBarry Smith alpha4 = x[10*i+3]; 1822e29fdc22SBarry Smith alpha5 = x[10*i+4]; 1823e29fdc22SBarry Smith alpha6 = x[10*i+5]; 1824e29fdc22SBarry Smith alpha7 = x[10*i+6]; 1825e29fdc22SBarry Smith alpha8 = x[10*i+7]; 1826e29fdc22SBarry Smith alpha9 = x[10*i+8]; 1827b9cda22cSBarry Smith alpha10 = x[10*i+9]; 1828b9cda22cSBarry Smith while (n-->0) { 1829e29fdc22SBarry Smith y[10*(*idx)] += alpha1*(*v); 1830e29fdc22SBarry Smith y[10*(*idx)+1] += alpha2*(*v); 1831e29fdc22SBarry Smith y[10*(*idx)+2] += alpha3*(*v); 1832e29fdc22SBarry Smith y[10*(*idx)+3] += alpha4*(*v); 1833e29fdc22SBarry Smith y[10*(*idx)+4] += alpha5*(*v); 1834e29fdc22SBarry Smith y[10*(*idx)+5] += alpha6*(*v); 1835e29fdc22SBarry Smith y[10*(*idx)+6] += alpha7*(*v); 1836e29fdc22SBarry Smith y[10*(*idx)+7] += alpha8*(*v); 1837e29fdc22SBarry Smith y[10*(*idx)+8] += alpha9*(*v); 1838b9cda22cSBarry Smith y[10*(*idx)+9] += alpha10*(*v); 1839b9cda22cSBarry Smith idx++; v++; 1840b9cda22cSBarry Smith } 1841b9cda22cSBarry Smith } 1842dc0b31edSSatish Balay ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 18433649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1844b9cda22cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1845b9cda22cSBarry Smith PetscFunctionReturn(0); 1846b9cda22cSBarry Smith } 1847b9cda22cSBarry Smith 1848b9cda22cSBarry Smith #undef __FUNCT__ 1849b9cda22cSBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10" 1850b9cda22cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1851b9cda22cSBarry Smith { 1852b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1853b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1854d2840e09SBarry Smith const PetscScalar *x,*v; 1855d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1856b9cda22cSBarry Smith PetscErrorCode ierr; 1857d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 1858d2840e09SBarry Smith PetscInt n,i; 1859b9cda22cSBarry Smith 1860b9cda22cSBarry Smith PetscFunctionBegin; 1861b9cda22cSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 18623649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1863b9cda22cSBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1864b9cda22cSBarry Smith for (i=0; i<m; i++) { 1865b9cda22cSBarry Smith idx = a->j + a->i[i] ; 1866b9cda22cSBarry Smith v = a->a + a->i[i] ; 1867b9cda22cSBarry Smith n = a->i[i+1] - a->i[i]; 1868b9cda22cSBarry Smith alpha1 = x[10*i]; 1869b9cda22cSBarry Smith alpha2 = x[10*i+1]; 1870b9cda22cSBarry Smith alpha3 = x[10*i+2]; 1871b9cda22cSBarry Smith alpha4 = x[10*i+3]; 1872b9cda22cSBarry Smith alpha5 = x[10*i+4]; 1873b9cda22cSBarry Smith alpha6 = x[10*i+5]; 1874b9cda22cSBarry Smith alpha7 = x[10*i+6]; 1875b9cda22cSBarry Smith alpha8 = x[10*i+7]; 1876b9cda22cSBarry Smith alpha9 = x[10*i+8]; 1877b9cda22cSBarry Smith alpha10 = x[10*i+9]; 1878b9cda22cSBarry Smith while (n-->0) { 1879b9cda22cSBarry Smith y[10*(*idx)] += alpha1*(*v); 1880b9cda22cSBarry Smith y[10*(*idx)+1] += alpha2*(*v); 1881b9cda22cSBarry Smith y[10*(*idx)+2] += alpha3*(*v); 1882b9cda22cSBarry Smith y[10*(*idx)+3] += alpha4*(*v); 1883b9cda22cSBarry Smith y[10*(*idx)+4] += alpha5*(*v); 1884b9cda22cSBarry Smith y[10*(*idx)+5] += alpha6*(*v); 1885b9cda22cSBarry Smith y[10*(*idx)+6] += alpha7*(*v); 1886b9cda22cSBarry Smith y[10*(*idx)+7] += alpha8*(*v); 1887b9cda22cSBarry Smith y[10*(*idx)+8] += alpha9*(*v); 1888b9cda22cSBarry Smith y[10*(*idx)+9] += alpha10*(*v); 1889b9cda22cSBarry Smith idx++; v++; 1890b9cda22cSBarry Smith } 1891b9cda22cSBarry Smith } 1892dc0b31edSSatish Balay ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 18933649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1894b9cda22cSBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1895b9cda22cSBarry Smith PetscFunctionReturn(0); 1896b9cda22cSBarry Smith } 1897b9cda22cSBarry Smith 18982b692628SMatthew Knepley 18992f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/ 19002f7816d4SBarry Smith #undef __FUNCT__ 1901dbdd7285SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_11" 1902dbdd7285SBarry Smith PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1903dbdd7285SBarry Smith { 1904dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1905dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1906d2840e09SBarry Smith const PetscScalar *x,*v; 1907d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1908dbdd7285SBarry Smith PetscErrorCode ierr; 1909d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1910d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 1911dbdd7285SBarry Smith 1912dbdd7285SBarry Smith PetscFunctionBegin; 19133649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1914dbdd7285SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1915dbdd7285SBarry Smith idx = a->j; 1916dbdd7285SBarry Smith v = a->a; 1917dbdd7285SBarry Smith ii = a->i; 1918dbdd7285SBarry Smith 1919dbdd7285SBarry Smith for (i=0; i<m; i++) { 1920dbdd7285SBarry Smith jrow = ii[i]; 1921dbdd7285SBarry Smith n = ii[i+1] - jrow; 1922dbdd7285SBarry Smith sum1 = 0.0; 1923dbdd7285SBarry Smith sum2 = 0.0; 1924dbdd7285SBarry Smith sum3 = 0.0; 1925dbdd7285SBarry Smith sum4 = 0.0; 1926dbdd7285SBarry Smith sum5 = 0.0; 1927dbdd7285SBarry Smith sum6 = 0.0; 1928dbdd7285SBarry Smith sum7 = 0.0; 1929dbdd7285SBarry Smith sum8 = 0.0; 1930dbdd7285SBarry Smith sum9 = 0.0; 1931dbdd7285SBarry Smith sum10 = 0.0; 1932dbdd7285SBarry Smith sum11 = 0.0; 1933dbdd7285SBarry Smith nonzerorow += (n>0); 1934dbdd7285SBarry Smith for (j=0; j<n; j++) { 1935dbdd7285SBarry Smith sum1 += v[jrow]*x[11*idx[jrow]]; 1936dbdd7285SBarry Smith sum2 += v[jrow]*x[11*idx[jrow]+1]; 1937dbdd7285SBarry Smith sum3 += v[jrow]*x[11*idx[jrow]+2]; 1938dbdd7285SBarry Smith sum4 += v[jrow]*x[11*idx[jrow]+3]; 1939dbdd7285SBarry Smith sum5 += v[jrow]*x[11*idx[jrow]+4]; 1940dbdd7285SBarry Smith sum6 += v[jrow]*x[11*idx[jrow]+5]; 1941dbdd7285SBarry Smith sum7 += v[jrow]*x[11*idx[jrow]+6]; 1942dbdd7285SBarry Smith sum8 += v[jrow]*x[11*idx[jrow]+7]; 1943dbdd7285SBarry Smith sum9 += v[jrow]*x[11*idx[jrow]+8]; 1944dbdd7285SBarry Smith sum10 += v[jrow]*x[11*idx[jrow]+9]; 1945dbdd7285SBarry Smith sum11 += v[jrow]*x[11*idx[jrow]+10]; 1946dbdd7285SBarry Smith jrow++; 1947dbdd7285SBarry Smith } 1948dbdd7285SBarry Smith y[11*i] = sum1; 1949dbdd7285SBarry Smith y[11*i+1] = sum2; 1950dbdd7285SBarry Smith y[11*i+2] = sum3; 1951dbdd7285SBarry Smith y[11*i+3] = sum4; 1952dbdd7285SBarry Smith y[11*i+4] = sum5; 1953dbdd7285SBarry Smith y[11*i+5] = sum6; 1954dbdd7285SBarry Smith y[11*i+6] = sum7; 1955dbdd7285SBarry Smith y[11*i+7] = sum8; 1956dbdd7285SBarry Smith y[11*i+8] = sum9; 1957dbdd7285SBarry Smith y[11*i+9] = sum10; 1958dbdd7285SBarry Smith y[11*i+10] = sum11; 1959dbdd7285SBarry Smith } 1960dbdd7285SBarry Smith 1961dbdd7285SBarry Smith ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr); 19623649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1963dbdd7285SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1964dbdd7285SBarry Smith PetscFunctionReturn(0); 1965dbdd7285SBarry Smith } 1966dbdd7285SBarry Smith 1967dbdd7285SBarry Smith #undef __FUNCT__ 1968dbdd7285SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_11" 1969dbdd7285SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 1970dbdd7285SBarry Smith { 1971dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1972dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1973d2840e09SBarry Smith const PetscScalar *x,*v; 1974d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1975dbdd7285SBarry Smith PetscErrorCode ierr; 1976d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1977dbdd7285SBarry Smith PetscInt n,i,jrow,j; 1978dbdd7285SBarry Smith 1979dbdd7285SBarry Smith PetscFunctionBegin; 1980dbdd7285SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 19813649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1982dbdd7285SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1983dbdd7285SBarry Smith idx = a->j; 1984dbdd7285SBarry Smith v = a->a; 1985dbdd7285SBarry Smith ii = a->i; 1986dbdd7285SBarry Smith 1987dbdd7285SBarry Smith for (i=0; i<m; i++) { 1988dbdd7285SBarry Smith jrow = ii[i]; 1989dbdd7285SBarry Smith n = ii[i+1] - jrow; 1990dbdd7285SBarry Smith sum1 = 0.0; 1991dbdd7285SBarry Smith sum2 = 0.0; 1992dbdd7285SBarry Smith sum3 = 0.0; 1993dbdd7285SBarry Smith sum4 = 0.0; 1994dbdd7285SBarry Smith sum5 = 0.0; 1995dbdd7285SBarry Smith sum6 = 0.0; 1996dbdd7285SBarry Smith sum7 = 0.0; 1997dbdd7285SBarry Smith sum8 = 0.0; 1998dbdd7285SBarry Smith sum9 = 0.0; 1999dbdd7285SBarry Smith sum10 = 0.0; 2000dbdd7285SBarry Smith sum11 = 0.0; 2001dbdd7285SBarry Smith for (j=0; j<n; j++) { 2002dbdd7285SBarry Smith sum1 += v[jrow]*x[11*idx[jrow]]; 2003dbdd7285SBarry Smith sum2 += v[jrow]*x[11*idx[jrow]+1]; 2004dbdd7285SBarry Smith sum3 += v[jrow]*x[11*idx[jrow]+2]; 2005dbdd7285SBarry Smith sum4 += v[jrow]*x[11*idx[jrow]+3]; 2006dbdd7285SBarry Smith sum5 += v[jrow]*x[11*idx[jrow]+4]; 2007dbdd7285SBarry Smith sum6 += v[jrow]*x[11*idx[jrow]+5]; 2008dbdd7285SBarry Smith sum7 += v[jrow]*x[11*idx[jrow]+6]; 2009dbdd7285SBarry Smith sum8 += v[jrow]*x[11*idx[jrow]+7]; 2010dbdd7285SBarry Smith sum9 += v[jrow]*x[11*idx[jrow]+8]; 2011dbdd7285SBarry Smith sum10 += v[jrow]*x[11*idx[jrow]+9]; 2012dbdd7285SBarry Smith sum11 += v[jrow]*x[11*idx[jrow]+10]; 2013dbdd7285SBarry Smith jrow++; 2014dbdd7285SBarry Smith } 2015dbdd7285SBarry Smith y[11*i] += sum1; 2016dbdd7285SBarry Smith y[11*i+1] += sum2; 2017dbdd7285SBarry Smith y[11*i+2] += sum3; 2018dbdd7285SBarry Smith y[11*i+3] += sum4; 2019dbdd7285SBarry Smith y[11*i+4] += sum5; 2020dbdd7285SBarry Smith y[11*i+5] += sum6; 2021dbdd7285SBarry Smith y[11*i+6] += sum7; 2022dbdd7285SBarry Smith y[11*i+7] += sum8; 2023dbdd7285SBarry Smith y[11*i+8] += sum9; 2024dbdd7285SBarry Smith y[11*i+9] += sum10; 2025dbdd7285SBarry Smith y[11*i+10] += sum11; 2026dbdd7285SBarry Smith } 2027dbdd7285SBarry Smith 2028dbdd7285SBarry Smith ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 20293649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2030dbdd7285SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2031dbdd7285SBarry Smith PetscFunctionReturn(0); 2032dbdd7285SBarry Smith } 2033dbdd7285SBarry Smith 2034dbdd7285SBarry Smith #undef __FUNCT__ 2035dbdd7285SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_11" 2036dbdd7285SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 2037dbdd7285SBarry Smith { 2038dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2039dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2040d2840e09SBarry Smith const PetscScalar *x,*v; 2041d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 2042dbdd7285SBarry Smith PetscErrorCode ierr; 2043d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2044d2840e09SBarry Smith PetscInt n,i; 2045dbdd7285SBarry Smith 2046dbdd7285SBarry Smith PetscFunctionBegin; 2047d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 20483649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2049dbdd7285SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2050dbdd7285SBarry Smith 2051dbdd7285SBarry Smith for (i=0; i<m; i++) { 2052dbdd7285SBarry Smith idx = a->j + a->i[i] ; 2053dbdd7285SBarry Smith v = a->a + a->i[i] ; 2054dbdd7285SBarry Smith n = a->i[i+1] - a->i[i]; 2055dbdd7285SBarry Smith alpha1 = x[11*i]; 2056dbdd7285SBarry Smith alpha2 = x[11*i+1]; 2057dbdd7285SBarry Smith alpha3 = x[11*i+2]; 2058dbdd7285SBarry Smith alpha4 = x[11*i+3]; 2059dbdd7285SBarry Smith alpha5 = x[11*i+4]; 2060dbdd7285SBarry Smith alpha6 = x[11*i+5]; 2061dbdd7285SBarry Smith alpha7 = x[11*i+6]; 2062dbdd7285SBarry Smith alpha8 = x[11*i+7]; 2063dbdd7285SBarry Smith alpha9 = x[11*i+8]; 2064dbdd7285SBarry Smith alpha10 = x[11*i+9]; 2065dbdd7285SBarry Smith alpha11 = x[11*i+10]; 2066dbdd7285SBarry Smith while (n-->0) { 2067dbdd7285SBarry Smith y[11*(*idx)] += alpha1*(*v); 2068dbdd7285SBarry Smith y[11*(*idx)+1] += alpha2*(*v); 2069dbdd7285SBarry Smith y[11*(*idx)+2] += alpha3*(*v); 2070dbdd7285SBarry Smith y[11*(*idx)+3] += alpha4*(*v); 2071dbdd7285SBarry Smith y[11*(*idx)+4] += alpha5*(*v); 2072dbdd7285SBarry Smith y[11*(*idx)+5] += alpha6*(*v); 2073dbdd7285SBarry Smith y[11*(*idx)+6] += alpha7*(*v); 2074dbdd7285SBarry Smith y[11*(*idx)+7] += alpha8*(*v); 2075dbdd7285SBarry Smith y[11*(*idx)+8] += alpha9*(*v); 2076dbdd7285SBarry Smith y[11*(*idx)+9] += alpha10*(*v); 2077dbdd7285SBarry Smith y[11*(*idx)+10] += alpha11*(*v); 2078dbdd7285SBarry Smith idx++; v++; 2079dbdd7285SBarry Smith } 2080dbdd7285SBarry Smith } 2081dbdd7285SBarry Smith ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 20823649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2083dbdd7285SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2084dbdd7285SBarry Smith PetscFunctionReturn(0); 2085dbdd7285SBarry Smith } 2086dbdd7285SBarry Smith 2087dbdd7285SBarry Smith #undef __FUNCT__ 2088dbdd7285SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_11" 2089dbdd7285SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 2090dbdd7285SBarry Smith { 2091dbdd7285SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2092dbdd7285SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2093d2840e09SBarry Smith const PetscScalar *x,*v; 2094d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 2095dbdd7285SBarry Smith PetscErrorCode ierr; 2096d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2097d2840e09SBarry Smith PetscInt n,i; 2098dbdd7285SBarry Smith 2099dbdd7285SBarry Smith PetscFunctionBegin; 2100dbdd7285SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 21013649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2102dbdd7285SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2103dbdd7285SBarry Smith for (i=0; i<m; i++) { 2104dbdd7285SBarry Smith idx = a->j + a->i[i] ; 2105dbdd7285SBarry Smith v = a->a + a->i[i] ; 2106dbdd7285SBarry Smith n = a->i[i+1] - a->i[i]; 2107dbdd7285SBarry Smith alpha1 = x[11*i]; 2108dbdd7285SBarry Smith alpha2 = x[11*i+1]; 2109dbdd7285SBarry Smith alpha3 = x[11*i+2]; 2110dbdd7285SBarry Smith alpha4 = x[11*i+3]; 2111dbdd7285SBarry Smith alpha5 = x[11*i+4]; 2112dbdd7285SBarry Smith alpha6 = x[11*i+5]; 2113dbdd7285SBarry Smith alpha7 = x[11*i+6]; 2114dbdd7285SBarry Smith alpha8 = x[11*i+7]; 2115dbdd7285SBarry Smith alpha9 = x[11*i+8]; 2116dbdd7285SBarry Smith alpha10 = x[11*i+9]; 2117dbdd7285SBarry Smith alpha11 = x[11*i+10]; 2118dbdd7285SBarry Smith while (n-->0) { 2119dbdd7285SBarry Smith y[11*(*idx)] += alpha1*(*v); 2120dbdd7285SBarry Smith y[11*(*idx)+1] += alpha2*(*v); 2121dbdd7285SBarry Smith y[11*(*idx)+2] += alpha3*(*v); 2122dbdd7285SBarry Smith y[11*(*idx)+3] += alpha4*(*v); 2123dbdd7285SBarry Smith y[11*(*idx)+4] += alpha5*(*v); 2124dbdd7285SBarry Smith y[11*(*idx)+5] += alpha6*(*v); 2125dbdd7285SBarry Smith y[11*(*idx)+6] += alpha7*(*v); 2126dbdd7285SBarry Smith y[11*(*idx)+7] += alpha8*(*v); 2127dbdd7285SBarry Smith y[11*(*idx)+8] += alpha9*(*v); 2128dbdd7285SBarry Smith y[11*(*idx)+9] += alpha10*(*v); 2129dbdd7285SBarry Smith y[11*(*idx)+10] += alpha11*(*v); 2130dbdd7285SBarry Smith idx++; v++; 2131dbdd7285SBarry Smith } 2132dbdd7285SBarry Smith } 2133dbdd7285SBarry Smith ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 21343649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2135dbdd7285SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2136dbdd7285SBarry Smith PetscFunctionReturn(0); 2137dbdd7285SBarry Smith } 2138dbdd7285SBarry Smith 2139dbdd7285SBarry Smith 2140dbdd7285SBarry Smith /*--------------------------------------------------------------------------------------------*/ 2141dbdd7285SBarry Smith #undef __FUNCT__ 21422f7816d4SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_16" 2143dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 21442f7816d4SBarry Smith { 21452f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 21462f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2147d2840e09SBarry Smith const PetscScalar *x,*v; 2148d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 21492f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2150dfbe8321SBarry Smith PetscErrorCode ierr; 2151d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2152d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 21532f7816d4SBarry Smith 21542f7816d4SBarry Smith PetscFunctionBegin; 21553649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 21561ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 21572f7816d4SBarry Smith idx = a->j; 21582f7816d4SBarry Smith v = a->a; 21592f7816d4SBarry Smith ii = a->i; 21602f7816d4SBarry Smith 21612f7816d4SBarry Smith for (i=0; i<m; i++) { 21622f7816d4SBarry Smith jrow = ii[i]; 21632f7816d4SBarry Smith n = ii[i+1] - jrow; 21642f7816d4SBarry Smith sum1 = 0.0; 21652f7816d4SBarry Smith sum2 = 0.0; 21662f7816d4SBarry Smith sum3 = 0.0; 21672f7816d4SBarry Smith sum4 = 0.0; 21682f7816d4SBarry Smith sum5 = 0.0; 21692f7816d4SBarry Smith sum6 = 0.0; 21702f7816d4SBarry Smith sum7 = 0.0; 21712f7816d4SBarry Smith sum8 = 0.0; 21722f7816d4SBarry Smith sum9 = 0.0; 21732f7816d4SBarry Smith sum10 = 0.0; 21742f7816d4SBarry Smith sum11 = 0.0; 21752f7816d4SBarry Smith sum12 = 0.0; 21762f7816d4SBarry Smith sum13 = 0.0; 21772f7816d4SBarry Smith sum14 = 0.0; 21782f7816d4SBarry Smith sum15 = 0.0; 21792f7816d4SBarry Smith sum16 = 0.0; 2180b3c51c66SMatthew Knepley nonzerorow += (n>0); 21812f7816d4SBarry Smith for (j=0; j<n; j++) { 21822f7816d4SBarry Smith sum1 += v[jrow]*x[16*idx[jrow]]; 21832f7816d4SBarry Smith sum2 += v[jrow]*x[16*idx[jrow]+1]; 21842f7816d4SBarry Smith sum3 += v[jrow]*x[16*idx[jrow]+2]; 21852f7816d4SBarry Smith sum4 += v[jrow]*x[16*idx[jrow]+3]; 21862f7816d4SBarry Smith sum5 += v[jrow]*x[16*idx[jrow]+4]; 21872f7816d4SBarry Smith sum6 += v[jrow]*x[16*idx[jrow]+5]; 21882f7816d4SBarry Smith sum7 += v[jrow]*x[16*idx[jrow]+6]; 21892f7816d4SBarry Smith sum8 += v[jrow]*x[16*idx[jrow]+7]; 21902f7816d4SBarry Smith sum9 += v[jrow]*x[16*idx[jrow]+8]; 21912f7816d4SBarry Smith sum10 += v[jrow]*x[16*idx[jrow]+9]; 21922f7816d4SBarry Smith sum11 += v[jrow]*x[16*idx[jrow]+10]; 21932f7816d4SBarry Smith sum12 += v[jrow]*x[16*idx[jrow]+11]; 21942f7816d4SBarry Smith sum13 += v[jrow]*x[16*idx[jrow]+12]; 21952f7816d4SBarry Smith sum14 += v[jrow]*x[16*idx[jrow]+13]; 21962f7816d4SBarry Smith sum15 += v[jrow]*x[16*idx[jrow]+14]; 21972f7816d4SBarry Smith sum16 += v[jrow]*x[16*idx[jrow]+15]; 21982f7816d4SBarry Smith jrow++; 21992f7816d4SBarry Smith } 22002f7816d4SBarry Smith y[16*i] = sum1; 22012f7816d4SBarry Smith y[16*i+1] = sum2; 22022f7816d4SBarry Smith y[16*i+2] = sum3; 22032f7816d4SBarry Smith y[16*i+3] = sum4; 22042f7816d4SBarry Smith y[16*i+4] = sum5; 22052f7816d4SBarry Smith y[16*i+5] = sum6; 22062f7816d4SBarry Smith y[16*i+6] = sum7; 22072f7816d4SBarry Smith y[16*i+7] = sum8; 22082f7816d4SBarry Smith y[16*i+8] = sum9; 22092f7816d4SBarry Smith y[16*i+9] = sum10; 22102f7816d4SBarry Smith y[16*i+10] = sum11; 22112f7816d4SBarry Smith y[16*i+11] = sum12; 22122f7816d4SBarry Smith y[16*i+12] = sum13; 22132f7816d4SBarry Smith y[16*i+13] = sum14; 22142f7816d4SBarry Smith y[16*i+14] = sum15; 22152f7816d4SBarry Smith y[16*i+15] = sum16; 22162f7816d4SBarry Smith } 22172f7816d4SBarry Smith 2218dc0b31edSSatish Balay ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr); 22193649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 22201ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 22212f7816d4SBarry Smith PetscFunctionReturn(0); 22222f7816d4SBarry Smith } 22232f7816d4SBarry Smith 22242f7816d4SBarry Smith #undef __FUNCT__ 22252f7816d4SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 2226dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 22272f7816d4SBarry Smith { 22282f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 22292f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2230d2840e09SBarry Smith const PetscScalar *x,*v; 2231d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 22322f7816d4SBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2233dfbe8321SBarry Smith PetscErrorCode ierr; 2234d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2235d2840e09SBarry Smith PetscInt n,i; 22362f7816d4SBarry Smith 22372f7816d4SBarry Smith PetscFunctionBegin; 2238d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 22393649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 22401ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2241bfec09a0SHong Zhang 22422f7816d4SBarry Smith for (i=0; i<m; i++) { 2243bfec09a0SHong Zhang idx = a->j + a->i[i] ; 2244bfec09a0SHong Zhang v = a->a + a->i[i] ; 22452f7816d4SBarry Smith n = a->i[i+1] - a->i[i]; 22462f7816d4SBarry Smith alpha1 = x[16*i]; 22472f7816d4SBarry Smith alpha2 = x[16*i+1]; 22482f7816d4SBarry Smith alpha3 = x[16*i+2]; 22492f7816d4SBarry Smith alpha4 = x[16*i+3]; 22502f7816d4SBarry Smith alpha5 = x[16*i+4]; 22512f7816d4SBarry Smith alpha6 = x[16*i+5]; 22522f7816d4SBarry Smith alpha7 = x[16*i+6]; 22532f7816d4SBarry Smith alpha8 = x[16*i+7]; 22542f7816d4SBarry Smith alpha9 = x[16*i+8]; 22552f7816d4SBarry Smith alpha10 = x[16*i+9]; 22562f7816d4SBarry Smith alpha11 = x[16*i+10]; 22572f7816d4SBarry Smith alpha12 = x[16*i+11]; 22582f7816d4SBarry Smith alpha13 = x[16*i+12]; 22592f7816d4SBarry Smith alpha14 = x[16*i+13]; 22602f7816d4SBarry Smith alpha15 = x[16*i+14]; 22612f7816d4SBarry Smith alpha16 = x[16*i+15]; 22622f7816d4SBarry Smith while (n-->0) { 22632f7816d4SBarry Smith y[16*(*idx)] += alpha1*(*v); 22642f7816d4SBarry Smith y[16*(*idx)+1] += alpha2*(*v); 22652f7816d4SBarry Smith y[16*(*idx)+2] += alpha3*(*v); 22662f7816d4SBarry Smith y[16*(*idx)+3] += alpha4*(*v); 22672f7816d4SBarry Smith y[16*(*idx)+4] += alpha5*(*v); 22682f7816d4SBarry Smith y[16*(*idx)+5] += alpha6*(*v); 22692f7816d4SBarry Smith y[16*(*idx)+6] += alpha7*(*v); 22702f7816d4SBarry Smith y[16*(*idx)+7] += alpha8*(*v); 22712f7816d4SBarry Smith y[16*(*idx)+8] += alpha9*(*v); 22722f7816d4SBarry Smith y[16*(*idx)+9] += alpha10*(*v); 22732f7816d4SBarry Smith y[16*(*idx)+10] += alpha11*(*v); 22742f7816d4SBarry Smith y[16*(*idx)+11] += alpha12*(*v); 22752f7816d4SBarry Smith y[16*(*idx)+12] += alpha13*(*v); 22762f7816d4SBarry Smith y[16*(*idx)+13] += alpha14*(*v); 22772f7816d4SBarry Smith y[16*(*idx)+14] += alpha15*(*v); 22782f7816d4SBarry Smith y[16*(*idx)+15] += alpha16*(*v); 22792f7816d4SBarry Smith idx++; v++; 22802f7816d4SBarry Smith } 22812f7816d4SBarry Smith } 2282dc0b31edSSatish Balay ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 22833649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 22841ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 22852f7816d4SBarry Smith PetscFunctionReturn(0); 22862f7816d4SBarry Smith } 22872f7816d4SBarry Smith 22882f7816d4SBarry Smith #undef __FUNCT__ 22892f7816d4SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 2290dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 22912f7816d4SBarry Smith { 22922f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 22932f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2294d2840e09SBarry Smith const PetscScalar *x,*v; 2295d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 22962f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2297dfbe8321SBarry Smith PetscErrorCode ierr; 2298d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2299b24ad042SBarry Smith PetscInt n,i,jrow,j; 23002f7816d4SBarry Smith 23012f7816d4SBarry Smith PetscFunctionBegin; 23022f7816d4SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 23033649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 23041ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 23052f7816d4SBarry Smith idx = a->j; 23062f7816d4SBarry Smith v = a->a; 23072f7816d4SBarry Smith ii = a->i; 23082f7816d4SBarry Smith 23092f7816d4SBarry Smith for (i=0; i<m; i++) { 23102f7816d4SBarry Smith jrow = ii[i]; 23112f7816d4SBarry Smith n = ii[i+1] - jrow; 23122f7816d4SBarry Smith sum1 = 0.0; 23132f7816d4SBarry Smith sum2 = 0.0; 23142f7816d4SBarry Smith sum3 = 0.0; 23152f7816d4SBarry Smith sum4 = 0.0; 23162f7816d4SBarry Smith sum5 = 0.0; 23172f7816d4SBarry Smith sum6 = 0.0; 23182f7816d4SBarry Smith sum7 = 0.0; 23192f7816d4SBarry Smith sum8 = 0.0; 23202f7816d4SBarry Smith sum9 = 0.0; 23212f7816d4SBarry Smith sum10 = 0.0; 23222f7816d4SBarry Smith sum11 = 0.0; 23232f7816d4SBarry Smith sum12 = 0.0; 23242f7816d4SBarry Smith sum13 = 0.0; 23252f7816d4SBarry Smith sum14 = 0.0; 23262f7816d4SBarry Smith sum15 = 0.0; 23272f7816d4SBarry Smith sum16 = 0.0; 23282f7816d4SBarry Smith for (j=0; j<n; j++) { 23292f7816d4SBarry Smith sum1 += v[jrow]*x[16*idx[jrow]]; 23302f7816d4SBarry Smith sum2 += v[jrow]*x[16*idx[jrow]+1]; 23312f7816d4SBarry Smith sum3 += v[jrow]*x[16*idx[jrow]+2]; 23322f7816d4SBarry Smith sum4 += v[jrow]*x[16*idx[jrow]+3]; 23332f7816d4SBarry Smith sum5 += v[jrow]*x[16*idx[jrow]+4]; 23342f7816d4SBarry Smith sum6 += v[jrow]*x[16*idx[jrow]+5]; 23352f7816d4SBarry Smith sum7 += v[jrow]*x[16*idx[jrow]+6]; 23362f7816d4SBarry Smith sum8 += v[jrow]*x[16*idx[jrow]+7]; 23372f7816d4SBarry Smith sum9 += v[jrow]*x[16*idx[jrow]+8]; 23382f7816d4SBarry Smith sum10 += v[jrow]*x[16*idx[jrow]+9]; 23392f7816d4SBarry Smith sum11 += v[jrow]*x[16*idx[jrow]+10]; 23402f7816d4SBarry Smith sum12 += v[jrow]*x[16*idx[jrow]+11]; 23412f7816d4SBarry Smith sum13 += v[jrow]*x[16*idx[jrow]+12]; 23422f7816d4SBarry Smith sum14 += v[jrow]*x[16*idx[jrow]+13]; 23432f7816d4SBarry Smith sum15 += v[jrow]*x[16*idx[jrow]+14]; 23442f7816d4SBarry Smith sum16 += v[jrow]*x[16*idx[jrow]+15]; 23452f7816d4SBarry Smith jrow++; 23462f7816d4SBarry Smith } 23472f7816d4SBarry Smith y[16*i] += sum1; 23482f7816d4SBarry Smith y[16*i+1] += sum2; 23492f7816d4SBarry Smith y[16*i+2] += sum3; 23502f7816d4SBarry Smith y[16*i+3] += sum4; 23512f7816d4SBarry Smith y[16*i+4] += sum5; 23522f7816d4SBarry Smith y[16*i+5] += sum6; 23532f7816d4SBarry Smith y[16*i+6] += sum7; 23542f7816d4SBarry Smith y[16*i+7] += sum8; 23552f7816d4SBarry Smith y[16*i+8] += sum9; 23562f7816d4SBarry Smith y[16*i+9] += sum10; 23572f7816d4SBarry Smith y[16*i+10] += sum11; 23582f7816d4SBarry Smith y[16*i+11] += sum12; 23592f7816d4SBarry Smith y[16*i+12] += sum13; 23602f7816d4SBarry Smith y[16*i+13] += sum14; 23612f7816d4SBarry Smith y[16*i+14] += sum15; 23622f7816d4SBarry Smith y[16*i+15] += sum16; 23632f7816d4SBarry Smith } 23642f7816d4SBarry Smith 2365dc0b31edSSatish Balay ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 23663649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 23671ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 23682f7816d4SBarry Smith PetscFunctionReturn(0); 23692f7816d4SBarry Smith } 23702f7816d4SBarry Smith 23712f7816d4SBarry Smith #undef __FUNCT__ 23722f7816d4SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 2373dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 23742f7816d4SBarry Smith { 23752f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 23762f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2377d2840e09SBarry Smith const PetscScalar *x,*v; 2378d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 23792f7816d4SBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2380dfbe8321SBarry Smith PetscErrorCode ierr; 2381d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2382d2840e09SBarry Smith PetscInt n,i; 23832f7816d4SBarry Smith 23842f7816d4SBarry Smith PetscFunctionBegin; 23852f7816d4SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 23863649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 23871ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 23882f7816d4SBarry Smith for (i=0; i<m; i++) { 2389bfec09a0SHong Zhang idx = a->j + a->i[i] ; 2390bfec09a0SHong Zhang v = a->a + a->i[i] ; 23912f7816d4SBarry Smith n = a->i[i+1] - a->i[i]; 23922f7816d4SBarry Smith alpha1 = x[16*i]; 23932f7816d4SBarry Smith alpha2 = x[16*i+1]; 23942f7816d4SBarry Smith alpha3 = x[16*i+2]; 23952f7816d4SBarry Smith alpha4 = x[16*i+3]; 23962f7816d4SBarry Smith alpha5 = x[16*i+4]; 23972f7816d4SBarry Smith alpha6 = x[16*i+5]; 23982f7816d4SBarry Smith alpha7 = x[16*i+6]; 23992f7816d4SBarry Smith alpha8 = x[16*i+7]; 24002f7816d4SBarry Smith alpha9 = x[16*i+8]; 24012f7816d4SBarry Smith alpha10 = x[16*i+9]; 24022f7816d4SBarry Smith alpha11 = x[16*i+10]; 24032f7816d4SBarry Smith alpha12 = x[16*i+11]; 24042f7816d4SBarry Smith alpha13 = x[16*i+12]; 24052f7816d4SBarry Smith alpha14 = x[16*i+13]; 24062f7816d4SBarry Smith alpha15 = x[16*i+14]; 24072f7816d4SBarry Smith alpha16 = x[16*i+15]; 24082f7816d4SBarry Smith while (n-->0) { 24092f7816d4SBarry Smith y[16*(*idx)] += alpha1*(*v); 24102f7816d4SBarry Smith y[16*(*idx)+1] += alpha2*(*v); 24112f7816d4SBarry Smith y[16*(*idx)+2] += alpha3*(*v); 24122f7816d4SBarry Smith y[16*(*idx)+3] += alpha4*(*v); 24132f7816d4SBarry Smith y[16*(*idx)+4] += alpha5*(*v); 24142f7816d4SBarry Smith y[16*(*idx)+5] += alpha6*(*v); 24152f7816d4SBarry Smith y[16*(*idx)+6] += alpha7*(*v); 24162f7816d4SBarry Smith y[16*(*idx)+7] += alpha8*(*v); 24172f7816d4SBarry Smith y[16*(*idx)+8] += alpha9*(*v); 24182f7816d4SBarry Smith y[16*(*idx)+9] += alpha10*(*v); 24192f7816d4SBarry Smith y[16*(*idx)+10] += alpha11*(*v); 24202f7816d4SBarry Smith y[16*(*idx)+11] += alpha12*(*v); 24212f7816d4SBarry Smith y[16*(*idx)+12] += alpha13*(*v); 24222f7816d4SBarry Smith y[16*(*idx)+13] += alpha14*(*v); 24232f7816d4SBarry Smith y[16*(*idx)+14] += alpha15*(*v); 24242f7816d4SBarry Smith y[16*(*idx)+15] += alpha16*(*v); 24252f7816d4SBarry Smith idx++; v++; 24262f7816d4SBarry Smith } 24272f7816d4SBarry Smith } 2428dc0b31edSSatish Balay ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 24293649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 24301ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 243166ed3db0SBarry Smith PetscFunctionReturn(0); 243266ed3db0SBarry Smith } 243366ed3db0SBarry Smith 2434ed1c418cSBarry Smith /*--------------------------------------------------------------------------------------------*/ 2435ed1c418cSBarry Smith #undef __FUNCT__ 2436ed1c418cSBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_18" 2437ed1c418cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2438ed1c418cSBarry Smith { 2439ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2440ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2441d2840e09SBarry Smith const PetscScalar *x,*v; 2442d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2443ed1c418cSBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2444ed1c418cSBarry Smith PetscErrorCode ierr; 2445d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2446d2840e09SBarry Smith PetscInt nonzerorow=0,n,i,jrow,j; 2447ed1c418cSBarry Smith 2448ed1c418cSBarry Smith PetscFunctionBegin; 24493649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2450ed1c418cSBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2451ed1c418cSBarry Smith idx = a->j; 2452ed1c418cSBarry Smith v = a->a; 2453ed1c418cSBarry Smith ii = a->i; 2454ed1c418cSBarry Smith 2455ed1c418cSBarry Smith for (i=0; i<m; i++) { 2456ed1c418cSBarry Smith jrow = ii[i]; 2457ed1c418cSBarry Smith n = ii[i+1] - jrow; 2458ed1c418cSBarry Smith sum1 = 0.0; 2459ed1c418cSBarry Smith sum2 = 0.0; 2460ed1c418cSBarry Smith sum3 = 0.0; 2461ed1c418cSBarry Smith sum4 = 0.0; 2462ed1c418cSBarry Smith sum5 = 0.0; 2463ed1c418cSBarry Smith sum6 = 0.0; 2464ed1c418cSBarry Smith sum7 = 0.0; 2465ed1c418cSBarry Smith sum8 = 0.0; 2466ed1c418cSBarry Smith sum9 = 0.0; 2467ed1c418cSBarry Smith sum10 = 0.0; 2468ed1c418cSBarry Smith sum11 = 0.0; 2469ed1c418cSBarry Smith sum12 = 0.0; 2470ed1c418cSBarry Smith sum13 = 0.0; 2471ed1c418cSBarry Smith sum14 = 0.0; 2472ed1c418cSBarry Smith sum15 = 0.0; 2473ed1c418cSBarry Smith sum16 = 0.0; 2474ed1c418cSBarry Smith sum17 = 0.0; 2475ed1c418cSBarry Smith sum18 = 0.0; 2476ed1c418cSBarry Smith nonzerorow += (n>0); 2477ed1c418cSBarry Smith for (j=0; j<n; j++) { 2478ed1c418cSBarry Smith sum1 += v[jrow]*x[18*idx[jrow]]; 2479ed1c418cSBarry Smith sum2 += v[jrow]*x[18*idx[jrow]+1]; 2480ed1c418cSBarry Smith sum3 += v[jrow]*x[18*idx[jrow]+2]; 2481ed1c418cSBarry Smith sum4 += v[jrow]*x[18*idx[jrow]+3]; 2482ed1c418cSBarry Smith sum5 += v[jrow]*x[18*idx[jrow]+4]; 2483ed1c418cSBarry Smith sum6 += v[jrow]*x[18*idx[jrow]+5]; 2484ed1c418cSBarry Smith sum7 += v[jrow]*x[18*idx[jrow]+6]; 2485ed1c418cSBarry Smith sum8 += v[jrow]*x[18*idx[jrow]+7]; 2486ed1c418cSBarry Smith sum9 += v[jrow]*x[18*idx[jrow]+8]; 2487ed1c418cSBarry Smith sum10 += v[jrow]*x[18*idx[jrow]+9]; 2488ed1c418cSBarry Smith sum11 += v[jrow]*x[18*idx[jrow]+10]; 2489ed1c418cSBarry Smith sum12 += v[jrow]*x[18*idx[jrow]+11]; 2490ed1c418cSBarry Smith sum13 += v[jrow]*x[18*idx[jrow]+12]; 2491ed1c418cSBarry Smith sum14 += v[jrow]*x[18*idx[jrow]+13]; 2492ed1c418cSBarry Smith sum15 += v[jrow]*x[18*idx[jrow]+14]; 2493ed1c418cSBarry Smith sum16 += v[jrow]*x[18*idx[jrow]+15]; 2494ed1c418cSBarry Smith sum17 += v[jrow]*x[18*idx[jrow]+16]; 2495ed1c418cSBarry Smith sum18 += v[jrow]*x[18*idx[jrow]+17]; 2496ed1c418cSBarry Smith jrow++; 2497ed1c418cSBarry Smith } 2498ed1c418cSBarry Smith y[18*i] = sum1; 2499ed1c418cSBarry Smith y[18*i+1] = sum2; 2500ed1c418cSBarry Smith y[18*i+2] = sum3; 2501ed1c418cSBarry Smith y[18*i+3] = sum4; 2502ed1c418cSBarry Smith y[18*i+4] = sum5; 2503ed1c418cSBarry Smith y[18*i+5] = sum6; 2504ed1c418cSBarry Smith y[18*i+6] = sum7; 2505ed1c418cSBarry Smith y[18*i+7] = sum8; 2506ed1c418cSBarry Smith y[18*i+8] = sum9; 2507ed1c418cSBarry Smith y[18*i+9] = sum10; 2508ed1c418cSBarry Smith y[18*i+10] = sum11; 2509ed1c418cSBarry Smith y[18*i+11] = sum12; 2510ed1c418cSBarry Smith y[18*i+12] = sum13; 2511ed1c418cSBarry Smith y[18*i+13] = sum14; 2512ed1c418cSBarry Smith y[18*i+14] = sum15; 2513ed1c418cSBarry Smith y[18*i+15] = sum16; 2514ed1c418cSBarry Smith y[18*i+16] = sum17; 2515ed1c418cSBarry Smith y[18*i+17] = sum18; 2516ed1c418cSBarry Smith } 2517ed1c418cSBarry Smith 2518dc0b31edSSatish Balay ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr); 25193649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2520ed1c418cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2521ed1c418cSBarry Smith PetscFunctionReturn(0); 2522ed1c418cSBarry Smith } 2523ed1c418cSBarry Smith 2524ed1c418cSBarry Smith #undef __FUNCT__ 2525ed1c418cSBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_18" 2526ed1c418cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2527ed1c418cSBarry Smith { 2528ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2529ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2530d2840e09SBarry Smith const PetscScalar *x,*v; 2531d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2532ed1c418cSBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2533ed1c418cSBarry Smith PetscErrorCode ierr; 2534d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2535d2840e09SBarry Smith PetscInt n,i; 2536ed1c418cSBarry Smith 2537ed1c418cSBarry Smith PetscFunctionBegin; 2538d2840e09SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 25393649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2540ed1c418cSBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2541ed1c418cSBarry Smith 2542ed1c418cSBarry Smith for (i=0; i<m; i++) { 2543ed1c418cSBarry Smith idx = a->j + a->i[i] ; 2544ed1c418cSBarry Smith v = a->a + a->i[i] ; 2545ed1c418cSBarry Smith n = a->i[i+1] - a->i[i]; 2546ed1c418cSBarry Smith alpha1 = x[18*i]; 2547ed1c418cSBarry Smith alpha2 = x[18*i+1]; 2548ed1c418cSBarry Smith alpha3 = x[18*i+2]; 2549ed1c418cSBarry Smith alpha4 = x[18*i+3]; 2550ed1c418cSBarry Smith alpha5 = x[18*i+4]; 2551ed1c418cSBarry Smith alpha6 = x[18*i+5]; 2552ed1c418cSBarry Smith alpha7 = x[18*i+6]; 2553ed1c418cSBarry Smith alpha8 = x[18*i+7]; 2554ed1c418cSBarry Smith alpha9 = x[18*i+8]; 2555ed1c418cSBarry Smith alpha10 = x[18*i+9]; 2556ed1c418cSBarry Smith alpha11 = x[18*i+10]; 2557ed1c418cSBarry Smith alpha12 = x[18*i+11]; 2558ed1c418cSBarry Smith alpha13 = x[18*i+12]; 2559ed1c418cSBarry Smith alpha14 = x[18*i+13]; 2560ed1c418cSBarry Smith alpha15 = x[18*i+14]; 2561ed1c418cSBarry Smith alpha16 = x[18*i+15]; 2562ed1c418cSBarry Smith alpha17 = x[18*i+16]; 2563ed1c418cSBarry Smith alpha18 = x[18*i+17]; 2564ed1c418cSBarry Smith while (n-->0) { 2565ed1c418cSBarry Smith y[18*(*idx)] += alpha1*(*v); 2566ed1c418cSBarry Smith y[18*(*idx)+1] += alpha2*(*v); 2567ed1c418cSBarry Smith y[18*(*idx)+2] += alpha3*(*v); 2568ed1c418cSBarry Smith y[18*(*idx)+3] += alpha4*(*v); 2569ed1c418cSBarry Smith y[18*(*idx)+4] += alpha5*(*v); 2570ed1c418cSBarry Smith y[18*(*idx)+5] += alpha6*(*v); 2571ed1c418cSBarry Smith y[18*(*idx)+6] += alpha7*(*v); 2572ed1c418cSBarry Smith y[18*(*idx)+7] += alpha8*(*v); 2573ed1c418cSBarry Smith y[18*(*idx)+8] += alpha9*(*v); 2574ed1c418cSBarry Smith y[18*(*idx)+9] += alpha10*(*v); 2575ed1c418cSBarry Smith y[18*(*idx)+10] += alpha11*(*v); 2576ed1c418cSBarry Smith y[18*(*idx)+11] += alpha12*(*v); 2577ed1c418cSBarry Smith y[18*(*idx)+12] += alpha13*(*v); 2578ed1c418cSBarry Smith y[18*(*idx)+13] += alpha14*(*v); 2579ed1c418cSBarry Smith y[18*(*idx)+14] += alpha15*(*v); 2580ed1c418cSBarry Smith y[18*(*idx)+15] += alpha16*(*v); 2581ed1c418cSBarry Smith y[18*(*idx)+16] += alpha17*(*v); 2582ed1c418cSBarry Smith y[18*(*idx)+17] += alpha18*(*v); 2583ed1c418cSBarry Smith idx++; v++; 2584ed1c418cSBarry Smith } 2585ed1c418cSBarry Smith } 2586dc0b31edSSatish Balay ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 25873649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2588ed1c418cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2589ed1c418cSBarry Smith PetscFunctionReturn(0); 2590ed1c418cSBarry Smith } 2591ed1c418cSBarry Smith 2592ed1c418cSBarry Smith #undef __FUNCT__ 2593ed1c418cSBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_18" 2594ed1c418cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2595ed1c418cSBarry Smith { 2596ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2597ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2598d2840e09SBarry Smith const PetscScalar *x,*v; 2599d2840e09SBarry Smith PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2600ed1c418cSBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2601ed1c418cSBarry Smith PetscErrorCode ierr; 2602d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2603ed1c418cSBarry Smith PetscInt n,i,jrow,j; 2604ed1c418cSBarry Smith 2605ed1c418cSBarry Smith PetscFunctionBegin; 2606ed1c418cSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 26073649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2608ed1c418cSBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2609ed1c418cSBarry Smith idx = a->j; 2610ed1c418cSBarry Smith v = a->a; 2611ed1c418cSBarry Smith ii = a->i; 2612ed1c418cSBarry Smith 2613ed1c418cSBarry Smith for (i=0; i<m; i++) { 2614ed1c418cSBarry Smith jrow = ii[i]; 2615ed1c418cSBarry Smith n = ii[i+1] - jrow; 2616ed1c418cSBarry Smith sum1 = 0.0; 2617ed1c418cSBarry Smith sum2 = 0.0; 2618ed1c418cSBarry Smith sum3 = 0.0; 2619ed1c418cSBarry Smith sum4 = 0.0; 2620ed1c418cSBarry Smith sum5 = 0.0; 2621ed1c418cSBarry Smith sum6 = 0.0; 2622ed1c418cSBarry Smith sum7 = 0.0; 2623ed1c418cSBarry Smith sum8 = 0.0; 2624ed1c418cSBarry Smith sum9 = 0.0; 2625ed1c418cSBarry Smith sum10 = 0.0; 2626ed1c418cSBarry Smith sum11 = 0.0; 2627ed1c418cSBarry Smith sum12 = 0.0; 2628ed1c418cSBarry Smith sum13 = 0.0; 2629ed1c418cSBarry Smith sum14 = 0.0; 2630ed1c418cSBarry Smith sum15 = 0.0; 2631ed1c418cSBarry Smith sum16 = 0.0; 2632ed1c418cSBarry Smith sum17 = 0.0; 2633ed1c418cSBarry Smith sum18 = 0.0; 2634ed1c418cSBarry Smith for (j=0; j<n; j++) { 2635ed1c418cSBarry Smith sum1 += v[jrow]*x[18*idx[jrow]]; 2636ed1c418cSBarry Smith sum2 += v[jrow]*x[18*idx[jrow]+1]; 2637ed1c418cSBarry Smith sum3 += v[jrow]*x[18*idx[jrow]+2]; 2638ed1c418cSBarry Smith sum4 += v[jrow]*x[18*idx[jrow]+3]; 2639ed1c418cSBarry Smith sum5 += v[jrow]*x[18*idx[jrow]+4]; 2640ed1c418cSBarry Smith sum6 += v[jrow]*x[18*idx[jrow]+5]; 2641ed1c418cSBarry Smith sum7 += v[jrow]*x[18*idx[jrow]+6]; 2642ed1c418cSBarry Smith sum8 += v[jrow]*x[18*idx[jrow]+7]; 2643ed1c418cSBarry Smith sum9 += v[jrow]*x[18*idx[jrow]+8]; 2644ed1c418cSBarry Smith sum10 += v[jrow]*x[18*idx[jrow]+9]; 2645ed1c418cSBarry Smith sum11 += v[jrow]*x[18*idx[jrow]+10]; 2646ed1c418cSBarry Smith sum12 += v[jrow]*x[18*idx[jrow]+11]; 2647ed1c418cSBarry Smith sum13 += v[jrow]*x[18*idx[jrow]+12]; 2648ed1c418cSBarry Smith sum14 += v[jrow]*x[18*idx[jrow]+13]; 2649ed1c418cSBarry Smith sum15 += v[jrow]*x[18*idx[jrow]+14]; 2650ed1c418cSBarry Smith sum16 += v[jrow]*x[18*idx[jrow]+15]; 2651ed1c418cSBarry Smith sum17 += v[jrow]*x[18*idx[jrow]+16]; 2652ed1c418cSBarry Smith sum18 += v[jrow]*x[18*idx[jrow]+17]; 2653ed1c418cSBarry Smith jrow++; 2654ed1c418cSBarry Smith } 2655ed1c418cSBarry Smith y[18*i] += sum1; 2656ed1c418cSBarry Smith y[18*i+1] += sum2; 2657ed1c418cSBarry Smith y[18*i+2] += sum3; 2658ed1c418cSBarry Smith y[18*i+3] += sum4; 2659ed1c418cSBarry Smith y[18*i+4] += sum5; 2660ed1c418cSBarry Smith y[18*i+5] += sum6; 2661ed1c418cSBarry Smith y[18*i+6] += sum7; 2662ed1c418cSBarry Smith y[18*i+7] += sum8; 2663ed1c418cSBarry Smith y[18*i+8] += sum9; 2664ed1c418cSBarry Smith y[18*i+9] += sum10; 2665ed1c418cSBarry Smith y[18*i+10] += sum11; 2666ed1c418cSBarry Smith y[18*i+11] += sum12; 2667ed1c418cSBarry Smith y[18*i+12] += sum13; 2668ed1c418cSBarry Smith y[18*i+13] += sum14; 2669ed1c418cSBarry Smith y[18*i+14] += sum15; 2670ed1c418cSBarry Smith y[18*i+15] += sum16; 2671ed1c418cSBarry Smith y[18*i+16] += sum17; 2672ed1c418cSBarry Smith y[18*i+17] += sum18; 2673ed1c418cSBarry Smith } 2674ed1c418cSBarry Smith 2675dc0b31edSSatish Balay ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 26763649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2677ed1c418cSBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2678ed1c418cSBarry Smith PetscFunctionReturn(0); 2679ed1c418cSBarry Smith } 2680ed1c418cSBarry Smith 2681ed1c418cSBarry Smith #undef __FUNCT__ 2682ed1c418cSBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_18" 2683ed1c418cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2684ed1c418cSBarry Smith { 2685ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2686ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2687d2840e09SBarry Smith const PetscScalar *x,*v; 2688d2840e09SBarry Smith PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2689ed1c418cSBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2690ed1c418cSBarry Smith PetscErrorCode ierr; 2691d2840e09SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx; 2692d2840e09SBarry Smith PetscInt n,i; 2693ed1c418cSBarry Smith 2694ed1c418cSBarry Smith PetscFunctionBegin; 2695ed1c418cSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 26963649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2697ed1c418cSBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2698ed1c418cSBarry Smith for (i=0; i<m; i++) { 2699ed1c418cSBarry Smith idx = a->j + a->i[i] ; 2700ed1c418cSBarry Smith v = a->a + a->i[i] ; 2701ed1c418cSBarry Smith n = a->i[i+1] - a->i[i]; 2702ed1c418cSBarry Smith alpha1 = x[18*i]; 2703ed1c418cSBarry Smith alpha2 = x[18*i+1]; 2704ed1c418cSBarry Smith alpha3 = x[18*i+2]; 2705ed1c418cSBarry Smith alpha4 = x[18*i+3]; 2706ed1c418cSBarry Smith alpha5 = x[18*i+4]; 2707ed1c418cSBarry Smith alpha6 = x[18*i+5]; 2708ed1c418cSBarry Smith alpha7 = x[18*i+6]; 2709ed1c418cSBarry Smith alpha8 = x[18*i+7]; 2710ed1c418cSBarry Smith alpha9 = x[18*i+8]; 2711ed1c418cSBarry Smith alpha10 = x[18*i+9]; 2712ed1c418cSBarry Smith alpha11 = x[18*i+10]; 2713ed1c418cSBarry Smith alpha12 = x[18*i+11]; 2714ed1c418cSBarry Smith alpha13 = x[18*i+12]; 2715ed1c418cSBarry Smith alpha14 = x[18*i+13]; 2716ed1c418cSBarry Smith alpha15 = x[18*i+14]; 2717ed1c418cSBarry Smith alpha16 = x[18*i+15]; 2718ed1c418cSBarry Smith alpha17 = x[18*i+16]; 2719ed1c418cSBarry Smith alpha18 = x[18*i+17]; 2720ed1c418cSBarry Smith while (n-->0) { 2721ed1c418cSBarry Smith y[18*(*idx)] += alpha1*(*v); 2722ed1c418cSBarry Smith y[18*(*idx)+1] += alpha2*(*v); 2723ed1c418cSBarry Smith y[18*(*idx)+2] += alpha3*(*v); 2724ed1c418cSBarry Smith y[18*(*idx)+3] += alpha4*(*v); 2725ed1c418cSBarry Smith y[18*(*idx)+4] += alpha5*(*v); 2726ed1c418cSBarry Smith y[18*(*idx)+5] += alpha6*(*v); 2727ed1c418cSBarry Smith y[18*(*idx)+6] += alpha7*(*v); 2728ed1c418cSBarry Smith y[18*(*idx)+7] += alpha8*(*v); 2729ed1c418cSBarry Smith y[18*(*idx)+8] += alpha9*(*v); 2730ed1c418cSBarry Smith y[18*(*idx)+9] += alpha10*(*v); 2731ed1c418cSBarry Smith y[18*(*idx)+10] += alpha11*(*v); 2732ed1c418cSBarry Smith y[18*(*idx)+11] += alpha12*(*v); 2733ed1c418cSBarry Smith y[18*(*idx)+12] += alpha13*(*v); 2734ed1c418cSBarry Smith y[18*(*idx)+13] += alpha14*(*v); 2735ed1c418cSBarry Smith y[18*(*idx)+14] += alpha15*(*v); 2736ed1c418cSBarry Smith y[18*(*idx)+15] += alpha16*(*v); 2737ed1c418cSBarry Smith y[18*(*idx)+16] += alpha17*(*v); 2738ed1c418cSBarry Smith y[18*(*idx)+17] += alpha18*(*v); 2739ed1c418cSBarry Smith idx++; v++; 2740ed1c418cSBarry Smith } 2741ed1c418cSBarry Smith } 2742dc0b31edSSatish Balay ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 27433649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2744ed1c418cSBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2745ed1c418cSBarry Smith PetscFunctionReturn(0); 2746ed1c418cSBarry Smith } 2747ed1c418cSBarry Smith 27486861f193SBarry Smith #undef __FUNCT__ 27496861f193SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_N" 27506861f193SBarry Smith PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 27516861f193SBarry Smith { 27526861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 27536861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 27546861f193SBarry Smith const PetscScalar *x,*v; 27556861f193SBarry Smith PetscScalar *y,*sums; 27566861f193SBarry Smith PetscErrorCode ierr; 27576861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 27586861f193SBarry Smith PetscInt n,i,jrow,j,dof = b->dof,k; 27596861f193SBarry Smith 27606861f193SBarry Smith PetscFunctionBegin; 27613649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 27626861f193SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 27636861f193SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 27646861f193SBarry Smith idx = a->j; 27656861f193SBarry Smith v = a->a; 27666861f193SBarry Smith ii = a->i; 27676861f193SBarry Smith 27686861f193SBarry Smith for (i=0; i<m; i++) { 27696861f193SBarry Smith jrow = ii[i]; 27706861f193SBarry Smith n = ii[i+1] - jrow; 27716861f193SBarry Smith sums = y + dof*i; 27726861f193SBarry Smith for (j=0; j<n; j++) { 27736861f193SBarry Smith for (k=0; k<dof; k++) { 27746861f193SBarry Smith sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 27756861f193SBarry Smith } 27766861f193SBarry Smith jrow++; 27776861f193SBarry Smith } 27786861f193SBarry Smith } 27796861f193SBarry Smith 27806861f193SBarry Smith ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 27813649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 27826861f193SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 27836861f193SBarry Smith PetscFunctionReturn(0); 27846861f193SBarry Smith } 27856861f193SBarry Smith 27866861f193SBarry Smith #undef __FUNCT__ 27876861f193SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_N" 27886861f193SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 27896861f193SBarry Smith { 27906861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 27916861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 27926861f193SBarry Smith const PetscScalar *x,*v; 27936861f193SBarry Smith PetscScalar *y,*sums; 27946861f193SBarry Smith PetscErrorCode ierr; 27956861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 27966861f193SBarry Smith PetscInt n,i,jrow,j,dof = b->dof,k; 27976861f193SBarry Smith 27986861f193SBarry Smith PetscFunctionBegin; 27996861f193SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 28003649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 28016861f193SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 28026861f193SBarry Smith idx = a->j; 28036861f193SBarry Smith v = a->a; 28046861f193SBarry Smith ii = a->i; 28056861f193SBarry Smith 28066861f193SBarry Smith for (i=0; i<m; i++) { 28076861f193SBarry Smith jrow = ii[i]; 28086861f193SBarry Smith n = ii[i+1] - jrow; 28096861f193SBarry Smith sums = y + dof*i; 28106861f193SBarry Smith for (j=0; j<n; j++) { 28116861f193SBarry Smith for (k=0; k<dof; k++) { 28126861f193SBarry Smith sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 28136861f193SBarry Smith } 28146861f193SBarry Smith jrow++; 28156861f193SBarry Smith } 28166861f193SBarry Smith } 28176861f193SBarry Smith 28186861f193SBarry Smith ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 28193649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 28206861f193SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 28216861f193SBarry Smith PetscFunctionReturn(0); 28226861f193SBarry Smith } 28236861f193SBarry Smith 28246861f193SBarry Smith #undef __FUNCT__ 28256861f193SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_N" 28266861f193SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 28276861f193SBarry Smith { 28286861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 28296861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 28306861f193SBarry Smith const PetscScalar *x,*v,*alpha; 28316861f193SBarry Smith PetscScalar *y; 28326861f193SBarry Smith PetscErrorCode ierr; 28336861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 28346861f193SBarry Smith PetscInt n,i,k; 28356861f193SBarry Smith 28366861f193SBarry Smith PetscFunctionBegin; 28373649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 28386861f193SBarry Smith ierr = VecSet(yy,0.0);CHKERRQ(ierr); 28396861f193SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 28406861f193SBarry Smith for (i=0; i<m; i++) { 28416861f193SBarry Smith idx = a->j + a->i[i] ; 28426861f193SBarry Smith v = a->a + a->i[i] ; 28436861f193SBarry Smith n = a->i[i+1] - a->i[i]; 28446861f193SBarry Smith alpha = x + dof*i; 28456861f193SBarry Smith while (n-->0) { 28466861f193SBarry Smith for (k=0; k<dof; k++) { 28476861f193SBarry Smith y[dof*(*idx)+k] += alpha[k]*(*v); 28486861f193SBarry Smith } 284983ce7366SBarry Smith idx++; v++; 28506861f193SBarry Smith } 28516861f193SBarry Smith } 28526861f193SBarry Smith ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 28533649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 28546861f193SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 28556861f193SBarry Smith PetscFunctionReturn(0); 28566861f193SBarry Smith } 28576861f193SBarry Smith 28586861f193SBarry Smith #undef __FUNCT__ 28596861f193SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_N" 28606861f193SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 28616861f193SBarry Smith { 28626861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 28636861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 28646861f193SBarry Smith const PetscScalar *x,*v,*alpha; 28656861f193SBarry Smith PetscScalar *y; 28666861f193SBarry Smith PetscErrorCode ierr; 28676861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 28686861f193SBarry Smith PetscInt n,i,k; 28696861f193SBarry Smith 28706861f193SBarry Smith PetscFunctionBegin; 28716861f193SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 28723649974fSBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 28736861f193SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 28746861f193SBarry Smith for (i=0; i<m; i++) { 28756861f193SBarry Smith idx = a->j + a->i[i] ; 28766861f193SBarry Smith v = a->a + a->i[i] ; 28776861f193SBarry Smith n = a->i[i+1] - a->i[i]; 28786861f193SBarry Smith alpha = x + dof*i; 28796861f193SBarry Smith while (n-->0) { 28806861f193SBarry Smith for (k=0; k<dof; k++) { 28816861f193SBarry Smith y[dof*(*idx)+k] += alpha[k]*(*v); 28826861f193SBarry Smith } 288383ce7366SBarry Smith idx++; v++; 28846861f193SBarry Smith } 28856861f193SBarry Smith } 28866861f193SBarry Smith ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 28873649974fSBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 28886861f193SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 28896861f193SBarry Smith PetscFunctionReturn(0); 28906861f193SBarry Smith } 28916861f193SBarry Smith 2892f4a53059SBarry Smith /*===================================================================================*/ 28934a2ae208SSatish Balay #undef __FUNCT__ 28944a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIMAIJ_dof" 2895dfbe8321SBarry Smith PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2896f4a53059SBarry Smith { 28974cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2898dfbe8321SBarry Smith PetscErrorCode ierr; 2899f4a53059SBarry Smith 2900b24ad042SBarry Smith PetscFunctionBegin; 2901f4a53059SBarry Smith /* start the scatter */ 2902ca9f406cSSatish Balay ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 29034cb9d9b8SBarry Smith ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 2904ca9f406cSSatish Balay ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 29054cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 2906f4a53059SBarry Smith PetscFunctionReturn(0); 2907f4a53059SBarry Smith } 2908f4a53059SBarry Smith 29094a2ae208SSatish Balay #undef __FUNCT__ 29104a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 2911dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 29124cb9d9b8SBarry Smith { 29134cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2914dfbe8321SBarry Smith PetscErrorCode ierr; 2915b24ad042SBarry Smith 29164cb9d9b8SBarry Smith PetscFunctionBegin; 29174cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 29184cb9d9b8SBarry Smith ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 2919ca9f406cSSatish Balay ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2920ca9f406cSSatish Balay ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 29214cb9d9b8SBarry Smith PetscFunctionReturn(0); 29224cb9d9b8SBarry Smith } 29234cb9d9b8SBarry Smith 29244a2ae208SSatish Balay #undef __FUNCT__ 29254a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 2926dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 29274cb9d9b8SBarry Smith { 29284cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2929dfbe8321SBarry Smith PetscErrorCode ierr; 29304cb9d9b8SBarry Smith 2931b24ad042SBarry Smith PetscFunctionBegin; 29324cb9d9b8SBarry Smith /* start the scatter */ 2933ca9f406cSSatish Balay ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2934d72c5c08SBarry Smith ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2935ca9f406cSSatish Balay ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2936717f2ec8SHong Zhang ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 29374cb9d9b8SBarry Smith PetscFunctionReturn(0); 29384cb9d9b8SBarry Smith } 29394cb9d9b8SBarry Smith 29404a2ae208SSatish Balay #undef __FUNCT__ 29414a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 2942dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 29434cb9d9b8SBarry Smith { 29444cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2945dfbe8321SBarry Smith PetscErrorCode ierr; 2946b24ad042SBarry Smith 29474cb9d9b8SBarry Smith PetscFunctionBegin; 29484cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2949ca9f406cSSatish Balay ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2950d72c5c08SBarry Smith ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2951ca9f406cSSatish Balay ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 29524cb9d9b8SBarry Smith PetscFunctionReturn(0); 29534cb9d9b8SBarry Smith } 29544cb9d9b8SBarry Smith 295595ddefa2SHong Zhang /* ----------------------------------------------------------------*/ 29569c4fc4c7SBarry Smith #undef __FUNCT__ 29577ba1a0bfSKris Buschelman #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 29587ba1a0bfSKris Buschelman PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 29597ba1a0bfSKris Buschelman { 29607ba1a0bfSKris Buschelman /* This routine requires testing -- but it's getting better. */ 29617ba1a0bfSKris Buschelman PetscErrorCode ierr; 2962a1a86e44SBarry Smith PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 29637ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 29647ba1a0bfSKris Buschelman Mat P=pp->AIJ; 29657ba1a0bfSKris Buschelman Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 2966d2840e09SBarry Smith PetscInt *pti,*ptj,*ptJ; 29677ba1a0bfSKris Buschelman PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 2968d2840e09SBarry Smith const PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof; 2969d2840e09SBarry Smith PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn; 29707ba1a0bfSKris Buschelman MatScalar *ca; 2971d2840e09SBarry Smith const PetscInt *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj; 29727ba1a0bfSKris Buschelman 29737ba1a0bfSKris Buschelman PetscFunctionBegin; 29747ba1a0bfSKris Buschelman /* Start timer */ 29757ba1a0bfSKris Buschelman ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 29767ba1a0bfSKris Buschelman 29777ba1a0bfSKris Buschelman /* Get ij structure of P^T */ 29787ba1a0bfSKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 29797ba1a0bfSKris Buschelman 29807ba1a0bfSKris Buschelman cn = pn*ppdof; 29817ba1a0bfSKris Buschelman /* Allocate ci array, arrays for fill computation and */ 29827ba1a0bfSKris Buschelman /* free space for accumulating nonzero column info */ 29837ba1a0bfSKris Buschelman ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 29847ba1a0bfSKris Buschelman ci[0] = 0; 29857ba1a0bfSKris Buschelman 29867ba1a0bfSKris Buschelman /* Work arrays for rows of P^T*A */ 298774ed9c26SBarry Smith ierr = PetscMalloc4(an,PetscInt,&ptadenserow,an,PetscInt,&ptasparserow,cn,PetscInt,&denserow,cn,PetscInt,&sparserow);CHKERRQ(ierr); 2988c43dabc9SBarry Smith ierr = PetscMemzero(ptadenserow,an*sizeof(PetscInt));CHKERRQ(ierr); 2989c43dabc9SBarry Smith ierr = PetscMemzero(denserow,cn*sizeof(PetscInt));CHKERRQ(ierr); 29907ba1a0bfSKris Buschelman 29917ba1a0bfSKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 29927ba1a0bfSKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 29937ba1a0bfSKris Buschelman /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 2994a1a86e44SBarry Smith ierr = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space); 29957ba1a0bfSKris Buschelman current_space = free_space; 29967ba1a0bfSKris Buschelman 29977ba1a0bfSKris Buschelman /* Determine symbolic info for each row of C: */ 29987ba1a0bfSKris Buschelman for (i=0;i<pn;i++) { 29997ba1a0bfSKris Buschelman ptnzi = pti[i+1] - pti[i]; 30007ba1a0bfSKris Buschelman ptJ = ptj + pti[i]; 30017ba1a0bfSKris Buschelman for (dof=0;dof<ppdof;dof++) { 30027ba1a0bfSKris Buschelman ptanzi = 0; 30037ba1a0bfSKris Buschelman /* Determine symbolic row of PtA: */ 30047ba1a0bfSKris Buschelman for (j=0;j<ptnzi;j++) { 30057ba1a0bfSKris Buschelman /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 30067ba1a0bfSKris Buschelman arow = ptJ[j]*ppdof + dof; 30077ba1a0bfSKris Buschelman /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 30087ba1a0bfSKris Buschelman anzj = ai[arow+1] - ai[arow]; 30097ba1a0bfSKris Buschelman ajj = aj + ai[arow]; 30107ba1a0bfSKris Buschelman for (k=0;k<anzj;k++) { 30117ba1a0bfSKris Buschelman if (!ptadenserow[ajj[k]]) { 30127ba1a0bfSKris Buschelman ptadenserow[ajj[k]] = -1; 30137ba1a0bfSKris Buschelman ptasparserow[ptanzi++] = ajj[k]; 30147ba1a0bfSKris Buschelman } 30157ba1a0bfSKris Buschelman } 30167ba1a0bfSKris Buschelman } 30177ba1a0bfSKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 30187ba1a0bfSKris Buschelman ptaj = ptasparserow; 30197ba1a0bfSKris Buschelman cnzi = 0; 30207ba1a0bfSKris Buschelman for (j=0;j<ptanzi;j++) { 30217ba1a0bfSKris Buschelman /* Get offset within block of P */ 30227ba1a0bfSKris Buschelman pshift = *ptaj%ppdof; 30237ba1a0bfSKris Buschelman /* Get block row of P */ 30247ba1a0bfSKris Buschelman prow = (*ptaj++)/ppdof; /* integer division */ 30257ba1a0bfSKris Buschelman /* P has same number of nonzeros per row as the compressed form */ 30267ba1a0bfSKris Buschelman pnzj = pi[prow+1] - pi[prow]; 30277ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 30287ba1a0bfSKris Buschelman for (k=0;k<pnzj;k++) { 30297ba1a0bfSKris Buschelman /* Locations in C are shifted by the offset within the block */ 30307ba1a0bfSKris Buschelman /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 30317ba1a0bfSKris Buschelman if (!denserow[pjj[k]*ppdof+pshift]) { 30327ba1a0bfSKris Buschelman denserow[pjj[k]*ppdof+pshift] = -1; 30337ba1a0bfSKris Buschelman sparserow[cnzi++] = pjj[k]*ppdof+pshift; 30347ba1a0bfSKris Buschelman } 30357ba1a0bfSKris Buschelman } 30367ba1a0bfSKris Buschelman } 30377ba1a0bfSKris Buschelman 30387ba1a0bfSKris Buschelman /* sort sparserow */ 30397ba1a0bfSKris Buschelman ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 30407ba1a0bfSKris Buschelman 30417ba1a0bfSKris Buschelman /* If free space is not available, make more free space */ 30427ba1a0bfSKris Buschelman /* Double the amount of total space in the list */ 30437ba1a0bfSKris Buschelman if (current_space->local_remaining<cnzi) { 30444238b7adSHong Zhang ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 30457ba1a0bfSKris Buschelman } 30467ba1a0bfSKris Buschelman 30477ba1a0bfSKris Buschelman /* Copy data into free space, and zero out denserows */ 30487ba1a0bfSKris Buschelman ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 30497ba1a0bfSKris Buschelman current_space->array += cnzi; 30507ba1a0bfSKris Buschelman current_space->local_used += cnzi; 30517ba1a0bfSKris Buschelman current_space->local_remaining -= cnzi; 30527ba1a0bfSKris Buschelman 30537ba1a0bfSKris Buschelman for (j=0;j<ptanzi;j++) { 30547ba1a0bfSKris Buschelman ptadenserow[ptasparserow[j]] = 0; 30557ba1a0bfSKris Buschelman } 30567ba1a0bfSKris Buschelman for (j=0;j<cnzi;j++) { 30577ba1a0bfSKris Buschelman denserow[sparserow[j]] = 0; 30587ba1a0bfSKris Buschelman } 30597ba1a0bfSKris Buschelman /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 30607ba1a0bfSKris Buschelman /* For now, we will recompute what is needed. */ 30617ba1a0bfSKris Buschelman ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 30627ba1a0bfSKris Buschelman } 30637ba1a0bfSKris Buschelman } 30647ba1a0bfSKris Buschelman /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 30657ba1a0bfSKris Buschelman /* Allocate space for cj, initialize cj, and */ 30667ba1a0bfSKris Buschelman /* destroy list of free space and other temporary array(s) */ 30677ba1a0bfSKris Buschelman ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3068a1a86e44SBarry Smith ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 306974ed9c26SBarry Smith ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr); 30707ba1a0bfSKris Buschelman 30717ba1a0bfSKris Buschelman /* Allocate space for ca */ 30727ba1a0bfSKris Buschelman ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 30737ba1a0bfSKris Buschelman ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 30747ba1a0bfSKris Buschelman 30757ba1a0bfSKris Buschelman /* put together the new matrix */ 30767adad957SLisandro Dalcin ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr); 30777ba1a0bfSKris Buschelman 30787ba1a0bfSKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 30797ba1a0bfSKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 30807ba1a0bfSKris Buschelman c = (Mat_SeqAIJ *)((*C)->data); 3081e6b907acSBarry Smith c->free_a = PETSC_TRUE; 3082e6b907acSBarry Smith c->free_ij = PETSC_TRUE; 30837ba1a0bfSKris Buschelman c->nonew = 0; 30847ba1a0bfSKris Buschelman 30857ba1a0bfSKris Buschelman /* Clean up. */ 30867ba1a0bfSKris Buschelman ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 30877ba1a0bfSKris Buschelman 30887ba1a0bfSKris Buschelman ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 30897ba1a0bfSKris Buschelman PetscFunctionReturn(0); 30907ba1a0bfSKris Buschelman } 30917ba1a0bfSKris Buschelman 30927ba1a0bfSKris Buschelman #undef __FUNCT__ 30937ba1a0bfSKris Buschelman #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ" 30947ba1a0bfSKris Buschelman PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 30957ba1a0bfSKris Buschelman { 30967ba1a0bfSKris Buschelman /* This routine requires testing -- first draft only */ 30977ba1a0bfSKris Buschelman PetscErrorCode ierr; 30987ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 30997ba1a0bfSKris Buschelman Mat P=pp->AIJ; 31007ba1a0bfSKris Buschelman Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 31017ba1a0bfSKris Buschelman Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 31027ba1a0bfSKris Buschelman Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 3103d2840e09SBarry Smith const PetscInt *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 3104d2840e09SBarry Smith const PetscInt *ci=c->i,*cj=c->j,*cjj; 3105d2840e09SBarry Smith const PetscInt am=A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof; 3106d2840e09SBarry Smith PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense; 3107d2840e09SBarry Smith const MatScalar *aa=a->a,*pa=p->a,*pA=p->a,*paj; 3108d2840e09SBarry Smith MatScalar *ca=c->a,*caj,*apa; 31097ba1a0bfSKris Buschelman 31107ba1a0bfSKris Buschelman PetscFunctionBegin; 31117ba1a0bfSKris Buschelman /* Allocate temporary array for storage of one row of A*P */ 311274ed9c26SBarry Smith ierr = PetscMalloc3(cn,MatScalar,&apa,cn,PetscInt,&apj,cn,PetscInt,&apjdense);CHKERRQ(ierr); 311374ed9c26SBarry Smith ierr = PetscMemzero(apa,cn*sizeof(MatScalar));CHKERRQ(ierr); 311474ed9c26SBarry Smith ierr = PetscMemzero(apj,cn*sizeof(PetscInt));CHKERRQ(ierr); 311574ed9c26SBarry Smith ierr = PetscMemzero(apjdense,cn*sizeof(PetscInt));CHKERRQ(ierr); 31167ba1a0bfSKris Buschelman 31177ba1a0bfSKris Buschelman /* Clear old values in C */ 31187ba1a0bfSKris Buschelman ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 31197ba1a0bfSKris Buschelman 31207ba1a0bfSKris Buschelman for (i=0;i<am;i++) { 31217ba1a0bfSKris Buschelman /* Form sparse row of A*P */ 31227ba1a0bfSKris Buschelman anzi = ai[i+1] - ai[i]; 31237ba1a0bfSKris Buschelman apnzj = 0; 31247ba1a0bfSKris Buschelman for (j=0;j<anzi;j++) { 31257ba1a0bfSKris Buschelman /* Get offset within block of P */ 31267ba1a0bfSKris Buschelman pshift = *aj%ppdof; 31277ba1a0bfSKris Buschelman /* Get block row of P */ 31287ba1a0bfSKris Buschelman prow = *aj++/ppdof; /* integer division */ 31297ba1a0bfSKris Buschelman pnzj = pi[prow+1] - pi[prow]; 31307ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 31317ba1a0bfSKris Buschelman paj = pa + pi[prow]; 31327ba1a0bfSKris Buschelman for (k=0;k<pnzj;k++) { 31337ba1a0bfSKris Buschelman poffset = pjj[k]*ppdof+pshift; 31347ba1a0bfSKris Buschelman if (!apjdense[poffset]) { 31357ba1a0bfSKris Buschelman apjdense[poffset] = -1; 31367ba1a0bfSKris Buschelman apj[apnzj++] = poffset; 31377ba1a0bfSKris Buschelman } 31387ba1a0bfSKris Buschelman apa[poffset] += (*aa)*paj[k]; 31397ba1a0bfSKris Buschelman } 3140dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr); 31417ba1a0bfSKris Buschelman aa++; 31427ba1a0bfSKris Buschelman } 31437ba1a0bfSKris Buschelman 31447ba1a0bfSKris Buschelman /* Sort the j index array for quick sparse axpy. */ 31457ba1a0bfSKris Buschelman /* Note: a array does not need sorting as it is in dense storage locations. */ 31467ba1a0bfSKris Buschelman ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 31477ba1a0bfSKris Buschelman 31487ba1a0bfSKris Buschelman /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 31497ba1a0bfSKris Buschelman prow = i/ppdof; /* integer division */ 31507ba1a0bfSKris Buschelman pshift = i%ppdof; 31517ba1a0bfSKris Buschelman poffset = pi[prow]; 31527ba1a0bfSKris Buschelman pnzi = pi[prow+1] - poffset; 31537ba1a0bfSKris Buschelman /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 31547ba1a0bfSKris Buschelman pJ = pj+poffset; 31557ba1a0bfSKris Buschelman pA = pa+poffset; 31567ba1a0bfSKris Buschelman for (j=0;j<pnzi;j++) { 31577ba1a0bfSKris Buschelman crow = (*pJ)*ppdof+pshift; 31587ba1a0bfSKris Buschelman cjj = cj + ci[crow]; 31597ba1a0bfSKris Buschelman caj = ca + ci[crow]; 31607ba1a0bfSKris Buschelman pJ++; 31617ba1a0bfSKris Buschelman /* Perform sparse axpy operation. Note cjj includes apj. */ 31627ba1a0bfSKris Buschelman for (k=0,nextap=0;nextap<apnzj;k++) { 31637ba1a0bfSKris Buschelman if (cjj[k]==apj[nextap]) { 31647ba1a0bfSKris Buschelman caj[k] += (*pA)*apa[apj[nextap++]]; 31657ba1a0bfSKris Buschelman } 31667ba1a0bfSKris Buschelman } 3167dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr); 31687ba1a0bfSKris Buschelman pA++; 31697ba1a0bfSKris Buschelman } 31707ba1a0bfSKris Buschelman 31717ba1a0bfSKris Buschelman /* Zero the current row info for A*P */ 31727ba1a0bfSKris Buschelman for (j=0;j<apnzj;j++) { 31737ba1a0bfSKris Buschelman apa[apj[j]] = 0.; 31747ba1a0bfSKris Buschelman apjdense[apj[j]] = 0; 31757ba1a0bfSKris Buschelman } 31767ba1a0bfSKris Buschelman } 31777ba1a0bfSKris Buschelman 31787ba1a0bfSKris Buschelman /* Assemble the final matrix and clean up */ 31797ba1a0bfSKris Buschelman ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 31807ba1a0bfSKris Buschelman ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 318174ed9c26SBarry Smith ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr); 318295ddefa2SHong Zhang PetscFunctionReturn(0); 318395ddefa2SHong Zhang } 31847ba1a0bfSKris Buschelman 318595ddefa2SHong Zhang #undef __FUNCT__ 318695ddefa2SHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIMAIJ" 318795ddefa2SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 318895ddefa2SHong Zhang { 318995ddefa2SHong Zhang PetscErrorCode ierr; 319095ddefa2SHong Zhang 319195ddefa2SHong Zhang PetscFunctionBegin; 319295ddefa2SHong Zhang /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */ 319395ddefa2SHong Zhang ierr = MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);CHKERRQ(ierr); 319495ddefa2SHong Zhang ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr); 319595ddefa2SHong Zhang PetscFunctionReturn(0); 319695ddefa2SHong Zhang } 319795ddefa2SHong Zhang 319895ddefa2SHong Zhang #undef __FUNCT__ 319995ddefa2SHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIMAIJ" 320095ddefa2SHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C) 320195ddefa2SHong Zhang { 320295ddefa2SHong Zhang PetscFunctionBegin; 3203e32f2f54SBarry Smith SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 32047ba1a0bfSKris Buschelman PetscFunctionReturn(0); 32057ba1a0bfSKris Buschelman } 32067ba1a0bfSKris Buschelman 3207be1d678aSKris Buschelman EXTERN_C_BEGIN 32087ba1a0bfSKris Buschelman #undef __FUNCT__ 32090fd73130SBarry Smith #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ" 32107087cfbeSBarry Smith PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 32119c4fc4c7SBarry Smith { 32129c4fc4c7SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 3213ceb03754SKris Buschelman Mat a = b->AIJ,B; 32149c4fc4c7SBarry Smith Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 32159c4fc4c7SBarry Smith PetscErrorCode ierr; 32160fd73130SBarry Smith PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 3217ba8c8a56SBarry Smith PetscInt *cols; 3218ba8c8a56SBarry Smith PetscScalar *vals; 32199c4fc4c7SBarry Smith 32209c4fc4c7SBarry Smith PetscFunctionBegin; 32219c4fc4c7SBarry Smith ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 32227c307921SBarry Smith ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr); 32239c4fc4c7SBarry Smith for (i=0; i<m; i++) { 32249c4fc4c7SBarry Smith nmax = PetscMax(nmax,aij->ilen[i]); 32250fd73130SBarry Smith for (j=0; j<dof; j++) { 32260fd73130SBarry Smith ilen[dof*i+j] = aij->ilen[i]; 32279c4fc4c7SBarry Smith } 32289c4fc4c7SBarry Smith } 3229ceb03754SKris Buschelman ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr); 32309c4fc4c7SBarry Smith ierr = PetscFree(ilen);CHKERRQ(ierr); 32319c4fc4c7SBarry Smith ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr); 32329c4fc4c7SBarry Smith ii = 0; 32339c4fc4c7SBarry Smith for (i=0; i<m; i++) { 3234ba8c8a56SBarry Smith ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 32350fd73130SBarry Smith for (j=0; j<dof; j++) { 32369c4fc4c7SBarry Smith for (k=0; k<ncols; k++) { 32370fd73130SBarry Smith icols[k] = dof*cols[k]+j; 32389c4fc4c7SBarry Smith } 3239ceb03754SKris Buschelman ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 32409c4fc4c7SBarry Smith ii++; 32419c4fc4c7SBarry Smith } 3242ba8c8a56SBarry Smith ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 32439c4fc4c7SBarry Smith } 32449c4fc4c7SBarry Smith ierr = PetscFree(icols);CHKERRQ(ierr); 3245ceb03754SKris Buschelman ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3246ceb03754SKris Buschelman ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3247ceb03754SKris Buschelman 3248ceb03754SKris Buschelman if (reuse == MAT_REUSE_MATRIX) { 32498ab5b326SKris Buschelman ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3250c3d102feSKris Buschelman } else { 3251ceb03754SKris Buschelman *newmat = B; 3252c3d102feSKris Buschelman } 32539c4fc4c7SBarry Smith PetscFunctionReturn(0); 32549c4fc4c7SBarry Smith } 3255be1d678aSKris Buschelman EXTERN_C_END 32569c4fc4c7SBarry Smith 3257c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h> 3258be1d678aSKris Buschelman 3259be1d678aSKris Buschelman EXTERN_C_BEGIN 32600fd73130SBarry Smith #undef __FUNCT__ 32610fd73130SBarry Smith #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ" 32627087cfbeSBarry Smith PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 32630fd73130SBarry Smith { 32640fd73130SBarry Smith Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 3265ceb03754SKris Buschelman Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 32660fd73130SBarry Smith Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 32670fd73130SBarry Smith Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 32680fd73130SBarry Smith Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 32690fd73130SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 3270910ba992SMatthew Knepley PetscInt dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0; 3271910ba992SMatthew Knepley PetscInt *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL; 32720fd73130SBarry Smith PetscInt rstart,cstart,*garray,ii,k; 32730fd73130SBarry Smith PetscErrorCode ierr; 32740fd73130SBarry Smith PetscScalar *vals,*ovals; 32750fd73130SBarry Smith 32760fd73130SBarry Smith PetscFunctionBegin; 3277d0f46423SBarry Smith ierr = PetscMalloc2(A->rmap->n,PetscInt,&dnz,A->rmap->n,PetscInt,&onz);CHKERRQ(ierr); 3278d0f46423SBarry Smith for (i=0; i<A->rmap->n/dof; i++) { 32790fd73130SBarry Smith nmax = PetscMax(nmax,AIJ->ilen[i]); 32800fd73130SBarry Smith onmax = PetscMax(onmax,OAIJ->ilen[i]); 32810fd73130SBarry Smith for (j=0; j<dof; j++) { 32820fd73130SBarry Smith dnz[dof*i+j] = AIJ->ilen[i]; 32830fd73130SBarry Smith onz[dof*i+j] = OAIJ->ilen[i]; 32840fd73130SBarry Smith } 32850fd73130SBarry Smith } 328669b1f4b7SBarry Smith ierr = MatCreateAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr); 32870fd73130SBarry Smith ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 32880fd73130SBarry Smith 32897a1a7badSBarry Smith ierr = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr); 3290d0f46423SBarry Smith rstart = dof*maij->A->rmap->rstart; 3291d0f46423SBarry Smith cstart = dof*maij->A->cmap->rstart; 32920fd73130SBarry Smith garray = mpiaij->garray; 32930fd73130SBarry Smith 32940fd73130SBarry Smith ii = rstart; 3295d0f46423SBarry Smith for (i=0; i<A->rmap->n/dof; i++) { 32960fd73130SBarry Smith ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 32970fd73130SBarry Smith ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 32980fd73130SBarry Smith for (j=0; j<dof; j++) { 32990fd73130SBarry Smith for (k=0; k<ncols; k++) { 33000fd73130SBarry Smith icols[k] = cstart + dof*cols[k]+j; 33010fd73130SBarry Smith } 33020fd73130SBarry Smith for (k=0; k<oncols; k++) { 33030fd73130SBarry Smith oicols[k] = dof*garray[ocols[k]]+j; 33040fd73130SBarry Smith } 3305ceb03754SKris Buschelman ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3306ceb03754SKris Buschelman ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 33070fd73130SBarry Smith ii++; 33080fd73130SBarry Smith } 33090fd73130SBarry Smith ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 33100fd73130SBarry Smith ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 33110fd73130SBarry Smith } 33120fd73130SBarry Smith ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 33130fd73130SBarry Smith 3314ceb03754SKris Buschelman ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3315ceb03754SKris Buschelman ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3316ceb03754SKris Buschelman 3317ceb03754SKris Buschelman if (reuse == MAT_REUSE_MATRIX) { 33187adad957SLisandro Dalcin PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 33197adad957SLisandro Dalcin ((PetscObject)A)->refct = 1; 33208ab5b326SKris Buschelman ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 33217adad957SLisandro Dalcin ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 3322c3d102feSKris Buschelman } else { 3323ceb03754SKris Buschelman *newmat = B; 3324c3d102feSKris Buschelman } 33250fd73130SBarry Smith PetscFunctionReturn(0); 33260fd73130SBarry Smith } 3327be1d678aSKris Buschelman EXTERN_C_END 33280fd73130SBarry Smith 33299e516c8fSBarry Smith #undef __FUNCT__ 33309e516c8fSBarry Smith #define __FUNCT__ "MatGetSubMatrix_MAIJ" 33319e516c8fSBarry Smith PetscErrorCode MatGetSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 33329e516c8fSBarry Smith { 33339e516c8fSBarry Smith PetscErrorCode ierr; 33349e516c8fSBarry Smith Mat A; 33359e516c8fSBarry Smith 33369e516c8fSBarry Smith PetscFunctionBegin; 33379e516c8fSBarry Smith ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 33389e516c8fSBarry Smith ierr = MatGetSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 33399e516c8fSBarry Smith ierr = MatDestroy(&A);CHKERRQ(ierr); 33409e516c8fSBarry Smith PetscFunctionReturn(0); 33419e516c8fSBarry Smith } 33420fd73130SBarry Smith 3343bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */ 3344ff585edeSJed Brown #undef __FUNCT__ 3345ff585edeSJed Brown #define __FUNCT__ "MatCreateMAIJ" 3346ff585edeSJed Brown /*@C 33470bad9183SKris Buschelman MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 33480bad9183SKris Buschelman operations for multicomponent problems. It interpolates each component the same 33490bad9183SKris Buschelman way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 33500bad9183SKris Buschelman and MATMPIAIJ for distributed matrices. 33510bad9183SKris Buschelman 3352ff585edeSJed Brown Collective 3353ff585edeSJed Brown 3354ff585edeSJed Brown Input Parameters: 3355ff585edeSJed Brown + A - the AIJ matrix describing the action on blocks 3356ff585edeSJed Brown - dof - the block size (number of components per node) 3357ff585edeSJed Brown 3358ff585edeSJed Brown Output Parameter: 3359ff585edeSJed Brown . maij - the new MAIJ matrix 3360ff585edeSJed Brown 33610bad9183SKris Buschelman Operations provided: 33620fd73130SBarry Smith + MatMult 33630bad9183SKris Buschelman . MatMultTranspose 33640bad9183SKris Buschelman . MatMultAdd 33650bad9183SKris Buschelman . MatMultTransposeAdd 33660fd73130SBarry Smith - MatView 33670bad9183SKris Buschelman 33680bad9183SKris Buschelman Level: advanced 33690bad9183SKris Buschelman 3370b409243cSBarry Smith .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ 3371ff585edeSJed Brown @*/ 33727087cfbeSBarry Smith PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 337382b1193eSBarry Smith { 3374dfbe8321SBarry Smith PetscErrorCode ierr; 3375b24ad042SBarry Smith PetscMPIInt size; 3376b24ad042SBarry Smith PetscInt n; 337782b1193eSBarry Smith Mat B; 337882b1193eSBarry Smith 337982b1193eSBarry Smith PetscFunctionBegin; 3380d72c5c08SBarry Smith ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3381d72c5c08SBarry Smith 3382d72c5c08SBarry Smith if (dof == 1) { 3383d72c5c08SBarry Smith *maij = A; 3384d72c5c08SBarry Smith } else { 33857adad957SLisandro Dalcin ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 3386d0f46423SBarry Smith ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr); 3387bccba955SJed Brown ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr); 3388bccba955SJed Brown ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr); 3389bccba955SJed Brown ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3390bccba955SJed Brown ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3391cd3bbe55SBarry Smith B->assembled = PETSC_TRUE; 3392d72c5c08SBarry Smith 33937adad957SLisandro Dalcin ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 3394f4a53059SBarry Smith if (size == 1) { 3395feb9fe23SJed Brown Mat_SeqMAIJ *b; 3396feb9fe23SJed Brown 3397b9b97703SBarry Smith ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 33984cb9d9b8SBarry Smith B->ops->destroy = MatDestroy_SeqMAIJ; 33990fd73130SBarry Smith B->ops->view = MatView_SeqMAIJ; 3400feb9fe23SJed Brown b = (Mat_SeqMAIJ*)B->data; 3401b9b97703SBarry Smith b->dof = dof; 34024cb9d9b8SBarry Smith b->AIJ = A; 3403d72c5c08SBarry Smith if (dof == 2) { 3404cd3bbe55SBarry Smith B->ops->mult = MatMult_SeqMAIJ_2; 3405cd3bbe55SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_2; 3406cd3bbe55SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 3407cd3bbe55SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 3408bcc973b7SBarry Smith } else if (dof == 3) { 3409bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_3; 3410bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_3; 3411bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 3412bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 3413bcc973b7SBarry Smith } else if (dof == 4) { 3414bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_4; 3415bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_4; 3416bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 3417bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 3418f9fae5adSBarry Smith } else if (dof == 5) { 3419f9fae5adSBarry Smith B->ops->mult = MatMult_SeqMAIJ_5; 3420f9fae5adSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_5; 3421f9fae5adSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 3422f9fae5adSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 34236cd98798SBarry Smith } else if (dof == 6) { 34246cd98798SBarry Smith B->ops->mult = MatMult_SeqMAIJ_6; 34256cd98798SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_6; 34266cd98798SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 34276cd98798SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 3428ed8eea03SBarry Smith } else if (dof == 7) { 3429ed8eea03SBarry Smith B->ops->mult = MatMult_SeqMAIJ_7; 3430ed8eea03SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_7; 3431ed8eea03SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 3432ed8eea03SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 343366ed3db0SBarry Smith } else if (dof == 8) { 343466ed3db0SBarry Smith B->ops->mult = MatMult_SeqMAIJ_8; 343566ed3db0SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_8; 343666ed3db0SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 343766ed3db0SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 34382b692628SMatthew Knepley } else if (dof == 9) { 34392b692628SMatthew Knepley B->ops->mult = MatMult_SeqMAIJ_9; 34402b692628SMatthew Knepley B->ops->multadd = MatMultAdd_SeqMAIJ_9; 34412b692628SMatthew Knepley B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 34422b692628SMatthew Knepley B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 3443b9cda22cSBarry Smith } else if (dof == 10) { 3444b9cda22cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_10; 3445b9cda22cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_10; 3446b9cda22cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 3447b9cda22cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 3448dbdd7285SBarry Smith } else if (dof == 11) { 3449dbdd7285SBarry Smith B->ops->mult = MatMult_SeqMAIJ_11; 3450dbdd7285SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_11; 3451dbdd7285SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 3452dbdd7285SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 34532f7816d4SBarry Smith } else if (dof == 16) { 34542f7816d4SBarry Smith B->ops->mult = MatMult_SeqMAIJ_16; 34552f7816d4SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_16; 34562f7816d4SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 34572f7816d4SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 3458ed1c418cSBarry Smith } else if (dof == 18) { 3459ed1c418cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_18; 3460ed1c418cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_18; 3461ed1c418cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 3462ed1c418cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 346382b1193eSBarry Smith } else { 34646861f193SBarry Smith B->ops->mult = MatMult_SeqMAIJ_N; 34656861f193SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_N; 34666861f193SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 34676861f193SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 346882b1193eSBarry Smith } 34697ba1a0bfSKris Buschelman B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ; 34707ba1a0bfSKris Buschelman B->ops->ptapnumeric_seqaij = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 34719c4fc4c7SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 3472f4a53059SBarry Smith } else { 3473f4a53059SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 3474feb9fe23SJed Brown Mat_MPIMAIJ *b; 3475f4a53059SBarry Smith IS from,to; 3476f4a53059SBarry Smith Vec gvec; 3477f4a53059SBarry Smith 3478b9b97703SBarry Smith ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 3479d72c5c08SBarry Smith B->ops->destroy = MatDestroy_MPIMAIJ; 34800fd73130SBarry Smith B->ops->view = MatView_MPIMAIJ; 3481b9b97703SBarry Smith b = (Mat_MPIMAIJ*)B->data; 3482b9b97703SBarry Smith b->dof = dof; 3483b9b97703SBarry Smith b->A = A; 34844cb9d9b8SBarry Smith ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 34854cb9d9b8SBarry Smith ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 34864cb9d9b8SBarry Smith 3487f4a53059SBarry Smith ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 3488*a34bdc0bSBarry Smith ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr); 3489*a34bdc0bSBarry Smith ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr); 349013288a74SBarry Smith ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 3491*a34bdc0bSBarry Smith ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr); 3492f4a53059SBarry Smith 3493f4a53059SBarry Smith /* create two temporary Index sets for build scatter gather */ 3494deff0451SBarry Smith ierr = ISCreateBlock(((PetscObject)A)->comm,dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 3495f4a53059SBarry Smith ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 3496f4a53059SBarry Smith 3497f4a53059SBarry Smith /* create temporary global vector to generate scatter context */ 3498778a2246SBarry Smith ierr = VecCreateMPIWithArray(((PetscObject)A)->comm,dof,dof*A->cmap->n,dof*A->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); 3499f4a53059SBarry Smith 3500f4a53059SBarry Smith /* generate the scatter context */ 3501f4a53059SBarry Smith ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 3502f4a53059SBarry Smith 35036bf464f9SBarry Smith ierr = ISDestroy(&from);CHKERRQ(ierr); 35046bf464f9SBarry Smith ierr = ISDestroy(&to);CHKERRQ(ierr); 35056bf464f9SBarry Smith ierr = VecDestroy(&gvec);CHKERRQ(ierr); 3506f4a53059SBarry Smith 3507f4a53059SBarry Smith B->ops->mult = MatMult_MPIMAIJ_dof; 35084cb9d9b8SBarry Smith B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 35094cb9d9b8SBarry Smith B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 35104cb9d9b8SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 351195ddefa2SHong Zhang B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ; 351295ddefa2SHong Zhang B->ops->ptapnumeric_mpiaij = MatPtAPNumeric_MPIAIJ_MPIMAIJ; 35130fd73130SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 3514f4a53059SBarry Smith } 35159e516c8fSBarry Smith B->ops->getsubmatrix = MatGetSubMatrix_MAIJ; 35164994cf47SJed Brown ierr = MatSetUp(B);CHKERRQ(ierr); 3517cd3bbe55SBarry Smith *maij = B; 35180fd73130SBarry Smith ierr = MatView_Private(B);CHKERRQ(ierr); 3519d72c5c08SBarry Smith } 352082b1193eSBarry Smith PetscFunctionReturn(0); 352182b1193eSBarry Smith } 3522