173f4d377SMatthew Knepley /*$Id: maij.c,v 1.19 2001/08/07 03:03:00 balay Exp $*/ 282b1193eSBarry Smith /* 3cd3bbe55SBarry Smith Defines the basic matrix operations for the MAIJ matrix storage format. 4cd3bbe55SBarry Smith This format is used for restriction and interpolation operations for 5cd3bbe55SBarry Smith multicomponent problems. It interpolates each component the same way 6cd3bbe55SBarry Smith independently. 7cd3bbe55SBarry Smith 8cd3bbe55SBarry Smith We provide: 9cd3bbe55SBarry Smith MatMult() 10cd3bbe55SBarry Smith MatMultTranspose() 11cd3bbe55SBarry Smith MatMultTransposeAdd() 12cd3bbe55SBarry Smith MatMultAdd() 13cd3bbe55SBarry Smith and 14cd3bbe55SBarry Smith MatCreateMAIJ(Mat,dof,Mat*) 15f4a53059SBarry Smith 16f4a53059SBarry Smith This single directory handles both the sequential and parallel codes 1782b1193eSBarry Smith */ 1882b1193eSBarry Smith 19be911618SKris Buschelman #include "src/mat/impls/maij/maij.h" 202f7816d4SBarry Smith #include "src/vec/vecimpl.h" 2182b1193eSBarry Smith 224a2ae208SSatish Balay #undef __FUNCT__ 234a2ae208SSatish Balay #define __FUNCT__ "MatMAIJGetAIJ" 24b9b97703SBarry Smith int MatMAIJGetAIJ(Mat A,Mat *B) 25b9b97703SBarry Smith { 26b9b97703SBarry Smith int ierr; 27b9b97703SBarry Smith PetscTruth ismpimaij,isseqmaij; 28b9b97703SBarry Smith 29b9b97703SBarry Smith PetscFunctionBegin; 30b9b97703SBarry Smith ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 31b9b97703SBarry Smith ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 32b9b97703SBarry Smith if (ismpimaij) { 33b9b97703SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 34b9b97703SBarry Smith 35b9b97703SBarry Smith *B = b->A; 36b9b97703SBarry Smith } else if (isseqmaij) { 37b9b97703SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 38b9b97703SBarry Smith 39b9b97703SBarry Smith *B = b->AIJ; 40b9b97703SBarry Smith } else { 41b9b97703SBarry Smith *B = A; 42b9b97703SBarry Smith } 43b9b97703SBarry Smith PetscFunctionReturn(0); 44b9b97703SBarry Smith } 45b9b97703SBarry Smith 464a2ae208SSatish Balay #undef __FUNCT__ 474a2ae208SSatish Balay #define __FUNCT__ "MatMAIJRedimension" 48b9b97703SBarry Smith int MatMAIJRedimension(Mat A,int dof,Mat *B) 49b9b97703SBarry Smith { 50b9b97703SBarry Smith int ierr; 51b9b97703SBarry Smith Mat Aij; 52b9b97703SBarry Smith 53b9b97703SBarry Smith PetscFunctionBegin; 54b9b97703SBarry Smith ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 55b9b97703SBarry Smith ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 56b9b97703SBarry Smith PetscFunctionReturn(0); 57b9b97703SBarry Smith } 58b9b97703SBarry Smith 594a2ae208SSatish Balay #undef __FUNCT__ 604a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_SeqMAIJ" 614cb9d9b8SBarry Smith int MatDestroy_SeqMAIJ(Mat A) 6282b1193eSBarry Smith { 6382b1193eSBarry Smith int ierr; 644cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 6582b1193eSBarry Smith 6682b1193eSBarry Smith PetscFunctionBegin; 67cd3bbe55SBarry Smith if (b->AIJ) { 68cd3bbe55SBarry Smith ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 6982b1193eSBarry Smith } 704cb9d9b8SBarry Smith ierr = PetscFree(b);CHKERRQ(ierr); 714cb9d9b8SBarry Smith PetscFunctionReturn(0); 724cb9d9b8SBarry Smith } 734cb9d9b8SBarry Smith 744a2ae208SSatish Balay #undef __FUNCT__ 754a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIMAIJ" 764cb9d9b8SBarry Smith int MatDestroy_MPIMAIJ(Mat A) 774cb9d9b8SBarry Smith { 784cb9d9b8SBarry Smith int ierr; 794cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 804cb9d9b8SBarry Smith 814cb9d9b8SBarry Smith PetscFunctionBegin; 824cb9d9b8SBarry Smith if (b->AIJ) { 834cb9d9b8SBarry Smith ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 844cb9d9b8SBarry Smith } 85f4a53059SBarry Smith if (b->OAIJ) { 86f4a53059SBarry Smith ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr); 87f4a53059SBarry Smith } 88b9b97703SBarry Smith if (b->A) { 89b9b97703SBarry Smith ierr = MatDestroy(b->A);CHKERRQ(ierr); 90b9b97703SBarry Smith } 91f4a53059SBarry Smith if (b->ctx) { 92f4a53059SBarry Smith ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr); 93f4a53059SBarry Smith } 94f4a53059SBarry Smith if (b->w) { 95f4a53059SBarry Smith ierr = VecDestroy(b->w);CHKERRQ(ierr); 96f4a53059SBarry Smith } 97cd3bbe55SBarry Smith ierr = PetscFree(b);CHKERRQ(ierr); 98b9b97703SBarry Smith PetscFunctionReturn(0); 99b9b97703SBarry Smith } 100b9b97703SBarry Smith 10182b1193eSBarry Smith EXTERN_C_BEGIN 1024a2ae208SSatish Balay #undef __FUNCT__ 1034a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MAIJ" 104f4a53059SBarry Smith int MatCreate_MAIJ(Mat A) 10582b1193eSBarry Smith { 106cd3bbe55SBarry Smith int ierr; 1074cb9d9b8SBarry Smith Mat_MPIMAIJ *b; 10882b1193eSBarry Smith 10982b1193eSBarry Smith PetscFunctionBegin; 110b0a32e0cSBarry Smith ierr = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr); 111b0a32e0cSBarry Smith A->data = (void*)b; 1124cb9d9b8SBarry Smith ierr = PetscMemzero(b,sizeof(Mat_MPIMAIJ));CHKERRQ(ierr); 113cd3bbe55SBarry Smith ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 114cd3bbe55SBarry Smith A->factor = 0; 115cd3bbe55SBarry Smith A->mapping = 0; 116f4a53059SBarry Smith 117cd3bbe55SBarry Smith b->AIJ = 0; 118cd3bbe55SBarry Smith b->dof = 0; 119f4a53059SBarry Smith b->OAIJ = 0; 120f4a53059SBarry Smith b->ctx = 0; 121f4a53059SBarry Smith b->w = 0; 12282b1193eSBarry Smith PetscFunctionReturn(0); 12382b1193eSBarry Smith } 12482b1193eSBarry Smith EXTERN_C_END 12582b1193eSBarry Smith 126cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 1274a2ae208SSatish Balay #undef __FUNCT__ 128b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_2" 129cd3bbe55SBarry Smith int MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 13082b1193eSBarry Smith { 1314cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 132bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 13387828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2; 134*bfec09a0SHong Zhang int ierr,m = b->AIJ->m,*idx,*ii; 135bcc973b7SBarry Smith int n,i,jrow,j; 13682b1193eSBarry Smith 137bcc973b7SBarry Smith PetscFunctionBegin; 1382f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 1392f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 140bcc973b7SBarry Smith idx = a->j; 141bcc973b7SBarry Smith v = a->a; 142bcc973b7SBarry Smith ii = a->i; 143bcc973b7SBarry Smith 144bcc973b7SBarry Smith for (i=0; i<m; i++) { 145bcc973b7SBarry Smith jrow = ii[i]; 146bcc973b7SBarry Smith n = ii[i+1] - jrow; 147bcc973b7SBarry Smith sum1 = 0.0; 148bcc973b7SBarry Smith sum2 = 0.0; 149bcc973b7SBarry Smith for (j=0; j<n; j++) { 150bcc973b7SBarry Smith sum1 += v[jrow]*x[2*idx[jrow]]; 151bcc973b7SBarry Smith sum2 += v[jrow]*x[2*idx[jrow]+1]; 152bcc973b7SBarry Smith jrow++; 153bcc973b7SBarry Smith } 154bcc973b7SBarry Smith y[2*i] = sum1; 155bcc973b7SBarry Smith y[2*i+1] = sum2; 156bcc973b7SBarry Smith } 157bcc973b7SBarry Smith 158b0a32e0cSBarry Smith PetscLogFlops(4*a->nz - 2*m); 1592f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1602f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 16182b1193eSBarry Smith PetscFunctionReturn(0); 16282b1193eSBarry Smith } 163bcc973b7SBarry Smith 1644a2ae208SSatish Balay #undef __FUNCT__ 165b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 166cd3bbe55SBarry Smith int MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 16782b1193eSBarry Smith { 1684cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 169bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 17087828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0; 171*bfec09a0SHong Zhang int ierr,m = b->AIJ->m,n,i,*idx; 17282b1193eSBarry Smith 173bcc973b7SBarry Smith PetscFunctionBegin; 174bcc973b7SBarry Smith ierr = VecSet(&zero,yy);CHKERRQ(ierr); 1752f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 1762f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 177*bfec09a0SHong Zhang 178bcc973b7SBarry Smith for (i=0; i<m; i++) { 179*bfec09a0SHong Zhang idx = a->j + a->i[i] ; 180*bfec09a0SHong Zhang v = a->a + a->i[i] ; 181bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 182bcc973b7SBarry Smith alpha1 = x[2*i]; 183bcc973b7SBarry Smith alpha2 = x[2*i+1]; 184bcc973b7SBarry Smith while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 185bcc973b7SBarry Smith } 186b0a32e0cSBarry Smith PetscLogFlops(4*a->nz - 2*b->AIJ->n); 1872f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1882f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 18982b1193eSBarry Smith PetscFunctionReturn(0); 19082b1193eSBarry Smith } 191bcc973b7SBarry Smith 1924a2ae208SSatish Balay #undef __FUNCT__ 193b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 194cd3bbe55SBarry Smith int MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 19582b1193eSBarry Smith { 1964cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 197bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 19887828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2; 199*bfec09a0SHong Zhang int ierr,m = b->AIJ->m,*idx,*ii; 200bcc973b7SBarry Smith int n,i,jrow,j; 20182b1193eSBarry Smith 202bcc973b7SBarry Smith PetscFunctionBegin; 203f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2042f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 2052f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 206bcc973b7SBarry Smith idx = a->j; 207bcc973b7SBarry Smith v = a->a; 208bcc973b7SBarry Smith ii = a->i; 209bcc973b7SBarry Smith 210bcc973b7SBarry Smith for (i=0; i<m; i++) { 211bcc973b7SBarry Smith jrow = ii[i]; 212bcc973b7SBarry Smith n = ii[i+1] - jrow; 213bcc973b7SBarry Smith sum1 = 0.0; 214bcc973b7SBarry Smith sum2 = 0.0; 215bcc973b7SBarry Smith for (j=0; j<n; j++) { 216bcc973b7SBarry Smith sum1 += v[jrow]*x[2*idx[jrow]]; 217bcc973b7SBarry Smith sum2 += v[jrow]*x[2*idx[jrow]+1]; 218bcc973b7SBarry Smith jrow++; 219bcc973b7SBarry Smith } 220bcc973b7SBarry Smith y[2*i] += sum1; 221bcc973b7SBarry Smith y[2*i+1] += sum2; 222bcc973b7SBarry Smith } 223bcc973b7SBarry Smith 224b0a32e0cSBarry Smith PetscLogFlops(4*a->nz - 2*m); 2252f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 2262f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 227bcc973b7SBarry Smith PetscFunctionReturn(0); 22882b1193eSBarry Smith } 2294a2ae208SSatish Balay #undef __FUNCT__ 230b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 231cd3bbe55SBarry Smith int MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 23282b1193eSBarry Smith { 2334cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 234bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 23587828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2; 236*bfec09a0SHong Zhang int ierr,m = b->AIJ->m,n,i,*idx; 23782b1193eSBarry Smith 238bcc973b7SBarry Smith PetscFunctionBegin; 239f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2402f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 2412f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 242*bfec09a0SHong Zhang 243bcc973b7SBarry Smith for (i=0; i<m; i++) { 244*bfec09a0SHong Zhang idx = a->j + a->i[i] ; 245*bfec09a0SHong Zhang v = a->a + a->i[i] ; 246bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 247bcc973b7SBarry Smith alpha1 = x[2*i]; 248bcc973b7SBarry Smith alpha2 = x[2*i+1]; 249bcc973b7SBarry Smith while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 250bcc973b7SBarry Smith } 251b0a32e0cSBarry Smith PetscLogFlops(4*a->nz - 2*b->AIJ->n); 2522f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 2532f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 254bcc973b7SBarry Smith PetscFunctionReturn(0); 25582b1193eSBarry Smith } 256cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 2574a2ae208SSatish Balay #undef __FUNCT__ 258b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_3" 259bcc973b7SBarry Smith int MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 260bcc973b7SBarry Smith { 2614cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 262bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 26387828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3; 264*bfec09a0SHong Zhang int ierr,m = b->AIJ->m,*idx,*ii; 265bcc973b7SBarry Smith int n,i,jrow,j; 26682b1193eSBarry Smith 267bcc973b7SBarry Smith PetscFunctionBegin; 2682f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 2692f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 270bcc973b7SBarry Smith idx = a->j; 271bcc973b7SBarry Smith v = a->a; 272bcc973b7SBarry Smith ii = a->i; 273bcc973b7SBarry Smith 274bcc973b7SBarry Smith for (i=0; i<m; i++) { 275bcc973b7SBarry Smith jrow = ii[i]; 276bcc973b7SBarry Smith n = ii[i+1] - jrow; 277bcc973b7SBarry Smith sum1 = 0.0; 278bcc973b7SBarry Smith sum2 = 0.0; 279bcc973b7SBarry Smith sum3 = 0.0; 280bcc973b7SBarry Smith for (j=0; j<n; j++) { 281bcc973b7SBarry Smith sum1 += v[jrow]*x[3*idx[jrow]]; 282bcc973b7SBarry Smith sum2 += v[jrow]*x[3*idx[jrow]+1]; 283bcc973b7SBarry Smith sum3 += v[jrow]*x[3*idx[jrow]+2]; 284bcc973b7SBarry Smith jrow++; 285bcc973b7SBarry Smith } 286bcc973b7SBarry Smith y[3*i] = sum1; 287bcc973b7SBarry Smith y[3*i+1] = sum2; 288bcc973b7SBarry Smith y[3*i+2] = sum3; 289bcc973b7SBarry Smith } 290bcc973b7SBarry Smith 291b0a32e0cSBarry Smith PetscLogFlops(6*a->nz - 3*m); 2922f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 2932f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 294bcc973b7SBarry Smith PetscFunctionReturn(0); 295bcc973b7SBarry Smith } 296bcc973b7SBarry Smith 2974a2ae208SSatish Balay #undef __FUNCT__ 298b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 299bcc973b7SBarry Smith int MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 300bcc973b7SBarry Smith { 3014cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 302bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 30387828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0; 304*bfec09a0SHong Zhang int ierr,m = b->AIJ->m,n,i,*idx; 305bcc973b7SBarry Smith 306bcc973b7SBarry Smith PetscFunctionBegin; 307bcc973b7SBarry Smith ierr = VecSet(&zero,yy);CHKERRQ(ierr); 3082f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 3092f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 310*bfec09a0SHong Zhang 311bcc973b7SBarry Smith for (i=0; i<m; i++) { 312*bfec09a0SHong Zhang idx = a->j + a->i[i]; 313*bfec09a0SHong Zhang v = a->a + a->i[i]; 314bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 315bcc973b7SBarry Smith alpha1 = x[3*i]; 316bcc973b7SBarry Smith alpha2 = x[3*i+1]; 317bcc973b7SBarry Smith alpha3 = x[3*i+2]; 318bcc973b7SBarry Smith while (n-->0) { 319bcc973b7SBarry Smith y[3*(*idx)] += alpha1*(*v); 320bcc973b7SBarry Smith y[3*(*idx)+1] += alpha2*(*v); 321bcc973b7SBarry Smith y[3*(*idx)+2] += alpha3*(*v); 322bcc973b7SBarry Smith idx++; v++; 323bcc973b7SBarry Smith } 324bcc973b7SBarry Smith } 325b0a32e0cSBarry Smith PetscLogFlops(6*a->nz - 3*b->AIJ->n); 3262f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 3272f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 328bcc973b7SBarry Smith PetscFunctionReturn(0); 329bcc973b7SBarry Smith } 330bcc973b7SBarry Smith 3314a2ae208SSatish Balay #undef __FUNCT__ 332b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 333bcc973b7SBarry Smith int MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 334bcc973b7SBarry Smith { 3354cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 336bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 33787828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3; 338*bfec09a0SHong Zhang int ierr,m = b->AIJ->m,*idx,*ii; 339bcc973b7SBarry Smith int n,i,jrow,j; 340bcc973b7SBarry Smith 341bcc973b7SBarry Smith PetscFunctionBegin; 342f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 3432f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 3442f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 345bcc973b7SBarry Smith idx = a->j; 346bcc973b7SBarry Smith v = a->a; 347bcc973b7SBarry Smith ii = a->i; 348bcc973b7SBarry Smith 349bcc973b7SBarry Smith for (i=0; i<m; i++) { 350bcc973b7SBarry Smith jrow = ii[i]; 351bcc973b7SBarry Smith n = ii[i+1] - jrow; 352bcc973b7SBarry Smith sum1 = 0.0; 353bcc973b7SBarry Smith sum2 = 0.0; 354bcc973b7SBarry Smith sum3 = 0.0; 355bcc973b7SBarry Smith for (j=0; j<n; j++) { 356bcc973b7SBarry Smith sum1 += v[jrow]*x[3*idx[jrow]]; 357bcc973b7SBarry Smith sum2 += v[jrow]*x[3*idx[jrow]+1]; 358bcc973b7SBarry Smith sum3 += v[jrow]*x[3*idx[jrow]+2]; 359bcc973b7SBarry Smith jrow++; 360bcc973b7SBarry Smith } 361bcc973b7SBarry Smith y[3*i] += sum1; 362bcc973b7SBarry Smith y[3*i+1] += sum2; 363bcc973b7SBarry Smith y[3*i+2] += sum3; 364bcc973b7SBarry Smith } 365bcc973b7SBarry Smith 366b0a32e0cSBarry Smith PetscLogFlops(6*a->nz); 3672f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 3682f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 369bcc973b7SBarry Smith PetscFunctionReturn(0); 370bcc973b7SBarry Smith } 3714a2ae208SSatish Balay #undef __FUNCT__ 372b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 373bcc973b7SBarry Smith int MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 374bcc973b7SBarry Smith { 3754cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 376bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 37787828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3; 378*bfec09a0SHong Zhang int ierr,m = b->AIJ->m,n,i,*idx; 379bcc973b7SBarry Smith 380bcc973b7SBarry Smith PetscFunctionBegin; 381f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 3822f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 3832f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 384bcc973b7SBarry Smith for (i=0; i<m; i++) { 385*bfec09a0SHong Zhang idx = a->j + a->i[i] ; 386*bfec09a0SHong Zhang v = a->a + a->i[i] ; 387bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 388bcc973b7SBarry Smith alpha1 = x[3*i]; 389bcc973b7SBarry Smith alpha2 = x[3*i+1]; 390bcc973b7SBarry Smith alpha3 = x[3*i+2]; 391bcc973b7SBarry Smith while (n-->0) { 392bcc973b7SBarry Smith y[3*(*idx)] += alpha1*(*v); 393bcc973b7SBarry Smith y[3*(*idx)+1] += alpha2*(*v); 394bcc973b7SBarry Smith y[3*(*idx)+2] += alpha3*(*v); 395bcc973b7SBarry Smith idx++; v++; 396bcc973b7SBarry Smith } 397bcc973b7SBarry Smith } 398b0a32e0cSBarry Smith PetscLogFlops(6*a->nz); 3992f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 4002f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 401bcc973b7SBarry Smith PetscFunctionReturn(0); 402bcc973b7SBarry Smith } 403bcc973b7SBarry Smith 404bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/ 4054a2ae208SSatish Balay #undef __FUNCT__ 406b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_4" 407bcc973b7SBarry Smith int MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 408bcc973b7SBarry Smith { 4094cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 410bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 41187828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 412*bfec09a0SHong Zhang int ierr,m = b->AIJ->m,*idx,*ii; 413bcc973b7SBarry Smith int n,i,jrow,j; 414bcc973b7SBarry Smith 415bcc973b7SBarry Smith PetscFunctionBegin; 4162f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 4172f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 418bcc973b7SBarry Smith idx = a->j; 419bcc973b7SBarry Smith v = a->a; 420bcc973b7SBarry Smith ii = a->i; 421bcc973b7SBarry Smith 422bcc973b7SBarry Smith for (i=0; i<m; i++) { 423bcc973b7SBarry Smith jrow = ii[i]; 424bcc973b7SBarry Smith n = ii[i+1] - jrow; 425bcc973b7SBarry Smith sum1 = 0.0; 426bcc973b7SBarry Smith sum2 = 0.0; 427bcc973b7SBarry Smith sum3 = 0.0; 428bcc973b7SBarry Smith sum4 = 0.0; 429bcc973b7SBarry Smith for (j=0; j<n; j++) { 430bcc973b7SBarry Smith sum1 += v[jrow]*x[4*idx[jrow]]; 431bcc973b7SBarry Smith sum2 += v[jrow]*x[4*idx[jrow]+1]; 432bcc973b7SBarry Smith sum3 += v[jrow]*x[4*idx[jrow]+2]; 433bcc973b7SBarry Smith sum4 += v[jrow]*x[4*idx[jrow]+3]; 434bcc973b7SBarry Smith jrow++; 435bcc973b7SBarry Smith } 436bcc973b7SBarry Smith y[4*i] = sum1; 437bcc973b7SBarry Smith y[4*i+1] = sum2; 438bcc973b7SBarry Smith y[4*i+2] = sum3; 439bcc973b7SBarry Smith y[4*i+3] = sum4; 440bcc973b7SBarry Smith } 441bcc973b7SBarry Smith 442b0a32e0cSBarry Smith PetscLogFlops(8*a->nz - 4*m); 4432f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 4442f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 445bcc973b7SBarry Smith PetscFunctionReturn(0); 446bcc973b7SBarry Smith } 447bcc973b7SBarry Smith 4484a2ae208SSatish Balay #undef __FUNCT__ 449b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 450bcc973b7SBarry Smith int MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 451bcc973b7SBarry Smith { 4524cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 453bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 45487828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0; 455*bfec09a0SHong Zhang int ierr,m = b->AIJ->m,n,i,*idx; 456bcc973b7SBarry Smith 457bcc973b7SBarry Smith PetscFunctionBegin; 458bcc973b7SBarry Smith ierr = VecSet(&zero,yy);CHKERRQ(ierr); 4592f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 4602f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 461bcc973b7SBarry Smith for (i=0; i<m; i++) { 462*bfec09a0SHong Zhang idx = a->j + a->i[i] ; 463*bfec09a0SHong Zhang v = a->a + a->i[i] ; 464bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 465bcc973b7SBarry Smith alpha1 = x[4*i]; 466bcc973b7SBarry Smith alpha2 = x[4*i+1]; 467bcc973b7SBarry Smith alpha3 = x[4*i+2]; 468bcc973b7SBarry Smith alpha4 = x[4*i+3]; 469bcc973b7SBarry Smith while (n-->0) { 470bcc973b7SBarry Smith y[4*(*idx)] += alpha1*(*v); 471bcc973b7SBarry Smith y[4*(*idx)+1] += alpha2*(*v); 472bcc973b7SBarry Smith y[4*(*idx)+2] += alpha3*(*v); 473bcc973b7SBarry Smith y[4*(*idx)+3] += alpha4*(*v); 474bcc973b7SBarry Smith idx++; v++; 475bcc973b7SBarry Smith } 476bcc973b7SBarry Smith } 477b0a32e0cSBarry Smith PetscLogFlops(8*a->nz - 4*b->AIJ->n); 4782f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 4792f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 480bcc973b7SBarry Smith PetscFunctionReturn(0); 481bcc973b7SBarry Smith } 482bcc973b7SBarry Smith 4834a2ae208SSatish Balay #undef __FUNCT__ 484b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 485bcc973b7SBarry Smith int MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 486bcc973b7SBarry Smith { 4874cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 488f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 48987828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 490*bfec09a0SHong Zhang int ierr,m = b->AIJ->m,*idx,*ii; 491f9fae5adSBarry Smith int n,i,jrow,j; 492f9fae5adSBarry Smith 493f9fae5adSBarry Smith PetscFunctionBegin; 494f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 4952f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 4962f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 497f9fae5adSBarry Smith idx = a->j; 498f9fae5adSBarry Smith v = a->a; 499f9fae5adSBarry Smith ii = a->i; 500f9fae5adSBarry Smith 501f9fae5adSBarry Smith for (i=0; i<m; i++) { 502f9fae5adSBarry Smith jrow = ii[i]; 503f9fae5adSBarry Smith n = ii[i+1] - jrow; 504f9fae5adSBarry Smith sum1 = 0.0; 505f9fae5adSBarry Smith sum2 = 0.0; 506f9fae5adSBarry Smith sum3 = 0.0; 507f9fae5adSBarry Smith sum4 = 0.0; 508f9fae5adSBarry Smith for (j=0; j<n; j++) { 509f9fae5adSBarry Smith sum1 += v[jrow]*x[4*idx[jrow]]; 510f9fae5adSBarry Smith sum2 += v[jrow]*x[4*idx[jrow]+1]; 511f9fae5adSBarry Smith sum3 += v[jrow]*x[4*idx[jrow]+2]; 512f9fae5adSBarry Smith sum4 += v[jrow]*x[4*idx[jrow]+3]; 513f9fae5adSBarry Smith jrow++; 514f9fae5adSBarry Smith } 515f9fae5adSBarry Smith y[4*i] += sum1; 516f9fae5adSBarry Smith y[4*i+1] += sum2; 517f9fae5adSBarry Smith y[4*i+2] += sum3; 518f9fae5adSBarry Smith y[4*i+3] += sum4; 519f9fae5adSBarry Smith } 520f9fae5adSBarry Smith 521b0a32e0cSBarry Smith PetscLogFlops(8*a->nz - 4*m); 5222f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 5232f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 524f9fae5adSBarry Smith PetscFunctionReturn(0); 525bcc973b7SBarry Smith } 5264a2ae208SSatish Balay #undef __FUNCT__ 527b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 528bcc973b7SBarry Smith int MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 529bcc973b7SBarry Smith { 5304cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 531f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 53287828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4; 533*bfec09a0SHong Zhang int ierr,m = b->AIJ->m,n,i,*idx; 534f9fae5adSBarry Smith 535f9fae5adSBarry Smith PetscFunctionBegin; 536f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 5372f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 5382f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 539*bfec09a0SHong Zhang 540f9fae5adSBarry Smith for (i=0; i<m; i++) { 541*bfec09a0SHong Zhang idx = a->j + a->i[i] ; 542*bfec09a0SHong Zhang v = a->a + a->i[i] ; 543f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 544f9fae5adSBarry Smith alpha1 = x[4*i]; 545f9fae5adSBarry Smith alpha2 = x[4*i+1]; 546f9fae5adSBarry Smith alpha3 = x[4*i+2]; 547f9fae5adSBarry Smith alpha4 = x[4*i+3]; 548f9fae5adSBarry Smith while (n-->0) { 549f9fae5adSBarry Smith y[4*(*idx)] += alpha1*(*v); 550f9fae5adSBarry Smith y[4*(*idx)+1] += alpha2*(*v); 551f9fae5adSBarry Smith y[4*(*idx)+2] += alpha3*(*v); 552f9fae5adSBarry Smith y[4*(*idx)+3] += alpha4*(*v); 553f9fae5adSBarry Smith idx++; v++; 554f9fae5adSBarry Smith } 555f9fae5adSBarry Smith } 556b0a32e0cSBarry Smith PetscLogFlops(8*a->nz - 4*b->AIJ->n); 5572f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 5582f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 559f9fae5adSBarry Smith PetscFunctionReturn(0); 560f9fae5adSBarry Smith } 561f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/ 5626cd98798SBarry Smith 5634a2ae208SSatish Balay #undef __FUNCT__ 564b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_5" 565f9fae5adSBarry Smith int MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 566f9fae5adSBarry Smith { 5674cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 568f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 56987828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 570*bfec09a0SHong Zhang int ierr,m = b->AIJ->m,*idx,*ii; 571f9fae5adSBarry Smith int n,i,jrow,j; 572f9fae5adSBarry Smith 573f9fae5adSBarry Smith PetscFunctionBegin; 5742f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 5752f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 576f9fae5adSBarry Smith idx = a->j; 577f9fae5adSBarry Smith v = a->a; 578f9fae5adSBarry Smith ii = a->i; 579f9fae5adSBarry Smith 580f9fae5adSBarry Smith for (i=0; i<m; i++) { 581f9fae5adSBarry Smith jrow = ii[i]; 582f9fae5adSBarry Smith n = ii[i+1] - jrow; 583f9fae5adSBarry Smith sum1 = 0.0; 584f9fae5adSBarry Smith sum2 = 0.0; 585f9fae5adSBarry Smith sum3 = 0.0; 586f9fae5adSBarry Smith sum4 = 0.0; 587f9fae5adSBarry Smith sum5 = 0.0; 588f9fae5adSBarry Smith for (j=0; j<n; j++) { 589f9fae5adSBarry Smith sum1 += v[jrow]*x[5*idx[jrow]]; 590f9fae5adSBarry Smith sum2 += v[jrow]*x[5*idx[jrow]+1]; 591f9fae5adSBarry Smith sum3 += v[jrow]*x[5*idx[jrow]+2]; 592f9fae5adSBarry Smith sum4 += v[jrow]*x[5*idx[jrow]+3]; 593f9fae5adSBarry Smith sum5 += v[jrow]*x[5*idx[jrow]+4]; 594f9fae5adSBarry Smith jrow++; 595f9fae5adSBarry Smith } 596f9fae5adSBarry Smith y[5*i] = sum1; 597f9fae5adSBarry Smith y[5*i+1] = sum2; 598f9fae5adSBarry Smith y[5*i+2] = sum3; 599f9fae5adSBarry Smith y[5*i+3] = sum4; 600f9fae5adSBarry Smith y[5*i+4] = sum5; 601f9fae5adSBarry Smith } 602f9fae5adSBarry Smith 603b0a32e0cSBarry Smith PetscLogFlops(10*a->nz - 5*m); 6042f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 6052f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 606f9fae5adSBarry Smith PetscFunctionReturn(0); 607f9fae5adSBarry Smith } 608f9fae5adSBarry Smith 6094a2ae208SSatish Balay #undef __FUNCT__ 610b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 611f9fae5adSBarry Smith int MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 612f9fae5adSBarry Smith { 6134cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 614f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 61587828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0; 616*bfec09a0SHong Zhang int ierr,m = b->AIJ->m,n,i,*idx; 617f9fae5adSBarry Smith 618f9fae5adSBarry Smith PetscFunctionBegin; 619f9fae5adSBarry Smith ierr = VecSet(&zero,yy);CHKERRQ(ierr); 6202f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 6212f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 622*bfec09a0SHong Zhang 623f9fae5adSBarry Smith for (i=0; i<m; i++) { 624*bfec09a0SHong Zhang idx = a->j + a->i[i] ; 625*bfec09a0SHong Zhang v = a->a + a->i[i] ; 626f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 627f9fae5adSBarry Smith alpha1 = x[5*i]; 628f9fae5adSBarry Smith alpha2 = x[5*i+1]; 629f9fae5adSBarry Smith alpha3 = x[5*i+2]; 630f9fae5adSBarry Smith alpha4 = x[5*i+3]; 631f9fae5adSBarry Smith alpha5 = x[5*i+4]; 632f9fae5adSBarry Smith while (n-->0) { 633f9fae5adSBarry Smith y[5*(*idx)] += alpha1*(*v); 634f9fae5adSBarry Smith y[5*(*idx)+1] += alpha2*(*v); 635f9fae5adSBarry Smith y[5*(*idx)+2] += alpha3*(*v); 636f9fae5adSBarry Smith y[5*(*idx)+3] += alpha4*(*v); 637f9fae5adSBarry Smith y[5*(*idx)+4] += alpha5*(*v); 638f9fae5adSBarry Smith idx++; v++; 639f9fae5adSBarry Smith } 640f9fae5adSBarry Smith } 641b0a32e0cSBarry Smith PetscLogFlops(10*a->nz - 5*b->AIJ->n); 6422f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 6432f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 644f9fae5adSBarry Smith PetscFunctionReturn(0); 645f9fae5adSBarry Smith } 646f9fae5adSBarry Smith 6474a2ae208SSatish Balay #undef __FUNCT__ 648b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 649f9fae5adSBarry Smith int MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 650f9fae5adSBarry Smith { 6514cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 652f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 65387828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 654*bfec09a0SHong Zhang int ierr,m = b->AIJ->m,*idx,*ii; 655f9fae5adSBarry Smith int n,i,jrow,j; 656f9fae5adSBarry Smith 657f9fae5adSBarry Smith PetscFunctionBegin; 658f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 6592f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 6602f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 661f9fae5adSBarry Smith idx = a->j; 662f9fae5adSBarry Smith v = a->a; 663f9fae5adSBarry Smith ii = a->i; 664f9fae5adSBarry Smith 665f9fae5adSBarry Smith for (i=0; i<m; i++) { 666f9fae5adSBarry Smith jrow = ii[i]; 667f9fae5adSBarry Smith n = ii[i+1] - jrow; 668f9fae5adSBarry Smith sum1 = 0.0; 669f9fae5adSBarry Smith sum2 = 0.0; 670f9fae5adSBarry Smith sum3 = 0.0; 671f9fae5adSBarry Smith sum4 = 0.0; 672f9fae5adSBarry Smith sum5 = 0.0; 673f9fae5adSBarry Smith for (j=0; j<n; j++) { 674f9fae5adSBarry Smith sum1 += v[jrow]*x[5*idx[jrow]]; 675f9fae5adSBarry Smith sum2 += v[jrow]*x[5*idx[jrow]+1]; 676f9fae5adSBarry Smith sum3 += v[jrow]*x[5*idx[jrow]+2]; 677f9fae5adSBarry Smith sum4 += v[jrow]*x[5*idx[jrow]+3]; 678f9fae5adSBarry Smith sum5 += v[jrow]*x[5*idx[jrow]+4]; 679f9fae5adSBarry Smith jrow++; 680f9fae5adSBarry Smith } 681f9fae5adSBarry Smith y[5*i] += sum1; 682f9fae5adSBarry Smith y[5*i+1] += sum2; 683f9fae5adSBarry Smith y[5*i+2] += sum3; 684f9fae5adSBarry Smith y[5*i+3] += sum4; 685f9fae5adSBarry Smith y[5*i+4] += sum5; 686f9fae5adSBarry Smith } 687f9fae5adSBarry Smith 688b0a32e0cSBarry Smith PetscLogFlops(10*a->nz); 6892f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 6902f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 691f9fae5adSBarry Smith PetscFunctionReturn(0); 692f9fae5adSBarry Smith } 693f9fae5adSBarry Smith 6944a2ae208SSatish Balay #undef __FUNCT__ 695b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 696f9fae5adSBarry Smith int MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 697f9fae5adSBarry Smith { 6984cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 699f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 70087828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5; 701*bfec09a0SHong Zhang int ierr,m = b->AIJ->m,n,i,*idx; 702f9fae5adSBarry Smith 703f9fae5adSBarry Smith PetscFunctionBegin; 704f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 7052f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 7062f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 707*bfec09a0SHong Zhang 708f9fae5adSBarry Smith for (i=0; i<m; i++) { 709*bfec09a0SHong Zhang idx = a->j + a->i[i] ; 710*bfec09a0SHong Zhang v = a->a + a->i[i] ; 711f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 712f9fae5adSBarry Smith alpha1 = x[5*i]; 713f9fae5adSBarry Smith alpha2 = x[5*i+1]; 714f9fae5adSBarry Smith alpha3 = x[5*i+2]; 715f9fae5adSBarry Smith alpha4 = x[5*i+3]; 716f9fae5adSBarry Smith alpha5 = x[5*i+4]; 717f9fae5adSBarry Smith while (n-->0) { 718f9fae5adSBarry Smith y[5*(*idx)] += alpha1*(*v); 719f9fae5adSBarry Smith y[5*(*idx)+1] += alpha2*(*v); 720f9fae5adSBarry Smith y[5*(*idx)+2] += alpha3*(*v); 721f9fae5adSBarry Smith y[5*(*idx)+3] += alpha4*(*v); 722f9fae5adSBarry Smith y[5*(*idx)+4] += alpha5*(*v); 723f9fae5adSBarry Smith idx++; v++; 724f9fae5adSBarry Smith } 725f9fae5adSBarry Smith } 726b0a32e0cSBarry Smith PetscLogFlops(10*a->nz); 7272f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 7282f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 729f9fae5adSBarry Smith PetscFunctionReturn(0); 730bcc973b7SBarry Smith } 731bcc973b7SBarry Smith 7326cd98798SBarry Smith /* ------------------------------------------------------------------------------*/ 7334a2ae208SSatish Balay #undef __FUNCT__ 734b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_6" 7356cd98798SBarry Smith int MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 7366cd98798SBarry Smith { 7376cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 7386cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 73987828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 740*bfec09a0SHong Zhang int ierr,m = b->AIJ->m,*idx,*ii; 7416cd98798SBarry Smith int n,i,jrow,j; 7426cd98798SBarry Smith 7436cd98798SBarry Smith PetscFunctionBegin; 7442f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 7452f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 7466cd98798SBarry Smith idx = a->j; 7476cd98798SBarry Smith v = a->a; 7486cd98798SBarry Smith ii = a->i; 7496cd98798SBarry Smith 7506cd98798SBarry Smith for (i=0; i<m; i++) { 7516cd98798SBarry Smith jrow = ii[i]; 7526cd98798SBarry Smith n = ii[i+1] - jrow; 7536cd98798SBarry Smith sum1 = 0.0; 7546cd98798SBarry Smith sum2 = 0.0; 7556cd98798SBarry Smith sum3 = 0.0; 7566cd98798SBarry Smith sum4 = 0.0; 7576cd98798SBarry Smith sum5 = 0.0; 7586cd98798SBarry Smith sum6 = 0.0; 7596cd98798SBarry Smith for (j=0; j<n; j++) { 7606cd98798SBarry Smith sum1 += v[jrow]*x[6*idx[jrow]]; 7616cd98798SBarry Smith sum2 += v[jrow]*x[6*idx[jrow]+1]; 7626cd98798SBarry Smith sum3 += v[jrow]*x[6*idx[jrow]+2]; 7636cd98798SBarry Smith sum4 += v[jrow]*x[6*idx[jrow]+3]; 7646cd98798SBarry Smith sum5 += v[jrow]*x[6*idx[jrow]+4]; 7656cd98798SBarry Smith sum6 += v[jrow]*x[6*idx[jrow]+5]; 7666cd98798SBarry Smith jrow++; 7676cd98798SBarry Smith } 7686cd98798SBarry Smith y[6*i] = sum1; 7696cd98798SBarry Smith y[6*i+1] = sum2; 7706cd98798SBarry Smith y[6*i+2] = sum3; 7716cd98798SBarry Smith y[6*i+3] = sum4; 7726cd98798SBarry Smith y[6*i+4] = sum5; 7736cd98798SBarry Smith y[6*i+5] = sum6; 7746cd98798SBarry Smith } 7756cd98798SBarry Smith 776b0a32e0cSBarry Smith PetscLogFlops(12*a->nz - 6*m); 7772f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 7782f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 7796cd98798SBarry Smith PetscFunctionReturn(0); 7806cd98798SBarry Smith } 7816cd98798SBarry Smith 7824a2ae208SSatish Balay #undef __FUNCT__ 783b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 7846cd98798SBarry Smith int MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 7856cd98798SBarry Smith { 7866cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 7876cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 78887828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0; 789*bfec09a0SHong Zhang int ierr,m = b->AIJ->m,n,i,*idx; 7906cd98798SBarry Smith 7916cd98798SBarry Smith PetscFunctionBegin; 7926cd98798SBarry Smith ierr = VecSet(&zero,yy);CHKERRQ(ierr); 7932f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 7942f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 795*bfec09a0SHong Zhang 7966cd98798SBarry Smith for (i=0; i<m; i++) { 797*bfec09a0SHong Zhang idx = a->j + a->i[i] ; 798*bfec09a0SHong Zhang v = a->a + a->i[i] ; 7996cd98798SBarry Smith n = a->i[i+1] - a->i[i]; 8006cd98798SBarry Smith alpha1 = x[6*i]; 8016cd98798SBarry Smith alpha2 = x[6*i+1]; 8026cd98798SBarry Smith alpha3 = x[6*i+2]; 8036cd98798SBarry Smith alpha4 = x[6*i+3]; 8046cd98798SBarry Smith alpha5 = x[6*i+4]; 8056cd98798SBarry Smith alpha6 = x[6*i+5]; 8066cd98798SBarry Smith while (n-->0) { 8076cd98798SBarry Smith y[6*(*idx)] += alpha1*(*v); 8086cd98798SBarry Smith y[6*(*idx)+1] += alpha2*(*v); 8096cd98798SBarry Smith y[6*(*idx)+2] += alpha3*(*v); 8106cd98798SBarry Smith y[6*(*idx)+3] += alpha4*(*v); 8116cd98798SBarry Smith y[6*(*idx)+4] += alpha5*(*v); 8126cd98798SBarry Smith y[6*(*idx)+5] += alpha6*(*v); 8136cd98798SBarry Smith idx++; v++; 8146cd98798SBarry Smith } 8156cd98798SBarry Smith } 816b0a32e0cSBarry Smith PetscLogFlops(12*a->nz - 6*b->AIJ->n); 8172f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 8182f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 8196cd98798SBarry Smith PetscFunctionReturn(0); 8206cd98798SBarry Smith } 8216cd98798SBarry Smith 8224a2ae208SSatish Balay #undef __FUNCT__ 823b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 8246cd98798SBarry Smith int MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 8256cd98798SBarry Smith { 8266cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 8276cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 82887828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 829*bfec09a0SHong Zhang int ierr,m = b->AIJ->m,*idx,*ii; 8306cd98798SBarry Smith int n,i,jrow,j; 8316cd98798SBarry Smith 8326cd98798SBarry Smith PetscFunctionBegin; 8336cd98798SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 8342f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 8352f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 8366cd98798SBarry Smith idx = a->j; 8376cd98798SBarry Smith v = a->a; 8386cd98798SBarry Smith ii = a->i; 8396cd98798SBarry Smith 8406cd98798SBarry Smith for (i=0; i<m; i++) { 8416cd98798SBarry Smith jrow = ii[i]; 8426cd98798SBarry Smith n = ii[i+1] - jrow; 8436cd98798SBarry Smith sum1 = 0.0; 8446cd98798SBarry Smith sum2 = 0.0; 8456cd98798SBarry Smith sum3 = 0.0; 8466cd98798SBarry Smith sum4 = 0.0; 8476cd98798SBarry Smith sum5 = 0.0; 8486cd98798SBarry Smith sum6 = 0.0; 8496cd98798SBarry Smith for (j=0; j<n; j++) { 8506cd98798SBarry Smith sum1 += v[jrow]*x[6*idx[jrow]]; 8516cd98798SBarry Smith sum2 += v[jrow]*x[6*idx[jrow]+1]; 8526cd98798SBarry Smith sum3 += v[jrow]*x[6*idx[jrow]+2]; 8536cd98798SBarry Smith sum4 += v[jrow]*x[6*idx[jrow]+3]; 8546cd98798SBarry Smith sum5 += v[jrow]*x[6*idx[jrow]+4]; 8556cd98798SBarry Smith sum6 += v[jrow]*x[6*idx[jrow]+5]; 8566cd98798SBarry Smith jrow++; 8576cd98798SBarry Smith } 8586cd98798SBarry Smith y[6*i] += sum1; 8596cd98798SBarry Smith y[6*i+1] += sum2; 8606cd98798SBarry Smith y[6*i+2] += sum3; 8616cd98798SBarry Smith y[6*i+3] += sum4; 8626cd98798SBarry Smith y[6*i+4] += sum5; 8636cd98798SBarry Smith y[6*i+5] += sum6; 8646cd98798SBarry Smith } 8656cd98798SBarry Smith 866b0a32e0cSBarry Smith PetscLogFlops(12*a->nz); 8672f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 8682f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 8696cd98798SBarry Smith PetscFunctionReturn(0); 8706cd98798SBarry Smith } 8716cd98798SBarry Smith 8724a2ae208SSatish Balay #undef __FUNCT__ 873b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 8746cd98798SBarry Smith int MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 8756cd98798SBarry Smith { 8766cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 8776cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 87887828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 879*bfec09a0SHong Zhang int ierr,m = b->AIJ->m,n,i,*idx; 8806cd98798SBarry Smith 8816cd98798SBarry Smith PetscFunctionBegin; 8826cd98798SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 8832f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 8842f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 885*bfec09a0SHong Zhang 8866cd98798SBarry Smith for (i=0; i<m; i++) { 887*bfec09a0SHong Zhang idx = a->j + a->i[i] ; 888*bfec09a0SHong Zhang v = a->a + a->i[i] ; 8896cd98798SBarry Smith n = a->i[i+1] - a->i[i]; 8906cd98798SBarry Smith alpha1 = x[6*i]; 8916cd98798SBarry Smith alpha2 = x[6*i+1]; 8926cd98798SBarry Smith alpha3 = x[6*i+2]; 8936cd98798SBarry Smith alpha4 = x[6*i+3]; 8946cd98798SBarry Smith alpha5 = x[6*i+4]; 8956cd98798SBarry Smith alpha6 = x[6*i+5]; 8966cd98798SBarry Smith while (n-->0) { 8976cd98798SBarry Smith y[6*(*idx)] += alpha1*(*v); 8986cd98798SBarry Smith y[6*(*idx)+1] += alpha2*(*v); 8996cd98798SBarry Smith y[6*(*idx)+2] += alpha3*(*v); 9006cd98798SBarry Smith y[6*(*idx)+3] += alpha4*(*v); 9016cd98798SBarry Smith y[6*(*idx)+4] += alpha5*(*v); 9026cd98798SBarry Smith y[6*(*idx)+5] += alpha6*(*v); 9036cd98798SBarry Smith idx++; v++; 9046cd98798SBarry Smith } 9056cd98798SBarry Smith } 906b0a32e0cSBarry Smith PetscLogFlops(12*a->nz); 9072f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 9082f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 9096cd98798SBarry Smith PetscFunctionReturn(0); 9106cd98798SBarry Smith } 9116cd98798SBarry Smith 91266ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/ 91366ed3db0SBarry Smith #undef __FUNCT__ 91466ed3db0SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_8" 91566ed3db0SBarry Smith int MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 91666ed3db0SBarry Smith { 91766ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 91866ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 91987828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 920*bfec09a0SHong Zhang int ierr,m = b->AIJ->m,*idx,*ii; 92166ed3db0SBarry Smith int n,i,jrow,j; 92266ed3db0SBarry Smith 92366ed3db0SBarry Smith PetscFunctionBegin; 9242f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 9252f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 92666ed3db0SBarry Smith idx = a->j; 92766ed3db0SBarry Smith v = a->a; 92866ed3db0SBarry Smith ii = a->i; 92966ed3db0SBarry Smith 93066ed3db0SBarry Smith for (i=0; i<m; i++) { 93166ed3db0SBarry Smith jrow = ii[i]; 93266ed3db0SBarry Smith n = ii[i+1] - jrow; 93366ed3db0SBarry Smith sum1 = 0.0; 93466ed3db0SBarry Smith sum2 = 0.0; 93566ed3db0SBarry Smith sum3 = 0.0; 93666ed3db0SBarry Smith sum4 = 0.0; 93766ed3db0SBarry Smith sum5 = 0.0; 93866ed3db0SBarry Smith sum6 = 0.0; 93966ed3db0SBarry Smith sum7 = 0.0; 94066ed3db0SBarry Smith sum8 = 0.0; 94166ed3db0SBarry Smith for (j=0; j<n; j++) { 94266ed3db0SBarry Smith sum1 += v[jrow]*x[8*idx[jrow]]; 94366ed3db0SBarry Smith sum2 += v[jrow]*x[8*idx[jrow]+1]; 94466ed3db0SBarry Smith sum3 += v[jrow]*x[8*idx[jrow]+2]; 94566ed3db0SBarry Smith sum4 += v[jrow]*x[8*idx[jrow]+3]; 94666ed3db0SBarry Smith sum5 += v[jrow]*x[8*idx[jrow]+4]; 94766ed3db0SBarry Smith sum6 += v[jrow]*x[8*idx[jrow]+5]; 94866ed3db0SBarry Smith sum7 += v[jrow]*x[8*idx[jrow]+6]; 94966ed3db0SBarry Smith sum8 += v[jrow]*x[8*idx[jrow]+7]; 95066ed3db0SBarry Smith jrow++; 95166ed3db0SBarry Smith } 95266ed3db0SBarry Smith y[8*i] = sum1; 95366ed3db0SBarry Smith y[8*i+1] = sum2; 95466ed3db0SBarry Smith y[8*i+2] = sum3; 95566ed3db0SBarry Smith y[8*i+3] = sum4; 95666ed3db0SBarry Smith y[8*i+4] = sum5; 95766ed3db0SBarry Smith y[8*i+5] = sum6; 95866ed3db0SBarry Smith y[8*i+6] = sum7; 95966ed3db0SBarry Smith y[8*i+7] = sum8; 96066ed3db0SBarry Smith } 96166ed3db0SBarry Smith 96266ed3db0SBarry Smith PetscLogFlops(16*a->nz - 8*m); 9632f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 9642f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 96566ed3db0SBarry Smith PetscFunctionReturn(0); 96666ed3db0SBarry Smith } 96766ed3db0SBarry Smith 96866ed3db0SBarry Smith #undef __FUNCT__ 96966ed3db0SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 97066ed3db0SBarry Smith int MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 97166ed3db0SBarry Smith { 97266ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 97366ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 97487828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 975*bfec09a0SHong Zhang int ierr,m = b->AIJ->m,n,i,*idx; 97666ed3db0SBarry Smith 97766ed3db0SBarry Smith PetscFunctionBegin; 97866ed3db0SBarry Smith ierr = VecSet(&zero,yy);CHKERRQ(ierr); 9792f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 9802f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 981*bfec09a0SHong Zhang 98266ed3db0SBarry Smith for (i=0; i<m; i++) { 983*bfec09a0SHong Zhang idx = a->j + a->i[i] ; 984*bfec09a0SHong Zhang v = a->a + a->i[i] ; 98566ed3db0SBarry Smith n = a->i[i+1] - a->i[i]; 98666ed3db0SBarry Smith alpha1 = x[8*i]; 98766ed3db0SBarry Smith alpha2 = x[8*i+1]; 98866ed3db0SBarry Smith alpha3 = x[8*i+2]; 98966ed3db0SBarry Smith alpha4 = x[8*i+3]; 99066ed3db0SBarry Smith alpha5 = x[8*i+4]; 99166ed3db0SBarry Smith alpha6 = x[8*i+5]; 99266ed3db0SBarry Smith alpha7 = x[8*i+6]; 99366ed3db0SBarry Smith alpha8 = x[8*i+7]; 99466ed3db0SBarry Smith while (n-->0) { 99566ed3db0SBarry Smith y[8*(*idx)] += alpha1*(*v); 99666ed3db0SBarry Smith y[8*(*idx)+1] += alpha2*(*v); 99766ed3db0SBarry Smith y[8*(*idx)+2] += alpha3*(*v); 99866ed3db0SBarry Smith y[8*(*idx)+3] += alpha4*(*v); 99966ed3db0SBarry Smith y[8*(*idx)+4] += alpha5*(*v); 100066ed3db0SBarry Smith y[8*(*idx)+5] += alpha6*(*v); 100166ed3db0SBarry Smith y[8*(*idx)+6] += alpha7*(*v); 100266ed3db0SBarry Smith y[8*(*idx)+7] += alpha8*(*v); 100366ed3db0SBarry Smith idx++; v++; 100466ed3db0SBarry Smith } 100566ed3db0SBarry Smith } 100666ed3db0SBarry Smith PetscLogFlops(16*a->nz - 8*b->AIJ->n); 10072f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 10082f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 100966ed3db0SBarry Smith PetscFunctionReturn(0); 101066ed3db0SBarry Smith } 101166ed3db0SBarry Smith 101266ed3db0SBarry Smith #undef __FUNCT__ 101366ed3db0SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 101466ed3db0SBarry Smith int MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 101566ed3db0SBarry Smith { 101666ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 101766ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 101887828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1019*bfec09a0SHong Zhang int ierr,m = b->AIJ->m,*idx,*ii; 102066ed3db0SBarry Smith int n,i,jrow,j; 102166ed3db0SBarry Smith 102266ed3db0SBarry Smith PetscFunctionBegin; 102366ed3db0SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 10242f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 10252f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 102666ed3db0SBarry Smith idx = a->j; 102766ed3db0SBarry Smith v = a->a; 102866ed3db0SBarry Smith ii = a->i; 102966ed3db0SBarry Smith 103066ed3db0SBarry Smith for (i=0; i<m; i++) { 103166ed3db0SBarry Smith jrow = ii[i]; 103266ed3db0SBarry Smith n = ii[i+1] - jrow; 103366ed3db0SBarry Smith sum1 = 0.0; 103466ed3db0SBarry Smith sum2 = 0.0; 103566ed3db0SBarry Smith sum3 = 0.0; 103666ed3db0SBarry Smith sum4 = 0.0; 103766ed3db0SBarry Smith sum5 = 0.0; 103866ed3db0SBarry Smith sum6 = 0.0; 103966ed3db0SBarry Smith sum7 = 0.0; 104066ed3db0SBarry Smith sum8 = 0.0; 104166ed3db0SBarry Smith for (j=0; j<n; j++) { 104266ed3db0SBarry Smith sum1 += v[jrow]*x[8*idx[jrow]]; 104366ed3db0SBarry Smith sum2 += v[jrow]*x[8*idx[jrow]+1]; 104466ed3db0SBarry Smith sum3 += v[jrow]*x[8*idx[jrow]+2]; 104566ed3db0SBarry Smith sum4 += v[jrow]*x[8*idx[jrow]+3]; 104666ed3db0SBarry Smith sum5 += v[jrow]*x[8*idx[jrow]+4]; 104766ed3db0SBarry Smith sum6 += v[jrow]*x[8*idx[jrow]+5]; 104866ed3db0SBarry Smith sum7 += v[jrow]*x[8*idx[jrow]+6]; 104966ed3db0SBarry Smith sum8 += v[jrow]*x[8*idx[jrow]+7]; 105066ed3db0SBarry Smith jrow++; 105166ed3db0SBarry Smith } 105266ed3db0SBarry Smith y[8*i] += sum1; 105366ed3db0SBarry Smith y[8*i+1] += sum2; 105466ed3db0SBarry Smith y[8*i+2] += sum3; 105566ed3db0SBarry Smith y[8*i+3] += sum4; 105666ed3db0SBarry Smith y[8*i+4] += sum5; 105766ed3db0SBarry Smith y[8*i+5] += sum6; 105866ed3db0SBarry Smith y[8*i+6] += sum7; 105966ed3db0SBarry Smith y[8*i+7] += sum8; 106066ed3db0SBarry Smith } 106166ed3db0SBarry Smith 106266ed3db0SBarry Smith PetscLogFlops(16*a->nz); 10632f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 10642f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 106566ed3db0SBarry Smith PetscFunctionReturn(0); 106666ed3db0SBarry Smith } 106766ed3db0SBarry Smith 106866ed3db0SBarry Smith #undef __FUNCT__ 106966ed3db0SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 107066ed3db0SBarry Smith int MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 107166ed3db0SBarry Smith { 107266ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 107366ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 107487828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1075*bfec09a0SHong Zhang int ierr,m = b->AIJ->m,n,i,*idx; 107666ed3db0SBarry Smith 107766ed3db0SBarry Smith PetscFunctionBegin; 107866ed3db0SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 10792f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 10802f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 108166ed3db0SBarry Smith for (i=0; i<m; i++) { 1082*bfec09a0SHong Zhang idx = a->j + a->i[i] ; 1083*bfec09a0SHong Zhang v = a->a + a->i[i] ; 108466ed3db0SBarry Smith n = a->i[i+1] - a->i[i]; 108566ed3db0SBarry Smith alpha1 = x[8*i]; 108666ed3db0SBarry Smith alpha2 = x[8*i+1]; 108766ed3db0SBarry Smith alpha3 = x[8*i+2]; 108866ed3db0SBarry Smith alpha4 = x[8*i+3]; 108966ed3db0SBarry Smith alpha5 = x[8*i+4]; 109066ed3db0SBarry Smith alpha6 = x[8*i+5]; 109166ed3db0SBarry Smith alpha7 = x[8*i+6]; 109266ed3db0SBarry Smith alpha8 = x[8*i+7]; 109366ed3db0SBarry Smith while (n-->0) { 109466ed3db0SBarry Smith y[8*(*idx)] += alpha1*(*v); 109566ed3db0SBarry Smith y[8*(*idx)+1] += alpha2*(*v); 109666ed3db0SBarry Smith y[8*(*idx)+2] += alpha3*(*v); 109766ed3db0SBarry Smith y[8*(*idx)+3] += alpha4*(*v); 109866ed3db0SBarry Smith y[8*(*idx)+4] += alpha5*(*v); 109966ed3db0SBarry Smith y[8*(*idx)+5] += alpha6*(*v); 110066ed3db0SBarry Smith y[8*(*idx)+6] += alpha7*(*v); 110166ed3db0SBarry Smith y[8*(*idx)+7] += alpha8*(*v); 110266ed3db0SBarry Smith idx++; v++; 110366ed3db0SBarry Smith } 110466ed3db0SBarry Smith } 110566ed3db0SBarry Smith PetscLogFlops(16*a->nz); 11062f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 11072f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 11082f7816d4SBarry Smith PetscFunctionReturn(0); 11092f7816d4SBarry Smith } 11102f7816d4SBarry Smith 11112f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/ 11122f7816d4SBarry Smith #undef __FUNCT__ 11132f7816d4SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_16" 11142f7816d4SBarry Smith int MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 11152f7816d4SBarry Smith { 11162f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 11172f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 11182f7816d4SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 11192f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1120*bfec09a0SHong Zhang int ierr,m = b->AIJ->m,*idx,*ii; 11212f7816d4SBarry Smith int n,i,jrow,j; 11222f7816d4SBarry Smith 11232f7816d4SBarry Smith PetscFunctionBegin; 11242f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 11252f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 11262f7816d4SBarry Smith idx = a->j; 11272f7816d4SBarry Smith v = a->a; 11282f7816d4SBarry Smith ii = a->i; 11292f7816d4SBarry Smith 11302f7816d4SBarry Smith for (i=0; i<m; i++) { 11312f7816d4SBarry Smith jrow = ii[i]; 11322f7816d4SBarry Smith n = ii[i+1] - jrow; 11332f7816d4SBarry Smith sum1 = 0.0; 11342f7816d4SBarry Smith sum2 = 0.0; 11352f7816d4SBarry Smith sum3 = 0.0; 11362f7816d4SBarry Smith sum4 = 0.0; 11372f7816d4SBarry Smith sum5 = 0.0; 11382f7816d4SBarry Smith sum6 = 0.0; 11392f7816d4SBarry Smith sum7 = 0.0; 11402f7816d4SBarry Smith sum8 = 0.0; 11412f7816d4SBarry Smith sum9 = 0.0; 11422f7816d4SBarry Smith sum10 = 0.0; 11432f7816d4SBarry Smith sum11 = 0.0; 11442f7816d4SBarry Smith sum12 = 0.0; 11452f7816d4SBarry Smith sum13 = 0.0; 11462f7816d4SBarry Smith sum14 = 0.0; 11472f7816d4SBarry Smith sum15 = 0.0; 11482f7816d4SBarry Smith sum16 = 0.0; 11492f7816d4SBarry Smith for (j=0; j<n; j++) { 11502f7816d4SBarry Smith sum1 += v[jrow]*x[16*idx[jrow]]; 11512f7816d4SBarry Smith sum2 += v[jrow]*x[16*idx[jrow]+1]; 11522f7816d4SBarry Smith sum3 += v[jrow]*x[16*idx[jrow]+2]; 11532f7816d4SBarry Smith sum4 += v[jrow]*x[16*idx[jrow]+3]; 11542f7816d4SBarry Smith sum5 += v[jrow]*x[16*idx[jrow]+4]; 11552f7816d4SBarry Smith sum6 += v[jrow]*x[16*idx[jrow]+5]; 11562f7816d4SBarry Smith sum7 += v[jrow]*x[16*idx[jrow]+6]; 11572f7816d4SBarry Smith sum8 += v[jrow]*x[16*idx[jrow]+7]; 11582f7816d4SBarry Smith sum9 += v[jrow]*x[16*idx[jrow]+8]; 11592f7816d4SBarry Smith sum10 += v[jrow]*x[16*idx[jrow]+9]; 11602f7816d4SBarry Smith sum11 += v[jrow]*x[16*idx[jrow]+10]; 11612f7816d4SBarry Smith sum12 += v[jrow]*x[16*idx[jrow]+11]; 11622f7816d4SBarry Smith sum13 += v[jrow]*x[16*idx[jrow]+12]; 11632f7816d4SBarry Smith sum14 += v[jrow]*x[16*idx[jrow]+13]; 11642f7816d4SBarry Smith sum15 += v[jrow]*x[16*idx[jrow]+14]; 11652f7816d4SBarry Smith sum16 += v[jrow]*x[16*idx[jrow]+15]; 11662f7816d4SBarry Smith jrow++; 11672f7816d4SBarry Smith } 11682f7816d4SBarry Smith y[16*i] = sum1; 11692f7816d4SBarry Smith y[16*i+1] = sum2; 11702f7816d4SBarry Smith y[16*i+2] = sum3; 11712f7816d4SBarry Smith y[16*i+3] = sum4; 11722f7816d4SBarry Smith y[16*i+4] = sum5; 11732f7816d4SBarry Smith y[16*i+5] = sum6; 11742f7816d4SBarry Smith y[16*i+6] = sum7; 11752f7816d4SBarry Smith y[16*i+7] = sum8; 11762f7816d4SBarry Smith y[16*i+8] = sum9; 11772f7816d4SBarry Smith y[16*i+9] = sum10; 11782f7816d4SBarry Smith y[16*i+10] = sum11; 11792f7816d4SBarry Smith y[16*i+11] = sum12; 11802f7816d4SBarry Smith y[16*i+12] = sum13; 11812f7816d4SBarry Smith y[16*i+13] = sum14; 11822f7816d4SBarry Smith y[16*i+14] = sum15; 11832f7816d4SBarry Smith y[16*i+15] = sum16; 11842f7816d4SBarry Smith } 11852f7816d4SBarry Smith 11862f7816d4SBarry Smith PetscLogFlops(32*a->nz - 16*m); 11872f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 11882f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 11892f7816d4SBarry Smith PetscFunctionReturn(0); 11902f7816d4SBarry Smith } 11912f7816d4SBarry Smith 11922f7816d4SBarry Smith #undef __FUNCT__ 11932f7816d4SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 11942f7816d4SBarry Smith int MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 11952f7816d4SBarry Smith { 11962f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 11972f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 11982f7816d4SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 11992f7816d4SBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1200*bfec09a0SHong Zhang int ierr,m = b->AIJ->m,n,i,*idx; 12012f7816d4SBarry Smith 12022f7816d4SBarry Smith PetscFunctionBegin; 12032f7816d4SBarry Smith ierr = VecSet(&zero,yy);CHKERRQ(ierr); 12042f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 12052f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 1206*bfec09a0SHong Zhang 12072f7816d4SBarry Smith for (i=0; i<m; i++) { 1208*bfec09a0SHong Zhang idx = a->j + a->i[i] ; 1209*bfec09a0SHong Zhang v = a->a + a->i[i] ; 12102f7816d4SBarry Smith n = a->i[i+1] - a->i[i]; 12112f7816d4SBarry Smith alpha1 = x[16*i]; 12122f7816d4SBarry Smith alpha2 = x[16*i+1]; 12132f7816d4SBarry Smith alpha3 = x[16*i+2]; 12142f7816d4SBarry Smith alpha4 = x[16*i+3]; 12152f7816d4SBarry Smith alpha5 = x[16*i+4]; 12162f7816d4SBarry Smith alpha6 = x[16*i+5]; 12172f7816d4SBarry Smith alpha7 = x[16*i+6]; 12182f7816d4SBarry Smith alpha8 = x[16*i+7]; 12192f7816d4SBarry Smith alpha9 = x[16*i+8]; 12202f7816d4SBarry Smith alpha10 = x[16*i+9]; 12212f7816d4SBarry Smith alpha11 = x[16*i+10]; 12222f7816d4SBarry Smith alpha12 = x[16*i+11]; 12232f7816d4SBarry Smith alpha13 = x[16*i+12]; 12242f7816d4SBarry Smith alpha14 = x[16*i+13]; 12252f7816d4SBarry Smith alpha15 = x[16*i+14]; 12262f7816d4SBarry Smith alpha16 = x[16*i+15]; 12272f7816d4SBarry Smith while (n-->0) { 12282f7816d4SBarry Smith y[16*(*idx)] += alpha1*(*v); 12292f7816d4SBarry Smith y[16*(*idx)+1] += alpha2*(*v); 12302f7816d4SBarry Smith y[16*(*idx)+2] += alpha3*(*v); 12312f7816d4SBarry Smith y[16*(*idx)+3] += alpha4*(*v); 12322f7816d4SBarry Smith y[16*(*idx)+4] += alpha5*(*v); 12332f7816d4SBarry Smith y[16*(*idx)+5] += alpha6*(*v); 12342f7816d4SBarry Smith y[16*(*idx)+6] += alpha7*(*v); 12352f7816d4SBarry Smith y[16*(*idx)+7] += alpha8*(*v); 12362f7816d4SBarry Smith y[16*(*idx)+8] += alpha9*(*v); 12372f7816d4SBarry Smith y[16*(*idx)+9] += alpha10*(*v); 12382f7816d4SBarry Smith y[16*(*idx)+10] += alpha11*(*v); 12392f7816d4SBarry Smith y[16*(*idx)+11] += alpha12*(*v); 12402f7816d4SBarry Smith y[16*(*idx)+12] += alpha13*(*v); 12412f7816d4SBarry Smith y[16*(*idx)+13] += alpha14*(*v); 12422f7816d4SBarry Smith y[16*(*idx)+14] += alpha15*(*v); 12432f7816d4SBarry Smith y[16*(*idx)+15] += alpha16*(*v); 12442f7816d4SBarry Smith idx++; v++; 12452f7816d4SBarry Smith } 12462f7816d4SBarry Smith } 12472f7816d4SBarry Smith PetscLogFlops(32*a->nz - 16*b->AIJ->n); 12482f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 12492f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 12502f7816d4SBarry Smith PetscFunctionReturn(0); 12512f7816d4SBarry Smith } 12522f7816d4SBarry Smith 12532f7816d4SBarry Smith #undef __FUNCT__ 12542f7816d4SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 12552f7816d4SBarry Smith int MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 12562f7816d4SBarry Smith { 12572f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 12582f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 12592f7816d4SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 12602f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1261*bfec09a0SHong Zhang int ierr,m = b->AIJ->m,*idx,*ii; 12622f7816d4SBarry Smith int n,i,jrow,j; 12632f7816d4SBarry Smith 12642f7816d4SBarry Smith PetscFunctionBegin; 12652f7816d4SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 12662f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 12672f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 12682f7816d4SBarry Smith idx = a->j; 12692f7816d4SBarry Smith v = a->a; 12702f7816d4SBarry Smith ii = a->i; 12712f7816d4SBarry Smith 12722f7816d4SBarry Smith for (i=0; i<m; i++) { 12732f7816d4SBarry Smith jrow = ii[i]; 12742f7816d4SBarry Smith n = ii[i+1] - jrow; 12752f7816d4SBarry Smith sum1 = 0.0; 12762f7816d4SBarry Smith sum2 = 0.0; 12772f7816d4SBarry Smith sum3 = 0.0; 12782f7816d4SBarry Smith sum4 = 0.0; 12792f7816d4SBarry Smith sum5 = 0.0; 12802f7816d4SBarry Smith sum6 = 0.0; 12812f7816d4SBarry Smith sum7 = 0.0; 12822f7816d4SBarry Smith sum8 = 0.0; 12832f7816d4SBarry Smith sum9 = 0.0; 12842f7816d4SBarry Smith sum10 = 0.0; 12852f7816d4SBarry Smith sum11 = 0.0; 12862f7816d4SBarry Smith sum12 = 0.0; 12872f7816d4SBarry Smith sum13 = 0.0; 12882f7816d4SBarry Smith sum14 = 0.0; 12892f7816d4SBarry Smith sum15 = 0.0; 12902f7816d4SBarry Smith sum16 = 0.0; 12912f7816d4SBarry Smith for (j=0; j<n; j++) { 12922f7816d4SBarry Smith sum1 += v[jrow]*x[16*idx[jrow]]; 12932f7816d4SBarry Smith sum2 += v[jrow]*x[16*idx[jrow]+1]; 12942f7816d4SBarry Smith sum3 += v[jrow]*x[16*idx[jrow]+2]; 12952f7816d4SBarry Smith sum4 += v[jrow]*x[16*idx[jrow]+3]; 12962f7816d4SBarry Smith sum5 += v[jrow]*x[16*idx[jrow]+4]; 12972f7816d4SBarry Smith sum6 += v[jrow]*x[16*idx[jrow]+5]; 12982f7816d4SBarry Smith sum7 += v[jrow]*x[16*idx[jrow]+6]; 12992f7816d4SBarry Smith sum8 += v[jrow]*x[16*idx[jrow]+7]; 13002f7816d4SBarry Smith sum9 += v[jrow]*x[16*idx[jrow]+8]; 13012f7816d4SBarry Smith sum10 += v[jrow]*x[16*idx[jrow]+9]; 13022f7816d4SBarry Smith sum11 += v[jrow]*x[16*idx[jrow]+10]; 13032f7816d4SBarry Smith sum12 += v[jrow]*x[16*idx[jrow]+11]; 13042f7816d4SBarry Smith sum13 += v[jrow]*x[16*idx[jrow]+12]; 13052f7816d4SBarry Smith sum14 += v[jrow]*x[16*idx[jrow]+13]; 13062f7816d4SBarry Smith sum15 += v[jrow]*x[16*idx[jrow]+14]; 13072f7816d4SBarry Smith sum16 += v[jrow]*x[16*idx[jrow]+15]; 13082f7816d4SBarry Smith jrow++; 13092f7816d4SBarry Smith } 13102f7816d4SBarry Smith y[16*i] += sum1; 13112f7816d4SBarry Smith y[16*i+1] += sum2; 13122f7816d4SBarry Smith y[16*i+2] += sum3; 13132f7816d4SBarry Smith y[16*i+3] += sum4; 13142f7816d4SBarry Smith y[16*i+4] += sum5; 13152f7816d4SBarry Smith y[16*i+5] += sum6; 13162f7816d4SBarry Smith y[16*i+6] += sum7; 13172f7816d4SBarry Smith y[16*i+7] += sum8; 13182f7816d4SBarry Smith y[16*i+8] += sum9; 13192f7816d4SBarry Smith y[16*i+9] += sum10; 13202f7816d4SBarry Smith y[16*i+10] += sum11; 13212f7816d4SBarry Smith y[16*i+11] += sum12; 13222f7816d4SBarry Smith y[16*i+12] += sum13; 13232f7816d4SBarry Smith y[16*i+13] += sum14; 13242f7816d4SBarry Smith y[16*i+14] += sum15; 13252f7816d4SBarry Smith y[16*i+15] += sum16; 13262f7816d4SBarry Smith } 13272f7816d4SBarry Smith 13282f7816d4SBarry Smith PetscLogFlops(32*a->nz); 13292f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 13302f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 13312f7816d4SBarry Smith PetscFunctionReturn(0); 13322f7816d4SBarry Smith } 13332f7816d4SBarry Smith 13342f7816d4SBarry Smith #undef __FUNCT__ 13352f7816d4SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 13362f7816d4SBarry Smith int MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 13372f7816d4SBarry Smith { 13382f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 13392f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 13402f7816d4SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 13412f7816d4SBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1342*bfec09a0SHong Zhang int ierr,m = b->AIJ->m,n,i,*idx; 13432f7816d4SBarry Smith 13442f7816d4SBarry Smith PetscFunctionBegin; 13452f7816d4SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 13462f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 13472f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 13482f7816d4SBarry Smith for (i=0; i<m; i++) { 1349*bfec09a0SHong Zhang idx = a->j + a->i[i] ; 1350*bfec09a0SHong Zhang v = a->a + a->i[i] ; 13512f7816d4SBarry Smith n = a->i[i+1] - a->i[i]; 13522f7816d4SBarry Smith alpha1 = x[16*i]; 13532f7816d4SBarry Smith alpha2 = x[16*i+1]; 13542f7816d4SBarry Smith alpha3 = x[16*i+2]; 13552f7816d4SBarry Smith alpha4 = x[16*i+3]; 13562f7816d4SBarry Smith alpha5 = x[16*i+4]; 13572f7816d4SBarry Smith alpha6 = x[16*i+5]; 13582f7816d4SBarry Smith alpha7 = x[16*i+6]; 13592f7816d4SBarry Smith alpha8 = x[16*i+7]; 13602f7816d4SBarry Smith alpha9 = x[16*i+8]; 13612f7816d4SBarry Smith alpha10 = x[16*i+9]; 13622f7816d4SBarry Smith alpha11 = x[16*i+10]; 13632f7816d4SBarry Smith alpha12 = x[16*i+11]; 13642f7816d4SBarry Smith alpha13 = x[16*i+12]; 13652f7816d4SBarry Smith alpha14 = x[16*i+13]; 13662f7816d4SBarry Smith alpha15 = x[16*i+14]; 13672f7816d4SBarry Smith alpha16 = x[16*i+15]; 13682f7816d4SBarry Smith while (n-->0) { 13692f7816d4SBarry Smith y[16*(*idx)] += alpha1*(*v); 13702f7816d4SBarry Smith y[16*(*idx)+1] += alpha2*(*v); 13712f7816d4SBarry Smith y[16*(*idx)+2] += alpha3*(*v); 13722f7816d4SBarry Smith y[16*(*idx)+3] += alpha4*(*v); 13732f7816d4SBarry Smith y[16*(*idx)+4] += alpha5*(*v); 13742f7816d4SBarry Smith y[16*(*idx)+5] += alpha6*(*v); 13752f7816d4SBarry Smith y[16*(*idx)+6] += alpha7*(*v); 13762f7816d4SBarry Smith y[16*(*idx)+7] += alpha8*(*v); 13772f7816d4SBarry Smith y[16*(*idx)+8] += alpha9*(*v); 13782f7816d4SBarry Smith y[16*(*idx)+9] += alpha10*(*v); 13792f7816d4SBarry Smith y[16*(*idx)+10] += alpha11*(*v); 13802f7816d4SBarry Smith y[16*(*idx)+11] += alpha12*(*v); 13812f7816d4SBarry Smith y[16*(*idx)+12] += alpha13*(*v); 13822f7816d4SBarry Smith y[16*(*idx)+13] += alpha14*(*v); 13832f7816d4SBarry Smith y[16*(*idx)+14] += alpha15*(*v); 13842f7816d4SBarry Smith y[16*(*idx)+15] += alpha16*(*v); 13852f7816d4SBarry Smith idx++; v++; 13862f7816d4SBarry Smith } 13872f7816d4SBarry Smith } 13882f7816d4SBarry Smith PetscLogFlops(32*a->nz); 13892f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 13902f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 139166ed3db0SBarry Smith PetscFunctionReturn(0); 139266ed3db0SBarry Smith } 139366ed3db0SBarry Smith 1394f4a53059SBarry Smith /*===================================================================================*/ 13954a2ae208SSatish Balay #undef __FUNCT__ 13964a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIMAIJ_dof" 1397f4a53059SBarry Smith int MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 1398f4a53059SBarry Smith { 13994cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1400f4a53059SBarry Smith int ierr; 1401f4a53059SBarry Smith PetscFunctionBegin; 1402f4a53059SBarry Smith 1403f4a53059SBarry Smith /* start the scatter */ 1404f4a53059SBarry Smith ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 14054cb9d9b8SBarry Smith ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 1406f4a53059SBarry Smith ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 14074cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 1408f4a53059SBarry Smith PetscFunctionReturn(0); 1409f4a53059SBarry Smith } 1410f4a53059SBarry Smith 14114a2ae208SSatish Balay #undef __FUNCT__ 14124a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 14134cb9d9b8SBarry Smith int MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 14144cb9d9b8SBarry Smith { 14154cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 14164cb9d9b8SBarry Smith int ierr; 14174cb9d9b8SBarry Smith PetscFunctionBegin; 14184cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 14194cb9d9b8SBarry Smith ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 14204cb9d9b8SBarry Smith ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 14214cb9d9b8SBarry Smith ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 14224cb9d9b8SBarry Smith PetscFunctionReturn(0); 14234cb9d9b8SBarry Smith } 14244cb9d9b8SBarry Smith 14254a2ae208SSatish Balay #undef __FUNCT__ 14264a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 1427d72c5c08SBarry Smith int MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 14284cb9d9b8SBarry Smith { 14294cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 14304cb9d9b8SBarry Smith int ierr; 14314cb9d9b8SBarry Smith PetscFunctionBegin; 14324cb9d9b8SBarry Smith 14334cb9d9b8SBarry Smith /* start the scatter */ 14344cb9d9b8SBarry Smith ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1435d72c5c08SBarry Smith ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 14364cb9d9b8SBarry Smith ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1437d72c5c08SBarry Smith ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,zz);CHKERRQ(ierr); 14384cb9d9b8SBarry Smith PetscFunctionReturn(0); 14394cb9d9b8SBarry Smith } 14404cb9d9b8SBarry Smith 14414a2ae208SSatish Balay #undef __FUNCT__ 14424a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 1443d72c5c08SBarry Smith int MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 14444cb9d9b8SBarry Smith { 14454cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 14464cb9d9b8SBarry Smith int ierr; 14474cb9d9b8SBarry Smith PetscFunctionBegin; 14484cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 1449d72c5c08SBarry Smith ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1450d72c5c08SBarry Smith ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 1451d72c5c08SBarry Smith ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 14524cb9d9b8SBarry Smith PetscFunctionReturn(0); 14534cb9d9b8SBarry Smith } 14544cb9d9b8SBarry Smith 1455bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */ 14564a2ae208SSatish Balay #undef __FUNCT__ 14574a2ae208SSatish Balay #define __FUNCT__ "MatCreateMAIJ" 1458cd3bbe55SBarry Smith int MatCreateMAIJ(Mat A,int dof,Mat *maij) 145982b1193eSBarry Smith { 1460f4a53059SBarry Smith int ierr,size,n; 14614cb9d9b8SBarry Smith Mat_MPIMAIJ *b; 146282b1193eSBarry Smith Mat B; 146382b1193eSBarry Smith 146482b1193eSBarry Smith PetscFunctionBegin; 1465d72c5c08SBarry Smith ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 1466d72c5c08SBarry Smith 1467d72c5c08SBarry Smith if (dof == 1) { 1468d72c5c08SBarry Smith *maij = A; 1469d72c5c08SBarry Smith } else { 147083903688SBarry Smith ierr = MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);CHKERRQ(ierr); 1471cd3bbe55SBarry Smith B->assembled = PETSC_TRUE; 1472d72c5c08SBarry Smith 1473f4a53059SBarry Smith ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1474f4a53059SBarry Smith if (size == 1) { 1475b9b97703SBarry Smith ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 14764cb9d9b8SBarry Smith B->ops->destroy = MatDestroy_SeqMAIJ; 1477b9b97703SBarry Smith b = (Mat_MPIMAIJ*)B->data; 1478b9b97703SBarry Smith b->dof = dof; 14794cb9d9b8SBarry Smith b->AIJ = A; 1480d72c5c08SBarry Smith if (dof == 2) { 1481cd3bbe55SBarry Smith B->ops->mult = MatMult_SeqMAIJ_2; 1482cd3bbe55SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_2; 1483cd3bbe55SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 1484cd3bbe55SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 1485bcc973b7SBarry Smith } else if (dof == 3) { 1486bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_3; 1487bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_3; 1488bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 1489bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 1490bcc973b7SBarry Smith } else if (dof == 4) { 1491bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_4; 1492bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_4; 1493bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 1494bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 1495f9fae5adSBarry Smith } else if (dof == 5) { 1496f9fae5adSBarry Smith B->ops->mult = MatMult_SeqMAIJ_5; 1497f9fae5adSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_5; 1498f9fae5adSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 1499f9fae5adSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 15006cd98798SBarry Smith } else if (dof == 6) { 15016cd98798SBarry Smith B->ops->mult = MatMult_SeqMAIJ_6; 15026cd98798SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_6; 15036cd98798SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 15046cd98798SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 150566ed3db0SBarry Smith } else if (dof == 8) { 150666ed3db0SBarry Smith B->ops->mult = MatMult_SeqMAIJ_8; 150766ed3db0SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_8; 150866ed3db0SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 150966ed3db0SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 15102f7816d4SBarry Smith } else if (dof == 16) { 15112f7816d4SBarry Smith B->ops->mult = MatMult_SeqMAIJ_16; 15122f7816d4SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_16; 15132f7816d4SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 15142f7816d4SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 151582b1193eSBarry Smith } else { 151666ed3db0SBarry Smith SETERRQ1(1,"Cannot handle a dof of %d. Send request for code to petsc-maint@mcs.anl.gov\n",dof); 151782b1193eSBarry Smith } 1518f4a53059SBarry Smith } else { 1519f4a53059SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 1520f4a53059SBarry Smith IS from,to; 1521f4a53059SBarry Smith Vec gvec; 1522f4a53059SBarry Smith int *garray,i; 1523f4a53059SBarry Smith 1524b9b97703SBarry Smith ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 1525d72c5c08SBarry Smith B->ops->destroy = MatDestroy_MPIMAIJ; 1526b9b97703SBarry Smith b = (Mat_MPIMAIJ*)B->data; 1527b9b97703SBarry Smith b->dof = dof; 1528b9b97703SBarry Smith b->A = A; 15294cb9d9b8SBarry Smith ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 15304cb9d9b8SBarry Smith ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 15314cb9d9b8SBarry Smith 1532f4a53059SBarry Smith ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 1533f4a53059SBarry Smith ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr); 1534f4a53059SBarry Smith 1535f4a53059SBarry Smith /* create two temporary Index sets for build scatter gather */ 1536b0a32e0cSBarry Smith ierr = PetscMalloc((n+1)*sizeof(int),&garray);CHKERRQ(ierr); 1537f4a53059SBarry Smith for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i]; 1538f4a53059SBarry Smith ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr); 1539f4a53059SBarry Smith ierr = PetscFree(garray);CHKERRQ(ierr); 1540f4a53059SBarry Smith ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 1541f4a53059SBarry Smith 1542f4a53059SBarry Smith /* create temporary global vector to generate scatter context */ 1543f4a53059SBarry Smith ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr); 1544f4a53059SBarry Smith 1545f4a53059SBarry Smith /* generate the scatter context */ 1546f4a53059SBarry Smith ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 1547f4a53059SBarry Smith 1548f4a53059SBarry Smith ierr = ISDestroy(from);CHKERRQ(ierr); 1549f4a53059SBarry Smith ierr = ISDestroy(to);CHKERRQ(ierr); 1550f4a53059SBarry Smith ierr = VecDestroy(gvec);CHKERRQ(ierr); 1551f4a53059SBarry Smith 1552f4a53059SBarry Smith B->ops->mult = MatMult_MPIMAIJ_dof; 15534cb9d9b8SBarry Smith B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 15544cb9d9b8SBarry Smith B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 15554cb9d9b8SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 1556f4a53059SBarry Smith } 1557cd3bbe55SBarry Smith *maij = B; 1558d72c5c08SBarry Smith } 155982b1193eSBarry Smith PetscFunctionReturn(0); 156082b1193eSBarry Smith } 156182b1193eSBarry Smith 156282b1193eSBarry Smith 156382b1193eSBarry Smith 156482b1193eSBarry Smith 156582b1193eSBarry Smith 156682b1193eSBarry Smith 156782b1193eSBarry Smith 156882b1193eSBarry Smith 156982b1193eSBarry Smith 157082b1193eSBarry Smith 157182b1193eSBarry Smith 157282b1193eSBarry Smith 1573