173f4d377SMatthew Knepley /*$Id: maij.c,v 1.19 2001/08/07 03:03:00 balay Exp $*/ 282b1193eSBarry Smith /* 3cd3bbe55SBarry Smith Defines the basic matrix operations for the MAIJ matrix storage format. 4cd3bbe55SBarry Smith This format is used for restriction and interpolation operations for 5cd3bbe55SBarry Smith multicomponent problems. It interpolates each component the same way 6cd3bbe55SBarry Smith independently. 7cd3bbe55SBarry Smith 8cd3bbe55SBarry Smith We provide: 9cd3bbe55SBarry Smith MatMult() 10cd3bbe55SBarry Smith MatMultTranspose() 11cd3bbe55SBarry Smith MatMultTransposeAdd() 12cd3bbe55SBarry Smith MatMultAdd() 13cd3bbe55SBarry Smith and 14cd3bbe55SBarry Smith MatCreateMAIJ(Mat,dof,Mat*) 15f4a53059SBarry Smith 16f4a53059SBarry Smith This single directory handles both the sequential and parallel codes 1782b1193eSBarry Smith */ 1882b1193eSBarry Smith 19be911618SKris Buschelman #include "src/mat/impls/maij/maij.h" 20*2f7816d4SBarry Smith #include "src/vec/vecimpl.h" 2182b1193eSBarry Smith 224a2ae208SSatish Balay #undef __FUNCT__ 234a2ae208SSatish Balay #define __FUNCT__ "MatMAIJGetAIJ" 24b9b97703SBarry Smith int MatMAIJGetAIJ(Mat A,Mat *B) 25b9b97703SBarry Smith { 26b9b97703SBarry Smith int ierr; 27b9b97703SBarry Smith PetscTruth ismpimaij,isseqmaij; 28b9b97703SBarry Smith 29b9b97703SBarry Smith PetscFunctionBegin; 30b9b97703SBarry Smith ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 31b9b97703SBarry Smith ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 32b9b97703SBarry Smith if (ismpimaij) { 33b9b97703SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 34b9b97703SBarry Smith 35b9b97703SBarry Smith *B = b->A; 36b9b97703SBarry Smith } else if (isseqmaij) { 37b9b97703SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 38b9b97703SBarry Smith 39b9b97703SBarry Smith *B = b->AIJ; 40b9b97703SBarry Smith } else { 41b9b97703SBarry Smith *B = A; 42b9b97703SBarry Smith } 43b9b97703SBarry Smith PetscFunctionReturn(0); 44b9b97703SBarry Smith } 45b9b97703SBarry Smith 464a2ae208SSatish Balay #undef __FUNCT__ 474a2ae208SSatish Balay #define __FUNCT__ "MatMAIJRedimension" 48b9b97703SBarry Smith int MatMAIJRedimension(Mat A,int dof,Mat *B) 49b9b97703SBarry Smith { 50b9b97703SBarry Smith int ierr; 51b9b97703SBarry Smith Mat Aij; 52b9b97703SBarry Smith 53b9b97703SBarry Smith PetscFunctionBegin; 54b9b97703SBarry Smith ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 55b9b97703SBarry Smith ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 56b9b97703SBarry Smith PetscFunctionReturn(0); 57b9b97703SBarry Smith } 58b9b97703SBarry Smith 594a2ae208SSatish Balay #undef __FUNCT__ 604a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_SeqMAIJ" 614cb9d9b8SBarry Smith int MatDestroy_SeqMAIJ(Mat A) 6282b1193eSBarry Smith { 6382b1193eSBarry Smith int ierr; 644cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 6582b1193eSBarry Smith 6682b1193eSBarry Smith PetscFunctionBegin; 67cd3bbe55SBarry Smith if (b->AIJ) { 68cd3bbe55SBarry Smith ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 6982b1193eSBarry Smith } 704cb9d9b8SBarry Smith ierr = PetscFree(b);CHKERRQ(ierr); 714cb9d9b8SBarry Smith PetscFunctionReturn(0); 724cb9d9b8SBarry Smith } 734cb9d9b8SBarry Smith 744a2ae208SSatish Balay #undef __FUNCT__ 754a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIMAIJ" 764cb9d9b8SBarry Smith int MatDestroy_MPIMAIJ(Mat A) 774cb9d9b8SBarry Smith { 784cb9d9b8SBarry Smith int ierr; 794cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 804cb9d9b8SBarry Smith 814cb9d9b8SBarry Smith PetscFunctionBegin; 824cb9d9b8SBarry Smith if (b->AIJ) { 834cb9d9b8SBarry Smith ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 844cb9d9b8SBarry Smith } 85f4a53059SBarry Smith if (b->OAIJ) { 86f4a53059SBarry Smith ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr); 87f4a53059SBarry Smith } 88b9b97703SBarry Smith if (b->A) { 89b9b97703SBarry Smith ierr = MatDestroy(b->A);CHKERRQ(ierr); 90b9b97703SBarry Smith } 91f4a53059SBarry Smith if (b->ctx) { 92f4a53059SBarry Smith ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr); 93f4a53059SBarry Smith } 94f4a53059SBarry Smith if (b->w) { 95f4a53059SBarry Smith ierr = VecDestroy(b->w);CHKERRQ(ierr); 96f4a53059SBarry Smith } 97cd3bbe55SBarry Smith ierr = PetscFree(b);CHKERRQ(ierr); 98b9b97703SBarry Smith PetscFunctionReturn(0); 99b9b97703SBarry Smith } 100b9b97703SBarry Smith 10182b1193eSBarry Smith EXTERN_C_BEGIN 1024a2ae208SSatish Balay #undef __FUNCT__ 1034a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MAIJ" 104f4a53059SBarry Smith int MatCreate_MAIJ(Mat A) 10582b1193eSBarry Smith { 106cd3bbe55SBarry Smith int ierr; 1074cb9d9b8SBarry Smith Mat_MPIMAIJ *b; 10882b1193eSBarry Smith 10982b1193eSBarry Smith PetscFunctionBegin; 110b0a32e0cSBarry Smith ierr = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr); 111b0a32e0cSBarry Smith A->data = (void*)b; 1124cb9d9b8SBarry Smith ierr = PetscMemzero(b,sizeof(Mat_MPIMAIJ));CHKERRQ(ierr); 113cd3bbe55SBarry Smith ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 114cd3bbe55SBarry Smith A->factor = 0; 115cd3bbe55SBarry Smith A->mapping = 0; 116f4a53059SBarry Smith 117cd3bbe55SBarry Smith b->AIJ = 0; 118cd3bbe55SBarry Smith b->dof = 0; 119f4a53059SBarry Smith b->OAIJ = 0; 120f4a53059SBarry Smith b->ctx = 0; 121f4a53059SBarry Smith b->w = 0; 12282b1193eSBarry Smith PetscFunctionReturn(0); 12382b1193eSBarry Smith } 12482b1193eSBarry Smith EXTERN_C_END 12582b1193eSBarry Smith 126cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 1274a2ae208SSatish Balay #undef __FUNCT__ 128b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_2" 129cd3bbe55SBarry Smith int MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 13082b1193eSBarry Smith { 1314cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 132bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 13387828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2; 134273d9f13SBarry Smith int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 135bcc973b7SBarry Smith int n,i,jrow,j; 13682b1193eSBarry Smith 137bcc973b7SBarry Smith PetscFunctionBegin; 138*2f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 139*2f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 140bcc973b7SBarry Smith x = x + shift; /* shift for Fortran start by 1 indexing */ 141bcc973b7SBarry Smith idx = a->j; 142bcc973b7SBarry Smith v = a->a; 143bcc973b7SBarry Smith ii = a->i; 144bcc973b7SBarry Smith 145bcc973b7SBarry Smith v += shift; /* shift for Fortran start by 1 indexing */ 146bcc973b7SBarry Smith idx += shift; 147bcc973b7SBarry Smith for (i=0; i<m; i++) { 148bcc973b7SBarry Smith jrow = ii[i]; 149bcc973b7SBarry Smith n = ii[i+1] - jrow; 150bcc973b7SBarry Smith sum1 = 0.0; 151bcc973b7SBarry Smith sum2 = 0.0; 152bcc973b7SBarry Smith for (j=0; j<n; j++) { 153bcc973b7SBarry Smith sum1 += v[jrow]*x[2*idx[jrow]]; 154bcc973b7SBarry Smith sum2 += v[jrow]*x[2*idx[jrow]+1]; 155bcc973b7SBarry Smith jrow++; 156bcc973b7SBarry Smith } 157bcc973b7SBarry Smith y[2*i] = sum1; 158bcc973b7SBarry Smith y[2*i+1] = sum2; 159bcc973b7SBarry Smith } 160bcc973b7SBarry Smith 161b0a32e0cSBarry Smith PetscLogFlops(4*a->nz - 2*m); 162*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 163*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 16482b1193eSBarry Smith PetscFunctionReturn(0); 16582b1193eSBarry Smith } 166bcc973b7SBarry Smith 1674a2ae208SSatish Balay #undef __FUNCT__ 168b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 169cd3bbe55SBarry Smith int MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 17082b1193eSBarry Smith { 1714cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 172bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 17387828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0; 174273d9f13SBarry Smith int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 17582b1193eSBarry Smith 176bcc973b7SBarry Smith PetscFunctionBegin; 177bcc973b7SBarry Smith ierr = VecSet(&zero,yy);CHKERRQ(ierr); 178*2f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 179*2f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 180bcc973b7SBarry Smith y = y + shift; /* shift for Fortran start by 1 indexing */ 181bcc973b7SBarry Smith for (i=0; i<m; i++) { 182bcc973b7SBarry Smith idx = a->j + a->i[i] + shift; 183bcc973b7SBarry Smith v = a->a + a->i[i] + shift; 184bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 185bcc973b7SBarry Smith alpha1 = x[2*i]; 186bcc973b7SBarry Smith alpha2 = x[2*i+1]; 187bcc973b7SBarry Smith while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 188bcc973b7SBarry Smith } 189b0a32e0cSBarry Smith PetscLogFlops(4*a->nz - 2*b->AIJ->n); 190*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 191*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 19282b1193eSBarry Smith PetscFunctionReturn(0); 19382b1193eSBarry Smith } 194bcc973b7SBarry Smith 1954a2ae208SSatish Balay #undef __FUNCT__ 196b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 197cd3bbe55SBarry Smith int MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 19882b1193eSBarry Smith { 1994cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 200bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 20187828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2; 202273d9f13SBarry Smith int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 203bcc973b7SBarry Smith int n,i,jrow,j; 20482b1193eSBarry Smith 205bcc973b7SBarry Smith PetscFunctionBegin; 206f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 207*2f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 208*2f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 209bcc973b7SBarry Smith x = x + shift; /* shift for Fortran start by 1 indexing */ 210bcc973b7SBarry Smith idx = a->j; 211bcc973b7SBarry Smith v = a->a; 212bcc973b7SBarry Smith ii = a->i; 213bcc973b7SBarry Smith 214bcc973b7SBarry Smith v += shift; /* shift for Fortran start by 1 indexing */ 215bcc973b7SBarry Smith idx += shift; 216bcc973b7SBarry Smith for (i=0; i<m; i++) { 217bcc973b7SBarry Smith jrow = ii[i]; 218bcc973b7SBarry Smith n = ii[i+1] - jrow; 219bcc973b7SBarry Smith sum1 = 0.0; 220bcc973b7SBarry Smith sum2 = 0.0; 221bcc973b7SBarry Smith for (j=0; j<n; j++) { 222bcc973b7SBarry Smith sum1 += v[jrow]*x[2*idx[jrow]]; 223bcc973b7SBarry Smith sum2 += v[jrow]*x[2*idx[jrow]+1]; 224bcc973b7SBarry Smith jrow++; 225bcc973b7SBarry Smith } 226bcc973b7SBarry Smith y[2*i] += sum1; 227bcc973b7SBarry Smith y[2*i+1] += sum2; 228bcc973b7SBarry Smith } 229bcc973b7SBarry Smith 230b0a32e0cSBarry Smith PetscLogFlops(4*a->nz - 2*m); 231*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 232*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 233bcc973b7SBarry Smith PetscFunctionReturn(0); 23482b1193eSBarry Smith } 2354a2ae208SSatish Balay #undef __FUNCT__ 236b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 237cd3bbe55SBarry Smith int MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 23882b1193eSBarry Smith { 2394cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 240bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 24187828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2; 242273d9f13SBarry Smith int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 24382b1193eSBarry Smith 244bcc973b7SBarry Smith PetscFunctionBegin; 245f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 246*2f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 247*2f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 248bcc973b7SBarry Smith y = y + shift; /* shift for Fortran start by 1 indexing */ 249bcc973b7SBarry Smith for (i=0; i<m; i++) { 250bcc973b7SBarry Smith idx = a->j + a->i[i] + shift; 251bcc973b7SBarry Smith v = a->a + a->i[i] + shift; 252bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 253bcc973b7SBarry Smith alpha1 = x[2*i]; 254bcc973b7SBarry Smith alpha2 = x[2*i+1]; 255bcc973b7SBarry Smith while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 256bcc973b7SBarry Smith } 257b0a32e0cSBarry Smith PetscLogFlops(4*a->nz - 2*b->AIJ->n); 258*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 259*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 260bcc973b7SBarry Smith PetscFunctionReturn(0); 26182b1193eSBarry Smith } 262cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 2634a2ae208SSatish Balay #undef __FUNCT__ 264b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_3" 265bcc973b7SBarry Smith int MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 266bcc973b7SBarry Smith { 2674cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 268bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 26987828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3; 270273d9f13SBarry Smith int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 271bcc973b7SBarry Smith int n,i,jrow,j; 27282b1193eSBarry Smith 273bcc973b7SBarry Smith PetscFunctionBegin; 274*2f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 275*2f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 276bcc973b7SBarry Smith x = x + shift; /* shift for Fortran start by 1 indexing */ 277bcc973b7SBarry Smith idx = a->j; 278bcc973b7SBarry Smith v = a->a; 279bcc973b7SBarry Smith ii = a->i; 280bcc973b7SBarry Smith 281bcc973b7SBarry Smith v += shift; /* shift for Fortran start by 1 indexing */ 282bcc973b7SBarry Smith idx += shift; 283bcc973b7SBarry Smith for (i=0; i<m; i++) { 284bcc973b7SBarry Smith jrow = ii[i]; 285bcc973b7SBarry Smith n = ii[i+1] - jrow; 286bcc973b7SBarry Smith sum1 = 0.0; 287bcc973b7SBarry Smith sum2 = 0.0; 288bcc973b7SBarry Smith sum3 = 0.0; 289bcc973b7SBarry Smith for (j=0; j<n; j++) { 290bcc973b7SBarry Smith sum1 += v[jrow]*x[3*idx[jrow]]; 291bcc973b7SBarry Smith sum2 += v[jrow]*x[3*idx[jrow]+1]; 292bcc973b7SBarry Smith sum3 += v[jrow]*x[3*idx[jrow]+2]; 293bcc973b7SBarry Smith jrow++; 294bcc973b7SBarry Smith } 295bcc973b7SBarry Smith y[3*i] = sum1; 296bcc973b7SBarry Smith y[3*i+1] = sum2; 297bcc973b7SBarry Smith y[3*i+2] = sum3; 298bcc973b7SBarry Smith } 299bcc973b7SBarry Smith 300b0a32e0cSBarry Smith PetscLogFlops(6*a->nz - 3*m); 301*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 302*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 303bcc973b7SBarry Smith PetscFunctionReturn(0); 304bcc973b7SBarry Smith } 305bcc973b7SBarry Smith 3064a2ae208SSatish Balay #undef __FUNCT__ 307b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 308bcc973b7SBarry Smith int MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 309bcc973b7SBarry Smith { 3104cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 311bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 31287828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0; 313273d9f13SBarry Smith int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 314bcc973b7SBarry Smith 315bcc973b7SBarry Smith PetscFunctionBegin; 316bcc973b7SBarry Smith ierr = VecSet(&zero,yy);CHKERRQ(ierr); 317*2f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 318*2f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 319bcc973b7SBarry Smith y = y + shift; /* shift for Fortran start by 1 indexing */ 320bcc973b7SBarry Smith for (i=0; i<m; i++) { 321bcc973b7SBarry Smith idx = a->j + a->i[i] + shift; 322bcc973b7SBarry Smith v = a->a + a->i[i] + shift; 323bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 324bcc973b7SBarry Smith alpha1 = x[3*i]; 325bcc973b7SBarry Smith alpha2 = x[3*i+1]; 326bcc973b7SBarry Smith alpha3 = x[3*i+2]; 327bcc973b7SBarry Smith while (n-->0) { 328bcc973b7SBarry Smith y[3*(*idx)] += alpha1*(*v); 329bcc973b7SBarry Smith y[3*(*idx)+1] += alpha2*(*v); 330bcc973b7SBarry Smith y[3*(*idx)+2] += alpha3*(*v); 331bcc973b7SBarry Smith idx++; v++; 332bcc973b7SBarry Smith } 333bcc973b7SBarry Smith } 334b0a32e0cSBarry Smith PetscLogFlops(6*a->nz - 3*b->AIJ->n); 335*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 336*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 337bcc973b7SBarry Smith PetscFunctionReturn(0); 338bcc973b7SBarry Smith } 339bcc973b7SBarry Smith 3404a2ae208SSatish Balay #undef __FUNCT__ 341b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 342bcc973b7SBarry Smith int MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 343bcc973b7SBarry Smith { 3444cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 345bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 34687828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3; 347273d9f13SBarry Smith int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 348bcc973b7SBarry Smith int n,i,jrow,j; 349bcc973b7SBarry Smith 350bcc973b7SBarry Smith PetscFunctionBegin; 351f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 352*2f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 353*2f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 354bcc973b7SBarry Smith x = x + shift; /* shift for Fortran start by 1 indexing */ 355bcc973b7SBarry Smith idx = a->j; 356bcc973b7SBarry Smith v = a->a; 357bcc973b7SBarry Smith ii = a->i; 358bcc973b7SBarry Smith 359bcc973b7SBarry Smith v += shift; /* shift for Fortran start by 1 indexing */ 360bcc973b7SBarry Smith idx += shift; 361bcc973b7SBarry Smith for (i=0; i<m; i++) { 362bcc973b7SBarry Smith jrow = ii[i]; 363bcc973b7SBarry Smith n = ii[i+1] - jrow; 364bcc973b7SBarry Smith sum1 = 0.0; 365bcc973b7SBarry Smith sum2 = 0.0; 366bcc973b7SBarry Smith sum3 = 0.0; 367bcc973b7SBarry Smith for (j=0; j<n; j++) { 368bcc973b7SBarry Smith sum1 += v[jrow]*x[3*idx[jrow]]; 369bcc973b7SBarry Smith sum2 += v[jrow]*x[3*idx[jrow]+1]; 370bcc973b7SBarry Smith sum3 += v[jrow]*x[3*idx[jrow]+2]; 371bcc973b7SBarry Smith jrow++; 372bcc973b7SBarry Smith } 373bcc973b7SBarry Smith y[3*i] += sum1; 374bcc973b7SBarry Smith y[3*i+1] += sum2; 375bcc973b7SBarry Smith y[3*i+2] += sum3; 376bcc973b7SBarry Smith } 377bcc973b7SBarry Smith 378b0a32e0cSBarry Smith PetscLogFlops(6*a->nz); 379*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 380*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 381bcc973b7SBarry Smith PetscFunctionReturn(0); 382bcc973b7SBarry Smith } 3834a2ae208SSatish Balay #undef __FUNCT__ 384b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 385bcc973b7SBarry Smith int MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 386bcc973b7SBarry Smith { 3874cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 388bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 38987828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3; 390273d9f13SBarry Smith int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 391bcc973b7SBarry Smith 392bcc973b7SBarry Smith PetscFunctionBegin; 393f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 394*2f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 395*2f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 396bcc973b7SBarry Smith y = y + shift; /* shift for Fortran start by 1 indexing */ 397bcc973b7SBarry Smith for (i=0; i<m; i++) { 398bcc973b7SBarry Smith idx = a->j + a->i[i] + shift; 399bcc973b7SBarry Smith v = a->a + a->i[i] + shift; 400bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 401bcc973b7SBarry Smith alpha1 = x[3*i]; 402bcc973b7SBarry Smith alpha2 = x[3*i+1]; 403bcc973b7SBarry Smith alpha3 = x[3*i+2]; 404bcc973b7SBarry Smith while (n-->0) { 405bcc973b7SBarry Smith y[3*(*idx)] += alpha1*(*v); 406bcc973b7SBarry Smith y[3*(*idx)+1] += alpha2*(*v); 407bcc973b7SBarry Smith y[3*(*idx)+2] += alpha3*(*v); 408bcc973b7SBarry Smith idx++; v++; 409bcc973b7SBarry Smith } 410bcc973b7SBarry Smith } 411b0a32e0cSBarry Smith PetscLogFlops(6*a->nz); 412*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 413*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 414bcc973b7SBarry Smith PetscFunctionReturn(0); 415bcc973b7SBarry Smith } 416bcc973b7SBarry Smith 417bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/ 4184a2ae208SSatish Balay #undef __FUNCT__ 419b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_4" 420bcc973b7SBarry Smith int MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 421bcc973b7SBarry Smith { 4224cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 423bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 42487828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 425273d9f13SBarry Smith int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 426bcc973b7SBarry Smith int n,i,jrow,j; 427bcc973b7SBarry Smith 428bcc973b7SBarry Smith PetscFunctionBegin; 429*2f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 430*2f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 431bcc973b7SBarry Smith x = x + shift; /* shift for Fortran start by 1 indexing */ 432bcc973b7SBarry Smith idx = a->j; 433bcc973b7SBarry Smith v = a->a; 434bcc973b7SBarry Smith ii = a->i; 435bcc973b7SBarry Smith 436bcc973b7SBarry Smith v += shift; /* shift for Fortran start by 1 indexing */ 437bcc973b7SBarry Smith idx += shift; 438bcc973b7SBarry Smith for (i=0; i<m; i++) { 439bcc973b7SBarry Smith jrow = ii[i]; 440bcc973b7SBarry Smith n = ii[i+1] - jrow; 441bcc973b7SBarry Smith sum1 = 0.0; 442bcc973b7SBarry Smith sum2 = 0.0; 443bcc973b7SBarry Smith sum3 = 0.0; 444bcc973b7SBarry Smith sum4 = 0.0; 445bcc973b7SBarry Smith for (j=0; j<n; j++) { 446bcc973b7SBarry Smith sum1 += v[jrow]*x[4*idx[jrow]]; 447bcc973b7SBarry Smith sum2 += v[jrow]*x[4*idx[jrow]+1]; 448bcc973b7SBarry Smith sum3 += v[jrow]*x[4*idx[jrow]+2]; 449bcc973b7SBarry Smith sum4 += v[jrow]*x[4*idx[jrow]+3]; 450bcc973b7SBarry Smith jrow++; 451bcc973b7SBarry Smith } 452bcc973b7SBarry Smith y[4*i] = sum1; 453bcc973b7SBarry Smith y[4*i+1] = sum2; 454bcc973b7SBarry Smith y[4*i+2] = sum3; 455bcc973b7SBarry Smith y[4*i+3] = sum4; 456bcc973b7SBarry Smith } 457bcc973b7SBarry Smith 458b0a32e0cSBarry Smith PetscLogFlops(8*a->nz - 4*m); 459*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 460*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 461bcc973b7SBarry Smith PetscFunctionReturn(0); 462bcc973b7SBarry Smith } 463bcc973b7SBarry Smith 4644a2ae208SSatish Balay #undef __FUNCT__ 465b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 466bcc973b7SBarry Smith int MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 467bcc973b7SBarry Smith { 4684cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 469bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 47087828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0; 471273d9f13SBarry Smith int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 472bcc973b7SBarry Smith 473bcc973b7SBarry Smith PetscFunctionBegin; 474bcc973b7SBarry Smith ierr = VecSet(&zero,yy);CHKERRQ(ierr); 475*2f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 476*2f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 477bcc973b7SBarry Smith y = y + shift; /* shift for Fortran start by 1 indexing */ 478bcc973b7SBarry Smith for (i=0; i<m; i++) { 479bcc973b7SBarry Smith idx = a->j + a->i[i] + shift; 480bcc973b7SBarry Smith v = a->a + a->i[i] + shift; 481bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 482bcc973b7SBarry Smith alpha1 = x[4*i]; 483bcc973b7SBarry Smith alpha2 = x[4*i+1]; 484bcc973b7SBarry Smith alpha3 = x[4*i+2]; 485bcc973b7SBarry Smith alpha4 = x[4*i+3]; 486bcc973b7SBarry Smith while (n-->0) { 487bcc973b7SBarry Smith y[4*(*idx)] += alpha1*(*v); 488bcc973b7SBarry Smith y[4*(*idx)+1] += alpha2*(*v); 489bcc973b7SBarry Smith y[4*(*idx)+2] += alpha3*(*v); 490bcc973b7SBarry Smith y[4*(*idx)+3] += alpha4*(*v); 491bcc973b7SBarry Smith idx++; v++; 492bcc973b7SBarry Smith } 493bcc973b7SBarry Smith } 494b0a32e0cSBarry Smith PetscLogFlops(8*a->nz - 4*b->AIJ->n); 495*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 496*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 497bcc973b7SBarry Smith PetscFunctionReturn(0); 498bcc973b7SBarry Smith } 499bcc973b7SBarry Smith 5004a2ae208SSatish Balay #undef __FUNCT__ 501b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 502bcc973b7SBarry Smith int MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 503bcc973b7SBarry Smith { 5044cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 505f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 50687828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 507273d9f13SBarry Smith int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 508f9fae5adSBarry Smith int n,i,jrow,j; 509f9fae5adSBarry Smith 510f9fae5adSBarry Smith PetscFunctionBegin; 511f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 512*2f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 513*2f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 514f9fae5adSBarry Smith x = x + shift; /* shift for Fortran start by 1 indexing */ 515f9fae5adSBarry Smith idx = a->j; 516f9fae5adSBarry Smith v = a->a; 517f9fae5adSBarry Smith ii = a->i; 518f9fae5adSBarry Smith 519f9fae5adSBarry Smith v += shift; /* shift for Fortran start by 1 indexing */ 520f9fae5adSBarry Smith idx += shift; 521f9fae5adSBarry Smith for (i=0; i<m; i++) { 522f9fae5adSBarry Smith jrow = ii[i]; 523f9fae5adSBarry Smith n = ii[i+1] - jrow; 524f9fae5adSBarry Smith sum1 = 0.0; 525f9fae5adSBarry Smith sum2 = 0.0; 526f9fae5adSBarry Smith sum3 = 0.0; 527f9fae5adSBarry Smith sum4 = 0.0; 528f9fae5adSBarry Smith for (j=0; j<n; j++) { 529f9fae5adSBarry Smith sum1 += v[jrow]*x[4*idx[jrow]]; 530f9fae5adSBarry Smith sum2 += v[jrow]*x[4*idx[jrow]+1]; 531f9fae5adSBarry Smith sum3 += v[jrow]*x[4*idx[jrow]+2]; 532f9fae5adSBarry Smith sum4 += v[jrow]*x[4*idx[jrow]+3]; 533f9fae5adSBarry Smith jrow++; 534f9fae5adSBarry Smith } 535f9fae5adSBarry Smith y[4*i] += sum1; 536f9fae5adSBarry Smith y[4*i+1] += sum2; 537f9fae5adSBarry Smith y[4*i+2] += sum3; 538f9fae5adSBarry Smith y[4*i+3] += sum4; 539f9fae5adSBarry Smith } 540f9fae5adSBarry Smith 541b0a32e0cSBarry Smith PetscLogFlops(8*a->nz - 4*m); 542*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 543*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 544f9fae5adSBarry Smith PetscFunctionReturn(0); 545bcc973b7SBarry Smith } 5464a2ae208SSatish Balay #undef __FUNCT__ 547b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 548bcc973b7SBarry Smith int MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 549bcc973b7SBarry Smith { 5504cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 551f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 55287828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4; 553273d9f13SBarry Smith int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 554f9fae5adSBarry Smith 555f9fae5adSBarry Smith PetscFunctionBegin; 556f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 557*2f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 558*2f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 559f9fae5adSBarry Smith y = y + shift; /* shift for Fortran start by 1 indexing */ 560f9fae5adSBarry Smith for (i=0; i<m; i++) { 561f9fae5adSBarry Smith idx = a->j + a->i[i] + shift; 562f9fae5adSBarry Smith v = a->a + a->i[i] + shift; 563f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 564f9fae5adSBarry Smith alpha1 = x[4*i]; 565f9fae5adSBarry Smith alpha2 = x[4*i+1]; 566f9fae5adSBarry Smith alpha3 = x[4*i+2]; 567f9fae5adSBarry Smith alpha4 = x[4*i+3]; 568f9fae5adSBarry Smith while (n-->0) { 569f9fae5adSBarry Smith y[4*(*idx)] += alpha1*(*v); 570f9fae5adSBarry Smith y[4*(*idx)+1] += alpha2*(*v); 571f9fae5adSBarry Smith y[4*(*idx)+2] += alpha3*(*v); 572f9fae5adSBarry Smith y[4*(*idx)+3] += alpha4*(*v); 573f9fae5adSBarry Smith idx++; v++; 574f9fae5adSBarry Smith } 575f9fae5adSBarry Smith } 576b0a32e0cSBarry Smith PetscLogFlops(8*a->nz - 4*b->AIJ->n); 577*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 578*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 579f9fae5adSBarry Smith PetscFunctionReturn(0); 580f9fae5adSBarry Smith } 581f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/ 5826cd98798SBarry Smith 5834a2ae208SSatish Balay #undef __FUNCT__ 584b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_5" 585f9fae5adSBarry Smith int MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 586f9fae5adSBarry Smith { 5874cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 588f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 58987828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 590273d9f13SBarry Smith int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 591f9fae5adSBarry Smith int n,i,jrow,j; 592f9fae5adSBarry Smith 593f9fae5adSBarry Smith PetscFunctionBegin; 594*2f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 595*2f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 596f9fae5adSBarry Smith x = x + shift; /* shift for Fortran start by 1 indexing */ 597f9fae5adSBarry Smith idx = a->j; 598f9fae5adSBarry Smith v = a->a; 599f9fae5adSBarry Smith ii = a->i; 600f9fae5adSBarry Smith 601f9fae5adSBarry Smith v += shift; /* shift for Fortran start by 1 indexing */ 602f9fae5adSBarry Smith idx += shift; 603f9fae5adSBarry Smith for (i=0; i<m; i++) { 604f9fae5adSBarry Smith jrow = ii[i]; 605f9fae5adSBarry Smith n = ii[i+1] - jrow; 606f9fae5adSBarry Smith sum1 = 0.0; 607f9fae5adSBarry Smith sum2 = 0.0; 608f9fae5adSBarry Smith sum3 = 0.0; 609f9fae5adSBarry Smith sum4 = 0.0; 610f9fae5adSBarry Smith sum5 = 0.0; 611f9fae5adSBarry Smith for (j=0; j<n; j++) { 612f9fae5adSBarry Smith sum1 += v[jrow]*x[5*idx[jrow]]; 613f9fae5adSBarry Smith sum2 += v[jrow]*x[5*idx[jrow]+1]; 614f9fae5adSBarry Smith sum3 += v[jrow]*x[5*idx[jrow]+2]; 615f9fae5adSBarry Smith sum4 += v[jrow]*x[5*idx[jrow]+3]; 616f9fae5adSBarry Smith sum5 += v[jrow]*x[5*idx[jrow]+4]; 617f9fae5adSBarry Smith jrow++; 618f9fae5adSBarry Smith } 619f9fae5adSBarry Smith y[5*i] = sum1; 620f9fae5adSBarry Smith y[5*i+1] = sum2; 621f9fae5adSBarry Smith y[5*i+2] = sum3; 622f9fae5adSBarry Smith y[5*i+3] = sum4; 623f9fae5adSBarry Smith y[5*i+4] = sum5; 624f9fae5adSBarry Smith } 625f9fae5adSBarry Smith 626b0a32e0cSBarry Smith PetscLogFlops(10*a->nz - 5*m); 627*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 628*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 629f9fae5adSBarry Smith PetscFunctionReturn(0); 630f9fae5adSBarry Smith } 631f9fae5adSBarry Smith 6324a2ae208SSatish Balay #undef __FUNCT__ 633b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 634f9fae5adSBarry Smith int MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 635f9fae5adSBarry Smith { 6364cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 637f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 63887828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0; 639273d9f13SBarry Smith int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 640f9fae5adSBarry Smith 641f9fae5adSBarry Smith PetscFunctionBegin; 642f9fae5adSBarry Smith ierr = VecSet(&zero,yy);CHKERRQ(ierr); 643*2f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 644*2f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 645f9fae5adSBarry Smith y = y + shift; /* shift for Fortran start by 1 indexing */ 646f9fae5adSBarry Smith for (i=0; i<m; i++) { 647f9fae5adSBarry Smith idx = a->j + a->i[i] + shift; 648f9fae5adSBarry Smith v = a->a + a->i[i] + shift; 649f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 650f9fae5adSBarry Smith alpha1 = x[5*i]; 651f9fae5adSBarry Smith alpha2 = x[5*i+1]; 652f9fae5adSBarry Smith alpha3 = x[5*i+2]; 653f9fae5adSBarry Smith alpha4 = x[5*i+3]; 654f9fae5adSBarry Smith alpha5 = x[5*i+4]; 655f9fae5adSBarry Smith while (n-->0) { 656f9fae5adSBarry Smith y[5*(*idx)] += alpha1*(*v); 657f9fae5adSBarry Smith y[5*(*idx)+1] += alpha2*(*v); 658f9fae5adSBarry Smith y[5*(*idx)+2] += alpha3*(*v); 659f9fae5adSBarry Smith y[5*(*idx)+3] += alpha4*(*v); 660f9fae5adSBarry Smith y[5*(*idx)+4] += alpha5*(*v); 661f9fae5adSBarry Smith idx++; v++; 662f9fae5adSBarry Smith } 663f9fae5adSBarry Smith } 664b0a32e0cSBarry Smith PetscLogFlops(10*a->nz - 5*b->AIJ->n); 665*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 666*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 667f9fae5adSBarry Smith PetscFunctionReturn(0); 668f9fae5adSBarry Smith } 669f9fae5adSBarry Smith 6704a2ae208SSatish Balay #undef __FUNCT__ 671b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 672f9fae5adSBarry Smith int MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 673f9fae5adSBarry Smith { 6744cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 675f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 67687828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 677273d9f13SBarry Smith int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 678f9fae5adSBarry Smith int n,i,jrow,j; 679f9fae5adSBarry Smith 680f9fae5adSBarry Smith PetscFunctionBegin; 681f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 682*2f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 683*2f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 684f9fae5adSBarry Smith x = x + shift; /* shift for Fortran start by 1 indexing */ 685f9fae5adSBarry Smith idx = a->j; 686f9fae5adSBarry Smith v = a->a; 687f9fae5adSBarry Smith ii = a->i; 688f9fae5adSBarry Smith 689f9fae5adSBarry Smith v += shift; /* shift for Fortran start by 1 indexing */ 690f9fae5adSBarry Smith idx += shift; 691f9fae5adSBarry Smith for (i=0; i<m; i++) { 692f9fae5adSBarry Smith jrow = ii[i]; 693f9fae5adSBarry Smith n = ii[i+1] - jrow; 694f9fae5adSBarry Smith sum1 = 0.0; 695f9fae5adSBarry Smith sum2 = 0.0; 696f9fae5adSBarry Smith sum3 = 0.0; 697f9fae5adSBarry Smith sum4 = 0.0; 698f9fae5adSBarry Smith sum5 = 0.0; 699f9fae5adSBarry Smith for (j=0; j<n; j++) { 700f9fae5adSBarry Smith sum1 += v[jrow]*x[5*idx[jrow]]; 701f9fae5adSBarry Smith sum2 += v[jrow]*x[5*idx[jrow]+1]; 702f9fae5adSBarry Smith sum3 += v[jrow]*x[5*idx[jrow]+2]; 703f9fae5adSBarry Smith sum4 += v[jrow]*x[5*idx[jrow]+3]; 704f9fae5adSBarry Smith sum5 += v[jrow]*x[5*idx[jrow]+4]; 705f9fae5adSBarry Smith jrow++; 706f9fae5adSBarry Smith } 707f9fae5adSBarry Smith y[5*i] += sum1; 708f9fae5adSBarry Smith y[5*i+1] += sum2; 709f9fae5adSBarry Smith y[5*i+2] += sum3; 710f9fae5adSBarry Smith y[5*i+3] += sum4; 711f9fae5adSBarry Smith y[5*i+4] += sum5; 712f9fae5adSBarry Smith } 713f9fae5adSBarry Smith 714b0a32e0cSBarry Smith PetscLogFlops(10*a->nz); 715*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 716*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 717f9fae5adSBarry Smith PetscFunctionReturn(0); 718f9fae5adSBarry Smith } 719f9fae5adSBarry Smith 7204a2ae208SSatish Balay #undef __FUNCT__ 721b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 722f9fae5adSBarry Smith int MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 723f9fae5adSBarry Smith { 7244cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 725f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 72687828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5; 727273d9f13SBarry Smith int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 728f9fae5adSBarry Smith 729f9fae5adSBarry Smith PetscFunctionBegin; 730f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 731*2f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 732*2f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 733f9fae5adSBarry Smith y = y + shift; /* shift for Fortran start by 1 indexing */ 734f9fae5adSBarry Smith for (i=0; i<m; i++) { 735f9fae5adSBarry Smith idx = a->j + a->i[i] + shift; 736f9fae5adSBarry Smith v = a->a + a->i[i] + shift; 737f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 738f9fae5adSBarry Smith alpha1 = x[5*i]; 739f9fae5adSBarry Smith alpha2 = x[5*i+1]; 740f9fae5adSBarry Smith alpha3 = x[5*i+2]; 741f9fae5adSBarry Smith alpha4 = x[5*i+3]; 742f9fae5adSBarry Smith alpha5 = x[5*i+4]; 743f9fae5adSBarry Smith while (n-->0) { 744f9fae5adSBarry Smith y[5*(*idx)] += alpha1*(*v); 745f9fae5adSBarry Smith y[5*(*idx)+1] += alpha2*(*v); 746f9fae5adSBarry Smith y[5*(*idx)+2] += alpha3*(*v); 747f9fae5adSBarry Smith y[5*(*idx)+3] += alpha4*(*v); 748f9fae5adSBarry Smith y[5*(*idx)+4] += alpha5*(*v); 749f9fae5adSBarry Smith idx++; v++; 750f9fae5adSBarry Smith } 751f9fae5adSBarry Smith } 752b0a32e0cSBarry Smith PetscLogFlops(10*a->nz); 753*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 754*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 755f9fae5adSBarry Smith PetscFunctionReturn(0); 756bcc973b7SBarry Smith } 757bcc973b7SBarry Smith 7586cd98798SBarry Smith /* ------------------------------------------------------------------------------*/ 7594a2ae208SSatish Balay #undef __FUNCT__ 760b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_6" 7616cd98798SBarry Smith int MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 7626cd98798SBarry Smith { 7636cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 7646cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 76587828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 7666cd98798SBarry Smith int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 7676cd98798SBarry Smith int n,i,jrow,j; 7686cd98798SBarry Smith 7696cd98798SBarry Smith PetscFunctionBegin; 770*2f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 771*2f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 7726cd98798SBarry Smith x = x + shift; /* shift for Fortran start by 1 indexing */ 7736cd98798SBarry Smith idx = a->j; 7746cd98798SBarry Smith v = a->a; 7756cd98798SBarry Smith ii = a->i; 7766cd98798SBarry Smith 7776cd98798SBarry Smith v += shift; /* shift for Fortran start by 1 indexing */ 7786cd98798SBarry Smith idx += shift; 7796cd98798SBarry Smith for (i=0; i<m; i++) { 7806cd98798SBarry Smith jrow = ii[i]; 7816cd98798SBarry Smith n = ii[i+1] - jrow; 7826cd98798SBarry Smith sum1 = 0.0; 7836cd98798SBarry Smith sum2 = 0.0; 7846cd98798SBarry Smith sum3 = 0.0; 7856cd98798SBarry Smith sum4 = 0.0; 7866cd98798SBarry Smith sum5 = 0.0; 7876cd98798SBarry Smith sum6 = 0.0; 7886cd98798SBarry Smith for (j=0; j<n; j++) { 7896cd98798SBarry Smith sum1 += v[jrow]*x[6*idx[jrow]]; 7906cd98798SBarry Smith sum2 += v[jrow]*x[6*idx[jrow]+1]; 7916cd98798SBarry Smith sum3 += v[jrow]*x[6*idx[jrow]+2]; 7926cd98798SBarry Smith sum4 += v[jrow]*x[6*idx[jrow]+3]; 7936cd98798SBarry Smith sum5 += v[jrow]*x[6*idx[jrow]+4]; 7946cd98798SBarry Smith sum6 += v[jrow]*x[6*idx[jrow]+5]; 7956cd98798SBarry Smith jrow++; 7966cd98798SBarry Smith } 7976cd98798SBarry Smith y[6*i] = sum1; 7986cd98798SBarry Smith y[6*i+1] = sum2; 7996cd98798SBarry Smith y[6*i+2] = sum3; 8006cd98798SBarry Smith y[6*i+3] = sum4; 8016cd98798SBarry Smith y[6*i+4] = sum5; 8026cd98798SBarry Smith y[6*i+5] = sum6; 8036cd98798SBarry Smith } 8046cd98798SBarry Smith 805b0a32e0cSBarry Smith PetscLogFlops(12*a->nz - 6*m); 806*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 807*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 8086cd98798SBarry Smith PetscFunctionReturn(0); 8096cd98798SBarry Smith } 8106cd98798SBarry Smith 8114a2ae208SSatish Balay #undef __FUNCT__ 812b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 8136cd98798SBarry Smith int MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 8146cd98798SBarry Smith { 8156cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 8166cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 81787828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0; 8186cd98798SBarry Smith int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 8196cd98798SBarry Smith 8206cd98798SBarry Smith PetscFunctionBegin; 8216cd98798SBarry Smith ierr = VecSet(&zero,yy);CHKERRQ(ierr); 822*2f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 823*2f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 8246cd98798SBarry Smith y = y + shift; /* shift for Fortran start by 1 indexing */ 8256cd98798SBarry Smith for (i=0; i<m; i++) { 8266cd98798SBarry Smith idx = a->j + a->i[i] + shift; 8276cd98798SBarry Smith v = a->a + a->i[i] + shift; 8286cd98798SBarry Smith n = a->i[i+1] - a->i[i]; 8296cd98798SBarry Smith alpha1 = x[6*i]; 8306cd98798SBarry Smith alpha2 = x[6*i+1]; 8316cd98798SBarry Smith alpha3 = x[6*i+2]; 8326cd98798SBarry Smith alpha4 = x[6*i+3]; 8336cd98798SBarry Smith alpha5 = x[6*i+4]; 8346cd98798SBarry Smith alpha6 = x[6*i+5]; 8356cd98798SBarry Smith while (n-->0) { 8366cd98798SBarry Smith y[6*(*idx)] += alpha1*(*v); 8376cd98798SBarry Smith y[6*(*idx)+1] += alpha2*(*v); 8386cd98798SBarry Smith y[6*(*idx)+2] += alpha3*(*v); 8396cd98798SBarry Smith y[6*(*idx)+3] += alpha4*(*v); 8406cd98798SBarry Smith y[6*(*idx)+4] += alpha5*(*v); 8416cd98798SBarry Smith y[6*(*idx)+5] += alpha6*(*v); 8426cd98798SBarry Smith idx++; v++; 8436cd98798SBarry Smith } 8446cd98798SBarry Smith } 845b0a32e0cSBarry Smith PetscLogFlops(12*a->nz - 6*b->AIJ->n); 846*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 847*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 8486cd98798SBarry Smith PetscFunctionReturn(0); 8496cd98798SBarry Smith } 8506cd98798SBarry Smith 8514a2ae208SSatish Balay #undef __FUNCT__ 852b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 8536cd98798SBarry Smith int MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 8546cd98798SBarry Smith { 8556cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 8566cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 85787828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 8586cd98798SBarry Smith int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 8596cd98798SBarry Smith int n,i,jrow,j; 8606cd98798SBarry Smith 8616cd98798SBarry Smith PetscFunctionBegin; 8626cd98798SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 863*2f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 864*2f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 8656cd98798SBarry Smith x = x + shift; /* shift for Fortran start by 1 indexing */ 8666cd98798SBarry Smith idx = a->j; 8676cd98798SBarry Smith v = a->a; 8686cd98798SBarry Smith ii = a->i; 8696cd98798SBarry Smith 8706cd98798SBarry Smith v += shift; /* shift for Fortran start by 1 indexing */ 8716cd98798SBarry Smith idx += shift; 8726cd98798SBarry Smith for (i=0; i<m; i++) { 8736cd98798SBarry Smith jrow = ii[i]; 8746cd98798SBarry Smith n = ii[i+1] - jrow; 8756cd98798SBarry Smith sum1 = 0.0; 8766cd98798SBarry Smith sum2 = 0.0; 8776cd98798SBarry Smith sum3 = 0.0; 8786cd98798SBarry Smith sum4 = 0.0; 8796cd98798SBarry Smith sum5 = 0.0; 8806cd98798SBarry Smith sum6 = 0.0; 8816cd98798SBarry Smith for (j=0; j<n; j++) { 8826cd98798SBarry Smith sum1 += v[jrow]*x[6*idx[jrow]]; 8836cd98798SBarry Smith sum2 += v[jrow]*x[6*idx[jrow]+1]; 8846cd98798SBarry Smith sum3 += v[jrow]*x[6*idx[jrow]+2]; 8856cd98798SBarry Smith sum4 += v[jrow]*x[6*idx[jrow]+3]; 8866cd98798SBarry Smith sum5 += v[jrow]*x[6*idx[jrow]+4]; 8876cd98798SBarry Smith sum6 += v[jrow]*x[6*idx[jrow]+5]; 8886cd98798SBarry Smith jrow++; 8896cd98798SBarry Smith } 8906cd98798SBarry Smith y[6*i] += sum1; 8916cd98798SBarry Smith y[6*i+1] += sum2; 8926cd98798SBarry Smith y[6*i+2] += sum3; 8936cd98798SBarry Smith y[6*i+3] += sum4; 8946cd98798SBarry Smith y[6*i+4] += sum5; 8956cd98798SBarry Smith y[6*i+5] += sum6; 8966cd98798SBarry Smith } 8976cd98798SBarry Smith 898b0a32e0cSBarry Smith PetscLogFlops(12*a->nz); 899*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 900*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 9016cd98798SBarry Smith PetscFunctionReturn(0); 9026cd98798SBarry Smith } 9036cd98798SBarry Smith 9044a2ae208SSatish Balay #undef __FUNCT__ 905b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 9066cd98798SBarry Smith int MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 9076cd98798SBarry Smith { 9086cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 9096cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 91087828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 9116cd98798SBarry Smith int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 9126cd98798SBarry Smith 9136cd98798SBarry Smith PetscFunctionBegin; 9146cd98798SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 915*2f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 916*2f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 9176cd98798SBarry Smith y = y + shift; /* shift for Fortran start by 1 indexing */ 9186cd98798SBarry Smith for (i=0; i<m; i++) { 9196cd98798SBarry Smith idx = a->j + a->i[i] + shift; 9206cd98798SBarry Smith v = a->a + a->i[i] + shift; 9216cd98798SBarry Smith n = a->i[i+1] - a->i[i]; 9226cd98798SBarry Smith alpha1 = x[6*i]; 9236cd98798SBarry Smith alpha2 = x[6*i+1]; 9246cd98798SBarry Smith alpha3 = x[6*i+2]; 9256cd98798SBarry Smith alpha4 = x[6*i+3]; 9266cd98798SBarry Smith alpha5 = x[6*i+4]; 9276cd98798SBarry Smith alpha6 = x[6*i+5]; 9286cd98798SBarry Smith while (n-->0) { 9296cd98798SBarry Smith y[6*(*idx)] += alpha1*(*v); 9306cd98798SBarry Smith y[6*(*idx)+1] += alpha2*(*v); 9316cd98798SBarry Smith y[6*(*idx)+2] += alpha3*(*v); 9326cd98798SBarry Smith y[6*(*idx)+3] += alpha4*(*v); 9336cd98798SBarry Smith y[6*(*idx)+4] += alpha5*(*v); 9346cd98798SBarry Smith y[6*(*idx)+5] += alpha6*(*v); 9356cd98798SBarry Smith idx++; v++; 9366cd98798SBarry Smith } 9376cd98798SBarry Smith } 938b0a32e0cSBarry Smith PetscLogFlops(12*a->nz); 939*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 940*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 9416cd98798SBarry Smith PetscFunctionReturn(0); 9426cd98798SBarry Smith } 9436cd98798SBarry Smith 94466ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/ 94566ed3db0SBarry Smith #undef __FUNCT__ 94666ed3db0SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_8" 94766ed3db0SBarry Smith int MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 94866ed3db0SBarry Smith { 94966ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 95066ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 95187828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 95266ed3db0SBarry Smith int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 95366ed3db0SBarry Smith int n,i,jrow,j; 95466ed3db0SBarry Smith 95566ed3db0SBarry Smith PetscFunctionBegin; 956*2f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 957*2f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 95866ed3db0SBarry Smith x = x + shift; /* shift for Fortran start by 1 indexing */ 95966ed3db0SBarry Smith idx = a->j; 96066ed3db0SBarry Smith v = a->a; 96166ed3db0SBarry Smith ii = a->i; 96266ed3db0SBarry Smith 96366ed3db0SBarry Smith v += shift; /* shift for Fortran start by 1 indexing */ 96466ed3db0SBarry Smith idx += shift; 96566ed3db0SBarry Smith for (i=0; i<m; i++) { 96666ed3db0SBarry Smith jrow = ii[i]; 96766ed3db0SBarry Smith n = ii[i+1] - jrow; 96866ed3db0SBarry Smith sum1 = 0.0; 96966ed3db0SBarry Smith sum2 = 0.0; 97066ed3db0SBarry Smith sum3 = 0.0; 97166ed3db0SBarry Smith sum4 = 0.0; 97266ed3db0SBarry Smith sum5 = 0.0; 97366ed3db0SBarry Smith sum6 = 0.0; 97466ed3db0SBarry Smith sum7 = 0.0; 97566ed3db0SBarry Smith sum8 = 0.0; 97666ed3db0SBarry Smith for (j=0; j<n; j++) { 97766ed3db0SBarry Smith sum1 += v[jrow]*x[8*idx[jrow]]; 97866ed3db0SBarry Smith sum2 += v[jrow]*x[8*idx[jrow]+1]; 97966ed3db0SBarry Smith sum3 += v[jrow]*x[8*idx[jrow]+2]; 98066ed3db0SBarry Smith sum4 += v[jrow]*x[8*idx[jrow]+3]; 98166ed3db0SBarry Smith sum5 += v[jrow]*x[8*idx[jrow]+4]; 98266ed3db0SBarry Smith sum6 += v[jrow]*x[8*idx[jrow]+5]; 98366ed3db0SBarry Smith sum7 += v[jrow]*x[8*idx[jrow]+6]; 98466ed3db0SBarry Smith sum8 += v[jrow]*x[8*idx[jrow]+7]; 98566ed3db0SBarry Smith jrow++; 98666ed3db0SBarry Smith } 98766ed3db0SBarry Smith y[8*i] = sum1; 98866ed3db0SBarry Smith y[8*i+1] = sum2; 98966ed3db0SBarry Smith y[8*i+2] = sum3; 99066ed3db0SBarry Smith y[8*i+3] = sum4; 99166ed3db0SBarry Smith y[8*i+4] = sum5; 99266ed3db0SBarry Smith y[8*i+5] = sum6; 99366ed3db0SBarry Smith y[8*i+6] = sum7; 99466ed3db0SBarry Smith y[8*i+7] = sum8; 99566ed3db0SBarry Smith } 99666ed3db0SBarry Smith 99766ed3db0SBarry Smith PetscLogFlops(16*a->nz - 8*m); 998*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 999*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 100066ed3db0SBarry Smith PetscFunctionReturn(0); 100166ed3db0SBarry Smith } 100266ed3db0SBarry Smith 100366ed3db0SBarry Smith #undef __FUNCT__ 100466ed3db0SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 100566ed3db0SBarry Smith int MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 100666ed3db0SBarry Smith { 100766ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 100866ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 100987828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 101066ed3db0SBarry Smith int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 101166ed3db0SBarry Smith 101266ed3db0SBarry Smith PetscFunctionBegin; 101366ed3db0SBarry Smith ierr = VecSet(&zero,yy);CHKERRQ(ierr); 1014*2f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 1015*2f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 101666ed3db0SBarry Smith y = y + shift; /* shift for Fortran start by 1 indexing */ 101766ed3db0SBarry Smith for (i=0; i<m; i++) { 101866ed3db0SBarry Smith idx = a->j + a->i[i] + shift; 101966ed3db0SBarry Smith v = a->a + a->i[i] + shift; 102066ed3db0SBarry Smith n = a->i[i+1] - a->i[i]; 102166ed3db0SBarry Smith alpha1 = x[8*i]; 102266ed3db0SBarry Smith alpha2 = x[8*i+1]; 102366ed3db0SBarry Smith alpha3 = x[8*i+2]; 102466ed3db0SBarry Smith alpha4 = x[8*i+3]; 102566ed3db0SBarry Smith alpha5 = x[8*i+4]; 102666ed3db0SBarry Smith alpha6 = x[8*i+5]; 102766ed3db0SBarry Smith alpha7 = x[8*i+6]; 102866ed3db0SBarry Smith alpha8 = x[8*i+7]; 102966ed3db0SBarry Smith while (n-->0) { 103066ed3db0SBarry Smith y[8*(*idx)] += alpha1*(*v); 103166ed3db0SBarry Smith y[8*(*idx)+1] += alpha2*(*v); 103266ed3db0SBarry Smith y[8*(*idx)+2] += alpha3*(*v); 103366ed3db0SBarry Smith y[8*(*idx)+3] += alpha4*(*v); 103466ed3db0SBarry Smith y[8*(*idx)+4] += alpha5*(*v); 103566ed3db0SBarry Smith y[8*(*idx)+5] += alpha6*(*v); 103666ed3db0SBarry Smith y[8*(*idx)+6] += alpha7*(*v); 103766ed3db0SBarry Smith y[8*(*idx)+7] += alpha8*(*v); 103866ed3db0SBarry Smith idx++; v++; 103966ed3db0SBarry Smith } 104066ed3db0SBarry Smith } 104166ed3db0SBarry Smith PetscLogFlops(16*a->nz - 8*b->AIJ->n); 1042*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1043*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 104466ed3db0SBarry Smith PetscFunctionReturn(0); 104566ed3db0SBarry Smith } 104666ed3db0SBarry Smith 104766ed3db0SBarry Smith #undef __FUNCT__ 104866ed3db0SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 104966ed3db0SBarry Smith int MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 105066ed3db0SBarry Smith { 105166ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 105266ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 105387828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 105466ed3db0SBarry Smith int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 105566ed3db0SBarry Smith int n,i,jrow,j; 105666ed3db0SBarry Smith 105766ed3db0SBarry Smith PetscFunctionBegin; 105866ed3db0SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1059*2f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 1060*2f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 106166ed3db0SBarry Smith x = x + shift; /* shift for Fortran start by 1 indexing */ 106266ed3db0SBarry Smith idx = a->j; 106366ed3db0SBarry Smith v = a->a; 106466ed3db0SBarry Smith ii = a->i; 106566ed3db0SBarry Smith 106666ed3db0SBarry Smith v += shift; /* shift for Fortran start by 1 indexing */ 106766ed3db0SBarry Smith idx += shift; 106866ed3db0SBarry Smith for (i=0; i<m; i++) { 106966ed3db0SBarry Smith jrow = ii[i]; 107066ed3db0SBarry Smith n = ii[i+1] - jrow; 107166ed3db0SBarry Smith sum1 = 0.0; 107266ed3db0SBarry Smith sum2 = 0.0; 107366ed3db0SBarry Smith sum3 = 0.0; 107466ed3db0SBarry Smith sum4 = 0.0; 107566ed3db0SBarry Smith sum5 = 0.0; 107666ed3db0SBarry Smith sum6 = 0.0; 107766ed3db0SBarry Smith sum7 = 0.0; 107866ed3db0SBarry Smith sum8 = 0.0; 107966ed3db0SBarry Smith for (j=0; j<n; j++) { 108066ed3db0SBarry Smith sum1 += v[jrow]*x[8*idx[jrow]]; 108166ed3db0SBarry Smith sum2 += v[jrow]*x[8*idx[jrow]+1]; 108266ed3db0SBarry Smith sum3 += v[jrow]*x[8*idx[jrow]+2]; 108366ed3db0SBarry Smith sum4 += v[jrow]*x[8*idx[jrow]+3]; 108466ed3db0SBarry Smith sum5 += v[jrow]*x[8*idx[jrow]+4]; 108566ed3db0SBarry Smith sum6 += v[jrow]*x[8*idx[jrow]+5]; 108666ed3db0SBarry Smith sum7 += v[jrow]*x[8*idx[jrow]+6]; 108766ed3db0SBarry Smith sum8 += v[jrow]*x[8*idx[jrow]+7]; 108866ed3db0SBarry Smith jrow++; 108966ed3db0SBarry Smith } 109066ed3db0SBarry Smith y[8*i] += sum1; 109166ed3db0SBarry Smith y[8*i+1] += sum2; 109266ed3db0SBarry Smith y[8*i+2] += sum3; 109366ed3db0SBarry Smith y[8*i+3] += sum4; 109466ed3db0SBarry Smith y[8*i+4] += sum5; 109566ed3db0SBarry Smith y[8*i+5] += sum6; 109666ed3db0SBarry Smith y[8*i+6] += sum7; 109766ed3db0SBarry Smith y[8*i+7] += sum8; 109866ed3db0SBarry Smith } 109966ed3db0SBarry Smith 110066ed3db0SBarry Smith PetscLogFlops(16*a->nz); 1101*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1102*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 110366ed3db0SBarry Smith PetscFunctionReturn(0); 110466ed3db0SBarry Smith } 110566ed3db0SBarry Smith 110666ed3db0SBarry Smith #undef __FUNCT__ 110766ed3db0SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 110866ed3db0SBarry Smith int MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 110966ed3db0SBarry Smith { 111066ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 111166ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 111287828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 111366ed3db0SBarry Smith int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 111466ed3db0SBarry Smith 111566ed3db0SBarry Smith PetscFunctionBegin; 111666ed3db0SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1117*2f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 1118*2f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 111966ed3db0SBarry Smith y = y + shift; /* shift for Fortran start by 1 indexing */ 112066ed3db0SBarry Smith for (i=0; i<m; i++) { 112166ed3db0SBarry Smith idx = a->j + a->i[i] + shift; 112266ed3db0SBarry Smith v = a->a + a->i[i] + shift; 112366ed3db0SBarry Smith n = a->i[i+1] - a->i[i]; 112466ed3db0SBarry Smith alpha1 = x[8*i]; 112566ed3db0SBarry Smith alpha2 = x[8*i+1]; 112666ed3db0SBarry Smith alpha3 = x[8*i+2]; 112766ed3db0SBarry Smith alpha4 = x[8*i+3]; 112866ed3db0SBarry Smith alpha5 = x[8*i+4]; 112966ed3db0SBarry Smith alpha6 = x[8*i+5]; 113066ed3db0SBarry Smith alpha7 = x[8*i+6]; 113166ed3db0SBarry Smith alpha8 = x[8*i+7]; 113266ed3db0SBarry Smith while (n-->0) { 113366ed3db0SBarry Smith y[8*(*idx)] += alpha1*(*v); 113466ed3db0SBarry Smith y[8*(*idx)+1] += alpha2*(*v); 113566ed3db0SBarry Smith y[8*(*idx)+2] += alpha3*(*v); 113666ed3db0SBarry Smith y[8*(*idx)+3] += alpha4*(*v); 113766ed3db0SBarry Smith y[8*(*idx)+4] += alpha5*(*v); 113866ed3db0SBarry Smith y[8*(*idx)+5] += alpha6*(*v); 113966ed3db0SBarry Smith y[8*(*idx)+6] += alpha7*(*v); 114066ed3db0SBarry Smith y[8*(*idx)+7] += alpha8*(*v); 114166ed3db0SBarry Smith idx++; v++; 114266ed3db0SBarry Smith } 114366ed3db0SBarry Smith } 114466ed3db0SBarry Smith PetscLogFlops(16*a->nz); 1145*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1146*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 1147*2f7816d4SBarry Smith PetscFunctionReturn(0); 1148*2f7816d4SBarry Smith } 1149*2f7816d4SBarry Smith 1150*2f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/ 1151*2f7816d4SBarry Smith #undef __FUNCT__ 1152*2f7816d4SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_16" 1153*2f7816d4SBarry Smith int MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 1154*2f7816d4SBarry Smith { 1155*2f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1156*2f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1157*2f7816d4SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1158*2f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1159*2f7816d4SBarry Smith int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 1160*2f7816d4SBarry Smith int n,i,jrow,j; 1161*2f7816d4SBarry Smith 1162*2f7816d4SBarry Smith PetscFunctionBegin; 1163*2f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 1164*2f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 1165*2f7816d4SBarry Smith x = x + shift; /* shift for Fortran start by 1 indexing */ 1166*2f7816d4SBarry Smith idx = a->j; 1167*2f7816d4SBarry Smith v = a->a; 1168*2f7816d4SBarry Smith ii = a->i; 1169*2f7816d4SBarry Smith 1170*2f7816d4SBarry Smith v += shift; /* shift for Fortran start by 1 indexing */ 1171*2f7816d4SBarry Smith idx += shift; 1172*2f7816d4SBarry Smith for (i=0; i<m; i++) { 1173*2f7816d4SBarry Smith jrow = ii[i]; 1174*2f7816d4SBarry Smith n = ii[i+1] - jrow; 1175*2f7816d4SBarry Smith sum1 = 0.0; 1176*2f7816d4SBarry Smith sum2 = 0.0; 1177*2f7816d4SBarry Smith sum3 = 0.0; 1178*2f7816d4SBarry Smith sum4 = 0.0; 1179*2f7816d4SBarry Smith sum5 = 0.0; 1180*2f7816d4SBarry Smith sum6 = 0.0; 1181*2f7816d4SBarry Smith sum7 = 0.0; 1182*2f7816d4SBarry Smith sum8 = 0.0; 1183*2f7816d4SBarry Smith sum9 = 0.0; 1184*2f7816d4SBarry Smith sum10 = 0.0; 1185*2f7816d4SBarry Smith sum11 = 0.0; 1186*2f7816d4SBarry Smith sum12 = 0.0; 1187*2f7816d4SBarry Smith sum13 = 0.0; 1188*2f7816d4SBarry Smith sum14 = 0.0; 1189*2f7816d4SBarry Smith sum15 = 0.0; 1190*2f7816d4SBarry Smith sum16 = 0.0; 1191*2f7816d4SBarry Smith for (j=0; j<n; j++) { 1192*2f7816d4SBarry Smith sum1 += v[jrow]*x[16*idx[jrow]]; 1193*2f7816d4SBarry Smith sum2 += v[jrow]*x[16*idx[jrow]+1]; 1194*2f7816d4SBarry Smith sum3 += v[jrow]*x[16*idx[jrow]+2]; 1195*2f7816d4SBarry Smith sum4 += v[jrow]*x[16*idx[jrow]+3]; 1196*2f7816d4SBarry Smith sum5 += v[jrow]*x[16*idx[jrow]+4]; 1197*2f7816d4SBarry Smith sum6 += v[jrow]*x[16*idx[jrow]+5]; 1198*2f7816d4SBarry Smith sum7 += v[jrow]*x[16*idx[jrow]+6]; 1199*2f7816d4SBarry Smith sum8 += v[jrow]*x[16*idx[jrow]+7]; 1200*2f7816d4SBarry Smith sum9 += v[jrow]*x[16*idx[jrow]+8]; 1201*2f7816d4SBarry Smith sum10 += v[jrow]*x[16*idx[jrow]+9]; 1202*2f7816d4SBarry Smith sum11 += v[jrow]*x[16*idx[jrow]+10]; 1203*2f7816d4SBarry Smith sum12 += v[jrow]*x[16*idx[jrow]+11]; 1204*2f7816d4SBarry Smith sum13 += v[jrow]*x[16*idx[jrow]+12]; 1205*2f7816d4SBarry Smith sum14 += v[jrow]*x[16*idx[jrow]+13]; 1206*2f7816d4SBarry Smith sum15 += v[jrow]*x[16*idx[jrow]+14]; 1207*2f7816d4SBarry Smith sum16 += v[jrow]*x[16*idx[jrow]+15]; 1208*2f7816d4SBarry Smith jrow++; 1209*2f7816d4SBarry Smith } 1210*2f7816d4SBarry Smith y[16*i] = sum1; 1211*2f7816d4SBarry Smith y[16*i+1] = sum2; 1212*2f7816d4SBarry Smith y[16*i+2] = sum3; 1213*2f7816d4SBarry Smith y[16*i+3] = sum4; 1214*2f7816d4SBarry Smith y[16*i+4] = sum5; 1215*2f7816d4SBarry Smith y[16*i+5] = sum6; 1216*2f7816d4SBarry Smith y[16*i+6] = sum7; 1217*2f7816d4SBarry Smith y[16*i+7] = sum8; 1218*2f7816d4SBarry Smith y[16*i+8] = sum9; 1219*2f7816d4SBarry Smith y[16*i+9] = sum10; 1220*2f7816d4SBarry Smith y[16*i+10] = sum11; 1221*2f7816d4SBarry Smith y[16*i+11] = sum12; 1222*2f7816d4SBarry Smith y[16*i+12] = sum13; 1223*2f7816d4SBarry Smith y[16*i+13] = sum14; 1224*2f7816d4SBarry Smith y[16*i+14] = sum15; 1225*2f7816d4SBarry Smith y[16*i+15] = sum16; 1226*2f7816d4SBarry Smith } 1227*2f7816d4SBarry Smith 1228*2f7816d4SBarry Smith PetscLogFlops(32*a->nz - 16*m); 1229*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1230*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 1231*2f7816d4SBarry Smith PetscFunctionReturn(0); 1232*2f7816d4SBarry Smith } 1233*2f7816d4SBarry Smith 1234*2f7816d4SBarry Smith #undef __FUNCT__ 1235*2f7816d4SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 1236*2f7816d4SBarry Smith int MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 1237*2f7816d4SBarry Smith { 1238*2f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1239*2f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1240*2f7816d4SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1241*2f7816d4SBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1242*2f7816d4SBarry Smith int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 1243*2f7816d4SBarry Smith 1244*2f7816d4SBarry Smith PetscFunctionBegin; 1245*2f7816d4SBarry Smith ierr = VecSet(&zero,yy);CHKERRQ(ierr); 1246*2f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 1247*2f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 1248*2f7816d4SBarry Smith y = y + shift; /* shift for Fortran start by 1 indexing */ 1249*2f7816d4SBarry Smith for (i=0; i<m; i++) { 1250*2f7816d4SBarry Smith idx = a->j + a->i[i] + shift; 1251*2f7816d4SBarry Smith v = a->a + a->i[i] + shift; 1252*2f7816d4SBarry Smith n = a->i[i+1] - a->i[i]; 1253*2f7816d4SBarry Smith alpha1 = x[16*i]; 1254*2f7816d4SBarry Smith alpha2 = x[16*i+1]; 1255*2f7816d4SBarry Smith alpha3 = x[16*i+2]; 1256*2f7816d4SBarry Smith alpha4 = x[16*i+3]; 1257*2f7816d4SBarry Smith alpha5 = x[16*i+4]; 1258*2f7816d4SBarry Smith alpha6 = x[16*i+5]; 1259*2f7816d4SBarry Smith alpha7 = x[16*i+6]; 1260*2f7816d4SBarry Smith alpha8 = x[16*i+7]; 1261*2f7816d4SBarry Smith alpha9 = x[16*i+8]; 1262*2f7816d4SBarry Smith alpha10 = x[16*i+9]; 1263*2f7816d4SBarry Smith alpha11 = x[16*i+10]; 1264*2f7816d4SBarry Smith alpha12 = x[16*i+11]; 1265*2f7816d4SBarry Smith alpha13 = x[16*i+12]; 1266*2f7816d4SBarry Smith alpha14 = x[16*i+13]; 1267*2f7816d4SBarry Smith alpha15 = x[16*i+14]; 1268*2f7816d4SBarry Smith alpha16 = x[16*i+15]; 1269*2f7816d4SBarry Smith while (n-->0) { 1270*2f7816d4SBarry Smith y[16*(*idx)] += alpha1*(*v); 1271*2f7816d4SBarry Smith y[16*(*idx)+1] += alpha2*(*v); 1272*2f7816d4SBarry Smith y[16*(*idx)+2] += alpha3*(*v); 1273*2f7816d4SBarry Smith y[16*(*idx)+3] += alpha4*(*v); 1274*2f7816d4SBarry Smith y[16*(*idx)+4] += alpha5*(*v); 1275*2f7816d4SBarry Smith y[16*(*idx)+5] += alpha6*(*v); 1276*2f7816d4SBarry Smith y[16*(*idx)+6] += alpha7*(*v); 1277*2f7816d4SBarry Smith y[16*(*idx)+7] += alpha8*(*v); 1278*2f7816d4SBarry Smith y[16*(*idx)+8] += alpha9*(*v); 1279*2f7816d4SBarry Smith y[16*(*idx)+9] += alpha10*(*v); 1280*2f7816d4SBarry Smith y[16*(*idx)+10] += alpha11*(*v); 1281*2f7816d4SBarry Smith y[16*(*idx)+11] += alpha12*(*v); 1282*2f7816d4SBarry Smith y[16*(*idx)+12] += alpha13*(*v); 1283*2f7816d4SBarry Smith y[16*(*idx)+13] += alpha14*(*v); 1284*2f7816d4SBarry Smith y[16*(*idx)+14] += alpha15*(*v); 1285*2f7816d4SBarry Smith y[16*(*idx)+15] += alpha16*(*v); 1286*2f7816d4SBarry Smith idx++; v++; 1287*2f7816d4SBarry Smith } 1288*2f7816d4SBarry Smith } 1289*2f7816d4SBarry Smith PetscLogFlops(32*a->nz - 16*b->AIJ->n); 1290*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1291*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 1292*2f7816d4SBarry Smith PetscFunctionReturn(0); 1293*2f7816d4SBarry Smith } 1294*2f7816d4SBarry Smith 1295*2f7816d4SBarry Smith #undef __FUNCT__ 1296*2f7816d4SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 1297*2f7816d4SBarry Smith int MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 1298*2f7816d4SBarry Smith { 1299*2f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1300*2f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1301*2f7816d4SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1302*2f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1303*2f7816d4SBarry Smith int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 1304*2f7816d4SBarry Smith int n,i,jrow,j; 1305*2f7816d4SBarry Smith 1306*2f7816d4SBarry Smith PetscFunctionBegin; 1307*2f7816d4SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1308*2f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 1309*2f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 1310*2f7816d4SBarry Smith x = x + shift; /* shift for Fortran start by 1 indexing */ 1311*2f7816d4SBarry Smith idx = a->j; 1312*2f7816d4SBarry Smith v = a->a; 1313*2f7816d4SBarry Smith ii = a->i; 1314*2f7816d4SBarry Smith 1315*2f7816d4SBarry Smith v += shift; /* shift for Fortran start by 1 indexing */ 1316*2f7816d4SBarry Smith idx += shift; 1317*2f7816d4SBarry Smith for (i=0; i<m; i++) { 1318*2f7816d4SBarry Smith jrow = ii[i]; 1319*2f7816d4SBarry Smith n = ii[i+1] - jrow; 1320*2f7816d4SBarry Smith sum1 = 0.0; 1321*2f7816d4SBarry Smith sum2 = 0.0; 1322*2f7816d4SBarry Smith sum3 = 0.0; 1323*2f7816d4SBarry Smith sum4 = 0.0; 1324*2f7816d4SBarry Smith sum5 = 0.0; 1325*2f7816d4SBarry Smith sum6 = 0.0; 1326*2f7816d4SBarry Smith sum7 = 0.0; 1327*2f7816d4SBarry Smith sum8 = 0.0; 1328*2f7816d4SBarry Smith sum9 = 0.0; 1329*2f7816d4SBarry Smith sum10 = 0.0; 1330*2f7816d4SBarry Smith sum11 = 0.0; 1331*2f7816d4SBarry Smith sum12 = 0.0; 1332*2f7816d4SBarry Smith sum13 = 0.0; 1333*2f7816d4SBarry Smith sum14 = 0.0; 1334*2f7816d4SBarry Smith sum15 = 0.0; 1335*2f7816d4SBarry Smith sum16 = 0.0; 1336*2f7816d4SBarry Smith for (j=0; j<n; j++) { 1337*2f7816d4SBarry Smith sum1 += v[jrow]*x[16*idx[jrow]]; 1338*2f7816d4SBarry Smith sum2 += v[jrow]*x[16*idx[jrow]+1]; 1339*2f7816d4SBarry Smith sum3 += v[jrow]*x[16*idx[jrow]+2]; 1340*2f7816d4SBarry Smith sum4 += v[jrow]*x[16*idx[jrow]+3]; 1341*2f7816d4SBarry Smith sum5 += v[jrow]*x[16*idx[jrow]+4]; 1342*2f7816d4SBarry Smith sum6 += v[jrow]*x[16*idx[jrow]+5]; 1343*2f7816d4SBarry Smith sum7 += v[jrow]*x[16*idx[jrow]+6]; 1344*2f7816d4SBarry Smith sum8 += v[jrow]*x[16*idx[jrow]+7]; 1345*2f7816d4SBarry Smith sum9 += v[jrow]*x[16*idx[jrow]+8]; 1346*2f7816d4SBarry Smith sum10 += v[jrow]*x[16*idx[jrow]+9]; 1347*2f7816d4SBarry Smith sum11 += v[jrow]*x[16*idx[jrow]+10]; 1348*2f7816d4SBarry Smith sum12 += v[jrow]*x[16*idx[jrow]+11]; 1349*2f7816d4SBarry Smith sum13 += v[jrow]*x[16*idx[jrow]+12]; 1350*2f7816d4SBarry Smith sum14 += v[jrow]*x[16*idx[jrow]+13]; 1351*2f7816d4SBarry Smith sum15 += v[jrow]*x[16*idx[jrow]+14]; 1352*2f7816d4SBarry Smith sum16 += v[jrow]*x[16*idx[jrow]+15]; 1353*2f7816d4SBarry Smith jrow++; 1354*2f7816d4SBarry Smith } 1355*2f7816d4SBarry Smith y[16*i] += sum1; 1356*2f7816d4SBarry Smith y[16*i+1] += sum2; 1357*2f7816d4SBarry Smith y[16*i+2] += sum3; 1358*2f7816d4SBarry Smith y[16*i+3] += sum4; 1359*2f7816d4SBarry Smith y[16*i+4] += sum5; 1360*2f7816d4SBarry Smith y[16*i+5] += sum6; 1361*2f7816d4SBarry Smith y[16*i+6] += sum7; 1362*2f7816d4SBarry Smith y[16*i+7] += sum8; 1363*2f7816d4SBarry Smith y[16*i+8] += sum9; 1364*2f7816d4SBarry Smith y[16*i+9] += sum10; 1365*2f7816d4SBarry Smith y[16*i+10] += sum11; 1366*2f7816d4SBarry Smith y[16*i+11] += sum12; 1367*2f7816d4SBarry Smith y[16*i+12] += sum13; 1368*2f7816d4SBarry Smith y[16*i+13] += sum14; 1369*2f7816d4SBarry Smith y[16*i+14] += sum15; 1370*2f7816d4SBarry Smith y[16*i+15] += sum16; 1371*2f7816d4SBarry Smith } 1372*2f7816d4SBarry Smith 1373*2f7816d4SBarry Smith PetscLogFlops(32*a->nz); 1374*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1375*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 1376*2f7816d4SBarry Smith PetscFunctionReturn(0); 1377*2f7816d4SBarry Smith } 1378*2f7816d4SBarry Smith 1379*2f7816d4SBarry Smith #undef __FUNCT__ 1380*2f7816d4SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 1381*2f7816d4SBarry Smith int MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 1382*2f7816d4SBarry Smith { 1383*2f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1384*2f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1385*2f7816d4SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1386*2f7816d4SBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1387*2f7816d4SBarry Smith int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 1388*2f7816d4SBarry Smith 1389*2f7816d4SBarry Smith PetscFunctionBegin; 1390*2f7816d4SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1391*2f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 1392*2f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 1393*2f7816d4SBarry Smith y = y + shift; /* shift for Fortran start by 1 indexing */ 1394*2f7816d4SBarry Smith for (i=0; i<m; i++) { 1395*2f7816d4SBarry Smith idx = a->j + a->i[i] + shift; 1396*2f7816d4SBarry Smith v = a->a + a->i[i] + shift; 1397*2f7816d4SBarry Smith n = a->i[i+1] - a->i[i]; 1398*2f7816d4SBarry Smith alpha1 = x[16*i]; 1399*2f7816d4SBarry Smith alpha2 = x[16*i+1]; 1400*2f7816d4SBarry Smith alpha3 = x[16*i+2]; 1401*2f7816d4SBarry Smith alpha4 = x[16*i+3]; 1402*2f7816d4SBarry Smith alpha5 = x[16*i+4]; 1403*2f7816d4SBarry Smith alpha6 = x[16*i+5]; 1404*2f7816d4SBarry Smith alpha7 = x[16*i+6]; 1405*2f7816d4SBarry Smith alpha8 = x[16*i+7]; 1406*2f7816d4SBarry Smith alpha9 = x[16*i+8]; 1407*2f7816d4SBarry Smith alpha10 = x[16*i+9]; 1408*2f7816d4SBarry Smith alpha11 = x[16*i+10]; 1409*2f7816d4SBarry Smith alpha12 = x[16*i+11]; 1410*2f7816d4SBarry Smith alpha13 = x[16*i+12]; 1411*2f7816d4SBarry Smith alpha14 = x[16*i+13]; 1412*2f7816d4SBarry Smith alpha15 = x[16*i+14]; 1413*2f7816d4SBarry Smith alpha16 = x[16*i+15]; 1414*2f7816d4SBarry Smith while (n-->0) { 1415*2f7816d4SBarry Smith y[16*(*idx)] += alpha1*(*v); 1416*2f7816d4SBarry Smith y[16*(*idx)+1] += alpha2*(*v); 1417*2f7816d4SBarry Smith y[16*(*idx)+2] += alpha3*(*v); 1418*2f7816d4SBarry Smith y[16*(*idx)+3] += alpha4*(*v); 1419*2f7816d4SBarry Smith y[16*(*idx)+4] += alpha5*(*v); 1420*2f7816d4SBarry Smith y[16*(*idx)+5] += alpha6*(*v); 1421*2f7816d4SBarry Smith y[16*(*idx)+6] += alpha7*(*v); 1422*2f7816d4SBarry Smith y[16*(*idx)+7] += alpha8*(*v); 1423*2f7816d4SBarry Smith y[16*(*idx)+8] += alpha9*(*v); 1424*2f7816d4SBarry Smith y[16*(*idx)+9] += alpha10*(*v); 1425*2f7816d4SBarry Smith y[16*(*idx)+10] += alpha11*(*v); 1426*2f7816d4SBarry Smith y[16*(*idx)+11] += alpha12*(*v); 1427*2f7816d4SBarry Smith y[16*(*idx)+12] += alpha13*(*v); 1428*2f7816d4SBarry Smith y[16*(*idx)+13] += alpha14*(*v); 1429*2f7816d4SBarry Smith y[16*(*idx)+14] += alpha15*(*v); 1430*2f7816d4SBarry Smith y[16*(*idx)+15] += alpha16*(*v); 1431*2f7816d4SBarry Smith idx++; v++; 1432*2f7816d4SBarry Smith } 1433*2f7816d4SBarry Smith } 1434*2f7816d4SBarry Smith PetscLogFlops(32*a->nz); 1435*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1436*2f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 143766ed3db0SBarry Smith PetscFunctionReturn(0); 143866ed3db0SBarry Smith } 143966ed3db0SBarry Smith 1440f4a53059SBarry Smith /*===================================================================================*/ 14414a2ae208SSatish Balay #undef __FUNCT__ 14424a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIMAIJ_dof" 1443f4a53059SBarry Smith int MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 1444f4a53059SBarry Smith { 14454cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1446f4a53059SBarry Smith int ierr; 1447f4a53059SBarry Smith PetscFunctionBegin; 1448f4a53059SBarry Smith 1449f4a53059SBarry Smith /* start the scatter */ 1450f4a53059SBarry Smith ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 14514cb9d9b8SBarry Smith ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 1452f4a53059SBarry Smith ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 14534cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 1454f4a53059SBarry Smith PetscFunctionReturn(0); 1455f4a53059SBarry Smith } 1456f4a53059SBarry Smith 14574a2ae208SSatish Balay #undef __FUNCT__ 14584a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 14594cb9d9b8SBarry Smith int MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 14604cb9d9b8SBarry Smith { 14614cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 14624cb9d9b8SBarry Smith int ierr; 14634cb9d9b8SBarry Smith PetscFunctionBegin; 14644cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 14654cb9d9b8SBarry Smith ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 14664cb9d9b8SBarry Smith ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 14674cb9d9b8SBarry Smith ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 14684cb9d9b8SBarry Smith PetscFunctionReturn(0); 14694cb9d9b8SBarry Smith } 14704cb9d9b8SBarry Smith 14714a2ae208SSatish Balay #undef __FUNCT__ 14724a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 1473d72c5c08SBarry Smith int MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 14744cb9d9b8SBarry Smith { 14754cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 14764cb9d9b8SBarry Smith int ierr; 14774cb9d9b8SBarry Smith PetscFunctionBegin; 14784cb9d9b8SBarry Smith 14794cb9d9b8SBarry Smith /* start the scatter */ 14804cb9d9b8SBarry Smith ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1481d72c5c08SBarry Smith ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 14824cb9d9b8SBarry Smith ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1483d72c5c08SBarry Smith ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,zz);CHKERRQ(ierr); 14844cb9d9b8SBarry Smith PetscFunctionReturn(0); 14854cb9d9b8SBarry Smith } 14864cb9d9b8SBarry Smith 14874a2ae208SSatish Balay #undef __FUNCT__ 14884a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 1489d72c5c08SBarry Smith int MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 14904cb9d9b8SBarry Smith { 14914cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 14924cb9d9b8SBarry Smith int ierr; 14934cb9d9b8SBarry Smith PetscFunctionBegin; 14944cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 1495d72c5c08SBarry Smith ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1496d72c5c08SBarry Smith ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 1497d72c5c08SBarry Smith ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 14984cb9d9b8SBarry Smith PetscFunctionReturn(0); 14994cb9d9b8SBarry Smith } 15004cb9d9b8SBarry Smith 1501bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */ 15024a2ae208SSatish Balay #undef __FUNCT__ 15034a2ae208SSatish Balay #define __FUNCT__ "MatCreateMAIJ" 1504cd3bbe55SBarry Smith int MatCreateMAIJ(Mat A,int dof,Mat *maij) 150582b1193eSBarry Smith { 1506f4a53059SBarry Smith int ierr,size,n; 15074cb9d9b8SBarry Smith Mat_MPIMAIJ *b; 150882b1193eSBarry Smith Mat B; 150982b1193eSBarry Smith 151082b1193eSBarry Smith PetscFunctionBegin; 1511d72c5c08SBarry Smith ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 1512d72c5c08SBarry Smith 1513d72c5c08SBarry Smith if (dof == 1) { 1514d72c5c08SBarry Smith *maij = A; 1515d72c5c08SBarry Smith } else { 151683903688SBarry Smith ierr = MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);CHKERRQ(ierr); 1517cd3bbe55SBarry Smith B->assembled = PETSC_TRUE; 1518d72c5c08SBarry Smith 1519f4a53059SBarry Smith ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1520f4a53059SBarry Smith if (size == 1) { 1521b9b97703SBarry Smith ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 15224cb9d9b8SBarry Smith B->ops->destroy = MatDestroy_SeqMAIJ; 1523b9b97703SBarry Smith b = (Mat_MPIMAIJ*)B->data; 1524b9b97703SBarry Smith b->dof = dof; 15254cb9d9b8SBarry Smith b->AIJ = A; 1526d72c5c08SBarry Smith if (dof == 2) { 1527cd3bbe55SBarry Smith B->ops->mult = MatMult_SeqMAIJ_2; 1528cd3bbe55SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_2; 1529cd3bbe55SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 1530cd3bbe55SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 1531bcc973b7SBarry Smith } else if (dof == 3) { 1532bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_3; 1533bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_3; 1534bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 1535bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 1536bcc973b7SBarry Smith } else if (dof == 4) { 1537bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_4; 1538bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_4; 1539bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 1540bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 1541f9fae5adSBarry Smith } else if (dof == 5) { 1542f9fae5adSBarry Smith B->ops->mult = MatMult_SeqMAIJ_5; 1543f9fae5adSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_5; 1544f9fae5adSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 1545f9fae5adSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 15466cd98798SBarry Smith } else if (dof == 6) { 15476cd98798SBarry Smith B->ops->mult = MatMult_SeqMAIJ_6; 15486cd98798SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_6; 15496cd98798SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 15506cd98798SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 155166ed3db0SBarry Smith } else if (dof == 8) { 155266ed3db0SBarry Smith B->ops->mult = MatMult_SeqMAIJ_8; 155366ed3db0SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_8; 155466ed3db0SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 155566ed3db0SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 1556*2f7816d4SBarry Smith } else if (dof == 16) { 1557*2f7816d4SBarry Smith B->ops->mult = MatMult_SeqMAIJ_16; 1558*2f7816d4SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_16; 1559*2f7816d4SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 1560*2f7816d4SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 156182b1193eSBarry Smith } else { 156266ed3db0SBarry Smith SETERRQ1(1,"Cannot handle a dof of %d. Send request for code to petsc-maint@mcs.anl.gov\n",dof); 156382b1193eSBarry Smith } 1564f4a53059SBarry Smith } else { 1565f4a53059SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 1566f4a53059SBarry Smith IS from,to; 1567f4a53059SBarry Smith Vec gvec; 1568f4a53059SBarry Smith int *garray,i; 1569f4a53059SBarry Smith 1570b9b97703SBarry Smith ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 1571d72c5c08SBarry Smith B->ops->destroy = MatDestroy_MPIMAIJ; 1572b9b97703SBarry Smith b = (Mat_MPIMAIJ*)B->data; 1573b9b97703SBarry Smith b->dof = dof; 1574b9b97703SBarry Smith b->A = A; 15754cb9d9b8SBarry Smith ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 15764cb9d9b8SBarry Smith ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 15774cb9d9b8SBarry Smith 1578f4a53059SBarry Smith ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 1579f4a53059SBarry Smith ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr); 1580f4a53059SBarry Smith 1581f4a53059SBarry Smith /* create two temporary Index sets for build scatter gather */ 1582b0a32e0cSBarry Smith ierr = PetscMalloc((n+1)*sizeof(int),&garray);CHKERRQ(ierr); 1583f4a53059SBarry Smith for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i]; 1584f4a53059SBarry Smith ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr); 1585f4a53059SBarry Smith ierr = PetscFree(garray);CHKERRQ(ierr); 1586f4a53059SBarry Smith ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 1587f4a53059SBarry Smith 1588f4a53059SBarry Smith /* create temporary global vector to generate scatter context */ 1589f4a53059SBarry Smith ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr); 1590f4a53059SBarry Smith 1591f4a53059SBarry Smith /* generate the scatter context */ 1592f4a53059SBarry Smith ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 1593f4a53059SBarry Smith 1594f4a53059SBarry Smith ierr = ISDestroy(from);CHKERRQ(ierr); 1595f4a53059SBarry Smith ierr = ISDestroy(to);CHKERRQ(ierr); 1596f4a53059SBarry Smith ierr = VecDestroy(gvec);CHKERRQ(ierr); 1597f4a53059SBarry Smith 1598f4a53059SBarry Smith B->ops->mult = MatMult_MPIMAIJ_dof; 15994cb9d9b8SBarry Smith B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 16004cb9d9b8SBarry Smith B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 16014cb9d9b8SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 1602f4a53059SBarry Smith } 1603cd3bbe55SBarry Smith *maij = B; 1604d72c5c08SBarry Smith } 160582b1193eSBarry Smith PetscFunctionReturn(0); 160682b1193eSBarry Smith } 160782b1193eSBarry Smith 160882b1193eSBarry Smith 160982b1193eSBarry Smith 161082b1193eSBarry Smith 161182b1193eSBarry Smith 161282b1193eSBarry Smith 161382b1193eSBarry Smith 161482b1193eSBarry Smith 161582b1193eSBarry Smith 161682b1193eSBarry Smith 161782b1193eSBarry Smith 161882b1193eSBarry Smith 1619