173f4d377SMatthew Knepley /*$Id: maij.c,v 1.19 2001/08/07 03:03:00 balay Exp $*/ 282b1193eSBarry Smith /* 3cd3bbe55SBarry Smith Defines the basic matrix operations for the MAIJ matrix storage format. 4cd3bbe55SBarry Smith This format is used for restriction and interpolation operations for 5cd3bbe55SBarry Smith multicomponent problems. It interpolates each component the same way 6cd3bbe55SBarry Smith independently. 7cd3bbe55SBarry Smith 8cd3bbe55SBarry Smith We provide: 9cd3bbe55SBarry Smith MatMult() 10cd3bbe55SBarry Smith MatMultTranspose() 11cd3bbe55SBarry Smith MatMultTransposeAdd() 12cd3bbe55SBarry Smith MatMultAdd() 13cd3bbe55SBarry Smith and 14cd3bbe55SBarry Smith MatCreateMAIJ(Mat,dof,Mat*) 15f4a53059SBarry Smith 16f4a53059SBarry Smith This single directory handles both the sequential and parallel codes 1782b1193eSBarry Smith */ 1882b1193eSBarry Smith 19be911618SKris Buschelman #include "src/mat/impls/maij/maij.h" 202f7816d4SBarry Smith #include "src/vec/vecimpl.h" 2182b1193eSBarry Smith 224a2ae208SSatish Balay #undef __FUNCT__ 234a2ae208SSatish Balay #define __FUNCT__ "MatMAIJGetAIJ" 24b9b97703SBarry Smith int MatMAIJGetAIJ(Mat A,Mat *B) 25b9b97703SBarry Smith { 26b9b97703SBarry Smith int ierr; 27b9b97703SBarry Smith PetscTruth ismpimaij,isseqmaij; 28b9b97703SBarry Smith 29b9b97703SBarry Smith PetscFunctionBegin; 30b9b97703SBarry Smith ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 31b9b97703SBarry Smith ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 32b9b97703SBarry Smith if (ismpimaij) { 33b9b97703SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 34b9b97703SBarry Smith 35b9b97703SBarry Smith *B = b->A; 36b9b97703SBarry Smith } else if (isseqmaij) { 37b9b97703SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 38b9b97703SBarry Smith 39b9b97703SBarry Smith *B = b->AIJ; 40b9b97703SBarry Smith } else { 41b9b97703SBarry Smith *B = A; 42b9b97703SBarry Smith } 43b9b97703SBarry Smith PetscFunctionReturn(0); 44b9b97703SBarry Smith } 45b9b97703SBarry Smith 464a2ae208SSatish Balay #undef __FUNCT__ 474a2ae208SSatish Balay #define __FUNCT__ "MatMAIJRedimension" 48b9b97703SBarry Smith int MatMAIJRedimension(Mat A,int dof,Mat *B) 49b9b97703SBarry Smith { 50b9b97703SBarry Smith int ierr; 51b9b97703SBarry Smith Mat Aij; 52b9b97703SBarry Smith 53b9b97703SBarry Smith PetscFunctionBegin; 54b9b97703SBarry Smith ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 55b9b97703SBarry Smith ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 56b9b97703SBarry Smith PetscFunctionReturn(0); 57b9b97703SBarry Smith } 58b9b97703SBarry Smith 594a2ae208SSatish Balay #undef __FUNCT__ 604a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_SeqMAIJ" 614cb9d9b8SBarry Smith int MatDestroy_SeqMAIJ(Mat A) 6282b1193eSBarry Smith { 6382b1193eSBarry Smith int ierr; 644cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 6582b1193eSBarry Smith 6682b1193eSBarry Smith PetscFunctionBegin; 67cd3bbe55SBarry Smith if (b->AIJ) { 68cd3bbe55SBarry Smith ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 6982b1193eSBarry Smith } 704cb9d9b8SBarry Smith ierr = PetscFree(b);CHKERRQ(ierr); 714cb9d9b8SBarry Smith PetscFunctionReturn(0); 724cb9d9b8SBarry Smith } 734cb9d9b8SBarry Smith 744a2ae208SSatish Balay #undef __FUNCT__ 754a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIMAIJ" 764cb9d9b8SBarry Smith int MatDestroy_MPIMAIJ(Mat A) 774cb9d9b8SBarry Smith { 784cb9d9b8SBarry Smith int ierr; 794cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 804cb9d9b8SBarry Smith 814cb9d9b8SBarry Smith PetscFunctionBegin; 824cb9d9b8SBarry Smith if (b->AIJ) { 834cb9d9b8SBarry Smith ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 844cb9d9b8SBarry Smith } 85f4a53059SBarry Smith if (b->OAIJ) { 86f4a53059SBarry Smith ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr); 87f4a53059SBarry Smith } 88b9b97703SBarry Smith if (b->A) { 89b9b97703SBarry Smith ierr = MatDestroy(b->A);CHKERRQ(ierr); 90b9b97703SBarry Smith } 91f4a53059SBarry Smith if (b->ctx) { 92f4a53059SBarry Smith ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr); 93f4a53059SBarry Smith } 94f4a53059SBarry Smith if (b->w) { 95f4a53059SBarry Smith ierr = VecDestroy(b->w);CHKERRQ(ierr); 96f4a53059SBarry Smith } 97cd3bbe55SBarry Smith ierr = PetscFree(b);CHKERRQ(ierr); 98b9b97703SBarry Smith PetscFunctionReturn(0); 99b9b97703SBarry Smith } 100b9b97703SBarry Smith 1010bad9183SKris Buschelman /*MC 102fafad747SKris Buschelman MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 1030bad9183SKris Buschelman multicomponent problems, interpolating or restricting each component the same way independently. 1040bad9183SKris Buschelman The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 1050bad9183SKris Buschelman 1060bad9183SKris Buschelman Operations provided: 1070bad9183SKris Buschelman . MatMult 1080bad9183SKris Buschelman . MatMultTranspose 1090bad9183SKris Buschelman . MatMultAdd 1100bad9183SKris Buschelman . MatMultTransposeAdd 1110bad9183SKris Buschelman 1120bad9183SKris Buschelman Level: advanced 1130bad9183SKris Buschelman 1140bad9183SKris Buschelman .seealso: MatCreateSeqDense 1150bad9183SKris Buschelman M*/ 1160bad9183SKris Buschelman 11782b1193eSBarry Smith EXTERN_C_BEGIN 1184a2ae208SSatish Balay #undef __FUNCT__ 1194a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MAIJ" 120f4a53059SBarry Smith int MatCreate_MAIJ(Mat A) 12182b1193eSBarry Smith { 122cd3bbe55SBarry Smith int ierr; 1234cb9d9b8SBarry Smith Mat_MPIMAIJ *b; 12482b1193eSBarry Smith 12582b1193eSBarry Smith PetscFunctionBegin; 126b0a32e0cSBarry Smith ierr = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr); 127b0a32e0cSBarry Smith A->data = (void*)b; 1284cb9d9b8SBarry Smith ierr = PetscMemzero(b,sizeof(Mat_MPIMAIJ));CHKERRQ(ierr); 129cd3bbe55SBarry Smith ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 130cd3bbe55SBarry Smith A->factor = 0; 131cd3bbe55SBarry Smith A->mapping = 0; 132f4a53059SBarry Smith 133cd3bbe55SBarry Smith b->AIJ = 0; 134cd3bbe55SBarry Smith b->dof = 0; 135f4a53059SBarry Smith b->OAIJ = 0; 136f4a53059SBarry Smith b->ctx = 0; 137f4a53059SBarry Smith b->w = 0; 13882b1193eSBarry Smith PetscFunctionReturn(0); 13982b1193eSBarry Smith } 14082b1193eSBarry Smith EXTERN_C_END 14182b1193eSBarry Smith 142cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 1434a2ae208SSatish Balay #undef __FUNCT__ 144b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_2" 145cd3bbe55SBarry Smith int MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 14682b1193eSBarry Smith { 1474cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 148bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 14987828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2; 150bfec09a0SHong Zhang int ierr,m = b->AIJ->m,*idx,*ii; 151bcc973b7SBarry Smith int n,i,jrow,j; 15282b1193eSBarry Smith 153bcc973b7SBarry Smith PetscFunctionBegin; 1542f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 1552f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 156bcc973b7SBarry Smith idx = a->j; 157bcc973b7SBarry Smith v = a->a; 158bcc973b7SBarry Smith ii = a->i; 159bcc973b7SBarry Smith 160bcc973b7SBarry Smith for (i=0; i<m; i++) { 161bcc973b7SBarry Smith jrow = ii[i]; 162bcc973b7SBarry Smith n = ii[i+1] - jrow; 163bcc973b7SBarry Smith sum1 = 0.0; 164bcc973b7SBarry Smith sum2 = 0.0; 165bcc973b7SBarry Smith for (j=0; j<n; j++) { 166bcc973b7SBarry Smith sum1 += v[jrow]*x[2*idx[jrow]]; 167bcc973b7SBarry Smith sum2 += v[jrow]*x[2*idx[jrow]+1]; 168bcc973b7SBarry Smith jrow++; 169bcc973b7SBarry Smith } 170bcc973b7SBarry Smith y[2*i] = sum1; 171bcc973b7SBarry Smith y[2*i+1] = sum2; 172bcc973b7SBarry Smith } 173bcc973b7SBarry Smith 174b0a32e0cSBarry Smith PetscLogFlops(4*a->nz - 2*m); 1752f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1762f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 17782b1193eSBarry Smith PetscFunctionReturn(0); 17882b1193eSBarry Smith } 179bcc973b7SBarry Smith 1804a2ae208SSatish Balay #undef __FUNCT__ 181b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 182cd3bbe55SBarry Smith int MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 18382b1193eSBarry Smith { 1844cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 185bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 18687828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0; 187bfec09a0SHong Zhang int ierr,m = b->AIJ->m,n,i,*idx; 18882b1193eSBarry Smith 189bcc973b7SBarry Smith PetscFunctionBegin; 190bcc973b7SBarry Smith ierr = VecSet(&zero,yy);CHKERRQ(ierr); 1912f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 1922f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 193bfec09a0SHong Zhang 194bcc973b7SBarry Smith for (i=0; i<m; i++) { 195bfec09a0SHong Zhang idx = a->j + a->i[i] ; 196bfec09a0SHong Zhang v = a->a + a->i[i] ; 197bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 198bcc973b7SBarry Smith alpha1 = x[2*i]; 199bcc973b7SBarry Smith alpha2 = x[2*i+1]; 200bcc973b7SBarry Smith while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 201bcc973b7SBarry Smith } 202b0a32e0cSBarry Smith PetscLogFlops(4*a->nz - 2*b->AIJ->n); 2032f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 2042f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 20582b1193eSBarry Smith PetscFunctionReturn(0); 20682b1193eSBarry Smith } 207bcc973b7SBarry Smith 2084a2ae208SSatish Balay #undef __FUNCT__ 209b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 210cd3bbe55SBarry Smith int MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 21182b1193eSBarry Smith { 2124cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 213bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 21487828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2; 215bfec09a0SHong Zhang int ierr,m = b->AIJ->m,*idx,*ii; 216bcc973b7SBarry Smith int n,i,jrow,j; 21782b1193eSBarry Smith 218bcc973b7SBarry Smith PetscFunctionBegin; 219f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2202f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 2212f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 222bcc973b7SBarry Smith idx = a->j; 223bcc973b7SBarry Smith v = a->a; 224bcc973b7SBarry Smith ii = a->i; 225bcc973b7SBarry Smith 226bcc973b7SBarry Smith for (i=0; i<m; i++) { 227bcc973b7SBarry Smith jrow = ii[i]; 228bcc973b7SBarry Smith n = ii[i+1] - jrow; 229bcc973b7SBarry Smith sum1 = 0.0; 230bcc973b7SBarry Smith sum2 = 0.0; 231bcc973b7SBarry Smith for (j=0; j<n; j++) { 232bcc973b7SBarry Smith sum1 += v[jrow]*x[2*idx[jrow]]; 233bcc973b7SBarry Smith sum2 += v[jrow]*x[2*idx[jrow]+1]; 234bcc973b7SBarry Smith jrow++; 235bcc973b7SBarry Smith } 236bcc973b7SBarry Smith y[2*i] += sum1; 237bcc973b7SBarry Smith y[2*i+1] += sum2; 238bcc973b7SBarry Smith } 239bcc973b7SBarry Smith 240b0a32e0cSBarry Smith PetscLogFlops(4*a->nz - 2*m); 2412f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 2422f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 243bcc973b7SBarry Smith PetscFunctionReturn(0); 24482b1193eSBarry Smith } 2454a2ae208SSatish Balay #undef __FUNCT__ 246b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 247cd3bbe55SBarry Smith int MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 24882b1193eSBarry Smith { 2494cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 250bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 25187828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2; 252bfec09a0SHong Zhang int ierr,m = b->AIJ->m,n,i,*idx; 25382b1193eSBarry Smith 254bcc973b7SBarry Smith PetscFunctionBegin; 255f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2562f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 2572f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 258bfec09a0SHong Zhang 259bcc973b7SBarry Smith for (i=0; i<m; i++) { 260bfec09a0SHong Zhang idx = a->j + a->i[i] ; 261bfec09a0SHong Zhang v = a->a + a->i[i] ; 262bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 263bcc973b7SBarry Smith alpha1 = x[2*i]; 264bcc973b7SBarry Smith alpha2 = x[2*i+1]; 265bcc973b7SBarry Smith while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 266bcc973b7SBarry Smith } 267b0a32e0cSBarry Smith PetscLogFlops(4*a->nz - 2*b->AIJ->n); 2682f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 2692f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 270bcc973b7SBarry Smith PetscFunctionReturn(0); 27182b1193eSBarry Smith } 272cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 2734a2ae208SSatish Balay #undef __FUNCT__ 274b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_3" 275bcc973b7SBarry Smith int MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 276bcc973b7SBarry Smith { 2774cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 278bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 27987828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3; 280bfec09a0SHong Zhang int ierr,m = b->AIJ->m,*idx,*ii; 281bcc973b7SBarry Smith int n,i,jrow,j; 28282b1193eSBarry Smith 283bcc973b7SBarry Smith PetscFunctionBegin; 2842f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 2852f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 286bcc973b7SBarry Smith idx = a->j; 287bcc973b7SBarry Smith v = a->a; 288bcc973b7SBarry Smith ii = a->i; 289bcc973b7SBarry Smith 290bcc973b7SBarry Smith for (i=0; i<m; i++) { 291bcc973b7SBarry Smith jrow = ii[i]; 292bcc973b7SBarry Smith n = ii[i+1] - jrow; 293bcc973b7SBarry Smith sum1 = 0.0; 294bcc973b7SBarry Smith sum2 = 0.0; 295bcc973b7SBarry Smith sum3 = 0.0; 296bcc973b7SBarry Smith for (j=0; j<n; j++) { 297bcc973b7SBarry Smith sum1 += v[jrow]*x[3*idx[jrow]]; 298bcc973b7SBarry Smith sum2 += v[jrow]*x[3*idx[jrow]+1]; 299bcc973b7SBarry Smith sum3 += v[jrow]*x[3*idx[jrow]+2]; 300bcc973b7SBarry Smith jrow++; 301bcc973b7SBarry Smith } 302bcc973b7SBarry Smith y[3*i] = sum1; 303bcc973b7SBarry Smith y[3*i+1] = sum2; 304bcc973b7SBarry Smith y[3*i+2] = sum3; 305bcc973b7SBarry Smith } 306bcc973b7SBarry Smith 307b0a32e0cSBarry Smith PetscLogFlops(6*a->nz - 3*m); 3082f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 3092f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 310bcc973b7SBarry Smith PetscFunctionReturn(0); 311bcc973b7SBarry Smith } 312bcc973b7SBarry Smith 3134a2ae208SSatish Balay #undef __FUNCT__ 314b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 315bcc973b7SBarry Smith int MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 316bcc973b7SBarry Smith { 3174cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 318bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 31987828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0; 320bfec09a0SHong Zhang int ierr,m = b->AIJ->m,n,i,*idx; 321bcc973b7SBarry Smith 322bcc973b7SBarry Smith PetscFunctionBegin; 323bcc973b7SBarry Smith ierr = VecSet(&zero,yy);CHKERRQ(ierr); 3242f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 3252f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 326bfec09a0SHong Zhang 327bcc973b7SBarry Smith for (i=0; i<m; i++) { 328bfec09a0SHong Zhang idx = a->j + a->i[i]; 329bfec09a0SHong Zhang v = a->a + a->i[i]; 330bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 331bcc973b7SBarry Smith alpha1 = x[3*i]; 332bcc973b7SBarry Smith alpha2 = x[3*i+1]; 333bcc973b7SBarry Smith alpha3 = x[3*i+2]; 334bcc973b7SBarry Smith while (n-->0) { 335bcc973b7SBarry Smith y[3*(*idx)] += alpha1*(*v); 336bcc973b7SBarry Smith y[3*(*idx)+1] += alpha2*(*v); 337bcc973b7SBarry Smith y[3*(*idx)+2] += alpha3*(*v); 338bcc973b7SBarry Smith idx++; v++; 339bcc973b7SBarry Smith } 340bcc973b7SBarry Smith } 341b0a32e0cSBarry Smith PetscLogFlops(6*a->nz - 3*b->AIJ->n); 3422f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 3432f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 344bcc973b7SBarry Smith PetscFunctionReturn(0); 345bcc973b7SBarry Smith } 346bcc973b7SBarry Smith 3474a2ae208SSatish Balay #undef __FUNCT__ 348b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 349bcc973b7SBarry Smith int MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 350bcc973b7SBarry Smith { 3514cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 352bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 35387828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3; 354bfec09a0SHong Zhang int ierr,m = b->AIJ->m,*idx,*ii; 355bcc973b7SBarry Smith int n,i,jrow,j; 356bcc973b7SBarry Smith 357bcc973b7SBarry Smith PetscFunctionBegin; 358f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 3592f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 3602f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 361bcc973b7SBarry Smith idx = a->j; 362bcc973b7SBarry Smith v = a->a; 363bcc973b7SBarry Smith ii = a->i; 364bcc973b7SBarry Smith 365bcc973b7SBarry Smith for (i=0; i<m; i++) { 366bcc973b7SBarry Smith jrow = ii[i]; 367bcc973b7SBarry Smith n = ii[i+1] - jrow; 368bcc973b7SBarry Smith sum1 = 0.0; 369bcc973b7SBarry Smith sum2 = 0.0; 370bcc973b7SBarry Smith sum3 = 0.0; 371bcc973b7SBarry Smith for (j=0; j<n; j++) { 372bcc973b7SBarry Smith sum1 += v[jrow]*x[3*idx[jrow]]; 373bcc973b7SBarry Smith sum2 += v[jrow]*x[3*idx[jrow]+1]; 374bcc973b7SBarry Smith sum3 += v[jrow]*x[3*idx[jrow]+2]; 375bcc973b7SBarry Smith jrow++; 376bcc973b7SBarry Smith } 377bcc973b7SBarry Smith y[3*i] += sum1; 378bcc973b7SBarry Smith y[3*i+1] += sum2; 379bcc973b7SBarry Smith y[3*i+2] += sum3; 380bcc973b7SBarry Smith } 381bcc973b7SBarry Smith 382b0a32e0cSBarry Smith PetscLogFlops(6*a->nz); 3832f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 3842f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 385bcc973b7SBarry Smith PetscFunctionReturn(0); 386bcc973b7SBarry Smith } 3874a2ae208SSatish Balay #undef __FUNCT__ 388b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 389bcc973b7SBarry Smith int MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 390bcc973b7SBarry Smith { 3914cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 392bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 39387828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3; 394bfec09a0SHong Zhang int ierr,m = b->AIJ->m,n,i,*idx; 395bcc973b7SBarry Smith 396bcc973b7SBarry Smith PetscFunctionBegin; 397f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 3982f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 3992f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 400bcc973b7SBarry Smith for (i=0; i<m; i++) { 401bfec09a0SHong Zhang idx = a->j + a->i[i] ; 402bfec09a0SHong Zhang v = a->a + a->i[i] ; 403bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 404bcc973b7SBarry Smith alpha1 = x[3*i]; 405bcc973b7SBarry Smith alpha2 = x[3*i+1]; 406bcc973b7SBarry Smith alpha3 = x[3*i+2]; 407bcc973b7SBarry Smith while (n-->0) { 408bcc973b7SBarry Smith y[3*(*idx)] += alpha1*(*v); 409bcc973b7SBarry Smith y[3*(*idx)+1] += alpha2*(*v); 410bcc973b7SBarry Smith y[3*(*idx)+2] += alpha3*(*v); 411bcc973b7SBarry Smith idx++; v++; 412bcc973b7SBarry Smith } 413bcc973b7SBarry Smith } 414b0a32e0cSBarry Smith PetscLogFlops(6*a->nz); 4152f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 4162f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 417bcc973b7SBarry Smith PetscFunctionReturn(0); 418bcc973b7SBarry Smith } 419bcc973b7SBarry Smith 420bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/ 4214a2ae208SSatish Balay #undef __FUNCT__ 422b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_4" 423bcc973b7SBarry Smith int MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 424bcc973b7SBarry Smith { 4254cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 426bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 42787828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 428bfec09a0SHong Zhang int ierr,m = b->AIJ->m,*idx,*ii; 429bcc973b7SBarry Smith int n,i,jrow,j; 430bcc973b7SBarry Smith 431bcc973b7SBarry Smith PetscFunctionBegin; 4322f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 4332f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 434bcc973b7SBarry Smith idx = a->j; 435bcc973b7SBarry Smith v = a->a; 436bcc973b7SBarry Smith ii = a->i; 437bcc973b7SBarry Smith 438bcc973b7SBarry Smith for (i=0; i<m; i++) { 439bcc973b7SBarry Smith jrow = ii[i]; 440bcc973b7SBarry Smith n = ii[i+1] - jrow; 441bcc973b7SBarry Smith sum1 = 0.0; 442bcc973b7SBarry Smith sum2 = 0.0; 443bcc973b7SBarry Smith sum3 = 0.0; 444bcc973b7SBarry Smith sum4 = 0.0; 445bcc973b7SBarry Smith for (j=0; j<n; j++) { 446bcc973b7SBarry Smith sum1 += v[jrow]*x[4*idx[jrow]]; 447bcc973b7SBarry Smith sum2 += v[jrow]*x[4*idx[jrow]+1]; 448bcc973b7SBarry Smith sum3 += v[jrow]*x[4*idx[jrow]+2]; 449bcc973b7SBarry Smith sum4 += v[jrow]*x[4*idx[jrow]+3]; 450bcc973b7SBarry Smith jrow++; 451bcc973b7SBarry Smith } 452bcc973b7SBarry Smith y[4*i] = sum1; 453bcc973b7SBarry Smith y[4*i+1] = sum2; 454bcc973b7SBarry Smith y[4*i+2] = sum3; 455bcc973b7SBarry Smith y[4*i+3] = sum4; 456bcc973b7SBarry Smith } 457bcc973b7SBarry Smith 458b0a32e0cSBarry Smith PetscLogFlops(8*a->nz - 4*m); 4592f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 4602f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 461bcc973b7SBarry Smith PetscFunctionReturn(0); 462bcc973b7SBarry Smith } 463bcc973b7SBarry Smith 4644a2ae208SSatish Balay #undef __FUNCT__ 465b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 466bcc973b7SBarry Smith int MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 467bcc973b7SBarry Smith { 4684cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 469bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 47087828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0; 471bfec09a0SHong Zhang int ierr,m = b->AIJ->m,n,i,*idx; 472bcc973b7SBarry Smith 473bcc973b7SBarry Smith PetscFunctionBegin; 474bcc973b7SBarry Smith ierr = VecSet(&zero,yy);CHKERRQ(ierr); 4752f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 4762f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 477bcc973b7SBarry Smith for (i=0; i<m; i++) { 478bfec09a0SHong Zhang idx = a->j + a->i[i] ; 479bfec09a0SHong Zhang v = a->a + a->i[i] ; 480bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 481bcc973b7SBarry Smith alpha1 = x[4*i]; 482bcc973b7SBarry Smith alpha2 = x[4*i+1]; 483bcc973b7SBarry Smith alpha3 = x[4*i+2]; 484bcc973b7SBarry Smith alpha4 = x[4*i+3]; 485bcc973b7SBarry Smith while (n-->0) { 486bcc973b7SBarry Smith y[4*(*idx)] += alpha1*(*v); 487bcc973b7SBarry Smith y[4*(*idx)+1] += alpha2*(*v); 488bcc973b7SBarry Smith y[4*(*idx)+2] += alpha3*(*v); 489bcc973b7SBarry Smith y[4*(*idx)+3] += alpha4*(*v); 490bcc973b7SBarry Smith idx++; v++; 491bcc973b7SBarry Smith } 492bcc973b7SBarry Smith } 493b0a32e0cSBarry Smith PetscLogFlops(8*a->nz - 4*b->AIJ->n); 4942f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 4952f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 496bcc973b7SBarry Smith PetscFunctionReturn(0); 497bcc973b7SBarry Smith } 498bcc973b7SBarry Smith 4994a2ae208SSatish Balay #undef __FUNCT__ 500b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 501bcc973b7SBarry Smith int MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 502bcc973b7SBarry Smith { 5034cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 504f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 50587828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 506bfec09a0SHong Zhang int ierr,m = b->AIJ->m,*idx,*ii; 507f9fae5adSBarry Smith int n,i,jrow,j; 508f9fae5adSBarry Smith 509f9fae5adSBarry Smith PetscFunctionBegin; 510f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 5112f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 5122f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 513f9fae5adSBarry Smith idx = a->j; 514f9fae5adSBarry Smith v = a->a; 515f9fae5adSBarry Smith ii = a->i; 516f9fae5adSBarry Smith 517f9fae5adSBarry Smith for (i=0; i<m; i++) { 518f9fae5adSBarry Smith jrow = ii[i]; 519f9fae5adSBarry Smith n = ii[i+1] - jrow; 520f9fae5adSBarry Smith sum1 = 0.0; 521f9fae5adSBarry Smith sum2 = 0.0; 522f9fae5adSBarry Smith sum3 = 0.0; 523f9fae5adSBarry Smith sum4 = 0.0; 524f9fae5adSBarry Smith for (j=0; j<n; j++) { 525f9fae5adSBarry Smith sum1 += v[jrow]*x[4*idx[jrow]]; 526f9fae5adSBarry Smith sum2 += v[jrow]*x[4*idx[jrow]+1]; 527f9fae5adSBarry Smith sum3 += v[jrow]*x[4*idx[jrow]+2]; 528f9fae5adSBarry Smith sum4 += v[jrow]*x[4*idx[jrow]+3]; 529f9fae5adSBarry Smith jrow++; 530f9fae5adSBarry Smith } 531f9fae5adSBarry Smith y[4*i] += sum1; 532f9fae5adSBarry Smith y[4*i+1] += sum2; 533f9fae5adSBarry Smith y[4*i+2] += sum3; 534f9fae5adSBarry Smith y[4*i+3] += sum4; 535f9fae5adSBarry Smith } 536f9fae5adSBarry Smith 537b0a32e0cSBarry Smith PetscLogFlops(8*a->nz - 4*m); 5382f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 5392f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 540f9fae5adSBarry Smith PetscFunctionReturn(0); 541bcc973b7SBarry Smith } 5424a2ae208SSatish Balay #undef __FUNCT__ 543b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 544bcc973b7SBarry Smith int MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 545bcc973b7SBarry Smith { 5464cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 547f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 54887828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4; 549bfec09a0SHong Zhang int ierr,m = b->AIJ->m,n,i,*idx; 550f9fae5adSBarry Smith 551f9fae5adSBarry Smith PetscFunctionBegin; 552f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 5532f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 5542f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 555bfec09a0SHong Zhang 556f9fae5adSBarry Smith for (i=0; i<m; i++) { 557bfec09a0SHong Zhang idx = a->j + a->i[i] ; 558bfec09a0SHong Zhang v = a->a + a->i[i] ; 559f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 560f9fae5adSBarry Smith alpha1 = x[4*i]; 561f9fae5adSBarry Smith alpha2 = x[4*i+1]; 562f9fae5adSBarry Smith alpha3 = x[4*i+2]; 563f9fae5adSBarry Smith alpha4 = x[4*i+3]; 564f9fae5adSBarry Smith while (n-->0) { 565f9fae5adSBarry Smith y[4*(*idx)] += alpha1*(*v); 566f9fae5adSBarry Smith y[4*(*idx)+1] += alpha2*(*v); 567f9fae5adSBarry Smith y[4*(*idx)+2] += alpha3*(*v); 568f9fae5adSBarry Smith y[4*(*idx)+3] += alpha4*(*v); 569f9fae5adSBarry Smith idx++; v++; 570f9fae5adSBarry Smith } 571f9fae5adSBarry Smith } 572b0a32e0cSBarry Smith PetscLogFlops(8*a->nz - 4*b->AIJ->n); 5732f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 5742f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 575f9fae5adSBarry Smith PetscFunctionReturn(0); 576f9fae5adSBarry Smith } 577f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/ 5786cd98798SBarry Smith 5794a2ae208SSatish Balay #undef __FUNCT__ 580b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_5" 581f9fae5adSBarry Smith int MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 582f9fae5adSBarry Smith { 5834cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 584f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 58587828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 586bfec09a0SHong Zhang int ierr,m = b->AIJ->m,*idx,*ii; 587f9fae5adSBarry Smith int n,i,jrow,j; 588f9fae5adSBarry Smith 589f9fae5adSBarry Smith PetscFunctionBegin; 5902f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 5912f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 592f9fae5adSBarry Smith idx = a->j; 593f9fae5adSBarry Smith v = a->a; 594f9fae5adSBarry Smith ii = a->i; 595f9fae5adSBarry Smith 596f9fae5adSBarry Smith for (i=0; i<m; i++) { 597f9fae5adSBarry Smith jrow = ii[i]; 598f9fae5adSBarry Smith n = ii[i+1] - jrow; 599f9fae5adSBarry Smith sum1 = 0.0; 600f9fae5adSBarry Smith sum2 = 0.0; 601f9fae5adSBarry Smith sum3 = 0.0; 602f9fae5adSBarry Smith sum4 = 0.0; 603f9fae5adSBarry Smith sum5 = 0.0; 604f9fae5adSBarry Smith for (j=0; j<n; j++) { 605f9fae5adSBarry Smith sum1 += v[jrow]*x[5*idx[jrow]]; 606f9fae5adSBarry Smith sum2 += v[jrow]*x[5*idx[jrow]+1]; 607f9fae5adSBarry Smith sum3 += v[jrow]*x[5*idx[jrow]+2]; 608f9fae5adSBarry Smith sum4 += v[jrow]*x[5*idx[jrow]+3]; 609f9fae5adSBarry Smith sum5 += v[jrow]*x[5*idx[jrow]+4]; 610f9fae5adSBarry Smith jrow++; 611f9fae5adSBarry Smith } 612f9fae5adSBarry Smith y[5*i] = sum1; 613f9fae5adSBarry Smith y[5*i+1] = sum2; 614f9fae5adSBarry Smith y[5*i+2] = sum3; 615f9fae5adSBarry Smith y[5*i+3] = sum4; 616f9fae5adSBarry Smith y[5*i+4] = sum5; 617f9fae5adSBarry Smith } 618f9fae5adSBarry Smith 619b0a32e0cSBarry Smith PetscLogFlops(10*a->nz - 5*m); 6202f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 6212f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 622f9fae5adSBarry Smith PetscFunctionReturn(0); 623f9fae5adSBarry Smith } 624f9fae5adSBarry Smith 6254a2ae208SSatish Balay #undef __FUNCT__ 626b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 627f9fae5adSBarry Smith int MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 628f9fae5adSBarry Smith { 6294cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 630f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 63187828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0; 632bfec09a0SHong Zhang int ierr,m = b->AIJ->m,n,i,*idx; 633f9fae5adSBarry Smith 634f9fae5adSBarry Smith PetscFunctionBegin; 635f9fae5adSBarry Smith ierr = VecSet(&zero,yy);CHKERRQ(ierr); 6362f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 6372f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 638bfec09a0SHong Zhang 639f9fae5adSBarry Smith for (i=0; i<m; i++) { 640bfec09a0SHong Zhang idx = a->j + a->i[i] ; 641bfec09a0SHong Zhang v = a->a + a->i[i] ; 642f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 643f9fae5adSBarry Smith alpha1 = x[5*i]; 644f9fae5adSBarry Smith alpha2 = x[5*i+1]; 645f9fae5adSBarry Smith alpha3 = x[5*i+2]; 646f9fae5adSBarry Smith alpha4 = x[5*i+3]; 647f9fae5adSBarry Smith alpha5 = x[5*i+4]; 648f9fae5adSBarry Smith while (n-->0) { 649f9fae5adSBarry Smith y[5*(*idx)] += alpha1*(*v); 650f9fae5adSBarry Smith y[5*(*idx)+1] += alpha2*(*v); 651f9fae5adSBarry Smith y[5*(*idx)+2] += alpha3*(*v); 652f9fae5adSBarry Smith y[5*(*idx)+3] += alpha4*(*v); 653f9fae5adSBarry Smith y[5*(*idx)+4] += alpha5*(*v); 654f9fae5adSBarry Smith idx++; v++; 655f9fae5adSBarry Smith } 656f9fae5adSBarry Smith } 657b0a32e0cSBarry Smith PetscLogFlops(10*a->nz - 5*b->AIJ->n); 6582f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 6592f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 660f9fae5adSBarry Smith PetscFunctionReturn(0); 661f9fae5adSBarry Smith } 662f9fae5adSBarry Smith 6634a2ae208SSatish Balay #undef __FUNCT__ 664b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 665f9fae5adSBarry Smith int MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 666f9fae5adSBarry Smith { 6674cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 668f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 66987828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 670bfec09a0SHong Zhang int ierr,m = b->AIJ->m,*idx,*ii; 671f9fae5adSBarry Smith int n,i,jrow,j; 672f9fae5adSBarry Smith 673f9fae5adSBarry Smith PetscFunctionBegin; 674f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 6752f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 6762f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 677f9fae5adSBarry Smith idx = a->j; 678f9fae5adSBarry Smith v = a->a; 679f9fae5adSBarry Smith ii = a->i; 680f9fae5adSBarry Smith 681f9fae5adSBarry Smith for (i=0; i<m; i++) { 682f9fae5adSBarry Smith jrow = ii[i]; 683f9fae5adSBarry Smith n = ii[i+1] - jrow; 684f9fae5adSBarry Smith sum1 = 0.0; 685f9fae5adSBarry Smith sum2 = 0.0; 686f9fae5adSBarry Smith sum3 = 0.0; 687f9fae5adSBarry Smith sum4 = 0.0; 688f9fae5adSBarry Smith sum5 = 0.0; 689f9fae5adSBarry Smith for (j=0; j<n; j++) { 690f9fae5adSBarry Smith sum1 += v[jrow]*x[5*idx[jrow]]; 691f9fae5adSBarry Smith sum2 += v[jrow]*x[5*idx[jrow]+1]; 692f9fae5adSBarry Smith sum3 += v[jrow]*x[5*idx[jrow]+2]; 693f9fae5adSBarry Smith sum4 += v[jrow]*x[5*idx[jrow]+3]; 694f9fae5adSBarry Smith sum5 += v[jrow]*x[5*idx[jrow]+4]; 695f9fae5adSBarry Smith jrow++; 696f9fae5adSBarry Smith } 697f9fae5adSBarry Smith y[5*i] += sum1; 698f9fae5adSBarry Smith y[5*i+1] += sum2; 699f9fae5adSBarry Smith y[5*i+2] += sum3; 700f9fae5adSBarry Smith y[5*i+3] += sum4; 701f9fae5adSBarry Smith y[5*i+4] += sum5; 702f9fae5adSBarry Smith } 703f9fae5adSBarry Smith 704b0a32e0cSBarry Smith PetscLogFlops(10*a->nz); 7052f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 7062f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 707f9fae5adSBarry Smith PetscFunctionReturn(0); 708f9fae5adSBarry Smith } 709f9fae5adSBarry Smith 7104a2ae208SSatish Balay #undef __FUNCT__ 711b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 712f9fae5adSBarry Smith int MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 713f9fae5adSBarry Smith { 7144cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 715f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 71687828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5; 717bfec09a0SHong Zhang int ierr,m = b->AIJ->m,n,i,*idx; 718f9fae5adSBarry Smith 719f9fae5adSBarry Smith PetscFunctionBegin; 720f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 7212f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 7222f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 723bfec09a0SHong Zhang 724f9fae5adSBarry Smith for (i=0; i<m; i++) { 725bfec09a0SHong Zhang idx = a->j + a->i[i] ; 726bfec09a0SHong Zhang v = a->a + a->i[i] ; 727f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 728f9fae5adSBarry Smith alpha1 = x[5*i]; 729f9fae5adSBarry Smith alpha2 = x[5*i+1]; 730f9fae5adSBarry Smith alpha3 = x[5*i+2]; 731f9fae5adSBarry Smith alpha4 = x[5*i+3]; 732f9fae5adSBarry Smith alpha5 = x[5*i+4]; 733f9fae5adSBarry Smith while (n-->0) { 734f9fae5adSBarry Smith y[5*(*idx)] += alpha1*(*v); 735f9fae5adSBarry Smith y[5*(*idx)+1] += alpha2*(*v); 736f9fae5adSBarry Smith y[5*(*idx)+2] += alpha3*(*v); 737f9fae5adSBarry Smith y[5*(*idx)+3] += alpha4*(*v); 738f9fae5adSBarry Smith y[5*(*idx)+4] += alpha5*(*v); 739f9fae5adSBarry Smith idx++; v++; 740f9fae5adSBarry Smith } 741f9fae5adSBarry Smith } 742b0a32e0cSBarry Smith PetscLogFlops(10*a->nz); 7432f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 7442f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 745f9fae5adSBarry Smith PetscFunctionReturn(0); 746bcc973b7SBarry Smith } 747bcc973b7SBarry Smith 7486cd98798SBarry Smith /* ------------------------------------------------------------------------------*/ 7494a2ae208SSatish Balay #undef __FUNCT__ 750b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_6" 7516cd98798SBarry Smith int MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 7526cd98798SBarry Smith { 7536cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 7546cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 75587828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 756bfec09a0SHong Zhang int ierr,m = b->AIJ->m,*idx,*ii; 7576cd98798SBarry Smith int n,i,jrow,j; 7586cd98798SBarry Smith 7596cd98798SBarry Smith PetscFunctionBegin; 7602f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 7612f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 7626cd98798SBarry Smith idx = a->j; 7636cd98798SBarry Smith v = a->a; 7646cd98798SBarry Smith ii = a->i; 7656cd98798SBarry Smith 7666cd98798SBarry Smith for (i=0; i<m; i++) { 7676cd98798SBarry Smith jrow = ii[i]; 7686cd98798SBarry Smith n = ii[i+1] - jrow; 7696cd98798SBarry Smith sum1 = 0.0; 7706cd98798SBarry Smith sum2 = 0.0; 7716cd98798SBarry Smith sum3 = 0.0; 7726cd98798SBarry Smith sum4 = 0.0; 7736cd98798SBarry Smith sum5 = 0.0; 7746cd98798SBarry Smith sum6 = 0.0; 7756cd98798SBarry Smith for (j=0; j<n; j++) { 7766cd98798SBarry Smith sum1 += v[jrow]*x[6*idx[jrow]]; 7776cd98798SBarry Smith sum2 += v[jrow]*x[6*idx[jrow]+1]; 7786cd98798SBarry Smith sum3 += v[jrow]*x[6*idx[jrow]+2]; 7796cd98798SBarry Smith sum4 += v[jrow]*x[6*idx[jrow]+3]; 7806cd98798SBarry Smith sum5 += v[jrow]*x[6*idx[jrow]+4]; 7816cd98798SBarry Smith sum6 += v[jrow]*x[6*idx[jrow]+5]; 7826cd98798SBarry Smith jrow++; 7836cd98798SBarry Smith } 7846cd98798SBarry Smith y[6*i] = sum1; 7856cd98798SBarry Smith y[6*i+1] = sum2; 7866cd98798SBarry Smith y[6*i+2] = sum3; 7876cd98798SBarry Smith y[6*i+3] = sum4; 7886cd98798SBarry Smith y[6*i+4] = sum5; 7896cd98798SBarry Smith y[6*i+5] = sum6; 7906cd98798SBarry Smith } 7916cd98798SBarry Smith 792b0a32e0cSBarry Smith PetscLogFlops(12*a->nz - 6*m); 7932f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 7942f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 7956cd98798SBarry Smith PetscFunctionReturn(0); 7966cd98798SBarry Smith } 7976cd98798SBarry Smith 7984a2ae208SSatish Balay #undef __FUNCT__ 799b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 8006cd98798SBarry Smith int MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 8016cd98798SBarry Smith { 8026cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 8036cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 80487828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0; 805bfec09a0SHong Zhang int ierr,m = b->AIJ->m,n,i,*idx; 8066cd98798SBarry Smith 8076cd98798SBarry Smith PetscFunctionBegin; 8086cd98798SBarry Smith ierr = VecSet(&zero,yy);CHKERRQ(ierr); 8092f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 8102f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 811bfec09a0SHong Zhang 8126cd98798SBarry Smith for (i=0; i<m; i++) { 813bfec09a0SHong Zhang idx = a->j + a->i[i] ; 814bfec09a0SHong Zhang v = a->a + a->i[i] ; 8156cd98798SBarry Smith n = a->i[i+1] - a->i[i]; 8166cd98798SBarry Smith alpha1 = x[6*i]; 8176cd98798SBarry Smith alpha2 = x[6*i+1]; 8186cd98798SBarry Smith alpha3 = x[6*i+2]; 8196cd98798SBarry Smith alpha4 = x[6*i+3]; 8206cd98798SBarry Smith alpha5 = x[6*i+4]; 8216cd98798SBarry Smith alpha6 = x[6*i+5]; 8226cd98798SBarry Smith while (n-->0) { 8236cd98798SBarry Smith y[6*(*idx)] += alpha1*(*v); 8246cd98798SBarry Smith y[6*(*idx)+1] += alpha2*(*v); 8256cd98798SBarry Smith y[6*(*idx)+2] += alpha3*(*v); 8266cd98798SBarry Smith y[6*(*idx)+3] += alpha4*(*v); 8276cd98798SBarry Smith y[6*(*idx)+4] += alpha5*(*v); 8286cd98798SBarry Smith y[6*(*idx)+5] += alpha6*(*v); 8296cd98798SBarry Smith idx++; v++; 8306cd98798SBarry Smith } 8316cd98798SBarry Smith } 832b0a32e0cSBarry Smith PetscLogFlops(12*a->nz - 6*b->AIJ->n); 8332f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 8342f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 8356cd98798SBarry Smith PetscFunctionReturn(0); 8366cd98798SBarry Smith } 8376cd98798SBarry Smith 8384a2ae208SSatish Balay #undef __FUNCT__ 839b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 8406cd98798SBarry Smith int MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 8416cd98798SBarry Smith { 8426cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 8436cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 84487828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 845bfec09a0SHong Zhang int ierr,m = b->AIJ->m,*idx,*ii; 8466cd98798SBarry Smith int n,i,jrow,j; 8476cd98798SBarry Smith 8486cd98798SBarry Smith PetscFunctionBegin; 8496cd98798SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 8502f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 8512f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 8526cd98798SBarry Smith idx = a->j; 8536cd98798SBarry Smith v = a->a; 8546cd98798SBarry Smith ii = a->i; 8556cd98798SBarry Smith 8566cd98798SBarry Smith for (i=0; i<m; i++) { 8576cd98798SBarry Smith jrow = ii[i]; 8586cd98798SBarry Smith n = ii[i+1] - jrow; 8596cd98798SBarry Smith sum1 = 0.0; 8606cd98798SBarry Smith sum2 = 0.0; 8616cd98798SBarry Smith sum3 = 0.0; 8626cd98798SBarry Smith sum4 = 0.0; 8636cd98798SBarry Smith sum5 = 0.0; 8646cd98798SBarry Smith sum6 = 0.0; 8656cd98798SBarry Smith for (j=0; j<n; j++) { 8666cd98798SBarry Smith sum1 += v[jrow]*x[6*idx[jrow]]; 8676cd98798SBarry Smith sum2 += v[jrow]*x[6*idx[jrow]+1]; 8686cd98798SBarry Smith sum3 += v[jrow]*x[6*idx[jrow]+2]; 8696cd98798SBarry Smith sum4 += v[jrow]*x[6*idx[jrow]+3]; 8706cd98798SBarry Smith sum5 += v[jrow]*x[6*idx[jrow]+4]; 8716cd98798SBarry Smith sum6 += v[jrow]*x[6*idx[jrow]+5]; 8726cd98798SBarry Smith jrow++; 8736cd98798SBarry Smith } 8746cd98798SBarry Smith y[6*i] += sum1; 8756cd98798SBarry Smith y[6*i+1] += sum2; 8766cd98798SBarry Smith y[6*i+2] += sum3; 8776cd98798SBarry Smith y[6*i+3] += sum4; 8786cd98798SBarry Smith y[6*i+4] += sum5; 8796cd98798SBarry Smith y[6*i+5] += sum6; 8806cd98798SBarry Smith } 8816cd98798SBarry Smith 882b0a32e0cSBarry Smith PetscLogFlops(12*a->nz); 8832f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 8842f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 8856cd98798SBarry Smith PetscFunctionReturn(0); 8866cd98798SBarry Smith } 8876cd98798SBarry Smith 8884a2ae208SSatish Balay #undef __FUNCT__ 889b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 8906cd98798SBarry Smith int MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 8916cd98798SBarry Smith { 8926cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 8936cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 89487828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 895bfec09a0SHong Zhang int ierr,m = b->AIJ->m,n,i,*idx; 8966cd98798SBarry Smith 8976cd98798SBarry Smith PetscFunctionBegin; 8986cd98798SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 8992f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 9002f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 901bfec09a0SHong Zhang 9026cd98798SBarry Smith for (i=0; i<m; i++) { 903bfec09a0SHong Zhang idx = a->j + a->i[i] ; 904bfec09a0SHong Zhang v = a->a + a->i[i] ; 9056cd98798SBarry Smith n = a->i[i+1] - a->i[i]; 9066cd98798SBarry Smith alpha1 = x[6*i]; 9076cd98798SBarry Smith alpha2 = x[6*i+1]; 9086cd98798SBarry Smith alpha3 = x[6*i+2]; 9096cd98798SBarry Smith alpha4 = x[6*i+3]; 9106cd98798SBarry Smith alpha5 = x[6*i+4]; 9116cd98798SBarry Smith alpha6 = x[6*i+5]; 9126cd98798SBarry Smith while (n-->0) { 9136cd98798SBarry Smith y[6*(*idx)] += alpha1*(*v); 9146cd98798SBarry Smith y[6*(*idx)+1] += alpha2*(*v); 9156cd98798SBarry Smith y[6*(*idx)+2] += alpha3*(*v); 9166cd98798SBarry Smith y[6*(*idx)+3] += alpha4*(*v); 9176cd98798SBarry Smith y[6*(*idx)+4] += alpha5*(*v); 9186cd98798SBarry Smith y[6*(*idx)+5] += alpha6*(*v); 9196cd98798SBarry Smith idx++; v++; 9206cd98798SBarry Smith } 9216cd98798SBarry Smith } 922b0a32e0cSBarry Smith PetscLogFlops(12*a->nz); 9232f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 9242f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 9256cd98798SBarry Smith PetscFunctionReturn(0); 9266cd98798SBarry Smith } 9276cd98798SBarry Smith 92866ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/ 92966ed3db0SBarry Smith #undef __FUNCT__ 93066ed3db0SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_8" 93166ed3db0SBarry Smith int MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 93266ed3db0SBarry Smith { 93366ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 93466ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 93587828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 936bfec09a0SHong Zhang int ierr,m = b->AIJ->m,*idx,*ii; 93766ed3db0SBarry Smith int n,i,jrow,j; 93866ed3db0SBarry Smith 93966ed3db0SBarry Smith PetscFunctionBegin; 9402f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 9412f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 94266ed3db0SBarry Smith idx = a->j; 94366ed3db0SBarry Smith v = a->a; 94466ed3db0SBarry Smith ii = a->i; 94566ed3db0SBarry Smith 94666ed3db0SBarry Smith for (i=0; i<m; i++) { 94766ed3db0SBarry Smith jrow = ii[i]; 94866ed3db0SBarry Smith n = ii[i+1] - jrow; 94966ed3db0SBarry Smith sum1 = 0.0; 95066ed3db0SBarry Smith sum2 = 0.0; 95166ed3db0SBarry Smith sum3 = 0.0; 95266ed3db0SBarry Smith sum4 = 0.0; 95366ed3db0SBarry Smith sum5 = 0.0; 95466ed3db0SBarry Smith sum6 = 0.0; 95566ed3db0SBarry Smith sum7 = 0.0; 95666ed3db0SBarry Smith sum8 = 0.0; 95766ed3db0SBarry Smith for (j=0; j<n; j++) { 95866ed3db0SBarry Smith sum1 += v[jrow]*x[8*idx[jrow]]; 95966ed3db0SBarry Smith sum2 += v[jrow]*x[8*idx[jrow]+1]; 96066ed3db0SBarry Smith sum3 += v[jrow]*x[8*idx[jrow]+2]; 96166ed3db0SBarry Smith sum4 += v[jrow]*x[8*idx[jrow]+3]; 96266ed3db0SBarry Smith sum5 += v[jrow]*x[8*idx[jrow]+4]; 96366ed3db0SBarry Smith sum6 += v[jrow]*x[8*idx[jrow]+5]; 96466ed3db0SBarry Smith sum7 += v[jrow]*x[8*idx[jrow]+6]; 96566ed3db0SBarry Smith sum8 += v[jrow]*x[8*idx[jrow]+7]; 96666ed3db0SBarry Smith jrow++; 96766ed3db0SBarry Smith } 96866ed3db0SBarry Smith y[8*i] = sum1; 96966ed3db0SBarry Smith y[8*i+1] = sum2; 97066ed3db0SBarry Smith y[8*i+2] = sum3; 97166ed3db0SBarry Smith y[8*i+3] = sum4; 97266ed3db0SBarry Smith y[8*i+4] = sum5; 97366ed3db0SBarry Smith y[8*i+5] = sum6; 97466ed3db0SBarry Smith y[8*i+6] = sum7; 97566ed3db0SBarry Smith y[8*i+7] = sum8; 97666ed3db0SBarry Smith } 97766ed3db0SBarry Smith 97866ed3db0SBarry Smith PetscLogFlops(16*a->nz - 8*m); 9792f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 9802f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 98166ed3db0SBarry Smith PetscFunctionReturn(0); 98266ed3db0SBarry Smith } 98366ed3db0SBarry Smith 98466ed3db0SBarry Smith #undef __FUNCT__ 98566ed3db0SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 98666ed3db0SBarry Smith int MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 98766ed3db0SBarry Smith { 98866ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 98966ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 99087828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 991bfec09a0SHong Zhang int ierr,m = b->AIJ->m,n,i,*idx; 99266ed3db0SBarry Smith 99366ed3db0SBarry Smith PetscFunctionBegin; 99466ed3db0SBarry Smith ierr = VecSet(&zero,yy);CHKERRQ(ierr); 9952f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 9962f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 997bfec09a0SHong Zhang 99866ed3db0SBarry Smith for (i=0; i<m; i++) { 999bfec09a0SHong Zhang idx = a->j + a->i[i] ; 1000bfec09a0SHong Zhang v = a->a + a->i[i] ; 100166ed3db0SBarry Smith n = a->i[i+1] - a->i[i]; 100266ed3db0SBarry Smith alpha1 = x[8*i]; 100366ed3db0SBarry Smith alpha2 = x[8*i+1]; 100466ed3db0SBarry Smith alpha3 = x[8*i+2]; 100566ed3db0SBarry Smith alpha4 = x[8*i+3]; 100666ed3db0SBarry Smith alpha5 = x[8*i+4]; 100766ed3db0SBarry Smith alpha6 = x[8*i+5]; 100866ed3db0SBarry Smith alpha7 = x[8*i+6]; 100966ed3db0SBarry Smith alpha8 = x[8*i+7]; 101066ed3db0SBarry Smith while (n-->0) { 101166ed3db0SBarry Smith y[8*(*idx)] += alpha1*(*v); 101266ed3db0SBarry Smith y[8*(*idx)+1] += alpha2*(*v); 101366ed3db0SBarry Smith y[8*(*idx)+2] += alpha3*(*v); 101466ed3db0SBarry Smith y[8*(*idx)+3] += alpha4*(*v); 101566ed3db0SBarry Smith y[8*(*idx)+4] += alpha5*(*v); 101666ed3db0SBarry Smith y[8*(*idx)+5] += alpha6*(*v); 101766ed3db0SBarry Smith y[8*(*idx)+6] += alpha7*(*v); 101866ed3db0SBarry Smith y[8*(*idx)+7] += alpha8*(*v); 101966ed3db0SBarry Smith idx++; v++; 102066ed3db0SBarry Smith } 102166ed3db0SBarry Smith } 102266ed3db0SBarry Smith PetscLogFlops(16*a->nz - 8*b->AIJ->n); 10232f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 10242f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 102566ed3db0SBarry Smith PetscFunctionReturn(0); 102666ed3db0SBarry Smith } 102766ed3db0SBarry Smith 102866ed3db0SBarry Smith #undef __FUNCT__ 102966ed3db0SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 103066ed3db0SBarry Smith int MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 103166ed3db0SBarry Smith { 103266ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 103366ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 103487828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1035bfec09a0SHong Zhang int ierr,m = b->AIJ->m,*idx,*ii; 103666ed3db0SBarry Smith int n,i,jrow,j; 103766ed3db0SBarry Smith 103866ed3db0SBarry Smith PetscFunctionBegin; 103966ed3db0SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 10402f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 10412f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 104266ed3db0SBarry Smith idx = a->j; 104366ed3db0SBarry Smith v = a->a; 104466ed3db0SBarry Smith ii = a->i; 104566ed3db0SBarry Smith 104666ed3db0SBarry Smith for (i=0; i<m; i++) { 104766ed3db0SBarry Smith jrow = ii[i]; 104866ed3db0SBarry Smith n = ii[i+1] - jrow; 104966ed3db0SBarry Smith sum1 = 0.0; 105066ed3db0SBarry Smith sum2 = 0.0; 105166ed3db0SBarry Smith sum3 = 0.0; 105266ed3db0SBarry Smith sum4 = 0.0; 105366ed3db0SBarry Smith sum5 = 0.0; 105466ed3db0SBarry Smith sum6 = 0.0; 105566ed3db0SBarry Smith sum7 = 0.0; 105666ed3db0SBarry Smith sum8 = 0.0; 105766ed3db0SBarry Smith for (j=0; j<n; j++) { 105866ed3db0SBarry Smith sum1 += v[jrow]*x[8*idx[jrow]]; 105966ed3db0SBarry Smith sum2 += v[jrow]*x[8*idx[jrow]+1]; 106066ed3db0SBarry Smith sum3 += v[jrow]*x[8*idx[jrow]+2]; 106166ed3db0SBarry Smith sum4 += v[jrow]*x[8*idx[jrow]+3]; 106266ed3db0SBarry Smith sum5 += v[jrow]*x[8*idx[jrow]+4]; 106366ed3db0SBarry Smith sum6 += v[jrow]*x[8*idx[jrow]+5]; 106466ed3db0SBarry Smith sum7 += v[jrow]*x[8*idx[jrow]+6]; 106566ed3db0SBarry Smith sum8 += v[jrow]*x[8*idx[jrow]+7]; 106666ed3db0SBarry Smith jrow++; 106766ed3db0SBarry Smith } 106866ed3db0SBarry Smith y[8*i] += sum1; 106966ed3db0SBarry Smith y[8*i+1] += sum2; 107066ed3db0SBarry Smith y[8*i+2] += sum3; 107166ed3db0SBarry Smith y[8*i+3] += sum4; 107266ed3db0SBarry Smith y[8*i+4] += sum5; 107366ed3db0SBarry Smith y[8*i+5] += sum6; 107466ed3db0SBarry Smith y[8*i+6] += sum7; 107566ed3db0SBarry Smith y[8*i+7] += sum8; 107666ed3db0SBarry Smith } 107766ed3db0SBarry Smith 107866ed3db0SBarry Smith PetscLogFlops(16*a->nz); 10792f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 10802f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 108166ed3db0SBarry Smith PetscFunctionReturn(0); 108266ed3db0SBarry Smith } 108366ed3db0SBarry Smith 108466ed3db0SBarry Smith #undef __FUNCT__ 108566ed3db0SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 108666ed3db0SBarry Smith int MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 108766ed3db0SBarry Smith { 108866ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 108966ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 109087828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1091bfec09a0SHong Zhang int ierr,m = b->AIJ->m,n,i,*idx; 109266ed3db0SBarry Smith 109366ed3db0SBarry Smith PetscFunctionBegin; 109466ed3db0SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 10952f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 10962f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 109766ed3db0SBarry Smith for (i=0; i<m; i++) { 1098bfec09a0SHong Zhang idx = a->j + a->i[i] ; 1099bfec09a0SHong Zhang v = a->a + a->i[i] ; 110066ed3db0SBarry Smith n = a->i[i+1] - a->i[i]; 110166ed3db0SBarry Smith alpha1 = x[8*i]; 110266ed3db0SBarry Smith alpha2 = x[8*i+1]; 110366ed3db0SBarry Smith alpha3 = x[8*i+2]; 110466ed3db0SBarry Smith alpha4 = x[8*i+3]; 110566ed3db0SBarry Smith alpha5 = x[8*i+4]; 110666ed3db0SBarry Smith alpha6 = x[8*i+5]; 110766ed3db0SBarry Smith alpha7 = x[8*i+6]; 110866ed3db0SBarry Smith alpha8 = x[8*i+7]; 110966ed3db0SBarry Smith while (n-->0) { 111066ed3db0SBarry Smith y[8*(*idx)] += alpha1*(*v); 111166ed3db0SBarry Smith y[8*(*idx)+1] += alpha2*(*v); 111266ed3db0SBarry Smith y[8*(*idx)+2] += alpha3*(*v); 111366ed3db0SBarry Smith y[8*(*idx)+3] += alpha4*(*v); 111466ed3db0SBarry Smith y[8*(*idx)+4] += alpha5*(*v); 111566ed3db0SBarry Smith y[8*(*idx)+5] += alpha6*(*v); 111666ed3db0SBarry Smith y[8*(*idx)+6] += alpha7*(*v); 111766ed3db0SBarry Smith y[8*(*idx)+7] += alpha8*(*v); 111866ed3db0SBarry Smith idx++; v++; 111966ed3db0SBarry Smith } 112066ed3db0SBarry Smith } 112166ed3db0SBarry Smith PetscLogFlops(16*a->nz); 11222f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 11232f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 11242f7816d4SBarry Smith PetscFunctionReturn(0); 11252f7816d4SBarry Smith } 11262f7816d4SBarry Smith 11272b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/ 11282b692628SMatthew Knepley #undef __FUNCT__ 11292b692628SMatthew Knepley #define __FUNCT__ "MatMult_SeqMAIJ_9" 11302b692628SMatthew Knepley int MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 11312b692628SMatthew Knepley { 11322b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 11332b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 11342b692628SMatthew Knepley PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 11352b692628SMatthew Knepley int ierr,m = b->AIJ->m,*idx,*ii; 11362b692628SMatthew Knepley int n,i,jrow,j; 11372b692628SMatthew Knepley 11382b692628SMatthew Knepley PetscFunctionBegin; 11392b692628SMatthew Knepley ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 11402b692628SMatthew Knepley ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 11412b692628SMatthew Knepley idx = a->j; 11422b692628SMatthew Knepley v = a->a; 11432b692628SMatthew Knepley ii = a->i; 11442b692628SMatthew Knepley 11452b692628SMatthew Knepley for (i=0; i<m; i++) { 11462b692628SMatthew Knepley jrow = ii[i]; 11472b692628SMatthew Knepley n = ii[i+1] - jrow; 11482b692628SMatthew Knepley sum1 = 0.0; 11492b692628SMatthew Knepley sum2 = 0.0; 11502b692628SMatthew Knepley sum3 = 0.0; 11512b692628SMatthew Knepley sum4 = 0.0; 11522b692628SMatthew Knepley sum5 = 0.0; 11532b692628SMatthew Knepley sum6 = 0.0; 11542b692628SMatthew Knepley sum7 = 0.0; 11552b692628SMatthew Knepley sum8 = 0.0; 11562b692628SMatthew Knepley sum9 = 0.0; 11572b692628SMatthew Knepley for (j=0; j<n; j++) { 11582b692628SMatthew Knepley sum1 += v[jrow]*x[9*idx[jrow]]; 11592b692628SMatthew Knepley sum2 += v[jrow]*x[9*idx[jrow]+1]; 11602b692628SMatthew Knepley sum3 += v[jrow]*x[9*idx[jrow]+2]; 11612b692628SMatthew Knepley sum4 += v[jrow]*x[9*idx[jrow]+3]; 11622b692628SMatthew Knepley sum5 += v[jrow]*x[9*idx[jrow]+4]; 11632b692628SMatthew Knepley sum6 += v[jrow]*x[9*idx[jrow]+5]; 11642b692628SMatthew Knepley sum7 += v[jrow]*x[9*idx[jrow]+6]; 11652b692628SMatthew Knepley sum8 += v[jrow]*x[9*idx[jrow]+7]; 11662b692628SMatthew Knepley sum9 += v[jrow]*x[9*idx[jrow]+8]; 11672b692628SMatthew Knepley jrow++; 11682b692628SMatthew Knepley } 11692b692628SMatthew Knepley y[9*i] = sum1; 11702b692628SMatthew Knepley y[9*i+1] = sum2; 11712b692628SMatthew Knepley y[9*i+2] = sum3; 11722b692628SMatthew Knepley y[9*i+3] = sum4; 11732b692628SMatthew Knepley y[9*i+4] = sum5; 11742b692628SMatthew Knepley y[9*i+5] = sum6; 11752b692628SMatthew Knepley y[9*i+6] = sum7; 11762b692628SMatthew Knepley y[9*i+7] = sum8; 11772b692628SMatthew Knepley y[9*i+8] = sum9; 11782b692628SMatthew Knepley } 11792b692628SMatthew Knepley 11802b692628SMatthew Knepley PetscLogFlops(18*a->nz - 9*m); 11812b692628SMatthew Knepley ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 11822b692628SMatthew Knepley ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 11832b692628SMatthew Knepley PetscFunctionReturn(0); 11842b692628SMatthew Knepley } 11852b692628SMatthew Knepley 11862b692628SMatthew Knepley #undef __FUNCT__ 11872b692628SMatthew Knepley #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9" 11882b692628SMatthew Knepley int MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 11892b692628SMatthew Knepley { 11902b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 11912b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 11922b692628SMatthew Knepley PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0; 11932b692628SMatthew Knepley int ierr,m = b->AIJ->m,n,i,*idx; 11942b692628SMatthew Knepley 11952b692628SMatthew Knepley PetscFunctionBegin; 11962b692628SMatthew Knepley ierr = VecSet(&zero,yy);CHKERRQ(ierr); 11972b692628SMatthew Knepley ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 11982b692628SMatthew Knepley ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 11992b692628SMatthew Knepley 12002b692628SMatthew Knepley for (i=0; i<m; i++) { 12012b692628SMatthew Knepley idx = a->j + a->i[i] ; 12022b692628SMatthew Knepley v = a->a + a->i[i] ; 12032b692628SMatthew Knepley n = a->i[i+1] - a->i[i]; 12042b692628SMatthew Knepley alpha1 = x[9*i]; 12052b692628SMatthew Knepley alpha2 = x[9*i+1]; 12062b692628SMatthew Knepley alpha3 = x[9*i+2]; 12072b692628SMatthew Knepley alpha4 = x[9*i+3]; 12082b692628SMatthew Knepley alpha5 = x[9*i+4]; 12092b692628SMatthew Knepley alpha6 = x[9*i+5]; 12102b692628SMatthew Knepley alpha7 = x[9*i+6]; 12112b692628SMatthew Knepley alpha8 = x[9*i+7]; 1212*2f6bd0c6SMatthew Knepley alpha9 = x[9*i+8]; 12132b692628SMatthew Knepley while (n-->0) { 12142b692628SMatthew Knepley y[9*(*idx)] += alpha1*(*v); 12152b692628SMatthew Knepley y[9*(*idx)+1] += alpha2*(*v); 12162b692628SMatthew Knepley y[9*(*idx)+2] += alpha3*(*v); 12172b692628SMatthew Knepley y[9*(*idx)+3] += alpha4*(*v); 12182b692628SMatthew Knepley y[9*(*idx)+4] += alpha5*(*v); 12192b692628SMatthew Knepley y[9*(*idx)+5] += alpha6*(*v); 12202b692628SMatthew Knepley y[9*(*idx)+6] += alpha7*(*v); 12212b692628SMatthew Knepley y[9*(*idx)+7] += alpha8*(*v); 12222b692628SMatthew Knepley y[9*(*idx)+8] += alpha9*(*v); 12232b692628SMatthew Knepley idx++; v++; 12242b692628SMatthew Knepley } 12252b692628SMatthew Knepley } 12262b692628SMatthew Knepley PetscLogFlops(18*a->nz - 9*b->AIJ->n); 12272b692628SMatthew Knepley ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 12282b692628SMatthew Knepley ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 12292b692628SMatthew Knepley PetscFunctionReturn(0); 12302b692628SMatthew Knepley } 12312b692628SMatthew Knepley 12322b692628SMatthew Knepley #undef __FUNCT__ 12332b692628SMatthew Knepley #define __FUNCT__ "MatMultAdd_SeqMAIJ_9" 12342b692628SMatthew Knepley int MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 12352b692628SMatthew Knepley { 12362b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 12372b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 12382b692628SMatthew Knepley PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 12392b692628SMatthew Knepley int ierr,m = b->AIJ->m,*idx,*ii; 12402b692628SMatthew Knepley int n,i,jrow,j; 12412b692628SMatthew Knepley 12422b692628SMatthew Knepley PetscFunctionBegin; 12432b692628SMatthew Knepley if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 12442b692628SMatthew Knepley ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 12452b692628SMatthew Knepley ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 12462b692628SMatthew Knepley idx = a->j; 12472b692628SMatthew Knepley v = a->a; 12482b692628SMatthew Knepley ii = a->i; 12492b692628SMatthew Knepley 12502b692628SMatthew Knepley for (i=0; i<m; i++) { 12512b692628SMatthew Knepley jrow = ii[i]; 12522b692628SMatthew Knepley n = ii[i+1] - jrow; 12532b692628SMatthew Knepley sum1 = 0.0; 12542b692628SMatthew Knepley sum2 = 0.0; 12552b692628SMatthew Knepley sum3 = 0.0; 12562b692628SMatthew Knepley sum4 = 0.0; 12572b692628SMatthew Knepley sum5 = 0.0; 12582b692628SMatthew Knepley sum6 = 0.0; 12592b692628SMatthew Knepley sum7 = 0.0; 12602b692628SMatthew Knepley sum8 = 0.0; 12612b692628SMatthew Knepley sum9 = 0.0; 12622b692628SMatthew Knepley for (j=0; j<n; j++) { 12632b692628SMatthew Knepley sum1 += v[jrow]*x[9*idx[jrow]]; 12642b692628SMatthew Knepley sum2 += v[jrow]*x[9*idx[jrow]+1]; 12652b692628SMatthew Knepley sum3 += v[jrow]*x[9*idx[jrow]+2]; 12662b692628SMatthew Knepley sum4 += v[jrow]*x[9*idx[jrow]+3]; 12672b692628SMatthew Knepley sum5 += v[jrow]*x[9*idx[jrow]+4]; 12682b692628SMatthew Knepley sum6 += v[jrow]*x[9*idx[jrow]+5]; 12692b692628SMatthew Knepley sum7 += v[jrow]*x[9*idx[jrow]+6]; 12702b692628SMatthew Knepley sum8 += v[jrow]*x[9*idx[jrow]+7]; 12712b692628SMatthew Knepley sum9 += v[jrow]*x[9*idx[jrow]+8]; 12722b692628SMatthew Knepley jrow++; 12732b692628SMatthew Knepley } 12742b692628SMatthew Knepley y[9*i] += sum1; 12752b692628SMatthew Knepley y[9*i+1] += sum2; 12762b692628SMatthew Knepley y[9*i+2] += sum3; 12772b692628SMatthew Knepley y[9*i+3] += sum4; 12782b692628SMatthew Knepley y[9*i+4] += sum5; 12792b692628SMatthew Knepley y[9*i+5] += sum6; 12802b692628SMatthew Knepley y[9*i+6] += sum7; 12812b692628SMatthew Knepley y[9*i+7] += sum8; 12822b692628SMatthew Knepley y[9*i+8] += sum9; 12832b692628SMatthew Knepley } 12842b692628SMatthew Knepley 12852b692628SMatthew Knepley PetscLogFlops(18*a->nz); 12862b692628SMatthew Knepley ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 12872b692628SMatthew Knepley ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 12882b692628SMatthew Knepley PetscFunctionReturn(0); 12892b692628SMatthew Knepley } 12902b692628SMatthew Knepley 12912b692628SMatthew Knepley #undef __FUNCT__ 12922b692628SMatthew Knepley #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9" 12932b692628SMatthew Knepley int MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 12942b692628SMatthew Knepley { 12952b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 12962b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 12972b692628SMatthew Knepley PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 12982b692628SMatthew Knepley int ierr,m = b->AIJ->m,n,i,*idx; 12992b692628SMatthew Knepley 13002b692628SMatthew Knepley PetscFunctionBegin; 13012b692628SMatthew Knepley if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 13022b692628SMatthew Knepley ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 13032b692628SMatthew Knepley ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 13042b692628SMatthew Knepley for (i=0; i<m; i++) { 13052b692628SMatthew Knepley idx = a->j + a->i[i] ; 13062b692628SMatthew Knepley v = a->a + a->i[i] ; 13072b692628SMatthew Knepley n = a->i[i+1] - a->i[i]; 13082b692628SMatthew Knepley alpha1 = x[9*i]; 13092b692628SMatthew Knepley alpha2 = x[9*i+1]; 13102b692628SMatthew Knepley alpha3 = x[9*i+2]; 13112b692628SMatthew Knepley alpha4 = x[9*i+3]; 13122b692628SMatthew Knepley alpha5 = x[9*i+4]; 13132b692628SMatthew Knepley alpha6 = x[9*i+5]; 13142b692628SMatthew Knepley alpha7 = x[9*i+6]; 13152b692628SMatthew Knepley alpha8 = x[9*i+7]; 13162b692628SMatthew Knepley alpha9 = x[9*i+8]; 13172b692628SMatthew Knepley while (n-->0) { 13182b692628SMatthew Knepley y[9*(*idx)] += alpha1*(*v); 13192b692628SMatthew Knepley y[9*(*idx)+1] += alpha2*(*v); 13202b692628SMatthew Knepley y[9*(*idx)+2] += alpha3*(*v); 13212b692628SMatthew Knepley y[9*(*idx)+3] += alpha4*(*v); 13222b692628SMatthew Knepley y[9*(*idx)+4] += alpha5*(*v); 13232b692628SMatthew Knepley y[9*(*idx)+5] += alpha6*(*v); 13242b692628SMatthew Knepley y[9*(*idx)+6] += alpha7*(*v); 13252b692628SMatthew Knepley y[9*(*idx)+7] += alpha8*(*v); 13262b692628SMatthew Knepley y[9*(*idx)+8] += alpha9*(*v); 13272b692628SMatthew Knepley idx++; v++; 13282b692628SMatthew Knepley } 13292b692628SMatthew Knepley } 13302b692628SMatthew Knepley PetscLogFlops(18*a->nz); 13312b692628SMatthew Knepley ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 13322b692628SMatthew Knepley ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 13332b692628SMatthew Knepley PetscFunctionReturn(0); 13342b692628SMatthew Knepley } 13352b692628SMatthew Knepley 13362f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/ 13372f7816d4SBarry Smith #undef __FUNCT__ 13382f7816d4SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_16" 13392f7816d4SBarry Smith int MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 13402f7816d4SBarry Smith { 13412f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 13422f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 13432f7816d4SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 13442f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1345bfec09a0SHong Zhang int ierr,m = b->AIJ->m,*idx,*ii; 13462f7816d4SBarry Smith int n,i,jrow,j; 13472f7816d4SBarry Smith 13482f7816d4SBarry Smith PetscFunctionBegin; 13492f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 13502f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 13512f7816d4SBarry Smith idx = a->j; 13522f7816d4SBarry Smith v = a->a; 13532f7816d4SBarry Smith ii = a->i; 13542f7816d4SBarry Smith 13552f7816d4SBarry Smith for (i=0; i<m; i++) { 13562f7816d4SBarry Smith jrow = ii[i]; 13572f7816d4SBarry Smith n = ii[i+1] - jrow; 13582f7816d4SBarry Smith sum1 = 0.0; 13592f7816d4SBarry Smith sum2 = 0.0; 13602f7816d4SBarry Smith sum3 = 0.0; 13612f7816d4SBarry Smith sum4 = 0.0; 13622f7816d4SBarry Smith sum5 = 0.0; 13632f7816d4SBarry Smith sum6 = 0.0; 13642f7816d4SBarry Smith sum7 = 0.0; 13652f7816d4SBarry Smith sum8 = 0.0; 13662f7816d4SBarry Smith sum9 = 0.0; 13672f7816d4SBarry Smith sum10 = 0.0; 13682f7816d4SBarry Smith sum11 = 0.0; 13692f7816d4SBarry Smith sum12 = 0.0; 13702f7816d4SBarry Smith sum13 = 0.0; 13712f7816d4SBarry Smith sum14 = 0.0; 13722f7816d4SBarry Smith sum15 = 0.0; 13732f7816d4SBarry Smith sum16 = 0.0; 13742f7816d4SBarry Smith for (j=0; j<n; j++) { 13752f7816d4SBarry Smith sum1 += v[jrow]*x[16*idx[jrow]]; 13762f7816d4SBarry Smith sum2 += v[jrow]*x[16*idx[jrow]+1]; 13772f7816d4SBarry Smith sum3 += v[jrow]*x[16*idx[jrow]+2]; 13782f7816d4SBarry Smith sum4 += v[jrow]*x[16*idx[jrow]+3]; 13792f7816d4SBarry Smith sum5 += v[jrow]*x[16*idx[jrow]+4]; 13802f7816d4SBarry Smith sum6 += v[jrow]*x[16*idx[jrow]+5]; 13812f7816d4SBarry Smith sum7 += v[jrow]*x[16*idx[jrow]+6]; 13822f7816d4SBarry Smith sum8 += v[jrow]*x[16*idx[jrow]+7]; 13832f7816d4SBarry Smith sum9 += v[jrow]*x[16*idx[jrow]+8]; 13842f7816d4SBarry Smith sum10 += v[jrow]*x[16*idx[jrow]+9]; 13852f7816d4SBarry Smith sum11 += v[jrow]*x[16*idx[jrow]+10]; 13862f7816d4SBarry Smith sum12 += v[jrow]*x[16*idx[jrow]+11]; 13872f7816d4SBarry Smith sum13 += v[jrow]*x[16*idx[jrow]+12]; 13882f7816d4SBarry Smith sum14 += v[jrow]*x[16*idx[jrow]+13]; 13892f7816d4SBarry Smith sum15 += v[jrow]*x[16*idx[jrow]+14]; 13902f7816d4SBarry Smith sum16 += v[jrow]*x[16*idx[jrow]+15]; 13912f7816d4SBarry Smith jrow++; 13922f7816d4SBarry Smith } 13932f7816d4SBarry Smith y[16*i] = sum1; 13942f7816d4SBarry Smith y[16*i+1] = sum2; 13952f7816d4SBarry Smith y[16*i+2] = sum3; 13962f7816d4SBarry Smith y[16*i+3] = sum4; 13972f7816d4SBarry Smith y[16*i+4] = sum5; 13982f7816d4SBarry Smith y[16*i+5] = sum6; 13992f7816d4SBarry Smith y[16*i+6] = sum7; 14002f7816d4SBarry Smith y[16*i+7] = sum8; 14012f7816d4SBarry Smith y[16*i+8] = sum9; 14022f7816d4SBarry Smith y[16*i+9] = sum10; 14032f7816d4SBarry Smith y[16*i+10] = sum11; 14042f7816d4SBarry Smith y[16*i+11] = sum12; 14052f7816d4SBarry Smith y[16*i+12] = sum13; 14062f7816d4SBarry Smith y[16*i+13] = sum14; 14072f7816d4SBarry Smith y[16*i+14] = sum15; 14082f7816d4SBarry Smith y[16*i+15] = sum16; 14092f7816d4SBarry Smith } 14102f7816d4SBarry Smith 14112f7816d4SBarry Smith PetscLogFlops(32*a->nz - 16*m); 14122f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 14132f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 14142f7816d4SBarry Smith PetscFunctionReturn(0); 14152f7816d4SBarry Smith } 14162f7816d4SBarry Smith 14172f7816d4SBarry Smith #undef __FUNCT__ 14182f7816d4SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 14192f7816d4SBarry Smith int MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 14202f7816d4SBarry Smith { 14212f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 14222f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 14232f7816d4SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 14242f7816d4SBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1425bfec09a0SHong Zhang int ierr,m = b->AIJ->m,n,i,*idx; 14262f7816d4SBarry Smith 14272f7816d4SBarry Smith PetscFunctionBegin; 14282f7816d4SBarry Smith ierr = VecSet(&zero,yy);CHKERRQ(ierr); 14292f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 14302f7816d4SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 1431bfec09a0SHong Zhang 14322f7816d4SBarry Smith for (i=0; i<m; i++) { 1433bfec09a0SHong Zhang idx = a->j + a->i[i] ; 1434bfec09a0SHong Zhang v = a->a + a->i[i] ; 14352f7816d4SBarry Smith n = a->i[i+1] - a->i[i]; 14362f7816d4SBarry Smith alpha1 = x[16*i]; 14372f7816d4SBarry Smith alpha2 = x[16*i+1]; 14382f7816d4SBarry Smith alpha3 = x[16*i+2]; 14392f7816d4SBarry Smith alpha4 = x[16*i+3]; 14402f7816d4SBarry Smith alpha5 = x[16*i+4]; 14412f7816d4SBarry Smith alpha6 = x[16*i+5]; 14422f7816d4SBarry Smith alpha7 = x[16*i+6]; 14432f7816d4SBarry Smith alpha8 = x[16*i+7]; 14442f7816d4SBarry Smith alpha9 = x[16*i+8]; 14452f7816d4SBarry Smith alpha10 = x[16*i+9]; 14462f7816d4SBarry Smith alpha11 = x[16*i+10]; 14472f7816d4SBarry Smith alpha12 = x[16*i+11]; 14482f7816d4SBarry Smith alpha13 = x[16*i+12]; 14492f7816d4SBarry Smith alpha14 = x[16*i+13]; 14502f7816d4SBarry Smith alpha15 = x[16*i+14]; 14512f7816d4SBarry Smith alpha16 = x[16*i+15]; 14522f7816d4SBarry Smith while (n-->0) { 14532f7816d4SBarry Smith y[16*(*idx)] += alpha1*(*v); 14542f7816d4SBarry Smith y[16*(*idx)+1] += alpha2*(*v); 14552f7816d4SBarry Smith y[16*(*idx)+2] += alpha3*(*v); 14562f7816d4SBarry Smith y[16*(*idx)+3] += alpha4*(*v); 14572f7816d4SBarry Smith y[16*(*idx)+4] += alpha5*(*v); 14582f7816d4SBarry Smith y[16*(*idx)+5] += alpha6*(*v); 14592f7816d4SBarry Smith y[16*(*idx)+6] += alpha7*(*v); 14602f7816d4SBarry Smith y[16*(*idx)+7] += alpha8*(*v); 14612f7816d4SBarry Smith y[16*(*idx)+8] += alpha9*(*v); 14622f7816d4SBarry Smith y[16*(*idx)+9] += alpha10*(*v); 14632f7816d4SBarry Smith y[16*(*idx)+10] += alpha11*(*v); 14642f7816d4SBarry Smith y[16*(*idx)+11] += alpha12*(*v); 14652f7816d4SBarry Smith y[16*(*idx)+12] += alpha13*(*v); 14662f7816d4SBarry Smith y[16*(*idx)+13] += alpha14*(*v); 14672f7816d4SBarry Smith y[16*(*idx)+14] += alpha15*(*v); 14682f7816d4SBarry Smith y[16*(*idx)+15] += alpha16*(*v); 14692f7816d4SBarry Smith idx++; v++; 14702f7816d4SBarry Smith } 14712f7816d4SBarry Smith } 14722f7816d4SBarry Smith PetscLogFlops(32*a->nz - 16*b->AIJ->n); 14732f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 14742f7816d4SBarry Smith ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 14752f7816d4SBarry Smith PetscFunctionReturn(0); 14762f7816d4SBarry Smith } 14772f7816d4SBarry Smith 14782f7816d4SBarry Smith #undef __FUNCT__ 14792f7816d4SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 14802f7816d4SBarry Smith int MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 14812f7816d4SBarry Smith { 14822f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 14832f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 14842f7816d4SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 14852f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1486bfec09a0SHong Zhang int ierr,m = b->AIJ->m,*idx,*ii; 14872f7816d4SBarry Smith int n,i,jrow,j; 14882f7816d4SBarry Smith 14892f7816d4SBarry Smith PetscFunctionBegin; 14902f7816d4SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 14912f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 14922f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 14932f7816d4SBarry Smith idx = a->j; 14942f7816d4SBarry Smith v = a->a; 14952f7816d4SBarry Smith ii = a->i; 14962f7816d4SBarry Smith 14972f7816d4SBarry Smith for (i=0; i<m; i++) { 14982f7816d4SBarry Smith jrow = ii[i]; 14992f7816d4SBarry Smith n = ii[i+1] - jrow; 15002f7816d4SBarry Smith sum1 = 0.0; 15012f7816d4SBarry Smith sum2 = 0.0; 15022f7816d4SBarry Smith sum3 = 0.0; 15032f7816d4SBarry Smith sum4 = 0.0; 15042f7816d4SBarry Smith sum5 = 0.0; 15052f7816d4SBarry Smith sum6 = 0.0; 15062f7816d4SBarry Smith sum7 = 0.0; 15072f7816d4SBarry Smith sum8 = 0.0; 15082f7816d4SBarry Smith sum9 = 0.0; 15092f7816d4SBarry Smith sum10 = 0.0; 15102f7816d4SBarry Smith sum11 = 0.0; 15112f7816d4SBarry Smith sum12 = 0.0; 15122f7816d4SBarry Smith sum13 = 0.0; 15132f7816d4SBarry Smith sum14 = 0.0; 15142f7816d4SBarry Smith sum15 = 0.0; 15152f7816d4SBarry Smith sum16 = 0.0; 15162f7816d4SBarry Smith for (j=0; j<n; j++) { 15172f7816d4SBarry Smith sum1 += v[jrow]*x[16*idx[jrow]]; 15182f7816d4SBarry Smith sum2 += v[jrow]*x[16*idx[jrow]+1]; 15192f7816d4SBarry Smith sum3 += v[jrow]*x[16*idx[jrow]+2]; 15202f7816d4SBarry Smith sum4 += v[jrow]*x[16*idx[jrow]+3]; 15212f7816d4SBarry Smith sum5 += v[jrow]*x[16*idx[jrow]+4]; 15222f7816d4SBarry Smith sum6 += v[jrow]*x[16*idx[jrow]+5]; 15232f7816d4SBarry Smith sum7 += v[jrow]*x[16*idx[jrow]+6]; 15242f7816d4SBarry Smith sum8 += v[jrow]*x[16*idx[jrow]+7]; 15252f7816d4SBarry Smith sum9 += v[jrow]*x[16*idx[jrow]+8]; 15262f7816d4SBarry Smith sum10 += v[jrow]*x[16*idx[jrow]+9]; 15272f7816d4SBarry Smith sum11 += v[jrow]*x[16*idx[jrow]+10]; 15282f7816d4SBarry Smith sum12 += v[jrow]*x[16*idx[jrow]+11]; 15292f7816d4SBarry Smith sum13 += v[jrow]*x[16*idx[jrow]+12]; 15302f7816d4SBarry Smith sum14 += v[jrow]*x[16*idx[jrow]+13]; 15312f7816d4SBarry Smith sum15 += v[jrow]*x[16*idx[jrow]+14]; 15322f7816d4SBarry Smith sum16 += v[jrow]*x[16*idx[jrow]+15]; 15332f7816d4SBarry Smith jrow++; 15342f7816d4SBarry Smith } 15352f7816d4SBarry Smith y[16*i] += sum1; 15362f7816d4SBarry Smith y[16*i+1] += sum2; 15372f7816d4SBarry Smith y[16*i+2] += sum3; 15382f7816d4SBarry Smith y[16*i+3] += sum4; 15392f7816d4SBarry Smith y[16*i+4] += sum5; 15402f7816d4SBarry Smith y[16*i+5] += sum6; 15412f7816d4SBarry Smith y[16*i+6] += sum7; 15422f7816d4SBarry Smith y[16*i+7] += sum8; 15432f7816d4SBarry Smith y[16*i+8] += sum9; 15442f7816d4SBarry Smith y[16*i+9] += sum10; 15452f7816d4SBarry Smith y[16*i+10] += sum11; 15462f7816d4SBarry Smith y[16*i+11] += sum12; 15472f7816d4SBarry Smith y[16*i+12] += sum13; 15482f7816d4SBarry Smith y[16*i+13] += sum14; 15492f7816d4SBarry Smith y[16*i+14] += sum15; 15502f7816d4SBarry Smith y[16*i+15] += sum16; 15512f7816d4SBarry Smith } 15522f7816d4SBarry Smith 15532f7816d4SBarry Smith PetscLogFlops(32*a->nz); 15542f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 15552f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 15562f7816d4SBarry Smith PetscFunctionReturn(0); 15572f7816d4SBarry Smith } 15582f7816d4SBarry Smith 15592f7816d4SBarry Smith #undef __FUNCT__ 15602f7816d4SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 15612f7816d4SBarry Smith int MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 15622f7816d4SBarry Smith { 15632f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 15642f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 15652f7816d4SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 15662f7816d4SBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1567bfec09a0SHong Zhang int ierr,m = b->AIJ->m,n,i,*idx; 15682f7816d4SBarry Smith 15692f7816d4SBarry Smith PetscFunctionBegin; 15702f7816d4SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 15712f7816d4SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 15722f7816d4SBarry Smith ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 15732f7816d4SBarry Smith for (i=0; i<m; i++) { 1574bfec09a0SHong Zhang idx = a->j + a->i[i] ; 1575bfec09a0SHong Zhang v = a->a + a->i[i] ; 15762f7816d4SBarry Smith n = a->i[i+1] - a->i[i]; 15772f7816d4SBarry Smith alpha1 = x[16*i]; 15782f7816d4SBarry Smith alpha2 = x[16*i+1]; 15792f7816d4SBarry Smith alpha3 = x[16*i+2]; 15802f7816d4SBarry Smith alpha4 = x[16*i+3]; 15812f7816d4SBarry Smith alpha5 = x[16*i+4]; 15822f7816d4SBarry Smith alpha6 = x[16*i+5]; 15832f7816d4SBarry Smith alpha7 = x[16*i+6]; 15842f7816d4SBarry Smith alpha8 = x[16*i+7]; 15852f7816d4SBarry Smith alpha9 = x[16*i+8]; 15862f7816d4SBarry Smith alpha10 = x[16*i+9]; 15872f7816d4SBarry Smith alpha11 = x[16*i+10]; 15882f7816d4SBarry Smith alpha12 = x[16*i+11]; 15892f7816d4SBarry Smith alpha13 = x[16*i+12]; 15902f7816d4SBarry Smith alpha14 = x[16*i+13]; 15912f7816d4SBarry Smith alpha15 = x[16*i+14]; 15922f7816d4SBarry Smith alpha16 = x[16*i+15]; 15932f7816d4SBarry Smith while (n-->0) { 15942f7816d4SBarry Smith y[16*(*idx)] += alpha1*(*v); 15952f7816d4SBarry Smith y[16*(*idx)+1] += alpha2*(*v); 15962f7816d4SBarry Smith y[16*(*idx)+2] += alpha3*(*v); 15972f7816d4SBarry Smith y[16*(*idx)+3] += alpha4*(*v); 15982f7816d4SBarry Smith y[16*(*idx)+4] += alpha5*(*v); 15992f7816d4SBarry Smith y[16*(*idx)+5] += alpha6*(*v); 16002f7816d4SBarry Smith y[16*(*idx)+6] += alpha7*(*v); 16012f7816d4SBarry Smith y[16*(*idx)+7] += alpha8*(*v); 16022f7816d4SBarry Smith y[16*(*idx)+8] += alpha9*(*v); 16032f7816d4SBarry Smith y[16*(*idx)+9] += alpha10*(*v); 16042f7816d4SBarry Smith y[16*(*idx)+10] += alpha11*(*v); 16052f7816d4SBarry Smith y[16*(*idx)+11] += alpha12*(*v); 16062f7816d4SBarry Smith y[16*(*idx)+12] += alpha13*(*v); 16072f7816d4SBarry Smith y[16*(*idx)+13] += alpha14*(*v); 16082f7816d4SBarry Smith y[16*(*idx)+14] += alpha15*(*v); 16092f7816d4SBarry Smith y[16*(*idx)+15] += alpha16*(*v); 16102f7816d4SBarry Smith idx++; v++; 16112f7816d4SBarry Smith } 16122f7816d4SBarry Smith } 16132f7816d4SBarry Smith PetscLogFlops(32*a->nz); 16142f7816d4SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 16152f7816d4SBarry Smith ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 161666ed3db0SBarry Smith PetscFunctionReturn(0); 161766ed3db0SBarry Smith } 161866ed3db0SBarry Smith 1619f4a53059SBarry Smith /*===================================================================================*/ 16204a2ae208SSatish Balay #undef __FUNCT__ 16214a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIMAIJ_dof" 1622f4a53059SBarry Smith int MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 1623f4a53059SBarry Smith { 16244cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1625f4a53059SBarry Smith int ierr; 1626f4a53059SBarry Smith PetscFunctionBegin; 1627f4a53059SBarry Smith 1628f4a53059SBarry Smith /* start the scatter */ 1629f4a53059SBarry Smith ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 16304cb9d9b8SBarry Smith ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 1631f4a53059SBarry Smith ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 16324cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 1633f4a53059SBarry Smith PetscFunctionReturn(0); 1634f4a53059SBarry Smith } 1635f4a53059SBarry Smith 16364a2ae208SSatish Balay #undef __FUNCT__ 16374a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 16384cb9d9b8SBarry Smith int MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 16394cb9d9b8SBarry Smith { 16404cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 16414cb9d9b8SBarry Smith int ierr; 16424cb9d9b8SBarry Smith PetscFunctionBegin; 16434cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 16444cb9d9b8SBarry Smith ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 16454cb9d9b8SBarry Smith ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 16464cb9d9b8SBarry Smith ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 16474cb9d9b8SBarry Smith PetscFunctionReturn(0); 16484cb9d9b8SBarry Smith } 16494cb9d9b8SBarry Smith 16504a2ae208SSatish Balay #undef __FUNCT__ 16514a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 1652d72c5c08SBarry Smith int MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 16534cb9d9b8SBarry Smith { 16544cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 16554cb9d9b8SBarry Smith int ierr; 16564cb9d9b8SBarry Smith PetscFunctionBegin; 16574cb9d9b8SBarry Smith 16584cb9d9b8SBarry Smith /* start the scatter */ 16594cb9d9b8SBarry Smith ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1660d72c5c08SBarry Smith ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 16614cb9d9b8SBarry Smith ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1662d72c5c08SBarry Smith ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,zz);CHKERRQ(ierr); 16634cb9d9b8SBarry Smith PetscFunctionReturn(0); 16644cb9d9b8SBarry Smith } 16654cb9d9b8SBarry Smith 16664a2ae208SSatish Balay #undef __FUNCT__ 16674a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 1668d72c5c08SBarry Smith int MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 16694cb9d9b8SBarry Smith { 16704cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 16714cb9d9b8SBarry Smith int ierr; 16724cb9d9b8SBarry Smith PetscFunctionBegin; 16734cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 1674d72c5c08SBarry Smith ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1675d72c5c08SBarry Smith ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 1676d72c5c08SBarry Smith ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 16774cb9d9b8SBarry Smith PetscFunctionReturn(0); 16784cb9d9b8SBarry Smith } 16794cb9d9b8SBarry Smith 1680bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */ 16815983afb6SSatish Balay /*MC 16820bad9183SKris Buschelman MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 16830bad9183SKris Buschelman operations for multicomponent problems. It interpolates each component the same 16840bad9183SKris Buschelman way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 16850bad9183SKris Buschelman and MATMPIAIJ for distributed matrices. 16860bad9183SKris Buschelman 16870bad9183SKris Buschelman Operations provided: 16880bad9183SKris Buschelman . MatMult 16890bad9183SKris Buschelman . MatMultTranspose 16900bad9183SKris Buschelman . MatMultAdd 16910bad9183SKris Buschelman . MatMultTransposeAdd 16920bad9183SKris Buschelman 16930bad9183SKris Buschelman Level: advanced 16940bad9183SKris Buschelman 16950bad9183SKris Buschelman M*/ 16964a2ae208SSatish Balay #undef __FUNCT__ 16974a2ae208SSatish Balay #define __FUNCT__ "MatCreateMAIJ" 1698cd3bbe55SBarry Smith int MatCreateMAIJ(Mat A,int dof,Mat *maij) 169982b1193eSBarry Smith { 1700f4a53059SBarry Smith int ierr,size,n; 17014cb9d9b8SBarry Smith Mat_MPIMAIJ *b; 170282b1193eSBarry Smith Mat B; 170382b1193eSBarry Smith 170482b1193eSBarry Smith PetscFunctionBegin; 1705d72c5c08SBarry Smith ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 1706d72c5c08SBarry Smith 1707d72c5c08SBarry Smith if (dof == 1) { 1708d72c5c08SBarry Smith *maij = A; 1709d72c5c08SBarry Smith } else { 171083903688SBarry Smith ierr = MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);CHKERRQ(ierr); 1711cd3bbe55SBarry Smith B->assembled = PETSC_TRUE; 1712d72c5c08SBarry Smith 1713f4a53059SBarry Smith ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1714f4a53059SBarry Smith if (size == 1) { 1715b9b97703SBarry Smith ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 17164cb9d9b8SBarry Smith B->ops->destroy = MatDestroy_SeqMAIJ; 1717b9b97703SBarry Smith b = (Mat_MPIMAIJ*)B->data; 1718b9b97703SBarry Smith b->dof = dof; 17194cb9d9b8SBarry Smith b->AIJ = A; 1720d72c5c08SBarry Smith if (dof == 2) { 1721cd3bbe55SBarry Smith B->ops->mult = MatMult_SeqMAIJ_2; 1722cd3bbe55SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_2; 1723cd3bbe55SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 1724cd3bbe55SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 1725bcc973b7SBarry Smith } else if (dof == 3) { 1726bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_3; 1727bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_3; 1728bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 1729bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 1730bcc973b7SBarry Smith } else if (dof == 4) { 1731bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_4; 1732bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_4; 1733bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 1734bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 1735f9fae5adSBarry Smith } else if (dof == 5) { 1736f9fae5adSBarry Smith B->ops->mult = MatMult_SeqMAIJ_5; 1737f9fae5adSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_5; 1738f9fae5adSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 1739f9fae5adSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 17406cd98798SBarry Smith } else if (dof == 6) { 17416cd98798SBarry Smith B->ops->mult = MatMult_SeqMAIJ_6; 17426cd98798SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_6; 17436cd98798SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 17446cd98798SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 174566ed3db0SBarry Smith } else if (dof == 8) { 174666ed3db0SBarry Smith B->ops->mult = MatMult_SeqMAIJ_8; 174766ed3db0SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_8; 174866ed3db0SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 174966ed3db0SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 17502b692628SMatthew Knepley } else if (dof == 9) { 17512b692628SMatthew Knepley B->ops->mult = MatMult_SeqMAIJ_9; 17522b692628SMatthew Knepley B->ops->multadd = MatMultAdd_SeqMAIJ_9; 17532b692628SMatthew Knepley B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 17542b692628SMatthew Knepley B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 17552f7816d4SBarry Smith } else if (dof == 16) { 17562f7816d4SBarry Smith B->ops->mult = MatMult_SeqMAIJ_16; 17572f7816d4SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_16; 17582f7816d4SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 17592f7816d4SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 176082b1193eSBarry Smith } else { 176166ed3db0SBarry Smith SETERRQ1(1,"Cannot handle a dof of %d. Send request for code to petsc-maint@mcs.anl.gov\n",dof); 176282b1193eSBarry Smith } 1763f4a53059SBarry Smith } else { 1764f4a53059SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 1765f4a53059SBarry Smith IS from,to; 1766f4a53059SBarry Smith Vec gvec; 1767f4a53059SBarry Smith int *garray,i; 1768f4a53059SBarry Smith 1769b9b97703SBarry Smith ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 1770d72c5c08SBarry Smith B->ops->destroy = MatDestroy_MPIMAIJ; 1771b9b97703SBarry Smith b = (Mat_MPIMAIJ*)B->data; 1772b9b97703SBarry Smith b->dof = dof; 1773b9b97703SBarry Smith b->A = A; 17744cb9d9b8SBarry Smith ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 17754cb9d9b8SBarry Smith ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 17764cb9d9b8SBarry Smith 1777f4a53059SBarry Smith ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 1778f4a53059SBarry Smith ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr); 1779f4a53059SBarry Smith 1780f4a53059SBarry Smith /* create two temporary Index sets for build scatter gather */ 1781b0a32e0cSBarry Smith ierr = PetscMalloc((n+1)*sizeof(int),&garray);CHKERRQ(ierr); 1782f4a53059SBarry Smith for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i]; 1783f4a53059SBarry Smith ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr); 1784f4a53059SBarry Smith ierr = PetscFree(garray);CHKERRQ(ierr); 1785f4a53059SBarry Smith ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 1786f4a53059SBarry Smith 1787f4a53059SBarry Smith /* create temporary global vector to generate scatter context */ 1788f4a53059SBarry Smith ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr); 1789f4a53059SBarry Smith 1790f4a53059SBarry Smith /* generate the scatter context */ 1791f4a53059SBarry Smith ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 1792f4a53059SBarry Smith 1793f4a53059SBarry Smith ierr = ISDestroy(from);CHKERRQ(ierr); 1794f4a53059SBarry Smith ierr = ISDestroy(to);CHKERRQ(ierr); 1795f4a53059SBarry Smith ierr = VecDestroy(gvec);CHKERRQ(ierr); 1796f4a53059SBarry Smith 1797f4a53059SBarry Smith B->ops->mult = MatMult_MPIMAIJ_dof; 17984cb9d9b8SBarry Smith B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 17994cb9d9b8SBarry Smith B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 18004cb9d9b8SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 1801f4a53059SBarry Smith } 1802cd3bbe55SBarry Smith *maij = B; 1803d72c5c08SBarry Smith } 180482b1193eSBarry Smith PetscFunctionReturn(0); 180582b1193eSBarry Smith } 180682b1193eSBarry Smith 180782b1193eSBarry Smith 180882b1193eSBarry Smith 180982b1193eSBarry Smith 181082b1193eSBarry Smith 181182b1193eSBarry Smith 181282b1193eSBarry Smith 181382b1193eSBarry Smith 181482b1193eSBarry Smith 181582b1193eSBarry Smith 181682b1193eSBarry Smith 181782b1193eSBarry Smith 1818