1be1d678aSKris Buschelman #define PETSCMAT_DLL 2be1d678aSKris Buschelman 382b1193eSBarry Smith /* 4cd3bbe55SBarry Smith Defines the basic matrix operations for the MAIJ matrix storage format. 5cd3bbe55SBarry Smith This format is used for restriction and interpolation operations for 6cd3bbe55SBarry Smith multicomponent problems. It interpolates each component the same way 7cd3bbe55SBarry Smith independently. 8cd3bbe55SBarry Smith 9cd3bbe55SBarry Smith We provide: 10cd3bbe55SBarry Smith MatMult() 11cd3bbe55SBarry Smith MatMultTranspose() 12cd3bbe55SBarry Smith MatMultTransposeAdd() 13cd3bbe55SBarry Smith MatMultAdd() 14cd3bbe55SBarry Smith and 15cd3bbe55SBarry Smith MatCreateMAIJ(Mat,dof,Mat*) 16f4a53059SBarry Smith 17f4a53059SBarry Smith This single directory handles both the sequential and parallel codes 1882b1193eSBarry Smith */ 1982b1193eSBarry Smith 20be911618SKris Buschelman #include "src/mat/impls/maij/maij.h" 217ba1a0bfSKris Buschelman #include "src/mat/utils/freespace.h" 221d8d5f9aSSatish Balay #include "private/vecimpl.h" 2382b1193eSBarry Smith 244a2ae208SSatish Balay #undef __FUNCT__ 254a2ae208SSatish Balay #define __FUNCT__ "MatMAIJGetAIJ" 26be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJGetAIJ(Mat A,Mat *B) 27b9b97703SBarry Smith { 28dfbe8321SBarry Smith PetscErrorCode ierr; 29b9b97703SBarry Smith PetscTruth ismpimaij,isseqmaij; 30b9b97703SBarry Smith 31b9b97703SBarry Smith PetscFunctionBegin; 32b9b97703SBarry Smith ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 33b9b97703SBarry Smith ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 34b9b97703SBarry Smith if (ismpimaij) { 35b9b97703SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 36b9b97703SBarry Smith 37b9b97703SBarry Smith *B = b->A; 38b9b97703SBarry Smith } else if (isseqmaij) { 39b9b97703SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 40b9b97703SBarry Smith 41b9b97703SBarry Smith *B = b->AIJ; 42b9b97703SBarry Smith } else { 43b9b97703SBarry Smith *B = A; 44b9b97703SBarry Smith } 45b9b97703SBarry Smith PetscFunctionReturn(0); 46b9b97703SBarry Smith } 47b9b97703SBarry Smith 484a2ae208SSatish Balay #undef __FUNCT__ 494a2ae208SSatish Balay #define __FUNCT__ "MatMAIJRedimension" 50be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 51b9b97703SBarry Smith { 52dfbe8321SBarry Smith PetscErrorCode ierr; 53b9b97703SBarry Smith Mat Aij; 54b9b97703SBarry Smith 55b9b97703SBarry Smith PetscFunctionBegin; 56b9b97703SBarry Smith ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 57b9b97703SBarry Smith ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 58b9b97703SBarry Smith PetscFunctionReturn(0); 59b9b97703SBarry Smith } 60b9b97703SBarry Smith 614a2ae208SSatish Balay #undef __FUNCT__ 624a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_SeqMAIJ" 63dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 6482b1193eSBarry Smith { 65dfbe8321SBarry Smith PetscErrorCode ierr; 664cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 6782b1193eSBarry Smith 6882b1193eSBarry Smith PetscFunctionBegin; 69cd3bbe55SBarry Smith if (b->AIJ) { 70cd3bbe55SBarry Smith ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 7182b1193eSBarry Smith } 724cb9d9b8SBarry Smith ierr = PetscFree(b);CHKERRQ(ierr); 734cb9d9b8SBarry Smith PetscFunctionReturn(0); 744cb9d9b8SBarry Smith } 754cb9d9b8SBarry Smith 764a2ae208SSatish Balay #undef __FUNCT__ 770fd73130SBarry Smith #define __FUNCT__ "MatView_SeqMAIJ" 780fd73130SBarry Smith PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 790fd73130SBarry Smith { 800fd73130SBarry Smith PetscErrorCode ierr; 810fd73130SBarry Smith Mat B; 820fd73130SBarry Smith 830fd73130SBarry Smith PetscFunctionBegin; 84ceb03754SKris Buschelman ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B); 850fd73130SBarry Smith ierr = MatView(B,viewer);CHKERRQ(ierr); 860fd73130SBarry Smith ierr = MatDestroy(B);CHKERRQ(ierr); 870fd73130SBarry Smith PetscFunctionReturn(0); 880fd73130SBarry Smith } 890fd73130SBarry Smith 900fd73130SBarry Smith #undef __FUNCT__ 910fd73130SBarry Smith #define __FUNCT__ "MatView_MPIMAIJ" 920fd73130SBarry Smith PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 930fd73130SBarry Smith { 940fd73130SBarry Smith PetscErrorCode ierr; 950fd73130SBarry Smith Mat B; 960fd73130SBarry Smith 970fd73130SBarry Smith PetscFunctionBegin; 98ceb03754SKris Buschelman ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B); 990fd73130SBarry Smith ierr = MatView(B,viewer);CHKERRQ(ierr); 1000fd73130SBarry Smith ierr = MatDestroy(B);CHKERRQ(ierr); 1010fd73130SBarry Smith PetscFunctionReturn(0); 1020fd73130SBarry Smith } 1030fd73130SBarry Smith 1040fd73130SBarry Smith #undef __FUNCT__ 1054a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIMAIJ" 106dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 1074cb9d9b8SBarry Smith { 108dfbe8321SBarry Smith PetscErrorCode ierr; 1094cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1104cb9d9b8SBarry Smith 1114cb9d9b8SBarry Smith PetscFunctionBegin; 1124cb9d9b8SBarry Smith if (b->AIJ) { 1134cb9d9b8SBarry Smith ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 1144cb9d9b8SBarry Smith } 115f4a53059SBarry Smith if (b->OAIJ) { 116f4a53059SBarry Smith ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr); 117f4a53059SBarry Smith } 118b9b97703SBarry Smith if (b->A) { 119b9b97703SBarry Smith ierr = MatDestroy(b->A);CHKERRQ(ierr); 120b9b97703SBarry Smith } 121f4a53059SBarry Smith if (b->ctx) { 122f4a53059SBarry Smith ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr); 123f4a53059SBarry Smith } 124f4a53059SBarry Smith if (b->w) { 125f4a53059SBarry Smith ierr = VecDestroy(b->w);CHKERRQ(ierr); 126f4a53059SBarry Smith } 127cd3bbe55SBarry Smith ierr = PetscFree(b);CHKERRQ(ierr); 128b9b97703SBarry Smith PetscFunctionReturn(0); 129b9b97703SBarry Smith } 130b9b97703SBarry Smith 1310bad9183SKris Buschelman /*MC 132fafad747SKris Buschelman MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 1330bad9183SKris Buschelman multicomponent problems, interpolating or restricting each component the same way independently. 1340bad9183SKris Buschelman The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 1350bad9183SKris Buschelman 1360bad9183SKris Buschelman Operations provided: 1370bad9183SKris Buschelman . MatMult 1380bad9183SKris Buschelman . MatMultTranspose 1390bad9183SKris Buschelman . MatMultAdd 1400bad9183SKris Buschelman . MatMultTransposeAdd 1410bad9183SKris Buschelman 1420bad9183SKris Buschelman Level: advanced 1430bad9183SKris Buschelman 1440bad9183SKris Buschelman .seealso: MatCreateSeqDense 1450bad9183SKris Buschelman M*/ 1460bad9183SKris Buschelman 14782b1193eSBarry Smith EXTERN_C_BEGIN 1484a2ae208SSatish Balay #undef __FUNCT__ 1494a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MAIJ" 150be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MAIJ(Mat A) 15182b1193eSBarry Smith { 152dfbe8321SBarry Smith PetscErrorCode ierr; 1534cb9d9b8SBarry Smith Mat_MPIMAIJ *b; 154*64b52464SHong Zhang PetscMPIInt size; 15582b1193eSBarry Smith 15682b1193eSBarry Smith PetscFunctionBegin; 157b0a32e0cSBarry Smith ierr = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr); 158b0a32e0cSBarry Smith A->data = (void*)b; 159cd3bbe55SBarry Smith ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 160cd3bbe55SBarry Smith A->factor = 0; 161cd3bbe55SBarry Smith A->mapping = 0; 162f4a53059SBarry Smith 163cd3bbe55SBarry Smith b->AIJ = 0; 164cd3bbe55SBarry Smith b->dof = 0; 165f4a53059SBarry Smith b->OAIJ = 0; 166f4a53059SBarry Smith b->ctx = 0; 167f4a53059SBarry Smith b->w = 0; 168*64b52464SHong Zhang ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 169*64b52464SHong Zhang if (size == 1){ 170*64b52464SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr); 171*64b52464SHong Zhang } else { 172*64b52464SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr); 173*64b52464SHong Zhang } 17482b1193eSBarry Smith PetscFunctionReturn(0); 17582b1193eSBarry Smith } 17682b1193eSBarry Smith EXTERN_C_END 17782b1193eSBarry Smith 178cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 1794a2ae208SSatish Balay #undef __FUNCT__ 180b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_2" 181dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 18282b1193eSBarry Smith { 1834cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 184bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 18587828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2; 186dfbe8321SBarry Smith PetscErrorCode ierr; 187899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 188b24ad042SBarry Smith PetscInt n,i,jrow,j; 18982b1193eSBarry Smith 190bcc973b7SBarry Smith PetscFunctionBegin; 1911ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1921ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 193bcc973b7SBarry Smith idx = a->j; 194bcc973b7SBarry Smith v = a->a; 195bcc973b7SBarry Smith ii = a->i; 196bcc973b7SBarry Smith 197bcc973b7SBarry Smith for (i=0; i<m; i++) { 198bcc973b7SBarry Smith jrow = ii[i]; 199bcc973b7SBarry Smith n = ii[i+1] - jrow; 200bcc973b7SBarry Smith sum1 = 0.0; 201bcc973b7SBarry Smith sum2 = 0.0; 202bcc973b7SBarry Smith for (j=0; j<n; j++) { 203bcc973b7SBarry Smith sum1 += v[jrow]*x[2*idx[jrow]]; 204bcc973b7SBarry Smith sum2 += v[jrow]*x[2*idx[jrow]+1]; 205bcc973b7SBarry Smith jrow++; 206bcc973b7SBarry Smith } 207bcc973b7SBarry Smith y[2*i] = sum1; 208bcc973b7SBarry Smith y[2*i+1] = sum2; 209bcc973b7SBarry Smith } 210bcc973b7SBarry Smith 211efee365bSSatish Balay ierr = PetscLogFlops(4*a->nz - 2*m);CHKERRQ(ierr); 2121ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2131ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 21482b1193eSBarry Smith PetscFunctionReturn(0); 21582b1193eSBarry Smith } 216bcc973b7SBarry Smith 2174a2ae208SSatish Balay #undef __FUNCT__ 218b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 219dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 22082b1193eSBarry Smith { 2214cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 222bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 22387828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0; 224dfbe8321SBarry Smith PetscErrorCode ierr; 225899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 22682b1193eSBarry Smith 227bcc973b7SBarry Smith PetscFunctionBegin; 2282dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 2291ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2301ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 231bfec09a0SHong Zhang 232bcc973b7SBarry Smith for (i=0; i<m; i++) { 233bfec09a0SHong Zhang idx = a->j + a->i[i] ; 234bfec09a0SHong Zhang v = a->a + a->i[i] ; 235bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 236bcc973b7SBarry Smith alpha1 = x[2*i]; 237bcc973b7SBarry Smith alpha2 = x[2*i+1]; 238bcc973b7SBarry Smith while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 239bcc973b7SBarry Smith } 240899cda47SBarry Smith ierr = PetscLogFlops(4*a->nz - 2*b->AIJ->cmap.n);CHKERRQ(ierr); 2411ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2421ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 24382b1193eSBarry Smith PetscFunctionReturn(0); 24482b1193eSBarry Smith } 245bcc973b7SBarry Smith 2464a2ae208SSatish Balay #undef __FUNCT__ 247b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 248dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 24982b1193eSBarry Smith { 2504cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 251bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 25287828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2; 253dfbe8321SBarry Smith PetscErrorCode ierr; 254899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 255b24ad042SBarry Smith PetscInt n,i,jrow,j; 25682b1193eSBarry Smith 257bcc973b7SBarry Smith PetscFunctionBegin; 258f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2591ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2601ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 261bcc973b7SBarry Smith idx = a->j; 262bcc973b7SBarry Smith v = a->a; 263bcc973b7SBarry Smith ii = a->i; 264bcc973b7SBarry Smith 265bcc973b7SBarry Smith for (i=0; i<m; i++) { 266bcc973b7SBarry Smith jrow = ii[i]; 267bcc973b7SBarry Smith n = ii[i+1] - jrow; 268bcc973b7SBarry Smith sum1 = 0.0; 269bcc973b7SBarry Smith sum2 = 0.0; 270bcc973b7SBarry Smith for (j=0; j<n; j++) { 271bcc973b7SBarry Smith sum1 += v[jrow]*x[2*idx[jrow]]; 272bcc973b7SBarry Smith sum2 += v[jrow]*x[2*idx[jrow]+1]; 273bcc973b7SBarry Smith jrow++; 274bcc973b7SBarry Smith } 275bcc973b7SBarry Smith y[2*i] += sum1; 276bcc973b7SBarry Smith y[2*i+1] += sum2; 277bcc973b7SBarry Smith } 278bcc973b7SBarry Smith 279efee365bSSatish Balay ierr = PetscLogFlops(4*a->nz - 2*m);CHKERRQ(ierr); 2801ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2811ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 282bcc973b7SBarry Smith PetscFunctionReturn(0); 28382b1193eSBarry Smith } 2844a2ae208SSatish Balay #undef __FUNCT__ 285b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 286dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 28782b1193eSBarry Smith { 2884cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 289bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 29087828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2; 291dfbe8321SBarry Smith PetscErrorCode ierr; 292899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 29382b1193eSBarry Smith 294bcc973b7SBarry Smith PetscFunctionBegin; 295f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2961ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2971ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 298bfec09a0SHong Zhang 299bcc973b7SBarry Smith for (i=0; i<m; i++) { 300bfec09a0SHong Zhang idx = a->j + a->i[i] ; 301bfec09a0SHong Zhang v = a->a + a->i[i] ; 302bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 303bcc973b7SBarry Smith alpha1 = x[2*i]; 304bcc973b7SBarry Smith alpha2 = x[2*i+1]; 305bcc973b7SBarry Smith while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 306bcc973b7SBarry Smith } 307899cda47SBarry Smith ierr = PetscLogFlops(4*a->nz - 2*b->AIJ->cmap.n);CHKERRQ(ierr); 3081ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3091ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 310bcc973b7SBarry Smith PetscFunctionReturn(0); 31182b1193eSBarry Smith } 312cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 3134a2ae208SSatish Balay #undef __FUNCT__ 314b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_3" 315dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 316bcc973b7SBarry Smith { 3174cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 318bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 31987828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3; 320dfbe8321SBarry Smith PetscErrorCode ierr; 321899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 322b24ad042SBarry Smith PetscInt n,i,jrow,j; 32382b1193eSBarry Smith 324bcc973b7SBarry Smith PetscFunctionBegin; 3251ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3261ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 327bcc973b7SBarry Smith idx = a->j; 328bcc973b7SBarry Smith v = a->a; 329bcc973b7SBarry Smith ii = a->i; 330bcc973b7SBarry Smith 331bcc973b7SBarry Smith for (i=0; i<m; i++) { 332bcc973b7SBarry Smith jrow = ii[i]; 333bcc973b7SBarry Smith n = ii[i+1] - jrow; 334bcc973b7SBarry Smith sum1 = 0.0; 335bcc973b7SBarry Smith sum2 = 0.0; 336bcc973b7SBarry Smith sum3 = 0.0; 337bcc973b7SBarry Smith for (j=0; j<n; j++) { 338bcc973b7SBarry Smith sum1 += v[jrow]*x[3*idx[jrow]]; 339bcc973b7SBarry Smith sum2 += v[jrow]*x[3*idx[jrow]+1]; 340bcc973b7SBarry Smith sum3 += v[jrow]*x[3*idx[jrow]+2]; 341bcc973b7SBarry Smith jrow++; 342bcc973b7SBarry Smith } 343bcc973b7SBarry Smith y[3*i] = sum1; 344bcc973b7SBarry Smith y[3*i+1] = sum2; 345bcc973b7SBarry Smith y[3*i+2] = sum3; 346bcc973b7SBarry Smith } 347bcc973b7SBarry Smith 348efee365bSSatish Balay ierr = PetscLogFlops(6*a->nz - 3*m);CHKERRQ(ierr); 3491ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3501ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 351bcc973b7SBarry Smith PetscFunctionReturn(0); 352bcc973b7SBarry Smith } 353bcc973b7SBarry Smith 3544a2ae208SSatish Balay #undef __FUNCT__ 355b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 356dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 357bcc973b7SBarry Smith { 3584cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 359bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 36087828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0; 361dfbe8321SBarry Smith PetscErrorCode ierr; 362899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 363bcc973b7SBarry Smith 364bcc973b7SBarry Smith PetscFunctionBegin; 3652dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 3661ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3671ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 368bfec09a0SHong Zhang 369bcc973b7SBarry Smith for (i=0; i<m; i++) { 370bfec09a0SHong Zhang idx = a->j + a->i[i]; 371bfec09a0SHong Zhang v = a->a + a->i[i]; 372bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 373bcc973b7SBarry Smith alpha1 = x[3*i]; 374bcc973b7SBarry Smith alpha2 = x[3*i+1]; 375bcc973b7SBarry Smith alpha3 = x[3*i+2]; 376bcc973b7SBarry Smith while (n-->0) { 377bcc973b7SBarry Smith y[3*(*idx)] += alpha1*(*v); 378bcc973b7SBarry Smith y[3*(*idx)+1] += alpha2*(*v); 379bcc973b7SBarry Smith y[3*(*idx)+2] += alpha3*(*v); 380bcc973b7SBarry Smith idx++; v++; 381bcc973b7SBarry Smith } 382bcc973b7SBarry Smith } 383899cda47SBarry Smith ierr = PetscLogFlops(6*a->nz - 3*b->AIJ->cmap.n);CHKERRQ(ierr); 3841ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3851ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 386bcc973b7SBarry Smith PetscFunctionReturn(0); 387bcc973b7SBarry Smith } 388bcc973b7SBarry Smith 3894a2ae208SSatish Balay #undef __FUNCT__ 390b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 391dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 392bcc973b7SBarry Smith { 3934cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 394bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 39587828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3; 396dfbe8321SBarry Smith PetscErrorCode ierr; 397899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 398b24ad042SBarry Smith PetscInt n,i,jrow,j; 399bcc973b7SBarry Smith 400bcc973b7SBarry Smith PetscFunctionBegin; 401f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 4021ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 4031ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 404bcc973b7SBarry Smith idx = a->j; 405bcc973b7SBarry Smith v = a->a; 406bcc973b7SBarry Smith ii = a->i; 407bcc973b7SBarry Smith 408bcc973b7SBarry Smith for (i=0; i<m; i++) { 409bcc973b7SBarry Smith jrow = ii[i]; 410bcc973b7SBarry Smith n = ii[i+1] - jrow; 411bcc973b7SBarry Smith sum1 = 0.0; 412bcc973b7SBarry Smith sum2 = 0.0; 413bcc973b7SBarry Smith sum3 = 0.0; 414bcc973b7SBarry Smith for (j=0; j<n; j++) { 415bcc973b7SBarry Smith sum1 += v[jrow]*x[3*idx[jrow]]; 416bcc973b7SBarry Smith sum2 += v[jrow]*x[3*idx[jrow]+1]; 417bcc973b7SBarry Smith sum3 += v[jrow]*x[3*idx[jrow]+2]; 418bcc973b7SBarry Smith jrow++; 419bcc973b7SBarry Smith } 420bcc973b7SBarry Smith y[3*i] += sum1; 421bcc973b7SBarry Smith y[3*i+1] += sum2; 422bcc973b7SBarry Smith y[3*i+2] += sum3; 423bcc973b7SBarry Smith } 424bcc973b7SBarry Smith 425efee365bSSatish Balay ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr); 4261ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 4271ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 428bcc973b7SBarry Smith PetscFunctionReturn(0); 429bcc973b7SBarry Smith } 4304a2ae208SSatish Balay #undef __FUNCT__ 431b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 432dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 433bcc973b7SBarry Smith { 4344cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 435bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 43687828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3; 437dfbe8321SBarry Smith PetscErrorCode ierr; 438899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 439bcc973b7SBarry Smith 440bcc973b7SBarry Smith PetscFunctionBegin; 441f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 4421ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 4431ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 444bcc973b7SBarry Smith for (i=0; i<m; i++) { 445bfec09a0SHong Zhang idx = a->j + a->i[i] ; 446bfec09a0SHong Zhang v = a->a + a->i[i] ; 447bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 448bcc973b7SBarry Smith alpha1 = x[3*i]; 449bcc973b7SBarry Smith alpha2 = x[3*i+1]; 450bcc973b7SBarry Smith alpha3 = x[3*i+2]; 451bcc973b7SBarry Smith while (n-->0) { 452bcc973b7SBarry Smith y[3*(*idx)] += alpha1*(*v); 453bcc973b7SBarry Smith y[3*(*idx)+1] += alpha2*(*v); 454bcc973b7SBarry Smith y[3*(*idx)+2] += alpha3*(*v); 455bcc973b7SBarry Smith idx++; v++; 456bcc973b7SBarry Smith } 457bcc973b7SBarry Smith } 458efee365bSSatish Balay ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr); 4591ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 4601ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 461bcc973b7SBarry Smith PetscFunctionReturn(0); 462bcc973b7SBarry Smith } 463bcc973b7SBarry Smith 464bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/ 4654a2ae208SSatish Balay #undef __FUNCT__ 466b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_4" 467dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 468bcc973b7SBarry Smith { 4694cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 470bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 47187828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 472dfbe8321SBarry Smith PetscErrorCode ierr; 473899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 474b24ad042SBarry Smith PetscInt n,i,jrow,j; 475bcc973b7SBarry Smith 476bcc973b7SBarry Smith PetscFunctionBegin; 4771ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 4781ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 479bcc973b7SBarry Smith idx = a->j; 480bcc973b7SBarry Smith v = a->a; 481bcc973b7SBarry Smith ii = a->i; 482bcc973b7SBarry Smith 483bcc973b7SBarry Smith for (i=0; i<m; i++) { 484bcc973b7SBarry Smith jrow = ii[i]; 485bcc973b7SBarry Smith n = ii[i+1] - jrow; 486bcc973b7SBarry Smith sum1 = 0.0; 487bcc973b7SBarry Smith sum2 = 0.0; 488bcc973b7SBarry Smith sum3 = 0.0; 489bcc973b7SBarry Smith sum4 = 0.0; 490bcc973b7SBarry Smith for (j=0; j<n; j++) { 491bcc973b7SBarry Smith sum1 += v[jrow]*x[4*idx[jrow]]; 492bcc973b7SBarry Smith sum2 += v[jrow]*x[4*idx[jrow]+1]; 493bcc973b7SBarry Smith sum3 += v[jrow]*x[4*idx[jrow]+2]; 494bcc973b7SBarry Smith sum4 += v[jrow]*x[4*idx[jrow]+3]; 495bcc973b7SBarry Smith jrow++; 496bcc973b7SBarry Smith } 497bcc973b7SBarry Smith y[4*i] = sum1; 498bcc973b7SBarry Smith y[4*i+1] = sum2; 499bcc973b7SBarry Smith y[4*i+2] = sum3; 500bcc973b7SBarry Smith y[4*i+3] = sum4; 501bcc973b7SBarry Smith } 502bcc973b7SBarry Smith 503efee365bSSatish Balay ierr = PetscLogFlops(8*a->nz - 4*m);CHKERRQ(ierr); 5041ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 5051ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 506bcc973b7SBarry Smith PetscFunctionReturn(0); 507bcc973b7SBarry Smith } 508bcc973b7SBarry Smith 5094a2ae208SSatish Balay #undef __FUNCT__ 510b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 511dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 512bcc973b7SBarry Smith { 5134cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 514bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 51587828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0; 516dfbe8321SBarry Smith PetscErrorCode ierr; 517899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 518bcc973b7SBarry Smith 519bcc973b7SBarry Smith PetscFunctionBegin; 5202dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 5211ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 5221ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 523bcc973b7SBarry Smith for (i=0; i<m; i++) { 524bfec09a0SHong Zhang idx = a->j + a->i[i] ; 525bfec09a0SHong Zhang v = a->a + a->i[i] ; 526bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 527bcc973b7SBarry Smith alpha1 = x[4*i]; 528bcc973b7SBarry Smith alpha2 = x[4*i+1]; 529bcc973b7SBarry Smith alpha3 = x[4*i+2]; 530bcc973b7SBarry Smith alpha4 = x[4*i+3]; 531bcc973b7SBarry Smith while (n-->0) { 532bcc973b7SBarry Smith y[4*(*idx)] += alpha1*(*v); 533bcc973b7SBarry Smith y[4*(*idx)+1] += alpha2*(*v); 534bcc973b7SBarry Smith y[4*(*idx)+2] += alpha3*(*v); 535bcc973b7SBarry Smith y[4*(*idx)+3] += alpha4*(*v); 536bcc973b7SBarry Smith idx++; v++; 537bcc973b7SBarry Smith } 538bcc973b7SBarry Smith } 539899cda47SBarry Smith ierr = PetscLogFlops(8*a->nz - 4*b->AIJ->cmap.n);CHKERRQ(ierr); 5401ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 5411ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 542bcc973b7SBarry Smith PetscFunctionReturn(0); 543bcc973b7SBarry Smith } 544bcc973b7SBarry Smith 5454a2ae208SSatish Balay #undef __FUNCT__ 546b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 547dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 548bcc973b7SBarry Smith { 5494cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 550f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 55187828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 552dfbe8321SBarry Smith PetscErrorCode ierr; 553899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 554b24ad042SBarry Smith PetscInt n,i,jrow,j; 555f9fae5adSBarry Smith 556f9fae5adSBarry Smith PetscFunctionBegin; 557f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 5581ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 5591ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 560f9fae5adSBarry Smith idx = a->j; 561f9fae5adSBarry Smith v = a->a; 562f9fae5adSBarry Smith ii = a->i; 563f9fae5adSBarry Smith 564f9fae5adSBarry Smith for (i=0; i<m; i++) { 565f9fae5adSBarry Smith jrow = ii[i]; 566f9fae5adSBarry Smith n = ii[i+1] - jrow; 567f9fae5adSBarry Smith sum1 = 0.0; 568f9fae5adSBarry Smith sum2 = 0.0; 569f9fae5adSBarry Smith sum3 = 0.0; 570f9fae5adSBarry Smith sum4 = 0.0; 571f9fae5adSBarry Smith for (j=0; j<n; j++) { 572f9fae5adSBarry Smith sum1 += v[jrow]*x[4*idx[jrow]]; 573f9fae5adSBarry Smith sum2 += v[jrow]*x[4*idx[jrow]+1]; 574f9fae5adSBarry Smith sum3 += v[jrow]*x[4*idx[jrow]+2]; 575f9fae5adSBarry Smith sum4 += v[jrow]*x[4*idx[jrow]+3]; 576f9fae5adSBarry Smith jrow++; 577f9fae5adSBarry Smith } 578f9fae5adSBarry Smith y[4*i] += sum1; 579f9fae5adSBarry Smith y[4*i+1] += sum2; 580f9fae5adSBarry Smith y[4*i+2] += sum3; 581f9fae5adSBarry Smith y[4*i+3] += sum4; 582f9fae5adSBarry Smith } 583f9fae5adSBarry Smith 584efee365bSSatish Balay ierr = PetscLogFlops(8*a->nz - 4*m);CHKERRQ(ierr); 5851ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 5861ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 587f9fae5adSBarry Smith PetscFunctionReturn(0); 588bcc973b7SBarry Smith } 5894a2ae208SSatish Balay #undef __FUNCT__ 590b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 591dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 592bcc973b7SBarry Smith { 5934cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 594f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 59587828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4; 596dfbe8321SBarry Smith PetscErrorCode ierr; 597899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 598f9fae5adSBarry Smith 599f9fae5adSBarry Smith PetscFunctionBegin; 600f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 6011ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 6021ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 603bfec09a0SHong Zhang 604f9fae5adSBarry Smith for (i=0; i<m; i++) { 605bfec09a0SHong Zhang idx = a->j + a->i[i] ; 606bfec09a0SHong Zhang v = a->a + a->i[i] ; 607f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 608f9fae5adSBarry Smith alpha1 = x[4*i]; 609f9fae5adSBarry Smith alpha2 = x[4*i+1]; 610f9fae5adSBarry Smith alpha3 = x[4*i+2]; 611f9fae5adSBarry Smith alpha4 = x[4*i+3]; 612f9fae5adSBarry Smith while (n-->0) { 613f9fae5adSBarry Smith y[4*(*idx)] += alpha1*(*v); 614f9fae5adSBarry Smith y[4*(*idx)+1] += alpha2*(*v); 615f9fae5adSBarry Smith y[4*(*idx)+2] += alpha3*(*v); 616f9fae5adSBarry Smith y[4*(*idx)+3] += alpha4*(*v); 617f9fae5adSBarry Smith idx++; v++; 618f9fae5adSBarry Smith } 619f9fae5adSBarry Smith } 620899cda47SBarry Smith ierr = PetscLogFlops(8*a->nz - 4*b->AIJ->cmap.n);CHKERRQ(ierr); 6211ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 6221ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 623f9fae5adSBarry Smith PetscFunctionReturn(0); 624f9fae5adSBarry Smith } 625f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/ 6266cd98798SBarry Smith 6274a2ae208SSatish Balay #undef __FUNCT__ 628b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_5" 629dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 630f9fae5adSBarry Smith { 6314cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 632f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 63387828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 634dfbe8321SBarry Smith PetscErrorCode ierr; 635899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 636b24ad042SBarry Smith PetscInt n,i,jrow,j; 637f9fae5adSBarry Smith 638f9fae5adSBarry Smith PetscFunctionBegin; 6391ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 6401ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 641f9fae5adSBarry Smith idx = a->j; 642f9fae5adSBarry Smith v = a->a; 643f9fae5adSBarry Smith ii = a->i; 644f9fae5adSBarry Smith 645f9fae5adSBarry Smith for (i=0; i<m; i++) { 646f9fae5adSBarry Smith jrow = ii[i]; 647f9fae5adSBarry Smith n = ii[i+1] - jrow; 648f9fae5adSBarry Smith sum1 = 0.0; 649f9fae5adSBarry Smith sum2 = 0.0; 650f9fae5adSBarry Smith sum3 = 0.0; 651f9fae5adSBarry Smith sum4 = 0.0; 652f9fae5adSBarry Smith sum5 = 0.0; 653f9fae5adSBarry Smith for (j=0; j<n; j++) { 654f9fae5adSBarry Smith sum1 += v[jrow]*x[5*idx[jrow]]; 655f9fae5adSBarry Smith sum2 += v[jrow]*x[5*idx[jrow]+1]; 656f9fae5adSBarry Smith sum3 += v[jrow]*x[5*idx[jrow]+2]; 657f9fae5adSBarry Smith sum4 += v[jrow]*x[5*idx[jrow]+3]; 658f9fae5adSBarry Smith sum5 += v[jrow]*x[5*idx[jrow]+4]; 659f9fae5adSBarry Smith jrow++; 660f9fae5adSBarry Smith } 661f9fae5adSBarry Smith y[5*i] = sum1; 662f9fae5adSBarry Smith y[5*i+1] = sum2; 663f9fae5adSBarry Smith y[5*i+2] = sum3; 664f9fae5adSBarry Smith y[5*i+3] = sum4; 665f9fae5adSBarry Smith y[5*i+4] = sum5; 666f9fae5adSBarry Smith } 667f9fae5adSBarry Smith 668efee365bSSatish Balay ierr = PetscLogFlops(10*a->nz - 5*m);CHKERRQ(ierr); 6691ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 6701ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 671f9fae5adSBarry Smith PetscFunctionReturn(0); 672f9fae5adSBarry Smith } 673f9fae5adSBarry Smith 6744a2ae208SSatish Balay #undef __FUNCT__ 675b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 676dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 677f9fae5adSBarry Smith { 6784cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 679f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 68087828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0; 681dfbe8321SBarry Smith PetscErrorCode ierr; 682899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 683f9fae5adSBarry Smith 684f9fae5adSBarry Smith PetscFunctionBegin; 6852dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 6861ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 6871ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 688bfec09a0SHong Zhang 689f9fae5adSBarry Smith for (i=0; i<m; i++) { 690bfec09a0SHong Zhang idx = a->j + a->i[i] ; 691bfec09a0SHong Zhang v = a->a + a->i[i] ; 692f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 693f9fae5adSBarry Smith alpha1 = x[5*i]; 694f9fae5adSBarry Smith alpha2 = x[5*i+1]; 695f9fae5adSBarry Smith alpha3 = x[5*i+2]; 696f9fae5adSBarry Smith alpha4 = x[5*i+3]; 697f9fae5adSBarry Smith alpha5 = x[5*i+4]; 698f9fae5adSBarry Smith while (n-->0) { 699f9fae5adSBarry Smith y[5*(*idx)] += alpha1*(*v); 700f9fae5adSBarry Smith y[5*(*idx)+1] += alpha2*(*v); 701f9fae5adSBarry Smith y[5*(*idx)+2] += alpha3*(*v); 702f9fae5adSBarry Smith y[5*(*idx)+3] += alpha4*(*v); 703f9fae5adSBarry Smith y[5*(*idx)+4] += alpha5*(*v); 704f9fae5adSBarry Smith idx++; v++; 705f9fae5adSBarry Smith } 706f9fae5adSBarry Smith } 707899cda47SBarry Smith ierr = PetscLogFlops(10*a->nz - 5*b->AIJ->cmap.n);CHKERRQ(ierr); 7081ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 7091ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 710f9fae5adSBarry Smith PetscFunctionReturn(0); 711f9fae5adSBarry Smith } 712f9fae5adSBarry Smith 7134a2ae208SSatish Balay #undef __FUNCT__ 714b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 715dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 716f9fae5adSBarry Smith { 7174cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 718f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 71987828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 720dfbe8321SBarry Smith PetscErrorCode ierr; 721899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 722b24ad042SBarry Smith PetscInt n,i,jrow,j; 723f9fae5adSBarry Smith 724f9fae5adSBarry Smith PetscFunctionBegin; 725f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 7261ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 7271ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 728f9fae5adSBarry Smith idx = a->j; 729f9fae5adSBarry Smith v = a->a; 730f9fae5adSBarry Smith ii = a->i; 731f9fae5adSBarry Smith 732f9fae5adSBarry Smith for (i=0; i<m; i++) { 733f9fae5adSBarry Smith jrow = ii[i]; 734f9fae5adSBarry Smith n = ii[i+1] - jrow; 735f9fae5adSBarry Smith sum1 = 0.0; 736f9fae5adSBarry Smith sum2 = 0.0; 737f9fae5adSBarry Smith sum3 = 0.0; 738f9fae5adSBarry Smith sum4 = 0.0; 739f9fae5adSBarry Smith sum5 = 0.0; 740f9fae5adSBarry Smith for (j=0; j<n; j++) { 741f9fae5adSBarry Smith sum1 += v[jrow]*x[5*idx[jrow]]; 742f9fae5adSBarry Smith sum2 += v[jrow]*x[5*idx[jrow]+1]; 743f9fae5adSBarry Smith sum3 += v[jrow]*x[5*idx[jrow]+2]; 744f9fae5adSBarry Smith sum4 += v[jrow]*x[5*idx[jrow]+3]; 745f9fae5adSBarry Smith sum5 += v[jrow]*x[5*idx[jrow]+4]; 746f9fae5adSBarry Smith jrow++; 747f9fae5adSBarry Smith } 748f9fae5adSBarry Smith y[5*i] += sum1; 749f9fae5adSBarry Smith y[5*i+1] += sum2; 750f9fae5adSBarry Smith y[5*i+2] += sum3; 751f9fae5adSBarry Smith y[5*i+3] += sum4; 752f9fae5adSBarry Smith y[5*i+4] += sum5; 753f9fae5adSBarry Smith } 754f9fae5adSBarry Smith 755efee365bSSatish Balay ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr); 7561ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 7571ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 758f9fae5adSBarry Smith PetscFunctionReturn(0); 759f9fae5adSBarry Smith } 760f9fae5adSBarry Smith 7614a2ae208SSatish Balay #undef __FUNCT__ 762b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 763dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 764f9fae5adSBarry Smith { 7654cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 766f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 76787828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5; 768dfbe8321SBarry Smith PetscErrorCode ierr; 769899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 770f9fae5adSBarry Smith 771f9fae5adSBarry Smith PetscFunctionBegin; 772f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 7731ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 7741ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 775bfec09a0SHong Zhang 776f9fae5adSBarry Smith for (i=0; i<m; i++) { 777bfec09a0SHong Zhang idx = a->j + a->i[i] ; 778bfec09a0SHong Zhang v = a->a + a->i[i] ; 779f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 780f9fae5adSBarry Smith alpha1 = x[5*i]; 781f9fae5adSBarry Smith alpha2 = x[5*i+1]; 782f9fae5adSBarry Smith alpha3 = x[5*i+2]; 783f9fae5adSBarry Smith alpha4 = x[5*i+3]; 784f9fae5adSBarry Smith alpha5 = x[5*i+4]; 785f9fae5adSBarry Smith while (n-->0) { 786f9fae5adSBarry Smith y[5*(*idx)] += alpha1*(*v); 787f9fae5adSBarry Smith y[5*(*idx)+1] += alpha2*(*v); 788f9fae5adSBarry Smith y[5*(*idx)+2] += alpha3*(*v); 789f9fae5adSBarry Smith y[5*(*idx)+3] += alpha4*(*v); 790f9fae5adSBarry Smith y[5*(*idx)+4] += alpha5*(*v); 791f9fae5adSBarry Smith idx++; v++; 792f9fae5adSBarry Smith } 793f9fae5adSBarry Smith } 794efee365bSSatish Balay ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr); 7951ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 7961ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 797f9fae5adSBarry Smith PetscFunctionReturn(0); 798bcc973b7SBarry Smith } 799bcc973b7SBarry Smith 8006cd98798SBarry Smith /* ------------------------------------------------------------------------------*/ 8014a2ae208SSatish Balay #undef __FUNCT__ 802b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_6" 803dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 8046cd98798SBarry Smith { 8056cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 8066cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 80787828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 808dfbe8321SBarry Smith PetscErrorCode ierr; 809899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 810b24ad042SBarry Smith PetscInt n,i,jrow,j; 8116cd98798SBarry Smith 8126cd98798SBarry Smith PetscFunctionBegin; 8131ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 8141ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 8156cd98798SBarry Smith idx = a->j; 8166cd98798SBarry Smith v = a->a; 8176cd98798SBarry Smith ii = a->i; 8186cd98798SBarry Smith 8196cd98798SBarry Smith for (i=0; i<m; i++) { 8206cd98798SBarry Smith jrow = ii[i]; 8216cd98798SBarry Smith n = ii[i+1] - jrow; 8226cd98798SBarry Smith sum1 = 0.0; 8236cd98798SBarry Smith sum2 = 0.0; 8246cd98798SBarry Smith sum3 = 0.0; 8256cd98798SBarry Smith sum4 = 0.0; 8266cd98798SBarry Smith sum5 = 0.0; 8276cd98798SBarry Smith sum6 = 0.0; 8286cd98798SBarry Smith for (j=0; j<n; j++) { 8296cd98798SBarry Smith sum1 += v[jrow]*x[6*idx[jrow]]; 8306cd98798SBarry Smith sum2 += v[jrow]*x[6*idx[jrow]+1]; 8316cd98798SBarry Smith sum3 += v[jrow]*x[6*idx[jrow]+2]; 8326cd98798SBarry Smith sum4 += v[jrow]*x[6*idx[jrow]+3]; 8336cd98798SBarry Smith sum5 += v[jrow]*x[6*idx[jrow]+4]; 8346cd98798SBarry Smith sum6 += v[jrow]*x[6*idx[jrow]+5]; 8356cd98798SBarry Smith jrow++; 8366cd98798SBarry Smith } 8376cd98798SBarry Smith y[6*i] = sum1; 8386cd98798SBarry Smith y[6*i+1] = sum2; 8396cd98798SBarry Smith y[6*i+2] = sum3; 8406cd98798SBarry Smith y[6*i+3] = sum4; 8416cd98798SBarry Smith y[6*i+4] = sum5; 8426cd98798SBarry Smith y[6*i+5] = sum6; 8436cd98798SBarry Smith } 8446cd98798SBarry Smith 845efee365bSSatish Balay ierr = PetscLogFlops(12*a->nz - 6*m);CHKERRQ(ierr); 8461ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 8471ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 8486cd98798SBarry Smith PetscFunctionReturn(0); 8496cd98798SBarry Smith } 8506cd98798SBarry Smith 8514a2ae208SSatish Balay #undef __FUNCT__ 852b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 853dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 8546cd98798SBarry Smith { 8556cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 8566cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 85787828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0; 858dfbe8321SBarry Smith PetscErrorCode ierr; 859899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 8606cd98798SBarry Smith 8616cd98798SBarry Smith PetscFunctionBegin; 8622dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 8631ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 8641ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 865bfec09a0SHong Zhang 8666cd98798SBarry Smith for (i=0; i<m; i++) { 867bfec09a0SHong Zhang idx = a->j + a->i[i] ; 868bfec09a0SHong Zhang v = a->a + a->i[i] ; 8696cd98798SBarry Smith n = a->i[i+1] - a->i[i]; 8706cd98798SBarry Smith alpha1 = x[6*i]; 8716cd98798SBarry Smith alpha2 = x[6*i+1]; 8726cd98798SBarry Smith alpha3 = x[6*i+2]; 8736cd98798SBarry Smith alpha4 = x[6*i+3]; 8746cd98798SBarry Smith alpha5 = x[6*i+4]; 8756cd98798SBarry Smith alpha6 = x[6*i+5]; 8766cd98798SBarry Smith while (n-->0) { 8776cd98798SBarry Smith y[6*(*idx)] += alpha1*(*v); 8786cd98798SBarry Smith y[6*(*idx)+1] += alpha2*(*v); 8796cd98798SBarry Smith y[6*(*idx)+2] += alpha3*(*v); 8806cd98798SBarry Smith y[6*(*idx)+3] += alpha4*(*v); 8816cd98798SBarry Smith y[6*(*idx)+4] += alpha5*(*v); 8826cd98798SBarry Smith y[6*(*idx)+5] += alpha6*(*v); 8836cd98798SBarry Smith idx++; v++; 8846cd98798SBarry Smith } 8856cd98798SBarry Smith } 886899cda47SBarry Smith ierr = PetscLogFlops(12*a->nz - 6*b->AIJ->cmap.n);CHKERRQ(ierr); 8871ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 8881ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 8896cd98798SBarry Smith PetscFunctionReturn(0); 8906cd98798SBarry Smith } 8916cd98798SBarry Smith 8924a2ae208SSatish Balay #undef __FUNCT__ 893b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 894dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 8956cd98798SBarry Smith { 8966cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 8976cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 89887828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 899dfbe8321SBarry Smith PetscErrorCode ierr; 900899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 901b24ad042SBarry Smith PetscInt n,i,jrow,j; 9026cd98798SBarry Smith 9036cd98798SBarry Smith PetscFunctionBegin; 9046cd98798SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 9051ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 9061ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 9076cd98798SBarry Smith idx = a->j; 9086cd98798SBarry Smith v = a->a; 9096cd98798SBarry Smith ii = a->i; 9106cd98798SBarry Smith 9116cd98798SBarry Smith for (i=0; i<m; i++) { 9126cd98798SBarry Smith jrow = ii[i]; 9136cd98798SBarry Smith n = ii[i+1] - jrow; 9146cd98798SBarry Smith sum1 = 0.0; 9156cd98798SBarry Smith sum2 = 0.0; 9166cd98798SBarry Smith sum3 = 0.0; 9176cd98798SBarry Smith sum4 = 0.0; 9186cd98798SBarry Smith sum5 = 0.0; 9196cd98798SBarry Smith sum6 = 0.0; 9206cd98798SBarry Smith for (j=0; j<n; j++) { 9216cd98798SBarry Smith sum1 += v[jrow]*x[6*idx[jrow]]; 9226cd98798SBarry Smith sum2 += v[jrow]*x[6*idx[jrow]+1]; 9236cd98798SBarry Smith sum3 += v[jrow]*x[6*idx[jrow]+2]; 9246cd98798SBarry Smith sum4 += v[jrow]*x[6*idx[jrow]+3]; 9256cd98798SBarry Smith sum5 += v[jrow]*x[6*idx[jrow]+4]; 9266cd98798SBarry Smith sum6 += v[jrow]*x[6*idx[jrow]+5]; 9276cd98798SBarry Smith jrow++; 9286cd98798SBarry Smith } 9296cd98798SBarry Smith y[6*i] += sum1; 9306cd98798SBarry Smith y[6*i+1] += sum2; 9316cd98798SBarry Smith y[6*i+2] += sum3; 9326cd98798SBarry Smith y[6*i+3] += sum4; 9336cd98798SBarry Smith y[6*i+4] += sum5; 9346cd98798SBarry Smith y[6*i+5] += sum6; 9356cd98798SBarry Smith } 9366cd98798SBarry Smith 937efee365bSSatish Balay ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr); 9381ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 9391ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 9406cd98798SBarry Smith PetscFunctionReturn(0); 9416cd98798SBarry Smith } 9426cd98798SBarry Smith 9434a2ae208SSatish Balay #undef __FUNCT__ 944b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 945dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 9466cd98798SBarry Smith { 9476cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 9486cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 94987828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 950dfbe8321SBarry Smith PetscErrorCode ierr; 951899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 9526cd98798SBarry Smith 9536cd98798SBarry Smith PetscFunctionBegin; 9546cd98798SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 9551ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 9561ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 957bfec09a0SHong Zhang 9586cd98798SBarry Smith for (i=0; i<m; i++) { 959bfec09a0SHong Zhang idx = a->j + a->i[i] ; 960bfec09a0SHong Zhang v = a->a + a->i[i] ; 9616cd98798SBarry Smith n = a->i[i+1] - a->i[i]; 9626cd98798SBarry Smith alpha1 = x[6*i]; 9636cd98798SBarry Smith alpha2 = x[6*i+1]; 9646cd98798SBarry Smith alpha3 = x[6*i+2]; 9656cd98798SBarry Smith alpha4 = x[6*i+3]; 9666cd98798SBarry Smith alpha5 = x[6*i+4]; 9676cd98798SBarry Smith alpha6 = x[6*i+5]; 9686cd98798SBarry Smith while (n-->0) { 9696cd98798SBarry Smith y[6*(*idx)] += alpha1*(*v); 9706cd98798SBarry Smith y[6*(*idx)+1] += alpha2*(*v); 9716cd98798SBarry Smith y[6*(*idx)+2] += alpha3*(*v); 9726cd98798SBarry Smith y[6*(*idx)+3] += alpha4*(*v); 9736cd98798SBarry Smith y[6*(*idx)+4] += alpha5*(*v); 9746cd98798SBarry Smith y[6*(*idx)+5] += alpha6*(*v); 9756cd98798SBarry Smith idx++; v++; 9766cd98798SBarry Smith } 9776cd98798SBarry Smith } 978efee365bSSatish Balay ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr); 9791ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 9801ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 9816cd98798SBarry Smith PetscFunctionReturn(0); 9826cd98798SBarry Smith } 9836cd98798SBarry Smith 98466ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/ 98566ed3db0SBarry Smith #undef __FUNCT__ 986ed8eea03SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_7" 987ed8eea03SBarry Smith PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 988ed8eea03SBarry Smith { 989ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 990ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 991ed8eea03SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 992ed8eea03SBarry Smith PetscErrorCode ierr; 993899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 994ed8eea03SBarry Smith PetscInt n,i,jrow,j; 995ed8eea03SBarry Smith 996ed8eea03SBarry Smith PetscFunctionBegin; 997ed8eea03SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 998ed8eea03SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 999ed8eea03SBarry Smith idx = a->j; 1000ed8eea03SBarry Smith v = a->a; 1001ed8eea03SBarry Smith ii = a->i; 1002ed8eea03SBarry Smith 1003ed8eea03SBarry Smith for (i=0; i<m; i++) { 1004ed8eea03SBarry Smith jrow = ii[i]; 1005ed8eea03SBarry Smith n = ii[i+1] - jrow; 1006ed8eea03SBarry Smith sum1 = 0.0; 1007ed8eea03SBarry Smith sum2 = 0.0; 1008ed8eea03SBarry Smith sum3 = 0.0; 1009ed8eea03SBarry Smith sum4 = 0.0; 1010ed8eea03SBarry Smith sum5 = 0.0; 1011ed8eea03SBarry Smith sum6 = 0.0; 1012ed8eea03SBarry Smith sum7 = 0.0; 1013ed8eea03SBarry Smith for (j=0; j<n; j++) { 1014ed8eea03SBarry Smith sum1 += v[jrow]*x[7*idx[jrow]]; 1015ed8eea03SBarry Smith sum2 += v[jrow]*x[7*idx[jrow]+1]; 1016ed8eea03SBarry Smith sum3 += v[jrow]*x[7*idx[jrow]+2]; 1017ed8eea03SBarry Smith sum4 += v[jrow]*x[7*idx[jrow]+3]; 1018ed8eea03SBarry Smith sum5 += v[jrow]*x[7*idx[jrow]+4]; 1019ed8eea03SBarry Smith sum6 += v[jrow]*x[7*idx[jrow]+5]; 1020ed8eea03SBarry Smith sum7 += v[jrow]*x[7*idx[jrow]+6]; 1021ed8eea03SBarry Smith jrow++; 1022ed8eea03SBarry Smith } 1023ed8eea03SBarry Smith y[7*i] = sum1; 1024ed8eea03SBarry Smith y[7*i+1] = sum2; 1025ed8eea03SBarry Smith y[7*i+2] = sum3; 1026ed8eea03SBarry Smith y[7*i+3] = sum4; 1027ed8eea03SBarry Smith y[7*i+4] = sum5; 1028ed8eea03SBarry Smith y[7*i+5] = sum6; 1029ed8eea03SBarry Smith y[7*i+6] = sum7; 1030ed8eea03SBarry Smith } 1031ed8eea03SBarry Smith 1032efee365bSSatish Balay ierr = PetscLogFlops(14*a->nz - 7*m);CHKERRQ(ierr); 1033ed8eea03SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1034ed8eea03SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1035ed8eea03SBarry Smith PetscFunctionReturn(0); 1036ed8eea03SBarry Smith } 1037ed8eea03SBarry Smith 1038ed8eea03SBarry Smith #undef __FUNCT__ 1039ed8eea03SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7" 1040ed8eea03SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1041ed8eea03SBarry Smith { 1042ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1043ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1044ed8eea03SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0; 1045ed8eea03SBarry Smith PetscErrorCode ierr; 1046899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1047ed8eea03SBarry Smith 1048ed8eea03SBarry Smith PetscFunctionBegin; 10492dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 1050ed8eea03SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1051ed8eea03SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1052ed8eea03SBarry Smith 1053ed8eea03SBarry Smith for (i=0; i<m; i++) { 1054ed8eea03SBarry Smith idx = a->j + a->i[i] ; 1055ed8eea03SBarry Smith v = a->a + a->i[i] ; 1056ed8eea03SBarry Smith n = a->i[i+1] - a->i[i]; 1057ed8eea03SBarry Smith alpha1 = x[7*i]; 1058ed8eea03SBarry Smith alpha2 = x[7*i+1]; 1059ed8eea03SBarry Smith alpha3 = x[7*i+2]; 1060ed8eea03SBarry Smith alpha4 = x[7*i+3]; 1061ed8eea03SBarry Smith alpha5 = x[7*i+4]; 1062ed8eea03SBarry Smith alpha6 = x[7*i+5]; 1063ed8eea03SBarry Smith alpha7 = x[7*i+6]; 1064ed8eea03SBarry Smith while (n-->0) { 1065ed8eea03SBarry Smith y[7*(*idx)] += alpha1*(*v); 1066ed8eea03SBarry Smith y[7*(*idx)+1] += alpha2*(*v); 1067ed8eea03SBarry Smith y[7*(*idx)+2] += alpha3*(*v); 1068ed8eea03SBarry Smith y[7*(*idx)+3] += alpha4*(*v); 1069ed8eea03SBarry Smith y[7*(*idx)+4] += alpha5*(*v); 1070ed8eea03SBarry Smith y[7*(*idx)+5] += alpha6*(*v); 1071ed8eea03SBarry Smith y[7*(*idx)+6] += alpha7*(*v); 1072ed8eea03SBarry Smith idx++; v++; 1073ed8eea03SBarry Smith } 1074ed8eea03SBarry Smith } 1075899cda47SBarry Smith ierr = PetscLogFlops(14*a->nz - 7*b->AIJ->cmap.n);CHKERRQ(ierr); 1076ed8eea03SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1077ed8eea03SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1078ed8eea03SBarry Smith PetscFunctionReturn(0); 1079ed8eea03SBarry Smith } 1080ed8eea03SBarry Smith 1081ed8eea03SBarry Smith #undef __FUNCT__ 1082ed8eea03SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_7" 1083ed8eea03SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1084ed8eea03SBarry Smith { 1085ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1086ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1087ed8eea03SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1088ed8eea03SBarry Smith PetscErrorCode ierr; 1089899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1090ed8eea03SBarry Smith PetscInt n,i,jrow,j; 1091ed8eea03SBarry Smith 1092ed8eea03SBarry Smith PetscFunctionBegin; 1093ed8eea03SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1094ed8eea03SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1095ed8eea03SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1096ed8eea03SBarry Smith idx = a->j; 1097ed8eea03SBarry Smith v = a->a; 1098ed8eea03SBarry Smith ii = a->i; 1099ed8eea03SBarry Smith 1100ed8eea03SBarry Smith for (i=0; i<m; i++) { 1101ed8eea03SBarry Smith jrow = ii[i]; 1102ed8eea03SBarry Smith n = ii[i+1] - jrow; 1103ed8eea03SBarry Smith sum1 = 0.0; 1104ed8eea03SBarry Smith sum2 = 0.0; 1105ed8eea03SBarry Smith sum3 = 0.0; 1106ed8eea03SBarry Smith sum4 = 0.0; 1107ed8eea03SBarry Smith sum5 = 0.0; 1108ed8eea03SBarry Smith sum6 = 0.0; 1109ed8eea03SBarry Smith sum7 = 0.0; 1110ed8eea03SBarry Smith for (j=0; j<n; j++) { 1111ed8eea03SBarry Smith sum1 += v[jrow]*x[7*idx[jrow]]; 1112ed8eea03SBarry Smith sum2 += v[jrow]*x[7*idx[jrow]+1]; 1113ed8eea03SBarry Smith sum3 += v[jrow]*x[7*idx[jrow]+2]; 1114ed8eea03SBarry Smith sum4 += v[jrow]*x[7*idx[jrow]+3]; 1115ed8eea03SBarry Smith sum5 += v[jrow]*x[7*idx[jrow]+4]; 1116ed8eea03SBarry Smith sum6 += v[jrow]*x[7*idx[jrow]+5]; 1117ed8eea03SBarry Smith sum7 += v[jrow]*x[7*idx[jrow]+6]; 1118ed8eea03SBarry Smith jrow++; 1119ed8eea03SBarry Smith } 1120ed8eea03SBarry Smith y[7*i] += sum1; 1121ed8eea03SBarry Smith y[7*i+1] += sum2; 1122ed8eea03SBarry Smith y[7*i+2] += sum3; 1123ed8eea03SBarry Smith y[7*i+3] += sum4; 1124ed8eea03SBarry Smith y[7*i+4] += sum5; 1125ed8eea03SBarry Smith y[7*i+5] += sum6; 1126ed8eea03SBarry Smith y[7*i+6] += sum7; 1127ed8eea03SBarry Smith } 1128ed8eea03SBarry Smith 1129efee365bSSatish Balay ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr); 1130ed8eea03SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1131ed8eea03SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1132ed8eea03SBarry Smith PetscFunctionReturn(0); 1133ed8eea03SBarry Smith } 1134ed8eea03SBarry Smith 1135ed8eea03SBarry Smith #undef __FUNCT__ 1136ed8eea03SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7" 1137ed8eea03SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1138ed8eea03SBarry Smith { 1139ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1140ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1141ed8eea03SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1142ed8eea03SBarry Smith PetscErrorCode ierr; 1143899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1144ed8eea03SBarry Smith 1145ed8eea03SBarry Smith PetscFunctionBegin; 1146ed8eea03SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1147ed8eea03SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1148ed8eea03SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1149ed8eea03SBarry Smith for (i=0; i<m; i++) { 1150ed8eea03SBarry Smith idx = a->j + a->i[i] ; 1151ed8eea03SBarry Smith v = a->a + a->i[i] ; 1152ed8eea03SBarry Smith n = a->i[i+1] - a->i[i]; 1153ed8eea03SBarry Smith alpha1 = x[7*i]; 1154ed8eea03SBarry Smith alpha2 = x[7*i+1]; 1155ed8eea03SBarry Smith alpha3 = x[7*i+2]; 1156ed8eea03SBarry Smith alpha4 = x[7*i+3]; 1157ed8eea03SBarry Smith alpha5 = x[7*i+4]; 1158ed8eea03SBarry Smith alpha6 = x[7*i+5]; 1159ed8eea03SBarry Smith alpha7 = x[7*i+6]; 1160ed8eea03SBarry Smith while (n-->0) { 1161ed8eea03SBarry Smith y[7*(*idx)] += alpha1*(*v); 1162ed8eea03SBarry Smith y[7*(*idx)+1] += alpha2*(*v); 1163ed8eea03SBarry Smith y[7*(*idx)+2] += alpha3*(*v); 1164ed8eea03SBarry Smith y[7*(*idx)+3] += alpha4*(*v); 1165ed8eea03SBarry Smith y[7*(*idx)+4] += alpha5*(*v); 1166ed8eea03SBarry Smith y[7*(*idx)+5] += alpha6*(*v); 1167ed8eea03SBarry Smith y[7*(*idx)+6] += alpha7*(*v); 1168ed8eea03SBarry Smith idx++; v++; 1169ed8eea03SBarry Smith } 1170ed8eea03SBarry Smith } 1171efee365bSSatish Balay ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr); 1172ed8eea03SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1173ed8eea03SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1174ed8eea03SBarry Smith PetscFunctionReturn(0); 1175ed8eea03SBarry Smith } 1176ed8eea03SBarry Smith 1177ed8eea03SBarry Smith #undef __FUNCT__ 117866ed3db0SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_8" 1179dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 118066ed3db0SBarry Smith { 118166ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 118266ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 118387828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1184dfbe8321SBarry Smith PetscErrorCode ierr; 1185899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1186b24ad042SBarry Smith PetscInt n,i,jrow,j; 118766ed3db0SBarry Smith 118866ed3db0SBarry Smith PetscFunctionBegin; 11891ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 11901ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 119166ed3db0SBarry Smith idx = a->j; 119266ed3db0SBarry Smith v = a->a; 119366ed3db0SBarry Smith ii = a->i; 119466ed3db0SBarry Smith 119566ed3db0SBarry Smith for (i=0; i<m; i++) { 119666ed3db0SBarry Smith jrow = ii[i]; 119766ed3db0SBarry Smith n = ii[i+1] - jrow; 119866ed3db0SBarry Smith sum1 = 0.0; 119966ed3db0SBarry Smith sum2 = 0.0; 120066ed3db0SBarry Smith sum3 = 0.0; 120166ed3db0SBarry Smith sum4 = 0.0; 120266ed3db0SBarry Smith sum5 = 0.0; 120366ed3db0SBarry Smith sum6 = 0.0; 120466ed3db0SBarry Smith sum7 = 0.0; 120566ed3db0SBarry Smith sum8 = 0.0; 120666ed3db0SBarry Smith for (j=0; j<n; j++) { 120766ed3db0SBarry Smith sum1 += v[jrow]*x[8*idx[jrow]]; 120866ed3db0SBarry Smith sum2 += v[jrow]*x[8*idx[jrow]+1]; 120966ed3db0SBarry Smith sum3 += v[jrow]*x[8*idx[jrow]+2]; 121066ed3db0SBarry Smith sum4 += v[jrow]*x[8*idx[jrow]+3]; 121166ed3db0SBarry Smith sum5 += v[jrow]*x[8*idx[jrow]+4]; 121266ed3db0SBarry Smith sum6 += v[jrow]*x[8*idx[jrow]+5]; 121366ed3db0SBarry Smith sum7 += v[jrow]*x[8*idx[jrow]+6]; 121466ed3db0SBarry Smith sum8 += v[jrow]*x[8*idx[jrow]+7]; 121566ed3db0SBarry Smith jrow++; 121666ed3db0SBarry Smith } 121766ed3db0SBarry Smith y[8*i] = sum1; 121866ed3db0SBarry Smith y[8*i+1] = sum2; 121966ed3db0SBarry Smith y[8*i+2] = sum3; 122066ed3db0SBarry Smith y[8*i+3] = sum4; 122166ed3db0SBarry Smith y[8*i+4] = sum5; 122266ed3db0SBarry Smith y[8*i+5] = sum6; 122366ed3db0SBarry Smith y[8*i+6] = sum7; 122466ed3db0SBarry Smith y[8*i+7] = sum8; 122566ed3db0SBarry Smith } 122666ed3db0SBarry Smith 1227efee365bSSatish Balay ierr = PetscLogFlops(16*a->nz - 8*m);CHKERRQ(ierr); 12281ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 12291ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 123066ed3db0SBarry Smith PetscFunctionReturn(0); 123166ed3db0SBarry Smith } 123266ed3db0SBarry Smith 123366ed3db0SBarry Smith #undef __FUNCT__ 123466ed3db0SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 1235dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 123666ed3db0SBarry Smith { 123766ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 123866ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 123987828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1240dfbe8321SBarry Smith PetscErrorCode ierr; 1241899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 124266ed3db0SBarry Smith 124366ed3db0SBarry Smith PetscFunctionBegin; 12442dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 12451ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 12461ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1247bfec09a0SHong Zhang 124866ed3db0SBarry Smith for (i=0; i<m; i++) { 1249bfec09a0SHong Zhang idx = a->j + a->i[i] ; 1250bfec09a0SHong Zhang v = a->a + a->i[i] ; 125166ed3db0SBarry Smith n = a->i[i+1] - a->i[i]; 125266ed3db0SBarry Smith alpha1 = x[8*i]; 125366ed3db0SBarry Smith alpha2 = x[8*i+1]; 125466ed3db0SBarry Smith alpha3 = x[8*i+2]; 125566ed3db0SBarry Smith alpha4 = x[8*i+3]; 125666ed3db0SBarry Smith alpha5 = x[8*i+4]; 125766ed3db0SBarry Smith alpha6 = x[8*i+5]; 125866ed3db0SBarry Smith alpha7 = x[8*i+6]; 125966ed3db0SBarry Smith alpha8 = x[8*i+7]; 126066ed3db0SBarry Smith while (n-->0) { 126166ed3db0SBarry Smith y[8*(*idx)] += alpha1*(*v); 126266ed3db0SBarry Smith y[8*(*idx)+1] += alpha2*(*v); 126366ed3db0SBarry Smith y[8*(*idx)+2] += alpha3*(*v); 126466ed3db0SBarry Smith y[8*(*idx)+3] += alpha4*(*v); 126566ed3db0SBarry Smith y[8*(*idx)+4] += alpha5*(*v); 126666ed3db0SBarry Smith y[8*(*idx)+5] += alpha6*(*v); 126766ed3db0SBarry Smith y[8*(*idx)+6] += alpha7*(*v); 126866ed3db0SBarry Smith y[8*(*idx)+7] += alpha8*(*v); 126966ed3db0SBarry Smith idx++; v++; 127066ed3db0SBarry Smith } 127166ed3db0SBarry Smith } 1272899cda47SBarry Smith ierr = PetscLogFlops(16*a->nz - 8*b->AIJ->cmap.n);CHKERRQ(ierr); 12731ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 12741ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 127566ed3db0SBarry Smith PetscFunctionReturn(0); 127666ed3db0SBarry Smith } 127766ed3db0SBarry Smith 127866ed3db0SBarry Smith #undef __FUNCT__ 127966ed3db0SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1280dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 128166ed3db0SBarry Smith { 128266ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 128366ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 128487828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1285dfbe8321SBarry Smith PetscErrorCode ierr; 1286899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1287b24ad042SBarry Smith PetscInt n,i,jrow,j; 128866ed3db0SBarry Smith 128966ed3db0SBarry Smith PetscFunctionBegin; 129066ed3db0SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 12911ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 12921ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 129366ed3db0SBarry Smith idx = a->j; 129466ed3db0SBarry Smith v = a->a; 129566ed3db0SBarry Smith ii = a->i; 129666ed3db0SBarry Smith 129766ed3db0SBarry Smith for (i=0; i<m; i++) { 129866ed3db0SBarry Smith jrow = ii[i]; 129966ed3db0SBarry Smith n = ii[i+1] - jrow; 130066ed3db0SBarry Smith sum1 = 0.0; 130166ed3db0SBarry Smith sum2 = 0.0; 130266ed3db0SBarry Smith sum3 = 0.0; 130366ed3db0SBarry Smith sum4 = 0.0; 130466ed3db0SBarry Smith sum5 = 0.0; 130566ed3db0SBarry Smith sum6 = 0.0; 130666ed3db0SBarry Smith sum7 = 0.0; 130766ed3db0SBarry Smith sum8 = 0.0; 130866ed3db0SBarry Smith for (j=0; j<n; j++) { 130966ed3db0SBarry Smith sum1 += v[jrow]*x[8*idx[jrow]]; 131066ed3db0SBarry Smith sum2 += v[jrow]*x[8*idx[jrow]+1]; 131166ed3db0SBarry Smith sum3 += v[jrow]*x[8*idx[jrow]+2]; 131266ed3db0SBarry Smith sum4 += v[jrow]*x[8*idx[jrow]+3]; 131366ed3db0SBarry Smith sum5 += v[jrow]*x[8*idx[jrow]+4]; 131466ed3db0SBarry Smith sum6 += v[jrow]*x[8*idx[jrow]+5]; 131566ed3db0SBarry Smith sum7 += v[jrow]*x[8*idx[jrow]+6]; 131666ed3db0SBarry Smith sum8 += v[jrow]*x[8*idx[jrow]+7]; 131766ed3db0SBarry Smith jrow++; 131866ed3db0SBarry Smith } 131966ed3db0SBarry Smith y[8*i] += sum1; 132066ed3db0SBarry Smith y[8*i+1] += sum2; 132166ed3db0SBarry Smith y[8*i+2] += sum3; 132266ed3db0SBarry Smith y[8*i+3] += sum4; 132366ed3db0SBarry Smith y[8*i+4] += sum5; 132466ed3db0SBarry Smith y[8*i+5] += sum6; 132566ed3db0SBarry Smith y[8*i+6] += sum7; 132666ed3db0SBarry Smith y[8*i+7] += sum8; 132766ed3db0SBarry Smith } 132866ed3db0SBarry Smith 1329efee365bSSatish Balay ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr); 13301ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 13311ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 133266ed3db0SBarry Smith PetscFunctionReturn(0); 133366ed3db0SBarry Smith } 133466ed3db0SBarry Smith 133566ed3db0SBarry Smith #undef __FUNCT__ 133666ed3db0SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1337dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 133866ed3db0SBarry Smith { 133966ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 134066ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 134187828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1342dfbe8321SBarry Smith PetscErrorCode ierr; 1343899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 134466ed3db0SBarry Smith 134566ed3db0SBarry Smith PetscFunctionBegin; 134666ed3db0SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 13471ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 13481ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 134966ed3db0SBarry Smith for (i=0; i<m; i++) { 1350bfec09a0SHong Zhang idx = a->j + a->i[i] ; 1351bfec09a0SHong Zhang v = a->a + a->i[i] ; 135266ed3db0SBarry Smith n = a->i[i+1] - a->i[i]; 135366ed3db0SBarry Smith alpha1 = x[8*i]; 135466ed3db0SBarry Smith alpha2 = x[8*i+1]; 135566ed3db0SBarry Smith alpha3 = x[8*i+2]; 135666ed3db0SBarry Smith alpha4 = x[8*i+3]; 135766ed3db0SBarry Smith alpha5 = x[8*i+4]; 135866ed3db0SBarry Smith alpha6 = x[8*i+5]; 135966ed3db0SBarry Smith alpha7 = x[8*i+6]; 136066ed3db0SBarry Smith alpha8 = x[8*i+7]; 136166ed3db0SBarry Smith while (n-->0) { 136266ed3db0SBarry Smith y[8*(*idx)] += alpha1*(*v); 136366ed3db0SBarry Smith y[8*(*idx)+1] += alpha2*(*v); 136466ed3db0SBarry Smith y[8*(*idx)+2] += alpha3*(*v); 136566ed3db0SBarry Smith y[8*(*idx)+3] += alpha4*(*v); 136666ed3db0SBarry Smith y[8*(*idx)+4] += alpha5*(*v); 136766ed3db0SBarry Smith y[8*(*idx)+5] += alpha6*(*v); 136866ed3db0SBarry Smith y[8*(*idx)+6] += alpha7*(*v); 136966ed3db0SBarry Smith y[8*(*idx)+7] += alpha8*(*v); 137066ed3db0SBarry Smith idx++; v++; 137166ed3db0SBarry Smith } 137266ed3db0SBarry Smith } 1373efee365bSSatish Balay ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr); 13741ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 13751ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 13762f7816d4SBarry Smith PetscFunctionReturn(0); 13772f7816d4SBarry Smith } 13782f7816d4SBarry Smith 13792b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/ 13802b692628SMatthew Knepley #undef __FUNCT__ 13812b692628SMatthew Knepley #define __FUNCT__ "MatMult_SeqMAIJ_9" 1382dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 13832b692628SMatthew Knepley { 13842b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 13852b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 13862b692628SMatthew Knepley PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1387dfbe8321SBarry Smith PetscErrorCode ierr; 1388899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1389b24ad042SBarry Smith PetscInt n,i,jrow,j; 13902b692628SMatthew Knepley 13912b692628SMatthew Knepley PetscFunctionBegin; 13921ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 13931ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 13942b692628SMatthew Knepley idx = a->j; 13952b692628SMatthew Knepley v = a->a; 13962b692628SMatthew Knepley ii = a->i; 13972b692628SMatthew Knepley 13982b692628SMatthew Knepley for (i=0; i<m; i++) { 13992b692628SMatthew Knepley jrow = ii[i]; 14002b692628SMatthew Knepley n = ii[i+1] - jrow; 14012b692628SMatthew Knepley sum1 = 0.0; 14022b692628SMatthew Knepley sum2 = 0.0; 14032b692628SMatthew Knepley sum3 = 0.0; 14042b692628SMatthew Knepley sum4 = 0.0; 14052b692628SMatthew Knepley sum5 = 0.0; 14062b692628SMatthew Knepley sum6 = 0.0; 14072b692628SMatthew Knepley sum7 = 0.0; 14082b692628SMatthew Knepley sum8 = 0.0; 14092b692628SMatthew Knepley sum9 = 0.0; 14102b692628SMatthew Knepley for (j=0; j<n; j++) { 14112b692628SMatthew Knepley sum1 += v[jrow]*x[9*idx[jrow]]; 14122b692628SMatthew Knepley sum2 += v[jrow]*x[9*idx[jrow]+1]; 14132b692628SMatthew Knepley sum3 += v[jrow]*x[9*idx[jrow]+2]; 14142b692628SMatthew Knepley sum4 += v[jrow]*x[9*idx[jrow]+3]; 14152b692628SMatthew Knepley sum5 += v[jrow]*x[9*idx[jrow]+4]; 14162b692628SMatthew Knepley sum6 += v[jrow]*x[9*idx[jrow]+5]; 14172b692628SMatthew Knepley sum7 += v[jrow]*x[9*idx[jrow]+6]; 14182b692628SMatthew Knepley sum8 += v[jrow]*x[9*idx[jrow]+7]; 14192b692628SMatthew Knepley sum9 += v[jrow]*x[9*idx[jrow]+8]; 14202b692628SMatthew Knepley jrow++; 14212b692628SMatthew Knepley } 14222b692628SMatthew Knepley y[9*i] = sum1; 14232b692628SMatthew Knepley y[9*i+1] = sum2; 14242b692628SMatthew Knepley y[9*i+2] = sum3; 14252b692628SMatthew Knepley y[9*i+3] = sum4; 14262b692628SMatthew Knepley y[9*i+4] = sum5; 14272b692628SMatthew Knepley y[9*i+5] = sum6; 14282b692628SMatthew Knepley y[9*i+6] = sum7; 14292b692628SMatthew Knepley y[9*i+7] = sum8; 14302b692628SMatthew Knepley y[9*i+8] = sum9; 14312b692628SMatthew Knepley } 14322b692628SMatthew Knepley 1433efee365bSSatish Balay ierr = PetscLogFlops(18*a->nz - 9*m);CHKERRQ(ierr); 14341ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 14351ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 14362b692628SMatthew Knepley PetscFunctionReturn(0); 14372b692628SMatthew Knepley } 14382b692628SMatthew Knepley 1439b9cda22cSBarry Smith /* ------------------------------------------------------------------------------*/ 1440b9cda22cSBarry Smith 14412b692628SMatthew Knepley #undef __FUNCT__ 14422b692628SMatthew Knepley #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9" 1443dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 14442b692628SMatthew Knepley { 14452b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 14462b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 14472b692628SMatthew Knepley PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0; 1448dfbe8321SBarry Smith PetscErrorCode ierr; 1449899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 14502b692628SMatthew Knepley 14512b692628SMatthew Knepley PetscFunctionBegin; 14522dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 14531ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 14541ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 14552b692628SMatthew Knepley 14562b692628SMatthew Knepley for (i=0; i<m; i++) { 14572b692628SMatthew Knepley idx = a->j + a->i[i] ; 14582b692628SMatthew Knepley v = a->a + a->i[i] ; 14592b692628SMatthew Knepley n = a->i[i+1] - a->i[i]; 14602b692628SMatthew Knepley alpha1 = x[9*i]; 14612b692628SMatthew Knepley alpha2 = x[9*i+1]; 14622b692628SMatthew Knepley alpha3 = x[9*i+2]; 14632b692628SMatthew Knepley alpha4 = x[9*i+3]; 14642b692628SMatthew Knepley alpha5 = x[9*i+4]; 14652b692628SMatthew Knepley alpha6 = x[9*i+5]; 14662b692628SMatthew Knepley alpha7 = x[9*i+6]; 14672b692628SMatthew Knepley alpha8 = x[9*i+7]; 14682f6bd0c6SMatthew Knepley alpha9 = x[9*i+8]; 14692b692628SMatthew Knepley while (n-->0) { 14702b692628SMatthew Knepley y[9*(*idx)] += alpha1*(*v); 14712b692628SMatthew Knepley y[9*(*idx)+1] += alpha2*(*v); 14722b692628SMatthew Knepley y[9*(*idx)+2] += alpha3*(*v); 14732b692628SMatthew Knepley y[9*(*idx)+3] += alpha4*(*v); 14742b692628SMatthew Knepley y[9*(*idx)+4] += alpha5*(*v); 14752b692628SMatthew Knepley y[9*(*idx)+5] += alpha6*(*v); 14762b692628SMatthew Knepley y[9*(*idx)+6] += alpha7*(*v); 14772b692628SMatthew Knepley y[9*(*idx)+7] += alpha8*(*v); 14782b692628SMatthew Knepley y[9*(*idx)+8] += alpha9*(*v); 14792b692628SMatthew Knepley idx++; v++; 14802b692628SMatthew Knepley } 14812b692628SMatthew Knepley } 1482899cda47SBarry Smith ierr = PetscLogFlops(18*a->nz - 9*b->AIJ->cmap.n);CHKERRQ(ierr); 14831ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 14841ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 14852b692628SMatthew Knepley PetscFunctionReturn(0); 14862b692628SMatthew Knepley } 14872b692628SMatthew Knepley 14882b692628SMatthew Knepley #undef __FUNCT__ 14892b692628SMatthew Knepley #define __FUNCT__ "MatMultAdd_SeqMAIJ_9" 1490dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 14912b692628SMatthew Knepley { 14922b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 14932b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 14942b692628SMatthew Knepley PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1495dfbe8321SBarry Smith PetscErrorCode ierr; 1496899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1497b24ad042SBarry Smith PetscInt n,i,jrow,j; 14982b692628SMatthew Knepley 14992b692628SMatthew Knepley PetscFunctionBegin; 15002b692628SMatthew Knepley if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 15011ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 15021ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 15032b692628SMatthew Knepley idx = a->j; 15042b692628SMatthew Knepley v = a->a; 15052b692628SMatthew Knepley ii = a->i; 15062b692628SMatthew Knepley 15072b692628SMatthew Knepley for (i=0; i<m; i++) { 15082b692628SMatthew Knepley jrow = ii[i]; 15092b692628SMatthew Knepley n = ii[i+1] - jrow; 15102b692628SMatthew Knepley sum1 = 0.0; 15112b692628SMatthew Knepley sum2 = 0.0; 15122b692628SMatthew Knepley sum3 = 0.0; 15132b692628SMatthew Knepley sum4 = 0.0; 15142b692628SMatthew Knepley sum5 = 0.0; 15152b692628SMatthew Knepley sum6 = 0.0; 15162b692628SMatthew Knepley sum7 = 0.0; 15172b692628SMatthew Knepley sum8 = 0.0; 15182b692628SMatthew Knepley sum9 = 0.0; 15192b692628SMatthew Knepley for (j=0; j<n; j++) { 15202b692628SMatthew Knepley sum1 += v[jrow]*x[9*idx[jrow]]; 15212b692628SMatthew Knepley sum2 += v[jrow]*x[9*idx[jrow]+1]; 15222b692628SMatthew Knepley sum3 += v[jrow]*x[9*idx[jrow]+2]; 15232b692628SMatthew Knepley sum4 += v[jrow]*x[9*idx[jrow]+3]; 15242b692628SMatthew Knepley sum5 += v[jrow]*x[9*idx[jrow]+4]; 15252b692628SMatthew Knepley sum6 += v[jrow]*x[9*idx[jrow]+5]; 15262b692628SMatthew Knepley sum7 += v[jrow]*x[9*idx[jrow]+6]; 15272b692628SMatthew Knepley sum8 += v[jrow]*x[9*idx[jrow]+7]; 15282b692628SMatthew Knepley sum9 += v[jrow]*x[9*idx[jrow]+8]; 15292b692628SMatthew Knepley jrow++; 15302b692628SMatthew Knepley } 15312b692628SMatthew Knepley y[9*i] += sum1; 15322b692628SMatthew Knepley y[9*i+1] += sum2; 15332b692628SMatthew Knepley y[9*i+2] += sum3; 15342b692628SMatthew Knepley y[9*i+3] += sum4; 15352b692628SMatthew Knepley y[9*i+4] += sum5; 15362b692628SMatthew Knepley y[9*i+5] += sum6; 15372b692628SMatthew Knepley y[9*i+6] += sum7; 15382b692628SMatthew Knepley y[9*i+7] += sum8; 15392b692628SMatthew Knepley y[9*i+8] += sum9; 15402b692628SMatthew Knepley } 15412b692628SMatthew Knepley 1542efee365bSSatish Balay ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 15431ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 15441ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 15452b692628SMatthew Knepley PetscFunctionReturn(0); 15462b692628SMatthew Knepley } 15472b692628SMatthew Knepley 15482b692628SMatthew Knepley #undef __FUNCT__ 15492b692628SMatthew Knepley #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9" 1550dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 15512b692628SMatthew Knepley { 15522b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 15532b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 15542b692628SMatthew Knepley PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1555dfbe8321SBarry Smith PetscErrorCode ierr; 1556899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 15572b692628SMatthew Knepley 15582b692628SMatthew Knepley PetscFunctionBegin; 15592b692628SMatthew Knepley if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 15601ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 15611ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 15622b692628SMatthew Knepley for (i=0; i<m; i++) { 15632b692628SMatthew Knepley idx = a->j + a->i[i] ; 15642b692628SMatthew Knepley v = a->a + a->i[i] ; 15652b692628SMatthew Knepley n = a->i[i+1] - a->i[i]; 15662b692628SMatthew Knepley alpha1 = x[9*i]; 15672b692628SMatthew Knepley alpha2 = x[9*i+1]; 15682b692628SMatthew Knepley alpha3 = x[9*i+2]; 15692b692628SMatthew Knepley alpha4 = x[9*i+3]; 15702b692628SMatthew Knepley alpha5 = x[9*i+4]; 15712b692628SMatthew Knepley alpha6 = x[9*i+5]; 15722b692628SMatthew Knepley alpha7 = x[9*i+6]; 15732b692628SMatthew Knepley alpha8 = x[9*i+7]; 15742b692628SMatthew Knepley alpha9 = x[9*i+8]; 15752b692628SMatthew Knepley while (n-->0) { 15762b692628SMatthew Knepley y[9*(*idx)] += alpha1*(*v); 15772b692628SMatthew Knepley y[9*(*idx)+1] += alpha2*(*v); 15782b692628SMatthew Knepley y[9*(*idx)+2] += alpha3*(*v); 15792b692628SMatthew Knepley y[9*(*idx)+3] += alpha4*(*v); 15802b692628SMatthew Knepley y[9*(*idx)+4] += alpha5*(*v); 15812b692628SMatthew Knepley y[9*(*idx)+5] += alpha6*(*v); 15822b692628SMatthew Knepley y[9*(*idx)+6] += alpha7*(*v); 15832b692628SMatthew Knepley y[9*(*idx)+7] += alpha8*(*v); 15842b692628SMatthew Knepley y[9*(*idx)+8] += alpha9*(*v); 15852b692628SMatthew Knepley idx++; v++; 15862b692628SMatthew Knepley } 15872b692628SMatthew Knepley } 1588efee365bSSatish Balay ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 15891ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 15901ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 15912b692628SMatthew Knepley PetscFunctionReturn(0); 15922b692628SMatthew Knepley } 1593b9cda22cSBarry Smith /*--------------------------------------------------------------------------------------------*/ 1594b9cda22cSBarry Smith #undef __FUNCT__ 1595b9cda22cSBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_10" 1596b9cda22cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1597b9cda22cSBarry Smith { 1598b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1599b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1600b9cda22cSBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1601b9cda22cSBarry Smith PetscErrorCode ierr; 1602899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1603b9cda22cSBarry Smith PetscInt n,i,jrow,j; 1604b9cda22cSBarry Smith 1605b9cda22cSBarry Smith PetscFunctionBegin; 1606b9cda22cSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1607b9cda22cSBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1608b9cda22cSBarry Smith idx = a->j; 1609b9cda22cSBarry Smith v = a->a; 1610b9cda22cSBarry Smith ii = a->i; 1611b9cda22cSBarry Smith 1612b9cda22cSBarry Smith for (i=0; i<m; i++) { 1613b9cda22cSBarry Smith jrow = ii[i]; 1614b9cda22cSBarry Smith n = ii[i+1] - jrow; 1615b9cda22cSBarry Smith sum1 = 0.0; 1616b9cda22cSBarry Smith sum2 = 0.0; 1617b9cda22cSBarry Smith sum3 = 0.0; 1618b9cda22cSBarry Smith sum4 = 0.0; 1619b9cda22cSBarry Smith sum5 = 0.0; 1620b9cda22cSBarry Smith sum6 = 0.0; 1621b9cda22cSBarry Smith sum7 = 0.0; 1622b9cda22cSBarry Smith sum8 = 0.0; 1623b9cda22cSBarry Smith sum9 = 0.0; 1624b9cda22cSBarry Smith sum10 = 0.0; 1625b9cda22cSBarry Smith for (j=0; j<n; j++) { 1626b9cda22cSBarry Smith sum1 += v[jrow]*x[10*idx[jrow]]; 1627b9cda22cSBarry Smith sum2 += v[jrow]*x[10*idx[jrow]+1]; 1628b9cda22cSBarry Smith sum3 += v[jrow]*x[10*idx[jrow]+2]; 1629b9cda22cSBarry Smith sum4 += v[jrow]*x[10*idx[jrow]+3]; 1630b9cda22cSBarry Smith sum5 += v[jrow]*x[10*idx[jrow]+4]; 1631b9cda22cSBarry Smith sum6 += v[jrow]*x[10*idx[jrow]+5]; 1632b9cda22cSBarry Smith sum7 += v[jrow]*x[10*idx[jrow]+6]; 1633b9cda22cSBarry Smith sum8 += v[jrow]*x[10*idx[jrow]+7]; 1634b9cda22cSBarry Smith sum9 += v[jrow]*x[10*idx[jrow]+8]; 1635b9cda22cSBarry Smith sum10 += v[jrow]*x[10*idx[jrow]+9]; 1636b9cda22cSBarry Smith jrow++; 1637b9cda22cSBarry Smith } 1638b9cda22cSBarry Smith y[10*i] = sum1; 1639b9cda22cSBarry Smith y[10*i+1] = sum2; 1640b9cda22cSBarry Smith y[10*i+2] = sum3; 1641b9cda22cSBarry Smith y[10*i+3] = sum4; 1642b9cda22cSBarry Smith y[10*i+4] = sum5; 1643b9cda22cSBarry Smith y[10*i+5] = sum6; 1644b9cda22cSBarry Smith y[10*i+6] = sum7; 1645b9cda22cSBarry Smith y[10*i+7] = sum8; 1646b9cda22cSBarry Smith y[10*i+8] = sum9; 1647b9cda22cSBarry Smith y[10*i+9] = sum10; 1648b9cda22cSBarry Smith } 1649b9cda22cSBarry Smith 1650b9cda22cSBarry Smith ierr = PetscLogFlops(20*a->nz - 10*m);CHKERRQ(ierr); 1651b9cda22cSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1652b9cda22cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1653b9cda22cSBarry Smith PetscFunctionReturn(0); 1654b9cda22cSBarry Smith } 1655b9cda22cSBarry Smith 1656b9cda22cSBarry Smith #undef __FUNCT__ 1657b9cda22cSBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_10" 1658b9cda22cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1659b9cda22cSBarry Smith { 1660b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1661b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1662b9cda22cSBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1663b9cda22cSBarry Smith PetscErrorCode ierr; 1664899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1665b9cda22cSBarry Smith PetscInt n,i,jrow,j; 1666b9cda22cSBarry Smith 1667b9cda22cSBarry Smith PetscFunctionBegin; 1668b9cda22cSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1669b9cda22cSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1670b9cda22cSBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1671b9cda22cSBarry Smith idx = a->j; 1672b9cda22cSBarry Smith v = a->a; 1673b9cda22cSBarry Smith ii = a->i; 1674b9cda22cSBarry Smith 1675b9cda22cSBarry Smith for (i=0; i<m; i++) { 1676b9cda22cSBarry Smith jrow = ii[i]; 1677b9cda22cSBarry Smith n = ii[i+1] - jrow; 1678b9cda22cSBarry Smith sum1 = 0.0; 1679b9cda22cSBarry Smith sum2 = 0.0; 1680b9cda22cSBarry Smith sum3 = 0.0; 1681b9cda22cSBarry Smith sum4 = 0.0; 1682b9cda22cSBarry Smith sum5 = 0.0; 1683b9cda22cSBarry Smith sum6 = 0.0; 1684b9cda22cSBarry Smith sum7 = 0.0; 1685b9cda22cSBarry Smith sum8 = 0.0; 1686b9cda22cSBarry Smith sum9 = 0.0; 1687b9cda22cSBarry Smith sum10 = 0.0; 1688b9cda22cSBarry Smith for (j=0; j<n; j++) { 1689b9cda22cSBarry Smith sum1 += v[jrow]*x[10*idx[jrow]]; 1690b9cda22cSBarry Smith sum2 += v[jrow]*x[10*idx[jrow]+1]; 1691b9cda22cSBarry Smith sum3 += v[jrow]*x[10*idx[jrow]+2]; 1692b9cda22cSBarry Smith sum4 += v[jrow]*x[10*idx[jrow]+3]; 1693b9cda22cSBarry Smith sum5 += v[jrow]*x[10*idx[jrow]+4]; 1694b9cda22cSBarry Smith sum6 += v[jrow]*x[10*idx[jrow]+5]; 1695b9cda22cSBarry Smith sum7 += v[jrow]*x[10*idx[jrow]+6]; 1696b9cda22cSBarry Smith sum8 += v[jrow]*x[10*idx[jrow]+7]; 1697b9cda22cSBarry Smith sum9 += v[jrow]*x[10*idx[jrow]+8]; 1698b9cda22cSBarry Smith sum10 += v[jrow]*x[10*idx[jrow]+9]; 1699b9cda22cSBarry Smith jrow++; 1700b9cda22cSBarry Smith } 1701b9cda22cSBarry Smith y[10*i] += sum1; 1702b9cda22cSBarry Smith y[10*i+1] += sum2; 1703b9cda22cSBarry Smith y[10*i+2] += sum3; 1704b9cda22cSBarry Smith y[10*i+3] += sum4; 1705b9cda22cSBarry Smith y[10*i+4] += sum5; 1706b9cda22cSBarry Smith y[10*i+5] += sum6; 1707b9cda22cSBarry Smith y[10*i+6] += sum7; 1708b9cda22cSBarry Smith y[10*i+7] += sum8; 1709b9cda22cSBarry Smith y[10*i+8] += sum9; 1710b9cda22cSBarry Smith y[10*i+9] += sum10; 1711b9cda22cSBarry Smith } 1712b9cda22cSBarry Smith 1713b9cda22cSBarry Smith ierr = PetscLogFlops(20*a->nz - 10*m);CHKERRQ(ierr); 1714b9cda22cSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1715b9cda22cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1716b9cda22cSBarry Smith PetscFunctionReturn(0); 1717b9cda22cSBarry Smith } 1718b9cda22cSBarry Smith 1719b9cda22cSBarry Smith #undef __FUNCT__ 1720b9cda22cSBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10" 1721b9cda22cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1722b9cda22cSBarry Smith { 1723b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1724b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1725b9cda22cSBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,zero = 0.0; 1726b9cda22cSBarry Smith PetscErrorCode ierr; 1727899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1728b9cda22cSBarry Smith 1729b9cda22cSBarry Smith PetscFunctionBegin; 1730b9cda22cSBarry Smith ierr = VecSet(yy,zero);CHKERRQ(ierr); 1731b9cda22cSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1732b9cda22cSBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1733b9cda22cSBarry Smith 1734b9cda22cSBarry Smith for (i=0; i<m; i++) { 1735b9cda22cSBarry Smith idx = a->j + a->i[i] ; 1736b9cda22cSBarry Smith v = a->a + a->i[i] ; 1737b9cda22cSBarry Smith n = a->i[i+1] - a->i[i]; 1738e29fdc22SBarry Smith alpha1 = x[10*i]; 1739e29fdc22SBarry Smith alpha2 = x[10*i+1]; 1740e29fdc22SBarry Smith alpha3 = x[10*i+2]; 1741e29fdc22SBarry Smith alpha4 = x[10*i+3]; 1742e29fdc22SBarry Smith alpha5 = x[10*i+4]; 1743e29fdc22SBarry Smith alpha6 = x[10*i+5]; 1744e29fdc22SBarry Smith alpha7 = x[10*i+6]; 1745e29fdc22SBarry Smith alpha8 = x[10*i+7]; 1746e29fdc22SBarry Smith alpha9 = x[10*i+8]; 1747b9cda22cSBarry Smith alpha10 = x[10*i+9]; 1748b9cda22cSBarry Smith while (n-->0) { 1749e29fdc22SBarry Smith y[10*(*idx)] += alpha1*(*v); 1750e29fdc22SBarry Smith y[10*(*idx)+1] += alpha2*(*v); 1751e29fdc22SBarry Smith y[10*(*idx)+2] += alpha3*(*v); 1752e29fdc22SBarry Smith y[10*(*idx)+3] += alpha4*(*v); 1753e29fdc22SBarry Smith y[10*(*idx)+4] += alpha5*(*v); 1754e29fdc22SBarry Smith y[10*(*idx)+5] += alpha6*(*v); 1755e29fdc22SBarry Smith y[10*(*idx)+6] += alpha7*(*v); 1756e29fdc22SBarry Smith y[10*(*idx)+7] += alpha8*(*v); 1757e29fdc22SBarry Smith y[10*(*idx)+8] += alpha9*(*v); 1758b9cda22cSBarry Smith y[10*(*idx)+9] += alpha10*(*v); 1759b9cda22cSBarry Smith idx++; v++; 1760b9cda22cSBarry Smith } 1761b9cda22cSBarry Smith } 1762899cda47SBarry Smith ierr = PetscLogFlops(20*a->nz - 10*b->AIJ->cmap.n);CHKERRQ(ierr); 1763b9cda22cSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1764b9cda22cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1765b9cda22cSBarry Smith PetscFunctionReturn(0); 1766b9cda22cSBarry Smith } 1767b9cda22cSBarry Smith 1768b9cda22cSBarry Smith #undef __FUNCT__ 1769b9cda22cSBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10" 1770b9cda22cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1771b9cda22cSBarry Smith { 1772b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1773b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1774b9cda22cSBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1775b9cda22cSBarry Smith PetscErrorCode ierr; 1776899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1777b9cda22cSBarry Smith 1778b9cda22cSBarry Smith PetscFunctionBegin; 1779b9cda22cSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1780b9cda22cSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1781b9cda22cSBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1782b9cda22cSBarry Smith for (i=0; i<m; i++) { 1783b9cda22cSBarry Smith idx = a->j + a->i[i] ; 1784b9cda22cSBarry Smith v = a->a + a->i[i] ; 1785b9cda22cSBarry Smith n = a->i[i+1] - a->i[i]; 1786b9cda22cSBarry Smith alpha1 = x[10*i]; 1787b9cda22cSBarry Smith alpha2 = x[10*i+1]; 1788b9cda22cSBarry Smith alpha3 = x[10*i+2]; 1789b9cda22cSBarry Smith alpha4 = x[10*i+3]; 1790b9cda22cSBarry Smith alpha5 = x[10*i+4]; 1791b9cda22cSBarry Smith alpha6 = x[10*i+5]; 1792b9cda22cSBarry Smith alpha7 = x[10*i+6]; 1793b9cda22cSBarry Smith alpha8 = x[10*i+7]; 1794b9cda22cSBarry Smith alpha9 = x[10*i+8]; 1795b9cda22cSBarry Smith alpha10 = x[10*i+9]; 1796b9cda22cSBarry Smith while (n-->0) { 1797b9cda22cSBarry Smith y[10*(*idx)] += alpha1*(*v); 1798b9cda22cSBarry Smith y[10*(*idx)+1] += alpha2*(*v); 1799b9cda22cSBarry Smith y[10*(*idx)+2] += alpha3*(*v); 1800b9cda22cSBarry Smith y[10*(*idx)+3] += alpha4*(*v); 1801b9cda22cSBarry Smith y[10*(*idx)+4] += alpha5*(*v); 1802b9cda22cSBarry Smith y[10*(*idx)+5] += alpha6*(*v); 1803b9cda22cSBarry Smith y[10*(*idx)+6] += alpha7*(*v); 1804b9cda22cSBarry Smith y[10*(*idx)+7] += alpha8*(*v); 1805b9cda22cSBarry Smith y[10*(*idx)+8] += alpha9*(*v); 1806b9cda22cSBarry Smith y[10*(*idx)+9] += alpha10*(*v); 1807b9cda22cSBarry Smith idx++; v++; 1808b9cda22cSBarry Smith } 1809b9cda22cSBarry Smith } 1810b9cda22cSBarry Smith ierr = PetscLogFlops(20*a->nz);CHKERRQ(ierr); 1811b9cda22cSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1812b9cda22cSBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1813b9cda22cSBarry Smith PetscFunctionReturn(0); 1814b9cda22cSBarry Smith } 1815b9cda22cSBarry Smith 18162b692628SMatthew Knepley 18172f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/ 18182f7816d4SBarry Smith #undef __FUNCT__ 18192f7816d4SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_16" 1820dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 18212f7816d4SBarry Smith { 18222f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 18232f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 18242f7816d4SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 18252f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1826dfbe8321SBarry Smith PetscErrorCode ierr; 1827899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1828b24ad042SBarry Smith PetscInt n,i,jrow,j; 18292f7816d4SBarry Smith 18302f7816d4SBarry Smith PetscFunctionBegin; 18311ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 18321ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 18332f7816d4SBarry Smith idx = a->j; 18342f7816d4SBarry Smith v = a->a; 18352f7816d4SBarry Smith ii = a->i; 18362f7816d4SBarry Smith 18372f7816d4SBarry Smith for (i=0; i<m; i++) { 18382f7816d4SBarry Smith jrow = ii[i]; 18392f7816d4SBarry Smith n = ii[i+1] - jrow; 18402f7816d4SBarry Smith sum1 = 0.0; 18412f7816d4SBarry Smith sum2 = 0.0; 18422f7816d4SBarry Smith sum3 = 0.0; 18432f7816d4SBarry Smith sum4 = 0.0; 18442f7816d4SBarry Smith sum5 = 0.0; 18452f7816d4SBarry Smith sum6 = 0.0; 18462f7816d4SBarry Smith sum7 = 0.0; 18472f7816d4SBarry Smith sum8 = 0.0; 18482f7816d4SBarry Smith sum9 = 0.0; 18492f7816d4SBarry Smith sum10 = 0.0; 18502f7816d4SBarry Smith sum11 = 0.0; 18512f7816d4SBarry Smith sum12 = 0.0; 18522f7816d4SBarry Smith sum13 = 0.0; 18532f7816d4SBarry Smith sum14 = 0.0; 18542f7816d4SBarry Smith sum15 = 0.0; 18552f7816d4SBarry Smith sum16 = 0.0; 18562f7816d4SBarry Smith for (j=0; j<n; j++) { 18572f7816d4SBarry Smith sum1 += v[jrow]*x[16*idx[jrow]]; 18582f7816d4SBarry Smith sum2 += v[jrow]*x[16*idx[jrow]+1]; 18592f7816d4SBarry Smith sum3 += v[jrow]*x[16*idx[jrow]+2]; 18602f7816d4SBarry Smith sum4 += v[jrow]*x[16*idx[jrow]+3]; 18612f7816d4SBarry Smith sum5 += v[jrow]*x[16*idx[jrow]+4]; 18622f7816d4SBarry Smith sum6 += v[jrow]*x[16*idx[jrow]+5]; 18632f7816d4SBarry Smith sum7 += v[jrow]*x[16*idx[jrow]+6]; 18642f7816d4SBarry Smith sum8 += v[jrow]*x[16*idx[jrow]+7]; 18652f7816d4SBarry Smith sum9 += v[jrow]*x[16*idx[jrow]+8]; 18662f7816d4SBarry Smith sum10 += v[jrow]*x[16*idx[jrow]+9]; 18672f7816d4SBarry Smith sum11 += v[jrow]*x[16*idx[jrow]+10]; 18682f7816d4SBarry Smith sum12 += v[jrow]*x[16*idx[jrow]+11]; 18692f7816d4SBarry Smith sum13 += v[jrow]*x[16*idx[jrow]+12]; 18702f7816d4SBarry Smith sum14 += v[jrow]*x[16*idx[jrow]+13]; 18712f7816d4SBarry Smith sum15 += v[jrow]*x[16*idx[jrow]+14]; 18722f7816d4SBarry Smith sum16 += v[jrow]*x[16*idx[jrow]+15]; 18732f7816d4SBarry Smith jrow++; 18742f7816d4SBarry Smith } 18752f7816d4SBarry Smith y[16*i] = sum1; 18762f7816d4SBarry Smith y[16*i+1] = sum2; 18772f7816d4SBarry Smith y[16*i+2] = sum3; 18782f7816d4SBarry Smith y[16*i+3] = sum4; 18792f7816d4SBarry Smith y[16*i+4] = sum5; 18802f7816d4SBarry Smith y[16*i+5] = sum6; 18812f7816d4SBarry Smith y[16*i+6] = sum7; 18822f7816d4SBarry Smith y[16*i+7] = sum8; 18832f7816d4SBarry Smith y[16*i+8] = sum9; 18842f7816d4SBarry Smith y[16*i+9] = sum10; 18852f7816d4SBarry Smith y[16*i+10] = sum11; 18862f7816d4SBarry Smith y[16*i+11] = sum12; 18872f7816d4SBarry Smith y[16*i+12] = sum13; 18882f7816d4SBarry Smith y[16*i+13] = sum14; 18892f7816d4SBarry Smith y[16*i+14] = sum15; 18902f7816d4SBarry Smith y[16*i+15] = sum16; 18912f7816d4SBarry Smith } 18922f7816d4SBarry Smith 1893efee365bSSatish Balay ierr = PetscLogFlops(32*a->nz - 16*m);CHKERRQ(ierr); 18941ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 18951ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 18962f7816d4SBarry Smith PetscFunctionReturn(0); 18972f7816d4SBarry Smith } 18982f7816d4SBarry Smith 18992f7816d4SBarry Smith #undef __FUNCT__ 19002f7816d4SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 1901dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 19022f7816d4SBarry Smith { 19032f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 19042f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 19052f7816d4SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 19062f7816d4SBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1907dfbe8321SBarry Smith PetscErrorCode ierr; 1908899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 19092f7816d4SBarry Smith 19102f7816d4SBarry Smith PetscFunctionBegin; 19112dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 19121ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 19131ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1914bfec09a0SHong Zhang 19152f7816d4SBarry Smith for (i=0; i<m; i++) { 1916bfec09a0SHong Zhang idx = a->j + a->i[i] ; 1917bfec09a0SHong Zhang v = a->a + a->i[i] ; 19182f7816d4SBarry Smith n = a->i[i+1] - a->i[i]; 19192f7816d4SBarry Smith alpha1 = x[16*i]; 19202f7816d4SBarry Smith alpha2 = x[16*i+1]; 19212f7816d4SBarry Smith alpha3 = x[16*i+2]; 19222f7816d4SBarry Smith alpha4 = x[16*i+3]; 19232f7816d4SBarry Smith alpha5 = x[16*i+4]; 19242f7816d4SBarry Smith alpha6 = x[16*i+5]; 19252f7816d4SBarry Smith alpha7 = x[16*i+6]; 19262f7816d4SBarry Smith alpha8 = x[16*i+7]; 19272f7816d4SBarry Smith alpha9 = x[16*i+8]; 19282f7816d4SBarry Smith alpha10 = x[16*i+9]; 19292f7816d4SBarry Smith alpha11 = x[16*i+10]; 19302f7816d4SBarry Smith alpha12 = x[16*i+11]; 19312f7816d4SBarry Smith alpha13 = x[16*i+12]; 19322f7816d4SBarry Smith alpha14 = x[16*i+13]; 19332f7816d4SBarry Smith alpha15 = x[16*i+14]; 19342f7816d4SBarry Smith alpha16 = x[16*i+15]; 19352f7816d4SBarry Smith while (n-->0) { 19362f7816d4SBarry Smith y[16*(*idx)] += alpha1*(*v); 19372f7816d4SBarry Smith y[16*(*idx)+1] += alpha2*(*v); 19382f7816d4SBarry Smith y[16*(*idx)+2] += alpha3*(*v); 19392f7816d4SBarry Smith y[16*(*idx)+3] += alpha4*(*v); 19402f7816d4SBarry Smith y[16*(*idx)+4] += alpha5*(*v); 19412f7816d4SBarry Smith y[16*(*idx)+5] += alpha6*(*v); 19422f7816d4SBarry Smith y[16*(*idx)+6] += alpha7*(*v); 19432f7816d4SBarry Smith y[16*(*idx)+7] += alpha8*(*v); 19442f7816d4SBarry Smith y[16*(*idx)+8] += alpha9*(*v); 19452f7816d4SBarry Smith y[16*(*idx)+9] += alpha10*(*v); 19462f7816d4SBarry Smith y[16*(*idx)+10] += alpha11*(*v); 19472f7816d4SBarry Smith y[16*(*idx)+11] += alpha12*(*v); 19482f7816d4SBarry Smith y[16*(*idx)+12] += alpha13*(*v); 19492f7816d4SBarry Smith y[16*(*idx)+13] += alpha14*(*v); 19502f7816d4SBarry Smith y[16*(*idx)+14] += alpha15*(*v); 19512f7816d4SBarry Smith y[16*(*idx)+15] += alpha16*(*v); 19522f7816d4SBarry Smith idx++; v++; 19532f7816d4SBarry Smith } 19542f7816d4SBarry Smith } 1955899cda47SBarry Smith ierr = PetscLogFlops(32*a->nz - 16*b->AIJ->cmap.n);CHKERRQ(ierr); 19561ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 19571ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 19582f7816d4SBarry Smith PetscFunctionReturn(0); 19592f7816d4SBarry Smith } 19602f7816d4SBarry Smith 19612f7816d4SBarry Smith #undef __FUNCT__ 19622f7816d4SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 1963dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 19642f7816d4SBarry Smith { 19652f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 19662f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 19672f7816d4SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 19682f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1969dfbe8321SBarry Smith PetscErrorCode ierr; 1970899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1971b24ad042SBarry Smith PetscInt n,i,jrow,j; 19722f7816d4SBarry Smith 19732f7816d4SBarry Smith PetscFunctionBegin; 19742f7816d4SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 19751ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 19761ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 19772f7816d4SBarry Smith idx = a->j; 19782f7816d4SBarry Smith v = a->a; 19792f7816d4SBarry Smith ii = a->i; 19802f7816d4SBarry Smith 19812f7816d4SBarry Smith for (i=0; i<m; i++) { 19822f7816d4SBarry Smith jrow = ii[i]; 19832f7816d4SBarry Smith n = ii[i+1] - jrow; 19842f7816d4SBarry Smith sum1 = 0.0; 19852f7816d4SBarry Smith sum2 = 0.0; 19862f7816d4SBarry Smith sum3 = 0.0; 19872f7816d4SBarry Smith sum4 = 0.0; 19882f7816d4SBarry Smith sum5 = 0.0; 19892f7816d4SBarry Smith sum6 = 0.0; 19902f7816d4SBarry Smith sum7 = 0.0; 19912f7816d4SBarry Smith sum8 = 0.0; 19922f7816d4SBarry Smith sum9 = 0.0; 19932f7816d4SBarry Smith sum10 = 0.0; 19942f7816d4SBarry Smith sum11 = 0.0; 19952f7816d4SBarry Smith sum12 = 0.0; 19962f7816d4SBarry Smith sum13 = 0.0; 19972f7816d4SBarry Smith sum14 = 0.0; 19982f7816d4SBarry Smith sum15 = 0.0; 19992f7816d4SBarry Smith sum16 = 0.0; 20002f7816d4SBarry Smith for (j=0; j<n; j++) { 20012f7816d4SBarry Smith sum1 += v[jrow]*x[16*idx[jrow]]; 20022f7816d4SBarry Smith sum2 += v[jrow]*x[16*idx[jrow]+1]; 20032f7816d4SBarry Smith sum3 += v[jrow]*x[16*idx[jrow]+2]; 20042f7816d4SBarry Smith sum4 += v[jrow]*x[16*idx[jrow]+3]; 20052f7816d4SBarry Smith sum5 += v[jrow]*x[16*idx[jrow]+4]; 20062f7816d4SBarry Smith sum6 += v[jrow]*x[16*idx[jrow]+5]; 20072f7816d4SBarry Smith sum7 += v[jrow]*x[16*idx[jrow]+6]; 20082f7816d4SBarry Smith sum8 += v[jrow]*x[16*idx[jrow]+7]; 20092f7816d4SBarry Smith sum9 += v[jrow]*x[16*idx[jrow]+8]; 20102f7816d4SBarry Smith sum10 += v[jrow]*x[16*idx[jrow]+9]; 20112f7816d4SBarry Smith sum11 += v[jrow]*x[16*idx[jrow]+10]; 20122f7816d4SBarry Smith sum12 += v[jrow]*x[16*idx[jrow]+11]; 20132f7816d4SBarry Smith sum13 += v[jrow]*x[16*idx[jrow]+12]; 20142f7816d4SBarry Smith sum14 += v[jrow]*x[16*idx[jrow]+13]; 20152f7816d4SBarry Smith sum15 += v[jrow]*x[16*idx[jrow]+14]; 20162f7816d4SBarry Smith sum16 += v[jrow]*x[16*idx[jrow]+15]; 20172f7816d4SBarry Smith jrow++; 20182f7816d4SBarry Smith } 20192f7816d4SBarry Smith y[16*i] += sum1; 20202f7816d4SBarry Smith y[16*i+1] += sum2; 20212f7816d4SBarry Smith y[16*i+2] += sum3; 20222f7816d4SBarry Smith y[16*i+3] += sum4; 20232f7816d4SBarry Smith y[16*i+4] += sum5; 20242f7816d4SBarry Smith y[16*i+5] += sum6; 20252f7816d4SBarry Smith y[16*i+6] += sum7; 20262f7816d4SBarry Smith y[16*i+7] += sum8; 20272f7816d4SBarry Smith y[16*i+8] += sum9; 20282f7816d4SBarry Smith y[16*i+9] += sum10; 20292f7816d4SBarry Smith y[16*i+10] += sum11; 20302f7816d4SBarry Smith y[16*i+11] += sum12; 20312f7816d4SBarry Smith y[16*i+12] += sum13; 20322f7816d4SBarry Smith y[16*i+13] += sum14; 20332f7816d4SBarry Smith y[16*i+14] += sum15; 20342f7816d4SBarry Smith y[16*i+15] += sum16; 20352f7816d4SBarry Smith } 20362f7816d4SBarry Smith 2037efee365bSSatish Balay ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr); 20381ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 20391ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 20402f7816d4SBarry Smith PetscFunctionReturn(0); 20412f7816d4SBarry Smith } 20422f7816d4SBarry Smith 20432f7816d4SBarry Smith #undef __FUNCT__ 20442f7816d4SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 2045dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 20462f7816d4SBarry Smith { 20472f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 20482f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 20492f7816d4SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 20502f7816d4SBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2051dfbe8321SBarry Smith PetscErrorCode ierr; 2052899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 20532f7816d4SBarry Smith 20542f7816d4SBarry Smith PetscFunctionBegin; 20552f7816d4SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 20561ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 20571ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 20582f7816d4SBarry Smith for (i=0; i<m; i++) { 2059bfec09a0SHong Zhang idx = a->j + a->i[i] ; 2060bfec09a0SHong Zhang v = a->a + a->i[i] ; 20612f7816d4SBarry Smith n = a->i[i+1] - a->i[i]; 20622f7816d4SBarry Smith alpha1 = x[16*i]; 20632f7816d4SBarry Smith alpha2 = x[16*i+1]; 20642f7816d4SBarry Smith alpha3 = x[16*i+2]; 20652f7816d4SBarry Smith alpha4 = x[16*i+3]; 20662f7816d4SBarry Smith alpha5 = x[16*i+4]; 20672f7816d4SBarry Smith alpha6 = x[16*i+5]; 20682f7816d4SBarry Smith alpha7 = x[16*i+6]; 20692f7816d4SBarry Smith alpha8 = x[16*i+7]; 20702f7816d4SBarry Smith alpha9 = x[16*i+8]; 20712f7816d4SBarry Smith alpha10 = x[16*i+9]; 20722f7816d4SBarry Smith alpha11 = x[16*i+10]; 20732f7816d4SBarry Smith alpha12 = x[16*i+11]; 20742f7816d4SBarry Smith alpha13 = x[16*i+12]; 20752f7816d4SBarry Smith alpha14 = x[16*i+13]; 20762f7816d4SBarry Smith alpha15 = x[16*i+14]; 20772f7816d4SBarry Smith alpha16 = x[16*i+15]; 20782f7816d4SBarry Smith while (n-->0) { 20792f7816d4SBarry Smith y[16*(*idx)] += alpha1*(*v); 20802f7816d4SBarry Smith y[16*(*idx)+1] += alpha2*(*v); 20812f7816d4SBarry Smith y[16*(*idx)+2] += alpha3*(*v); 20822f7816d4SBarry Smith y[16*(*idx)+3] += alpha4*(*v); 20832f7816d4SBarry Smith y[16*(*idx)+4] += alpha5*(*v); 20842f7816d4SBarry Smith y[16*(*idx)+5] += alpha6*(*v); 20852f7816d4SBarry Smith y[16*(*idx)+6] += alpha7*(*v); 20862f7816d4SBarry Smith y[16*(*idx)+7] += alpha8*(*v); 20872f7816d4SBarry Smith y[16*(*idx)+8] += alpha9*(*v); 20882f7816d4SBarry Smith y[16*(*idx)+9] += alpha10*(*v); 20892f7816d4SBarry Smith y[16*(*idx)+10] += alpha11*(*v); 20902f7816d4SBarry Smith y[16*(*idx)+11] += alpha12*(*v); 20912f7816d4SBarry Smith y[16*(*idx)+12] += alpha13*(*v); 20922f7816d4SBarry Smith y[16*(*idx)+13] += alpha14*(*v); 20932f7816d4SBarry Smith y[16*(*idx)+14] += alpha15*(*v); 20942f7816d4SBarry Smith y[16*(*idx)+15] += alpha16*(*v); 20952f7816d4SBarry Smith idx++; v++; 20962f7816d4SBarry Smith } 20972f7816d4SBarry Smith } 2098efee365bSSatish Balay ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr); 20991ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 21001ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 210166ed3db0SBarry Smith PetscFunctionReturn(0); 210266ed3db0SBarry Smith } 210366ed3db0SBarry Smith 2104f4a53059SBarry Smith /*===================================================================================*/ 21054a2ae208SSatish Balay #undef __FUNCT__ 21064a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIMAIJ_dof" 2107dfbe8321SBarry Smith PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2108f4a53059SBarry Smith { 21094cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2110dfbe8321SBarry Smith PetscErrorCode ierr; 2111f4a53059SBarry Smith 2112b24ad042SBarry Smith PetscFunctionBegin; 2113f4a53059SBarry Smith /* start the scatter */ 2114f4a53059SBarry Smith ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 21154cb9d9b8SBarry Smith ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 2116f4a53059SBarry Smith ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 21174cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 2118f4a53059SBarry Smith PetscFunctionReturn(0); 2119f4a53059SBarry Smith } 2120f4a53059SBarry Smith 21214a2ae208SSatish Balay #undef __FUNCT__ 21224a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 2123dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 21244cb9d9b8SBarry Smith { 21254cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2126dfbe8321SBarry Smith PetscErrorCode ierr; 2127b24ad042SBarry Smith 21284cb9d9b8SBarry Smith PetscFunctionBegin; 21294cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 21304cb9d9b8SBarry Smith ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 2131a5ff213dSBarry Smith ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 21324cb9d9b8SBarry Smith ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 21334cb9d9b8SBarry Smith PetscFunctionReturn(0); 21344cb9d9b8SBarry Smith } 21354cb9d9b8SBarry Smith 21364a2ae208SSatish Balay #undef __FUNCT__ 21374a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 2138dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 21394cb9d9b8SBarry Smith { 21404cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2141dfbe8321SBarry Smith PetscErrorCode ierr; 21424cb9d9b8SBarry Smith 2143b24ad042SBarry Smith PetscFunctionBegin; 21444cb9d9b8SBarry Smith /* start the scatter */ 21454cb9d9b8SBarry Smith ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 2146d72c5c08SBarry Smith ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 21474cb9d9b8SBarry Smith ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 2148717f2ec8SHong Zhang ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 21494cb9d9b8SBarry Smith PetscFunctionReturn(0); 21504cb9d9b8SBarry Smith } 21514cb9d9b8SBarry Smith 21524a2ae208SSatish Balay #undef __FUNCT__ 21534a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 2154dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 21554cb9d9b8SBarry Smith { 21564cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2157dfbe8321SBarry Smith PetscErrorCode ierr; 2158b24ad042SBarry Smith 21594cb9d9b8SBarry Smith PetscFunctionBegin; 21604cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2161d72c5c08SBarry Smith ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 2162d72c5c08SBarry Smith ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2163d72c5c08SBarry Smith ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 21644cb9d9b8SBarry Smith PetscFunctionReturn(0); 21654cb9d9b8SBarry Smith } 21664cb9d9b8SBarry Smith 21679c4fc4c7SBarry Smith #undef __FUNCT__ 21687ba1a0bfSKris Buschelman #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 21697ba1a0bfSKris Buschelman PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 21707ba1a0bfSKris Buschelman { 21717ba1a0bfSKris Buschelman /* This routine requires testing -- but it's getting better. */ 21727ba1a0bfSKris Buschelman PetscErrorCode ierr; 2173a1a86e44SBarry Smith PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 21747ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 21757ba1a0bfSKris Buschelman Mat P=pp->AIJ; 21767ba1a0bfSKris Buschelman Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 21777ba1a0bfSKris Buschelman PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 21787ba1a0bfSKris Buschelman PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 2179899cda47SBarry Smith PetscInt an=A->cmap.N,am=A->rmap.N,pn=P->cmap.N,pm=P->rmap.N,ppdof=pp->dof,cn; 21807ba1a0bfSKris Buschelman PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 21817ba1a0bfSKris Buschelman MatScalar *ca; 21827ba1a0bfSKris Buschelman 21837ba1a0bfSKris Buschelman PetscFunctionBegin; 21847ba1a0bfSKris Buschelman /* Start timer */ 21857ba1a0bfSKris Buschelman ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 21867ba1a0bfSKris Buschelman 21877ba1a0bfSKris Buschelman /* Get ij structure of P^T */ 21887ba1a0bfSKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 21897ba1a0bfSKris Buschelman 21907ba1a0bfSKris Buschelman cn = pn*ppdof; 21917ba1a0bfSKris Buschelman /* Allocate ci array, arrays for fill computation and */ 21927ba1a0bfSKris Buschelman /* free space for accumulating nonzero column info */ 21937ba1a0bfSKris Buschelman ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 21947ba1a0bfSKris Buschelman ci[0] = 0; 21957ba1a0bfSKris Buschelman 21967ba1a0bfSKris Buschelman /* Work arrays for rows of P^T*A */ 21977ba1a0bfSKris Buschelman ierr = PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 21987ba1a0bfSKris Buschelman ierr = PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 21997ba1a0bfSKris Buschelman ptasparserow = ptadenserow + an; 22007ba1a0bfSKris Buschelman denserow = ptasparserow + an; 22017ba1a0bfSKris Buschelman sparserow = denserow + cn; 22027ba1a0bfSKris Buschelman 22037ba1a0bfSKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 22047ba1a0bfSKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 22057ba1a0bfSKris Buschelman /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 2206a1a86e44SBarry Smith ierr = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space); 22077ba1a0bfSKris Buschelman current_space = free_space; 22087ba1a0bfSKris Buschelman 22097ba1a0bfSKris Buschelman /* Determine symbolic info for each row of C: */ 22107ba1a0bfSKris Buschelman for (i=0;i<pn;i++) { 22117ba1a0bfSKris Buschelman ptnzi = pti[i+1] - pti[i]; 22127ba1a0bfSKris Buschelman ptJ = ptj + pti[i]; 22137ba1a0bfSKris Buschelman for (dof=0;dof<ppdof;dof++) { 22147ba1a0bfSKris Buschelman ptanzi = 0; 22157ba1a0bfSKris Buschelman /* Determine symbolic row of PtA: */ 22167ba1a0bfSKris Buschelman for (j=0;j<ptnzi;j++) { 22177ba1a0bfSKris Buschelman /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 22187ba1a0bfSKris Buschelman arow = ptJ[j]*ppdof + dof; 22197ba1a0bfSKris Buschelman /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 22207ba1a0bfSKris Buschelman anzj = ai[arow+1] - ai[arow]; 22217ba1a0bfSKris Buschelman ajj = aj + ai[arow]; 22227ba1a0bfSKris Buschelman for (k=0;k<anzj;k++) { 22237ba1a0bfSKris Buschelman if (!ptadenserow[ajj[k]]) { 22247ba1a0bfSKris Buschelman ptadenserow[ajj[k]] = -1; 22257ba1a0bfSKris Buschelman ptasparserow[ptanzi++] = ajj[k]; 22267ba1a0bfSKris Buschelman } 22277ba1a0bfSKris Buschelman } 22287ba1a0bfSKris Buschelman } 22297ba1a0bfSKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 22307ba1a0bfSKris Buschelman ptaj = ptasparserow; 22317ba1a0bfSKris Buschelman cnzi = 0; 22327ba1a0bfSKris Buschelman for (j=0;j<ptanzi;j++) { 22337ba1a0bfSKris Buschelman /* Get offset within block of P */ 22347ba1a0bfSKris Buschelman pshift = *ptaj%ppdof; 22357ba1a0bfSKris Buschelman /* Get block row of P */ 22367ba1a0bfSKris Buschelman prow = (*ptaj++)/ppdof; /* integer division */ 22377ba1a0bfSKris Buschelman /* P has same number of nonzeros per row as the compressed form */ 22387ba1a0bfSKris Buschelman pnzj = pi[prow+1] - pi[prow]; 22397ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 22407ba1a0bfSKris Buschelman for (k=0;k<pnzj;k++) { 22417ba1a0bfSKris Buschelman /* Locations in C are shifted by the offset within the block */ 22427ba1a0bfSKris Buschelman /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 22437ba1a0bfSKris Buschelman if (!denserow[pjj[k]*ppdof+pshift]) { 22447ba1a0bfSKris Buschelman denserow[pjj[k]*ppdof+pshift] = -1; 22457ba1a0bfSKris Buschelman sparserow[cnzi++] = pjj[k]*ppdof+pshift; 22467ba1a0bfSKris Buschelman } 22477ba1a0bfSKris Buschelman } 22487ba1a0bfSKris Buschelman } 22497ba1a0bfSKris Buschelman 22507ba1a0bfSKris Buschelman /* sort sparserow */ 22517ba1a0bfSKris Buschelman ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 22527ba1a0bfSKris Buschelman 22537ba1a0bfSKris Buschelman /* If free space is not available, make more free space */ 22547ba1a0bfSKris Buschelman /* Double the amount of total space in the list */ 22557ba1a0bfSKris Buschelman if (current_space->local_remaining<cnzi) { 2256a1a86e44SBarry Smith ierr = PetscFreeSpaceGet(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 22577ba1a0bfSKris Buschelman } 22587ba1a0bfSKris Buschelman 22597ba1a0bfSKris Buschelman /* Copy data into free space, and zero out denserows */ 22607ba1a0bfSKris Buschelman ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 22617ba1a0bfSKris Buschelman current_space->array += cnzi; 22627ba1a0bfSKris Buschelman current_space->local_used += cnzi; 22637ba1a0bfSKris Buschelman current_space->local_remaining -= cnzi; 22647ba1a0bfSKris Buschelman 22657ba1a0bfSKris Buschelman for (j=0;j<ptanzi;j++) { 22667ba1a0bfSKris Buschelman ptadenserow[ptasparserow[j]] = 0; 22677ba1a0bfSKris Buschelman } 22687ba1a0bfSKris Buschelman for (j=0;j<cnzi;j++) { 22697ba1a0bfSKris Buschelman denserow[sparserow[j]] = 0; 22707ba1a0bfSKris Buschelman } 22717ba1a0bfSKris Buschelman /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 22727ba1a0bfSKris Buschelman /* For now, we will recompute what is needed. */ 22737ba1a0bfSKris Buschelman ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 22747ba1a0bfSKris Buschelman } 22757ba1a0bfSKris Buschelman } 22767ba1a0bfSKris Buschelman /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 22777ba1a0bfSKris Buschelman /* Allocate space for cj, initialize cj, and */ 22787ba1a0bfSKris Buschelman /* destroy list of free space and other temporary array(s) */ 22797ba1a0bfSKris Buschelman ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 2280a1a86e44SBarry Smith ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 22817ba1a0bfSKris Buschelman ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 22827ba1a0bfSKris Buschelman 22837ba1a0bfSKris Buschelman /* Allocate space for ca */ 22847ba1a0bfSKris Buschelman ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 22857ba1a0bfSKris Buschelman ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 22867ba1a0bfSKris Buschelman 22877ba1a0bfSKris Buschelman /* put together the new matrix */ 22887ba1a0bfSKris Buschelman ierr = MatCreateSeqAIJWithArrays(A->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr); 22897ba1a0bfSKris Buschelman 22907ba1a0bfSKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 22917ba1a0bfSKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 22927ba1a0bfSKris Buschelman c = (Mat_SeqAIJ *)((*C)->data); 2293e6b907acSBarry Smith c->free_a = PETSC_TRUE; 2294e6b907acSBarry Smith c->free_ij = PETSC_TRUE; 22957ba1a0bfSKris Buschelman c->nonew = 0; 22967ba1a0bfSKris Buschelman 22977ba1a0bfSKris Buschelman /* Clean up. */ 22987ba1a0bfSKris Buschelman ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 22997ba1a0bfSKris Buschelman 23007ba1a0bfSKris Buschelman ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 23017ba1a0bfSKris Buschelman PetscFunctionReturn(0); 23027ba1a0bfSKris Buschelman } 23037ba1a0bfSKris Buschelman 23047ba1a0bfSKris Buschelman #undef __FUNCT__ 23057ba1a0bfSKris Buschelman #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ" 23067ba1a0bfSKris Buschelman PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 23077ba1a0bfSKris Buschelman { 23087ba1a0bfSKris Buschelman /* This routine requires testing -- first draft only */ 23097ba1a0bfSKris Buschelman PetscErrorCode ierr; 23107ba1a0bfSKris Buschelman PetscInt flops=0; 23117ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 23127ba1a0bfSKris Buschelman Mat P=pp->AIJ; 23137ba1a0bfSKris Buschelman Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 23147ba1a0bfSKris Buschelman Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 23157ba1a0bfSKris Buschelman Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 23167ba1a0bfSKris Buschelman PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 23177ba1a0bfSKris Buschelman PetscInt *ci=c->i,*cj=c->j,*cjj; 2318899cda47SBarry Smith PetscInt am=A->rmap.N,cn=C->cmap.N,cm=C->rmap.N,ppdof=pp->dof; 23197ba1a0bfSKris Buschelman PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 23207ba1a0bfSKris Buschelman MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 23217ba1a0bfSKris Buschelman 23227ba1a0bfSKris Buschelman PetscFunctionBegin; 23237ba1a0bfSKris Buschelman /* Allocate temporary array for storage of one row of A*P */ 23247ba1a0bfSKris Buschelman ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 23257ba1a0bfSKris Buschelman ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 23267ba1a0bfSKris Buschelman 23277ba1a0bfSKris Buschelman apj = (PetscInt *)(apa + cn); 23287ba1a0bfSKris Buschelman apjdense = apj + cn; 23297ba1a0bfSKris Buschelman 23307ba1a0bfSKris Buschelman /* Clear old values in C */ 23317ba1a0bfSKris Buschelman ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 23327ba1a0bfSKris Buschelman 23337ba1a0bfSKris Buschelman for (i=0;i<am;i++) { 23347ba1a0bfSKris Buschelman /* Form sparse row of A*P */ 23357ba1a0bfSKris Buschelman anzi = ai[i+1] - ai[i]; 23367ba1a0bfSKris Buschelman apnzj = 0; 23377ba1a0bfSKris Buschelman for (j=0;j<anzi;j++) { 23387ba1a0bfSKris Buschelman /* Get offset within block of P */ 23397ba1a0bfSKris Buschelman pshift = *aj%ppdof; 23407ba1a0bfSKris Buschelman /* Get block row of P */ 23417ba1a0bfSKris Buschelman prow = *aj++/ppdof; /* integer division */ 23427ba1a0bfSKris Buschelman pnzj = pi[prow+1] - pi[prow]; 23437ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 23447ba1a0bfSKris Buschelman paj = pa + pi[prow]; 23457ba1a0bfSKris Buschelman for (k=0;k<pnzj;k++) { 23467ba1a0bfSKris Buschelman poffset = pjj[k]*ppdof+pshift; 23477ba1a0bfSKris Buschelman if (!apjdense[poffset]) { 23487ba1a0bfSKris Buschelman apjdense[poffset] = -1; 23497ba1a0bfSKris Buschelman apj[apnzj++] = poffset; 23507ba1a0bfSKris Buschelman } 23517ba1a0bfSKris Buschelman apa[poffset] += (*aa)*paj[k]; 23527ba1a0bfSKris Buschelman } 23537ba1a0bfSKris Buschelman flops += 2*pnzj; 23547ba1a0bfSKris Buschelman aa++; 23557ba1a0bfSKris Buschelman } 23567ba1a0bfSKris Buschelman 23577ba1a0bfSKris Buschelman /* Sort the j index array for quick sparse axpy. */ 23587ba1a0bfSKris Buschelman /* Note: a array does not need sorting as it is in dense storage locations. */ 23597ba1a0bfSKris Buschelman ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 23607ba1a0bfSKris Buschelman 23617ba1a0bfSKris Buschelman /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 23627ba1a0bfSKris Buschelman prow = i/ppdof; /* integer division */ 23637ba1a0bfSKris Buschelman pshift = i%ppdof; 23647ba1a0bfSKris Buschelman poffset = pi[prow]; 23657ba1a0bfSKris Buschelman pnzi = pi[prow+1] - poffset; 23667ba1a0bfSKris Buschelman /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 23677ba1a0bfSKris Buschelman pJ = pj+poffset; 23687ba1a0bfSKris Buschelman pA = pa+poffset; 23697ba1a0bfSKris Buschelman for (j=0;j<pnzi;j++) { 23707ba1a0bfSKris Buschelman crow = (*pJ)*ppdof+pshift; 23717ba1a0bfSKris Buschelman cjj = cj + ci[crow]; 23727ba1a0bfSKris Buschelman caj = ca + ci[crow]; 23737ba1a0bfSKris Buschelman pJ++; 23747ba1a0bfSKris Buschelman /* Perform sparse axpy operation. Note cjj includes apj. */ 23757ba1a0bfSKris Buschelman for (k=0,nextap=0;nextap<apnzj;k++) { 23767ba1a0bfSKris Buschelman if (cjj[k]==apj[nextap]) { 23777ba1a0bfSKris Buschelman caj[k] += (*pA)*apa[apj[nextap++]]; 23787ba1a0bfSKris Buschelman } 23797ba1a0bfSKris Buschelman } 23807ba1a0bfSKris Buschelman flops += 2*apnzj; 23817ba1a0bfSKris Buschelman pA++; 23827ba1a0bfSKris Buschelman } 23837ba1a0bfSKris Buschelman 23847ba1a0bfSKris Buschelman /* Zero the current row info for A*P */ 23857ba1a0bfSKris Buschelman for (j=0;j<apnzj;j++) { 23867ba1a0bfSKris Buschelman apa[apj[j]] = 0.; 23877ba1a0bfSKris Buschelman apjdense[apj[j]] = 0; 23887ba1a0bfSKris Buschelman } 23897ba1a0bfSKris Buschelman } 23907ba1a0bfSKris Buschelman 23917ba1a0bfSKris Buschelman /* Assemble the final matrix and clean up */ 23927ba1a0bfSKris Buschelman ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 23937ba1a0bfSKris Buschelman ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 23947ba1a0bfSKris Buschelman ierr = PetscFree(apa);CHKERRQ(ierr); 23957ba1a0bfSKris Buschelman ierr = PetscLogFlops(flops);CHKERRQ(ierr); 23967ba1a0bfSKris Buschelman 23977ba1a0bfSKris Buschelman PetscFunctionReturn(0); 23987ba1a0bfSKris Buschelman } 23997ba1a0bfSKris Buschelman 2400be1d678aSKris Buschelman EXTERN_C_BEGIN 24017ba1a0bfSKris Buschelman #undef __FUNCT__ 24020fd73130SBarry Smith #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ" 2403f69a0ea3SMatthew Knepley PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 24049c4fc4c7SBarry Smith { 24059c4fc4c7SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2406ceb03754SKris Buschelman Mat a = b->AIJ,B; 24079c4fc4c7SBarry Smith Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 24089c4fc4c7SBarry Smith PetscErrorCode ierr; 24090fd73130SBarry Smith PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 2410ba8c8a56SBarry Smith PetscInt *cols; 2411ba8c8a56SBarry Smith PetscScalar *vals; 24129c4fc4c7SBarry Smith 24139c4fc4c7SBarry Smith PetscFunctionBegin; 24149c4fc4c7SBarry Smith ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 24157c307921SBarry Smith ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr); 24169c4fc4c7SBarry Smith for (i=0; i<m; i++) { 24179c4fc4c7SBarry Smith nmax = PetscMax(nmax,aij->ilen[i]); 24180fd73130SBarry Smith for (j=0; j<dof; j++) { 24190fd73130SBarry Smith ilen[dof*i+j] = aij->ilen[i]; 24209c4fc4c7SBarry Smith } 24219c4fc4c7SBarry Smith } 2422ceb03754SKris Buschelman ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr); 2423ceb03754SKris Buschelman ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 24249c4fc4c7SBarry Smith ierr = PetscFree(ilen);CHKERRQ(ierr); 24259c4fc4c7SBarry Smith ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr); 24269c4fc4c7SBarry Smith ii = 0; 24279c4fc4c7SBarry Smith for (i=0; i<m; i++) { 2428ba8c8a56SBarry Smith ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 24290fd73130SBarry Smith for (j=0; j<dof; j++) { 24309c4fc4c7SBarry Smith for (k=0; k<ncols; k++) { 24310fd73130SBarry Smith icols[k] = dof*cols[k]+j; 24329c4fc4c7SBarry Smith } 2433ceb03754SKris Buschelman ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 24349c4fc4c7SBarry Smith ii++; 24359c4fc4c7SBarry Smith } 2436ba8c8a56SBarry Smith ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 24379c4fc4c7SBarry Smith } 24389c4fc4c7SBarry Smith ierr = PetscFree(icols);CHKERRQ(ierr); 2439ceb03754SKris Buschelman ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2440ceb03754SKris Buschelman ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2441ceb03754SKris Buschelman 2442ceb03754SKris Buschelman if (reuse == MAT_REUSE_MATRIX) { 24438ab5b326SKris Buschelman ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 2444c3d102feSKris Buschelman } else { 2445ceb03754SKris Buschelman *newmat = B; 2446c3d102feSKris Buschelman } 24479c4fc4c7SBarry Smith PetscFunctionReturn(0); 24489c4fc4c7SBarry Smith } 2449be1d678aSKris Buschelman EXTERN_C_END 24509c4fc4c7SBarry Smith 24510fd73130SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h" 2452be1d678aSKris Buschelman 2453be1d678aSKris Buschelman EXTERN_C_BEGIN 24540fd73130SBarry Smith #undef __FUNCT__ 24550fd73130SBarry Smith #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ" 2456f69a0ea3SMatthew Knepley PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 24570fd73130SBarry Smith { 24580fd73130SBarry Smith Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 2459ceb03754SKris Buschelman Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 24600fd73130SBarry Smith Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 24610fd73130SBarry Smith Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 24620fd73130SBarry Smith Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 24630fd73130SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 2464910ba992SMatthew Knepley PetscInt dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0; 2465910ba992SMatthew Knepley PetscInt *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL; 24660fd73130SBarry Smith PetscInt rstart,cstart,*garray,ii,k; 24670fd73130SBarry Smith PetscErrorCode ierr; 24680fd73130SBarry Smith PetscScalar *vals,*ovals; 24690fd73130SBarry Smith 24700fd73130SBarry Smith PetscFunctionBegin; 2471899cda47SBarry Smith ierr = PetscMalloc2(A->rmap.n,PetscInt,&dnz,A->rmap.n,PetscInt,&onz);CHKERRQ(ierr); 2472899cda47SBarry Smith for (i=0; i<A->rmap.n/dof; i++) { 24730fd73130SBarry Smith nmax = PetscMax(nmax,AIJ->ilen[i]); 24740fd73130SBarry Smith onmax = PetscMax(onmax,OAIJ->ilen[i]); 24750fd73130SBarry Smith for (j=0; j<dof; j++) { 24760fd73130SBarry Smith dnz[dof*i+j] = AIJ->ilen[i]; 24770fd73130SBarry Smith onz[dof*i+j] = OAIJ->ilen[i]; 24780fd73130SBarry Smith } 24790fd73130SBarry Smith } 2480899cda47SBarry Smith ierr = MatCreateMPIAIJ(A->comm,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N,0,dnz,0,onz,&B);CHKERRQ(ierr); 2481ceb03754SKris Buschelman ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 24820fd73130SBarry Smith ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 24830fd73130SBarry Smith 24847a1a7badSBarry Smith ierr = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr); 24859b5a856fSSatish Balay rstart = dof*maij->A->rmap.rstart; 24869b5a856fSSatish Balay cstart = dof*maij->A->cmap.rstart; 24870fd73130SBarry Smith garray = mpiaij->garray; 24880fd73130SBarry Smith 24890fd73130SBarry Smith ii = rstart; 2490899cda47SBarry Smith for (i=0; i<A->rmap.n/dof; i++) { 24910fd73130SBarry Smith ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 24920fd73130SBarry Smith ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 24930fd73130SBarry Smith for (j=0; j<dof; j++) { 24940fd73130SBarry Smith for (k=0; k<ncols; k++) { 24950fd73130SBarry Smith icols[k] = cstart + dof*cols[k]+j; 24960fd73130SBarry Smith } 24970fd73130SBarry Smith for (k=0; k<oncols; k++) { 24980fd73130SBarry Smith oicols[k] = dof*garray[ocols[k]]+j; 24990fd73130SBarry Smith } 2500ceb03754SKris Buschelman ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 2501ceb03754SKris Buschelman ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 25020fd73130SBarry Smith ii++; 25030fd73130SBarry Smith } 25040fd73130SBarry Smith ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 25050fd73130SBarry Smith ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 25060fd73130SBarry Smith } 25070fd73130SBarry Smith ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 25080fd73130SBarry Smith 2509ceb03754SKris Buschelman ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2510ceb03754SKris Buschelman ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2511ceb03754SKris Buschelman 2512ceb03754SKris Buschelman if (reuse == MAT_REUSE_MATRIX) { 25138ab5b326SKris Buschelman ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 2514c3d102feSKris Buschelman } else { 2515ceb03754SKris Buschelman *newmat = B; 2516c3d102feSKris Buschelman } 25170fd73130SBarry Smith PetscFunctionReturn(0); 25180fd73130SBarry Smith } 2519be1d678aSKris Buschelman EXTERN_C_END 25200fd73130SBarry Smith 25210fd73130SBarry Smith 2522bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */ 25235983afb6SSatish Balay /*MC 25240bad9183SKris Buschelman MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 25250bad9183SKris Buschelman operations for multicomponent problems. It interpolates each component the same 25260bad9183SKris Buschelman way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 25270bad9183SKris Buschelman and MATMPIAIJ for distributed matrices. 25280bad9183SKris Buschelman 25290bad9183SKris Buschelman Operations provided: 25300fd73130SBarry Smith + MatMult 25310bad9183SKris Buschelman . MatMultTranspose 25320bad9183SKris Buschelman . MatMultAdd 25330bad9183SKris Buschelman . MatMultTransposeAdd 25340fd73130SBarry Smith - MatView 25350bad9183SKris Buschelman 25360bad9183SKris Buschelman Level: advanced 25370bad9183SKris Buschelman 25380bad9183SKris Buschelman M*/ 25394a2ae208SSatish Balay #undef __FUNCT__ 25404a2ae208SSatish Balay #define __FUNCT__ "MatCreateMAIJ" 2541be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 254282b1193eSBarry Smith { 2543dfbe8321SBarry Smith PetscErrorCode ierr; 2544b24ad042SBarry Smith PetscMPIInt size; 2545b24ad042SBarry Smith PetscInt n; 25464cb9d9b8SBarry Smith Mat_MPIMAIJ *b; 254782b1193eSBarry Smith Mat B; 254882b1193eSBarry Smith 254982b1193eSBarry Smith PetscFunctionBegin; 2550d72c5c08SBarry Smith ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2551d72c5c08SBarry Smith 2552d72c5c08SBarry Smith if (dof == 1) { 2553d72c5c08SBarry Smith *maij = A; 2554d72c5c08SBarry Smith } else { 2555f69a0ea3SMatthew Knepley ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 2556899cda47SBarry Smith ierr = MatSetSizes(B,dof*A->rmap.n,dof*A->cmap.n,dof*A->rmap.N,dof*A->cmap.N);CHKERRQ(ierr); 2557cd3bbe55SBarry Smith B->assembled = PETSC_TRUE; 2558d72c5c08SBarry Smith 2559f4a53059SBarry Smith ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2560f4a53059SBarry Smith if (size == 1) { 2561b9b97703SBarry Smith ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 25624cb9d9b8SBarry Smith B->ops->destroy = MatDestroy_SeqMAIJ; 25630fd73130SBarry Smith B->ops->view = MatView_SeqMAIJ; 2564b9b97703SBarry Smith b = (Mat_MPIMAIJ*)B->data; 2565b9b97703SBarry Smith b->dof = dof; 25664cb9d9b8SBarry Smith b->AIJ = A; 2567d72c5c08SBarry Smith if (dof == 2) { 2568cd3bbe55SBarry Smith B->ops->mult = MatMult_SeqMAIJ_2; 2569cd3bbe55SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_2; 2570cd3bbe55SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 2571cd3bbe55SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 2572bcc973b7SBarry Smith } else if (dof == 3) { 2573bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_3; 2574bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_3; 2575bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 2576bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 2577bcc973b7SBarry Smith } else if (dof == 4) { 2578bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_4; 2579bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_4; 2580bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 2581bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 2582f9fae5adSBarry Smith } else if (dof == 5) { 2583f9fae5adSBarry Smith B->ops->mult = MatMult_SeqMAIJ_5; 2584f9fae5adSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_5; 2585f9fae5adSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 2586f9fae5adSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 25876cd98798SBarry Smith } else if (dof == 6) { 25886cd98798SBarry Smith B->ops->mult = MatMult_SeqMAIJ_6; 25896cd98798SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_6; 25906cd98798SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 25916cd98798SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 2592ed8eea03SBarry Smith } else if (dof == 7) { 2593ed8eea03SBarry Smith B->ops->mult = MatMult_SeqMAIJ_7; 2594ed8eea03SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_7; 2595ed8eea03SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 2596ed8eea03SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 259766ed3db0SBarry Smith } else if (dof == 8) { 259866ed3db0SBarry Smith B->ops->mult = MatMult_SeqMAIJ_8; 259966ed3db0SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_8; 260066ed3db0SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 260166ed3db0SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 26022b692628SMatthew Knepley } else if (dof == 9) { 26032b692628SMatthew Knepley B->ops->mult = MatMult_SeqMAIJ_9; 26042b692628SMatthew Knepley B->ops->multadd = MatMultAdd_SeqMAIJ_9; 26052b692628SMatthew Knepley B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 26062b692628SMatthew Knepley B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 2607b9cda22cSBarry Smith } else if (dof == 10) { 2608b9cda22cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_10; 2609b9cda22cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_10; 2610b9cda22cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 2611b9cda22cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 26122f7816d4SBarry Smith } else if (dof == 16) { 26132f7816d4SBarry Smith B->ops->mult = MatMult_SeqMAIJ_16; 26142f7816d4SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_16; 26152f7816d4SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 26162f7816d4SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 261782b1193eSBarry Smith } else { 261877431f27SBarry Smith SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof); 261982b1193eSBarry Smith } 26207ba1a0bfSKris Buschelman B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ; 26217ba1a0bfSKris Buschelman B->ops->ptapnumeric_seqaij = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 26229c4fc4c7SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 2623f4a53059SBarry Smith } else { 2624f4a53059SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 2625f4a53059SBarry Smith IS from,to; 2626f4a53059SBarry Smith Vec gvec; 2627b24ad042SBarry Smith PetscInt *garray,i; 2628f4a53059SBarry Smith 2629b9b97703SBarry Smith ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 2630d72c5c08SBarry Smith B->ops->destroy = MatDestroy_MPIMAIJ; 26310fd73130SBarry Smith B->ops->view = MatView_MPIMAIJ; 2632b9b97703SBarry Smith b = (Mat_MPIMAIJ*)B->data; 2633b9b97703SBarry Smith b->dof = dof; 2634b9b97703SBarry Smith b->A = A; 26354cb9d9b8SBarry Smith ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 26364cb9d9b8SBarry Smith ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 26374cb9d9b8SBarry Smith 2638f4a53059SBarry Smith ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 2639f4a53059SBarry Smith ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr); 264013288a74SBarry Smith ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 2641f4a53059SBarry Smith 2642f4a53059SBarry Smith /* create two temporary Index sets for build scatter gather */ 2643b24ad042SBarry Smith ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2644f4a53059SBarry Smith for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i]; 2645f4a53059SBarry Smith ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr); 2646f4a53059SBarry Smith ierr = PetscFree(garray);CHKERRQ(ierr); 2647f4a53059SBarry Smith ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 2648f4a53059SBarry Smith 2649f4a53059SBarry Smith /* create temporary global vector to generate scatter context */ 2650899cda47SBarry Smith ierr = VecCreateMPI(A->comm,dof*A->cmap.n,dof*A->cmap.N,&gvec);CHKERRQ(ierr); 265113288a74SBarry Smith ierr = VecSetBlockSize(gvec,dof);CHKERRQ(ierr); 2652f4a53059SBarry Smith 2653f4a53059SBarry Smith /* generate the scatter context */ 2654f4a53059SBarry Smith ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 2655f4a53059SBarry Smith 2656f4a53059SBarry Smith ierr = ISDestroy(from);CHKERRQ(ierr); 2657f4a53059SBarry Smith ierr = ISDestroy(to);CHKERRQ(ierr); 2658f4a53059SBarry Smith ierr = VecDestroy(gvec);CHKERRQ(ierr); 2659f4a53059SBarry Smith 2660f4a53059SBarry Smith B->ops->mult = MatMult_MPIMAIJ_dof; 26614cb9d9b8SBarry Smith B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 26624cb9d9b8SBarry Smith B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 26634cb9d9b8SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 26640fd73130SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 2665f4a53059SBarry Smith } 2666cd3bbe55SBarry Smith *maij = B; 26670fd73130SBarry Smith ierr = MatView_Private(B);CHKERRQ(ierr); 2668d72c5c08SBarry Smith } 266982b1193eSBarry Smith PetscFunctionReturn(0); 267082b1193eSBarry Smith } 267182b1193eSBarry Smith 267282b1193eSBarry Smith 267382b1193eSBarry Smith 267482b1193eSBarry Smith 267582b1193eSBarry Smith 267682b1193eSBarry Smith 267782b1193eSBarry Smith 267882b1193eSBarry Smith 267982b1193eSBarry Smith 268082b1193eSBarry Smith 268182b1193eSBarry Smith 268282b1193eSBarry Smith 2683