1be1d678aSKris Buschelman #define PETSCMAT_DLL 2be1d678aSKris Buschelman 382b1193eSBarry Smith /* 4cd3bbe55SBarry Smith Defines the basic matrix operations for the MAIJ matrix storage format. 5cd3bbe55SBarry Smith This format is used for restriction and interpolation operations for 6cd3bbe55SBarry Smith multicomponent problems. It interpolates each component the same way 7cd3bbe55SBarry Smith independently. 8cd3bbe55SBarry Smith 9cd3bbe55SBarry Smith We provide: 10cd3bbe55SBarry Smith MatMult() 11cd3bbe55SBarry Smith MatMultTranspose() 12cd3bbe55SBarry Smith MatMultTransposeAdd() 13cd3bbe55SBarry Smith MatMultAdd() 14cd3bbe55SBarry Smith and 15cd3bbe55SBarry Smith MatCreateMAIJ(Mat,dof,Mat*) 16f4a53059SBarry Smith 17f4a53059SBarry Smith This single directory handles both the sequential and parallel codes 1882b1193eSBarry Smith */ 1982b1193eSBarry Smith 20be911618SKris Buschelman #include "src/mat/impls/maij/maij.h" 217ba1a0bfSKris Buschelman #include "src/mat/utils/freespace.h" 221d8d5f9aSSatish Balay #include "private/vecimpl.h" 2382b1193eSBarry Smith 244a2ae208SSatish Balay #undef __FUNCT__ 254a2ae208SSatish Balay #define __FUNCT__ "MatMAIJGetAIJ" 26be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJGetAIJ(Mat A,Mat *B) 27b9b97703SBarry Smith { 28dfbe8321SBarry Smith PetscErrorCode ierr; 29b9b97703SBarry Smith PetscTruth ismpimaij,isseqmaij; 30b9b97703SBarry Smith 31b9b97703SBarry Smith PetscFunctionBegin; 32b9b97703SBarry Smith ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 33b9b97703SBarry Smith ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 34b9b97703SBarry Smith if (ismpimaij) { 35b9b97703SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 36b9b97703SBarry Smith 37b9b97703SBarry Smith *B = b->A; 38b9b97703SBarry Smith } else if (isseqmaij) { 39b9b97703SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 40b9b97703SBarry Smith 41b9b97703SBarry Smith *B = b->AIJ; 42b9b97703SBarry Smith } else { 43b9b97703SBarry Smith *B = A; 44b9b97703SBarry Smith } 45b9b97703SBarry Smith PetscFunctionReturn(0); 46b9b97703SBarry Smith } 47b9b97703SBarry Smith 484a2ae208SSatish Balay #undef __FUNCT__ 494a2ae208SSatish Balay #define __FUNCT__ "MatMAIJRedimension" 50be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 51b9b97703SBarry Smith { 52dfbe8321SBarry Smith PetscErrorCode ierr; 53b9b97703SBarry Smith Mat Aij; 54b9b97703SBarry Smith 55b9b97703SBarry Smith PetscFunctionBegin; 56b9b97703SBarry Smith ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 57b9b97703SBarry Smith ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 58b9b97703SBarry Smith PetscFunctionReturn(0); 59b9b97703SBarry Smith } 60b9b97703SBarry Smith 614a2ae208SSatish Balay #undef __FUNCT__ 624a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_SeqMAIJ" 63dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 6482b1193eSBarry Smith { 65dfbe8321SBarry Smith PetscErrorCode ierr; 664cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 6782b1193eSBarry Smith 6882b1193eSBarry Smith PetscFunctionBegin; 69cd3bbe55SBarry Smith if (b->AIJ) { 70cd3bbe55SBarry Smith ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 7182b1193eSBarry Smith } 724cb9d9b8SBarry Smith ierr = PetscFree(b);CHKERRQ(ierr); 734cb9d9b8SBarry Smith PetscFunctionReturn(0); 744cb9d9b8SBarry Smith } 754cb9d9b8SBarry Smith 764a2ae208SSatish Balay #undef __FUNCT__ 770fd73130SBarry Smith #define __FUNCT__ "MatView_SeqMAIJ" 780fd73130SBarry Smith PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 790fd73130SBarry Smith { 800fd73130SBarry Smith PetscErrorCode ierr; 810fd73130SBarry Smith Mat B; 820fd73130SBarry Smith 830fd73130SBarry Smith PetscFunctionBegin; 84ceb03754SKris Buschelman ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B); 850fd73130SBarry Smith ierr = MatView(B,viewer);CHKERRQ(ierr); 860fd73130SBarry Smith ierr = MatDestroy(B);CHKERRQ(ierr); 870fd73130SBarry Smith PetscFunctionReturn(0); 880fd73130SBarry Smith } 890fd73130SBarry Smith 900fd73130SBarry Smith #undef __FUNCT__ 910fd73130SBarry Smith #define __FUNCT__ "MatView_MPIMAIJ" 920fd73130SBarry Smith PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 930fd73130SBarry Smith { 940fd73130SBarry Smith PetscErrorCode ierr; 950fd73130SBarry Smith Mat B; 960fd73130SBarry Smith 970fd73130SBarry Smith PetscFunctionBegin; 98ceb03754SKris Buschelman ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B); 990fd73130SBarry Smith ierr = MatView(B,viewer);CHKERRQ(ierr); 1000fd73130SBarry Smith ierr = MatDestroy(B);CHKERRQ(ierr); 1010fd73130SBarry Smith PetscFunctionReturn(0); 1020fd73130SBarry Smith } 1030fd73130SBarry Smith 1040fd73130SBarry Smith #undef __FUNCT__ 1054a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIMAIJ" 106dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 1074cb9d9b8SBarry Smith { 108dfbe8321SBarry Smith PetscErrorCode ierr; 1094cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1104cb9d9b8SBarry Smith 1114cb9d9b8SBarry Smith PetscFunctionBegin; 1124cb9d9b8SBarry Smith if (b->AIJ) { 1134cb9d9b8SBarry Smith ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 1144cb9d9b8SBarry Smith } 115f4a53059SBarry Smith if (b->OAIJ) { 116f4a53059SBarry Smith ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr); 117f4a53059SBarry Smith } 118b9b97703SBarry Smith if (b->A) { 119b9b97703SBarry Smith ierr = MatDestroy(b->A);CHKERRQ(ierr); 120b9b97703SBarry Smith } 121f4a53059SBarry Smith if (b->ctx) { 122f4a53059SBarry Smith ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr); 123f4a53059SBarry Smith } 124f4a53059SBarry Smith if (b->w) { 125f4a53059SBarry Smith ierr = VecDestroy(b->w);CHKERRQ(ierr); 126f4a53059SBarry Smith } 127cd3bbe55SBarry Smith ierr = PetscFree(b);CHKERRQ(ierr); 128dbd8c25aSHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 129b9b97703SBarry Smith PetscFunctionReturn(0); 130b9b97703SBarry Smith } 131b9b97703SBarry Smith 1320bad9183SKris Buschelman /*MC 133fafad747SKris Buschelman MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 1340bad9183SKris Buschelman multicomponent problems, interpolating or restricting each component the same way independently. 1350bad9183SKris Buschelman The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 1360bad9183SKris Buschelman 1370bad9183SKris Buschelman Operations provided: 1380bad9183SKris Buschelman . MatMult 1390bad9183SKris Buschelman . MatMultTranspose 1400bad9183SKris Buschelman . MatMultAdd 1410bad9183SKris Buschelman . MatMultTransposeAdd 1420bad9183SKris Buschelman 1430bad9183SKris Buschelman Level: advanced 1440bad9183SKris Buschelman 1450bad9183SKris Buschelman .seealso: MatCreateSeqDense 1460bad9183SKris Buschelman M*/ 1470bad9183SKris Buschelman 14882b1193eSBarry Smith EXTERN_C_BEGIN 1494a2ae208SSatish Balay #undef __FUNCT__ 1504a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MAIJ" 151be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MAIJ(Mat A) 15282b1193eSBarry Smith { 153dfbe8321SBarry Smith PetscErrorCode ierr; 1544cb9d9b8SBarry Smith Mat_MPIMAIJ *b; 15564b52464SHong Zhang PetscMPIInt size; 15682b1193eSBarry Smith 15782b1193eSBarry Smith PetscFunctionBegin; 158b0a32e0cSBarry Smith ierr = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr); 159b0a32e0cSBarry Smith A->data = (void*)b; 160cd3bbe55SBarry Smith ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 161cd3bbe55SBarry Smith A->factor = 0; 162cd3bbe55SBarry Smith A->mapping = 0; 163f4a53059SBarry Smith 164cd3bbe55SBarry Smith b->AIJ = 0; 165cd3bbe55SBarry Smith b->dof = 0; 166f4a53059SBarry Smith b->OAIJ = 0; 167f4a53059SBarry Smith b->ctx = 0; 168f4a53059SBarry Smith b->w = 0; 16964b52464SHong Zhang ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 17064b52464SHong Zhang if (size == 1){ 17164b52464SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr); 17264b52464SHong Zhang } else { 17364b52464SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr); 17464b52464SHong Zhang } 17582b1193eSBarry Smith PetscFunctionReturn(0); 17682b1193eSBarry Smith } 17782b1193eSBarry Smith EXTERN_C_END 17882b1193eSBarry Smith 179cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 1804a2ae208SSatish Balay #undef __FUNCT__ 181b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_2" 182dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 18382b1193eSBarry Smith { 1844cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 185bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 18687828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2; 187dfbe8321SBarry Smith PetscErrorCode ierr; 188899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 189b24ad042SBarry Smith PetscInt n,i,jrow,j; 19082b1193eSBarry Smith 191bcc973b7SBarry Smith PetscFunctionBegin; 1921ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1931ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 194bcc973b7SBarry Smith idx = a->j; 195bcc973b7SBarry Smith v = a->a; 196bcc973b7SBarry Smith ii = a->i; 197bcc973b7SBarry Smith 198bcc973b7SBarry Smith for (i=0; i<m; i++) { 199bcc973b7SBarry Smith jrow = ii[i]; 200bcc973b7SBarry Smith n = ii[i+1] - jrow; 201bcc973b7SBarry Smith sum1 = 0.0; 202bcc973b7SBarry Smith sum2 = 0.0; 203bcc973b7SBarry Smith for (j=0; j<n; j++) { 204bcc973b7SBarry Smith sum1 += v[jrow]*x[2*idx[jrow]]; 205bcc973b7SBarry Smith sum2 += v[jrow]*x[2*idx[jrow]+1]; 206bcc973b7SBarry Smith jrow++; 207bcc973b7SBarry Smith } 208bcc973b7SBarry Smith y[2*i] = sum1; 209bcc973b7SBarry Smith y[2*i+1] = sum2; 210bcc973b7SBarry Smith } 211bcc973b7SBarry Smith 212efee365bSSatish Balay ierr = PetscLogFlops(4*a->nz - 2*m);CHKERRQ(ierr); 2131ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2141ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 21582b1193eSBarry Smith PetscFunctionReturn(0); 21682b1193eSBarry Smith } 217bcc973b7SBarry Smith 2184a2ae208SSatish Balay #undef __FUNCT__ 219b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 220dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 22182b1193eSBarry Smith { 2224cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 223bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 22487828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0; 225dfbe8321SBarry Smith PetscErrorCode ierr; 226899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 22782b1193eSBarry Smith 228bcc973b7SBarry Smith PetscFunctionBegin; 2292dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 2301ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2311ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 232bfec09a0SHong Zhang 233bcc973b7SBarry Smith for (i=0; i<m; i++) { 234bfec09a0SHong Zhang idx = a->j + a->i[i] ; 235bfec09a0SHong Zhang v = a->a + a->i[i] ; 236bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 237bcc973b7SBarry Smith alpha1 = x[2*i]; 238bcc973b7SBarry Smith alpha2 = x[2*i+1]; 239bcc973b7SBarry Smith while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 240bcc973b7SBarry Smith } 241899cda47SBarry Smith ierr = PetscLogFlops(4*a->nz - 2*b->AIJ->cmap.n);CHKERRQ(ierr); 2421ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2431ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 24482b1193eSBarry Smith PetscFunctionReturn(0); 24582b1193eSBarry Smith } 246bcc973b7SBarry Smith 2474a2ae208SSatish Balay #undef __FUNCT__ 248b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 249dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 25082b1193eSBarry Smith { 2514cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 252bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 25387828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2; 254dfbe8321SBarry Smith PetscErrorCode ierr; 255899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 256b24ad042SBarry Smith PetscInt n,i,jrow,j; 25782b1193eSBarry Smith 258bcc973b7SBarry Smith PetscFunctionBegin; 259f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2601ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2611ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 262bcc973b7SBarry Smith idx = a->j; 263bcc973b7SBarry Smith v = a->a; 264bcc973b7SBarry Smith ii = a->i; 265bcc973b7SBarry Smith 266bcc973b7SBarry Smith for (i=0; i<m; i++) { 267bcc973b7SBarry Smith jrow = ii[i]; 268bcc973b7SBarry Smith n = ii[i+1] - jrow; 269bcc973b7SBarry Smith sum1 = 0.0; 270bcc973b7SBarry Smith sum2 = 0.0; 271bcc973b7SBarry Smith for (j=0; j<n; j++) { 272bcc973b7SBarry Smith sum1 += v[jrow]*x[2*idx[jrow]]; 273bcc973b7SBarry Smith sum2 += v[jrow]*x[2*idx[jrow]+1]; 274bcc973b7SBarry Smith jrow++; 275bcc973b7SBarry Smith } 276bcc973b7SBarry Smith y[2*i] += sum1; 277bcc973b7SBarry Smith y[2*i+1] += sum2; 278bcc973b7SBarry Smith } 279bcc973b7SBarry Smith 280efee365bSSatish Balay ierr = PetscLogFlops(4*a->nz - 2*m);CHKERRQ(ierr); 2811ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2821ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 283bcc973b7SBarry Smith PetscFunctionReturn(0); 28482b1193eSBarry Smith } 2854a2ae208SSatish Balay #undef __FUNCT__ 286b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 287dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 28882b1193eSBarry Smith { 2894cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 290bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 29187828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2; 292dfbe8321SBarry Smith PetscErrorCode ierr; 293899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 29482b1193eSBarry Smith 295bcc973b7SBarry Smith PetscFunctionBegin; 296f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2971ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2981ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 299bfec09a0SHong Zhang 300bcc973b7SBarry Smith for (i=0; i<m; i++) { 301bfec09a0SHong Zhang idx = a->j + a->i[i] ; 302bfec09a0SHong Zhang v = a->a + a->i[i] ; 303bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 304bcc973b7SBarry Smith alpha1 = x[2*i]; 305bcc973b7SBarry Smith alpha2 = x[2*i+1]; 306bcc973b7SBarry Smith while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 307bcc973b7SBarry Smith } 308899cda47SBarry Smith ierr = PetscLogFlops(4*a->nz - 2*b->AIJ->cmap.n);CHKERRQ(ierr); 3091ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3101ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 311bcc973b7SBarry Smith PetscFunctionReturn(0); 31282b1193eSBarry Smith } 313cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 3144a2ae208SSatish Balay #undef __FUNCT__ 315b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_3" 316dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 317bcc973b7SBarry Smith { 3184cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 319bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 32087828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3; 321dfbe8321SBarry Smith PetscErrorCode ierr; 322899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 323b24ad042SBarry Smith PetscInt n,i,jrow,j; 32482b1193eSBarry Smith 325bcc973b7SBarry Smith PetscFunctionBegin; 3261ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3271ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 328bcc973b7SBarry Smith idx = a->j; 329bcc973b7SBarry Smith v = a->a; 330bcc973b7SBarry Smith ii = a->i; 331bcc973b7SBarry Smith 332bcc973b7SBarry Smith for (i=0; i<m; i++) { 333bcc973b7SBarry Smith jrow = ii[i]; 334bcc973b7SBarry Smith n = ii[i+1] - jrow; 335bcc973b7SBarry Smith sum1 = 0.0; 336bcc973b7SBarry Smith sum2 = 0.0; 337bcc973b7SBarry Smith sum3 = 0.0; 338bcc973b7SBarry Smith for (j=0; j<n; j++) { 339bcc973b7SBarry Smith sum1 += v[jrow]*x[3*idx[jrow]]; 340bcc973b7SBarry Smith sum2 += v[jrow]*x[3*idx[jrow]+1]; 341bcc973b7SBarry Smith sum3 += v[jrow]*x[3*idx[jrow]+2]; 342bcc973b7SBarry Smith jrow++; 343bcc973b7SBarry Smith } 344bcc973b7SBarry Smith y[3*i] = sum1; 345bcc973b7SBarry Smith y[3*i+1] = sum2; 346bcc973b7SBarry Smith y[3*i+2] = sum3; 347bcc973b7SBarry Smith } 348bcc973b7SBarry Smith 349efee365bSSatish Balay ierr = PetscLogFlops(6*a->nz - 3*m);CHKERRQ(ierr); 3501ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3511ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 352bcc973b7SBarry Smith PetscFunctionReturn(0); 353bcc973b7SBarry Smith } 354bcc973b7SBarry Smith 3554a2ae208SSatish Balay #undef __FUNCT__ 356b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 357dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 358bcc973b7SBarry Smith { 3594cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 360bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 36187828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0; 362dfbe8321SBarry Smith PetscErrorCode ierr; 363899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 364bcc973b7SBarry Smith 365bcc973b7SBarry Smith PetscFunctionBegin; 3662dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 3671ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3681ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 369bfec09a0SHong Zhang 370bcc973b7SBarry Smith for (i=0; i<m; i++) { 371bfec09a0SHong Zhang idx = a->j + a->i[i]; 372bfec09a0SHong Zhang v = a->a + a->i[i]; 373bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 374bcc973b7SBarry Smith alpha1 = x[3*i]; 375bcc973b7SBarry Smith alpha2 = x[3*i+1]; 376bcc973b7SBarry Smith alpha3 = x[3*i+2]; 377bcc973b7SBarry Smith while (n-->0) { 378bcc973b7SBarry Smith y[3*(*idx)] += alpha1*(*v); 379bcc973b7SBarry Smith y[3*(*idx)+1] += alpha2*(*v); 380bcc973b7SBarry Smith y[3*(*idx)+2] += alpha3*(*v); 381bcc973b7SBarry Smith idx++; v++; 382bcc973b7SBarry Smith } 383bcc973b7SBarry Smith } 384899cda47SBarry Smith ierr = PetscLogFlops(6*a->nz - 3*b->AIJ->cmap.n);CHKERRQ(ierr); 3851ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3861ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 387bcc973b7SBarry Smith PetscFunctionReturn(0); 388bcc973b7SBarry Smith } 389bcc973b7SBarry Smith 3904a2ae208SSatish Balay #undef __FUNCT__ 391b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 392dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 393bcc973b7SBarry Smith { 3944cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 395bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 39687828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3; 397dfbe8321SBarry Smith PetscErrorCode ierr; 398899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 399b24ad042SBarry Smith PetscInt n,i,jrow,j; 400bcc973b7SBarry Smith 401bcc973b7SBarry Smith PetscFunctionBegin; 402f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 4031ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 4041ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 405bcc973b7SBarry Smith idx = a->j; 406bcc973b7SBarry Smith v = a->a; 407bcc973b7SBarry Smith ii = a->i; 408bcc973b7SBarry Smith 409bcc973b7SBarry Smith for (i=0; i<m; i++) { 410bcc973b7SBarry Smith jrow = ii[i]; 411bcc973b7SBarry Smith n = ii[i+1] - jrow; 412bcc973b7SBarry Smith sum1 = 0.0; 413bcc973b7SBarry Smith sum2 = 0.0; 414bcc973b7SBarry Smith sum3 = 0.0; 415bcc973b7SBarry Smith for (j=0; j<n; j++) { 416bcc973b7SBarry Smith sum1 += v[jrow]*x[3*idx[jrow]]; 417bcc973b7SBarry Smith sum2 += v[jrow]*x[3*idx[jrow]+1]; 418bcc973b7SBarry Smith sum3 += v[jrow]*x[3*idx[jrow]+2]; 419bcc973b7SBarry Smith jrow++; 420bcc973b7SBarry Smith } 421bcc973b7SBarry Smith y[3*i] += sum1; 422bcc973b7SBarry Smith y[3*i+1] += sum2; 423bcc973b7SBarry Smith y[3*i+2] += sum3; 424bcc973b7SBarry Smith } 425bcc973b7SBarry Smith 426efee365bSSatish Balay ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr); 4271ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 4281ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 429bcc973b7SBarry Smith PetscFunctionReturn(0); 430bcc973b7SBarry Smith } 4314a2ae208SSatish Balay #undef __FUNCT__ 432b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 433dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 434bcc973b7SBarry Smith { 4354cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 436bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 43787828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3; 438dfbe8321SBarry Smith PetscErrorCode ierr; 439899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 440bcc973b7SBarry Smith 441bcc973b7SBarry Smith PetscFunctionBegin; 442f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 4431ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 4441ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 445bcc973b7SBarry Smith for (i=0; i<m; i++) { 446bfec09a0SHong Zhang idx = a->j + a->i[i] ; 447bfec09a0SHong Zhang v = a->a + a->i[i] ; 448bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 449bcc973b7SBarry Smith alpha1 = x[3*i]; 450bcc973b7SBarry Smith alpha2 = x[3*i+1]; 451bcc973b7SBarry Smith alpha3 = x[3*i+2]; 452bcc973b7SBarry Smith while (n-->0) { 453bcc973b7SBarry Smith y[3*(*idx)] += alpha1*(*v); 454bcc973b7SBarry Smith y[3*(*idx)+1] += alpha2*(*v); 455bcc973b7SBarry Smith y[3*(*idx)+2] += alpha3*(*v); 456bcc973b7SBarry Smith idx++; v++; 457bcc973b7SBarry Smith } 458bcc973b7SBarry Smith } 459efee365bSSatish Balay ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr); 4601ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 4611ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 462bcc973b7SBarry Smith PetscFunctionReturn(0); 463bcc973b7SBarry Smith } 464bcc973b7SBarry Smith 465bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/ 4664a2ae208SSatish Balay #undef __FUNCT__ 467b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_4" 468dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 469bcc973b7SBarry Smith { 4704cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 471bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 47287828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 473dfbe8321SBarry Smith PetscErrorCode ierr; 474899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 475b24ad042SBarry Smith PetscInt n,i,jrow,j; 476bcc973b7SBarry Smith 477bcc973b7SBarry Smith PetscFunctionBegin; 4781ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 4791ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 480bcc973b7SBarry Smith idx = a->j; 481bcc973b7SBarry Smith v = a->a; 482bcc973b7SBarry Smith ii = a->i; 483bcc973b7SBarry Smith 484bcc973b7SBarry Smith for (i=0; i<m; i++) { 485bcc973b7SBarry Smith jrow = ii[i]; 486bcc973b7SBarry Smith n = ii[i+1] - jrow; 487bcc973b7SBarry Smith sum1 = 0.0; 488bcc973b7SBarry Smith sum2 = 0.0; 489bcc973b7SBarry Smith sum3 = 0.0; 490bcc973b7SBarry Smith sum4 = 0.0; 491bcc973b7SBarry Smith for (j=0; j<n; j++) { 492bcc973b7SBarry Smith sum1 += v[jrow]*x[4*idx[jrow]]; 493bcc973b7SBarry Smith sum2 += v[jrow]*x[4*idx[jrow]+1]; 494bcc973b7SBarry Smith sum3 += v[jrow]*x[4*idx[jrow]+2]; 495bcc973b7SBarry Smith sum4 += v[jrow]*x[4*idx[jrow]+3]; 496bcc973b7SBarry Smith jrow++; 497bcc973b7SBarry Smith } 498bcc973b7SBarry Smith y[4*i] = sum1; 499bcc973b7SBarry Smith y[4*i+1] = sum2; 500bcc973b7SBarry Smith y[4*i+2] = sum3; 501bcc973b7SBarry Smith y[4*i+3] = sum4; 502bcc973b7SBarry Smith } 503bcc973b7SBarry Smith 504efee365bSSatish Balay ierr = PetscLogFlops(8*a->nz - 4*m);CHKERRQ(ierr); 5051ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 5061ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 507bcc973b7SBarry Smith PetscFunctionReturn(0); 508bcc973b7SBarry Smith } 509bcc973b7SBarry Smith 5104a2ae208SSatish Balay #undef __FUNCT__ 511b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 512dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 513bcc973b7SBarry Smith { 5144cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 515bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 51687828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0; 517dfbe8321SBarry Smith PetscErrorCode ierr; 518899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 519bcc973b7SBarry Smith 520bcc973b7SBarry Smith PetscFunctionBegin; 5212dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 5221ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 5231ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 524bcc973b7SBarry Smith for (i=0; i<m; i++) { 525bfec09a0SHong Zhang idx = a->j + a->i[i] ; 526bfec09a0SHong Zhang v = a->a + a->i[i] ; 527bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 528bcc973b7SBarry Smith alpha1 = x[4*i]; 529bcc973b7SBarry Smith alpha2 = x[4*i+1]; 530bcc973b7SBarry Smith alpha3 = x[4*i+2]; 531bcc973b7SBarry Smith alpha4 = x[4*i+3]; 532bcc973b7SBarry Smith while (n-->0) { 533bcc973b7SBarry Smith y[4*(*idx)] += alpha1*(*v); 534bcc973b7SBarry Smith y[4*(*idx)+1] += alpha2*(*v); 535bcc973b7SBarry Smith y[4*(*idx)+2] += alpha3*(*v); 536bcc973b7SBarry Smith y[4*(*idx)+3] += alpha4*(*v); 537bcc973b7SBarry Smith idx++; v++; 538bcc973b7SBarry Smith } 539bcc973b7SBarry Smith } 540899cda47SBarry Smith ierr = PetscLogFlops(8*a->nz - 4*b->AIJ->cmap.n);CHKERRQ(ierr); 5411ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 5421ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 543bcc973b7SBarry Smith PetscFunctionReturn(0); 544bcc973b7SBarry Smith } 545bcc973b7SBarry Smith 5464a2ae208SSatish Balay #undef __FUNCT__ 547b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 548dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 549bcc973b7SBarry Smith { 5504cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 551f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 55287828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 553dfbe8321SBarry Smith PetscErrorCode ierr; 554899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 555b24ad042SBarry Smith PetscInt n,i,jrow,j; 556f9fae5adSBarry Smith 557f9fae5adSBarry Smith PetscFunctionBegin; 558f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 5591ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 5601ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 561f9fae5adSBarry Smith idx = a->j; 562f9fae5adSBarry Smith v = a->a; 563f9fae5adSBarry Smith ii = a->i; 564f9fae5adSBarry Smith 565f9fae5adSBarry Smith for (i=0; i<m; i++) { 566f9fae5adSBarry Smith jrow = ii[i]; 567f9fae5adSBarry Smith n = ii[i+1] - jrow; 568f9fae5adSBarry Smith sum1 = 0.0; 569f9fae5adSBarry Smith sum2 = 0.0; 570f9fae5adSBarry Smith sum3 = 0.0; 571f9fae5adSBarry Smith sum4 = 0.0; 572f9fae5adSBarry Smith for (j=0; j<n; j++) { 573f9fae5adSBarry Smith sum1 += v[jrow]*x[4*idx[jrow]]; 574f9fae5adSBarry Smith sum2 += v[jrow]*x[4*idx[jrow]+1]; 575f9fae5adSBarry Smith sum3 += v[jrow]*x[4*idx[jrow]+2]; 576f9fae5adSBarry Smith sum4 += v[jrow]*x[4*idx[jrow]+3]; 577f9fae5adSBarry Smith jrow++; 578f9fae5adSBarry Smith } 579f9fae5adSBarry Smith y[4*i] += sum1; 580f9fae5adSBarry Smith y[4*i+1] += sum2; 581f9fae5adSBarry Smith y[4*i+2] += sum3; 582f9fae5adSBarry Smith y[4*i+3] += sum4; 583f9fae5adSBarry Smith } 584f9fae5adSBarry Smith 585efee365bSSatish Balay ierr = PetscLogFlops(8*a->nz - 4*m);CHKERRQ(ierr); 5861ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 5871ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 588f9fae5adSBarry Smith PetscFunctionReturn(0); 589bcc973b7SBarry Smith } 5904a2ae208SSatish Balay #undef __FUNCT__ 591b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 592dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 593bcc973b7SBarry Smith { 5944cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 595f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 59687828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4; 597dfbe8321SBarry Smith PetscErrorCode ierr; 598899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 599f9fae5adSBarry Smith 600f9fae5adSBarry Smith PetscFunctionBegin; 601f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 6021ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 6031ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 604bfec09a0SHong Zhang 605f9fae5adSBarry Smith for (i=0; i<m; i++) { 606bfec09a0SHong Zhang idx = a->j + a->i[i] ; 607bfec09a0SHong Zhang v = a->a + a->i[i] ; 608f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 609f9fae5adSBarry Smith alpha1 = x[4*i]; 610f9fae5adSBarry Smith alpha2 = x[4*i+1]; 611f9fae5adSBarry Smith alpha3 = x[4*i+2]; 612f9fae5adSBarry Smith alpha4 = x[4*i+3]; 613f9fae5adSBarry Smith while (n-->0) { 614f9fae5adSBarry Smith y[4*(*idx)] += alpha1*(*v); 615f9fae5adSBarry Smith y[4*(*idx)+1] += alpha2*(*v); 616f9fae5adSBarry Smith y[4*(*idx)+2] += alpha3*(*v); 617f9fae5adSBarry Smith y[4*(*idx)+3] += alpha4*(*v); 618f9fae5adSBarry Smith idx++; v++; 619f9fae5adSBarry Smith } 620f9fae5adSBarry Smith } 621899cda47SBarry Smith ierr = PetscLogFlops(8*a->nz - 4*b->AIJ->cmap.n);CHKERRQ(ierr); 6221ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 6231ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 624f9fae5adSBarry Smith PetscFunctionReturn(0); 625f9fae5adSBarry Smith } 626f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/ 6276cd98798SBarry Smith 6284a2ae208SSatish Balay #undef __FUNCT__ 629b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_5" 630dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 631f9fae5adSBarry Smith { 6324cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 633f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 63487828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 635dfbe8321SBarry Smith PetscErrorCode ierr; 636899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 637b24ad042SBarry Smith PetscInt n,i,jrow,j; 638f9fae5adSBarry Smith 639f9fae5adSBarry Smith PetscFunctionBegin; 6401ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 6411ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 642f9fae5adSBarry Smith idx = a->j; 643f9fae5adSBarry Smith v = a->a; 644f9fae5adSBarry Smith ii = a->i; 645f9fae5adSBarry Smith 646f9fae5adSBarry Smith for (i=0; i<m; i++) { 647f9fae5adSBarry Smith jrow = ii[i]; 648f9fae5adSBarry Smith n = ii[i+1] - jrow; 649f9fae5adSBarry Smith sum1 = 0.0; 650f9fae5adSBarry Smith sum2 = 0.0; 651f9fae5adSBarry Smith sum3 = 0.0; 652f9fae5adSBarry Smith sum4 = 0.0; 653f9fae5adSBarry Smith sum5 = 0.0; 654f9fae5adSBarry Smith for (j=0; j<n; j++) { 655f9fae5adSBarry Smith sum1 += v[jrow]*x[5*idx[jrow]]; 656f9fae5adSBarry Smith sum2 += v[jrow]*x[5*idx[jrow]+1]; 657f9fae5adSBarry Smith sum3 += v[jrow]*x[5*idx[jrow]+2]; 658f9fae5adSBarry Smith sum4 += v[jrow]*x[5*idx[jrow]+3]; 659f9fae5adSBarry Smith sum5 += v[jrow]*x[5*idx[jrow]+4]; 660f9fae5adSBarry Smith jrow++; 661f9fae5adSBarry Smith } 662f9fae5adSBarry Smith y[5*i] = sum1; 663f9fae5adSBarry Smith y[5*i+1] = sum2; 664f9fae5adSBarry Smith y[5*i+2] = sum3; 665f9fae5adSBarry Smith y[5*i+3] = sum4; 666f9fae5adSBarry Smith y[5*i+4] = sum5; 667f9fae5adSBarry Smith } 668f9fae5adSBarry Smith 669efee365bSSatish Balay ierr = PetscLogFlops(10*a->nz - 5*m);CHKERRQ(ierr); 6701ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 6711ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 672f9fae5adSBarry Smith PetscFunctionReturn(0); 673f9fae5adSBarry Smith } 674f9fae5adSBarry Smith 6754a2ae208SSatish Balay #undef __FUNCT__ 676b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 677dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 678f9fae5adSBarry Smith { 6794cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 680f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 68187828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0; 682dfbe8321SBarry Smith PetscErrorCode ierr; 683899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 684f9fae5adSBarry Smith 685f9fae5adSBarry Smith PetscFunctionBegin; 6862dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 6871ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 6881ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 689bfec09a0SHong Zhang 690f9fae5adSBarry Smith for (i=0; i<m; i++) { 691bfec09a0SHong Zhang idx = a->j + a->i[i] ; 692bfec09a0SHong Zhang v = a->a + a->i[i] ; 693f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 694f9fae5adSBarry Smith alpha1 = x[5*i]; 695f9fae5adSBarry Smith alpha2 = x[5*i+1]; 696f9fae5adSBarry Smith alpha3 = x[5*i+2]; 697f9fae5adSBarry Smith alpha4 = x[5*i+3]; 698f9fae5adSBarry Smith alpha5 = x[5*i+4]; 699f9fae5adSBarry Smith while (n-->0) { 700f9fae5adSBarry Smith y[5*(*idx)] += alpha1*(*v); 701f9fae5adSBarry Smith y[5*(*idx)+1] += alpha2*(*v); 702f9fae5adSBarry Smith y[5*(*idx)+2] += alpha3*(*v); 703f9fae5adSBarry Smith y[5*(*idx)+3] += alpha4*(*v); 704f9fae5adSBarry Smith y[5*(*idx)+4] += alpha5*(*v); 705f9fae5adSBarry Smith idx++; v++; 706f9fae5adSBarry Smith } 707f9fae5adSBarry Smith } 708899cda47SBarry Smith ierr = PetscLogFlops(10*a->nz - 5*b->AIJ->cmap.n);CHKERRQ(ierr); 7091ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 7101ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 711f9fae5adSBarry Smith PetscFunctionReturn(0); 712f9fae5adSBarry Smith } 713f9fae5adSBarry Smith 7144a2ae208SSatish Balay #undef __FUNCT__ 715b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 716dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 717f9fae5adSBarry Smith { 7184cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 719f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 72087828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 721dfbe8321SBarry Smith PetscErrorCode ierr; 722899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 723b24ad042SBarry Smith PetscInt n,i,jrow,j; 724f9fae5adSBarry Smith 725f9fae5adSBarry Smith PetscFunctionBegin; 726f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 7271ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 7281ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 729f9fae5adSBarry Smith idx = a->j; 730f9fae5adSBarry Smith v = a->a; 731f9fae5adSBarry Smith ii = a->i; 732f9fae5adSBarry Smith 733f9fae5adSBarry Smith for (i=0; i<m; i++) { 734f9fae5adSBarry Smith jrow = ii[i]; 735f9fae5adSBarry Smith n = ii[i+1] - jrow; 736f9fae5adSBarry Smith sum1 = 0.0; 737f9fae5adSBarry Smith sum2 = 0.0; 738f9fae5adSBarry Smith sum3 = 0.0; 739f9fae5adSBarry Smith sum4 = 0.0; 740f9fae5adSBarry Smith sum5 = 0.0; 741f9fae5adSBarry Smith for (j=0; j<n; j++) { 742f9fae5adSBarry Smith sum1 += v[jrow]*x[5*idx[jrow]]; 743f9fae5adSBarry Smith sum2 += v[jrow]*x[5*idx[jrow]+1]; 744f9fae5adSBarry Smith sum3 += v[jrow]*x[5*idx[jrow]+2]; 745f9fae5adSBarry Smith sum4 += v[jrow]*x[5*idx[jrow]+3]; 746f9fae5adSBarry Smith sum5 += v[jrow]*x[5*idx[jrow]+4]; 747f9fae5adSBarry Smith jrow++; 748f9fae5adSBarry Smith } 749f9fae5adSBarry Smith y[5*i] += sum1; 750f9fae5adSBarry Smith y[5*i+1] += sum2; 751f9fae5adSBarry Smith y[5*i+2] += sum3; 752f9fae5adSBarry Smith y[5*i+3] += sum4; 753f9fae5adSBarry Smith y[5*i+4] += sum5; 754f9fae5adSBarry Smith } 755f9fae5adSBarry Smith 756efee365bSSatish Balay ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr); 7571ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 7581ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 759f9fae5adSBarry Smith PetscFunctionReturn(0); 760f9fae5adSBarry Smith } 761f9fae5adSBarry Smith 7624a2ae208SSatish Balay #undef __FUNCT__ 763b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 764dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 765f9fae5adSBarry Smith { 7664cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 767f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 76887828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5; 769dfbe8321SBarry Smith PetscErrorCode ierr; 770899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 771f9fae5adSBarry Smith 772f9fae5adSBarry Smith PetscFunctionBegin; 773f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 7741ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 7751ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 776bfec09a0SHong Zhang 777f9fae5adSBarry Smith for (i=0; i<m; i++) { 778bfec09a0SHong Zhang idx = a->j + a->i[i] ; 779bfec09a0SHong Zhang v = a->a + a->i[i] ; 780f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 781f9fae5adSBarry Smith alpha1 = x[5*i]; 782f9fae5adSBarry Smith alpha2 = x[5*i+1]; 783f9fae5adSBarry Smith alpha3 = x[5*i+2]; 784f9fae5adSBarry Smith alpha4 = x[5*i+3]; 785f9fae5adSBarry Smith alpha5 = x[5*i+4]; 786f9fae5adSBarry Smith while (n-->0) { 787f9fae5adSBarry Smith y[5*(*idx)] += alpha1*(*v); 788f9fae5adSBarry Smith y[5*(*idx)+1] += alpha2*(*v); 789f9fae5adSBarry Smith y[5*(*idx)+2] += alpha3*(*v); 790f9fae5adSBarry Smith y[5*(*idx)+3] += alpha4*(*v); 791f9fae5adSBarry Smith y[5*(*idx)+4] += alpha5*(*v); 792f9fae5adSBarry Smith idx++; v++; 793f9fae5adSBarry Smith } 794f9fae5adSBarry Smith } 795efee365bSSatish Balay ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr); 7961ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 7971ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 798f9fae5adSBarry Smith PetscFunctionReturn(0); 799bcc973b7SBarry Smith } 800bcc973b7SBarry Smith 8016cd98798SBarry Smith /* ------------------------------------------------------------------------------*/ 8024a2ae208SSatish Balay #undef __FUNCT__ 803b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_6" 804dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 8056cd98798SBarry Smith { 8066cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 8076cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 80887828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 809dfbe8321SBarry Smith PetscErrorCode ierr; 810899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 811b24ad042SBarry Smith PetscInt n,i,jrow,j; 8126cd98798SBarry Smith 8136cd98798SBarry Smith PetscFunctionBegin; 8141ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 8151ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 8166cd98798SBarry Smith idx = a->j; 8176cd98798SBarry Smith v = a->a; 8186cd98798SBarry Smith ii = a->i; 8196cd98798SBarry Smith 8206cd98798SBarry Smith for (i=0; i<m; i++) { 8216cd98798SBarry Smith jrow = ii[i]; 8226cd98798SBarry Smith n = ii[i+1] - jrow; 8236cd98798SBarry Smith sum1 = 0.0; 8246cd98798SBarry Smith sum2 = 0.0; 8256cd98798SBarry Smith sum3 = 0.0; 8266cd98798SBarry Smith sum4 = 0.0; 8276cd98798SBarry Smith sum5 = 0.0; 8286cd98798SBarry Smith sum6 = 0.0; 8296cd98798SBarry Smith for (j=0; j<n; j++) { 8306cd98798SBarry Smith sum1 += v[jrow]*x[6*idx[jrow]]; 8316cd98798SBarry Smith sum2 += v[jrow]*x[6*idx[jrow]+1]; 8326cd98798SBarry Smith sum3 += v[jrow]*x[6*idx[jrow]+2]; 8336cd98798SBarry Smith sum4 += v[jrow]*x[6*idx[jrow]+3]; 8346cd98798SBarry Smith sum5 += v[jrow]*x[6*idx[jrow]+4]; 8356cd98798SBarry Smith sum6 += v[jrow]*x[6*idx[jrow]+5]; 8366cd98798SBarry Smith jrow++; 8376cd98798SBarry Smith } 8386cd98798SBarry Smith y[6*i] = sum1; 8396cd98798SBarry Smith y[6*i+1] = sum2; 8406cd98798SBarry Smith y[6*i+2] = sum3; 8416cd98798SBarry Smith y[6*i+3] = sum4; 8426cd98798SBarry Smith y[6*i+4] = sum5; 8436cd98798SBarry Smith y[6*i+5] = sum6; 8446cd98798SBarry Smith } 8456cd98798SBarry Smith 846efee365bSSatish Balay ierr = PetscLogFlops(12*a->nz - 6*m);CHKERRQ(ierr); 8471ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 8481ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 8496cd98798SBarry Smith PetscFunctionReturn(0); 8506cd98798SBarry Smith } 8516cd98798SBarry Smith 8524a2ae208SSatish Balay #undef __FUNCT__ 853b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 854dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 8556cd98798SBarry Smith { 8566cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 8576cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 85887828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0; 859dfbe8321SBarry Smith PetscErrorCode ierr; 860899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 8616cd98798SBarry Smith 8626cd98798SBarry Smith PetscFunctionBegin; 8632dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 8641ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 8651ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 866bfec09a0SHong Zhang 8676cd98798SBarry Smith for (i=0; i<m; i++) { 868bfec09a0SHong Zhang idx = a->j + a->i[i] ; 869bfec09a0SHong Zhang v = a->a + a->i[i] ; 8706cd98798SBarry Smith n = a->i[i+1] - a->i[i]; 8716cd98798SBarry Smith alpha1 = x[6*i]; 8726cd98798SBarry Smith alpha2 = x[6*i+1]; 8736cd98798SBarry Smith alpha3 = x[6*i+2]; 8746cd98798SBarry Smith alpha4 = x[6*i+3]; 8756cd98798SBarry Smith alpha5 = x[6*i+4]; 8766cd98798SBarry Smith alpha6 = x[6*i+5]; 8776cd98798SBarry Smith while (n-->0) { 8786cd98798SBarry Smith y[6*(*idx)] += alpha1*(*v); 8796cd98798SBarry Smith y[6*(*idx)+1] += alpha2*(*v); 8806cd98798SBarry Smith y[6*(*idx)+2] += alpha3*(*v); 8816cd98798SBarry Smith y[6*(*idx)+3] += alpha4*(*v); 8826cd98798SBarry Smith y[6*(*idx)+4] += alpha5*(*v); 8836cd98798SBarry Smith y[6*(*idx)+5] += alpha6*(*v); 8846cd98798SBarry Smith idx++; v++; 8856cd98798SBarry Smith } 8866cd98798SBarry Smith } 887899cda47SBarry Smith ierr = PetscLogFlops(12*a->nz - 6*b->AIJ->cmap.n);CHKERRQ(ierr); 8881ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 8891ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 8906cd98798SBarry Smith PetscFunctionReturn(0); 8916cd98798SBarry Smith } 8926cd98798SBarry Smith 8934a2ae208SSatish Balay #undef __FUNCT__ 894b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 895dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 8966cd98798SBarry Smith { 8976cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 8986cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 89987828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 900dfbe8321SBarry Smith PetscErrorCode ierr; 901899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 902b24ad042SBarry Smith PetscInt n,i,jrow,j; 9036cd98798SBarry Smith 9046cd98798SBarry Smith PetscFunctionBegin; 9056cd98798SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 9061ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 9071ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 9086cd98798SBarry Smith idx = a->j; 9096cd98798SBarry Smith v = a->a; 9106cd98798SBarry Smith ii = a->i; 9116cd98798SBarry Smith 9126cd98798SBarry Smith for (i=0; i<m; i++) { 9136cd98798SBarry Smith jrow = ii[i]; 9146cd98798SBarry Smith n = ii[i+1] - jrow; 9156cd98798SBarry Smith sum1 = 0.0; 9166cd98798SBarry Smith sum2 = 0.0; 9176cd98798SBarry Smith sum3 = 0.0; 9186cd98798SBarry Smith sum4 = 0.0; 9196cd98798SBarry Smith sum5 = 0.0; 9206cd98798SBarry Smith sum6 = 0.0; 9216cd98798SBarry Smith for (j=0; j<n; j++) { 9226cd98798SBarry Smith sum1 += v[jrow]*x[6*idx[jrow]]; 9236cd98798SBarry Smith sum2 += v[jrow]*x[6*idx[jrow]+1]; 9246cd98798SBarry Smith sum3 += v[jrow]*x[6*idx[jrow]+2]; 9256cd98798SBarry Smith sum4 += v[jrow]*x[6*idx[jrow]+3]; 9266cd98798SBarry Smith sum5 += v[jrow]*x[6*idx[jrow]+4]; 9276cd98798SBarry Smith sum6 += v[jrow]*x[6*idx[jrow]+5]; 9286cd98798SBarry Smith jrow++; 9296cd98798SBarry Smith } 9306cd98798SBarry Smith y[6*i] += sum1; 9316cd98798SBarry Smith y[6*i+1] += sum2; 9326cd98798SBarry Smith y[6*i+2] += sum3; 9336cd98798SBarry Smith y[6*i+3] += sum4; 9346cd98798SBarry Smith y[6*i+4] += sum5; 9356cd98798SBarry Smith y[6*i+5] += sum6; 9366cd98798SBarry Smith } 9376cd98798SBarry Smith 938efee365bSSatish Balay ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr); 9391ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 9401ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 9416cd98798SBarry Smith PetscFunctionReturn(0); 9426cd98798SBarry Smith } 9436cd98798SBarry Smith 9444a2ae208SSatish Balay #undef __FUNCT__ 945b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 946dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 9476cd98798SBarry Smith { 9486cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 9496cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 95087828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 951dfbe8321SBarry Smith PetscErrorCode ierr; 952899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 9536cd98798SBarry Smith 9546cd98798SBarry Smith PetscFunctionBegin; 9556cd98798SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 9561ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 9571ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 958bfec09a0SHong Zhang 9596cd98798SBarry Smith for (i=0; i<m; i++) { 960bfec09a0SHong Zhang idx = a->j + a->i[i] ; 961bfec09a0SHong Zhang v = a->a + a->i[i] ; 9626cd98798SBarry Smith n = a->i[i+1] - a->i[i]; 9636cd98798SBarry Smith alpha1 = x[6*i]; 9646cd98798SBarry Smith alpha2 = x[6*i+1]; 9656cd98798SBarry Smith alpha3 = x[6*i+2]; 9666cd98798SBarry Smith alpha4 = x[6*i+3]; 9676cd98798SBarry Smith alpha5 = x[6*i+4]; 9686cd98798SBarry Smith alpha6 = x[6*i+5]; 9696cd98798SBarry Smith while (n-->0) { 9706cd98798SBarry Smith y[6*(*idx)] += alpha1*(*v); 9716cd98798SBarry Smith y[6*(*idx)+1] += alpha2*(*v); 9726cd98798SBarry Smith y[6*(*idx)+2] += alpha3*(*v); 9736cd98798SBarry Smith y[6*(*idx)+3] += alpha4*(*v); 9746cd98798SBarry Smith y[6*(*idx)+4] += alpha5*(*v); 9756cd98798SBarry Smith y[6*(*idx)+5] += alpha6*(*v); 9766cd98798SBarry Smith idx++; v++; 9776cd98798SBarry Smith } 9786cd98798SBarry Smith } 979efee365bSSatish Balay ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr); 9801ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 9811ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 9826cd98798SBarry Smith PetscFunctionReturn(0); 9836cd98798SBarry Smith } 9846cd98798SBarry Smith 98566ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/ 98666ed3db0SBarry Smith #undef __FUNCT__ 987ed8eea03SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_7" 988ed8eea03SBarry Smith PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 989ed8eea03SBarry Smith { 990ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 991ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 992ed8eea03SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 993ed8eea03SBarry Smith PetscErrorCode ierr; 994899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 995ed8eea03SBarry Smith PetscInt n,i,jrow,j; 996ed8eea03SBarry Smith 997ed8eea03SBarry Smith PetscFunctionBegin; 998ed8eea03SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 999ed8eea03SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1000ed8eea03SBarry Smith idx = a->j; 1001ed8eea03SBarry Smith v = a->a; 1002ed8eea03SBarry Smith ii = a->i; 1003ed8eea03SBarry Smith 1004ed8eea03SBarry Smith for (i=0; i<m; i++) { 1005ed8eea03SBarry Smith jrow = ii[i]; 1006ed8eea03SBarry Smith n = ii[i+1] - jrow; 1007ed8eea03SBarry Smith sum1 = 0.0; 1008ed8eea03SBarry Smith sum2 = 0.0; 1009ed8eea03SBarry Smith sum3 = 0.0; 1010ed8eea03SBarry Smith sum4 = 0.0; 1011ed8eea03SBarry Smith sum5 = 0.0; 1012ed8eea03SBarry Smith sum6 = 0.0; 1013ed8eea03SBarry Smith sum7 = 0.0; 1014ed8eea03SBarry Smith for (j=0; j<n; j++) { 1015ed8eea03SBarry Smith sum1 += v[jrow]*x[7*idx[jrow]]; 1016ed8eea03SBarry Smith sum2 += v[jrow]*x[7*idx[jrow]+1]; 1017ed8eea03SBarry Smith sum3 += v[jrow]*x[7*idx[jrow]+2]; 1018ed8eea03SBarry Smith sum4 += v[jrow]*x[7*idx[jrow]+3]; 1019ed8eea03SBarry Smith sum5 += v[jrow]*x[7*idx[jrow]+4]; 1020ed8eea03SBarry Smith sum6 += v[jrow]*x[7*idx[jrow]+5]; 1021ed8eea03SBarry Smith sum7 += v[jrow]*x[7*idx[jrow]+6]; 1022ed8eea03SBarry Smith jrow++; 1023ed8eea03SBarry Smith } 1024ed8eea03SBarry Smith y[7*i] = sum1; 1025ed8eea03SBarry Smith y[7*i+1] = sum2; 1026ed8eea03SBarry Smith y[7*i+2] = sum3; 1027ed8eea03SBarry Smith y[7*i+3] = sum4; 1028ed8eea03SBarry Smith y[7*i+4] = sum5; 1029ed8eea03SBarry Smith y[7*i+5] = sum6; 1030ed8eea03SBarry Smith y[7*i+6] = sum7; 1031ed8eea03SBarry Smith } 1032ed8eea03SBarry Smith 1033efee365bSSatish Balay ierr = PetscLogFlops(14*a->nz - 7*m);CHKERRQ(ierr); 1034ed8eea03SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1035ed8eea03SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1036ed8eea03SBarry Smith PetscFunctionReturn(0); 1037ed8eea03SBarry Smith } 1038ed8eea03SBarry Smith 1039ed8eea03SBarry Smith #undef __FUNCT__ 1040ed8eea03SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7" 1041ed8eea03SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1042ed8eea03SBarry Smith { 1043ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1044ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1045ed8eea03SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0; 1046ed8eea03SBarry Smith PetscErrorCode ierr; 1047899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1048ed8eea03SBarry Smith 1049ed8eea03SBarry Smith PetscFunctionBegin; 10502dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 1051ed8eea03SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1052ed8eea03SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1053ed8eea03SBarry Smith 1054ed8eea03SBarry Smith for (i=0; i<m; i++) { 1055ed8eea03SBarry Smith idx = a->j + a->i[i] ; 1056ed8eea03SBarry Smith v = a->a + a->i[i] ; 1057ed8eea03SBarry Smith n = a->i[i+1] - a->i[i]; 1058ed8eea03SBarry Smith alpha1 = x[7*i]; 1059ed8eea03SBarry Smith alpha2 = x[7*i+1]; 1060ed8eea03SBarry Smith alpha3 = x[7*i+2]; 1061ed8eea03SBarry Smith alpha4 = x[7*i+3]; 1062ed8eea03SBarry Smith alpha5 = x[7*i+4]; 1063ed8eea03SBarry Smith alpha6 = x[7*i+5]; 1064ed8eea03SBarry Smith alpha7 = x[7*i+6]; 1065ed8eea03SBarry Smith while (n-->0) { 1066ed8eea03SBarry Smith y[7*(*idx)] += alpha1*(*v); 1067ed8eea03SBarry Smith y[7*(*idx)+1] += alpha2*(*v); 1068ed8eea03SBarry Smith y[7*(*idx)+2] += alpha3*(*v); 1069ed8eea03SBarry Smith y[7*(*idx)+3] += alpha4*(*v); 1070ed8eea03SBarry Smith y[7*(*idx)+4] += alpha5*(*v); 1071ed8eea03SBarry Smith y[7*(*idx)+5] += alpha6*(*v); 1072ed8eea03SBarry Smith y[7*(*idx)+6] += alpha7*(*v); 1073ed8eea03SBarry Smith idx++; v++; 1074ed8eea03SBarry Smith } 1075ed8eea03SBarry Smith } 1076899cda47SBarry Smith ierr = PetscLogFlops(14*a->nz - 7*b->AIJ->cmap.n);CHKERRQ(ierr); 1077ed8eea03SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1078ed8eea03SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1079ed8eea03SBarry Smith PetscFunctionReturn(0); 1080ed8eea03SBarry Smith } 1081ed8eea03SBarry Smith 1082ed8eea03SBarry Smith #undef __FUNCT__ 1083ed8eea03SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_7" 1084ed8eea03SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1085ed8eea03SBarry Smith { 1086ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1087ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1088ed8eea03SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1089ed8eea03SBarry Smith PetscErrorCode ierr; 1090899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1091ed8eea03SBarry Smith PetscInt n,i,jrow,j; 1092ed8eea03SBarry Smith 1093ed8eea03SBarry Smith PetscFunctionBegin; 1094ed8eea03SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1095ed8eea03SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1096ed8eea03SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1097ed8eea03SBarry Smith idx = a->j; 1098ed8eea03SBarry Smith v = a->a; 1099ed8eea03SBarry Smith ii = a->i; 1100ed8eea03SBarry Smith 1101ed8eea03SBarry Smith for (i=0; i<m; i++) { 1102ed8eea03SBarry Smith jrow = ii[i]; 1103ed8eea03SBarry Smith n = ii[i+1] - jrow; 1104ed8eea03SBarry Smith sum1 = 0.0; 1105ed8eea03SBarry Smith sum2 = 0.0; 1106ed8eea03SBarry Smith sum3 = 0.0; 1107ed8eea03SBarry Smith sum4 = 0.0; 1108ed8eea03SBarry Smith sum5 = 0.0; 1109ed8eea03SBarry Smith sum6 = 0.0; 1110ed8eea03SBarry Smith sum7 = 0.0; 1111ed8eea03SBarry Smith for (j=0; j<n; j++) { 1112ed8eea03SBarry Smith sum1 += v[jrow]*x[7*idx[jrow]]; 1113ed8eea03SBarry Smith sum2 += v[jrow]*x[7*idx[jrow]+1]; 1114ed8eea03SBarry Smith sum3 += v[jrow]*x[7*idx[jrow]+2]; 1115ed8eea03SBarry Smith sum4 += v[jrow]*x[7*idx[jrow]+3]; 1116ed8eea03SBarry Smith sum5 += v[jrow]*x[7*idx[jrow]+4]; 1117ed8eea03SBarry Smith sum6 += v[jrow]*x[7*idx[jrow]+5]; 1118ed8eea03SBarry Smith sum7 += v[jrow]*x[7*idx[jrow]+6]; 1119ed8eea03SBarry Smith jrow++; 1120ed8eea03SBarry Smith } 1121ed8eea03SBarry Smith y[7*i] += sum1; 1122ed8eea03SBarry Smith y[7*i+1] += sum2; 1123ed8eea03SBarry Smith y[7*i+2] += sum3; 1124ed8eea03SBarry Smith y[7*i+3] += sum4; 1125ed8eea03SBarry Smith y[7*i+4] += sum5; 1126ed8eea03SBarry Smith y[7*i+5] += sum6; 1127ed8eea03SBarry Smith y[7*i+6] += sum7; 1128ed8eea03SBarry Smith } 1129ed8eea03SBarry Smith 1130efee365bSSatish Balay ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr); 1131ed8eea03SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1132ed8eea03SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1133ed8eea03SBarry Smith PetscFunctionReturn(0); 1134ed8eea03SBarry Smith } 1135ed8eea03SBarry Smith 1136ed8eea03SBarry Smith #undef __FUNCT__ 1137ed8eea03SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7" 1138ed8eea03SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1139ed8eea03SBarry Smith { 1140ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1141ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1142ed8eea03SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1143ed8eea03SBarry Smith PetscErrorCode ierr; 1144899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1145ed8eea03SBarry Smith 1146ed8eea03SBarry Smith PetscFunctionBegin; 1147ed8eea03SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1148ed8eea03SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1149ed8eea03SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1150ed8eea03SBarry Smith for (i=0; i<m; i++) { 1151ed8eea03SBarry Smith idx = a->j + a->i[i] ; 1152ed8eea03SBarry Smith v = a->a + a->i[i] ; 1153ed8eea03SBarry Smith n = a->i[i+1] - a->i[i]; 1154ed8eea03SBarry Smith alpha1 = x[7*i]; 1155ed8eea03SBarry Smith alpha2 = x[7*i+1]; 1156ed8eea03SBarry Smith alpha3 = x[7*i+2]; 1157ed8eea03SBarry Smith alpha4 = x[7*i+3]; 1158ed8eea03SBarry Smith alpha5 = x[7*i+4]; 1159ed8eea03SBarry Smith alpha6 = x[7*i+5]; 1160ed8eea03SBarry Smith alpha7 = x[7*i+6]; 1161ed8eea03SBarry Smith while (n-->0) { 1162ed8eea03SBarry Smith y[7*(*idx)] += alpha1*(*v); 1163ed8eea03SBarry Smith y[7*(*idx)+1] += alpha2*(*v); 1164ed8eea03SBarry Smith y[7*(*idx)+2] += alpha3*(*v); 1165ed8eea03SBarry Smith y[7*(*idx)+3] += alpha4*(*v); 1166ed8eea03SBarry Smith y[7*(*idx)+4] += alpha5*(*v); 1167ed8eea03SBarry Smith y[7*(*idx)+5] += alpha6*(*v); 1168ed8eea03SBarry Smith y[7*(*idx)+6] += alpha7*(*v); 1169ed8eea03SBarry Smith idx++; v++; 1170ed8eea03SBarry Smith } 1171ed8eea03SBarry Smith } 1172efee365bSSatish Balay ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr); 1173ed8eea03SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1174ed8eea03SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1175ed8eea03SBarry Smith PetscFunctionReturn(0); 1176ed8eea03SBarry Smith } 1177ed8eea03SBarry Smith 1178ed8eea03SBarry Smith #undef __FUNCT__ 117966ed3db0SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_8" 1180dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 118166ed3db0SBarry Smith { 118266ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 118366ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 118487828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1185dfbe8321SBarry Smith PetscErrorCode ierr; 1186899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1187b24ad042SBarry Smith PetscInt n,i,jrow,j; 118866ed3db0SBarry Smith 118966ed3db0SBarry Smith PetscFunctionBegin; 11901ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 11911ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 119266ed3db0SBarry Smith idx = a->j; 119366ed3db0SBarry Smith v = a->a; 119466ed3db0SBarry Smith ii = a->i; 119566ed3db0SBarry Smith 119666ed3db0SBarry Smith for (i=0; i<m; i++) { 119766ed3db0SBarry Smith jrow = ii[i]; 119866ed3db0SBarry Smith n = ii[i+1] - jrow; 119966ed3db0SBarry Smith sum1 = 0.0; 120066ed3db0SBarry Smith sum2 = 0.0; 120166ed3db0SBarry Smith sum3 = 0.0; 120266ed3db0SBarry Smith sum4 = 0.0; 120366ed3db0SBarry Smith sum5 = 0.0; 120466ed3db0SBarry Smith sum6 = 0.0; 120566ed3db0SBarry Smith sum7 = 0.0; 120666ed3db0SBarry Smith sum8 = 0.0; 120766ed3db0SBarry Smith for (j=0; j<n; j++) { 120866ed3db0SBarry Smith sum1 += v[jrow]*x[8*idx[jrow]]; 120966ed3db0SBarry Smith sum2 += v[jrow]*x[8*idx[jrow]+1]; 121066ed3db0SBarry Smith sum3 += v[jrow]*x[8*idx[jrow]+2]; 121166ed3db0SBarry Smith sum4 += v[jrow]*x[8*idx[jrow]+3]; 121266ed3db0SBarry Smith sum5 += v[jrow]*x[8*idx[jrow]+4]; 121366ed3db0SBarry Smith sum6 += v[jrow]*x[8*idx[jrow]+5]; 121466ed3db0SBarry Smith sum7 += v[jrow]*x[8*idx[jrow]+6]; 121566ed3db0SBarry Smith sum8 += v[jrow]*x[8*idx[jrow]+7]; 121666ed3db0SBarry Smith jrow++; 121766ed3db0SBarry Smith } 121866ed3db0SBarry Smith y[8*i] = sum1; 121966ed3db0SBarry Smith y[8*i+1] = sum2; 122066ed3db0SBarry Smith y[8*i+2] = sum3; 122166ed3db0SBarry Smith y[8*i+3] = sum4; 122266ed3db0SBarry Smith y[8*i+4] = sum5; 122366ed3db0SBarry Smith y[8*i+5] = sum6; 122466ed3db0SBarry Smith y[8*i+6] = sum7; 122566ed3db0SBarry Smith y[8*i+7] = sum8; 122666ed3db0SBarry Smith } 122766ed3db0SBarry Smith 1228efee365bSSatish Balay ierr = PetscLogFlops(16*a->nz - 8*m);CHKERRQ(ierr); 12291ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 12301ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 123166ed3db0SBarry Smith PetscFunctionReturn(0); 123266ed3db0SBarry Smith } 123366ed3db0SBarry Smith 123466ed3db0SBarry Smith #undef __FUNCT__ 123566ed3db0SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 1236dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 123766ed3db0SBarry Smith { 123866ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 123966ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 124087828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1241dfbe8321SBarry Smith PetscErrorCode ierr; 1242899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 124366ed3db0SBarry Smith 124466ed3db0SBarry Smith PetscFunctionBegin; 12452dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 12461ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 12471ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1248bfec09a0SHong Zhang 124966ed3db0SBarry Smith for (i=0; i<m; i++) { 1250bfec09a0SHong Zhang idx = a->j + a->i[i] ; 1251bfec09a0SHong Zhang v = a->a + a->i[i] ; 125266ed3db0SBarry Smith n = a->i[i+1] - a->i[i]; 125366ed3db0SBarry Smith alpha1 = x[8*i]; 125466ed3db0SBarry Smith alpha2 = x[8*i+1]; 125566ed3db0SBarry Smith alpha3 = x[8*i+2]; 125666ed3db0SBarry Smith alpha4 = x[8*i+3]; 125766ed3db0SBarry Smith alpha5 = x[8*i+4]; 125866ed3db0SBarry Smith alpha6 = x[8*i+5]; 125966ed3db0SBarry Smith alpha7 = x[8*i+6]; 126066ed3db0SBarry Smith alpha8 = x[8*i+7]; 126166ed3db0SBarry Smith while (n-->0) { 126266ed3db0SBarry Smith y[8*(*idx)] += alpha1*(*v); 126366ed3db0SBarry Smith y[8*(*idx)+1] += alpha2*(*v); 126466ed3db0SBarry Smith y[8*(*idx)+2] += alpha3*(*v); 126566ed3db0SBarry Smith y[8*(*idx)+3] += alpha4*(*v); 126666ed3db0SBarry Smith y[8*(*idx)+4] += alpha5*(*v); 126766ed3db0SBarry Smith y[8*(*idx)+5] += alpha6*(*v); 126866ed3db0SBarry Smith y[8*(*idx)+6] += alpha7*(*v); 126966ed3db0SBarry Smith y[8*(*idx)+7] += alpha8*(*v); 127066ed3db0SBarry Smith idx++; v++; 127166ed3db0SBarry Smith } 127266ed3db0SBarry Smith } 1273899cda47SBarry Smith ierr = PetscLogFlops(16*a->nz - 8*b->AIJ->cmap.n);CHKERRQ(ierr); 12741ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 12751ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 127666ed3db0SBarry Smith PetscFunctionReturn(0); 127766ed3db0SBarry Smith } 127866ed3db0SBarry Smith 127966ed3db0SBarry Smith #undef __FUNCT__ 128066ed3db0SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1281dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 128266ed3db0SBarry Smith { 128366ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 128466ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 128587828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1286dfbe8321SBarry Smith PetscErrorCode ierr; 1287899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1288b24ad042SBarry Smith PetscInt n,i,jrow,j; 128966ed3db0SBarry Smith 129066ed3db0SBarry Smith PetscFunctionBegin; 129166ed3db0SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 12921ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 12931ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 129466ed3db0SBarry Smith idx = a->j; 129566ed3db0SBarry Smith v = a->a; 129666ed3db0SBarry Smith ii = a->i; 129766ed3db0SBarry Smith 129866ed3db0SBarry Smith for (i=0; i<m; i++) { 129966ed3db0SBarry Smith jrow = ii[i]; 130066ed3db0SBarry Smith n = ii[i+1] - jrow; 130166ed3db0SBarry Smith sum1 = 0.0; 130266ed3db0SBarry Smith sum2 = 0.0; 130366ed3db0SBarry Smith sum3 = 0.0; 130466ed3db0SBarry Smith sum4 = 0.0; 130566ed3db0SBarry Smith sum5 = 0.0; 130666ed3db0SBarry Smith sum6 = 0.0; 130766ed3db0SBarry Smith sum7 = 0.0; 130866ed3db0SBarry Smith sum8 = 0.0; 130966ed3db0SBarry Smith for (j=0; j<n; j++) { 131066ed3db0SBarry Smith sum1 += v[jrow]*x[8*idx[jrow]]; 131166ed3db0SBarry Smith sum2 += v[jrow]*x[8*idx[jrow]+1]; 131266ed3db0SBarry Smith sum3 += v[jrow]*x[8*idx[jrow]+2]; 131366ed3db0SBarry Smith sum4 += v[jrow]*x[8*idx[jrow]+3]; 131466ed3db0SBarry Smith sum5 += v[jrow]*x[8*idx[jrow]+4]; 131566ed3db0SBarry Smith sum6 += v[jrow]*x[8*idx[jrow]+5]; 131666ed3db0SBarry Smith sum7 += v[jrow]*x[8*idx[jrow]+6]; 131766ed3db0SBarry Smith sum8 += v[jrow]*x[8*idx[jrow]+7]; 131866ed3db0SBarry Smith jrow++; 131966ed3db0SBarry Smith } 132066ed3db0SBarry Smith y[8*i] += sum1; 132166ed3db0SBarry Smith y[8*i+1] += sum2; 132266ed3db0SBarry Smith y[8*i+2] += sum3; 132366ed3db0SBarry Smith y[8*i+3] += sum4; 132466ed3db0SBarry Smith y[8*i+4] += sum5; 132566ed3db0SBarry Smith y[8*i+5] += sum6; 132666ed3db0SBarry Smith y[8*i+6] += sum7; 132766ed3db0SBarry Smith y[8*i+7] += sum8; 132866ed3db0SBarry Smith } 132966ed3db0SBarry Smith 1330efee365bSSatish Balay ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr); 13311ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 13321ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 133366ed3db0SBarry Smith PetscFunctionReturn(0); 133466ed3db0SBarry Smith } 133566ed3db0SBarry Smith 133666ed3db0SBarry Smith #undef __FUNCT__ 133766ed3db0SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1338dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 133966ed3db0SBarry Smith { 134066ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 134166ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 134287828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1343dfbe8321SBarry Smith PetscErrorCode ierr; 1344899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 134566ed3db0SBarry Smith 134666ed3db0SBarry Smith PetscFunctionBegin; 134766ed3db0SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 13481ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 13491ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 135066ed3db0SBarry Smith for (i=0; i<m; i++) { 1351bfec09a0SHong Zhang idx = a->j + a->i[i] ; 1352bfec09a0SHong Zhang v = a->a + a->i[i] ; 135366ed3db0SBarry Smith n = a->i[i+1] - a->i[i]; 135466ed3db0SBarry Smith alpha1 = x[8*i]; 135566ed3db0SBarry Smith alpha2 = x[8*i+1]; 135666ed3db0SBarry Smith alpha3 = x[8*i+2]; 135766ed3db0SBarry Smith alpha4 = x[8*i+3]; 135866ed3db0SBarry Smith alpha5 = x[8*i+4]; 135966ed3db0SBarry Smith alpha6 = x[8*i+5]; 136066ed3db0SBarry Smith alpha7 = x[8*i+6]; 136166ed3db0SBarry Smith alpha8 = x[8*i+7]; 136266ed3db0SBarry Smith while (n-->0) { 136366ed3db0SBarry Smith y[8*(*idx)] += alpha1*(*v); 136466ed3db0SBarry Smith y[8*(*idx)+1] += alpha2*(*v); 136566ed3db0SBarry Smith y[8*(*idx)+2] += alpha3*(*v); 136666ed3db0SBarry Smith y[8*(*idx)+3] += alpha4*(*v); 136766ed3db0SBarry Smith y[8*(*idx)+4] += alpha5*(*v); 136866ed3db0SBarry Smith y[8*(*idx)+5] += alpha6*(*v); 136966ed3db0SBarry Smith y[8*(*idx)+6] += alpha7*(*v); 137066ed3db0SBarry Smith y[8*(*idx)+7] += alpha8*(*v); 137166ed3db0SBarry Smith idx++; v++; 137266ed3db0SBarry Smith } 137366ed3db0SBarry Smith } 1374efee365bSSatish Balay ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr); 13751ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 13761ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 13772f7816d4SBarry Smith PetscFunctionReturn(0); 13782f7816d4SBarry Smith } 13792f7816d4SBarry Smith 13802b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/ 13812b692628SMatthew Knepley #undef __FUNCT__ 13822b692628SMatthew Knepley #define __FUNCT__ "MatMult_SeqMAIJ_9" 1383dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 13842b692628SMatthew Knepley { 13852b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 13862b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 13872b692628SMatthew Knepley PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1388dfbe8321SBarry Smith PetscErrorCode ierr; 1389899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1390b24ad042SBarry Smith PetscInt n,i,jrow,j; 13912b692628SMatthew Knepley 13922b692628SMatthew Knepley PetscFunctionBegin; 13931ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 13941ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 13952b692628SMatthew Knepley idx = a->j; 13962b692628SMatthew Knepley v = a->a; 13972b692628SMatthew Knepley ii = a->i; 13982b692628SMatthew Knepley 13992b692628SMatthew Knepley for (i=0; i<m; i++) { 14002b692628SMatthew Knepley jrow = ii[i]; 14012b692628SMatthew Knepley n = ii[i+1] - jrow; 14022b692628SMatthew Knepley sum1 = 0.0; 14032b692628SMatthew Knepley sum2 = 0.0; 14042b692628SMatthew Knepley sum3 = 0.0; 14052b692628SMatthew Knepley sum4 = 0.0; 14062b692628SMatthew Knepley sum5 = 0.0; 14072b692628SMatthew Knepley sum6 = 0.0; 14082b692628SMatthew Knepley sum7 = 0.0; 14092b692628SMatthew Knepley sum8 = 0.0; 14102b692628SMatthew Knepley sum9 = 0.0; 14112b692628SMatthew Knepley for (j=0; j<n; j++) { 14122b692628SMatthew Knepley sum1 += v[jrow]*x[9*idx[jrow]]; 14132b692628SMatthew Knepley sum2 += v[jrow]*x[9*idx[jrow]+1]; 14142b692628SMatthew Knepley sum3 += v[jrow]*x[9*idx[jrow]+2]; 14152b692628SMatthew Knepley sum4 += v[jrow]*x[9*idx[jrow]+3]; 14162b692628SMatthew Knepley sum5 += v[jrow]*x[9*idx[jrow]+4]; 14172b692628SMatthew Knepley sum6 += v[jrow]*x[9*idx[jrow]+5]; 14182b692628SMatthew Knepley sum7 += v[jrow]*x[9*idx[jrow]+6]; 14192b692628SMatthew Knepley sum8 += v[jrow]*x[9*idx[jrow]+7]; 14202b692628SMatthew Knepley sum9 += v[jrow]*x[9*idx[jrow]+8]; 14212b692628SMatthew Knepley jrow++; 14222b692628SMatthew Knepley } 14232b692628SMatthew Knepley y[9*i] = sum1; 14242b692628SMatthew Knepley y[9*i+1] = sum2; 14252b692628SMatthew Knepley y[9*i+2] = sum3; 14262b692628SMatthew Knepley y[9*i+3] = sum4; 14272b692628SMatthew Knepley y[9*i+4] = sum5; 14282b692628SMatthew Knepley y[9*i+5] = sum6; 14292b692628SMatthew Knepley y[9*i+6] = sum7; 14302b692628SMatthew Knepley y[9*i+7] = sum8; 14312b692628SMatthew Knepley y[9*i+8] = sum9; 14322b692628SMatthew Knepley } 14332b692628SMatthew Knepley 1434efee365bSSatish Balay ierr = PetscLogFlops(18*a->nz - 9*m);CHKERRQ(ierr); 14351ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 14361ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 14372b692628SMatthew Knepley PetscFunctionReturn(0); 14382b692628SMatthew Knepley } 14392b692628SMatthew Knepley 1440b9cda22cSBarry Smith /* ------------------------------------------------------------------------------*/ 1441b9cda22cSBarry Smith 14422b692628SMatthew Knepley #undef __FUNCT__ 14432b692628SMatthew Knepley #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9" 1444dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 14452b692628SMatthew Knepley { 14462b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 14472b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 14482b692628SMatthew Knepley PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0; 1449dfbe8321SBarry Smith PetscErrorCode ierr; 1450899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 14512b692628SMatthew Knepley 14522b692628SMatthew Knepley PetscFunctionBegin; 14532dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 14541ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 14551ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 14562b692628SMatthew Knepley 14572b692628SMatthew Knepley for (i=0; i<m; i++) { 14582b692628SMatthew Knepley idx = a->j + a->i[i] ; 14592b692628SMatthew Knepley v = a->a + a->i[i] ; 14602b692628SMatthew Knepley n = a->i[i+1] - a->i[i]; 14612b692628SMatthew Knepley alpha1 = x[9*i]; 14622b692628SMatthew Knepley alpha2 = x[9*i+1]; 14632b692628SMatthew Knepley alpha3 = x[9*i+2]; 14642b692628SMatthew Knepley alpha4 = x[9*i+3]; 14652b692628SMatthew Knepley alpha5 = x[9*i+4]; 14662b692628SMatthew Knepley alpha6 = x[9*i+5]; 14672b692628SMatthew Knepley alpha7 = x[9*i+6]; 14682b692628SMatthew Knepley alpha8 = x[9*i+7]; 14692f6bd0c6SMatthew Knepley alpha9 = x[9*i+8]; 14702b692628SMatthew Knepley while (n-->0) { 14712b692628SMatthew Knepley y[9*(*idx)] += alpha1*(*v); 14722b692628SMatthew Knepley y[9*(*idx)+1] += alpha2*(*v); 14732b692628SMatthew Knepley y[9*(*idx)+2] += alpha3*(*v); 14742b692628SMatthew Knepley y[9*(*idx)+3] += alpha4*(*v); 14752b692628SMatthew Knepley y[9*(*idx)+4] += alpha5*(*v); 14762b692628SMatthew Knepley y[9*(*idx)+5] += alpha6*(*v); 14772b692628SMatthew Knepley y[9*(*idx)+6] += alpha7*(*v); 14782b692628SMatthew Knepley y[9*(*idx)+7] += alpha8*(*v); 14792b692628SMatthew Knepley y[9*(*idx)+8] += alpha9*(*v); 14802b692628SMatthew Knepley idx++; v++; 14812b692628SMatthew Knepley } 14822b692628SMatthew Knepley } 1483899cda47SBarry Smith ierr = PetscLogFlops(18*a->nz - 9*b->AIJ->cmap.n);CHKERRQ(ierr); 14841ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 14851ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 14862b692628SMatthew Knepley PetscFunctionReturn(0); 14872b692628SMatthew Knepley } 14882b692628SMatthew Knepley 14892b692628SMatthew Knepley #undef __FUNCT__ 14902b692628SMatthew Knepley #define __FUNCT__ "MatMultAdd_SeqMAIJ_9" 1491dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 14922b692628SMatthew Knepley { 14932b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 14942b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 14952b692628SMatthew Knepley PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1496dfbe8321SBarry Smith PetscErrorCode ierr; 1497899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1498b24ad042SBarry Smith PetscInt n,i,jrow,j; 14992b692628SMatthew Knepley 15002b692628SMatthew Knepley PetscFunctionBegin; 15012b692628SMatthew Knepley if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 15021ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 15031ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 15042b692628SMatthew Knepley idx = a->j; 15052b692628SMatthew Knepley v = a->a; 15062b692628SMatthew Knepley ii = a->i; 15072b692628SMatthew Knepley 15082b692628SMatthew Knepley for (i=0; i<m; i++) { 15092b692628SMatthew Knepley jrow = ii[i]; 15102b692628SMatthew Knepley n = ii[i+1] - jrow; 15112b692628SMatthew Knepley sum1 = 0.0; 15122b692628SMatthew Knepley sum2 = 0.0; 15132b692628SMatthew Knepley sum3 = 0.0; 15142b692628SMatthew Knepley sum4 = 0.0; 15152b692628SMatthew Knepley sum5 = 0.0; 15162b692628SMatthew Knepley sum6 = 0.0; 15172b692628SMatthew Knepley sum7 = 0.0; 15182b692628SMatthew Knepley sum8 = 0.0; 15192b692628SMatthew Knepley sum9 = 0.0; 15202b692628SMatthew Knepley for (j=0; j<n; j++) { 15212b692628SMatthew Knepley sum1 += v[jrow]*x[9*idx[jrow]]; 15222b692628SMatthew Knepley sum2 += v[jrow]*x[9*idx[jrow]+1]; 15232b692628SMatthew Knepley sum3 += v[jrow]*x[9*idx[jrow]+2]; 15242b692628SMatthew Knepley sum4 += v[jrow]*x[9*idx[jrow]+3]; 15252b692628SMatthew Knepley sum5 += v[jrow]*x[9*idx[jrow]+4]; 15262b692628SMatthew Knepley sum6 += v[jrow]*x[9*idx[jrow]+5]; 15272b692628SMatthew Knepley sum7 += v[jrow]*x[9*idx[jrow]+6]; 15282b692628SMatthew Knepley sum8 += v[jrow]*x[9*idx[jrow]+7]; 15292b692628SMatthew Knepley sum9 += v[jrow]*x[9*idx[jrow]+8]; 15302b692628SMatthew Knepley jrow++; 15312b692628SMatthew Knepley } 15322b692628SMatthew Knepley y[9*i] += sum1; 15332b692628SMatthew Knepley y[9*i+1] += sum2; 15342b692628SMatthew Knepley y[9*i+2] += sum3; 15352b692628SMatthew Knepley y[9*i+3] += sum4; 15362b692628SMatthew Knepley y[9*i+4] += sum5; 15372b692628SMatthew Knepley y[9*i+5] += sum6; 15382b692628SMatthew Knepley y[9*i+6] += sum7; 15392b692628SMatthew Knepley y[9*i+7] += sum8; 15402b692628SMatthew Knepley y[9*i+8] += sum9; 15412b692628SMatthew Knepley } 15422b692628SMatthew Knepley 1543efee365bSSatish Balay ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 15441ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 15451ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 15462b692628SMatthew Knepley PetscFunctionReturn(0); 15472b692628SMatthew Knepley } 15482b692628SMatthew Knepley 15492b692628SMatthew Knepley #undef __FUNCT__ 15502b692628SMatthew Knepley #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9" 1551dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 15522b692628SMatthew Knepley { 15532b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 15542b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 15552b692628SMatthew Knepley PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1556dfbe8321SBarry Smith PetscErrorCode ierr; 1557899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 15582b692628SMatthew Knepley 15592b692628SMatthew Knepley PetscFunctionBegin; 15602b692628SMatthew Knepley if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 15611ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 15621ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 15632b692628SMatthew Knepley for (i=0; i<m; i++) { 15642b692628SMatthew Knepley idx = a->j + a->i[i] ; 15652b692628SMatthew Knepley v = a->a + a->i[i] ; 15662b692628SMatthew Knepley n = a->i[i+1] - a->i[i]; 15672b692628SMatthew Knepley alpha1 = x[9*i]; 15682b692628SMatthew Knepley alpha2 = x[9*i+1]; 15692b692628SMatthew Knepley alpha3 = x[9*i+2]; 15702b692628SMatthew Knepley alpha4 = x[9*i+3]; 15712b692628SMatthew Knepley alpha5 = x[9*i+4]; 15722b692628SMatthew Knepley alpha6 = x[9*i+5]; 15732b692628SMatthew Knepley alpha7 = x[9*i+6]; 15742b692628SMatthew Knepley alpha8 = x[9*i+7]; 15752b692628SMatthew Knepley alpha9 = x[9*i+8]; 15762b692628SMatthew Knepley while (n-->0) { 15772b692628SMatthew Knepley y[9*(*idx)] += alpha1*(*v); 15782b692628SMatthew Knepley y[9*(*idx)+1] += alpha2*(*v); 15792b692628SMatthew Knepley y[9*(*idx)+2] += alpha3*(*v); 15802b692628SMatthew Knepley y[9*(*idx)+3] += alpha4*(*v); 15812b692628SMatthew Knepley y[9*(*idx)+4] += alpha5*(*v); 15822b692628SMatthew Knepley y[9*(*idx)+5] += alpha6*(*v); 15832b692628SMatthew Knepley y[9*(*idx)+6] += alpha7*(*v); 15842b692628SMatthew Knepley y[9*(*idx)+7] += alpha8*(*v); 15852b692628SMatthew Knepley y[9*(*idx)+8] += alpha9*(*v); 15862b692628SMatthew Knepley idx++; v++; 15872b692628SMatthew Knepley } 15882b692628SMatthew Knepley } 1589efee365bSSatish Balay ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 15901ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 15911ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 15922b692628SMatthew Knepley PetscFunctionReturn(0); 15932b692628SMatthew Knepley } 1594b9cda22cSBarry Smith /*--------------------------------------------------------------------------------------------*/ 1595b9cda22cSBarry Smith #undef __FUNCT__ 1596b9cda22cSBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_10" 1597b9cda22cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1598b9cda22cSBarry Smith { 1599b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1600b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1601b9cda22cSBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1602b9cda22cSBarry Smith PetscErrorCode ierr; 1603899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1604b9cda22cSBarry Smith PetscInt n,i,jrow,j; 1605b9cda22cSBarry Smith 1606b9cda22cSBarry Smith PetscFunctionBegin; 1607b9cda22cSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1608b9cda22cSBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1609b9cda22cSBarry Smith idx = a->j; 1610b9cda22cSBarry Smith v = a->a; 1611b9cda22cSBarry Smith ii = a->i; 1612b9cda22cSBarry Smith 1613b9cda22cSBarry Smith for (i=0; i<m; i++) { 1614b9cda22cSBarry Smith jrow = ii[i]; 1615b9cda22cSBarry Smith n = ii[i+1] - jrow; 1616b9cda22cSBarry Smith sum1 = 0.0; 1617b9cda22cSBarry Smith sum2 = 0.0; 1618b9cda22cSBarry Smith sum3 = 0.0; 1619b9cda22cSBarry Smith sum4 = 0.0; 1620b9cda22cSBarry Smith sum5 = 0.0; 1621b9cda22cSBarry Smith sum6 = 0.0; 1622b9cda22cSBarry Smith sum7 = 0.0; 1623b9cda22cSBarry Smith sum8 = 0.0; 1624b9cda22cSBarry Smith sum9 = 0.0; 1625b9cda22cSBarry Smith sum10 = 0.0; 1626b9cda22cSBarry Smith for (j=0; j<n; j++) { 1627b9cda22cSBarry Smith sum1 += v[jrow]*x[10*idx[jrow]]; 1628b9cda22cSBarry Smith sum2 += v[jrow]*x[10*idx[jrow]+1]; 1629b9cda22cSBarry Smith sum3 += v[jrow]*x[10*idx[jrow]+2]; 1630b9cda22cSBarry Smith sum4 += v[jrow]*x[10*idx[jrow]+3]; 1631b9cda22cSBarry Smith sum5 += v[jrow]*x[10*idx[jrow]+4]; 1632b9cda22cSBarry Smith sum6 += v[jrow]*x[10*idx[jrow]+5]; 1633b9cda22cSBarry Smith sum7 += v[jrow]*x[10*idx[jrow]+6]; 1634b9cda22cSBarry Smith sum8 += v[jrow]*x[10*idx[jrow]+7]; 1635b9cda22cSBarry Smith sum9 += v[jrow]*x[10*idx[jrow]+8]; 1636b9cda22cSBarry Smith sum10 += v[jrow]*x[10*idx[jrow]+9]; 1637b9cda22cSBarry Smith jrow++; 1638b9cda22cSBarry Smith } 1639b9cda22cSBarry Smith y[10*i] = sum1; 1640b9cda22cSBarry Smith y[10*i+1] = sum2; 1641b9cda22cSBarry Smith y[10*i+2] = sum3; 1642b9cda22cSBarry Smith y[10*i+3] = sum4; 1643b9cda22cSBarry Smith y[10*i+4] = sum5; 1644b9cda22cSBarry Smith y[10*i+5] = sum6; 1645b9cda22cSBarry Smith y[10*i+6] = sum7; 1646b9cda22cSBarry Smith y[10*i+7] = sum8; 1647b9cda22cSBarry Smith y[10*i+8] = sum9; 1648b9cda22cSBarry Smith y[10*i+9] = sum10; 1649b9cda22cSBarry Smith } 1650b9cda22cSBarry Smith 1651b9cda22cSBarry Smith ierr = PetscLogFlops(20*a->nz - 10*m);CHKERRQ(ierr); 1652b9cda22cSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1653b9cda22cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1654b9cda22cSBarry Smith PetscFunctionReturn(0); 1655b9cda22cSBarry Smith } 1656b9cda22cSBarry Smith 1657b9cda22cSBarry Smith #undef __FUNCT__ 1658b9cda22cSBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_10" 1659b9cda22cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1660b9cda22cSBarry Smith { 1661b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1662b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1663b9cda22cSBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1664b9cda22cSBarry Smith PetscErrorCode ierr; 1665899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1666b9cda22cSBarry Smith PetscInt n,i,jrow,j; 1667b9cda22cSBarry Smith 1668b9cda22cSBarry Smith PetscFunctionBegin; 1669b9cda22cSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1670b9cda22cSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1671b9cda22cSBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1672b9cda22cSBarry Smith idx = a->j; 1673b9cda22cSBarry Smith v = a->a; 1674b9cda22cSBarry Smith ii = a->i; 1675b9cda22cSBarry Smith 1676b9cda22cSBarry Smith for (i=0; i<m; i++) { 1677b9cda22cSBarry Smith jrow = ii[i]; 1678b9cda22cSBarry Smith n = ii[i+1] - jrow; 1679b9cda22cSBarry Smith sum1 = 0.0; 1680b9cda22cSBarry Smith sum2 = 0.0; 1681b9cda22cSBarry Smith sum3 = 0.0; 1682b9cda22cSBarry Smith sum4 = 0.0; 1683b9cda22cSBarry Smith sum5 = 0.0; 1684b9cda22cSBarry Smith sum6 = 0.0; 1685b9cda22cSBarry Smith sum7 = 0.0; 1686b9cda22cSBarry Smith sum8 = 0.0; 1687b9cda22cSBarry Smith sum9 = 0.0; 1688b9cda22cSBarry Smith sum10 = 0.0; 1689b9cda22cSBarry Smith for (j=0; j<n; j++) { 1690b9cda22cSBarry Smith sum1 += v[jrow]*x[10*idx[jrow]]; 1691b9cda22cSBarry Smith sum2 += v[jrow]*x[10*idx[jrow]+1]; 1692b9cda22cSBarry Smith sum3 += v[jrow]*x[10*idx[jrow]+2]; 1693b9cda22cSBarry Smith sum4 += v[jrow]*x[10*idx[jrow]+3]; 1694b9cda22cSBarry Smith sum5 += v[jrow]*x[10*idx[jrow]+4]; 1695b9cda22cSBarry Smith sum6 += v[jrow]*x[10*idx[jrow]+5]; 1696b9cda22cSBarry Smith sum7 += v[jrow]*x[10*idx[jrow]+6]; 1697b9cda22cSBarry Smith sum8 += v[jrow]*x[10*idx[jrow]+7]; 1698b9cda22cSBarry Smith sum9 += v[jrow]*x[10*idx[jrow]+8]; 1699b9cda22cSBarry Smith sum10 += v[jrow]*x[10*idx[jrow]+9]; 1700b9cda22cSBarry Smith jrow++; 1701b9cda22cSBarry Smith } 1702b9cda22cSBarry Smith y[10*i] += sum1; 1703b9cda22cSBarry Smith y[10*i+1] += sum2; 1704b9cda22cSBarry Smith y[10*i+2] += sum3; 1705b9cda22cSBarry Smith y[10*i+3] += sum4; 1706b9cda22cSBarry Smith y[10*i+4] += sum5; 1707b9cda22cSBarry Smith y[10*i+5] += sum6; 1708b9cda22cSBarry Smith y[10*i+6] += sum7; 1709b9cda22cSBarry Smith y[10*i+7] += sum8; 1710b9cda22cSBarry Smith y[10*i+8] += sum9; 1711b9cda22cSBarry Smith y[10*i+9] += sum10; 1712b9cda22cSBarry Smith } 1713b9cda22cSBarry Smith 1714b9cda22cSBarry Smith ierr = PetscLogFlops(20*a->nz - 10*m);CHKERRQ(ierr); 1715b9cda22cSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1716b9cda22cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1717b9cda22cSBarry Smith PetscFunctionReturn(0); 1718b9cda22cSBarry Smith } 1719b9cda22cSBarry Smith 1720b9cda22cSBarry Smith #undef __FUNCT__ 1721b9cda22cSBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10" 1722b9cda22cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1723b9cda22cSBarry Smith { 1724b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1725b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1726b9cda22cSBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,zero = 0.0; 1727b9cda22cSBarry Smith PetscErrorCode ierr; 1728899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1729b9cda22cSBarry Smith 1730b9cda22cSBarry Smith PetscFunctionBegin; 1731b9cda22cSBarry Smith ierr = VecSet(yy,zero);CHKERRQ(ierr); 1732b9cda22cSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1733b9cda22cSBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1734b9cda22cSBarry Smith 1735b9cda22cSBarry Smith for (i=0; i<m; i++) { 1736b9cda22cSBarry Smith idx = a->j + a->i[i] ; 1737b9cda22cSBarry Smith v = a->a + a->i[i] ; 1738b9cda22cSBarry Smith n = a->i[i+1] - a->i[i]; 1739e29fdc22SBarry Smith alpha1 = x[10*i]; 1740e29fdc22SBarry Smith alpha2 = x[10*i+1]; 1741e29fdc22SBarry Smith alpha3 = x[10*i+2]; 1742e29fdc22SBarry Smith alpha4 = x[10*i+3]; 1743e29fdc22SBarry Smith alpha5 = x[10*i+4]; 1744e29fdc22SBarry Smith alpha6 = x[10*i+5]; 1745e29fdc22SBarry Smith alpha7 = x[10*i+6]; 1746e29fdc22SBarry Smith alpha8 = x[10*i+7]; 1747e29fdc22SBarry Smith alpha9 = x[10*i+8]; 1748b9cda22cSBarry Smith alpha10 = x[10*i+9]; 1749b9cda22cSBarry Smith while (n-->0) { 1750e29fdc22SBarry Smith y[10*(*idx)] += alpha1*(*v); 1751e29fdc22SBarry Smith y[10*(*idx)+1] += alpha2*(*v); 1752e29fdc22SBarry Smith y[10*(*idx)+2] += alpha3*(*v); 1753e29fdc22SBarry Smith y[10*(*idx)+3] += alpha4*(*v); 1754e29fdc22SBarry Smith y[10*(*idx)+4] += alpha5*(*v); 1755e29fdc22SBarry Smith y[10*(*idx)+5] += alpha6*(*v); 1756e29fdc22SBarry Smith y[10*(*idx)+6] += alpha7*(*v); 1757e29fdc22SBarry Smith y[10*(*idx)+7] += alpha8*(*v); 1758e29fdc22SBarry Smith y[10*(*idx)+8] += alpha9*(*v); 1759b9cda22cSBarry Smith y[10*(*idx)+9] += alpha10*(*v); 1760b9cda22cSBarry Smith idx++; v++; 1761b9cda22cSBarry Smith } 1762b9cda22cSBarry Smith } 1763899cda47SBarry Smith ierr = PetscLogFlops(20*a->nz - 10*b->AIJ->cmap.n);CHKERRQ(ierr); 1764b9cda22cSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1765b9cda22cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1766b9cda22cSBarry Smith PetscFunctionReturn(0); 1767b9cda22cSBarry Smith } 1768b9cda22cSBarry Smith 1769b9cda22cSBarry Smith #undef __FUNCT__ 1770b9cda22cSBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10" 1771b9cda22cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1772b9cda22cSBarry Smith { 1773b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1774b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1775b9cda22cSBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1776b9cda22cSBarry Smith PetscErrorCode ierr; 1777899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1778b9cda22cSBarry Smith 1779b9cda22cSBarry Smith PetscFunctionBegin; 1780b9cda22cSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1781b9cda22cSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1782b9cda22cSBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1783b9cda22cSBarry Smith for (i=0; i<m; i++) { 1784b9cda22cSBarry Smith idx = a->j + a->i[i] ; 1785b9cda22cSBarry Smith v = a->a + a->i[i] ; 1786b9cda22cSBarry Smith n = a->i[i+1] - a->i[i]; 1787b9cda22cSBarry Smith alpha1 = x[10*i]; 1788b9cda22cSBarry Smith alpha2 = x[10*i+1]; 1789b9cda22cSBarry Smith alpha3 = x[10*i+2]; 1790b9cda22cSBarry Smith alpha4 = x[10*i+3]; 1791b9cda22cSBarry Smith alpha5 = x[10*i+4]; 1792b9cda22cSBarry Smith alpha6 = x[10*i+5]; 1793b9cda22cSBarry Smith alpha7 = x[10*i+6]; 1794b9cda22cSBarry Smith alpha8 = x[10*i+7]; 1795b9cda22cSBarry Smith alpha9 = x[10*i+8]; 1796b9cda22cSBarry Smith alpha10 = x[10*i+9]; 1797b9cda22cSBarry Smith while (n-->0) { 1798b9cda22cSBarry Smith y[10*(*idx)] += alpha1*(*v); 1799b9cda22cSBarry Smith y[10*(*idx)+1] += alpha2*(*v); 1800b9cda22cSBarry Smith y[10*(*idx)+2] += alpha3*(*v); 1801b9cda22cSBarry Smith y[10*(*idx)+3] += alpha4*(*v); 1802b9cda22cSBarry Smith y[10*(*idx)+4] += alpha5*(*v); 1803b9cda22cSBarry Smith y[10*(*idx)+5] += alpha6*(*v); 1804b9cda22cSBarry Smith y[10*(*idx)+6] += alpha7*(*v); 1805b9cda22cSBarry Smith y[10*(*idx)+7] += alpha8*(*v); 1806b9cda22cSBarry Smith y[10*(*idx)+8] += alpha9*(*v); 1807b9cda22cSBarry Smith y[10*(*idx)+9] += alpha10*(*v); 1808b9cda22cSBarry Smith idx++; v++; 1809b9cda22cSBarry Smith } 1810b9cda22cSBarry Smith } 1811b9cda22cSBarry Smith ierr = PetscLogFlops(20*a->nz);CHKERRQ(ierr); 1812b9cda22cSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1813b9cda22cSBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1814b9cda22cSBarry Smith PetscFunctionReturn(0); 1815b9cda22cSBarry Smith } 1816b9cda22cSBarry Smith 18172b692628SMatthew Knepley 18182f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/ 18192f7816d4SBarry Smith #undef __FUNCT__ 18202f7816d4SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_16" 1821dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 18222f7816d4SBarry Smith { 18232f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 18242f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 18252f7816d4SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 18262f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1827dfbe8321SBarry Smith PetscErrorCode ierr; 1828899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1829b24ad042SBarry Smith PetscInt n,i,jrow,j; 18302f7816d4SBarry Smith 18312f7816d4SBarry Smith PetscFunctionBegin; 18321ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 18331ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 18342f7816d4SBarry Smith idx = a->j; 18352f7816d4SBarry Smith v = a->a; 18362f7816d4SBarry Smith ii = a->i; 18372f7816d4SBarry Smith 18382f7816d4SBarry Smith for (i=0; i<m; i++) { 18392f7816d4SBarry Smith jrow = ii[i]; 18402f7816d4SBarry Smith n = ii[i+1] - jrow; 18412f7816d4SBarry Smith sum1 = 0.0; 18422f7816d4SBarry Smith sum2 = 0.0; 18432f7816d4SBarry Smith sum3 = 0.0; 18442f7816d4SBarry Smith sum4 = 0.0; 18452f7816d4SBarry Smith sum5 = 0.0; 18462f7816d4SBarry Smith sum6 = 0.0; 18472f7816d4SBarry Smith sum7 = 0.0; 18482f7816d4SBarry Smith sum8 = 0.0; 18492f7816d4SBarry Smith sum9 = 0.0; 18502f7816d4SBarry Smith sum10 = 0.0; 18512f7816d4SBarry Smith sum11 = 0.0; 18522f7816d4SBarry Smith sum12 = 0.0; 18532f7816d4SBarry Smith sum13 = 0.0; 18542f7816d4SBarry Smith sum14 = 0.0; 18552f7816d4SBarry Smith sum15 = 0.0; 18562f7816d4SBarry Smith sum16 = 0.0; 18572f7816d4SBarry Smith for (j=0; j<n; j++) { 18582f7816d4SBarry Smith sum1 += v[jrow]*x[16*idx[jrow]]; 18592f7816d4SBarry Smith sum2 += v[jrow]*x[16*idx[jrow]+1]; 18602f7816d4SBarry Smith sum3 += v[jrow]*x[16*idx[jrow]+2]; 18612f7816d4SBarry Smith sum4 += v[jrow]*x[16*idx[jrow]+3]; 18622f7816d4SBarry Smith sum5 += v[jrow]*x[16*idx[jrow]+4]; 18632f7816d4SBarry Smith sum6 += v[jrow]*x[16*idx[jrow]+5]; 18642f7816d4SBarry Smith sum7 += v[jrow]*x[16*idx[jrow]+6]; 18652f7816d4SBarry Smith sum8 += v[jrow]*x[16*idx[jrow]+7]; 18662f7816d4SBarry Smith sum9 += v[jrow]*x[16*idx[jrow]+8]; 18672f7816d4SBarry Smith sum10 += v[jrow]*x[16*idx[jrow]+9]; 18682f7816d4SBarry Smith sum11 += v[jrow]*x[16*idx[jrow]+10]; 18692f7816d4SBarry Smith sum12 += v[jrow]*x[16*idx[jrow]+11]; 18702f7816d4SBarry Smith sum13 += v[jrow]*x[16*idx[jrow]+12]; 18712f7816d4SBarry Smith sum14 += v[jrow]*x[16*idx[jrow]+13]; 18722f7816d4SBarry Smith sum15 += v[jrow]*x[16*idx[jrow]+14]; 18732f7816d4SBarry Smith sum16 += v[jrow]*x[16*idx[jrow]+15]; 18742f7816d4SBarry Smith jrow++; 18752f7816d4SBarry Smith } 18762f7816d4SBarry Smith y[16*i] = sum1; 18772f7816d4SBarry Smith y[16*i+1] = sum2; 18782f7816d4SBarry Smith y[16*i+2] = sum3; 18792f7816d4SBarry Smith y[16*i+3] = sum4; 18802f7816d4SBarry Smith y[16*i+4] = sum5; 18812f7816d4SBarry Smith y[16*i+5] = sum6; 18822f7816d4SBarry Smith y[16*i+6] = sum7; 18832f7816d4SBarry Smith y[16*i+7] = sum8; 18842f7816d4SBarry Smith y[16*i+8] = sum9; 18852f7816d4SBarry Smith y[16*i+9] = sum10; 18862f7816d4SBarry Smith y[16*i+10] = sum11; 18872f7816d4SBarry Smith y[16*i+11] = sum12; 18882f7816d4SBarry Smith y[16*i+12] = sum13; 18892f7816d4SBarry Smith y[16*i+13] = sum14; 18902f7816d4SBarry Smith y[16*i+14] = sum15; 18912f7816d4SBarry Smith y[16*i+15] = sum16; 18922f7816d4SBarry Smith } 18932f7816d4SBarry Smith 1894efee365bSSatish Balay ierr = PetscLogFlops(32*a->nz - 16*m);CHKERRQ(ierr); 18951ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 18961ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 18972f7816d4SBarry Smith PetscFunctionReturn(0); 18982f7816d4SBarry Smith } 18992f7816d4SBarry Smith 19002f7816d4SBarry Smith #undef __FUNCT__ 19012f7816d4SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 1902dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 19032f7816d4SBarry Smith { 19042f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 19052f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 19062f7816d4SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 19072f7816d4SBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1908dfbe8321SBarry Smith PetscErrorCode ierr; 1909899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 19102f7816d4SBarry Smith 19112f7816d4SBarry Smith PetscFunctionBegin; 19122dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 19131ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 19141ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1915bfec09a0SHong Zhang 19162f7816d4SBarry Smith for (i=0; i<m; i++) { 1917bfec09a0SHong Zhang idx = a->j + a->i[i] ; 1918bfec09a0SHong Zhang v = a->a + a->i[i] ; 19192f7816d4SBarry Smith n = a->i[i+1] - a->i[i]; 19202f7816d4SBarry Smith alpha1 = x[16*i]; 19212f7816d4SBarry Smith alpha2 = x[16*i+1]; 19222f7816d4SBarry Smith alpha3 = x[16*i+2]; 19232f7816d4SBarry Smith alpha4 = x[16*i+3]; 19242f7816d4SBarry Smith alpha5 = x[16*i+4]; 19252f7816d4SBarry Smith alpha6 = x[16*i+5]; 19262f7816d4SBarry Smith alpha7 = x[16*i+6]; 19272f7816d4SBarry Smith alpha8 = x[16*i+7]; 19282f7816d4SBarry Smith alpha9 = x[16*i+8]; 19292f7816d4SBarry Smith alpha10 = x[16*i+9]; 19302f7816d4SBarry Smith alpha11 = x[16*i+10]; 19312f7816d4SBarry Smith alpha12 = x[16*i+11]; 19322f7816d4SBarry Smith alpha13 = x[16*i+12]; 19332f7816d4SBarry Smith alpha14 = x[16*i+13]; 19342f7816d4SBarry Smith alpha15 = x[16*i+14]; 19352f7816d4SBarry Smith alpha16 = x[16*i+15]; 19362f7816d4SBarry Smith while (n-->0) { 19372f7816d4SBarry Smith y[16*(*idx)] += alpha1*(*v); 19382f7816d4SBarry Smith y[16*(*idx)+1] += alpha2*(*v); 19392f7816d4SBarry Smith y[16*(*idx)+2] += alpha3*(*v); 19402f7816d4SBarry Smith y[16*(*idx)+3] += alpha4*(*v); 19412f7816d4SBarry Smith y[16*(*idx)+4] += alpha5*(*v); 19422f7816d4SBarry Smith y[16*(*idx)+5] += alpha6*(*v); 19432f7816d4SBarry Smith y[16*(*idx)+6] += alpha7*(*v); 19442f7816d4SBarry Smith y[16*(*idx)+7] += alpha8*(*v); 19452f7816d4SBarry Smith y[16*(*idx)+8] += alpha9*(*v); 19462f7816d4SBarry Smith y[16*(*idx)+9] += alpha10*(*v); 19472f7816d4SBarry Smith y[16*(*idx)+10] += alpha11*(*v); 19482f7816d4SBarry Smith y[16*(*idx)+11] += alpha12*(*v); 19492f7816d4SBarry Smith y[16*(*idx)+12] += alpha13*(*v); 19502f7816d4SBarry Smith y[16*(*idx)+13] += alpha14*(*v); 19512f7816d4SBarry Smith y[16*(*idx)+14] += alpha15*(*v); 19522f7816d4SBarry Smith y[16*(*idx)+15] += alpha16*(*v); 19532f7816d4SBarry Smith idx++; v++; 19542f7816d4SBarry Smith } 19552f7816d4SBarry Smith } 1956899cda47SBarry Smith ierr = PetscLogFlops(32*a->nz - 16*b->AIJ->cmap.n);CHKERRQ(ierr); 19571ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 19581ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 19592f7816d4SBarry Smith PetscFunctionReturn(0); 19602f7816d4SBarry Smith } 19612f7816d4SBarry Smith 19622f7816d4SBarry Smith #undef __FUNCT__ 19632f7816d4SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 1964dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 19652f7816d4SBarry Smith { 19662f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 19672f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 19682f7816d4SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 19692f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1970dfbe8321SBarry Smith PetscErrorCode ierr; 1971899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1972b24ad042SBarry Smith PetscInt n,i,jrow,j; 19732f7816d4SBarry Smith 19742f7816d4SBarry Smith PetscFunctionBegin; 19752f7816d4SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 19761ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 19771ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 19782f7816d4SBarry Smith idx = a->j; 19792f7816d4SBarry Smith v = a->a; 19802f7816d4SBarry Smith ii = a->i; 19812f7816d4SBarry Smith 19822f7816d4SBarry Smith for (i=0; i<m; i++) { 19832f7816d4SBarry Smith jrow = ii[i]; 19842f7816d4SBarry Smith n = ii[i+1] - jrow; 19852f7816d4SBarry Smith sum1 = 0.0; 19862f7816d4SBarry Smith sum2 = 0.0; 19872f7816d4SBarry Smith sum3 = 0.0; 19882f7816d4SBarry Smith sum4 = 0.0; 19892f7816d4SBarry Smith sum5 = 0.0; 19902f7816d4SBarry Smith sum6 = 0.0; 19912f7816d4SBarry Smith sum7 = 0.0; 19922f7816d4SBarry Smith sum8 = 0.0; 19932f7816d4SBarry Smith sum9 = 0.0; 19942f7816d4SBarry Smith sum10 = 0.0; 19952f7816d4SBarry Smith sum11 = 0.0; 19962f7816d4SBarry Smith sum12 = 0.0; 19972f7816d4SBarry Smith sum13 = 0.0; 19982f7816d4SBarry Smith sum14 = 0.0; 19992f7816d4SBarry Smith sum15 = 0.0; 20002f7816d4SBarry Smith sum16 = 0.0; 20012f7816d4SBarry Smith for (j=0; j<n; j++) { 20022f7816d4SBarry Smith sum1 += v[jrow]*x[16*idx[jrow]]; 20032f7816d4SBarry Smith sum2 += v[jrow]*x[16*idx[jrow]+1]; 20042f7816d4SBarry Smith sum3 += v[jrow]*x[16*idx[jrow]+2]; 20052f7816d4SBarry Smith sum4 += v[jrow]*x[16*idx[jrow]+3]; 20062f7816d4SBarry Smith sum5 += v[jrow]*x[16*idx[jrow]+4]; 20072f7816d4SBarry Smith sum6 += v[jrow]*x[16*idx[jrow]+5]; 20082f7816d4SBarry Smith sum7 += v[jrow]*x[16*idx[jrow]+6]; 20092f7816d4SBarry Smith sum8 += v[jrow]*x[16*idx[jrow]+7]; 20102f7816d4SBarry Smith sum9 += v[jrow]*x[16*idx[jrow]+8]; 20112f7816d4SBarry Smith sum10 += v[jrow]*x[16*idx[jrow]+9]; 20122f7816d4SBarry Smith sum11 += v[jrow]*x[16*idx[jrow]+10]; 20132f7816d4SBarry Smith sum12 += v[jrow]*x[16*idx[jrow]+11]; 20142f7816d4SBarry Smith sum13 += v[jrow]*x[16*idx[jrow]+12]; 20152f7816d4SBarry Smith sum14 += v[jrow]*x[16*idx[jrow]+13]; 20162f7816d4SBarry Smith sum15 += v[jrow]*x[16*idx[jrow]+14]; 20172f7816d4SBarry Smith sum16 += v[jrow]*x[16*idx[jrow]+15]; 20182f7816d4SBarry Smith jrow++; 20192f7816d4SBarry Smith } 20202f7816d4SBarry Smith y[16*i] += sum1; 20212f7816d4SBarry Smith y[16*i+1] += sum2; 20222f7816d4SBarry Smith y[16*i+2] += sum3; 20232f7816d4SBarry Smith y[16*i+3] += sum4; 20242f7816d4SBarry Smith y[16*i+4] += sum5; 20252f7816d4SBarry Smith y[16*i+5] += sum6; 20262f7816d4SBarry Smith y[16*i+6] += sum7; 20272f7816d4SBarry Smith y[16*i+7] += sum8; 20282f7816d4SBarry Smith y[16*i+8] += sum9; 20292f7816d4SBarry Smith y[16*i+9] += sum10; 20302f7816d4SBarry Smith y[16*i+10] += sum11; 20312f7816d4SBarry Smith y[16*i+11] += sum12; 20322f7816d4SBarry Smith y[16*i+12] += sum13; 20332f7816d4SBarry Smith y[16*i+13] += sum14; 20342f7816d4SBarry Smith y[16*i+14] += sum15; 20352f7816d4SBarry Smith y[16*i+15] += sum16; 20362f7816d4SBarry Smith } 20372f7816d4SBarry Smith 2038efee365bSSatish Balay ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr); 20391ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 20401ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 20412f7816d4SBarry Smith PetscFunctionReturn(0); 20422f7816d4SBarry Smith } 20432f7816d4SBarry Smith 20442f7816d4SBarry Smith #undef __FUNCT__ 20452f7816d4SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 2046dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 20472f7816d4SBarry Smith { 20482f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 20492f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 20502f7816d4SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 20512f7816d4SBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2052dfbe8321SBarry Smith PetscErrorCode ierr; 2053899cda47SBarry Smith PetscInt m = b->AIJ->rmap.n,n,i,*idx; 20542f7816d4SBarry Smith 20552f7816d4SBarry Smith PetscFunctionBegin; 20562f7816d4SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 20571ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 20581ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 20592f7816d4SBarry Smith for (i=0; i<m; i++) { 2060bfec09a0SHong Zhang idx = a->j + a->i[i] ; 2061bfec09a0SHong Zhang v = a->a + a->i[i] ; 20622f7816d4SBarry Smith n = a->i[i+1] - a->i[i]; 20632f7816d4SBarry Smith alpha1 = x[16*i]; 20642f7816d4SBarry Smith alpha2 = x[16*i+1]; 20652f7816d4SBarry Smith alpha3 = x[16*i+2]; 20662f7816d4SBarry Smith alpha4 = x[16*i+3]; 20672f7816d4SBarry Smith alpha5 = x[16*i+4]; 20682f7816d4SBarry Smith alpha6 = x[16*i+5]; 20692f7816d4SBarry Smith alpha7 = x[16*i+6]; 20702f7816d4SBarry Smith alpha8 = x[16*i+7]; 20712f7816d4SBarry Smith alpha9 = x[16*i+8]; 20722f7816d4SBarry Smith alpha10 = x[16*i+9]; 20732f7816d4SBarry Smith alpha11 = x[16*i+10]; 20742f7816d4SBarry Smith alpha12 = x[16*i+11]; 20752f7816d4SBarry Smith alpha13 = x[16*i+12]; 20762f7816d4SBarry Smith alpha14 = x[16*i+13]; 20772f7816d4SBarry Smith alpha15 = x[16*i+14]; 20782f7816d4SBarry Smith alpha16 = x[16*i+15]; 20792f7816d4SBarry Smith while (n-->0) { 20802f7816d4SBarry Smith y[16*(*idx)] += alpha1*(*v); 20812f7816d4SBarry Smith y[16*(*idx)+1] += alpha2*(*v); 20822f7816d4SBarry Smith y[16*(*idx)+2] += alpha3*(*v); 20832f7816d4SBarry Smith y[16*(*idx)+3] += alpha4*(*v); 20842f7816d4SBarry Smith y[16*(*idx)+4] += alpha5*(*v); 20852f7816d4SBarry Smith y[16*(*idx)+5] += alpha6*(*v); 20862f7816d4SBarry Smith y[16*(*idx)+6] += alpha7*(*v); 20872f7816d4SBarry Smith y[16*(*idx)+7] += alpha8*(*v); 20882f7816d4SBarry Smith y[16*(*idx)+8] += alpha9*(*v); 20892f7816d4SBarry Smith y[16*(*idx)+9] += alpha10*(*v); 20902f7816d4SBarry Smith y[16*(*idx)+10] += alpha11*(*v); 20912f7816d4SBarry Smith y[16*(*idx)+11] += alpha12*(*v); 20922f7816d4SBarry Smith y[16*(*idx)+12] += alpha13*(*v); 20932f7816d4SBarry Smith y[16*(*idx)+13] += alpha14*(*v); 20942f7816d4SBarry Smith y[16*(*idx)+14] += alpha15*(*v); 20952f7816d4SBarry Smith y[16*(*idx)+15] += alpha16*(*v); 20962f7816d4SBarry Smith idx++; v++; 20972f7816d4SBarry Smith } 20982f7816d4SBarry Smith } 2099efee365bSSatish Balay ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr); 21001ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 21011ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 210266ed3db0SBarry Smith PetscFunctionReturn(0); 210366ed3db0SBarry Smith } 210466ed3db0SBarry Smith 2105f4a53059SBarry Smith /*===================================================================================*/ 21064a2ae208SSatish Balay #undef __FUNCT__ 21074a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIMAIJ_dof" 2108dfbe8321SBarry Smith PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2109f4a53059SBarry Smith { 21104cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2111dfbe8321SBarry Smith PetscErrorCode ierr; 2112f4a53059SBarry Smith 2113b24ad042SBarry Smith PetscFunctionBegin; 2114f4a53059SBarry Smith /* start the scatter */ 2115f4a53059SBarry Smith ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 21164cb9d9b8SBarry Smith ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 2117f4a53059SBarry Smith ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 21184cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 2119f4a53059SBarry Smith PetscFunctionReturn(0); 2120f4a53059SBarry Smith } 2121f4a53059SBarry Smith 21224a2ae208SSatish Balay #undef __FUNCT__ 21234a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 2124dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 21254cb9d9b8SBarry Smith { 21264cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2127dfbe8321SBarry Smith PetscErrorCode ierr; 2128b24ad042SBarry Smith 21294cb9d9b8SBarry Smith PetscFunctionBegin; 21304cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 21314cb9d9b8SBarry Smith ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 2132a5ff213dSBarry Smith ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 21334cb9d9b8SBarry Smith ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 21344cb9d9b8SBarry Smith PetscFunctionReturn(0); 21354cb9d9b8SBarry Smith } 21364cb9d9b8SBarry Smith 21374a2ae208SSatish Balay #undef __FUNCT__ 21384a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 2139dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 21404cb9d9b8SBarry Smith { 21414cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2142dfbe8321SBarry Smith PetscErrorCode ierr; 21434cb9d9b8SBarry Smith 2144b24ad042SBarry Smith PetscFunctionBegin; 21454cb9d9b8SBarry Smith /* start the scatter */ 21464cb9d9b8SBarry Smith ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 2147d72c5c08SBarry Smith ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 21484cb9d9b8SBarry Smith ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 2149717f2ec8SHong Zhang ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 21504cb9d9b8SBarry Smith PetscFunctionReturn(0); 21514cb9d9b8SBarry Smith } 21524cb9d9b8SBarry Smith 21534a2ae208SSatish Balay #undef __FUNCT__ 21544a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 2155dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 21564cb9d9b8SBarry Smith { 21574cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2158dfbe8321SBarry Smith PetscErrorCode ierr; 2159b24ad042SBarry Smith 21604cb9d9b8SBarry Smith PetscFunctionBegin; 21614cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2162d72c5c08SBarry Smith ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 2163d72c5c08SBarry Smith ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2164d72c5c08SBarry Smith ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 21654cb9d9b8SBarry Smith PetscFunctionReturn(0); 21664cb9d9b8SBarry Smith } 21674cb9d9b8SBarry Smith 2168*95ddefa2SHong Zhang /* ----------------------------------------------------------------*/ 21699c4fc4c7SBarry Smith #undef __FUNCT__ 21707ba1a0bfSKris Buschelman #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 21717ba1a0bfSKris Buschelman PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 21727ba1a0bfSKris Buschelman { 21737ba1a0bfSKris Buschelman /* This routine requires testing -- but it's getting better. */ 21747ba1a0bfSKris Buschelman PetscErrorCode ierr; 2175a1a86e44SBarry Smith PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 21767ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 21777ba1a0bfSKris Buschelman Mat P=pp->AIJ; 21787ba1a0bfSKris Buschelman Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 21797ba1a0bfSKris Buschelman PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 21807ba1a0bfSKris Buschelman PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 2181899cda47SBarry Smith PetscInt an=A->cmap.N,am=A->rmap.N,pn=P->cmap.N,pm=P->rmap.N,ppdof=pp->dof,cn; 21827ba1a0bfSKris Buschelman PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 21837ba1a0bfSKris Buschelman MatScalar *ca; 21847ba1a0bfSKris Buschelman 21857ba1a0bfSKris Buschelman PetscFunctionBegin; 21867ba1a0bfSKris Buschelman /* Start timer */ 21877ba1a0bfSKris Buschelman ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 21887ba1a0bfSKris Buschelman 21897ba1a0bfSKris Buschelman /* Get ij structure of P^T */ 21907ba1a0bfSKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 21917ba1a0bfSKris Buschelman 21927ba1a0bfSKris Buschelman cn = pn*ppdof; 21937ba1a0bfSKris Buschelman /* Allocate ci array, arrays for fill computation and */ 21947ba1a0bfSKris Buschelman /* free space for accumulating nonzero column info */ 21957ba1a0bfSKris Buschelman ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 21967ba1a0bfSKris Buschelman ci[0] = 0; 21977ba1a0bfSKris Buschelman 21987ba1a0bfSKris Buschelman /* Work arrays for rows of P^T*A */ 21997ba1a0bfSKris Buschelman ierr = PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 22007ba1a0bfSKris Buschelman ierr = PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 22017ba1a0bfSKris Buschelman ptasparserow = ptadenserow + an; 22027ba1a0bfSKris Buschelman denserow = ptasparserow + an; 22037ba1a0bfSKris Buschelman sparserow = denserow + cn; 22047ba1a0bfSKris Buschelman 22057ba1a0bfSKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 22067ba1a0bfSKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 22077ba1a0bfSKris Buschelman /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 2208a1a86e44SBarry Smith ierr = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space); 22097ba1a0bfSKris Buschelman current_space = free_space; 22107ba1a0bfSKris Buschelman 22117ba1a0bfSKris Buschelman /* Determine symbolic info for each row of C: */ 22127ba1a0bfSKris Buschelman for (i=0;i<pn;i++) { 22137ba1a0bfSKris Buschelman ptnzi = pti[i+1] - pti[i]; 22147ba1a0bfSKris Buschelman ptJ = ptj + pti[i]; 22157ba1a0bfSKris Buschelman for (dof=0;dof<ppdof;dof++) { 22167ba1a0bfSKris Buschelman ptanzi = 0; 22177ba1a0bfSKris Buschelman /* Determine symbolic row of PtA: */ 22187ba1a0bfSKris Buschelman for (j=0;j<ptnzi;j++) { 22197ba1a0bfSKris Buschelman /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 22207ba1a0bfSKris Buschelman arow = ptJ[j]*ppdof + dof; 22217ba1a0bfSKris Buschelman /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 22227ba1a0bfSKris Buschelman anzj = ai[arow+1] - ai[arow]; 22237ba1a0bfSKris Buschelman ajj = aj + ai[arow]; 22247ba1a0bfSKris Buschelman for (k=0;k<anzj;k++) { 22257ba1a0bfSKris Buschelman if (!ptadenserow[ajj[k]]) { 22267ba1a0bfSKris Buschelman ptadenserow[ajj[k]] = -1; 22277ba1a0bfSKris Buschelman ptasparserow[ptanzi++] = ajj[k]; 22287ba1a0bfSKris Buschelman } 22297ba1a0bfSKris Buschelman } 22307ba1a0bfSKris Buschelman } 22317ba1a0bfSKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 22327ba1a0bfSKris Buschelman ptaj = ptasparserow; 22337ba1a0bfSKris Buschelman cnzi = 0; 22347ba1a0bfSKris Buschelman for (j=0;j<ptanzi;j++) { 22357ba1a0bfSKris Buschelman /* Get offset within block of P */ 22367ba1a0bfSKris Buschelman pshift = *ptaj%ppdof; 22377ba1a0bfSKris Buschelman /* Get block row of P */ 22387ba1a0bfSKris Buschelman prow = (*ptaj++)/ppdof; /* integer division */ 22397ba1a0bfSKris Buschelman /* P has same number of nonzeros per row as the compressed form */ 22407ba1a0bfSKris Buschelman pnzj = pi[prow+1] - pi[prow]; 22417ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 22427ba1a0bfSKris Buschelman for (k=0;k<pnzj;k++) { 22437ba1a0bfSKris Buschelman /* Locations in C are shifted by the offset within the block */ 22447ba1a0bfSKris Buschelman /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 22457ba1a0bfSKris Buschelman if (!denserow[pjj[k]*ppdof+pshift]) { 22467ba1a0bfSKris Buschelman denserow[pjj[k]*ppdof+pshift] = -1; 22477ba1a0bfSKris Buschelman sparserow[cnzi++] = pjj[k]*ppdof+pshift; 22487ba1a0bfSKris Buschelman } 22497ba1a0bfSKris Buschelman } 22507ba1a0bfSKris Buschelman } 22517ba1a0bfSKris Buschelman 22527ba1a0bfSKris Buschelman /* sort sparserow */ 22537ba1a0bfSKris Buschelman ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 22547ba1a0bfSKris Buschelman 22557ba1a0bfSKris Buschelman /* If free space is not available, make more free space */ 22567ba1a0bfSKris Buschelman /* Double the amount of total space in the list */ 22577ba1a0bfSKris Buschelman if (current_space->local_remaining<cnzi) { 2258a1a86e44SBarry Smith ierr = PetscFreeSpaceGet(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 22597ba1a0bfSKris Buschelman } 22607ba1a0bfSKris Buschelman 22617ba1a0bfSKris Buschelman /* Copy data into free space, and zero out denserows */ 22627ba1a0bfSKris Buschelman ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 22637ba1a0bfSKris Buschelman current_space->array += cnzi; 22647ba1a0bfSKris Buschelman current_space->local_used += cnzi; 22657ba1a0bfSKris Buschelman current_space->local_remaining -= cnzi; 22667ba1a0bfSKris Buschelman 22677ba1a0bfSKris Buschelman for (j=0;j<ptanzi;j++) { 22687ba1a0bfSKris Buschelman ptadenserow[ptasparserow[j]] = 0; 22697ba1a0bfSKris Buschelman } 22707ba1a0bfSKris Buschelman for (j=0;j<cnzi;j++) { 22717ba1a0bfSKris Buschelman denserow[sparserow[j]] = 0; 22727ba1a0bfSKris Buschelman } 22737ba1a0bfSKris Buschelman /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 22747ba1a0bfSKris Buschelman /* For now, we will recompute what is needed. */ 22757ba1a0bfSKris Buschelman ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 22767ba1a0bfSKris Buschelman } 22777ba1a0bfSKris Buschelman } 22787ba1a0bfSKris Buschelman /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 22797ba1a0bfSKris Buschelman /* Allocate space for cj, initialize cj, and */ 22807ba1a0bfSKris Buschelman /* destroy list of free space and other temporary array(s) */ 22817ba1a0bfSKris Buschelman ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 2282a1a86e44SBarry Smith ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 22837ba1a0bfSKris Buschelman ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 22847ba1a0bfSKris Buschelman 22857ba1a0bfSKris Buschelman /* Allocate space for ca */ 22867ba1a0bfSKris Buschelman ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 22877ba1a0bfSKris Buschelman ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 22887ba1a0bfSKris Buschelman 22897ba1a0bfSKris Buschelman /* put together the new matrix */ 22907ba1a0bfSKris Buschelman ierr = MatCreateSeqAIJWithArrays(A->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr); 22917ba1a0bfSKris Buschelman 22927ba1a0bfSKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 22937ba1a0bfSKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 22947ba1a0bfSKris Buschelman c = (Mat_SeqAIJ *)((*C)->data); 2295e6b907acSBarry Smith c->free_a = PETSC_TRUE; 2296e6b907acSBarry Smith c->free_ij = PETSC_TRUE; 22977ba1a0bfSKris Buschelman c->nonew = 0; 22987ba1a0bfSKris Buschelman 22997ba1a0bfSKris Buschelman /* Clean up. */ 23007ba1a0bfSKris Buschelman ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 23017ba1a0bfSKris Buschelman 23027ba1a0bfSKris Buschelman ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 23037ba1a0bfSKris Buschelman PetscFunctionReturn(0); 23047ba1a0bfSKris Buschelman } 23057ba1a0bfSKris Buschelman 23067ba1a0bfSKris Buschelman #undef __FUNCT__ 23077ba1a0bfSKris Buschelman #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ" 23087ba1a0bfSKris Buschelman PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 23097ba1a0bfSKris Buschelman { 23107ba1a0bfSKris Buschelman /* This routine requires testing -- first draft only */ 23117ba1a0bfSKris Buschelman PetscErrorCode ierr; 23127ba1a0bfSKris Buschelman PetscInt flops=0; 23137ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 23147ba1a0bfSKris Buschelman Mat P=pp->AIJ; 23157ba1a0bfSKris Buschelman Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 23167ba1a0bfSKris Buschelman Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 23177ba1a0bfSKris Buschelman Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 23187ba1a0bfSKris Buschelman PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 23197ba1a0bfSKris Buschelman PetscInt *ci=c->i,*cj=c->j,*cjj; 2320899cda47SBarry Smith PetscInt am=A->rmap.N,cn=C->cmap.N,cm=C->rmap.N,ppdof=pp->dof; 23217ba1a0bfSKris Buschelman PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 23227ba1a0bfSKris Buschelman MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 23237ba1a0bfSKris Buschelman 23247ba1a0bfSKris Buschelman PetscFunctionBegin; 23257ba1a0bfSKris Buschelman /* Allocate temporary array for storage of one row of A*P */ 23267ba1a0bfSKris Buschelman ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 23277ba1a0bfSKris Buschelman ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 23287ba1a0bfSKris Buschelman 23297ba1a0bfSKris Buschelman apj = (PetscInt *)(apa + cn); 23307ba1a0bfSKris Buschelman apjdense = apj + cn; 23317ba1a0bfSKris Buschelman 23327ba1a0bfSKris Buschelman /* Clear old values in C */ 23337ba1a0bfSKris Buschelman ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 23347ba1a0bfSKris Buschelman 23357ba1a0bfSKris Buschelman for (i=0;i<am;i++) { 23367ba1a0bfSKris Buschelman /* Form sparse row of A*P */ 23377ba1a0bfSKris Buschelman anzi = ai[i+1] - ai[i]; 23387ba1a0bfSKris Buschelman apnzj = 0; 23397ba1a0bfSKris Buschelman for (j=0;j<anzi;j++) { 23407ba1a0bfSKris Buschelman /* Get offset within block of P */ 23417ba1a0bfSKris Buschelman pshift = *aj%ppdof; 23427ba1a0bfSKris Buschelman /* Get block row of P */ 23437ba1a0bfSKris Buschelman prow = *aj++/ppdof; /* integer division */ 23447ba1a0bfSKris Buschelman pnzj = pi[prow+1] - pi[prow]; 23457ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 23467ba1a0bfSKris Buschelman paj = pa + pi[prow]; 23477ba1a0bfSKris Buschelman for (k=0;k<pnzj;k++) { 23487ba1a0bfSKris Buschelman poffset = pjj[k]*ppdof+pshift; 23497ba1a0bfSKris Buschelman if (!apjdense[poffset]) { 23507ba1a0bfSKris Buschelman apjdense[poffset] = -1; 23517ba1a0bfSKris Buschelman apj[apnzj++] = poffset; 23527ba1a0bfSKris Buschelman } 23537ba1a0bfSKris Buschelman apa[poffset] += (*aa)*paj[k]; 23547ba1a0bfSKris Buschelman } 23557ba1a0bfSKris Buschelman flops += 2*pnzj; 23567ba1a0bfSKris Buschelman aa++; 23577ba1a0bfSKris Buschelman } 23587ba1a0bfSKris Buschelman 23597ba1a0bfSKris Buschelman /* Sort the j index array for quick sparse axpy. */ 23607ba1a0bfSKris Buschelman /* Note: a array does not need sorting as it is in dense storage locations. */ 23617ba1a0bfSKris Buschelman ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 23627ba1a0bfSKris Buschelman 23637ba1a0bfSKris Buschelman /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 23647ba1a0bfSKris Buschelman prow = i/ppdof; /* integer division */ 23657ba1a0bfSKris Buschelman pshift = i%ppdof; 23667ba1a0bfSKris Buschelman poffset = pi[prow]; 23677ba1a0bfSKris Buschelman pnzi = pi[prow+1] - poffset; 23687ba1a0bfSKris Buschelman /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 23697ba1a0bfSKris Buschelman pJ = pj+poffset; 23707ba1a0bfSKris Buschelman pA = pa+poffset; 23717ba1a0bfSKris Buschelman for (j=0;j<pnzi;j++) { 23727ba1a0bfSKris Buschelman crow = (*pJ)*ppdof+pshift; 23737ba1a0bfSKris Buschelman cjj = cj + ci[crow]; 23747ba1a0bfSKris Buschelman caj = ca + ci[crow]; 23757ba1a0bfSKris Buschelman pJ++; 23767ba1a0bfSKris Buschelman /* Perform sparse axpy operation. Note cjj includes apj. */ 23777ba1a0bfSKris Buschelman for (k=0,nextap=0;nextap<apnzj;k++) { 23787ba1a0bfSKris Buschelman if (cjj[k]==apj[nextap]) { 23797ba1a0bfSKris Buschelman caj[k] += (*pA)*apa[apj[nextap++]]; 23807ba1a0bfSKris Buschelman } 23817ba1a0bfSKris Buschelman } 23827ba1a0bfSKris Buschelman flops += 2*apnzj; 23837ba1a0bfSKris Buschelman pA++; 23847ba1a0bfSKris Buschelman } 23857ba1a0bfSKris Buschelman 23867ba1a0bfSKris Buschelman /* Zero the current row info for A*P */ 23877ba1a0bfSKris Buschelman for (j=0;j<apnzj;j++) { 23887ba1a0bfSKris Buschelman apa[apj[j]] = 0.; 23897ba1a0bfSKris Buschelman apjdense[apj[j]] = 0; 23907ba1a0bfSKris Buschelman } 23917ba1a0bfSKris Buschelman } 23927ba1a0bfSKris Buschelman 23937ba1a0bfSKris Buschelman /* Assemble the final matrix and clean up */ 23947ba1a0bfSKris Buschelman ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 23957ba1a0bfSKris Buschelman ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 23967ba1a0bfSKris Buschelman ierr = PetscFree(apa);CHKERRQ(ierr); 23977ba1a0bfSKris Buschelman ierr = PetscLogFlops(flops);CHKERRQ(ierr); 2398*95ddefa2SHong Zhang PetscFunctionReturn(0); 2399*95ddefa2SHong Zhang } 24007ba1a0bfSKris Buschelman 2401*95ddefa2SHong Zhang #undef __FUNCT__ 2402*95ddefa2SHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIMAIJ" 2403*95ddefa2SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 2404*95ddefa2SHong Zhang { 2405*95ddefa2SHong Zhang PetscErrorCode ierr; 2406*95ddefa2SHong Zhang 2407*95ddefa2SHong Zhang PetscFunctionBegin; 2408*95ddefa2SHong Zhang /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */ 2409*95ddefa2SHong Zhang ierr = MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);CHKERRQ(ierr); 2410*95ddefa2SHong Zhang ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr); 2411*95ddefa2SHong Zhang PetscFunctionReturn(0); 2412*95ddefa2SHong Zhang } 2413*95ddefa2SHong Zhang 2414*95ddefa2SHong Zhang #undef __FUNCT__ 2415*95ddefa2SHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIMAIJ" 2416*95ddefa2SHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C) 2417*95ddefa2SHong Zhang { 2418*95ddefa2SHong Zhang PetscFunctionBegin; 2419*95ddefa2SHong Zhang SETERRQ(PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 24207ba1a0bfSKris Buschelman PetscFunctionReturn(0); 24217ba1a0bfSKris Buschelman } 24227ba1a0bfSKris Buschelman 2423be1d678aSKris Buschelman EXTERN_C_BEGIN 24247ba1a0bfSKris Buschelman #undef __FUNCT__ 24250fd73130SBarry Smith #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ" 2426f69a0ea3SMatthew Knepley PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 24279c4fc4c7SBarry Smith { 24289c4fc4c7SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2429ceb03754SKris Buschelman Mat a = b->AIJ,B; 24309c4fc4c7SBarry Smith Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 24319c4fc4c7SBarry Smith PetscErrorCode ierr; 24320fd73130SBarry Smith PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 2433ba8c8a56SBarry Smith PetscInt *cols; 2434ba8c8a56SBarry Smith PetscScalar *vals; 24359c4fc4c7SBarry Smith 24369c4fc4c7SBarry Smith PetscFunctionBegin; 24379c4fc4c7SBarry Smith ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 24387c307921SBarry Smith ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr); 24399c4fc4c7SBarry Smith for (i=0; i<m; i++) { 24409c4fc4c7SBarry Smith nmax = PetscMax(nmax,aij->ilen[i]); 24410fd73130SBarry Smith for (j=0; j<dof; j++) { 24420fd73130SBarry Smith ilen[dof*i+j] = aij->ilen[i]; 24439c4fc4c7SBarry Smith } 24449c4fc4c7SBarry Smith } 2445ceb03754SKris Buschelman ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr); 2446ceb03754SKris Buschelman ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 24479c4fc4c7SBarry Smith ierr = PetscFree(ilen);CHKERRQ(ierr); 24489c4fc4c7SBarry Smith ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr); 24499c4fc4c7SBarry Smith ii = 0; 24509c4fc4c7SBarry Smith for (i=0; i<m; i++) { 2451ba8c8a56SBarry Smith ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 24520fd73130SBarry Smith for (j=0; j<dof; j++) { 24539c4fc4c7SBarry Smith for (k=0; k<ncols; k++) { 24540fd73130SBarry Smith icols[k] = dof*cols[k]+j; 24559c4fc4c7SBarry Smith } 2456ceb03754SKris Buschelman ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 24579c4fc4c7SBarry Smith ii++; 24589c4fc4c7SBarry Smith } 2459ba8c8a56SBarry Smith ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 24609c4fc4c7SBarry Smith } 24619c4fc4c7SBarry Smith ierr = PetscFree(icols);CHKERRQ(ierr); 2462ceb03754SKris Buschelman ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2463ceb03754SKris Buschelman ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2464ceb03754SKris Buschelman 2465ceb03754SKris Buschelman if (reuse == MAT_REUSE_MATRIX) { 24668ab5b326SKris Buschelman ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 2467c3d102feSKris Buschelman } else { 2468ceb03754SKris Buschelman *newmat = B; 2469c3d102feSKris Buschelman } 24709c4fc4c7SBarry Smith PetscFunctionReturn(0); 24719c4fc4c7SBarry Smith } 2472be1d678aSKris Buschelman EXTERN_C_END 24739c4fc4c7SBarry Smith 24740fd73130SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h" 2475be1d678aSKris Buschelman 2476be1d678aSKris Buschelman EXTERN_C_BEGIN 24770fd73130SBarry Smith #undef __FUNCT__ 24780fd73130SBarry Smith #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ" 2479f69a0ea3SMatthew Knepley PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 24800fd73130SBarry Smith { 24810fd73130SBarry Smith Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 2482ceb03754SKris Buschelman Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 24830fd73130SBarry Smith Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 24840fd73130SBarry Smith Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 24850fd73130SBarry Smith Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 24860fd73130SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 2487910ba992SMatthew Knepley PetscInt dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0; 2488910ba992SMatthew Knepley PetscInt *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL; 24890fd73130SBarry Smith PetscInt rstart,cstart,*garray,ii,k; 24900fd73130SBarry Smith PetscErrorCode ierr; 24910fd73130SBarry Smith PetscScalar *vals,*ovals; 24920fd73130SBarry Smith 24930fd73130SBarry Smith PetscFunctionBegin; 2494899cda47SBarry Smith ierr = PetscMalloc2(A->rmap.n,PetscInt,&dnz,A->rmap.n,PetscInt,&onz);CHKERRQ(ierr); 2495899cda47SBarry Smith for (i=0; i<A->rmap.n/dof; i++) { 24960fd73130SBarry Smith nmax = PetscMax(nmax,AIJ->ilen[i]); 24970fd73130SBarry Smith onmax = PetscMax(onmax,OAIJ->ilen[i]); 24980fd73130SBarry Smith for (j=0; j<dof; j++) { 24990fd73130SBarry Smith dnz[dof*i+j] = AIJ->ilen[i]; 25000fd73130SBarry Smith onz[dof*i+j] = OAIJ->ilen[i]; 25010fd73130SBarry Smith } 25020fd73130SBarry Smith } 2503899cda47SBarry Smith ierr = MatCreateMPIAIJ(A->comm,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N,0,dnz,0,onz,&B);CHKERRQ(ierr); 2504ceb03754SKris Buschelman ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 25050fd73130SBarry Smith ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 25060fd73130SBarry Smith 25077a1a7badSBarry Smith ierr = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr); 25089b5a856fSSatish Balay rstart = dof*maij->A->rmap.rstart; 25099b5a856fSSatish Balay cstart = dof*maij->A->cmap.rstart; 25100fd73130SBarry Smith garray = mpiaij->garray; 25110fd73130SBarry Smith 25120fd73130SBarry Smith ii = rstart; 2513899cda47SBarry Smith for (i=0; i<A->rmap.n/dof; i++) { 25140fd73130SBarry Smith ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 25150fd73130SBarry Smith ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 25160fd73130SBarry Smith for (j=0; j<dof; j++) { 25170fd73130SBarry Smith for (k=0; k<ncols; k++) { 25180fd73130SBarry Smith icols[k] = cstart + dof*cols[k]+j; 25190fd73130SBarry Smith } 25200fd73130SBarry Smith for (k=0; k<oncols; k++) { 25210fd73130SBarry Smith oicols[k] = dof*garray[ocols[k]]+j; 25220fd73130SBarry Smith } 2523ceb03754SKris Buschelman ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 2524ceb03754SKris Buschelman ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 25250fd73130SBarry Smith ii++; 25260fd73130SBarry Smith } 25270fd73130SBarry Smith ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 25280fd73130SBarry Smith ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 25290fd73130SBarry Smith } 25300fd73130SBarry Smith ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 25310fd73130SBarry Smith 2532ceb03754SKris Buschelman ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2533ceb03754SKris Buschelman ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2534ceb03754SKris Buschelman 2535ceb03754SKris Buschelman if (reuse == MAT_REUSE_MATRIX) { 2536*95ddefa2SHong Zhang PetscInt refct = A->refct; /* save A->refct */ 2537*95ddefa2SHong Zhang A->refct = 1; 25388ab5b326SKris Buschelman ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 2539*95ddefa2SHong Zhang A->refct = refct; /* restore A->refct */ 2540c3d102feSKris Buschelman } else { 2541ceb03754SKris Buschelman *newmat = B; 2542c3d102feSKris Buschelman } 25430fd73130SBarry Smith PetscFunctionReturn(0); 25440fd73130SBarry Smith } 2545be1d678aSKris Buschelman EXTERN_C_END 25460fd73130SBarry Smith 25470fd73130SBarry Smith 2548bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */ 25495983afb6SSatish Balay /*MC 25500bad9183SKris Buschelman MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 25510bad9183SKris Buschelman operations for multicomponent problems. It interpolates each component the same 25520bad9183SKris Buschelman way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 25530bad9183SKris Buschelman and MATMPIAIJ for distributed matrices. 25540bad9183SKris Buschelman 25550bad9183SKris Buschelman Operations provided: 25560fd73130SBarry Smith + MatMult 25570bad9183SKris Buschelman . MatMultTranspose 25580bad9183SKris Buschelman . MatMultAdd 25590bad9183SKris Buschelman . MatMultTransposeAdd 25600fd73130SBarry Smith - MatView 25610bad9183SKris Buschelman 25620bad9183SKris Buschelman Level: advanced 25630bad9183SKris Buschelman 25640bad9183SKris Buschelman M*/ 25654a2ae208SSatish Balay #undef __FUNCT__ 25664a2ae208SSatish Balay #define __FUNCT__ "MatCreateMAIJ" 2567be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 256882b1193eSBarry Smith { 2569dfbe8321SBarry Smith PetscErrorCode ierr; 2570b24ad042SBarry Smith PetscMPIInt size; 2571b24ad042SBarry Smith PetscInt n; 25724cb9d9b8SBarry Smith Mat_MPIMAIJ *b; 257382b1193eSBarry Smith Mat B; 257482b1193eSBarry Smith 257582b1193eSBarry Smith PetscFunctionBegin; 2576d72c5c08SBarry Smith ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2577d72c5c08SBarry Smith 2578d72c5c08SBarry Smith if (dof == 1) { 2579d72c5c08SBarry Smith *maij = A; 2580d72c5c08SBarry Smith } else { 2581f69a0ea3SMatthew Knepley ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 2582899cda47SBarry Smith ierr = MatSetSizes(B,dof*A->rmap.n,dof*A->cmap.n,dof*A->rmap.N,dof*A->cmap.N);CHKERRQ(ierr); 2583cd3bbe55SBarry Smith B->assembled = PETSC_TRUE; 2584d72c5c08SBarry Smith 2585f4a53059SBarry Smith ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2586f4a53059SBarry Smith if (size == 1) { 2587b9b97703SBarry Smith ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 25884cb9d9b8SBarry Smith B->ops->destroy = MatDestroy_SeqMAIJ; 25890fd73130SBarry Smith B->ops->view = MatView_SeqMAIJ; 2590b9b97703SBarry Smith b = (Mat_MPIMAIJ*)B->data; 2591b9b97703SBarry Smith b->dof = dof; 25924cb9d9b8SBarry Smith b->AIJ = A; 2593d72c5c08SBarry Smith if (dof == 2) { 2594cd3bbe55SBarry Smith B->ops->mult = MatMult_SeqMAIJ_2; 2595cd3bbe55SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_2; 2596cd3bbe55SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 2597cd3bbe55SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 2598bcc973b7SBarry Smith } else if (dof == 3) { 2599bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_3; 2600bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_3; 2601bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 2602bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 2603bcc973b7SBarry Smith } else if (dof == 4) { 2604bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_4; 2605bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_4; 2606bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 2607bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 2608f9fae5adSBarry Smith } else if (dof == 5) { 2609f9fae5adSBarry Smith B->ops->mult = MatMult_SeqMAIJ_5; 2610f9fae5adSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_5; 2611f9fae5adSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 2612f9fae5adSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 26136cd98798SBarry Smith } else if (dof == 6) { 26146cd98798SBarry Smith B->ops->mult = MatMult_SeqMAIJ_6; 26156cd98798SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_6; 26166cd98798SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 26176cd98798SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 2618ed8eea03SBarry Smith } else if (dof == 7) { 2619ed8eea03SBarry Smith B->ops->mult = MatMult_SeqMAIJ_7; 2620ed8eea03SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_7; 2621ed8eea03SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 2622ed8eea03SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 262366ed3db0SBarry Smith } else if (dof == 8) { 262466ed3db0SBarry Smith B->ops->mult = MatMult_SeqMAIJ_8; 262566ed3db0SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_8; 262666ed3db0SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 262766ed3db0SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 26282b692628SMatthew Knepley } else if (dof == 9) { 26292b692628SMatthew Knepley B->ops->mult = MatMult_SeqMAIJ_9; 26302b692628SMatthew Knepley B->ops->multadd = MatMultAdd_SeqMAIJ_9; 26312b692628SMatthew Knepley B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 26322b692628SMatthew Knepley B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 2633b9cda22cSBarry Smith } else if (dof == 10) { 2634b9cda22cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_10; 2635b9cda22cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_10; 2636b9cda22cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 2637b9cda22cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 26382f7816d4SBarry Smith } else if (dof == 16) { 26392f7816d4SBarry Smith B->ops->mult = MatMult_SeqMAIJ_16; 26402f7816d4SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_16; 26412f7816d4SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 26422f7816d4SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 264382b1193eSBarry Smith } else { 264477431f27SBarry Smith SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof); 264582b1193eSBarry Smith } 26467ba1a0bfSKris Buschelman B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ; 26477ba1a0bfSKris Buschelman B->ops->ptapnumeric_seqaij = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 26489c4fc4c7SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 2649f4a53059SBarry Smith } else { 2650f4a53059SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 2651f4a53059SBarry Smith IS from,to; 2652f4a53059SBarry Smith Vec gvec; 2653b24ad042SBarry Smith PetscInt *garray,i; 2654f4a53059SBarry Smith 2655b9b97703SBarry Smith ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 2656d72c5c08SBarry Smith B->ops->destroy = MatDestroy_MPIMAIJ; 26570fd73130SBarry Smith B->ops->view = MatView_MPIMAIJ; 2658b9b97703SBarry Smith b = (Mat_MPIMAIJ*)B->data; 2659b9b97703SBarry Smith b->dof = dof; 2660b9b97703SBarry Smith b->A = A; 26614cb9d9b8SBarry Smith ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 26624cb9d9b8SBarry Smith ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 26634cb9d9b8SBarry Smith 2664f4a53059SBarry Smith ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 2665f4a53059SBarry Smith ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr); 266613288a74SBarry Smith ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 2667f4a53059SBarry Smith 2668f4a53059SBarry Smith /* create two temporary Index sets for build scatter gather */ 2669b24ad042SBarry Smith ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2670f4a53059SBarry Smith for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i]; 2671f4a53059SBarry Smith ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr); 2672f4a53059SBarry Smith ierr = PetscFree(garray);CHKERRQ(ierr); 2673f4a53059SBarry Smith ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 2674f4a53059SBarry Smith 2675f4a53059SBarry Smith /* create temporary global vector to generate scatter context */ 2676899cda47SBarry Smith ierr = VecCreateMPI(A->comm,dof*A->cmap.n,dof*A->cmap.N,&gvec);CHKERRQ(ierr); 267713288a74SBarry Smith ierr = VecSetBlockSize(gvec,dof);CHKERRQ(ierr); 2678f4a53059SBarry Smith 2679f4a53059SBarry Smith /* generate the scatter context */ 2680f4a53059SBarry Smith ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 2681f4a53059SBarry Smith 2682f4a53059SBarry Smith ierr = ISDestroy(from);CHKERRQ(ierr); 2683f4a53059SBarry Smith ierr = ISDestroy(to);CHKERRQ(ierr); 2684f4a53059SBarry Smith ierr = VecDestroy(gvec);CHKERRQ(ierr); 2685f4a53059SBarry Smith 2686f4a53059SBarry Smith B->ops->mult = MatMult_MPIMAIJ_dof; 26874cb9d9b8SBarry Smith B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 26884cb9d9b8SBarry Smith B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 26894cb9d9b8SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 2690*95ddefa2SHong Zhang B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ; 2691*95ddefa2SHong Zhang B->ops->ptapnumeric_mpiaij = MatPtAPNumeric_MPIAIJ_MPIMAIJ; 26920fd73130SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 2693f4a53059SBarry Smith } 2694cd3bbe55SBarry Smith *maij = B; 26950fd73130SBarry Smith ierr = MatView_Private(B);CHKERRQ(ierr); 2696d72c5c08SBarry Smith } 269782b1193eSBarry Smith PetscFunctionReturn(0); 269882b1193eSBarry Smith } 269982b1193eSBarry Smith 270082b1193eSBarry Smith 270182b1193eSBarry Smith 270282b1193eSBarry Smith 270382b1193eSBarry Smith 270482b1193eSBarry Smith 270582b1193eSBarry Smith 270682b1193eSBarry Smith 270782b1193eSBarry Smith 270882b1193eSBarry Smith 270982b1193eSBarry Smith 271082b1193eSBarry Smith 2711