1be1d678aSKris Buschelman #define PETSCMAT_DLL 2be1d678aSKris Buschelman 382b1193eSBarry Smith /* 4cd3bbe55SBarry Smith Defines the basic matrix operations for the MAIJ matrix storage format. 5cd3bbe55SBarry Smith This format is used for restriction and interpolation operations for 6cd3bbe55SBarry Smith multicomponent problems. It interpolates each component the same way 7cd3bbe55SBarry Smith independently. 8cd3bbe55SBarry Smith 9cd3bbe55SBarry Smith We provide: 10cd3bbe55SBarry Smith MatMult() 11cd3bbe55SBarry Smith MatMultTranspose() 12cd3bbe55SBarry Smith MatMultTransposeAdd() 13cd3bbe55SBarry Smith MatMultAdd() 14cd3bbe55SBarry Smith and 15cd3bbe55SBarry Smith MatCreateMAIJ(Mat,dof,Mat*) 16f4a53059SBarry Smith 17f4a53059SBarry Smith This single directory handles both the sequential and parallel codes 1882b1193eSBarry Smith */ 1982b1193eSBarry Smith 207c4f633dSBarry Smith #include "../src/mat/impls/maij/maij.h" 217c4f633dSBarry Smith #include "../src/mat/utils/freespace.h" 221d8d5f9aSSatish Balay #include "private/vecimpl.h" 2382b1193eSBarry Smith 244a2ae208SSatish Balay #undef __FUNCT__ 254a2ae208SSatish Balay #define __FUNCT__ "MatMAIJGetAIJ" 26be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJGetAIJ(Mat A,Mat *B) 27b9b97703SBarry Smith { 28dfbe8321SBarry Smith PetscErrorCode ierr; 29b9b97703SBarry Smith PetscTruth ismpimaij,isseqmaij; 30b9b97703SBarry Smith 31b9b97703SBarry Smith PetscFunctionBegin; 32b9b97703SBarry Smith ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 33b9b97703SBarry Smith ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 34b9b97703SBarry Smith if (ismpimaij) { 35b9b97703SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 36b9b97703SBarry Smith 37b9b97703SBarry Smith *B = b->A; 38b9b97703SBarry Smith } else if (isseqmaij) { 39b9b97703SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 40b9b97703SBarry Smith 41b9b97703SBarry Smith *B = b->AIJ; 42b9b97703SBarry Smith } else { 43b9b97703SBarry Smith *B = A; 44b9b97703SBarry Smith } 45b9b97703SBarry Smith PetscFunctionReturn(0); 46b9b97703SBarry Smith } 47b9b97703SBarry Smith 484a2ae208SSatish Balay #undef __FUNCT__ 494a2ae208SSatish Balay #define __FUNCT__ "MatMAIJRedimension" 50be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 51b9b97703SBarry Smith { 52dfbe8321SBarry Smith PetscErrorCode ierr; 53b9b97703SBarry Smith Mat Aij; 54b9b97703SBarry Smith 55b9b97703SBarry Smith PetscFunctionBegin; 56b9b97703SBarry Smith ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 57b9b97703SBarry Smith ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 58b9b97703SBarry Smith PetscFunctionReturn(0); 59b9b97703SBarry Smith } 60b9b97703SBarry Smith 614a2ae208SSatish Balay #undef __FUNCT__ 624a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_SeqMAIJ" 63dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 6482b1193eSBarry Smith { 65dfbe8321SBarry Smith PetscErrorCode ierr; 664cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 6782b1193eSBarry Smith 6882b1193eSBarry Smith PetscFunctionBegin; 69cd3bbe55SBarry Smith if (b->AIJ) { 70cd3bbe55SBarry Smith ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 7182b1193eSBarry Smith } 724cb9d9b8SBarry Smith ierr = PetscFree(b);CHKERRQ(ierr); 734cb9d9b8SBarry Smith PetscFunctionReturn(0); 744cb9d9b8SBarry Smith } 754cb9d9b8SBarry Smith 764a2ae208SSatish Balay #undef __FUNCT__ 770fd73130SBarry Smith #define __FUNCT__ "MatView_SeqMAIJ" 780fd73130SBarry Smith PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 790fd73130SBarry Smith { 800fd73130SBarry Smith PetscErrorCode ierr; 810fd73130SBarry Smith Mat B; 820fd73130SBarry Smith 830fd73130SBarry Smith PetscFunctionBegin; 84ceb03754SKris Buschelman ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B); 850fd73130SBarry Smith ierr = MatView(B,viewer);CHKERRQ(ierr); 860fd73130SBarry Smith ierr = MatDestroy(B);CHKERRQ(ierr); 870fd73130SBarry Smith PetscFunctionReturn(0); 880fd73130SBarry Smith } 890fd73130SBarry Smith 900fd73130SBarry Smith #undef __FUNCT__ 910fd73130SBarry Smith #define __FUNCT__ "MatView_MPIMAIJ" 920fd73130SBarry Smith PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 930fd73130SBarry Smith { 940fd73130SBarry Smith PetscErrorCode ierr; 950fd73130SBarry Smith Mat B; 960fd73130SBarry Smith 970fd73130SBarry Smith PetscFunctionBegin; 98ceb03754SKris Buschelman ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B); 990fd73130SBarry Smith ierr = MatView(B,viewer);CHKERRQ(ierr); 1000fd73130SBarry Smith ierr = MatDestroy(B);CHKERRQ(ierr); 1010fd73130SBarry Smith PetscFunctionReturn(0); 1020fd73130SBarry Smith } 1030fd73130SBarry Smith 1040fd73130SBarry Smith #undef __FUNCT__ 1054a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIMAIJ" 106dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 1074cb9d9b8SBarry Smith { 108dfbe8321SBarry Smith PetscErrorCode ierr; 1094cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1104cb9d9b8SBarry Smith 1114cb9d9b8SBarry Smith PetscFunctionBegin; 1124cb9d9b8SBarry Smith if (b->AIJ) { 1134cb9d9b8SBarry Smith ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 1144cb9d9b8SBarry Smith } 115f4a53059SBarry Smith if (b->OAIJ) { 116f4a53059SBarry Smith ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr); 117f4a53059SBarry Smith } 118b9b97703SBarry Smith if (b->A) { 119b9b97703SBarry Smith ierr = MatDestroy(b->A);CHKERRQ(ierr); 120b9b97703SBarry Smith } 121f4a53059SBarry Smith if (b->ctx) { 122f4a53059SBarry Smith ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr); 123f4a53059SBarry Smith } 124f4a53059SBarry Smith if (b->w) { 125f4a53059SBarry Smith ierr = VecDestroy(b->w);CHKERRQ(ierr); 126f4a53059SBarry Smith } 127cd3bbe55SBarry Smith ierr = PetscFree(b);CHKERRQ(ierr); 128dbd8c25aSHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 129b9b97703SBarry Smith PetscFunctionReturn(0); 130b9b97703SBarry Smith } 131b9b97703SBarry Smith 1320bad9183SKris Buschelman /*MC 133fafad747SKris Buschelman MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 1340bad9183SKris Buschelman multicomponent problems, interpolating or restricting each component the same way independently. 1350bad9183SKris Buschelman The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 1360bad9183SKris Buschelman 1370bad9183SKris Buschelman Operations provided: 1380bad9183SKris Buschelman . MatMult 1390bad9183SKris Buschelman . MatMultTranspose 1400bad9183SKris Buschelman . MatMultAdd 1410bad9183SKris Buschelman . MatMultTransposeAdd 1420bad9183SKris Buschelman 1430bad9183SKris Buschelman Level: advanced 1440bad9183SKris Buschelman 1450bad9183SKris Buschelman .seealso: MatCreateSeqDense 1460bad9183SKris Buschelman M*/ 1470bad9183SKris Buschelman 14882b1193eSBarry Smith EXTERN_C_BEGIN 1494a2ae208SSatish Balay #undef __FUNCT__ 1504a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MAIJ" 151be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MAIJ(Mat A) 15282b1193eSBarry Smith { 153dfbe8321SBarry Smith PetscErrorCode ierr; 1544cb9d9b8SBarry Smith Mat_MPIMAIJ *b; 15564b52464SHong Zhang PetscMPIInt size; 15682b1193eSBarry Smith 15782b1193eSBarry Smith PetscFunctionBegin; 15838f2d2fdSLisandro Dalcin ierr = PetscNewLog(A,Mat_MPIMAIJ,&b);CHKERRQ(ierr); 159b0a32e0cSBarry Smith A->data = (void*)b; 160cd3bbe55SBarry Smith ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 161cd3bbe55SBarry Smith A->mapping = 0; 162f4a53059SBarry Smith 163cd3bbe55SBarry Smith b->AIJ = 0; 164cd3bbe55SBarry Smith b->dof = 0; 165f4a53059SBarry Smith b->OAIJ = 0; 166f4a53059SBarry Smith b->ctx = 0; 167f4a53059SBarry Smith b->w = 0; 1687adad957SLisandro Dalcin ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 16964b52464SHong Zhang if (size == 1){ 17064b52464SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr); 17164b52464SHong Zhang } else { 17264b52464SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr); 17364b52464SHong Zhang } 17482b1193eSBarry Smith PetscFunctionReturn(0); 17582b1193eSBarry Smith } 17682b1193eSBarry Smith EXTERN_C_END 17782b1193eSBarry Smith 178cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 1794a2ae208SSatish Balay #undef __FUNCT__ 180b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_2" 181dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 18282b1193eSBarry Smith { 1834cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 184bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 18587828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2; 186dfbe8321SBarry Smith PetscErrorCode ierr; 187d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 188b24ad042SBarry Smith PetscInt n,i,jrow,j; 18982b1193eSBarry Smith 190bcc973b7SBarry Smith PetscFunctionBegin; 1911ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1921ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 193bcc973b7SBarry Smith idx = a->j; 194bcc973b7SBarry Smith v = a->a; 195bcc973b7SBarry Smith ii = a->i; 196bcc973b7SBarry Smith 197bcc973b7SBarry Smith for (i=0; i<m; i++) { 198bcc973b7SBarry Smith jrow = ii[i]; 199bcc973b7SBarry Smith n = ii[i+1] - jrow; 200bcc973b7SBarry Smith sum1 = 0.0; 201bcc973b7SBarry Smith sum2 = 0.0; 202b3c51c66SMatthew Knepley nonzerorow += (n>0); 203bcc973b7SBarry Smith for (j=0; j<n; j++) { 204bcc973b7SBarry Smith sum1 += v[jrow]*x[2*idx[jrow]]; 205bcc973b7SBarry Smith sum2 += v[jrow]*x[2*idx[jrow]+1]; 206bcc973b7SBarry Smith jrow++; 207bcc973b7SBarry Smith } 208bcc973b7SBarry Smith y[2*i] = sum1; 209bcc973b7SBarry Smith y[2*i+1] = sum2; 210bcc973b7SBarry Smith } 211bcc973b7SBarry Smith 212*dc0b31edSSatish Balay ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr); 2131ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2141ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 21582b1193eSBarry Smith PetscFunctionReturn(0); 21682b1193eSBarry Smith } 217bcc973b7SBarry Smith 2184a2ae208SSatish Balay #undef __FUNCT__ 219b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 220dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 22182b1193eSBarry Smith { 2224cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 223bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 22487828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0; 225dfbe8321SBarry Smith PetscErrorCode ierr; 226d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 22782b1193eSBarry Smith 228bcc973b7SBarry Smith PetscFunctionBegin; 2292dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 2301ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2311ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 232bfec09a0SHong Zhang 233bcc973b7SBarry Smith for (i=0; i<m; i++) { 234bfec09a0SHong Zhang idx = a->j + a->i[i] ; 235bfec09a0SHong Zhang v = a->a + a->i[i] ; 236bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 237bcc973b7SBarry Smith alpha1 = x[2*i]; 238bcc973b7SBarry Smith alpha2 = x[2*i+1]; 239bcc973b7SBarry Smith while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 240bcc973b7SBarry Smith } 241*dc0b31edSSatish Balay ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 2421ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2431ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 24482b1193eSBarry Smith PetscFunctionReturn(0); 24582b1193eSBarry Smith } 246bcc973b7SBarry Smith 2474a2ae208SSatish Balay #undef __FUNCT__ 248b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 249dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 25082b1193eSBarry Smith { 2514cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 252bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 25387828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2; 254dfbe8321SBarry Smith PetscErrorCode ierr; 255d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,*idx,*ii; 256b24ad042SBarry Smith PetscInt n,i,jrow,j; 25782b1193eSBarry Smith 258bcc973b7SBarry Smith PetscFunctionBegin; 259f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2601ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2611ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 262bcc973b7SBarry Smith idx = a->j; 263bcc973b7SBarry Smith v = a->a; 264bcc973b7SBarry Smith ii = a->i; 265bcc973b7SBarry Smith 266bcc973b7SBarry Smith for (i=0; i<m; i++) { 267bcc973b7SBarry Smith jrow = ii[i]; 268bcc973b7SBarry Smith n = ii[i+1] - jrow; 269bcc973b7SBarry Smith sum1 = 0.0; 270bcc973b7SBarry Smith sum2 = 0.0; 271bcc973b7SBarry Smith for (j=0; j<n; j++) { 272bcc973b7SBarry Smith sum1 += v[jrow]*x[2*idx[jrow]]; 273bcc973b7SBarry Smith sum2 += v[jrow]*x[2*idx[jrow]+1]; 274bcc973b7SBarry Smith jrow++; 275bcc973b7SBarry Smith } 276bcc973b7SBarry Smith y[2*i] += sum1; 277bcc973b7SBarry Smith y[2*i+1] += sum2; 278bcc973b7SBarry Smith } 279bcc973b7SBarry Smith 280*dc0b31edSSatish Balay ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 2811ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2821ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 283bcc973b7SBarry Smith PetscFunctionReturn(0); 28482b1193eSBarry Smith } 2854a2ae208SSatish Balay #undef __FUNCT__ 286b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 287dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 28882b1193eSBarry Smith { 2894cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 290bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 29187828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2; 292dfbe8321SBarry Smith PetscErrorCode ierr; 293d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 29482b1193eSBarry Smith 295bcc973b7SBarry Smith PetscFunctionBegin; 296f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2971ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2981ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 299bfec09a0SHong Zhang 300bcc973b7SBarry Smith for (i=0; i<m; i++) { 301bfec09a0SHong Zhang idx = a->j + a->i[i] ; 302bfec09a0SHong Zhang v = a->a + a->i[i] ; 303bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 304bcc973b7SBarry Smith alpha1 = x[2*i]; 305bcc973b7SBarry Smith alpha2 = x[2*i+1]; 306bcc973b7SBarry Smith while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 307bcc973b7SBarry Smith } 308*dc0b31edSSatish Balay ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 3091ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3101ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 311bcc973b7SBarry Smith PetscFunctionReturn(0); 31282b1193eSBarry Smith } 313cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 3144a2ae208SSatish Balay #undef __FUNCT__ 315b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_3" 316dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 317bcc973b7SBarry Smith { 3184cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 319bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 32087828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3; 321dfbe8321SBarry Smith PetscErrorCode ierr; 322d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 323b24ad042SBarry Smith PetscInt n,i,jrow,j; 32482b1193eSBarry Smith 325bcc973b7SBarry Smith PetscFunctionBegin; 3261ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3271ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 328bcc973b7SBarry Smith idx = a->j; 329bcc973b7SBarry Smith v = a->a; 330bcc973b7SBarry Smith ii = a->i; 331bcc973b7SBarry Smith 332bcc973b7SBarry Smith for (i=0; i<m; i++) { 333bcc973b7SBarry Smith jrow = ii[i]; 334bcc973b7SBarry Smith n = ii[i+1] - jrow; 335bcc973b7SBarry Smith sum1 = 0.0; 336bcc973b7SBarry Smith sum2 = 0.0; 337bcc973b7SBarry Smith sum3 = 0.0; 338b3c51c66SMatthew Knepley nonzerorow += (n>0); 339bcc973b7SBarry Smith for (j=0; j<n; j++) { 340bcc973b7SBarry Smith sum1 += v[jrow]*x[3*idx[jrow]]; 341bcc973b7SBarry Smith sum2 += v[jrow]*x[3*idx[jrow]+1]; 342bcc973b7SBarry Smith sum3 += v[jrow]*x[3*idx[jrow]+2]; 343bcc973b7SBarry Smith jrow++; 344bcc973b7SBarry Smith } 345bcc973b7SBarry Smith y[3*i] = sum1; 346bcc973b7SBarry Smith y[3*i+1] = sum2; 347bcc973b7SBarry Smith y[3*i+2] = sum3; 348bcc973b7SBarry Smith } 349bcc973b7SBarry Smith 350*dc0b31edSSatish Balay ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr); 3511ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3521ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 353bcc973b7SBarry Smith PetscFunctionReturn(0); 354bcc973b7SBarry Smith } 355bcc973b7SBarry Smith 3564a2ae208SSatish Balay #undef __FUNCT__ 357b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 358dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 359bcc973b7SBarry Smith { 3604cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 361bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 36287828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0; 363dfbe8321SBarry Smith PetscErrorCode ierr; 364d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 365bcc973b7SBarry Smith 366bcc973b7SBarry Smith PetscFunctionBegin; 3672dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 3681ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3691ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 370bfec09a0SHong Zhang 371bcc973b7SBarry Smith for (i=0; i<m; i++) { 372bfec09a0SHong Zhang idx = a->j + a->i[i]; 373bfec09a0SHong Zhang v = a->a + a->i[i]; 374bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 375bcc973b7SBarry Smith alpha1 = x[3*i]; 376bcc973b7SBarry Smith alpha2 = x[3*i+1]; 377bcc973b7SBarry Smith alpha3 = x[3*i+2]; 378bcc973b7SBarry Smith while (n-->0) { 379bcc973b7SBarry Smith y[3*(*idx)] += alpha1*(*v); 380bcc973b7SBarry Smith y[3*(*idx)+1] += alpha2*(*v); 381bcc973b7SBarry Smith y[3*(*idx)+2] += alpha3*(*v); 382bcc973b7SBarry Smith idx++; v++; 383bcc973b7SBarry Smith } 384bcc973b7SBarry Smith } 385*dc0b31edSSatish Balay ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 3861ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3871ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 388bcc973b7SBarry Smith PetscFunctionReturn(0); 389bcc973b7SBarry Smith } 390bcc973b7SBarry Smith 3914a2ae208SSatish Balay #undef __FUNCT__ 392b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 393dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 394bcc973b7SBarry Smith { 3954cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 396bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 39787828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3; 398dfbe8321SBarry Smith PetscErrorCode ierr; 399d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,*idx,*ii; 400b24ad042SBarry Smith PetscInt n,i,jrow,j; 401bcc973b7SBarry Smith 402bcc973b7SBarry Smith PetscFunctionBegin; 403f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 4041ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 4051ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 406bcc973b7SBarry Smith idx = a->j; 407bcc973b7SBarry Smith v = a->a; 408bcc973b7SBarry Smith ii = a->i; 409bcc973b7SBarry Smith 410bcc973b7SBarry Smith for (i=0; i<m; i++) { 411bcc973b7SBarry Smith jrow = ii[i]; 412bcc973b7SBarry Smith n = ii[i+1] - jrow; 413bcc973b7SBarry Smith sum1 = 0.0; 414bcc973b7SBarry Smith sum2 = 0.0; 415bcc973b7SBarry Smith sum3 = 0.0; 416bcc973b7SBarry Smith for (j=0; j<n; j++) { 417bcc973b7SBarry Smith sum1 += v[jrow]*x[3*idx[jrow]]; 418bcc973b7SBarry Smith sum2 += v[jrow]*x[3*idx[jrow]+1]; 419bcc973b7SBarry Smith sum3 += v[jrow]*x[3*idx[jrow]+2]; 420bcc973b7SBarry Smith jrow++; 421bcc973b7SBarry Smith } 422bcc973b7SBarry Smith y[3*i] += sum1; 423bcc973b7SBarry Smith y[3*i+1] += sum2; 424bcc973b7SBarry Smith y[3*i+2] += sum3; 425bcc973b7SBarry Smith } 426bcc973b7SBarry Smith 427*dc0b31edSSatish Balay ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 4281ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 4291ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 430bcc973b7SBarry Smith PetscFunctionReturn(0); 431bcc973b7SBarry Smith } 4324a2ae208SSatish Balay #undef __FUNCT__ 433b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 434dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 435bcc973b7SBarry Smith { 4364cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 437bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 43887828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3; 439dfbe8321SBarry Smith PetscErrorCode ierr; 440d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 441bcc973b7SBarry Smith 442bcc973b7SBarry Smith PetscFunctionBegin; 443f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 4441ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 4451ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 446bcc973b7SBarry Smith for (i=0; i<m; i++) { 447bfec09a0SHong Zhang idx = a->j + a->i[i] ; 448bfec09a0SHong Zhang v = a->a + a->i[i] ; 449bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 450bcc973b7SBarry Smith alpha1 = x[3*i]; 451bcc973b7SBarry Smith alpha2 = x[3*i+1]; 452bcc973b7SBarry Smith alpha3 = x[3*i+2]; 453bcc973b7SBarry Smith while (n-->0) { 454bcc973b7SBarry Smith y[3*(*idx)] += alpha1*(*v); 455bcc973b7SBarry Smith y[3*(*idx)+1] += alpha2*(*v); 456bcc973b7SBarry Smith y[3*(*idx)+2] += alpha3*(*v); 457bcc973b7SBarry Smith idx++; v++; 458bcc973b7SBarry Smith } 459bcc973b7SBarry Smith } 460*dc0b31edSSatish Balay ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 4611ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 4621ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 463bcc973b7SBarry Smith PetscFunctionReturn(0); 464bcc973b7SBarry Smith } 465bcc973b7SBarry Smith 466bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/ 4674a2ae208SSatish Balay #undef __FUNCT__ 468b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_4" 469dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 470bcc973b7SBarry Smith { 4714cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 472bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 47387828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 474dfbe8321SBarry Smith PetscErrorCode ierr; 475d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 476b24ad042SBarry Smith PetscInt n,i,jrow,j; 477bcc973b7SBarry Smith 478bcc973b7SBarry Smith PetscFunctionBegin; 4791ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 4801ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 481bcc973b7SBarry Smith idx = a->j; 482bcc973b7SBarry Smith v = a->a; 483bcc973b7SBarry Smith ii = a->i; 484bcc973b7SBarry Smith 485bcc973b7SBarry Smith for (i=0; i<m; i++) { 486bcc973b7SBarry Smith jrow = ii[i]; 487bcc973b7SBarry Smith n = ii[i+1] - jrow; 488bcc973b7SBarry Smith sum1 = 0.0; 489bcc973b7SBarry Smith sum2 = 0.0; 490bcc973b7SBarry Smith sum3 = 0.0; 491bcc973b7SBarry Smith sum4 = 0.0; 492b3c51c66SMatthew Knepley nonzerorow += (n>0); 493bcc973b7SBarry Smith for (j=0; j<n; j++) { 494bcc973b7SBarry Smith sum1 += v[jrow]*x[4*idx[jrow]]; 495bcc973b7SBarry Smith sum2 += v[jrow]*x[4*idx[jrow]+1]; 496bcc973b7SBarry Smith sum3 += v[jrow]*x[4*idx[jrow]+2]; 497bcc973b7SBarry Smith sum4 += v[jrow]*x[4*idx[jrow]+3]; 498bcc973b7SBarry Smith jrow++; 499bcc973b7SBarry Smith } 500bcc973b7SBarry Smith y[4*i] = sum1; 501bcc973b7SBarry Smith y[4*i+1] = sum2; 502bcc973b7SBarry Smith y[4*i+2] = sum3; 503bcc973b7SBarry Smith y[4*i+3] = sum4; 504bcc973b7SBarry Smith } 505bcc973b7SBarry Smith 506*dc0b31edSSatish Balay ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr); 5071ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 5081ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 509bcc973b7SBarry Smith PetscFunctionReturn(0); 510bcc973b7SBarry Smith } 511bcc973b7SBarry Smith 5124a2ae208SSatish Balay #undef __FUNCT__ 513b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 514dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 515bcc973b7SBarry Smith { 5164cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 517bcc973b7SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 51887828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0; 519dfbe8321SBarry Smith PetscErrorCode ierr; 520d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 521bcc973b7SBarry Smith 522bcc973b7SBarry Smith PetscFunctionBegin; 5232dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 5241ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 5251ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 526bcc973b7SBarry Smith for (i=0; i<m; i++) { 527bfec09a0SHong Zhang idx = a->j + a->i[i] ; 528bfec09a0SHong Zhang v = a->a + a->i[i] ; 529bcc973b7SBarry Smith n = a->i[i+1] - a->i[i]; 530bcc973b7SBarry Smith alpha1 = x[4*i]; 531bcc973b7SBarry Smith alpha2 = x[4*i+1]; 532bcc973b7SBarry Smith alpha3 = x[4*i+2]; 533bcc973b7SBarry Smith alpha4 = x[4*i+3]; 534bcc973b7SBarry Smith while (n-->0) { 535bcc973b7SBarry Smith y[4*(*idx)] += alpha1*(*v); 536bcc973b7SBarry Smith y[4*(*idx)+1] += alpha2*(*v); 537bcc973b7SBarry Smith y[4*(*idx)+2] += alpha3*(*v); 538bcc973b7SBarry Smith y[4*(*idx)+3] += alpha4*(*v); 539bcc973b7SBarry Smith idx++; v++; 540bcc973b7SBarry Smith } 541bcc973b7SBarry Smith } 542*dc0b31edSSatish Balay ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 5431ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 5441ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 545bcc973b7SBarry Smith PetscFunctionReturn(0); 546bcc973b7SBarry Smith } 547bcc973b7SBarry Smith 5484a2ae208SSatish Balay #undef __FUNCT__ 549b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 550dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 551bcc973b7SBarry Smith { 5524cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 553f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 55487828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 555dfbe8321SBarry Smith PetscErrorCode ierr; 556d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,*idx,*ii; 557b24ad042SBarry Smith PetscInt n,i,jrow,j; 558f9fae5adSBarry Smith 559f9fae5adSBarry Smith PetscFunctionBegin; 560f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 5611ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 5621ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 563f9fae5adSBarry Smith idx = a->j; 564f9fae5adSBarry Smith v = a->a; 565f9fae5adSBarry Smith ii = a->i; 566f9fae5adSBarry Smith 567f9fae5adSBarry Smith for (i=0; i<m; i++) { 568f9fae5adSBarry Smith jrow = ii[i]; 569f9fae5adSBarry Smith n = ii[i+1] - jrow; 570f9fae5adSBarry Smith sum1 = 0.0; 571f9fae5adSBarry Smith sum2 = 0.0; 572f9fae5adSBarry Smith sum3 = 0.0; 573f9fae5adSBarry Smith sum4 = 0.0; 574f9fae5adSBarry Smith for (j=0; j<n; j++) { 575f9fae5adSBarry Smith sum1 += v[jrow]*x[4*idx[jrow]]; 576f9fae5adSBarry Smith sum2 += v[jrow]*x[4*idx[jrow]+1]; 577f9fae5adSBarry Smith sum3 += v[jrow]*x[4*idx[jrow]+2]; 578f9fae5adSBarry Smith sum4 += v[jrow]*x[4*idx[jrow]+3]; 579f9fae5adSBarry Smith jrow++; 580f9fae5adSBarry Smith } 581f9fae5adSBarry Smith y[4*i] += sum1; 582f9fae5adSBarry Smith y[4*i+1] += sum2; 583f9fae5adSBarry Smith y[4*i+2] += sum3; 584f9fae5adSBarry Smith y[4*i+3] += sum4; 585f9fae5adSBarry Smith } 586f9fae5adSBarry Smith 587*dc0b31edSSatish Balay ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 5881ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 5891ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 590f9fae5adSBarry Smith PetscFunctionReturn(0); 591bcc973b7SBarry Smith } 5924a2ae208SSatish Balay #undef __FUNCT__ 593b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 594dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 595bcc973b7SBarry Smith { 5964cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 597f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 59887828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4; 599dfbe8321SBarry Smith PetscErrorCode ierr; 600d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 601f9fae5adSBarry Smith 602f9fae5adSBarry Smith PetscFunctionBegin; 603f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 6041ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 6051ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 606bfec09a0SHong Zhang 607f9fae5adSBarry Smith for (i=0; i<m; i++) { 608bfec09a0SHong Zhang idx = a->j + a->i[i] ; 609bfec09a0SHong Zhang v = a->a + a->i[i] ; 610f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 611f9fae5adSBarry Smith alpha1 = x[4*i]; 612f9fae5adSBarry Smith alpha2 = x[4*i+1]; 613f9fae5adSBarry Smith alpha3 = x[4*i+2]; 614f9fae5adSBarry Smith alpha4 = x[4*i+3]; 615f9fae5adSBarry Smith while (n-->0) { 616f9fae5adSBarry Smith y[4*(*idx)] += alpha1*(*v); 617f9fae5adSBarry Smith y[4*(*idx)+1] += alpha2*(*v); 618f9fae5adSBarry Smith y[4*(*idx)+2] += alpha3*(*v); 619f9fae5adSBarry Smith y[4*(*idx)+3] += alpha4*(*v); 620f9fae5adSBarry Smith idx++; v++; 621f9fae5adSBarry Smith } 622f9fae5adSBarry Smith } 623*dc0b31edSSatish Balay ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 6241ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 6251ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 626f9fae5adSBarry Smith PetscFunctionReturn(0); 627f9fae5adSBarry Smith } 628f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/ 6296cd98798SBarry Smith 6304a2ae208SSatish Balay #undef __FUNCT__ 631b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_5" 632dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 633f9fae5adSBarry Smith { 6344cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 635f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 63687828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 637dfbe8321SBarry Smith PetscErrorCode ierr; 638d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 639b24ad042SBarry Smith PetscInt n,i,jrow,j; 640f9fae5adSBarry Smith 641f9fae5adSBarry Smith PetscFunctionBegin; 6421ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 6431ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 644f9fae5adSBarry Smith idx = a->j; 645f9fae5adSBarry Smith v = a->a; 646f9fae5adSBarry Smith ii = a->i; 647f9fae5adSBarry Smith 648f9fae5adSBarry Smith for (i=0; i<m; i++) { 649f9fae5adSBarry Smith jrow = ii[i]; 650f9fae5adSBarry Smith n = ii[i+1] - jrow; 651f9fae5adSBarry Smith sum1 = 0.0; 652f9fae5adSBarry Smith sum2 = 0.0; 653f9fae5adSBarry Smith sum3 = 0.0; 654f9fae5adSBarry Smith sum4 = 0.0; 655f9fae5adSBarry Smith sum5 = 0.0; 656b3c51c66SMatthew Knepley nonzerorow += (n>0); 657f9fae5adSBarry Smith for (j=0; j<n; j++) { 658f9fae5adSBarry Smith sum1 += v[jrow]*x[5*idx[jrow]]; 659f9fae5adSBarry Smith sum2 += v[jrow]*x[5*idx[jrow]+1]; 660f9fae5adSBarry Smith sum3 += v[jrow]*x[5*idx[jrow]+2]; 661f9fae5adSBarry Smith sum4 += v[jrow]*x[5*idx[jrow]+3]; 662f9fae5adSBarry Smith sum5 += v[jrow]*x[5*idx[jrow]+4]; 663f9fae5adSBarry Smith jrow++; 664f9fae5adSBarry Smith } 665f9fae5adSBarry Smith y[5*i] = sum1; 666f9fae5adSBarry Smith y[5*i+1] = sum2; 667f9fae5adSBarry Smith y[5*i+2] = sum3; 668f9fae5adSBarry Smith y[5*i+3] = sum4; 669f9fae5adSBarry Smith y[5*i+4] = sum5; 670f9fae5adSBarry Smith } 671f9fae5adSBarry Smith 672*dc0b31edSSatish Balay ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr); 6731ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 6741ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 675f9fae5adSBarry Smith PetscFunctionReturn(0); 676f9fae5adSBarry Smith } 677f9fae5adSBarry Smith 6784a2ae208SSatish Balay #undef __FUNCT__ 679b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 680dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 681f9fae5adSBarry Smith { 6824cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 683f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 68487828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0; 685dfbe8321SBarry Smith PetscErrorCode ierr; 686d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 687f9fae5adSBarry Smith 688f9fae5adSBarry Smith PetscFunctionBegin; 6892dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 6901ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 6911ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 692bfec09a0SHong Zhang 693f9fae5adSBarry Smith for (i=0; i<m; i++) { 694bfec09a0SHong Zhang idx = a->j + a->i[i] ; 695bfec09a0SHong Zhang v = a->a + a->i[i] ; 696f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 697f9fae5adSBarry Smith alpha1 = x[5*i]; 698f9fae5adSBarry Smith alpha2 = x[5*i+1]; 699f9fae5adSBarry Smith alpha3 = x[5*i+2]; 700f9fae5adSBarry Smith alpha4 = x[5*i+3]; 701f9fae5adSBarry Smith alpha5 = x[5*i+4]; 702f9fae5adSBarry Smith while (n-->0) { 703f9fae5adSBarry Smith y[5*(*idx)] += alpha1*(*v); 704f9fae5adSBarry Smith y[5*(*idx)+1] += alpha2*(*v); 705f9fae5adSBarry Smith y[5*(*idx)+2] += alpha3*(*v); 706f9fae5adSBarry Smith y[5*(*idx)+3] += alpha4*(*v); 707f9fae5adSBarry Smith y[5*(*idx)+4] += alpha5*(*v); 708f9fae5adSBarry Smith idx++; v++; 709f9fae5adSBarry Smith } 710f9fae5adSBarry Smith } 711*dc0b31edSSatish Balay ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 7121ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 7131ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 714f9fae5adSBarry Smith PetscFunctionReturn(0); 715f9fae5adSBarry Smith } 716f9fae5adSBarry Smith 7174a2ae208SSatish Balay #undef __FUNCT__ 718b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 719dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 720f9fae5adSBarry Smith { 7214cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 722f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 72387828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 724dfbe8321SBarry Smith PetscErrorCode ierr; 725d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,*idx,*ii; 726b24ad042SBarry Smith PetscInt n,i,jrow,j; 727f9fae5adSBarry Smith 728f9fae5adSBarry Smith PetscFunctionBegin; 729f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 7301ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 7311ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 732f9fae5adSBarry Smith idx = a->j; 733f9fae5adSBarry Smith v = a->a; 734f9fae5adSBarry Smith ii = a->i; 735f9fae5adSBarry Smith 736f9fae5adSBarry Smith for (i=0; i<m; i++) { 737f9fae5adSBarry Smith jrow = ii[i]; 738f9fae5adSBarry Smith n = ii[i+1] - jrow; 739f9fae5adSBarry Smith sum1 = 0.0; 740f9fae5adSBarry Smith sum2 = 0.0; 741f9fae5adSBarry Smith sum3 = 0.0; 742f9fae5adSBarry Smith sum4 = 0.0; 743f9fae5adSBarry Smith sum5 = 0.0; 744f9fae5adSBarry Smith for (j=0; j<n; j++) { 745f9fae5adSBarry Smith sum1 += v[jrow]*x[5*idx[jrow]]; 746f9fae5adSBarry Smith sum2 += v[jrow]*x[5*idx[jrow]+1]; 747f9fae5adSBarry Smith sum3 += v[jrow]*x[5*idx[jrow]+2]; 748f9fae5adSBarry Smith sum4 += v[jrow]*x[5*idx[jrow]+3]; 749f9fae5adSBarry Smith sum5 += v[jrow]*x[5*idx[jrow]+4]; 750f9fae5adSBarry Smith jrow++; 751f9fae5adSBarry Smith } 752f9fae5adSBarry Smith y[5*i] += sum1; 753f9fae5adSBarry Smith y[5*i+1] += sum2; 754f9fae5adSBarry Smith y[5*i+2] += sum3; 755f9fae5adSBarry Smith y[5*i+3] += sum4; 756f9fae5adSBarry Smith y[5*i+4] += sum5; 757f9fae5adSBarry Smith } 758f9fae5adSBarry Smith 759*dc0b31edSSatish Balay ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 7601ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 7611ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 762f9fae5adSBarry Smith PetscFunctionReturn(0); 763f9fae5adSBarry Smith } 764f9fae5adSBarry Smith 7654a2ae208SSatish Balay #undef __FUNCT__ 766b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 767dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 768f9fae5adSBarry Smith { 7694cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 770f9fae5adSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 77187828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5; 772dfbe8321SBarry Smith PetscErrorCode ierr; 773d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 774f9fae5adSBarry Smith 775f9fae5adSBarry Smith PetscFunctionBegin; 776f9fae5adSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 7771ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 7781ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 779bfec09a0SHong Zhang 780f9fae5adSBarry Smith for (i=0; i<m; i++) { 781bfec09a0SHong Zhang idx = a->j + a->i[i] ; 782bfec09a0SHong Zhang v = a->a + a->i[i] ; 783f9fae5adSBarry Smith n = a->i[i+1] - a->i[i]; 784f9fae5adSBarry Smith alpha1 = x[5*i]; 785f9fae5adSBarry Smith alpha2 = x[5*i+1]; 786f9fae5adSBarry Smith alpha3 = x[5*i+2]; 787f9fae5adSBarry Smith alpha4 = x[5*i+3]; 788f9fae5adSBarry Smith alpha5 = x[5*i+4]; 789f9fae5adSBarry Smith while (n-->0) { 790f9fae5adSBarry Smith y[5*(*idx)] += alpha1*(*v); 791f9fae5adSBarry Smith y[5*(*idx)+1] += alpha2*(*v); 792f9fae5adSBarry Smith y[5*(*idx)+2] += alpha3*(*v); 793f9fae5adSBarry Smith y[5*(*idx)+3] += alpha4*(*v); 794f9fae5adSBarry Smith y[5*(*idx)+4] += alpha5*(*v); 795f9fae5adSBarry Smith idx++; v++; 796f9fae5adSBarry Smith } 797f9fae5adSBarry Smith } 798*dc0b31edSSatish Balay ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 7991ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 8001ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 801f9fae5adSBarry Smith PetscFunctionReturn(0); 802bcc973b7SBarry Smith } 803bcc973b7SBarry Smith 8046cd98798SBarry Smith /* ------------------------------------------------------------------------------*/ 8054a2ae208SSatish Balay #undef __FUNCT__ 806b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_6" 807dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 8086cd98798SBarry Smith { 8096cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 8106cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 81187828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 812dfbe8321SBarry Smith PetscErrorCode ierr; 813d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 814b24ad042SBarry Smith PetscInt n,i,jrow,j; 8156cd98798SBarry Smith 8166cd98798SBarry Smith PetscFunctionBegin; 8171ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 8181ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 8196cd98798SBarry Smith idx = a->j; 8206cd98798SBarry Smith v = a->a; 8216cd98798SBarry Smith ii = a->i; 8226cd98798SBarry Smith 8236cd98798SBarry Smith for (i=0; i<m; i++) { 8246cd98798SBarry Smith jrow = ii[i]; 8256cd98798SBarry Smith n = ii[i+1] - jrow; 8266cd98798SBarry Smith sum1 = 0.0; 8276cd98798SBarry Smith sum2 = 0.0; 8286cd98798SBarry Smith sum3 = 0.0; 8296cd98798SBarry Smith sum4 = 0.0; 8306cd98798SBarry Smith sum5 = 0.0; 8316cd98798SBarry Smith sum6 = 0.0; 832b3c51c66SMatthew Knepley nonzerorow += (n>0); 8336cd98798SBarry Smith for (j=0; j<n; j++) { 8346cd98798SBarry Smith sum1 += v[jrow]*x[6*idx[jrow]]; 8356cd98798SBarry Smith sum2 += v[jrow]*x[6*idx[jrow]+1]; 8366cd98798SBarry Smith sum3 += v[jrow]*x[6*idx[jrow]+2]; 8376cd98798SBarry Smith sum4 += v[jrow]*x[6*idx[jrow]+3]; 8386cd98798SBarry Smith sum5 += v[jrow]*x[6*idx[jrow]+4]; 8396cd98798SBarry Smith sum6 += v[jrow]*x[6*idx[jrow]+5]; 8406cd98798SBarry Smith jrow++; 8416cd98798SBarry Smith } 8426cd98798SBarry Smith y[6*i] = sum1; 8436cd98798SBarry Smith y[6*i+1] = sum2; 8446cd98798SBarry Smith y[6*i+2] = sum3; 8456cd98798SBarry Smith y[6*i+3] = sum4; 8466cd98798SBarry Smith y[6*i+4] = sum5; 8476cd98798SBarry Smith y[6*i+5] = sum6; 8486cd98798SBarry Smith } 8496cd98798SBarry Smith 850*dc0b31edSSatish Balay ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr); 8511ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 8521ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 8536cd98798SBarry Smith PetscFunctionReturn(0); 8546cd98798SBarry Smith } 8556cd98798SBarry Smith 8564a2ae208SSatish Balay #undef __FUNCT__ 857b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 858dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 8596cd98798SBarry Smith { 8606cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 8616cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 86287828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0; 863dfbe8321SBarry Smith PetscErrorCode ierr; 864d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 8656cd98798SBarry Smith 8666cd98798SBarry Smith PetscFunctionBegin; 8672dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 8681ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 8691ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 870bfec09a0SHong Zhang 8716cd98798SBarry Smith for (i=0; i<m; i++) { 872bfec09a0SHong Zhang idx = a->j + a->i[i] ; 873bfec09a0SHong Zhang v = a->a + a->i[i] ; 8746cd98798SBarry Smith n = a->i[i+1] - a->i[i]; 8756cd98798SBarry Smith alpha1 = x[6*i]; 8766cd98798SBarry Smith alpha2 = x[6*i+1]; 8776cd98798SBarry Smith alpha3 = x[6*i+2]; 8786cd98798SBarry Smith alpha4 = x[6*i+3]; 8796cd98798SBarry Smith alpha5 = x[6*i+4]; 8806cd98798SBarry Smith alpha6 = x[6*i+5]; 8816cd98798SBarry Smith while (n-->0) { 8826cd98798SBarry Smith y[6*(*idx)] += alpha1*(*v); 8836cd98798SBarry Smith y[6*(*idx)+1] += alpha2*(*v); 8846cd98798SBarry Smith y[6*(*idx)+2] += alpha3*(*v); 8856cd98798SBarry Smith y[6*(*idx)+3] += alpha4*(*v); 8866cd98798SBarry Smith y[6*(*idx)+4] += alpha5*(*v); 8876cd98798SBarry Smith y[6*(*idx)+5] += alpha6*(*v); 8886cd98798SBarry Smith idx++; v++; 8896cd98798SBarry Smith } 8906cd98798SBarry Smith } 891*dc0b31edSSatish Balay ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 8921ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 8931ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 8946cd98798SBarry Smith PetscFunctionReturn(0); 8956cd98798SBarry Smith } 8966cd98798SBarry Smith 8974a2ae208SSatish Balay #undef __FUNCT__ 898b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 899dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 9006cd98798SBarry Smith { 9016cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 9026cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 90387828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 904dfbe8321SBarry Smith PetscErrorCode ierr; 905d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,*idx,*ii; 906b24ad042SBarry Smith PetscInt n,i,jrow,j; 9076cd98798SBarry Smith 9086cd98798SBarry Smith PetscFunctionBegin; 9096cd98798SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 9101ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 9111ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 9126cd98798SBarry Smith idx = a->j; 9136cd98798SBarry Smith v = a->a; 9146cd98798SBarry Smith ii = a->i; 9156cd98798SBarry Smith 9166cd98798SBarry Smith for (i=0; i<m; i++) { 9176cd98798SBarry Smith jrow = ii[i]; 9186cd98798SBarry Smith n = ii[i+1] - jrow; 9196cd98798SBarry Smith sum1 = 0.0; 9206cd98798SBarry Smith sum2 = 0.0; 9216cd98798SBarry Smith sum3 = 0.0; 9226cd98798SBarry Smith sum4 = 0.0; 9236cd98798SBarry Smith sum5 = 0.0; 9246cd98798SBarry Smith sum6 = 0.0; 9256cd98798SBarry Smith for (j=0; j<n; j++) { 9266cd98798SBarry Smith sum1 += v[jrow]*x[6*idx[jrow]]; 9276cd98798SBarry Smith sum2 += v[jrow]*x[6*idx[jrow]+1]; 9286cd98798SBarry Smith sum3 += v[jrow]*x[6*idx[jrow]+2]; 9296cd98798SBarry Smith sum4 += v[jrow]*x[6*idx[jrow]+3]; 9306cd98798SBarry Smith sum5 += v[jrow]*x[6*idx[jrow]+4]; 9316cd98798SBarry Smith sum6 += v[jrow]*x[6*idx[jrow]+5]; 9326cd98798SBarry Smith jrow++; 9336cd98798SBarry Smith } 9346cd98798SBarry Smith y[6*i] += sum1; 9356cd98798SBarry Smith y[6*i+1] += sum2; 9366cd98798SBarry Smith y[6*i+2] += sum3; 9376cd98798SBarry Smith y[6*i+3] += sum4; 9386cd98798SBarry Smith y[6*i+4] += sum5; 9396cd98798SBarry Smith y[6*i+5] += sum6; 9406cd98798SBarry Smith } 9416cd98798SBarry Smith 942*dc0b31edSSatish Balay ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 9431ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 9441ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 9456cd98798SBarry Smith PetscFunctionReturn(0); 9466cd98798SBarry Smith } 9476cd98798SBarry Smith 9484a2ae208SSatish Balay #undef __FUNCT__ 949b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 950dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 9516cd98798SBarry Smith { 9526cd98798SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 9536cd98798SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 95487828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 955dfbe8321SBarry Smith PetscErrorCode ierr; 956d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 9576cd98798SBarry Smith 9586cd98798SBarry Smith PetscFunctionBegin; 9596cd98798SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 9601ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 9611ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 962bfec09a0SHong Zhang 9636cd98798SBarry Smith for (i=0; i<m; i++) { 964bfec09a0SHong Zhang idx = a->j + a->i[i] ; 965bfec09a0SHong Zhang v = a->a + a->i[i] ; 9666cd98798SBarry Smith n = a->i[i+1] - a->i[i]; 9676cd98798SBarry Smith alpha1 = x[6*i]; 9686cd98798SBarry Smith alpha2 = x[6*i+1]; 9696cd98798SBarry Smith alpha3 = x[6*i+2]; 9706cd98798SBarry Smith alpha4 = x[6*i+3]; 9716cd98798SBarry Smith alpha5 = x[6*i+4]; 9726cd98798SBarry Smith alpha6 = x[6*i+5]; 9736cd98798SBarry Smith while (n-->0) { 9746cd98798SBarry Smith y[6*(*idx)] += alpha1*(*v); 9756cd98798SBarry Smith y[6*(*idx)+1] += alpha2*(*v); 9766cd98798SBarry Smith y[6*(*idx)+2] += alpha3*(*v); 9776cd98798SBarry Smith y[6*(*idx)+3] += alpha4*(*v); 9786cd98798SBarry Smith y[6*(*idx)+4] += alpha5*(*v); 9796cd98798SBarry Smith y[6*(*idx)+5] += alpha6*(*v); 9806cd98798SBarry Smith idx++; v++; 9816cd98798SBarry Smith } 9826cd98798SBarry Smith } 983*dc0b31edSSatish Balay ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 9841ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 9851ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 9866cd98798SBarry Smith PetscFunctionReturn(0); 9876cd98798SBarry Smith } 9886cd98798SBarry Smith 98966ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/ 99066ed3db0SBarry Smith #undef __FUNCT__ 991ed8eea03SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_7" 992ed8eea03SBarry Smith PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 993ed8eea03SBarry Smith { 994ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 995ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 996ed8eea03SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 997ed8eea03SBarry Smith PetscErrorCode ierr; 998d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 999ed8eea03SBarry Smith PetscInt n,i,jrow,j; 1000ed8eea03SBarry Smith 1001ed8eea03SBarry Smith PetscFunctionBegin; 1002ed8eea03SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1003ed8eea03SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1004ed8eea03SBarry Smith idx = a->j; 1005ed8eea03SBarry Smith v = a->a; 1006ed8eea03SBarry Smith ii = a->i; 1007ed8eea03SBarry Smith 1008ed8eea03SBarry Smith for (i=0; i<m; i++) { 1009ed8eea03SBarry Smith jrow = ii[i]; 1010ed8eea03SBarry Smith n = ii[i+1] - jrow; 1011ed8eea03SBarry Smith sum1 = 0.0; 1012ed8eea03SBarry Smith sum2 = 0.0; 1013ed8eea03SBarry Smith sum3 = 0.0; 1014ed8eea03SBarry Smith sum4 = 0.0; 1015ed8eea03SBarry Smith sum5 = 0.0; 1016ed8eea03SBarry Smith sum6 = 0.0; 1017ed8eea03SBarry Smith sum7 = 0.0; 1018b3c51c66SMatthew Knepley nonzerorow += (n>0); 1019ed8eea03SBarry Smith for (j=0; j<n; j++) { 1020ed8eea03SBarry Smith sum1 += v[jrow]*x[7*idx[jrow]]; 1021ed8eea03SBarry Smith sum2 += v[jrow]*x[7*idx[jrow]+1]; 1022ed8eea03SBarry Smith sum3 += v[jrow]*x[7*idx[jrow]+2]; 1023ed8eea03SBarry Smith sum4 += v[jrow]*x[7*idx[jrow]+3]; 1024ed8eea03SBarry Smith sum5 += v[jrow]*x[7*idx[jrow]+4]; 1025ed8eea03SBarry Smith sum6 += v[jrow]*x[7*idx[jrow]+5]; 1026ed8eea03SBarry Smith sum7 += v[jrow]*x[7*idx[jrow]+6]; 1027ed8eea03SBarry Smith jrow++; 1028ed8eea03SBarry Smith } 1029ed8eea03SBarry Smith y[7*i] = sum1; 1030ed8eea03SBarry Smith y[7*i+1] = sum2; 1031ed8eea03SBarry Smith y[7*i+2] = sum3; 1032ed8eea03SBarry Smith y[7*i+3] = sum4; 1033ed8eea03SBarry Smith y[7*i+4] = sum5; 1034ed8eea03SBarry Smith y[7*i+5] = sum6; 1035ed8eea03SBarry Smith y[7*i+6] = sum7; 1036ed8eea03SBarry Smith } 1037ed8eea03SBarry Smith 1038*dc0b31edSSatish Balay ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr); 1039ed8eea03SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1040ed8eea03SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1041ed8eea03SBarry Smith PetscFunctionReturn(0); 1042ed8eea03SBarry Smith } 1043ed8eea03SBarry Smith 1044ed8eea03SBarry Smith #undef __FUNCT__ 1045ed8eea03SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7" 1046ed8eea03SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1047ed8eea03SBarry Smith { 1048ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1049ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1050ed8eea03SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0; 1051ed8eea03SBarry Smith PetscErrorCode ierr; 1052d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 1053ed8eea03SBarry Smith 1054ed8eea03SBarry Smith PetscFunctionBegin; 10552dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 1056ed8eea03SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1057ed8eea03SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1058ed8eea03SBarry Smith 1059ed8eea03SBarry Smith for (i=0; i<m; i++) { 1060ed8eea03SBarry Smith idx = a->j + a->i[i] ; 1061ed8eea03SBarry Smith v = a->a + a->i[i] ; 1062ed8eea03SBarry Smith n = a->i[i+1] - a->i[i]; 1063ed8eea03SBarry Smith alpha1 = x[7*i]; 1064ed8eea03SBarry Smith alpha2 = x[7*i+1]; 1065ed8eea03SBarry Smith alpha3 = x[7*i+2]; 1066ed8eea03SBarry Smith alpha4 = x[7*i+3]; 1067ed8eea03SBarry Smith alpha5 = x[7*i+4]; 1068ed8eea03SBarry Smith alpha6 = x[7*i+5]; 1069ed8eea03SBarry Smith alpha7 = x[7*i+6]; 1070ed8eea03SBarry Smith while (n-->0) { 1071ed8eea03SBarry Smith y[7*(*idx)] += alpha1*(*v); 1072ed8eea03SBarry Smith y[7*(*idx)+1] += alpha2*(*v); 1073ed8eea03SBarry Smith y[7*(*idx)+2] += alpha3*(*v); 1074ed8eea03SBarry Smith y[7*(*idx)+3] += alpha4*(*v); 1075ed8eea03SBarry Smith y[7*(*idx)+4] += alpha5*(*v); 1076ed8eea03SBarry Smith y[7*(*idx)+5] += alpha6*(*v); 1077ed8eea03SBarry Smith y[7*(*idx)+6] += alpha7*(*v); 1078ed8eea03SBarry Smith idx++; v++; 1079ed8eea03SBarry Smith } 1080ed8eea03SBarry Smith } 1081*dc0b31edSSatish Balay ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1082ed8eea03SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1083ed8eea03SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1084ed8eea03SBarry Smith PetscFunctionReturn(0); 1085ed8eea03SBarry Smith } 1086ed8eea03SBarry Smith 1087ed8eea03SBarry Smith #undef __FUNCT__ 1088ed8eea03SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_7" 1089ed8eea03SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1090ed8eea03SBarry Smith { 1091ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1092ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1093ed8eea03SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1094ed8eea03SBarry Smith PetscErrorCode ierr; 1095d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1096ed8eea03SBarry Smith PetscInt n,i,jrow,j; 1097ed8eea03SBarry Smith 1098ed8eea03SBarry Smith PetscFunctionBegin; 1099ed8eea03SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1100ed8eea03SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1101ed8eea03SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1102ed8eea03SBarry Smith idx = a->j; 1103ed8eea03SBarry Smith v = a->a; 1104ed8eea03SBarry Smith ii = a->i; 1105ed8eea03SBarry Smith 1106ed8eea03SBarry Smith for (i=0; i<m; i++) { 1107ed8eea03SBarry Smith jrow = ii[i]; 1108ed8eea03SBarry Smith n = ii[i+1] - jrow; 1109ed8eea03SBarry Smith sum1 = 0.0; 1110ed8eea03SBarry Smith sum2 = 0.0; 1111ed8eea03SBarry Smith sum3 = 0.0; 1112ed8eea03SBarry Smith sum4 = 0.0; 1113ed8eea03SBarry Smith sum5 = 0.0; 1114ed8eea03SBarry Smith sum6 = 0.0; 1115ed8eea03SBarry Smith sum7 = 0.0; 1116ed8eea03SBarry Smith for (j=0; j<n; j++) { 1117ed8eea03SBarry Smith sum1 += v[jrow]*x[7*idx[jrow]]; 1118ed8eea03SBarry Smith sum2 += v[jrow]*x[7*idx[jrow]+1]; 1119ed8eea03SBarry Smith sum3 += v[jrow]*x[7*idx[jrow]+2]; 1120ed8eea03SBarry Smith sum4 += v[jrow]*x[7*idx[jrow]+3]; 1121ed8eea03SBarry Smith sum5 += v[jrow]*x[7*idx[jrow]+4]; 1122ed8eea03SBarry Smith sum6 += v[jrow]*x[7*idx[jrow]+5]; 1123ed8eea03SBarry Smith sum7 += v[jrow]*x[7*idx[jrow]+6]; 1124ed8eea03SBarry Smith jrow++; 1125ed8eea03SBarry Smith } 1126ed8eea03SBarry Smith y[7*i] += sum1; 1127ed8eea03SBarry Smith y[7*i+1] += sum2; 1128ed8eea03SBarry Smith y[7*i+2] += sum3; 1129ed8eea03SBarry Smith y[7*i+3] += sum4; 1130ed8eea03SBarry Smith y[7*i+4] += sum5; 1131ed8eea03SBarry Smith y[7*i+5] += sum6; 1132ed8eea03SBarry Smith y[7*i+6] += sum7; 1133ed8eea03SBarry Smith } 1134ed8eea03SBarry Smith 1135*dc0b31edSSatish Balay ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1136ed8eea03SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1137ed8eea03SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1138ed8eea03SBarry Smith PetscFunctionReturn(0); 1139ed8eea03SBarry Smith } 1140ed8eea03SBarry Smith 1141ed8eea03SBarry Smith #undef __FUNCT__ 1142ed8eea03SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7" 1143ed8eea03SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1144ed8eea03SBarry Smith { 1145ed8eea03SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1146ed8eea03SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1147ed8eea03SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1148ed8eea03SBarry Smith PetscErrorCode ierr; 1149d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 1150ed8eea03SBarry Smith 1151ed8eea03SBarry Smith PetscFunctionBegin; 1152ed8eea03SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1153ed8eea03SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1154ed8eea03SBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1155ed8eea03SBarry Smith for (i=0; i<m; i++) { 1156ed8eea03SBarry Smith idx = a->j + a->i[i] ; 1157ed8eea03SBarry Smith v = a->a + a->i[i] ; 1158ed8eea03SBarry Smith n = a->i[i+1] - a->i[i]; 1159ed8eea03SBarry Smith alpha1 = x[7*i]; 1160ed8eea03SBarry Smith alpha2 = x[7*i+1]; 1161ed8eea03SBarry Smith alpha3 = x[7*i+2]; 1162ed8eea03SBarry Smith alpha4 = x[7*i+3]; 1163ed8eea03SBarry Smith alpha5 = x[7*i+4]; 1164ed8eea03SBarry Smith alpha6 = x[7*i+5]; 1165ed8eea03SBarry Smith alpha7 = x[7*i+6]; 1166ed8eea03SBarry Smith while (n-->0) { 1167ed8eea03SBarry Smith y[7*(*idx)] += alpha1*(*v); 1168ed8eea03SBarry Smith y[7*(*idx)+1] += alpha2*(*v); 1169ed8eea03SBarry Smith y[7*(*idx)+2] += alpha3*(*v); 1170ed8eea03SBarry Smith y[7*(*idx)+3] += alpha4*(*v); 1171ed8eea03SBarry Smith y[7*(*idx)+4] += alpha5*(*v); 1172ed8eea03SBarry Smith y[7*(*idx)+5] += alpha6*(*v); 1173ed8eea03SBarry Smith y[7*(*idx)+6] += alpha7*(*v); 1174ed8eea03SBarry Smith idx++; v++; 1175ed8eea03SBarry Smith } 1176ed8eea03SBarry Smith } 1177*dc0b31edSSatish Balay ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1178ed8eea03SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1179ed8eea03SBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1180ed8eea03SBarry Smith PetscFunctionReturn(0); 1181ed8eea03SBarry Smith } 1182ed8eea03SBarry Smith 1183ed8eea03SBarry Smith #undef __FUNCT__ 118466ed3db0SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_8" 1185dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 118666ed3db0SBarry Smith { 118766ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 118866ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 118987828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1190dfbe8321SBarry Smith PetscErrorCode ierr; 1191d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 1192b24ad042SBarry Smith PetscInt n,i,jrow,j; 119366ed3db0SBarry Smith 119466ed3db0SBarry Smith PetscFunctionBegin; 11951ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 11961ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 119766ed3db0SBarry Smith idx = a->j; 119866ed3db0SBarry Smith v = a->a; 119966ed3db0SBarry Smith ii = a->i; 120066ed3db0SBarry Smith 120166ed3db0SBarry Smith for (i=0; i<m; i++) { 120266ed3db0SBarry Smith jrow = ii[i]; 120366ed3db0SBarry Smith n = ii[i+1] - jrow; 120466ed3db0SBarry Smith sum1 = 0.0; 120566ed3db0SBarry Smith sum2 = 0.0; 120666ed3db0SBarry Smith sum3 = 0.0; 120766ed3db0SBarry Smith sum4 = 0.0; 120866ed3db0SBarry Smith sum5 = 0.0; 120966ed3db0SBarry Smith sum6 = 0.0; 121066ed3db0SBarry Smith sum7 = 0.0; 121166ed3db0SBarry Smith sum8 = 0.0; 1212b3c51c66SMatthew Knepley nonzerorow += (n>0); 121366ed3db0SBarry Smith for (j=0; j<n; j++) { 121466ed3db0SBarry Smith sum1 += v[jrow]*x[8*idx[jrow]]; 121566ed3db0SBarry Smith sum2 += v[jrow]*x[8*idx[jrow]+1]; 121666ed3db0SBarry Smith sum3 += v[jrow]*x[8*idx[jrow]+2]; 121766ed3db0SBarry Smith sum4 += v[jrow]*x[8*idx[jrow]+3]; 121866ed3db0SBarry Smith sum5 += v[jrow]*x[8*idx[jrow]+4]; 121966ed3db0SBarry Smith sum6 += v[jrow]*x[8*idx[jrow]+5]; 122066ed3db0SBarry Smith sum7 += v[jrow]*x[8*idx[jrow]+6]; 122166ed3db0SBarry Smith sum8 += v[jrow]*x[8*idx[jrow]+7]; 122266ed3db0SBarry Smith jrow++; 122366ed3db0SBarry Smith } 122466ed3db0SBarry Smith y[8*i] = sum1; 122566ed3db0SBarry Smith y[8*i+1] = sum2; 122666ed3db0SBarry Smith y[8*i+2] = sum3; 122766ed3db0SBarry Smith y[8*i+3] = sum4; 122866ed3db0SBarry Smith y[8*i+4] = sum5; 122966ed3db0SBarry Smith y[8*i+5] = sum6; 123066ed3db0SBarry Smith y[8*i+6] = sum7; 123166ed3db0SBarry Smith y[8*i+7] = sum8; 123266ed3db0SBarry Smith } 123366ed3db0SBarry Smith 1234*dc0b31edSSatish Balay ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr); 12351ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 12361ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 123766ed3db0SBarry Smith PetscFunctionReturn(0); 123866ed3db0SBarry Smith } 123966ed3db0SBarry Smith 124066ed3db0SBarry Smith #undef __FUNCT__ 124166ed3db0SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 1242dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 124366ed3db0SBarry Smith { 124466ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 124566ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 124687828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1247dfbe8321SBarry Smith PetscErrorCode ierr; 1248d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 124966ed3db0SBarry Smith 125066ed3db0SBarry Smith PetscFunctionBegin; 12512dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 12521ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 12531ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1254bfec09a0SHong Zhang 125566ed3db0SBarry Smith for (i=0; i<m; i++) { 1256bfec09a0SHong Zhang idx = a->j + a->i[i] ; 1257bfec09a0SHong Zhang v = a->a + a->i[i] ; 125866ed3db0SBarry Smith n = a->i[i+1] - a->i[i]; 125966ed3db0SBarry Smith alpha1 = x[8*i]; 126066ed3db0SBarry Smith alpha2 = x[8*i+1]; 126166ed3db0SBarry Smith alpha3 = x[8*i+2]; 126266ed3db0SBarry Smith alpha4 = x[8*i+3]; 126366ed3db0SBarry Smith alpha5 = x[8*i+4]; 126466ed3db0SBarry Smith alpha6 = x[8*i+5]; 126566ed3db0SBarry Smith alpha7 = x[8*i+6]; 126666ed3db0SBarry Smith alpha8 = x[8*i+7]; 126766ed3db0SBarry Smith while (n-->0) { 126866ed3db0SBarry Smith y[8*(*idx)] += alpha1*(*v); 126966ed3db0SBarry Smith y[8*(*idx)+1] += alpha2*(*v); 127066ed3db0SBarry Smith y[8*(*idx)+2] += alpha3*(*v); 127166ed3db0SBarry Smith y[8*(*idx)+3] += alpha4*(*v); 127266ed3db0SBarry Smith y[8*(*idx)+4] += alpha5*(*v); 127366ed3db0SBarry Smith y[8*(*idx)+5] += alpha6*(*v); 127466ed3db0SBarry Smith y[8*(*idx)+6] += alpha7*(*v); 127566ed3db0SBarry Smith y[8*(*idx)+7] += alpha8*(*v); 127666ed3db0SBarry Smith idx++; v++; 127766ed3db0SBarry Smith } 127866ed3db0SBarry Smith } 1279*dc0b31edSSatish Balay ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 12801ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 12811ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 128266ed3db0SBarry Smith PetscFunctionReturn(0); 128366ed3db0SBarry Smith } 128466ed3db0SBarry Smith 128566ed3db0SBarry Smith #undef __FUNCT__ 128666ed3db0SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1287dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 128866ed3db0SBarry Smith { 128966ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 129066ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 129187828ca2SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1292dfbe8321SBarry Smith PetscErrorCode ierr; 1293d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1294b24ad042SBarry Smith PetscInt n,i,jrow,j; 129566ed3db0SBarry Smith 129666ed3db0SBarry Smith PetscFunctionBegin; 129766ed3db0SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 12981ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 12991ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 130066ed3db0SBarry Smith idx = a->j; 130166ed3db0SBarry Smith v = a->a; 130266ed3db0SBarry Smith ii = a->i; 130366ed3db0SBarry Smith 130466ed3db0SBarry Smith for (i=0; i<m; i++) { 130566ed3db0SBarry Smith jrow = ii[i]; 130666ed3db0SBarry Smith n = ii[i+1] - jrow; 130766ed3db0SBarry Smith sum1 = 0.0; 130866ed3db0SBarry Smith sum2 = 0.0; 130966ed3db0SBarry Smith sum3 = 0.0; 131066ed3db0SBarry Smith sum4 = 0.0; 131166ed3db0SBarry Smith sum5 = 0.0; 131266ed3db0SBarry Smith sum6 = 0.0; 131366ed3db0SBarry Smith sum7 = 0.0; 131466ed3db0SBarry Smith sum8 = 0.0; 131566ed3db0SBarry Smith for (j=0; j<n; j++) { 131666ed3db0SBarry Smith sum1 += v[jrow]*x[8*idx[jrow]]; 131766ed3db0SBarry Smith sum2 += v[jrow]*x[8*idx[jrow]+1]; 131866ed3db0SBarry Smith sum3 += v[jrow]*x[8*idx[jrow]+2]; 131966ed3db0SBarry Smith sum4 += v[jrow]*x[8*idx[jrow]+3]; 132066ed3db0SBarry Smith sum5 += v[jrow]*x[8*idx[jrow]+4]; 132166ed3db0SBarry Smith sum6 += v[jrow]*x[8*idx[jrow]+5]; 132266ed3db0SBarry Smith sum7 += v[jrow]*x[8*idx[jrow]+6]; 132366ed3db0SBarry Smith sum8 += v[jrow]*x[8*idx[jrow]+7]; 132466ed3db0SBarry Smith jrow++; 132566ed3db0SBarry Smith } 132666ed3db0SBarry Smith y[8*i] += sum1; 132766ed3db0SBarry Smith y[8*i+1] += sum2; 132866ed3db0SBarry Smith y[8*i+2] += sum3; 132966ed3db0SBarry Smith y[8*i+3] += sum4; 133066ed3db0SBarry Smith y[8*i+4] += sum5; 133166ed3db0SBarry Smith y[8*i+5] += sum6; 133266ed3db0SBarry Smith y[8*i+6] += sum7; 133366ed3db0SBarry Smith y[8*i+7] += sum8; 133466ed3db0SBarry Smith } 133566ed3db0SBarry Smith 1336*dc0b31edSSatish Balay ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 13371ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 13381ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 133966ed3db0SBarry Smith PetscFunctionReturn(0); 134066ed3db0SBarry Smith } 134166ed3db0SBarry Smith 134266ed3db0SBarry Smith #undef __FUNCT__ 134366ed3db0SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1344dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 134566ed3db0SBarry Smith { 134666ed3db0SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 134766ed3db0SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 134887828ca2SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1349dfbe8321SBarry Smith PetscErrorCode ierr; 1350d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 135166ed3db0SBarry Smith 135266ed3db0SBarry Smith PetscFunctionBegin; 135366ed3db0SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 13541ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 13551ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 135666ed3db0SBarry Smith for (i=0; i<m; i++) { 1357bfec09a0SHong Zhang idx = a->j + a->i[i] ; 1358bfec09a0SHong Zhang v = a->a + a->i[i] ; 135966ed3db0SBarry Smith n = a->i[i+1] - a->i[i]; 136066ed3db0SBarry Smith alpha1 = x[8*i]; 136166ed3db0SBarry Smith alpha2 = x[8*i+1]; 136266ed3db0SBarry Smith alpha3 = x[8*i+2]; 136366ed3db0SBarry Smith alpha4 = x[8*i+3]; 136466ed3db0SBarry Smith alpha5 = x[8*i+4]; 136566ed3db0SBarry Smith alpha6 = x[8*i+5]; 136666ed3db0SBarry Smith alpha7 = x[8*i+6]; 136766ed3db0SBarry Smith alpha8 = x[8*i+7]; 136866ed3db0SBarry Smith while (n-->0) { 136966ed3db0SBarry Smith y[8*(*idx)] += alpha1*(*v); 137066ed3db0SBarry Smith y[8*(*idx)+1] += alpha2*(*v); 137166ed3db0SBarry Smith y[8*(*idx)+2] += alpha3*(*v); 137266ed3db0SBarry Smith y[8*(*idx)+3] += alpha4*(*v); 137366ed3db0SBarry Smith y[8*(*idx)+4] += alpha5*(*v); 137466ed3db0SBarry Smith y[8*(*idx)+5] += alpha6*(*v); 137566ed3db0SBarry Smith y[8*(*idx)+6] += alpha7*(*v); 137666ed3db0SBarry Smith y[8*(*idx)+7] += alpha8*(*v); 137766ed3db0SBarry Smith idx++; v++; 137866ed3db0SBarry Smith } 137966ed3db0SBarry Smith } 1380*dc0b31edSSatish Balay ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 13811ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 13821ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 13832f7816d4SBarry Smith PetscFunctionReturn(0); 13842f7816d4SBarry Smith } 13852f7816d4SBarry Smith 13862b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/ 13872b692628SMatthew Knepley #undef __FUNCT__ 13882b692628SMatthew Knepley #define __FUNCT__ "MatMult_SeqMAIJ_9" 1389dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 13902b692628SMatthew Knepley { 13912b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 13922b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 13932b692628SMatthew Knepley PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1394dfbe8321SBarry Smith PetscErrorCode ierr; 1395d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 1396b24ad042SBarry Smith PetscInt n,i,jrow,j; 13972b692628SMatthew Knepley 13982b692628SMatthew Knepley PetscFunctionBegin; 13991ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 14001ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 14012b692628SMatthew Knepley idx = a->j; 14022b692628SMatthew Knepley v = a->a; 14032b692628SMatthew Knepley ii = a->i; 14042b692628SMatthew Knepley 14052b692628SMatthew Knepley for (i=0; i<m; i++) { 14062b692628SMatthew Knepley jrow = ii[i]; 14072b692628SMatthew Knepley n = ii[i+1] - jrow; 14082b692628SMatthew Knepley sum1 = 0.0; 14092b692628SMatthew Knepley sum2 = 0.0; 14102b692628SMatthew Knepley sum3 = 0.0; 14112b692628SMatthew Knepley sum4 = 0.0; 14122b692628SMatthew Knepley sum5 = 0.0; 14132b692628SMatthew Knepley sum6 = 0.0; 14142b692628SMatthew Knepley sum7 = 0.0; 14152b692628SMatthew Knepley sum8 = 0.0; 14162b692628SMatthew Knepley sum9 = 0.0; 1417b3c51c66SMatthew Knepley nonzerorow += (n>0); 14182b692628SMatthew Knepley for (j=0; j<n; j++) { 14192b692628SMatthew Knepley sum1 += v[jrow]*x[9*idx[jrow]]; 14202b692628SMatthew Knepley sum2 += v[jrow]*x[9*idx[jrow]+1]; 14212b692628SMatthew Knepley sum3 += v[jrow]*x[9*idx[jrow]+2]; 14222b692628SMatthew Knepley sum4 += v[jrow]*x[9*idx[jrow]+3]; 14232b692628SMatthew Knepley sum5 += v[jrow]*x[9*idx[jrow]+4]; 14242b692628SMatthew Knepley sum6 += v[jrow]*x[9*idx[jrow]+5]; 14252b692628SMatthew Knepley sum7 += v[jrow]*x[9*idx[jrow]+6]; 14262b692628SMatthew Knepley sum8 += v[jrow]*x[9*idx[jrow]+7]; 14272b692628SMatthew Knepley sum9 += v[jrow]*x[9*idx[jrow]+8]; 14282b692628SMatthew Knepley jrow++; 14292b692628SMatthew Knepley } 14302b692628SMatthew Knepley y[9*i] = sum1; 14312b692628SMatthew Knepley y[9*i+1] = sum2; 14322b692628SMatthew Knepley y[9*i+2] = sum3; 14332b692628SMatthew Knepley y[9*i+3] = sum4; 14342b692628SMatthew Knepley y[9*i+4] = sum5; 14352b692628SMatthew Knepley y[9*i+5] = sum6; 14362b692628SMatthew Knepley y[9*i+6] = sum7; 14372b692628SMatthew Knepley y[9*i+7] = sum8; 14382b692628SMatthew Knepley y[9*i+8] = sum9; 14392b692628SMatthew Knepley } 14402b692628SMatthew Knepley 1441*dc0b31edSSatish Balay ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr); 14421ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 14431ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 14442b692628SMatthew Knepley PetscFunctionReturn(0); 14452b692628SMatthew Knepley } 14462b692628SMatthew Knepley 1447b9cda22cSBarry Smith /* ------------------------------------------------------------------------------*/ 1448b9cda22cSBarry Smith 14492b692628SMatthew Knepley #undef __FUNCT__ 14502b692628SMatthew Knepley #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9" 1451dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 14522b692628SMatthew Knepley { 14532b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 14542b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 14552b692628SMatthew Knepley PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0; 1456dfbe8321SBarry Smith PetscErrorCode ierr; 1457d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 14582b692628SMatthew Knepley 14592b692628SMatthew Knepley PetscFunctionBegin; 14602dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 14611ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 14621ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 14632b692628SMatthew Knepley 14642b692628SMatthew Knepley for (i=0; i<m; i++) { 14652b692628SMatthew Knepley idx = a->j + a->i[i] ; 14662b692628SMatthew Knepley v = a->a + a->i[i] ; 14672b692628SMatthew Knepley n = a->i[i+1] - a->i[i]; 14682b692628SMatthew Knepley alpha1 = x[9*i]; 14692b692628SMatthew Knepley alpha2 = x[9*i+1]; 14702b692628SMatthew Knepley alpha3 = x[9*i+2]; 14712b692628SMatthew Knepley alpha4 = x[9*i+3]; 14722b692628SMatthew Knepley alpha5 = x[9*i+4]; 14732b692628SMatthew Knepley alpha6 = x[9*i+5]; 14742b692628SMatthew Knepley alpha7 = x[9*i+6]; 14752b692628SMatthew Knepley alpha8 = x[9*i+7]; 14762f6bd0c6SMatthew Knepley alpha9 = x[9*i+8]; 14772b692628SMatthew Knepley while (n-->0) { 14782b692628SMatthew Knepley y[9*(*idx)] += alpha1*(*v); 14792b692628SMatthew Knepley y[9*(*idx)+1] += alpha2*(*v); 14802b692628SMatthew Knepley y[9*(*idx)+2] += alpha3*(*v); 14812b692628SMatthew Knepley y[9*(*idx)+3] += alpha4*(*v); 14822b692628SMatthew Knepley y[9*(*idx)+4] += alpha5*(*v); 14832b692628SMatthew Knepley y[9*(*idx)+5] += alpha6*(*v); 14842b692628SMatthew Knepley y[9*(*idx)+6] += alpha7*(*v); 14852b692628SMatthew Knepley y[9*(*idx)+7] += alpha8*(*v); 14862b692628SMatthew Knepley y[9*(*idx)+8] += alpha9*(*v); 14872b692628SMatthew Knepley idx++; v++; 14882b692628SMatthew Knepley } 14892b692628SMatthew Knepley } 1490*dc0b31edSSatish Balay ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 14911ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 14921ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 14932b692628SMatthew Knepley PetscFunctionReturn(0); 14942b692628SMatthew Knepley } 14952b692628SMatthew Knepley 14962b692628SMatthew Knepley #undef __FUNCT__ 14972b692628SMatthew Knepley #define __FUNCT__ "MatMultAdd_SeqMAIJ_9" 1498dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 14992b692628SMatthew Knepley { 15002b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 15012b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 15022b692628SMatthew Knepley PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1503dfbe8321SBarry Smith PetscErrorCode ierr; 1504d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1505b24ad042SBarry Smith PetscInt n,i,jrow,j; 15062b692628SMatthew Knepley 15072b692628SMatthew Knepley PetscFunctionBegin; 15082b692628SMatthew Knepley if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 15091ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 15101ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 15112b692628SMatthew Knepley idx = a->j; 15122b692628SMatthew Knepley v = a->a; 15132b692628SMatthew Knepley ii = a->i; 15142b692628SMatthew Knepley 15152b692628SMatthew Knepley for (i=0; i<m; i++) { 15162b692628SMatthew Knepley jrow = ii[i]; 15172b692628SMatthew Knepley n = ii[i+1] - jrow; 15182b692628SMatthew Knepley sum1 = 0.0; 15192b692628SMatthew Knepley sum2 = 0.0; 15202b692628SMatthew Knepley sum3 = 0.0; 15212b692628SMatthew Knepley sum4 = 0.0; 15222b692628SMatthew Knepley sum5 = 0.0; 15232b692628SMatthew Knepley sum6 = 0.0; 15242b692628SMatthew Knepley sum7 = 0.0; 15252b692628SMatthew Knepley sum8 = 0.0; 15262b692628SMatthew Knepley sum9 = 0.0; 15272b692628SMatthew Knepley for (j=0; j<n; j++) { 15282b692628SMatthew Knepley sum1 += v[jrow]*x[9*idx[jrow]]; 15292b692628SMatthew Knepley sum2 += v[jrow]*x[9*idx[jrow]+1]; 15302b692628SMatthew Knepley sum3 += v[jrow]*x[9*idx[jrow]+2]; 15312b692628SMatthew Knepley sum4 += v[jrow]*x[9*idx[jrow]+3]; 15322b692628SMatthew Knepley sum5 += v[jrow]*x[9*idx[jrow]+4]; 15332b692628SMatthew Knepley sum6 += v[jrow]*x[9*idx[jrow]+5]; 15342b692628SMatthew Knepley sum7 += v[jrow]*x[9*idx[jrow]+6]; 15352b692628SMatthew Knepley sum8 += v[jrow]*x[9*idx[jrow]+7]; 15362b692628SMatthew Knepley sum9 += v[jrow]*x[9*idx[jrow]+8]; 15372b692628SMatthew Knepley jrow++; 15382b692628SMatthew Knepley } 15392b692628SMatthew Knepley y[9*i] += sum1; 15402b692628SMatthew Knepley y[9*i+1] += sum2; 15412b692628SMatthew Knepley y[9*i+2] += sum3; 15422b692628SMatthew Knepley y[9*i+3] += sum4; 15432b692628SMatthew Knepley y[9*i+4] += sum5; 15442b692628SMatthew Knepley y[9*i+5] += sum6; 15452b692628SMatthew Knepley y[9*i+6] += sum7; 15462b692628SMatthew Knepley y[9*i+7] += sum8; 15472b692628SMatthew Knepley y[9*i+8] += sum9; 15482b692628SMatthew Knepley } 15492b692628SMatthew Knepley 1550efee365bSSatish Balay ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 15511ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 15521ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 15532b692628SMatthew Knepley PetscFunctionReturn(0); 15542b692628SMatthew Knepley } 15552b692628SMatthew Knepley 15562b692628SMatthew Knepley #undef __FUNCT__ 15572b692628SMatthew Knepley #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9" 1558dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 15592b692628SMatthew Knepley { 15602b692628SMatthew Knepley Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 15612b692628SMatthew Knepley Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 15622b692628SMatthew Knepley PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1563dfbe8321SBarry Smith PetscErrorCode ierr; 1564d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 15652b692628SMatthew Knepley 15662b692628SMatthew Knepley PetscFunctionBegin; 15672b692628SMatthew Knepley if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 15681ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 15691ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 15702b692628SMatthew Knepley for (i=0; i<m; i++) { 15712b692628SMatthew Knepley idx = a->j + a->i[i] ; 15722b692628SMatthew Knepley v = a->a + a->i[i] ; 15732b692628SMatthew Knepley n = a->i[i+1] - a->i[i]; 15742b692628SMatthew Knepley alpha1 = x[9*i]; 15752b692628SMatthew Knepley alpha2 = x[9*i+1]; 15762b692628SMatthew Knepley alpha3 = x[9*i+2]; 15772b692628SMatthew Knepley alpha4 = x[9*i+3]; 15782b692628SMatthew Knepley alpha5 = x[9*i+4]; 15792b692628SMatthew Knepley alpha6 = x[9*i+5]; 15802b692628SMatthew Knepley alpha7 = x[9*i+6]; 15812b692628SMatthew Knepley alpha8 = x[9*i+7]; 15822b692628SMatthew Knepley alpha9 = x[9*i+8]; 15832b692628SMatthew Knepley while (n-->0) { 15842b692628SMatthew Knepley y[9*(*idx)] += alpha1*(*v); 15852b692628SMatthew Knepley y[9*(*idx)+1] += alpha2*(*v); 15862b692628SMatthew Knepley y[9*(*idx)+2] += alpha3*(*v); 15872b692628SMatthew Knepley y[9*(*idx)+3] += alpha4*(*v); 15882b692628SMatthew Knepley y[9*(*idx)+4] += alpha5*(*v); 15892b692628SMatthew Knepley y[9*(*idx)+5] += alpha6*(*v); 15902b692628SMatthew Knepley y[9*(*idx)+6] += alpha7*(*v); 15912b692628SMatthew Knepley y[9*(*idx)+7] += alpha8*(*v); 15922b692628SMatthew Knepley y[9*(*idx)+8] += alpha9*(*v); 15932b692628SMatthew Knepley idx++; v++; 15942b692628SMatthew Knepley } 15952b692628SMatthew Knepley } 1596*dc0b31edSSatish Balay ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 15971ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 15981ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 15992b692628SMatthew Knepley PetscFunctionReturn(0); 16002b692628SMatthew Knepley } 1601b9cda22cSBarry Smith /*--------------------------------------------------------------------------------------------*/ 1602b9cda22cSBarry Smith #undef __FUNCT__ 1603b9cda22cSBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_10" 1604b9cda22cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1605b9cda22cSBarry Smith { 1606b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1607b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1608b9cda22cSBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1609b9cda22cSBarry Smith PetscErrorCode ierr; 1610d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 1611b9cda22cSBarry Smith PetscInt n,i,jrow,j; 1612b9cda22cSBarry Smith 1613b9cda22cSBarry Smith PetscFunctionBegin; 1614b9cda22cSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1615b9cda22cSBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1616b9cda22cSBarry Smith idx = a->j; 1617b9cda22cSBarry Smith v = a->a; 1618b9cda22cSBarry Smith ii = a->i; 1619b9cda22cSBarry Smith 1620b9cda22cSBarry Smith for (i=0; i<m; i++) { 1621b9cda22cSBarry Smith jrow = ii[i]; 1622b9cda22cSBarry Smith n = ii[i+1] - jrow; 1623b9cda22cSBarry Smith sum1 = 0.0; 1624b9cda22cSBarry Smith sum2 = 0.0; 1625b9cda22cSBarry Smith sum3 = 0.0; 1626b9cda22cSBarry Smith sum4 = 0.0; 1627b9cda22cSBarry Smith sum5 = 0.0; 1628b9cda22cSBarry Smith sum6 = 0.0; 1629b9cda22cSBarry Smith sum7 = 0.0; 1630b9cda22cSBarry Smith sum8 = 0.0; 1631b9cda22cSBarry Smith sum9 = 0.0; 1632b9cda22cSBarry Smith sum10 = 0.0; 1633b3c51c66SMatthew Knepley nonzerorow += (n>0); 1634b9cda22cSBarry Smith for (j=0; j<n; j++) { 1635b9cda22cSBarry Smith sum1 += v[jrow]*x[10*idx[jrow]]; 1636b9cda22cSBarry Smith sum2 += v[jrow]*x[10*idx[jrow]+1]; 1637b9cda22cSBarry Smith sum3 += v[jrow]*x[10*idx[jrow]+2]; 1638b9cda22cSBarry Smith sum4 += v[jrow]*x[10*idx[jrow]+3]; 1639b9cda22cSBarry Smith sum5 += v[jrow]*x[10*idx[jrow]+4]; 1640b9cda22cSBarry Smith sum6 += v[jrow]*x[10*idx[jrow]+5]; 1641b9cda22cSBarry Smith sum7 += v[jrow]*x[10*idx[jrow]+6]; 1642b9cda22cSBarry Smith sum8 += v[jrow]*x[10*idx[jrow]+7]; 1643b9cda22cSBarry Smith sum9 += v[jrow]*x[10*idx[jrow]+8]; 1644b9cda22cSBarry Smith sum10 += v[jrow]*x[10*idx[jrow]+9]; 1645b9cda22cSBarry Smith jrow++; 1646b9cda22cSBarry Smith } 1647b9cda22cSBarry Smith y[10*i] = sum1; 1648b9cda22cSBarry Smith y[10*i+1] = sum2; 1649b9cda22cSBarry Smith y[10*i+2] = sum3; 1650b9cda22cSBarry Smith y[10*i+3] = sum4; 1651b9cda22cSBarry Smith y[10*i+4] = sum5; 1652b9cda22cSBarry Smith y[10*i+5] = sum6; 1653b9cda22cSBarry Smith y[10*i+6] = sum7; 1654b9cda22cSBarry Smith y[10*i+7] = sum8; 1655b9cda22cSBarry Smith y[10*i+8] = sum9; 1656b9cda22cSBarry Smith y[10*i+9] = sum10; 1657b9cda22cSBarry Smith } 1658b9cda22cSBarry Smith 1659*dc0b31edSSatish Balay ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr); 1660b9cda22cSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1661b9cda22cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1662b9cda22cSBarry Smith PetscFunctionReturn(0); 1663b9cda22cSBarry Smith } 1664b9cda22cSBarry Smith 1665b9cda22cSBarry Smith #undef __FUNCT__ 1666b9cda22cSBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_10" 1667b9cda22cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1668b9cda22cSBarry Smith { 1669b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1670b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1671b9cda22cSBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1672b9cda22cSBarry Smith PetscErrorCode ierr; 1673d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1674b9cda22cSBarry Smith PetscInt n,i,jrow,j; 1675b9cda22cSBarry Smith 1676b9cda22cSBarry Smith PetscFunctionBegin; 1677b9cda22cSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1678b9cda22cSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1679b9cda22cSBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1680b9cda22cSBarry Smith idx = a->j; 1681b9cda22cSBarry Smith v = a->a; 1682b9cda22cSBarry Smith ii = a->i; 1683b9cda22cSBarry Smith 1684b9cda22cSBarry Smith for (i=0; i<m; i++) { 1685b9cda22cSBarry Smith jrow = ii[i]; 1686b9cda22cSBarry Smith n = ii[i+1] - jrow; 1687b9cda22cSBarry Smith sum1 = 0.0; 1688b9cda22cSBarry Smith sum2 = 0.0; 1689b9cda22cSBarry Smith sum3 = 0.0; 1690b9cda22cSBarry Smith sum4 = 0.0; 1691b9cda22cSBarry Smith sum5 = 0.0; 1692b9cda22cSBarry Smith sum6 = 0.0; 1693b9cda22cSBarry Smith sum7 = 0.0; 1694b9cda22cSBarry Smith sum8 = 0.0; 1695b9cda22cSBarry Smith sum9 = 0.0; 1696b9cda22cSBarry Smith sum10 = 0.0; 1697b9cda22cSBarry Smith for (j=0; j<n; j++) { 1698b9cda22cSBarry Smith sum1 += v[jrow]*x[10*idx[jrow]]; 1699b9cda22cSBarry Smith sum2 += v[jrow]*x[10*idx[jrow]+1]; 1700b9cda22cSBarry Smith sum3 += v[jrow]*x[10*idx[jrow]+2]; 1701b9cda22cSBarry Smith sum4 += v[jrow]*x[10*idx[jrow]+3]; 1702b9cda22cSBarry Smith sum5 += v[jrow]*x[10*idx[jrow]+4]; 1703b9cda22cSBarry Smith sum6 += v[jrow]*x[10*idx[jrow]+5]; 1704b9cda22cSBarry Smith sum7 += v[jrow]*x[10*idx[jrow]+6]; 1705b9cda22cSBarry Smith sum8 += v[jrow]*x[10*idx[jrow]+7]; 1706b9cda22cSBarry Smith sum9 += v[jrow]*x[10*idx[jrow]+8]; 1707b9cda22cSBarry Smith sum10 += v[jrow]*x[10*idx[jrow]+9]; 1708b9cda22cSBarry Smith jrow++; 1709b9cda22cSBarry Smith } 1710b9cda22cSBarry Smith y[10*i] += sum1; 1711b9cda22cSBarry Smith y[10*i+1] += sum2; 1712b9cda22cSBarry Smith y[10*i+2] += sum3; 1713b9cda22cSBarry Smith y[10*i+3] += sum4; 1714b9cda22cSBarry Smith y[10*i+4] += sum5; 1715b9cda22cSBarry Smith y[10*i+5] += sum6; 1716b9cda22cSBarry Smith y[10*i+6] += sum7; 1717b9cda22cSBarry Smith y[10*i+7] += sum8; 1718b9cda22cSBarry Smith y[10*i+8] += sum9; 1719b9cda22cSBarry Smith y[10*i+9] += sum10; 1720b9cda22cSBarry Smith } 1721b9cda22cSBarry Smith 1722*dc0b31edSSatish Balay ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1723b9cda22cSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1724b9cda22cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1725b9cda22cSBarry Smith PetscFunctionReturn(0); 1726b9cda22cSBarry Smith } 1727b9cda22cSBarry Smith 1728b9cda22cSBarry Smith #undef __FUNCT__ 1729b9cda22cSBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10" 1730b9cda22cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1731b9cda22cSBarry Smith { 1732b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1733b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1734b9cda22cSBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,zero = 0.0; 1735b9cda22cSBarry Smith PetscErrorCode ierr; 1736d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 1737b9cda22cSBarry Smith 1738b9cda22cSBarry Smith PetscFunctionBegin; 1739b9cda22cSBarry Smith ierr = VecSet(yy,zero);CHKERRQ(ierr); 1740b9cda22cSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1741b9cda22cSBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1742b9cda22cSBarry Smith 1743b9cda22cSBarry Smith for (i=0; i<m; i++) { 1744b9cda22cSBarry Smith idx = a->j + a->i[i] ; 1745b9cda22cSBarry Smith v = a->a + a->i[i] ; 1746b9cda22cSBarry Smith n = a->i[i+1] - a->i[i]; 1747e29fdc22SBarry Smith alpha1 = x[10*i]; 1748e29fdc22SBarry Smith alpha2 = x[10*i+1]; 1749e29fdc22SBarry Smith alpha3 = x[10*i+2]; 1750e29fdc22SBarry Smith alpha4 = x[10*i+3]; 1751e29fdc22SBarry Smith alpha5 = x[10*i+4]; 1752e29fdc22SBarry Smith alpha6 = x[10*i+5]; 1753e29fdc22SBarry Smith alpha7 = x[10*i+6]; 1754e29fdc22SBarry Smith alpha8 = x[10*i+7]; 1755e29fdc22SBarry Smith alpha9 = x[10*i+8]; 1756b9cda22cSBarry Smith alpha10 = x[10*i+9]; 1757b9cda22cSBarry Smith while (n-->0) { 1758e29fdc22SBarry Smith y[10*(*idx)] += alpha1*(*v); 1759e29fdc22SBarry Smith y[10*(*idx)+1] += alpha2*(*v); 1760e29fdc22SBarry Smith y[10*(*idx)+2] += alpha3*(*v); 1761e29fdc22SBarry Smith y[10*(*idx)+3] += alpha4*(*v); 1762e29fdc22SBarry Smith y[10*(*idx)+4] += alpha5*(*v); 1763e29fdc22SBarry Smith y[10*(*idx)+5] += alpha6*(*v); 1764e29fdc22SBarry Smith y[10*(*idx)+6] += alpha7*(*v); 1765e29fdc22SBarry Smith y[10*(*idx)+7] += alpha8*(*v); 1766e29fdc22SBarry Smith y[10*(*idx)+8] += alpha9*(*v); 1767b9cda22cSBarry Smith y[10*(*idx)+9] += alpha10*(*v); 1768b9cda22cSBarry Smith idx++; v++; 1769b9cda22cSBarry Smith } 1770b9cda22cSBarry Smith } 1771*dc0b31edSSatish Balay ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1772b9cda22cSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1773b9cda22cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1774b9cda22cSBarry Smith PetscFunctionReturn(0); 1775b9cda22cSBarry Smith } 1776b9cda22cSBarry Smith 1777b9cda22cSBarry Smith #undef __FUNCT__ 1778b9cda22cSBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10" 1779b9cda22cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1780b9cda22cSBarry Smith { 1781b9cda22cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1782b9cda22cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1783b9cda22cSBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1784b9cda22cSBarry Smith PetscErrorCode ierr; 1785d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 1786b9cda22cSBarry Smith 1787b9cda22cSBarry Smith PetscFunctionBegin; 1788b9cda22cSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1789b9cda22cSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1790b9cda22cSBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1791b9cda22cSBarry Smith for (i=0; i<m; i++) { 1792b9cda22cSBarry Smith idx = a->j + a->i[i] ; 1793b9cda22cSBarry Smith v = a->a + a->i[i] ; 1794b9cda22cSBarry Smith n = a->i[i+1] - a->i[i]; 1795b9cda22cSBarry Smith alpha1 = x[10*i]; 1796b9cda22cSBarry Smith alpha2 = x[10*i+1]; 1797b9cda22cSBarry Smith alpha3 = x[10*i+2]; 1798b9cda22cSBarry Smith alpha4 = x[10*i+3]; 1799b9cda22cSBarry Smith alpha5 = x[10*i+4]; 1800b9cda22cSBarry Smith alpha6 = x[10*i+5]; 1801b9cda22cSBarry Smith alpha7 = x[10*i+6]; 1802b9cda22cSBarry Smith alpha8 = x[10*i+7]; 1803b9cda22cSBarry Smith alpha9 = x[10*i+8]; 1804b9cda22cSBarry Smith alpha10 = x[10*i+9]; 1805b9cda22cSBarry Smith while (n-->0) { 1806b9cda22cSBarry Smith y[10*(*idx)] += alpha1*(*v); 1807b9cda22cSBarry Smith y[10*(*idx)+1] += alpha2*(*v); 1808b9cda22cSBarry Smith y[10*(*idx)+2] += alpha3*(*v); 1809b9cda22cSBarry Smith y[10*(*idx)+3] += alpha4*(*v); 1810b9cda22cSBarry Smith y[10*(*idx)+4] += alpha5*(*v); 1811b9cda22cSBarry Smith y[10*(*idx)+5] += alpha6*(*v); 1812b9cda22cSBarry Smith y[10*(*idx)+6] += alpha7*(*v); 1813b9cda22cSBarry Smith y[10*(*idx)+7] += alpha8*(*v); 1814b9cda22cSBarry Smith y[10*(*idx)+8] += alpha9*(*v); 1815b9cda22cSBarry Smith y[10*(*idx)+9] += alpha10*(*v); 1816b9cda22cSBarry Smith idx++; v++; 1817b9cda22cSBarry Smith } 1818b9cda22cSBarry Smith } 1819*dc0b31edSSatish Balay ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1820b9cda22cSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1821b9cda22cSBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1822b9cda22cSBarry Smith PetscFunctionReturn(0); 1823b9cda22cSBarry Smith } 1824b9cda22cSBarry Smith 18252b692628SMatthew Knepley 18262f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/ 18272f7816d4SBarry Smith #undef __FUNCT__ 18282f7816d4SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_16" 1829dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 18302f7816d4SBarry Smith { 18312f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 18322f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 18332f7816d4SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 18342f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1835dfbe8321SBarry Smith PetscErrorCode ierr; 1836d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 1837b24ad042SBarry Smith PetscInt n,i,jrow,j; 18382f7816d4SBarry Smith 18392f7816d4SBarry Smith PetscFunctionBegin; 18401ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 18411ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 18422f7816d4SBarry Smith idx = a->j; 18432f7816d4SBarry Smith v = a->a; 18442f7816d4SBarry Smith ii = a->i; 18452f7816d4SBarry Smith 18462f7816d4SBarry Smith for (i=0; i<m; i++) { 18472f7816d4SBarry Smith jrow = ii[i]; 18482f7816d4SBarry Smith n = ii[i+1] - jrow; 18492f7816d4SBarry Smith sum1 = 0.0; 18502f7816d4SBarry Smith sum2 = 0.0; 18512f7816d4SBarry Smith sum3 = 0.0; 18522f7816d4SBarry Smith sum4 = 0.0; 18532f7816d4SBarry Smith sum5 = 0.0; 18542f7816d4SBarry Smith sum6 = 0.0; 18552f7816d4SBarry Smith sum7 = 0.0; 18562f7816d4SBarry Smith sum8 = 0.0; 18572f7816d4SBarry Smith sum9 = 0.0; 18582f7816d4SBarry Smith sum10 = 0.0; 18592f7816d4SBarry Smith sum11 = 0.0; 18602f7816d4SBarry Smith sum12 = 0.0; 18612f7816d4SBarry Smith sum13 = 0.0; 18622f7816d4SBarry Smith sum14 = 0.0; 18632f7816d4SBarry Smith sum15 = 0.0; 18642f7816d4SBarry Smith sum16 = 0.0; 1865b3c51c66SMatthew Knepley nonzerorow += (n>0); 18662f7816d4SBarry Smith for (j=0; j<n; j++) { 18672f7816d4SBarry Smith sum1 += v[jrow]*x[16*idx[jrow]]; 18682f7816d4SBarry Smith sum2 += v[jrow]*x[16*idx[jrow]+1]; 18692f7816d4SBarry Smith sum3 += v[jrow]*x[16*idx[jrow]+2]; 18702f7816d4SBarry Smith sum4 += v[jrow]*x[16*idx[jrow]+3]; 18712f7816d4SBarry Smith sum5 += v[jrow]*x[16*idx[jrow]+4]; 18722f7816d4SBarry Smith sum6 += v[jrow]*x[16*idx[jrow]+5]; 18732f7816d4SBarry Smith sum7 += v[jrow]*x[16*idx[jrow]+6]; 18742f7816d4SBarry Smith sum8 += v[jrow]*x[16*idx[jrow]+7]; 18752f7816d4SBarry Smith sum9 += v[jrow]*x[16*idx[jrow]+8]; 18762f7816d4SBarry Smith sum10 += v[jrow]*x[16*idx[jrow]+9]; 18772f7816d4SBarry Smith sum11 += v[jrow]*x[16*idx[jrow]+10]; 18782f7816d4SBarry Smith sum12 += v[jrow]*x[16*idx[jrow]+11]; 18792f7816d4SBarry Smith sum13 += v[jrow]*x[16*idx[jrow]+12]; 18802f7816d4SBarry Smith sum14 += v[jrow]*x[16*idx[jrow]+13]; 18812f7816d4SBarry Smith sum15 += v[jrow]*x[16*idx[jrow]+14]; 18822f7816d4SBarry Smith sum16 += v[jrow]*x[16*idx[jrow]+15]; 18832f7816d4SBarry Smith jrow++; 18842f7816d4SBarry Smith } 18852f7816d4SBarry Smith y[16*i] = sum1; 18862f7816d4SBarry Smith y[16*i+1] = sum2; 18872f7816d4SBarry Smith y[16*i+2] = sum3; 18882f7816d4SBarry Smith y[16*i+3] = sum4; 18892f7816d4SBarry Smith y[16*i+4] = sum5; 18902f7816d4SBarry Smith y[16*i+5] = sum6; 18912f7816d4SBarry Smith y[16*i+6] = sum7; 18922f7816d4SBarry Smith y[16*i+7] = sum8; 18932f7816d4SBarry Smith y[16*i+8] = sum9; 18942f7816d4SBarry Smith y[16*i+9] = sum10; 18952f7816d4SBarry Smith y[16*i+10] = sum11; 18962f7816d4SBarry Smith y[16*i+11] = sum12; 18972f7816d4SBarry Smith y[16*i+12] = sum13; 18982f7816d4SBarry Smith y[16*i+13] = sum14; 18992f7816d4SBarry Smith y[16*i+14] = sum15; 19002f7816d4SBarry Smith y[16*i+15] = sum16; 19012f7816d4SBarry Smith } 19022f7816d4SBarry Smith 1903*dc0b31edSSatish Balay ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr); 19041ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 19051ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 19062f7816d4SBarry Smith PetscFunctionReturn(0); 19072f7816d4SBarry Smith } 19082f7816d4SBarry Smith 19092f7816d4SBarry Smith #undef __FUNCT__ 19102f7816d4SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 1911dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 19122f7816d4SBarry Smith { 19132f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 19142f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 19152f7816d4SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 19162f7816d4SBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1917dfbe8321SBarry Smith PetscErrorCode ierr; 1918d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 19192f7816d4SBarry Smith 19202f7816d4SBarry Smith PetscFunctionBegin; 19212dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 19221ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 19231ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1924bfec09a0SHong Zhang 19252f7816d4SBarry Smith for (i=0; i<m; i++) { 1926bfec09a0SHong Zhang idx = a->j + a->i[i] ; 1927bfec09a0SHong Zhang v = a->a + a->i[i] ; 19282f7816d4SBarry Smith n = a->i[i+1] - a->i[i]; 19292f7816d4SBarry Smith alpha1 = x[16*i]; 19302f7816d4SBarry Smith alpha2 = x[16*i+1]; 19312f7816d4SBarry Smith alpha3 = x[16*i+2]; 19322f7816d4SBarry Smith alpha4 = x[16*i+3]; 19332f7816d4SBarry Smith alpha5 = x[16*i+4]; 19342f7816d4SBarry Smith alpha6 = x[16*i+5]; 19352f7816d4SBarry Smith alpha7 = x[16*i+6]; 19362f7816d4SBarry Smith alpha8 = x[16*i+7]; 19372f7816d4SBarry Smith alpha9 = x[16*i+8]; 19382f7816d4SBarry Smith alpha10 = x[16*i+9]; 19392f7816d4SBarry Smith alpha11 = x[16*i+10]; 19402f7816d4SBarry Smith alpha12 = x[16*i+11]; 19412f7816d4SBarry Smith alpha13 = x[16*i+12]; 19422f7816d4SBarry Smith alpha14 = x[16*i+13]; 19432f7816d4SBarry Smith alpha15 = x[16*i+14]; 19442f7816d4SBarry Smith alpha16 = x[16*i+15]; 19452f7816d4SBarry Smith while (n-->0) { 19462f7816d4SBarry Smith y[16*(*idx)] += alpha1*(*v); 19472f7816d4SBarry Smith y[16*(*idx)+1] += alpha2*(*v); 19482f7816d4SBarry Smith y[16*(*idx)+2] += alpha3*(*v); 19492f7816d4SBarry Smith y[16*(*idx)+3] += alpha4*(*v); 19502f7816d4SBarry Smith y[16*(*idx)+4] += alpha5*(*v); 19512f7816d4SBarry Smith y[16*(*idx)+5] += alpha6*(*v); 19522f7816d4SBarry Smith y[16*(*idx)+6] += alpha7*(*v); 19532f7816d4SBarry Smith y[16*(*idx)+7] += alpha8*(*v); 19542f7816d4SBarry Smith y[16*(*idx)+8] += alpha9*(*v); 19552f7816d4SBarry Smith y[16*(*idx)+9] += alpha10*(*v); 19562f7816d4SBarry Smith y[16*(*idx)+10] += alpha11*(*v); 19572f7816d4SBarry Smith y[16*(*idx)+11] += alpha12*(*v); 19582f7816d4SBarry Smith y[16*(*idx)+12] += alpha13*(*v); 19592f7816d4SBarry Smith y[16*(*idx)+13] += alpha14*(*v); 19602f7816d4SBarry Smith y[16*(*idx)+14] += alpha15*(*v); 19612f7816d4SBarry Smith y[16*(*idx)+15] += alpha16*(*v); 19622f7816d4SBarry Smith idx++; v++; 19632f7816d4SBarry Smith } 19642f7816d4SBarry Smith } 1965*dc0b31edSSatish Balay ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 19661ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 19671ebc52fbSHong Zhang ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 19682f7816d4SBarry Smith PetscFunctionReturn(0); 19692f7816d4SBarry Smith } 19702f7816d4SBarry Smith 19712f7816d4SBarry Smith #undef __FUNCT__ 19722f7816d4SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 1973dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 19742f7816d4SBarry Smith { 19752f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 19762f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 19772f7816d4SBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 19782f7816d4SBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1979dfbe8321SBarry Smith PetscErrorCode ierr; 1980d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1981b24ad042SBarry Smith PetscInt n,i,jrow,j; 19822f7816d4SBarry Smith 19832f7816d4SBarry Smith PetscFunctionBegin; 19842f7816d4SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 19851ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 19861ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 19872f7816d4SBarry Smith idx = a->j; 19882f7816d4SBarry Smith v = a->a; 19892f7816d4SBarry Smith ii = a->i; 19902f7816d4SBarry Smith 19912f7816d4SBarry Smith for (i=0; i<m; i++) { 19922f7816d4SBarry Smith jrow = ii[i]; 19932f7816d4SBarry Smith n = ii[i+1] - jrow; 19942f7816d4SBarry Smith sum1 = 0.0; 19952f7816d4SBarry Smith sum2 = 0.0; 19962f7816d4SBarry Smith sum3 = 0.0; 19972f7816d4SBarry Smith sum4 = 0.0; 19982f7816d4SBarry Smith sum5 = 0.0; 19992f7816d4SBarry Smith sum6 = 0.0; 20002f7816d4SBarry Smith sum7 = 0.0; 20012f7816d4SBarry Smith sum8 = 0.0; 20022f7816d4SBarry Smith sum9 = 0.0; 20032f7816d4SBarry Smith sum10 = 0.0; 20042f7816d4SBarry Smith sum11 = 0.0; 20052f7816d4SBarry Smith sum12 = 0.0; 20062f7816d4SBarry Smith sum13 = 0.0; 20072f7816d4SBarry Smith sum14 = 0.0; 20082f7816d4SBarry Smith sum15 = 0.0; 20092f7816d4SBarry Smith sum16 = 0.0; 20102f7816d4SBarry Smith for (j=0; j<n; j++) { 20112f7816d4SBarry Smith sum1 += v[jrow]*x[16*idx[jrow]]; 20122f7816d4SBarry Smith sum2 += v[jrow]*x[16*idx[jrow]+1]; 20132f7816d4SBarry Smith sum3 += v[jrow]*x[16*idx[jrow]+2]; 20142f7816d4SBarry Smith sum4 += v[jrow]*x[16*idx[jrow]+3]; 20152f7816d4SBarry Smith sum5 += v[jrow]*x[16*idx[jrow]+4]; 20162f7816d4SBarry Smith sum6 += v[jrow]*x[16*idx[jrow]+5]; 20172f7816d4SBarry Smith sum7 += v[jrow]*x[16*idx[jrow]+6]; 20182f7816d4SBarry Smith sum8 += v[jrow]*x[16*idx[jrow]+7]; 20192f7816d4SBarry Smith sum9 += v[jrow]*x[16*idx[jrow]+8]; 20202f7816d4SBarry Smith sum10 += v[jrow]*x[16*idx[jrow]+9]; 20212f7816d4SBarry Smith sum11 += v[jrow]*x[16*idx[jrow]+10]; 20222f7816d4SBarry Smith sum12 += v[jrow]*x[16*idx[jrow]+11]; 20232f7816d4SBarry Smith sum13 += v[jrow]*x[16*idx[jrow]+12]; 20242f7816d4SBarry Smith sum14 += v[jrow]*x[16*idx[jrow]+13]; 20252f7816d4SBarry Smith sum15 += v[jrow]*x[16*idx[jrow]+14]; 20262f7816d4SBarry Smith sum16 += v[jrow]*x[16*idx[jrow]+15]; 20272f7816d4SBarry Smith jrow++; 20282f7816d4SBarry Smith } 20292f7816d4SBarry Smith y[16*i] += sum1; 20302f7816d4SBarry Smith y[16*i+1] += sum2; 20312f7816d4SBarry Smith y[16*i+2] += sum3; 20322f7816d4SBarry Smith y[16*i+3] += sum4; 20332f7816d4SBarry Smith y[16*i+4] += sum5; 20342f7816d4SBarry Smith y[16*i+5] += sum6; 20352f7816d4SBarry Smith y[16*i+6] += sum7; 20362f7816d4SBarry Smith y[16*i+7] += sum8; 20372f7816d4SBarry Smith y[16*i+8] += sum9; 20382f7816d4SBarry Smith y[16*i+9] += sum10; 20392f7816d4SBarry Smith y[16*i+10] += sum11; 20402f7816d4SBarry Smith y[16*i+11] += sum12; 20412f7816d4SBarry Smith y[16*i+12] += sum13; 20422f7816d4SBarry Smith y[16*i+13] += sum14; 20432f7816d4SBarry Smith y[16*i+14] += sum15; 20442f7816d4SBarry Smith y[16*i+15] += sum16; 20452f7816d4SBarry Smith } 20462f7816d4SBarry Smith 2047*dc0b31edSSatish Balay ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 20481ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 20491ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 20502f7816d4SBarry Smith PetscFunctionReturn(0); 20512f7816d4SBarry Smith } 20522f7816d4SBarry Smith 20532f7816d4SBarry Smith #undef __FUNCT__ 20542f7816d4SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 2055dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 20562f7816d4SBarry Smith { 20572f7816d4SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 20582f7816d4SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 20592f7816d4SBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 20602f7816d4SBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2061dfbe8321SBarry Smith PetscErrorCode ierr; 2062d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 20632f7816d4SBarry Smith 20642f7816d4SBarry Smith PetscFunctionBegin; 20652f7816d4SBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 20661ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 20671ebc52fbSHong Zhang ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 20682f7816d4SBarry Smith for (i=0; i<m; i++) { 2069bfec09a0SHong Zhang idx = a->j + a->i[i] ; 2070bfec09a0SHong Zhang v = a->a + a->i[i] ; 20712f7816d4SBarry Smith n = a->i[i+1] - a->i[i]; 20722f7816d4SBarry Smith alpha1 = x[16*i]; 20732f7816d4SBarry Smith alpha2 = x[16*i+1]; 20742f7816d4SBarry Smith alpha3 = x[16*i+2]; 20752f7816d4SBarry Smith alpha4 = x[16*i+3]; 20762f7816d4SBarry Smith alpha5 = x[16*i+4]; 20772f7816d4SBarry Smith alpha6 = x[16*i+5]; 20782f7816d4SBarry Smith alpha7 = x[16*i+6]; 20792f7816d4SBarry Smith alpha8 = x[16*i+7]; 20802f7816d4SBarry Smith alpha9 = x[16*i+8]; 20812f7816d4SBarry Smith alpha10 = x[16*i+9]; 20822f7816d4SBarry Smith alpha11 = x[16*i+10]; 20832f7816d4SBarry Smith alpha12 = x[16*i+11]; 20842f7816d4SBarry Smith alpha13 = x[16*i+12]; 20852f7816d4SBarry Smith alpha14 = x[16*i+13]; 20862f7816d4SBarry Smith alpha15 = x[16*i+14]; 20872f7816d4SBarry Smith alpha16 = x[16*i+15]; 20882f7816d4SBarry Smith while (n-->0) { 20892f7816d4SBarry Smith y[16*(*idx)] += alpha1*(*v); 20902f7816d4SBarry Smith y[16*(*idx)+1] += alpha2*(*v); 20912f7816d4SBarry Smith y[16*(*idx)+2] += alpha3*(*v); 20922f7816d4SBarry Smith y[16*(*idx)+3] += alpha4*(*v); 20932f7816d4SBarry Smith y[16*(*idx)+4] += alpha5*(*v); 20942f7816d4SBarry Smith y[16*(*idx)+5] += alpha6*(*v); 20952f7816d4SBarry Smith y[16*(*idx)+6] += alpha7*(*v); 20962f7816d4SBarry Smith y[16*(*idx)+7] += alpha8*(*v); 20972f7816d4SBarry Smith y[16*(*idx)+8] += alpha9*(*v); 20982f7816d4SBarry Smith y[16*(*idx)+9] += alpha10*(*v); 20992f7816d4SBarry Smith y[16*(*idx)+10] += alpha11*(*v); 21002f7816d4SBarry Smith y[16*(*idx)+11] += alpha12*(*v); 21012f7816d4SBarry Smith y[16*(*idx)+12] += alpha13*(*v); 21022f7816d4SBarry Smith y[16*(*idx)+13] += alpha14*(*v); 21032f7816d4SBarry Smith y[16*(*idx)+14] += alpha15*(*v); 21042f7816d4SBarry Smith y[16*(*idx)+15] += alpha16*(*v); 21052f7816d4SBarry Smith idx++; v++; 21062f7816d4SBarry Smith } 21072f7816d4SBarry Smith } 2108*dc0b31edSSatish Balay ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 21091ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 21101ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 211166ed3db0SBarry Smith PetscFunctionReturn(0); 211266ed3db0SBarry Smith } 211366ed3db0SBarry Smith 2114ed1c418cSBarry Smith /*--------------------------------------------------------------------------------------------*/ 2115ed1c418cSBarry Smith #undef __FUNCT__ 2116ed1c418cSBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_18" 2117ed1c418cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2118ed1c418cSBarry Smith { 2119ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2120ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2121ed1c418cSBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2122ed1c418cSBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2123ed1c418cSBarry Smith PetscErrorCode ierr; 2124d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 2125ed1c418cSBarry Smith PetscInt n,i,jrow,j; 2126ed1c418cSBarry Smith 2127ed1c418cSBarry Smith PetscFunctionBegin; 2128ed1c418cSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2129ed1c418cSBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2130ed1c418cSBarry Smith idx = a->j; 2131ed1c418cSBarry Smith v = a->a; 2132ed1c418cSBarry Smith ii = a->i; 2133ed1c418cSBarry Smith 2134ed1c418cSBarry Smith for (i=0; i<m; i++) { 2135ed1c418cSBarry Smith jrow = ii[i]; 2136ed1c418cSBarry Smith n = ii[i+1] - jrow; 2137ed1c418cSBarry Smith sum1 = 0.0; 2138ed1c418cSBarry Smith sum2 = 0.0; 2139ed1c418cSBarry Smith sum3 = 0.0; 2140ed1c418cSBarry Smith sum4 = 0.0; 2141ed1c418cSBarry Smith sum5 = 0.0; 2142ed1c418cSBarry Smith sum6 = 0.0; 2143ed1c418cSBarry Smith sum7 = 0.0; 2144ed1c418cSBarry Smith sum8 = 0.0; 2145ed1c418cSBarry Smith sum9 = 0.0; 2146ed1c418cSBarry Smith sum10 = 0.0; 2147ed1c418cSBarry Smith sum11 = 0.0; 2148ed1c418cSBarry Smith sum12 = 0.0; 2149ed1c418cSBarry Smith sum13 = 0.0; 2150ed1c418cSBarry Smith sum14 = 0.0; 2151ed1c418cSBarry Smith sum15 = 0.0; 2152ed1c418cSBarry Smith sum16 = 0.0; 2153ed1c418cSBarry Smith sum17 = 0.0; 2154ed1c418cSBarry Smith sum18 = 0.0; 2155ed1c418cSBarry Smith nonzerorow += (n>0); 2156ed1c418cSBarry Smith for (j=0; j<n; j++) { 2157ed1c418cSBarry Smith sum1 += v[jrow]*x[18*idx[jrow]]; 2158ed1c418cSBarry Smith sum2 += v[jrow]*x[18*idx[jrow]+1]; 2159ed1c418cSBarry Smith sum3 += v[jrow]*x[18*idx[jrow]+2]; 2160ed1c418cSBarry Smith sum4 += v[jrow]*x[18*idx[jrow]+3]; 2161ed1c418cSBarry Smith sum5 += v[jrow]*x[18*idx[jrow]+4]; 2162ed1c418cSBarry Smith sum6 += v[jrow]*x[18*idx[jrow]+5]; 2163ed1c418cSBarry Smith sum7 += v[jrow]*x[18*idx[jrow]+6]; 2164ed1c418cSBarry Smith sum8 += v[jrow]*x[18*idx[jrow]+7]; 2165ed1c418cSBarry Smith sum9 += v[jrow]*x[18*idx[jrow]+8]; 2166ed1c418cSBarry Smith sum10 += v[jrow]*x[18*idx[jrow]+9]; 2167ed1c418cSBarry Smith sum11 += v[jrow]*x[18*idx[jrow]+10]; 2168ed1c418cSBarry Smith sum12 += v[jrow]*x[18*idx[jrow]+11]; 2169ed1c418cSBarry Smith sum13 += v[jrow]*x[18*idx[jrow]+12]; 2170ed1c418cSBarry Smith sum14 += v[jrow]*x[18*idx[jrow]+13]; 2171ed1c418cSBarry Smith sum15 += v[jrow]*x[18*idx[jrow]+14]; 2172ed1c418cSBarry Smith sum16 += v[jrow]*x[18*idx[jrow]+15]; 2173ed1c418cSBarry Smith sum17 += v[jrow]*x[18*idx[jrow]+16]; 2174ed1c418cSBarry Smith sum18 += v[jrow]*x[18*idx[jrow]+17]; 2175ed1c418cSBarry Smith jrow++; 2176ed1c418cSBarry Smith } 2177ed1c418cSBarry Smith y[18*i] = sum1; 2178ed1c418cSBarry Smith y[18*i+1] = sum2; 2179ed1c418cSBarry Smith y[18*i+2] = sum3; 2180ed1c418cSBarry Smith y[18*i+3] = sum4; 2181ed1c418cSBarry Smith y[18*i+4] = sum5; 2182ed1c418cSBarry Smith y[18*i+5] = sum6; 2183ed1c418cSBarry Smith y[18*i+6] = sum7; 2184ed1c418cSBarry Smith y[18*i+7] = sum8; 2185ed1c418cSBarry Smith y[18*i+8] = sum9; 2186ed1c418cSBarry Smith y[18*i+9] = sum10; 2187ed1c418cSBarry Smith y[18*i+10] = sum11; 2188ed1c418cSBarry Smith y[18*i+11] = sum12; 2189ed1c418cSBarry Smith y[18*i+12] = sum13; 2190ed1c418cSBarry Smith y[18*i+13] = sum14; 2191ed1c418cSBarry Smith y[18*i+14] = sum15; 2192ed1c418cSBarry Smith y[18*i+15] = sum16; 2193ed1c418cSBarry Smith y[18*i+16] = sum17; 2194ed1c418cSBarry Smith y[18*i+17] = sum18; 2195ed1c418cSBarry Smith } 2196ed1c418cSBarry Smith 2197*dc0b31edSSatish Balay ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr); 2198ed1c418cSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2199ed1c418cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2200ed1c418cSBarry Smith PetscFunctionReturn(0); 2201ed1c418cSBarry Smith } 2202ed1c418cSBarry Smith 2203ed1c418cSBarry Smith #undef __FUNCT__ 2204ed1c418cSBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_18" 2205ed1c418cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2206ed1c418cSBarry Smith { 2207ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2208ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2209ed1c418cSBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 2210ed1c418cSBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2211ed1c418cSBarry Smith PetscErrorCode ierr; 2212d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 2213ed1c418cSBarry Smith 2214ed1c418cSBarry Smith PetscFunctionBegin; 2215ed1c418cSBarry Smith ierr = VecSet(yy,zero);CHKERRQ(ierr); 2216ed1c418cSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2217ed1c418cSBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2218ed1c418cSBarry Smith 2219ed1c418cSBarry Smith for (i=0; i<m; i++) { 2220ed1c418cSBarry Smith idx = a->j + a->i[i] ; 2221ed1c418cSBarry Smith v = a->a + a->i[i] ; 2222ed1c418cSBarry Smith n = a->i[i+1] - a->i[i]; 2223ed1c418cSBarry Smith alpha1 = x[18*i]; 2224ed1c418cSBarry Smith alpha2 = x[18*i+1]; 2225ed1c418cSBarry Smith alpha3 = x[18*i+2]; 2226ed1c418cSBarry Smith alpha4 = x[18*i+3]; 2227ed1c418cSBarry Smith alpha5 = x[18*i+4]; 2228ed1c418cSBarry Smith alpha6 = x[18*i+5]; 2229ed1c418cSBarry Smith alpha7 = x[18*i+6]; 2230ed1c418cSBarry Smith alpha8 = x[18*i+7]; 2231ed1c418cSBarry Smith alpha9 = x[18*i+8]; 2232ed1c418cSBarry Smith alpha10 = x[18*i+9]; 2233ed1c418cSBarry Smith alpha11 = x[18*i+10]; 2234ed1c418cSBarry Smith alpha12 = x[18*i+11]; 2235ed1c418cSBarry Smith alpha13 = x[18*i+12]; 2236ed1c418cSBarry Smith alpha14 = x[18*i+13]; 2237ed1c418cSBarry Smith alpha15 = x[18*i+14]; 2238ed1c418cSBarry Smith alpha16 = x[18*i+15]; 2239ed1c418cSBarry Smith alpha17 = x[18*i+16]; 2240ed1c418cSBarry Smith alpha18 = x[18*i+17]; 2241ed1c418cSBarry Smith while (n-->0) { 2242ed1c418cSBarry Smith y[18*(*idx)] += alpha1*(*v); 2243ed1c418cSBarry Smith y[18*(*idx)+1] += alpha2*(*v); 2244ed1c418cSBarry Smith y[18*(*idx)+2] += alpha3*(*v); 2245ed1c418cSBarry Smith y[18*(*idx)+3] += alpha4*(*v); 2246ed1c418cSBarry Smith y[18*(*idx)+4] += alpha5*(*v); 2247ed1c418cSBarry Smith y[18*(*idx)+5] += alpha6*(*v); 2248ed1c418cSBarry Smith y[18*(*idx)+6] += alpha7*(*v); 2249ed1c418cSBarry Smith y[18*(*idx)+7] += alpha8*(*v); 2250ed1c418cSBarry Smith y[18*(*idx)+8] += alpha9*(*v); 2251ed1c418cSBarry Smith y[18*(*idx)+9] += alpha10*(*v); 2252ed1c418cSBarry Smith y[18*(*idx)+10] += alpha11*(*v); 2253ed1c418cSBarry Smith y[18*(*idx)+11] += alpha12*(*v); 2254ed1c418cSBarry Smith y[18*(*idx)+12] += alpha13*(*v); 2255ed1c418cSBarry Smith y[18*(*idx)+13] += alpha14*(*v); 2256ed1c418cSBarry Smith y[18*(*idx)+14] += alpha15*(*v); 2257ed1c418cSBarry Smith y[18*(*idx)+15] += alpha16*(*v); 2258ed1c418cSBarry Smith y[18*(*idx)+16] += alpha17*(*v); 2259ed1c418cSBarry Smith y[18*(*idx)+17] += alpha18*(*v); 2260ed1c418cSBarry Smith idx++; v++; 2261ed1c418cSBarry Smith } 2262ed1c418cSBarry Smith } 2263*dc0b31edSSatish Balay ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2264ed1c418cSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2265ed1c418cSBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2266ed1c418cSBarry Smith PetscFunctionReturn(0); 2267ed1c418cSBarry Smith } 2268ed1c418cSBarry Smith 2269ed1c418cSBarry Smith #undef __FUNCT__ 2270ed1c418cSBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_18" 2271ed1c418cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2272ed1c418cSBarry Smith { 2273ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2274ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2275ed1c418cSBarry Smith PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2276ed1c418cSBarry Smith PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2277ed1c418cSBarry Smith PetscErrorCode ierr; 2278d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2279ed1c418cSBarry Smith PetscInt n,i,jrow,j; 2280ed1c418cSBarry Smith 2281ed1c418cSBarry Smith PetscFunctionBegin; 2282ed1c418cSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2283ed1c418cSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2284ed1c418cSBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2285ed1c418cSBarry Smith idx = a->j; 2286ed1c418cSBarry Smith v = a->a; 2287ed1c418cSBarry Smith ii = a->i; 2288ed1c418cSBarry Smith 2289ed1c418cSBarry Smith for (i=0; i<m; i++) { 2290ed1c418cSBarry Smith jrow = ii[i]; 2291ed1c418cSBarry Smith n = ii[i+1] - jrow; 2292ed1c418cSBarry Smith sum1 = 0.0; 2293ed1c418cSBarry Smith sum2 = 0.0; 2294ed1c418cSBarry Smith sum3 = 0.0; 2295ed1c418cSBarry Smith sum4 = 0.0; 2296ed1c418cSBarry Smith sum5 = 0.0; 2297ed1c418cSBarry Smith sum6 = 0.0; 2298ed1c418cSBarry Smith sum7 = 0.0; 2299ed1c418cSBarry Smith sum8 = 0.0; 2300ed1c418cSBarry Smith sum9 = 0.0; 2301ed1c418cSBarry Smith sum10 = 0.0; 2302ed1c418cSBarry Smith sum11 = 0.0; 2303ed1c418cSBarry Smith sum12 = 0.0; 2304ed1c418cSBarry Smith sum13 = 0.0; 2305ed1c418cSBarry Smith sum14 = 0.0; 2306ed1c418cSBarry Smith sum15 = 0.0; 2307ed1c418cSBarry Smith sum16 = 0.0; 2308ed1c418cSBarry Smith sum17 = 0.0; 2309ed1c418cSBarry Smith sum18 = 0.0; 2310ed1c418cSBarry Smith for (j=0; j<n; j++) { 2311ed1c418cSBarry Smith sum1 += v[jrow]*x[18*idx[jrow]]; 2312ed1c418cSBarry Smith sum2 += v[jrow]*x[18*idx[jrow]+1]; 2313ed1c418cSBarry Smith sum3 += v[jrow]*x[18*idx[jrow]+2]; 2314ed1c418cSBarry Smith sum4 += v[jrow]*x[18*idx[jrow]+3]; 2315ed1c418cSBarry Smith sum5 += v[jrow]*x[18*idx[jrow]+4]; 2316ed1c418cSBarry Smith sum6 += v[jrow]*x[18*idx[jrow]+5]; 2317ed1c418cSBarry Smith sum7 += v[jrow]*x[18*idx[jrow]+6]; 2318ed1c418cSBarry Smith sum8 += v[jrow]*x[18*idx[jrow]+7]; 2319ed1c418cSBarry Smith sum9 += v[jrow]*x[18*idx[jrow]+8]; 2320ed1c418cSBarry Smith sum10 += v[jrow]*x[18*idx[jrow]+9]; 2321ed1c418cSBarry Smith sum11 += v[jrow]*x[18*idx[jrow]+10]; 2322ed1c418cSBarry Smith sum12 += v[jrow]*x[18*idx[jrow]+11]; 2323ed1c418cSBarry Smith sum13 += v[jrow]*x[18*idx[jrow]+12]; 2324ed1c418cSBarry Smith sum14 += v[jrow]*x[18*idx[jrow]+13]; 2325ed1c418cSBarry Smith sum15 += v[jrow]*x[18*idx[jrow]+14]; 2326ed1c418cSBarry Smith sum16 += v[jrow]*x[18*idx[jrow]+15]; 2327ed1c418cSBarry Smith sum17 += v[jrow]*x[18*idx[jrow]+16]; 2328ed1c418cSBarry Smith sum18 += v[jrow]*x[18*idx[jrow]+17]; 2329ed1c418cSBarry Smith jrow++; 2330ed1c418cSBarry Smith } 2331ed1c418cSBarry Smith y[18*i] += sum1; 2332ed1c418cSBarry Smith y[18*i+1] += sum2; 2333ed1c418cSBarry Smith y[18*i+2] += sum3; 2334ed1c418cSBarry Smith y[18*i+3] += sum4; 2335ed1c418cSBarry Smith y[18*i+4] += sum5; 2336ed1c418cSBarry Smith y[18*i+5] += sum6; 2337ed1c418cSBarry Smith y[18*i+6] += sum7; 2338ed1c418cSBarry Smith y[18*i+7] += sum8; 2339ed1c418cSBarry Smith y[18*i+8] += sum9; 2340ed1c418cSBarry Smith y[18*i+9] += sum10; 2341ed1c418cSBarry Smith y[18*i+10] += sum11; 2342ed1c418cSBarry Smith y[18*i+11] += sum12; 2343ed1c418cSBarry Smith y[18*i+12] += sum13; 2344ed1c418cSBarry Smith y[18*i+13] += sum14; 2345ed1c418cSBarry Smith y[18*i+14] += sum15; 2346ed1c418cSBarry Smith y[18*i+15] += sum16; 2347ed1c418cSBarry Smith y[18*i+16] += sum17; 2348ed1c418cSBarry Smith y[18*i+17] += sum18; 2349ed1c418cSBarry Smith } 2350ed1c418cSBarry Smith 2351*dc0b31edSSatish Balay ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2352ed1c418cSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2353ed1c418cSBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2354ed1c418cSBarry Smith PetscFunctionReturn(0); 2355ed1c418cSBarry Smith } 2356ed1c418cSBarry Smith 2357ed1c418cSBarry Smith #undef __FUNCT__ 2358ed1c418cSBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_18" 2359ed1c418cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2360ed1c418cSBarry Smith { 2361ed1c418cSBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2362ed1c418cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2363ed1c418cSBarry Smith PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2364ed1c418cSBarry Smith PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2365ed1c418cSBarry Smith PetscErrorCode ierr; 2366d0f46423SBarry Smith PetscInt m = b->AIJ->rmap->n,n,i,*idx; 2367ed1c418cSBarry Smith 2368ed1c418cSBarry Smith PetscFunctionBegin; 2369ed1c418cSBarry Smith if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2370ed1c418cSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2371ed1c418cSBarry Smith ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2372ed1c418cSBarry Smith for (i=0; i<m; i++) { 2373ed1c418cSBarry Smith idx = a->j + a->i[i] ; 2374ed1c418cSBarry Smith v = a->a + a->i[i] ; 2375ed1c418cSBarry Smith n = a->i[i+1] - a->i[i]; 2376ed1c418cSBarry Smith alpha1 = x[18*i]; 2377ed1c418cSBarry Smith alpha2 = x[18*i+1]; 2378ed1c418cSBarry Smith alpha3 = x[18*i+2]; 2379ed1c418cSBarry Smith alpha4 = x[18*i+3]; 2380ed1c418cSBarry Smith alpha5 = x[18*i+4]; 2381ed1c418cSBarry Smith alpha6 = x[18*i+5]; 2382ed1c418cSBarry Smith alpha7 = x[18*i+6]; 2383ed1c418cSBarry Smith alpha8 = x[18*i+7]; 2384ed1c418cSBarry Smith alpha9 = x[18*i+8]; 2385ed1c418cSBarry Smith alpha10 = x[18*i+9]; 2386ed1c418cSBarry Smith alpha11 = x[18*i+10]; 2387ed1c418cSBarry Smith alpha12 = x[18*i+11]; 2388ed1c418cSBarry Smith alpha13 = x[18*i+12]; 2389ed1c418cSBarry Smith alpha14 = x[18*i+13]; 2390ed1c418cSBarry Smith alpha15 = x[18*i+14]; 2391ed1c418cSBarry Smith alpha16 = x[18*i+15]; 2392ed1c418cSBarry Smith alpha17 = x[18*i+16]; 2393ed1c418cSBarry Smith alpha18 = x[18*i+17]; 2394ed1c418cSBarry Smith while (n-->0) { 2395ed1c418cSBarry Smith y[18*(*idx)] += alpha1*(*v); 2396ed1c418cSBarry Smith y[18*(*idx)+1] += alpha2*(*v); 2397ed1c418cSBarry Smith y[18*(*idx)+2] += alpha3*(*v); 2398ed1c418cSBarry Smith y[18*(*idx)+3] += alpha4*(*v); 2399ed1c418cSBarry Smith y[18*(*idx)+4] += alpha5*(*v); 2400ed1c418cSBarry Smith y[18*(*idx)+5] += alpha6*(*v); 2401ed1c418cSBarry Smith y[18*(*idx)+6] += alpha7*(*v); 2402ed1c418cSBarry Smith y[18*(*idx)+7] += alpha8*(*v); 2403ed1c418cSBarry Smith y[18*(*idx)+8] += alpha9*(*v); 2404ed1c418cSBarry Smith y[18*(*idx)+9] += alpha10*(*v); 2405ed1c418cSBarry Smith y[18*(*idx)+10] += alpha11*(*v); 2406ed1c418cSBarry Smith y[18*(*idx)+11] += alpha12*(*v); 2407ed1c418cSBarry Smith y[18*(*idx)+12] += alpha13*(*v); 2408ed1c418cSBarry Smith y[18*(*idx)+13] += alpha14*(*v); 2409ed1c418cSBarry Smith y[18*(*idx)+14] += alpha15*(*v); 2410ed1c418cSBarry Smith y[18*(*idx)+15] += alpha16*(*v); 2411ed1c418cSBarry Smith y[18*(*idx)+16] += alpha17*(*v); 2412ed1c418cSBarry Smith y[18*(*idx)+17] += alpha18*(*v); 2413ed1c418cSBarry Smith idx++; v++; 2414ed1c418cSBarry Smith } 2415ed1c418cSBarry Smith } 2416*dc0b31edSSatish Balay ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2417ed1c418cSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2418ed1c418cSBarry Smith ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2419ed1c418cSBarry Smith PetscFunctionReturn(0); 2420ed1c418cSBarry Smith } 2421ed1c418cSBarry Smith 2422f4a53059SBarry Smith /*===================================================================================*/ 24234a2ae208SSatish Balay #undef __FUNCT__ 24244a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIMAIJ_dof" 2425dfbe8321SBarry Smith PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2426f4a53059SBarry Smith { 24274cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2428dfbe8321SBarry Smith PetscErrorCode ierr; 2429f4a53059SBarry Smith 2430b24ad042SBarry Smith PetscFunctionBegin; 2431f4a53059SBarry Smith /* start the scatter */ 2432ca9f406cSSatish Balay ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 24334cb9d9b8SBarry Smith ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 2434ca9f406cSSatish Balay ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 24354cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 2436f4a53059SBarry Smith PetscFunctionReturn(0); 2437f4a53059SBarry Smith } 2438f4a53059SBarry Smith 24394a2ae208SSatish Balay #undef __FUNCT__ 24404a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 2441dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 24424cb9d9b8SBarry Smith { 24434cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2444dfbe8321SBarry Smith PetscErrorCode ierr; 2445b24ad042SBarry Smith 24464cb9d9b8SBarry Smith PetscFunctionBegin; 24474cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 24484cb9d9b8SBarry Smith ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 2449ca9f406cSSatish Balay ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2450ca9f406cSSatish Balay ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 24514cb9d9b8SBarry Smith PetscFunctionReturn(0); 24524cb9d9b8SBarry Smith } 24534cb9d9b8SBarry Smith 24544a2ae208SSatish Balay #undef __FUNCT__ 24554a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 2456dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 24574cb9d9b8SBarry Smith { 24584cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2459dfbe8321SBarry Smith PetscErrorCode ierr; 24604cb9d9b8SBarry Smith 2461b24ad042SBarry Smith PetscFunctionBegin; 24624cb9d9b8SBarry Smith /* start the scatter */ 2463ca9f406cSSatish Balay ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2464d72c5c08SBarry Smith ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2465ca9f406cSSatish Balay ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2466717f2ec8SHong Zhang ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 24674cb9d9b8SBarry Smith PetscFunctionReturn(0); 24684cb9d9b8SBarry Smith } 24694cb9d9b8SBarry Smith 24704a2ae208SSatish Balay #undef __FUNCT__ 24714a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 2472dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 24734cb9d9b8SBarry Smith { 24744cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2475dfbe8321SBarry Smith PetscErrorCode ierr; 2476b24ad042SBarry Smith 24774cb9d9b8SBarry Smith PetscFunctionBegin; 24784cb9d9b8SBarry Smith ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2479ca9f406cSSatish Balay ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2480d72c5c08SBarry Smith ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2481ca9f406cSSatish Balay ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 24824cb9d9b8SBarry Smith PetscFunctionReturn(0); 24834cb9d9b8SBarry Smith } 24844cb9d9b8SBarry Smith 248595ddefa2SHong Zhang /* ----------------------------------------------------------------*/ 24869c4fc4c7SBarry Smith #undef __FUNCT__ 24877ba1a0bfSKris Buschelman #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 24887ba1a0bfSKris Buschelman PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 24897ba1a0bfSKris Buschelman { 24907ba1a0bfSKris Buschelman /* This routine requires testing -- but it's getting better. */ 24917ba1a0bfSKris Buschelman PetscErrorCode ierr; 2492a1a86e44SBarry Smith PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 24937ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 24947ba1a0bfSKris Buschelman Mat P=pp->AIJ; 24957ba1a0bfSKris Buschelman Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 24967ba1a0bfSKris Buschelman PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 24977ba1a0bfSKris Buschelman PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 2498d0f46423SBarry Smith PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof,cn; 24997ba1a0bfSKris Buschelman PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 25007ba1a0bfSKris Buschelman MatScalar *ca; 25017ba1a0bfSKris Buschelman 25027ba1a0bfSKris Buschelman PetscFunctionBegin; 25037ba1a0bfSKris Buschelman /* Start timer */ 25047ba1a0bfSKris Buschelman ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 25057ba1a0bfSKris Buschelman 25067ba1a0bfSKris Buschelman /* Get ij structure of P^T */ 25077ba1a0bfSKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 25087ba1a0bfSKris Buschelman 25097ba1a0bfSKris Buschelman cn = pn*ppdof; 25107ba1a0bfSKris Buschelman /* Allocate ci array, arrays for fill computation and */ 25117ba1a0bfSKris Buschelman /* free space for accumulating nonzero column info */ 25127ba1a0bfSKris Buschelman ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 25137ba1a0bfSKris Buschelman ci[0] = 0; 25147ba1a0bfSKris Buschelman 25157ba1a0bfSKris Buschelman /* Work arrays for rows of P^T*A */ 25167ba1a0bfSKris Buschelman ierr = PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 25177ba1a0bfSKris Buschelman ierr = PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 25187ba1a0bfSKris Buschelman ptasparserow = ptadenserow + an; 25197ba1a0bfSKris Buschelman denserow = ptasparserow + an; 25207ba1a0bfSKris Buschelman sparserow = denserow + cn; 25217ba1a0bfSKris Buschelman 25227ba1a0bfSKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 25237ba1a0bfSKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 25247ba1a0bfSKris Buschelman /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 2525a1a86e44SBarry Smith ierr = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space); 25267ba1a0bfSKris Buschelman current_space = free_space; 25277ba1a0bfSKris Buschelman 25287ba1a0bfSKris Buschelman /* Determine symbolic info for each row of C: */ 25297ba1a0bfSKris Buschelman for (i=0;i<pn;i++) { 25307ba1a0bfSKris Buschelman ptnzi = pti[i+1] - pti[i]; 25317ba1a0bfSKris Buschelman ptJ = ptj + pti[i]; 25327ba1a0bfSKris Buschelman for (dof=0;dof<ppdof;dof++) { 25337ba1a0bfSKris Buschelman ptanzi = 0; 25347ba1a0bfSKris Buschelman /* Determine symbolic row of PtA: */ 25357ba1a0bfSKris Buschelman for (j=0;j<ptnzi;j++) { 25367ba1a0bfSKris Buschelman /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 25377ba1a0bfSKris Buschelman arow = ptJ[j]*ppdof + dof; 25387ba1a0bfSKris Buschelman /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 25397ba1a0bfSKris Buschelman anzj = ai[arow+1] - ai[arow]; 25407ba1a0bfSKris Buschelman ajj = aj + ai[arow]; 25417ba1a0bfSKris Buschelman for (k=0;k<anzj;k++) { 25427ba1a0bfSKris Buschelman if (!ptadenserow[ajj[k]]) { 25437ba1a0bfSKris Buschelman ptadenserow[ajj[k]] = -1; 25447ba1a0bfSKris Buschelman ptasparserow[ptanzi++] = ajj[k]; 25457ba1a0bfSKris Buschelman } 25467ba1a0bfSKris Buschelman } 25477ba1a0bfSKris Buschelman } 25487ba1a0bfSKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 25497ba1a0bfSKris Buschelman ptaj = ptasparserow; 25507ba1a0bfSKris Buschelman cnzi = 0; 25517ba1a0bfSKris Buschelman for (j=0;j<ptanzi;j++) { 25527ba1a0bfSKris Buschelman /* Get offset within block of P */ 25537ba1a0bfSKris Buschelman pshift = *ptaj%ppdof; 25547ba1a0bfSKris Buschelman /* Get block row of P */ 25557ba1a0bfSKris Buschelman prow = (*ptaj++)/ppdof; /* integer division */ 25567ba1a0bfSKris Buschelman /* P has same number of nonzeros per row as the compressed form */ 25577ba1a0bfSKris Buschelman pnzj = pi[prow+1] - pi[prow]; 25587ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 25597ba1a0bfSKris Buschelman for (k=0;k<pnzj;k++) { 25607ba1a0bfSKris Buschelman /* Locations in C are shifted by the offset within the block */ 25617ba1a0bfSKris Buschelman /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 25627ba1a0bfSKris Buschelman if (!denserow[pjj[k]*ppdof+pshift]) { 25637ba1a0bfSKris Buschelman denserow[pjj[k]*ppdof+pshift] = -1; 25647ba1a0bfSKris Buschelman sparserow[cnzi++] = pjj[k]*ppdof+pshift; 25657ba1a0bfSKris Buschelman } 25667ba1a0bfSKris Buschelman } 25677ba1a0bfSKris Buschelman } 25687ba1a0bfSKris Buschelman 25697ba1a0bfSKris Buschelman /* sort sparserow */ 25707ba1a0bfSKris Buschelman ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 25717ba1a0bfSKris Buschelman 25727ba1a0bfSKris Buschelman /* If free space is not available, make more free space */ 25737ba1a0bfSKris Buschelman /* Double the amount of total space in the list */ 25747ba1a0bfSKris Buschelman if (current_space->local_remaining<cnzi) { 25754238b7adSHong Zhang ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 25767ba1a0bfSKris Buschelman } 25777ba1a0bfSKris Buschelman 25787ba1a0bfSKris Buschelman /* Copy data into free space, and zero out denserows */ 25797ba1a0bfSKris Buschelman ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 25807ba1a0bfSKris Buschelman current_space->array += cnzi; 25817ba1a0bfSKris Buschelman current_space->local_used += cnzi; 25827ba1a0bfSKris Buschelman current_space->local_remaining -= cnzi; 25837ba1a0bfSKris Buschelman 25847ba1a0bfSKris Buschelman for (j=0;j<ptanzi;j++) { 25857ba1a0bfSKris Buschelman ptadenserow[ptasparserow[j]] = 0; 25867ba1a0bfSKris Buschelman } 25877ba1a0bfSKris Buschelman for (j=0;j<cnzi;j++) { 25887ba1a0bfSKris Buschelman denserow[sparserow[j]] = 0; 25897ba1a0bfSKris Buschelman } 25907ba1a0bfSKris Buschelman /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 25917ba1a0bfSKris Buschelman /* For now, we will recompute what is needed. */ 25927ba1a0bfSKris Buschelman ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 25937ba1a0bfSKris Buschelman } 25947ba1a0bfSKris Buschelman } 25957ba1a0bfSKris Buschelman /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 25967ba1a0bfSKris Buschelman /* Allocate space for cj, initialize cj, and */ 25977ba1a0bfSKris Buschelman /* destroy list of free space and other temporary array(s) */ 25987ba1a0bfSKris Buschelman ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 2599a1a86e44SBarry Smith ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 26007ba1a0bfSKris Buschelman ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 26017ba1a0bfSKris Buschelman 26027ba1a0bfSKris Buschelman /* Allocate space for ca */ 26037ba1a0bfSKris Buschelman ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 26047ba1a0bfSKris Buschelman ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 26057ba1a0bfSKris Buschelman 26067ba1a0bfSKris Buschelman /* put together the new matrix */ 26077adad957SLisandro Dalcin ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr); 26087ba1a0bfSKris Buschelman 26097ba1a0bfSKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 26107ba1a0bfSKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 26117ba1a0bfSKris Buschelman c = (Mat_SeqAIJ *)((*C)->data); 2612e6b907acSBarry Smith c->free_a = PETSC_TRUE; 2613e6b907acSBarry Smith c->free_ij = PETSC_TRUE; 26147ba1a0bfSKris Buschelman c->nonew = 0; 26157ba1a0bfSKris Buschelman 26167ba1a0bfSKris Buschelman /* Clean up. */ 26177ba1a0bfSKris Buschelman ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 26187ba1a0bfSKris Buschelman 26197ba1a0bfSKris Buschelman ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 26207ba1a0bfSKris Buschelman PetscFunctionReturn(0); 26217ba1a0bfSKris Buschelman } 26227ba1a0bfSKris Buschelman 26237ba1a0bfSKris Buschelman #undef __FUNCT__ 26247ba1a0bfSKris Buschelman #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ" 26257ba1a0bfSKris Buschelman PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 26267ba1a0bfSKris Buschelman { 26277ba1a0bfSKris Buschelman /* This routine requires testing -- first draft only */ 26287ba1a0bfSKris Buschelman PetscErrorCode ierr; 26297ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 26307ba1a0bfSKris Buschelman Mat P=pp->AIJ; 26317ba1a0bfSKris Buschelman Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 26327ba1a0bfSKris Buschelman Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 26337ba1a0bfSKris Buschelman Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 26347ba1a0bfSKris Buschelman PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 26357ba1a0bfSKris Buschelman PetscInt *ci=c->i,*cj=c->j,*cjj; 2636d0f46423SBarry Smith PetscInt am=A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof; 26377ba1a0bfSKris Buschelman PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 26387ba1a0bfSKris Buschelman MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 26397ba1a0bfSKris Buschelman 26407ba1a0bfSKris Buschelman PetscFunctionBegin; 26417ba1a0bfSKris Buschelman /* Allocate temporary array for storage of one row of A*P */ 26427ba1a0bfSKris Buschelman ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 26437ba1a0bfSKris Buschelman ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 26447ba1a0bfSKris Buschelman 26457ba1a0bfSKris Buschelman apj = (PetscInt *)(apa + cn); 26467ba1a0bfSKris Buschelman apjdense = apj + cn; 26477ba1a0bfSKris Buschelman 26487ba1a0bfSKris Buschelman /* Clear old values in C */ 26497ba1a0bfSKris Buschelman ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 26507ba1a0bfSKris Buschelman 26517ba1a0bfSKris Buschelman for (i=0;i<am;i++) { 26527ba1a0bfSKris Buschelman /* Form sparse row of A*P */ 26537ba1a0bfSKris Buschelman anzi = ai[i+1] - ai[i]; 26547ba1a0bfSKris Buschelman apnzj = 0; 26557ba1a0bfSKris Buschelman for (j=0;j<anzi;j++) { 26567ba1a0bfSKris Buschelman /* Get offset within block of P */ 26577ba1a0bfSKris Buschelman pshift = *aj%ppdof; 26587ba1a0bfSKris Buschelman /* Get block row of P */ 26597ba1a0bfSKris Buschelman prow = *aj++/ppdof; /* integer division */ 26607ba1a0bfSKris Buschelman pnzj = pi[prow+1] - pi[prow]; 26617ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 26627ba1a0bfSKris Buschelman paj = pa + pi[prow]; 26637ba1a0bfSKris Buschelman for (k=0;k<pnzj;k++) { 26647ba1a0bfSKris Buschelman poffset = pjj[k]*ppdof+pshift; 26657ba1a0bfSKris Buschelman if (!apjdense[poffset]) { 26667ba1a0bfSKris Buschelman apjdense[poffset] = -1; 26677ba1a0bfSKris Buschelman apj[apnzj++] = poffset; 26687ba1a0bfSKris Buschelman } 26697ba1a0bfSKris Buschelman apa[poffset] += (*aa)*paj[k]; 26707ba1a0bfSKris Buschelman } 2671*dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr); 26727ba1a0bfSKris Buschelman aa++; 26737ba1a0bfSKris Buschelman } 26747ba1a0bfSKris Buschelman 26757ba1a0bfSKris Buschelman /* Sort the j index array for quick sparse axpy. */ 26767ba1a0bfSKris Buschelman /* Note: a array does not need sorting as it is in dense storage locations. */ 26777ba1a0bfSKris Buschelman ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 26787ba1a0bfSKris Buschelman 26797ba1a0bfSKris Buschelman /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 26807ba1a0bfSKris Buschelman prow = i/ppdof; /* integer division */ 26817ba1a0bfSKris Buschelman pshift = i%ppdof; 26827ba1a0bfSKris Buschelman poffset = pi[prow]; 26837ba1a0bfSKris Buschelman pnzi = pi[prow+1] - poffset; 26847ba1a0bfSKris Buschelman /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 26857ba1a0bfSKris Buschelman pJ = pj+poffset; 26867ba1a0bfSKris Buschelman pA = pa+poffset; 26877ba1a0bfSKris Buschelman for (j=0;j<pnzi;j++) { 26887ba1a0bfSKris Buschelman crow = (*pJ)*ppdof+pshift; 26897ba1a0bfSKris Buschelman cjj = cj + ci[crow]; 26907ba1a0bfSKris Buschelman caj = ca + ci[crow]; 26917ba1a0bfSKris Buschelman pJ++; 26927ba1a0bfSKris Buschelman /* Perform sparse axpy operation. Note cjj includes apj. */ 26937ba1a0bfSKris Buschelman for (k=0,nextap=0;nextap<apnzj;k++) { 26947ba1a0bfSKris Buschelman if (cjj[k]==apj[nextap]) { 26957ba1a0bfSKris Buschelman caj[k] += (*pA)*apa[apj[nextap++]]; 26967ba1a0bfSKris Buschelman } 26977ba1a0bfSKris Buschelman } 2698*dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr); 26997ba1a0bfSKris Buschelman pA++; 27007ba1a0bfSKris Buschelman } 27017ba1a0bfSKris Buschelman 27027ba1a0bfSKris Buschelman /* Zero the current row info for A*P */ 27037ba1a0bfSKris Buschelman for (j=0;j<apnzj;j++) { 27047ba1a0bfSKris Buschelman apa[apj[j]] = 0.; 27057ba1a0bfSKris Buschelman apjdense[apj[j]] = 0; 27067ba1a0bfSKris Buschelman } 27077ba1a0bfSKris Buschelman } 27087ba1a0bfSKris Buschelman 27097ba1a0bfSKris Buschelman /* Assemble the final matrix and clean up */ 27107ba1a0bfSKris Buschelman ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 27117ba1a0bfSKris Buschelman ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 27127ba1a0bfSKris Buschelman ierr = PetscFree(apa);CHKERRQ(ierr); 271395ddefa2SHong Zhang PetscFunctionReturn(0); 271495ddefa2SHong Zhang } 27157ba1a0bfSKris Buschelman 271695ddefa2SHong Zhang #undef __FUNCT__ 271795ddefa2SHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIMAIJ" 271895ddefa2SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 271995ddefa2SHong Zhang { 272095ddefa2SHong Zhang PetscErrorCode ierr; 272195ddefa2SHong Zhang 272295ddefa2SHong Zhang PetscFunctionBegin; 272395ddefa2SHong Zhang /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */ 272495ddefa2SHong Zhang ierr = MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);CHKERRQ(ierr); 272595ddefa2SHong Zhang ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr); 272695ddefa2SHong Zhang PetscFunctionReturn(0); 272795ddefa2SHong Zhang } 272895ddefa2SHong Zhang 272995ddefa2SHong Zhang #undef __FUNCT__ 273095ddefa2SHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIMAIJ" 273195ddefa2SHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C) 273295ddefa2SHong Zhang { 273395ddefa2SHong Zhang PetscFunctionBegin; 273495ddefa2SHong Zhang SETERRQ(PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 27357ba1a0bfSKris Buschelman PetscFunctionReturn(0); 27367ba1a0bfSKris Buschelman } 27377ba1a0bfSKris Buschelman 2738be1d678aSKris Buschelman EXTERN_C_BEGIN 27397ba1a0bfSKris Buschelman #undef __FUNCT__ 27400fd73130SBarry Smith #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ" 2741f69a0ea3SMatthew Knepley PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 27429c4fc4c7SBarry Smith { 27439c4fc4c7SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2744ceb03754SKris Buschelman Mat a = b->AIJ,B; 27459c4fc4c7SBarry Smith Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 27469c4fc4c7SBarry Smith PetscErrorCode ierr; 27470fd73130SBarry Smith PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 2748ba8c8a56SBarry Smith PetscInt *cols; 2749ba8c8a56SBarry Smith PetscScalar *vals; 27509c4fc4c7SBarry Smith 27519c4fc4c7SBarry Smith PetscFunctionBegin; 27529c4fc4c7SBarry Smith ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 27537c307921SBarry Smith ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr); 27549c4fc4c7SBarry Smith for (i=0; i<m; i++) { 27559c4fc4c7SBarry Smith nmax = PetscMax(nmax,aij->ilen[i]); 27560fd73130SBarry Smith for (j=0; j<dof; j++) { 27570fd73130SBarry Smith ilen[dof*i+j] = aij->ilen[i]; 27589c4fc4c7SBarry Smith } 27599c4fc4c7SBarry Smith } 2760ceb03754SKris Buschelman ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr); 27619c4fc4c7SBarry Smith ierr = PetscFree(ilen);CHKERRQ(ierr); 27629c4fc4c7SBarry Smith ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr); 27639c4fc4c7SBarry Smith ii = 0; 27649c4fc4c7SBarry Smith for (i=0; i<m; i++) { 2765ba8c8a56SBarry Smith ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 27660fd73130SBarry Smith for (j=0; j<dof; j++) { 27679c4fc4c7SBarry Smith for (k=0; k<ncols; k++) { 27680fd73130SBarry Smith icols[k] = dof*cols[k]+j; 27699c4fc4c7SBarry Smith } 2770ceb03754SKris Buschelman ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 27719c4fc4c7SBarry Smith ii++; 27729c4fc4c7SBarry Smith } 2773ba8c8a56SBarry Smith ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 27749c4fc4c7SBarry Smith } 27759c4fc4c7SBarry Smith ierr = PetscFree(icols);CHKERRQ(ierr); 2776ceb03754SKris Buschelman ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2777ceb03754SKris Buschelman ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2778ceb03754SKris Buschelman 2779ceb03754SKris Buschelman if (reuse == MAT_REUSE_MATRIX) { 27808ab5b326SKris Buschelman ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 2781c3d102feSKris Buschelman } else { 2782ceb03754SKris Buschelman *newmat = B; 2783c3d102feSKris Buschelman } 27849c4fc4c7SBarry Smith PetscFunctionReturn(0); 27859c4fc4c7SBarry Smith } 2786be1d678aSKris Buschelman EXTERN_C_END 27879c4fc4c7SBarry Smith 27887c4f633dSBarry Smith #include "../src/mat/impls/aij/mpi/mpiaij.h" 2789be1d678aSKris Buschelman 2790be1d678aSKris Buschelman EXTERN_C_BEGIN 27910fd73130SBarry Smith #undef __FUNCT__ 27920fd73130SBarry Smith #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ" 2793f69a0ea3SMatthew Knepley PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 27940fd73130SBarry Smith { 27950fd73130SBarry Smith Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 2796ceb03754SKris Buschelman Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 27970fd73130SBarry Smith Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 27980fd73130SBarry Smith Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 27990fd73130SBarry Smith Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 28000fd73130SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 2801910ba992SMatthew Knepley PetscInt dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0; 2802910ba992SMatthew Knepley PetscInt *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL; 28030fd73130SBarry Smith PetscInt rstart,cstart,*garray,ii,k; 28040fd73130SBarry Smith PetscErrorCode ierr; 28050fd73130SBarry Smith PetscScalar *vals,*ovals; 28060fd73130SBarry Smith 28070fd73130SBarry Smith PetscFunctionBegin; 2808d0f46423SBarry Smith ierr = PetscMalloc2(A->rmap->n,PetscInt,&dnz,A->rmap->n,PetscInt,&onz);CHKERRQ(ierr); 2809d0f46423SBarry Smith for (i=0; i<A->rmap->n/dof; i++) { 28100fd73130SBarry Smith nmax = PetscMax(nmax,AIJ->ilen[i]); 28110fd73130SBarry Smith onmax = PetscMax(onmax,OAIJ->ilen[i]); 28120fd73130SBarry Smith for (j=0; j<dof; j++) { 28130fd73130SBarry Smith dnz[dof*i+j] = AIJ->ilen[i]; 28140fd73130SBarry Smith onz[dof*i+j] = OAIJ->ilen[i]; 28150fd73130SBarry Smith } 28160fd73130SBarry Smith } 2817d0f46423SBarry Smith ierr = MatCreateMPIAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr); 28180fd73130SBarry Smith ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 28190fd73130SBarry Smith 28207a1a7badSBarry Smith ierr = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr); 2821d0f46423SBarry Smith rstart = dof*maij->A->rmap->rstart; 2822d0f46423SBarry Smith cstart = dof*maij->A->cmap->rstart; 28230fd73130SBarry Smith garray = mpiaij->garray; 28240fd73130SBarry Smith 28250fd73130SBarry Smith ii = rstart; 2826d0f46423SBarry Smith for (i=0; i<A->rmap->n/dof; i++) { 28270fd73130SBarry Smith ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 28280fd73130SBarry Smith ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 28290fd73130SBarry Smith for (j=0; j<dof; j++) { 28300fd73130SBarry Smith for (k=0; k<ncols; k++) { 28310fd73130SBarry Smith icols[k] = cstart + dof*cols[k]+j; 28320fd73130SBarry Smith } 28330fd73130SBarry Smith for (k=0; k<oncols; k++) { 28340fd73130SBarry Smith oicols[k] = dof*garray[ocols[k]]+j; 28350fd73130SBarry Smith } 2836ceb03754SKris Buschelman ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 2837ceb03754SKris Buschelman ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 28380fd73130SBarry Smith ii++; 28390fd73130SBarry Smith } 28400fd73130SBarry Smith ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 28410fd73130SBarry Smith ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 28420fd73130SBarry Smith } 28430fd73130SBarry Smith ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 28440fd73130SBarry Smith 2845ceb03754SKris Buschelman ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2846ceb03754SKris Buschelman ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2847ceb03754SKris Buschelman 2848ceb03754SKris Buschelman if (reuse == MAT_REUSE_MATRIX) { 28497adad957SLisandro Dalcin PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 28507adad957SLisandro Dalcin ((PetscObject)A)->refct = 1; 28518ab5b326SKris Buschelman ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 28527adad957SLisandro Dalcin ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 2853c3d102feSKris Buschelman } else { 2854ceb03754SKris Buschelman *newmat = B; 2855c3d102feSKris Buschelman } 28560fd73130SBarry Smith PetscFunctionReturn(0); 28570fd73130SBarry Smith } 2858be1d678aSKris Buschelman EXTERN_C_END 28590fd73130SBarry Smith 28600fd73130SBarry Smith 2861bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */ 28625983afb6SSatish Balay /*MC 28630bad9183SKris Buschelman MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 28640bad9183SKris Buschelman operations for multicomponent problems. It interpolates each component the same 28650bad9183SKris Buschelman way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 28660bad9183SKris Buschelman and MATMPIAIJ for distributed matrices. 28670bad9183SKris Buschelman 28680bad9183SKris Buschelman Operations provided: 28690fd73130SBarry Smith + MatMult 28700bad9183SKris Buschelman . MatMultTranspose 28710bad9183SKris Buschelman . MatMultAdd 28720bad9183SKris Buschelman . MatMultTransposeAdd 28730fd73130SBarry Smith - MatView 28740bad9183SKris Buschelman 28750bad9183SKris Buschelman Level: advanced 28760bad9183SKris Buschelman 28770bad9183SKris Buschelman M*/ 28784a2ae208SSatish Balay #undef __FUNCT__ 28794a2ae208SSatish Balay #define __FUNCT__ "MatCreateMAIJ" 2880be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 288182b1193eSBarry Smith { 2882dfbe8321SBarry Smith PetscErrorCode ierr; 2883b24ad042SBarry Smith PetscMPIInt size; 2884b24ad042SBarry Smith PetscInt n; 28854cb9d9b8SBarry Smith Mat_MPIMAIJ *b; 288682b1193eSBarry Smith Mat B; 288782b1193eSBarry Smith 288882b1193eSBarry Smith PetscFunctionBegin; 2889d72c5c08SBarry Smith ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2890d72c5c08SBarry Smith 2891d72c5c08SBarry Smith if (dof == 1) { 2892d72c5c08SBarry Smith *maij = A; 2893d72c5c08SBarry Smith } else { 28947adad957SLisandro Dalcin ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2895d0f46423SBarry Smith ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr); 2896cd3bbe55SBarry Smith B->assembled = PETSC_TRUE; 2897d72c5c08SBarry Smith 28987adad957SLisandro Dalcin ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2899f4a53059SBarry Smith if (size == 1) { 2900b9b97703SBarry Smith ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 29014cb9d9b8SBarry Smith B->ops->destroy = MatDestroy_SeqMAIJ; 29020fd73130SBarry Smith B->ops->view = MatView_SeqMAIJ; 2903b9b97703SBarry Smith b = (Mat_MPIMAIJ*)B->data; 2904b9b97703SBarry Smith b->dof = dof; 29054cb9d9b8SBarry Smith b->AIJ = A; 2906d72c5c08SBarry Smith if (dof == 2) { 2907cd3bbe55SBarry Smith B->ops->mult = MatMult_SeqMAIJ_2; 2908cd3bbe55SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_2; 2909cd3bbe55SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 2910cd3bbe55SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 2911bcc973b7SBarry Smith } else if (dof == 3) { 2912bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_3; 2913bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_3; 2914bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 2915bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 2916bcc973b7SBarry Smith } else if (dof == 4) { 2917bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_4; 2918bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_4; 2919bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 2920bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 2921f9fae5adSBarry Smith } else if (dof == 5) { 2922f9fae5adSBarry Smith B->ops->mult = MatMult_SeqMAIJ_5; 2923f9fae5adSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_5; 2924f9fae5adSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 2925f9fae5adSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 29266cd98798SBarry Smith } else if (dof == 6) { 29276cd98798SBarry Smith B->ops->mult = MatMult_SeqMAIJ_6; 29286cd98798SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_6; 29296cd98798SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 29306cd98798SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 2931ed8eea03SBarry Smith } else if (dof == 7) { 2932ed8eea03SBarry Smith B->ops->mult = MatMult_SeqMAIJ_7; 2933ed8eea03SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_7; 2934ed8eea03SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 2935ed8eea03SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 293666ed3db0SBarry Smith } else if (dof == 8) { 293766ed3db0SBarry Smith B->ops->mult = MatMult_SeqMAIJ_8; 293866ed3db0SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_8; 293966ed3db0SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 294066ed3db0SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 29412b692628SMatthew Knepley } else if (dof == 9) { 29422b692628SMatthew Knepley B->ops->mult = MatMult_SeqMAIJ_9; 29432b692628SMatthew Knepley B->ops->multadd = MatMultAdd_SeqMAIJ_9; 29442b692628SMatthew Knepley B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 29452b692628SMatthew Knepley B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 2946b9cda22cSBarry Smith } else if (dof == 10) { 2947b9cda22cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_10; 2948b9cda22cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_10; 2949b9cda22cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 2950b9cda22cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 29512f7816d4SBarry Smith } else if (dof == 16) { 29522f7816d4SBarry Smith B->ops->mult = MatMult_SeqMAIJ_16; 29532f7816d4SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_16; 29542f7816d4SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 29552f7816d4SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 2956ed1c418cSBarry Smith } else if (dof == 18) { 2957ed1c418cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_18; 2958ed1c418cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_18; 2959ed1c418cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 2960ed1c418cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 296182b1193eSBarry Smith } else { 296277431f27SBarry Smith SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof); 296382b1193eSBarry Smith } 29647ba1a0bfSKris Buschelman B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ; 29657ba1a0bfSKris Buschelman B->ops->ptapnumeric_seqaij = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 29669c4fc4c7SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 2967f4a53059SBarry Smith } else { 2968f4a53059SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 2969f4a53059SBarry Smith IS from,to; 2970f4a53059SBarry Smith Vec gvec; 2971b24ad042SBarry Smith PetscInt *garray,i; 2972f4a53059SBarry Smith 2973b9b97703SBarry Smith ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 2974d72c5c08SBarry Smith B->ops->destroy = MatDestroy_MPIMAIJ; 29750fd73130SBarry Smith B->ops->view = MatView_MPIMAIJ; 2976b9b97703SBarry Smith b = (Mat_MPIMAIJ*)B->data; 2977b9b97703SBarry Smith b->dof = dof; 2978b9b97703SBarry Smith b->A = A; 29794cb9d9b8SBarry Smith ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 29804cb9d9b8SBarry Smith ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 29814cb9d9b8SBarry Smith 2982f4a53059SBarry Smith ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 2983f4a53059SBarry Smith ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr); 298413288a74SBarry Smith ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 2985f4a53059SBarry Smith 2986f4a53059SBarry Smith /* create two temporary Index sets for build scatter gather */ 2987b24ad042SBarry Smith ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2988f4a53059SBarry Smith for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i]; 29897adad957SLisandro Dalcin ierr = ISCreateBlock(((PetscObject)A)->comm,dof,n,garray,&from);CHKERRQ(ierr); 2990f4a53059SBarry Smith ierr = PetscFree(garray);CHKERRQ(ierr); 2991f4a53059SBarry Smith ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 2992f4a53059SBarry Smith 2993f4a53059SBarry Smith /* create temporary global vector to generate scatter context */ 2994d0f46423SBarry Smith ierr = VecCreateMPIWithArray(((PetscObject)A)->comm,dof*A->cmap->n,dof*A->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); 299513288a74SBarry Smith ierr = VecSetBlockSize(gvec,dof);CHKERRQ(ierr); 2996f4a53059SBarry Smith 2997f4a53059SBarry Smith /* generate the scatter context */ 2998f4a53059SBarry Smith ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 2999f4a53059SBarry Smith 3000f4a53059SBarry Smith ierr = ISDestroy(from);CHKERRQ(ierr); 3001f4a53059SBarry Smith ierr = ISDestroy(to);CHKERRQ(ierr); 3002f4a53059SBarry Smith ierr = VecDestroy(gvec);CHKERRQ(ierr); 3003f4a53059SBarry Smith 3004f4a53059SBarry Smith B->ops->mult = MatMult_MPIMAIJ_dof; 30054cb9d9b8SBarry Smith B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 30064cb9d9b8SBarry Smith B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 30074cb9d9b8SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 300895ddefa2SHong Zhang B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ; 300995ddefa2SHong Zhang B->ops->ptapnumeric_mpiaij = MatPtAPNumeric_MPIAIJ_MPIMAIJ; 30100fd73130SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 3011f4a53059SBarry Smith } 3012cd3bbe55SBarry Smith *maij = B; 30130fd73130SBarry Smith ierr = MatView_Private(B);CHKERRQ(ierr); 3014d72c5c08SBarry Smith } 301582b1193eSBarry Smith PetscFunctionReturn(0); 301682b1193eSBarry Smith } 301782b1193eSBarry Smith 301882b1193eSBarry Smith 301982b1193eSBarry Smith 302082b1193eSBarry Smith 302182b1193eSBarry Smith 302282b1193eSBarry Smith 302382b1193eSBarry Smith 302482b1193eSBarry Smith 302582b1193eSBarry Smith 302682b1193eSBarry Smith 302782b1193eSBarry Smith 302882b1193eSBarry Smith 3029