1be1d678aSKris Buschelman #define PETSCMAT_DLL 2be1d678aSKris Buschelman 32593348eSBarry Smith /* 4b6490206SBarry Smith Defines the basic matrix operations for the BAIJ (compressed row) 52593348eSBarry Smith matrix storage format. 62593348eSBarry Smith */ 770f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h" 81a6a6d98SBarry Smith #include "src/inline/spops.h" 9325e03aeSBarry Smith #include "petscsys.h" /*I "petscmat.h" I*/ 103b547af2SSatish Balay 11b01c7715SBarry Smith #include "src/inline/ilu.h" 12b01c7715SBarry Smith 13b01c7715SBarry Smith #undef __FUNCT__ 1443516a2dSKris Buschelman #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 15bc08b0f1SBarry Smith /*@ 1643516a2dSKris Buschelman MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 1743516a2dSKris Buschelman 1843516a2dSKris Buschelman Collective on Mat 1943516a2dSKris Buschelman 2043516a2dSKris Buschelman Input Parameters: 2143516a2dSKris Buschelman . mat - the matrix 2243516a2dSKris Buschelman 2343516a2dSKris Buschelman Level: advanced 2443516a2dSKris Buschelman @*/ 2546129b97SKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 2643516a2dSKris Buschelman { 2743516a2dSKris Buschelman PetscErrorCode ierr,(*f)(Mat); 2843516a2dSKris Buschelman 2943516a2dSKris Buschelman PetscFunctionBegin; 3043516a2dSKris Buschelman PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 3143516a2dSKris Buschelman if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 3243516a2dSKris Buschelman if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3343516a2dSKris Buschelman 3443516a2dSKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 3543516a2dSKris Buschelman if (f) { 3643516a2dSKris Buschelman ierr = (*f)(mat);CHKERRQ(ierr); 3743516a2dSKris Buschelman } else { 3843516a2dSKris Buschelman SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 3943516a2dSKris Buschelman } 4043516a2dSKris Buschelman PetscFunctionReturn(0); 4143516a2dSKris Buschelman } 4243516a2dSKris Buschelman 4343516a2dSKris Buschelman EXTERN_C_BEGIN 4443516a2dSKris Buschelman #undef __FUNCT__ 45b01c7715SBarry Smith #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 4646129b97SKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 47b01c7715SBarry Smith { 48b01c7715SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 496849ba73SBarry Smith PetscErrorCode ierr; 50899cda47SBarry Smith PetscInt *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs; 51b01c7715SBarry Smith PetscScalar *v = a->a,*odiag,*diag,*mdiag; 52b01c7715SBarry Smith 53b01c7715SBarry Smith PetscFunctionBegin; 54b01c7715SBarry Smith if (a->idiagvalid) PetscFunctionReturn(0); 55b01c7715SBarry Smith ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 56b01c7715SBarry Smith diag_offset = a->diag; 57b01c7715SBarry Smith if (!a->idiag) { 58b01c7715SBarry Smith ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 59b01c7715SBarry Smith } 60b01c7715SBarry Smith diag = a->idiag; 61b01c7715SBarry Smith mdiag = a->idiag+bs*bs*mbs; 62b01c7715SBarry Smith /* factor and invert each block */ 63521d7252SBarry Smith switch (bs){ 64b01c7715SBarry Smith case 2: 65b01c7715SBarry Smith for (i=0; i<mbs; i++) { 66b01c7715SBarry Smith odiag = v + 4*diag_offset[i]; 67b01c7715SBarry Smith diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 68b01c7715SBarry Smith mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 69b01c7715SBarry Smith ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 70b01c7715SBarry Smith diag += 4; 71b01c7715SBarry Smith mdiag += 4; 72b01c7715SBarry Smith } 73b01c7715SBarry Smith break; 74b01c7715SBarry Smith case 3: 75b01c7715SBarry Smith for (i=0; i<mbs; i++) { 76b01c7715SBarry Smith odiag = v + 9*diag_offset[i]; 77b01c7715SBarry Smith diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78b01c7715SBarry Smith diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 79b01c7715SBarry Smith diag[8] = odiag[8]; 80b01c7715SBarry Smith mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 81b01c7715SBarry Smith mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 82b01c7715SBarry Smith mdiag[8] = odiag[8]; 83b01c7715SBarry Smith ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 84b01c7715SBarry Smith diag += 9; 85b01c7715SBarry Smith mdiag += 9; 86b01c7715SBarry Smith } 87b01c7715SBarry Smith break; 88b01c7715SBarry Smith case 4: 89b01c7715SBarry Smith for (i=0; i<mbs; i++) { 90b01c7715SBarry Smith odiag = v + 16*diag_offset[i]; 91b01c7715SBarry Smith ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 92b01c7715SBarry Smith ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93b01c7715SBarry Smith ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 94b01c7715SBarry Smith diag += 16; 95b01c7715SBarry Smith mdiag += 16; 96b01c7715SBarry Smith } 97b01c7715SBarry Smith break; 98b01c7715SBarry Smith case 5: 99b01c7715SBarry Smith for (i=0; i<mbs; i++) { 100b01c7715SBarry Smith odiag = v + 25*diag_offset[i]; 101b01c7715SBarry Smith ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 102b01c7715SBarry Smith ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103b01c7715SBarry Smith ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 104b01c7715SBarry Smith diag += 25; 105b01c7715SBarry Smith mdiag += 25; 106b01c7715SBarry Smith } 107b01c7715SBarry Smith break; 108b01c7715SBarry Smith default: 109521d7252SBarry Smith SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 110b01c7715SBarry Smith } 111b01c7715SBarry Smith a->idiagvalid = PETSC_TRUE; 112b01c7715SBarry Smith PetscFunctionReturn(0); 113b01c7715SBarry Smith } 11443516a2dSKris Buschelman EXTERN_C_END 115b01c7715SBarry Smith 116b01c7715SBarry Smith #undef __FUNCT__ 117b01c7715SBarry Smith #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 118c1ac3661SBarry Smith PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 119b01c7715SBarry Smith { 120b01c7715SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 121b01c7715SBarry Smith PetscScalar *x,x1,x2,s1,s2; 122b01c7715SBarry Smith const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 123dfbe8321SBarry Smith PetscErrorCode ierr; 124c1ac3661SBarry Smith PetscInt m = a->mbs,i,i2,nz,idx; 125c1ac3661SBarry Smith const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 126b01c7715SBarry Smith 127b01c7715SBarry Smith PetscFunctionBegin; 12851f519a2SBarry Smith if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 129b01c7715SBarry Smith its = its*lits; 13077431f27SBarry Smith if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 131b01c7715SBarry Smith if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 132b01c7715SBarry Smith if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 13371f1c65dSBarry Smith if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 134b01c7715SBarry Smith if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 135b01c7715SBarry Smith 136b01c7715SBarry Smith if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 137b01c7715SBarry Smith 138b01c7715SBarry Smith diag = a->diag; 139b01c7715SBarry Smith idiag = a->idiag; 1401ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1411ebc52fbSHong Zhang ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 142b01c7715SBarry Smith 143b01c7715SBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 144b01c7715SBarry Smith if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 145b01c7715SBarry Smith x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 146b01c7715SBarry Smith x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 147b01c7715SBarry Smith i2 = 2; 148b01c7715SBarry Smith idiag += 4; 149b01c7715SBarry Smith for (i=1; i<m; i++) { 150b01c7715SBarry Smith v = aa + 4*ai[i]; 151b01c7715SBarry Smith vi = aj + ai[i]; 152b01c7715SBarry Smith nz = diag[i] - ai[i]; 153b01c7715SBarry Smith s1 = b[i2]; s2 = b[i2+1]; 154b01c7715SBarry Smith while (nz--) { 155b01c7715SBarry Smith idx = 2*(*vi++); 156b01c7715SBarry Smith x1 = x[idx]; x2 = x[1+idx]; 157b01c7715SBarry Smith s1 -= v[0]*x1 + v[2]*x2; 158b01c7715SBarry Smith s2 -= v[1]*x1 + v[3]*x2; 159b01c7715SBarry Smith v += 4; 160b01c7715SBarry Smith } 161b01c7715SBarry Smith x[i2] = idiag[0]*s1 + idiag[2]*s2; 162b01c7715SBarry Smith x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 163b01c7715SBarry Smith idiag += 4; 164b01c7715SBarry Smith i2 += 2; 165b01c7715SBarry Smith } 166b01c7715SBarry Smith /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 167efee365bSSatish Balay ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 168b01c7715SBarry Smith } 169b01c7715SBarry Smith if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 170b01c7715SBarry Smith (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 171b01c7715SBarry Smith i2 = 0; 172b01c7715SBarry Smith mdiag = a->idiag+4*a->mbs; 173b01c7715SBarry Smith for (i=0; i<m; i++) { 174b01c7715SBarry Smith x1 = x[i2]; x2 = x[i2+1]; 175b01c7715SBarry Smith x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 176b01c7715SBarry Smith x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 177b01c7715SBarry Smith mdiag += 4; 178b01c7715SBarry Smith i2 += 2; 179b01c7715SBarry Smith } 180efee365bSSatish Balay ierr = PetscLogFlops(6*m);CHKERRQ(ierr); 181b01c7715SBarry Smith } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 182899cda47SBarry Smith ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 183b01c7715SBarry Smith } 184b01c7715SBarry Smith if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 185b01c7715SBarry Smith idiag = a->idiag+4*a->mbs - 4; 186b01c7715SBarry Smith i2 = 2*m - 2; 187b01c7715SBarry Smith x1 = x[i2]; x2 = x[i2+1]; 188b01c7715SBarry Smith x[i2] = idiag[0]*x1 + idiag[2]*x2; 189b01c7715SBarry Smith x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 190b01c7715SBarry Smith idiag -= 4; 191b01c7715SBarry Smith i2 -= 2; 192b01c7715SBarry Smith for (i=m-2; i>=0; i--) { 193b01c7715SBarry Smith v = aa + 4*(diag[i]+1); 194b01c7715SBarry Smith vi = aj + diag[i] + 1; 195b01c7715SBarry Smith nz = ai[i+1] - diag[i] - 1; 196b01c7715SBarry Smith s1 = x[i2]; s2 = x[i2+1]; 197b01c7715SBarry Smith while (nz--) { 198b01c7715SBarry Smith idx = 2*(*vi++); 199b01c7715SBarry Smith x1 = x[idx]; x2 = x[1+idx]; 200b01c7715SBarry Smith s1 -= v[0]*x1 + v[2]*x2; 201b01c7715SBarry Smith s2 -= v[1]*x1 + v[3]*x2; 202b01c7715SBarry Smith v += 4; 203b01c7715SBarry Smith } 204b01c7715SBarry Smith x[i2] = idiag[0]*s1 + idiag[2]*s2; 205b01c7715SBarry Smith x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 206b01c7715SBarry Smith idiag -= 4; 207b01c7715SBarry Smith i2 -= 2; 208b01c7715SBarry Smith } 209efee365bSSatish Balay ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 210b01c7715SBarry Smith } 211b01c7715SBarry Smith } else { 212634064b4SBarry Smith SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 213b01c7715SBarry Smith } 2141ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2151ebc52fbSHong Zhang ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 216b01c7715SBarry Smith PetscFunctionReturn(0); 217b01c7715SBarry Smith } 218b01c7715SBarry Smith 219b01c7715SBarry Smith #undef __FUNCT__ 220b01c7715SBarry Smith #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 221c1ac3661SBarry Smith PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 222b01c7715SBarry Smith { 223b01c7715SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 224b01c7715SBarry Smith PetscScalar *x,x1,x2,x3,s1,s2,s3; 225b01c7715SBarry Smith const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 226dfbe8321SBarry Smith PetscErrorCode ierr; 227c1ac3661SBarry Smith PetscInt m = a->mbs,i,i2,nz,idx; 228c1ac3661SBarry Smith const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 229b01c7715SBarry Smith 230b01c7715SBarry Smith PetscFunctionBegin; 231b01c7715SBarry Smith its = its*lits; 23271f1c65dSBarry Smith if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 23377431f27SBarry Smith if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 234b01c7715SBarry Smith if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 235b01c7715SBarry Smith if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 23671f1c65dSBarry Smith if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 237b01c7715SBarry Smith if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 238b01c7715SBarry Smith 239b01c7715SBarry Smith if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 240b01c7715SBarry Smith 241b01c7715SBarry Smith diag = a->diag; 242b01c7715SBarry Smith idiag = a->idiag; 2431ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2441ebc52fbSHong Zhang ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 245b01c7715SBarry Smith 246b01c7715SBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 247b01c7715SBarry Smith if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 248b01c7715SBarry Smith x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 249b01c7715SBarry Smith x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 250b01c7715SBarry Smith x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 251b01c7715SBarry Smith i2 = 3; 252b01c7715SBarry Smith idiag += 9; 253b01c7715SBarry Smith for (i=1; i<m; i++) { 254b01c7715SBarry Smith v = aa + 9*ai[i]; 255b01c7715SBarry Smith vi = aj + ai[i]; 256b01c7715SBarry Smith nz = diag[i] - ai[i]; 257b01c7715SBarry Smith s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 258b01c7715SBarry Smith while (nz--) { 259b01c7715SBarry Smith idx = 3*(*vi++); 260b01c7715SBarry Smith x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 261b01c7715SBarry Smith s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 262b01c7715SBarry Smith s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 263b01c7715SBarry Smith s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 264b01c7715SBarry Smith v += 9; 265b01c7715SBarry Smith } 266b01c7715SBarry Smith x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 267b01c7715SBarry Smith x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 268b01c7715SBarry Smith x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 269b01c7715SBarry Smith idiag += 9; 270b01c7715SBarry Smith i2 += 3; 271b01c7715SBarry Smith } 272b01c7715SBarry Smith /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 273efee365bSSatish Balay ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 274b01c7715SBarry Smith } 275b01c7715SBarry Smith if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 276b01c7715SBarry Smith (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 277b01c7715SBarry Smith i2 = 0; 278b01c7715SBarry Smith mdiag = a->idiag+9*a->mbs; 279b01c7715SBarry Smith for (i=0; i<m; i++) { 280b01c7715SBarry Smith x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 281b01c7715SBarry Smith x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 282b01c7715SBarry Smith x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 283b01c7715SBarry Smith x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 284b01c7715SBarry Smith mdiag += 9; 285b01c7715SBarry Smith i2 += 3; 286b01c7715SBarry Smith } 287efee365bSSatish Balay ierr = PetscLogFlops(15*m);CHKERRQ(ierr); 288b01c7715SBarry Smith } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 289899cda47SBarry Smith ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 290b01c7715SBarry Smith } 291b01c7715SBarry Smith if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 292b01c7715SBarry Smith idiag = a->idiag+9*a->mbs - 9; 293b01c7715SBarry Smith i2 = 3*m - 3; 294b01c7715SBarry Smith x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 295b01c7715SBarry Smith x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 296b01c7715SBarry Smith x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 297b01c7715SBarry Smith x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 298b01c7715SBarry Smith idiag -= 9; 299b01c7715SBarry Smith i2 -= 3; 300b01c7715SBarry Smith for (i=m-2; i>=0; i--) { 301b01c7715SBarry Smith v = aa + 9*(diag[i]+1); 302b01c7715SBarry Smith vi = aj + diag[i] + 1; 303b01c7715SBarry Smith nz = ai[i+1] - diag[i] - 1; 304b01c7715SBarry Smith s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 305b01c7715SBarry Smith while (nz--) { 306b01c7715SBarry Smith idx = 3*(*vi++); 307b01c7715SBarry Smith x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 308b01c7715SBarry Smith s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 309b01c7715SBarry Smith s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 310b01c7715SBarry Smith s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 311b01c7715SBarry Smith v += 9; 312b01c7715SBarry Smith } 313b01c7715SBarry Smith x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 314b01c7715SBarry Smith x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 315b01c7715SBarry Smith x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 316b01c7715SBarry Smith idiag -= 9; 317b01c7715SBarry Smith i2 -= 3; 318b01c7715SBarry Smith } 319efee365bSSatish Balay ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 320b01c7715SBarry Smith } 321b01c7715SBarry Smith } else { 322634064b4SBarry Smith SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 323b01c7715SBarry Smith } 3241ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3251ebc52fbSHong Zhang ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 326b01c7715SBarry Smith PetscFunctionReturn(0); 327b01c7715SBarry Smith } 328b01c7715SBarry Smith 329b01c7715SBarry Smith #undef __FUNCT__ 330b01c7715SBarry Smith #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 331c1ac3661SBarry Smith PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 332b01c7715SBarry Smith { 333b01c7715SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 334b01c7715SBarry Smith PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 335b01c7715SBarry Smith const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 336dfbe8321SBarry Smith PetscErrorCode ierr; 337c1ac3661SBarry Smith PetscInt m = a->mbs,i,i2,nz,idx; 338c1ac3661SBarry Smith const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 339b01c7715SBarry Smith 340b01c7715SBarry Smith PetscFunctionBegin; 34171f1c65dSBarry Smith if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 342b01c7715SBarry Smith its = its*lits; 34377431f27SBarry Smith if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 344b01c7715SBarry Smith if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 345b01c7715SBarry Smith if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 34671f1c65dSBarry Smith if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 347b01c7715SBarry Smith if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 348b01c7715SBarry Smith 349b01c7715SBarry Smith if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 350b01c7715SBarry Smith 351b01c7715SBarry Smith diag = a->diag; 352b01c7715SBarry Smith idiag = a->idiag; 3531ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3541ebc52fbSHong Zhang ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 355b01c7715SBarry Smith 356b01c7715SBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 357b01c7715SBarry Smith if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 358b01c7715SBarry Smith x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 359b01c7715SBarry Smith x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 360b01c7715SBarry Smith x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 361b01c7715SBarry Smith x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 362b01c7715SBarry Smith i2 = 4; 363b01c7715SBarry Smith idiag += 16; 364b01c7715SBarry Smith for (i=1; i<m; i++) { 365b01c7715SBarry Smith v = aa + 16*ai[i]; 366b01c7715SBarry Smith vi = aj + ai[i]; 367b01c7715SBarry Smith nz = diag[i] - ai[i]; 368b01c7715SBarry Smith s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 369b01c7715SBarry Smith while (nz--) { 370b01c7715SBarry Smith idx = 4*(*vi++); 371b01c7715SBarry Smith x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 372b01c7715SBarry Smith s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 373b01c7715SBarry Smith s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 374b01c7715SBarry Smith s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 375b01c7715SBarry Smith s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 376b01c7715SBarry Smith v += 16; 377b01c7715SBarry Smith } 378b01c7715SBarry Smith x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 379b01c7715SBarry Smith x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 380b01c7715SBarry Smith x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 381b01c7715SBarry Smith x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 382b01c7715SBarry Smith idiag += 16; 383b01c7715SBarry Smith i2 += 4; 384b01c7715SBarry Smith } 385b01c7715SBarry Smith /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 386efee365bSSatish Balay ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 387b01c7715SBarry Smith } 388b01c7715SBarry Smith if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 389b01c7715SBarry Smith (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 390b01c7715SBarry Smith i2 = 0; 391b01c7715SBarry Smith mdiag = a->idiag+16*a->mbs; 392b01c7715SBarry Smith for (i=0; i<m; i++) { 393b01c7715SBarry Smith x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 394b01c7715SBarry Smith x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 395b01c7715SBarry Smith x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 396b01c7715SBarry Smith x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 397b01c7715SBarry Smith x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 398b01c7715SBarry Smith mdiag += 16; 399b01c7715SBarry Smith i2 += 4; 400b01c7715SBarry Smith } 401efee365bSSatish Balay ierr = PetscLogFlops(28*m);CHKERRQ(ierr); 402b01c7715SBarry Smith } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 403899cda47SBarry Smith ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 404b01c7715SBarry Smith } 405b01c7715SBarry Smith if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 406b01c7715SBarry Smith idiag = a->idiag+16*a->mbs - 16; 407b01c7715SBarry Smith i2 = 4*m - 4; 408b01c7715SBarry Smith x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 409b01c7715SBarry Smith x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 410b01c7715SBarry Smith x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 411b01c7715SBarry Smith x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 412b01c7715SBarry Smith x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 413b01c7715SBarry Smith idiag -= 16; 414b01c7715SBarry Smith i2 -= 4; 415b01c7715SBarry Smith for (i=m-2; i>=0; i--) { 416b01c7715SBarry Smith v = aa + 16*(diag[i]+1); 417b01c7715SBarry Smith vi = aj + diag[i] + 1; 418b01c7715SBarry Smith nz = ai[i+1] - diag[i] - 1; 419b01c7715SBarry Smith s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 420b01c7715SBarry Smith while (nz--) { 421b01c7715SBarry Smith idx = 4*(*vi++); 422b01c7715SBarry Smith x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 423b01c7715SBarry Smith s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 424b01c7715SBarry Smith s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 425b01c7715SBarry Smith s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 426b01c7715SBarry Smith s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 427b01c7715SBarry Smith v += 16; 428b01c7715SBarry Smith } 429b01c7715SBarry Smith x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 430b01c7715SBarry Smith x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 431b01c7715SBarry Smith x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 432b01c7715SBarry Smith x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 433b01c7715SBarry Smith idiag -= 16; 434b01c7715SBarry Smith i2 -= 4; 435b01c7715SBarry Smith } 436efee365bSSatish Balay ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 437b01c7715SBarry Smith } 438b01c7715SBarry Smith } else { 439634064b4SBarry Smith SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 440b01c7715SBarry Smith } 4411ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 4421ebc52fbSHong Zhang ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 443b01c7715SBarry Smith PetscFunctionReturn(0); 444b01c7715SBarry Smith } 445b01c7715SBarry Smith 446b01c7715SBarry Smith #undef __FUNCT__ 447b01c7715SBarry Smith #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 448c1ac3661SBarry Smith PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 449b01c7715SBarry Smith { 450b01c7715SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 451b01c7715SBarry Smith PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 452b01c7715SBarry Smith const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 453dfbe8321SBarry Smith PetscErrorCode ierr; 454c1ac3661SBarry Smith PetscInt m = a->mbs,i,i2,nz,idx; 455c1ac3661SBarry Smith const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 456b01c7715SBarry Smith 457b01c7715SBarry Smith PetscFunctionBegin; 45871f1c65dSBarry Smith if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 459b01c7715SBarry Smith its = its*lits; 46077431f27SBarry Smith if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 461b01c7715SBarry Smith if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 462b01c7715SBarry Smith if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 46371f1c65dSBarry Smith if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 464b01c7715SBarry Smith if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 465b01c7715SBarry Smith 466b01c7715SBarry Smith if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 467b01c7715SBarry Smith 468b01c7715SBarry Smith diag = a->diag; 469b01c7715SBarry Smith idiag = a->idiag; 4701ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 4711ebc52fbSHong Zhang ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 472b01c7715SBarry Smith 473b01c7715SBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 474b01c7715SBarry Smith if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 475b01c7715SBarry Smith x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 476b01c7715SBarry Smith x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 477b01c7715SBarry Smith x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 478b01c7715SBarry Smith x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 479b01c7715SBarry Smith x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 480b01c7715SBarry Smith i2 = 5; 481b01c7715SBarry Smith idiag += 25; 482b01c7715SBarry Smith for (i=1; i<m; i++) { 483b01c7715SBarry Smith v = aa + 25*ai[i]; 484b01c7715SBarry Smith vi = aj + ai[i]; 485b01c7715SBarry Smith nz = diag[i] - ai[i]; 486b01c7715SBarry Smith s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 487b01c7715SBarry Smith while (nz--) { 488b01c7715SBarry Smith idx = 5*(*vi++); 489b01c7715SBarry Smith x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 490b01c7715SBarry Smith s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 491b01c7715SBarry Smith s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 492b01c7715SBarry Smith s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 493b01c7715SBarry Smith s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 494b01c7715SBarry Smith s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 495b01c7715SBarry Smith v += 25; 496b01c7715SBarry Smith } 497b01c7715SBarry Smith x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 498b01c7715SBarry Smith x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 499b01c7715SBarry Smith x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 500b01c7715SBarry Smith x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 501b01c7715SBarry Smith x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 502b01c7715SBarry Smith idiag += 25; 503b01c7715SBarry Smith i2 += 5; 504b01c7715SBarry Smith } 505b01c7715SBarry Smith /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 506efee365bSSatish Balay ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 507b01c7715SBarry Smith } 508b01c7715SBarry Smith if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 509b01c7715SBarry Smith (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 510b01c7715SBarry Smith i2 = 0; 511b01c7715SBarry Smith mdiag = a->idiag+25*a->mbs; 512b01c7715SBarry Smith for (i=0; i<m; i++) { 513b01c7715SBarry Smith x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 514b01c7715SBarry Smith x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 515b01c7715SBarry Smith x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 516b01c7715SBarry Smith x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 517b01c7715SBarry Smith x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 518b01c7715SBarry Smith x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 519b01c7715SBarry Smith mdiag += 25; 520b01c7715SBarry Smith i2 += 5; 521b01c7715SBarry Smith } 522efee365bSSatish Balay ierr = PetscLogFlops(45*m);CHKERRQ(ierr); 523b01c7715SBarry Smith } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 524899cda47SBarry Smith ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 525b01c7715SBarry Smith } 526b01c7715SBarry Smith if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 527b01c7715SBarry Smith idiag = a->idiag+25*a->mbs - 25; 528b01c7715SBarry Smith i2 = 5*m - 5; 529b01c7715SBarry Smith x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 530b01c7715SBarry Smith x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 531b01c7715SBarry Smith x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 532b01c7715SBarry Smith x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 533b01c7715SBarry Smith x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 534b01c7715SBarry Smith x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 535b01c7715SBarry Smith idiag -= 25; 536b01c7715SBarry Smith i2 -= 5; 537b01c7715SBarry Smith for (i=m-2; i>=0; i--) { 538b01c7715SBarry Smith v = aa + 25*(diag[i]+1); 539b01c7715SBarry Smith vi = aj + diag[i] + 1; 540b01c7715SBarry Smith nz = ai[i+1] - diag[i] - 1; 541b01c7715SBarry Smith s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 542b01c7715SBarry Smith while (nz--) { 543b01c7715SBarry Smith idx = 5*(*vi++); 544b01c7715SBarry Smith x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 545b01c7715SBarry Smith s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 546b01c7715SBarry Smith s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 547b01c7715SBarry Smith s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 548b01c7715SBarry Smith s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 549b01c7715SBarry Smith s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 550b01c7715SBarry Smith v += 25; 551b01c7715SBarry Smith } 552b01c7715SBarry Smith x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 553b01c7715SBarry Smith x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 554b01c7715SBarry Smith x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 555b01c7715SBarry Smith x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 556b01c7715SBarry Smith x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 557b01c7715SBarry Smith idiag -= 25; 558b01c7715SBarry Smith i2 -= 5; 559b01c7715SBarry Smith } 560efee365bSSatish Balay ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 561b01c7715SBarry Smith } 562b01c7715SBarry Smith } else { 563634064b4SBarry Smith SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 564b01c7715SBarry Smith } 5651ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 5661ebc52fbSHong Zhang ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 567b01c7715SBarry Smith PetscFunctionReturn(0); 568b01c7715SBarry Smith } 569b01c7715SBarry Smith 570af674e45SBarry Smith /* 57181824310SBarry Smith Special version for direct calls from Fortran (Used in PETSc-fun3d) 572af674e45SBarry Smith */ 573af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS) 574af674e45SBarry Smith #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 575af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 576af674e45SBarry Smith #define matsetvaluesblocked4_ matsetvaluesblocked4 577af674e45SBarry Smith #endif 578af674e45SBarry Smith 5799c8c1041SBarry Smith EXTERN_C_BEGIN 580af674e45SBarry Smith #undef __FUNCT__ 581af674e45SBarry Smith #define __FUNCT__ "matsetvaluesblocked4_" 582be1d678aSKris Buschelman void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 583af674e45SBarry Smith { 584af674e45SBarry Smith Mat A = *AA; 585af674e45SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 586c1ac3661SBarry Smith PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 587c1ac3661SBarry Smith PetscInt *ai=a->i,*ailen=a->ilen; 58817ec6a02SBarry Smith PetscInt *aj=a->j,stepval,lastcol = -1; 589f15d580aSBarry Smith const PetscScalar *value = v; 5904bb09213Spetsc MatScalar *ap,*aa = a->a,*bap; 591af674e45SBarry Smith 592af674e45SBarry Smith PetscFunctionBegin; 5937adad957SLisandro Dalcin if (A->rmap.bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 594af674e45SBarry Smith stepval = (n-1)*4; 595af674e45SBarry Smith for (k=0; k<m; k++) { /* loop over added rows */ 596af674e45SBarry Smith row = im[k]; 597af674e45SBarry Smith rp = aj + ai[row]; 598af674e45SBarry Smith ap = aa + 16*ai[row]; 599af674e45SBarry Smith nrow = ailen[row]; 600af674e45SBarry Smith low = 0; 60117ec6a02SBarry Smith high = nrow; 602af674e45SBarry Smith for (l=0; l<n; l++) { /* loop over added columns */ 603af674e45SBarry Smith col = in[l]; 60417ec6a02SBarry Smith if (col <= lastcol) low = 0; else high = nrow; 60517ec6a02SBarry Smith lastcol = col; 6061e3347e8SBarry Smith value = v + k*(stepval+4 + l)*4; 607af674e45SBarry Smith while (high-low > 7) { 608af674e45SBarry Smith t = (low+high)/2; 609af674e45SBarry Smith if (rp[t] > col) high = t; 610af674e45SBarry Smith else low = t; 611af674e45SBarry Smith } 612af674e45SBarry Smith for (i=low; i<high; i++) { 613af674e45SBarry Smith if (rp[i] > col) break; 614af674e45SBarry Smith if (rp[i] == col) { 615af674e45SBarry Smith bap = ap + 16*i; 616af674e45SBarry Smith for (ii=0; ii<4; ii++,value+=stepval) { 617af674e45SBarry Smith for (jj=ii; jj<16; jj+=4) { 618af674e45SBarry Smith bap[jj] += *value++; 619af674e45SBarry Smith } 620af674e45SBarry Smith } 621af674e45SBarry Smith goto noinsert2; 622af674e45SBarry Smith } 623af674e45SBarry Smith } 624af674e45SBarry Smith N = nrow++ - 1; 62517ec6a02SBarry Smith high++; /* added new column index thus must search to one higher than before */ 626af674e45SBarry Smith /* shift up all the later entries in this row */ 627af674e45SBarry Smith for (ii=N; ii>=i; ii--) { 628af674e45SBarry Smith rp[ii+1] = rp[ii]; 629a037b02bSBarry Smith PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 630af674e45SBarry Smith } 631af674e45SBarry Smith if (N >= i) { 632a037b02bSBarry Smith PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 633af674e45SBarry Smith } 634af674e45SBarry Smith rp[i] = col; 635af674e45SBarry Smith bap = ap + 16*i; 636af674e45SBarry Smith for (ii=0; ii<4; ii++,value+=stepval) { 637af674e45SBarry Smith for (jj=ii; jj<16; jj+=4) { 638af674e45SBarry Smith bap[jj] = *value++; 639af674e45SBarry Smith } 640af674e45SBarry Smith } 641af674e45SBarry Smith noinsert2:; 642af674e45SBarry Smith low = i; 643af674e45SBarry Smith } 644af674e45SBarry Smith ailen[row] = nrow; 645af674e45SBarry Smith } 646be1d678aSKris Buschelman PetscFunctionReturnVoid(); 647af674e45SBarry Smith } 6489c8c1041SBarry Smith EXTERN_C_END 649af674e45SBarry Smith 650af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS) 651af674e45SBarry Smith #define matsetvalues4_ MATSETVALUES4 652af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 653af674e45SBarry Smith #define matsetvalues4_ matsetvalues4 654af674e45SBarry Smith #endif 655af674e45SBarry Smith 6569c8c1041SBarry Smith EXTERN_C_BEGIN 657af674e45SBarry Smith #undef __FUNCT__ 658af674e45SBarry Smith #define __FUNCT__ "MatSetValues4_" 659be1d678aSKris Buschelman void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 660af674e45SBarry Smith { 661af674e45SBarry Smith Mat A = *AA; 662af674e45SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 663c1ac3661SBarry Smith PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 664c1ac3661SBarry Smith PetscInt *ai=a->i,*ailen=a->ilen; 665c1ac3661SBarry Smith PetscInt *aj=a->j,brow,bcol; 66617ec6a02SBarry Smith PetscInt ridx,cidx,lastcol = -1; 667af674e45SBarry Smith MatScalar *ap,value,*aa=a->a,*bap; 668af674e45SBarry Smith 669af674e45SBarry Smith PetscFunctionBegin; 670af674e45SBarry Smith for (k=0; k<m; k++) { /* loop over added rows */ 671af674e45SBarry Smith row = im[k]; brow = row/4; 672af674e45SBarry Smith rp = aj + ai[brow]; 673af674e45SBarry Smith ap = aa + 16*ai[brow]; 674af674e45SBarry Smith nrow = ailen[brow]; 675af674e45SBarry Smith low = 0; 67617ec6a02SBarry Smith high = nrow; 677af674e45SBarry Smith for (l=0; l<n; l++) { /* loop over added columns */ 678af674e45SBarry Smith col = in[l]; bcol = col/4; 679af674e45SBarry Smith ridx = row % 4; cidx = col % 4; 680af674e45SBarry Smith value = v[l + k*n]; 68117ec6a02SBarry Smith if (col <= lastcol) low = 0; else high = nrow; 68217ec6a02SBarry Smith lastcol = col; 683af674e45SBarry Smith while (high-low > 7) { 684af674e45SBarry Smith t = (low+high)/2; 685af674e45SBarry Smith if (rp[t] > bcol) high = t; 686af674e45SBarry Smith else low = t; 687af674e45SBarry Smith } 688af674e45SBarry Smith for (i=low; i<high; i++) { 689af674e45SBarry Smith if (rp[i] > bcol) break; 690af674e45SBarry Smith if (rp[i] == bcol) { 691af674e45SBarry Smith bap = ap + 16*i + 4*cidx + ridx; 692af674e45SBarry Smith *bap += value; 693af674e45SBarry Smith goto noinsert1; 694af674e45SBarry Smith } 695af674e45SBarry Smith } 696af674e45SBarry Smith N = nrow++ - 1; 69717ec6a02SBarry Smith high++; /* added new column thus must search to one higher than before */ 698af674e45SBarry Smith /* shift up all the later entries in this row */ 699af674e45SBarry Smith for (ii=N; ii>=i; ii--) { 700af674e45SBarry Smith rp[ii+1] = rp[ii]; 701a037b02bSBarry Smith PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 702af674e45SBarry Smith } 703af674e45SBarry Smith if (N>=i) { 704a037b02bSBarry Smith PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 705af674e45SBarry Smith } 706af674e45SBarry Smith rp[i] = bcol; 707af674e45SBarry Smith ap[16*i + 4*cidx + ridx] = value; 708af674e45SBarry Smith noinsert1:; 709af674e45SBarry Smith low = i; 710af674e45SBarry Smith } 711af674e45SBarry Smith ailen[brow] = nrow; 712af674e45SBarry Smith } 713be1d678aSKris Buschelman PetscFunctionReturnVoid(); 714af674e45SBarry Smith } 7159c8c1041SBarry Smith EXTERN_C_END 716af674e45SBarry Smith 71795d5f7c2SBarry Smith /* UGLY, ugly, ugly 71887828ca2SBarry Smith When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 7193477af42SKris Buschelman not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 72095d5f7c2SBarry Smith inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 72195d5f7c2SBarry Smith converts the entries into single precision and then calls ..._MatScalar() to put them 72295d5f7c2SBarry Smith into the single precision data structures. 72395d5f7c2SBarry Smith */ 72495d5f7c2SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 725690b6cddSBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 72695d5f7c2SBarry Smith #else 72795d5f7c2SBarry Smith #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 72895d5f7c2SBarry Smith #endif 72995d5f7c2SBarry Smith 7302d61bbb3SSatish Balay #define CHUNKSIZE 10 731de6a44a3SBarry Smith 732be5855fcSBarry Smith /* 733be5855fcSBarry Smith Checks for missing diagonals 734be5855fcSBarry Smith */ 7354a2ae208SSatish Balay #undef __FUNCT__ 7364a2ae208SSatish Balay #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 7372af78befSBarry Smith PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d) 738be5855fcSBarry Smith { 739be5855fcSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 7406849ba73SBarry Smith PetscErrorCode ierr; 741c1ac3661SBarry Smith PetscInt *diag,*jj = a->j,i; 742be5855fcSBarry Smith 743be5855fcSBarry Smith PetscFunctionBegin; 744c4992f7dSBarry Smith ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 7452af78befSBarry Smith *missing = PETSC_FALSE; 746883fce79SBarry Smith diag = a->diag; 7470e8e8aceSBarry Smith for (i=0; i<a->mbs; i++) { 748be5855fcSBarry Smith if (jj[diag[i]] != i) { 7492af78befSBarry Smith *missing = PETSC_TRUE; 7502af78befSBarry Smith if (d) *d = i; 751be5855fcSBarry Smith } 752be5855fcSBarry Smith } 753be5855fcSBarry Smith PetscFunctionReturn(0); 754be5855fcSBarry Smith } 755be5855fcSBarry Smith 7564a2ae208SSatish Balay #undef __FUNCT__ 7574a2ae208SSatish Balay #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 758dfbe8321SBarry Smith PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 759de6a44a3SBarry Smith { 760de6a44a3SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 7616849ba73SBarry Smith PetscErrorCode ierr; 76209f38230SBarry Smith PetscInt i,j,m = a->mbs; 763de6a44a3SBarry Smith 7643a40ed3dSBarry Smith PetscFunctionBegin; 76509f38230SBarry Smith if (!a->diag) { 76609f38230SBarry Smith ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 76709f38230SBarry Smith } 7687fc0212eSBarry Smith for (i=0; i<m; i++) { 76909f38230SBarry Smith a->diag[i] = a->i[i+1]; 770de6a44a3SBarry Smith for (j=a->i[i]; j<a->i[i+1]; j++) { 771de6a44a3SBarry Smith if (a->j[j] == i) { 77209f38230SBarry Smith a->diag[i] = j; 773de6a44a3SBarry Smith break; 774de6a44a3SBarry Smith } 775de6a44a3SBarry Smith } 776de6a44a3SBarry Smith } 7773a40ed3dSBarry Smith PetscFunctionReturn(0); 778de6a44a3SBarry Smith } 7792593348eSBarry Smith 7802593348eSBarry Smith 781690b6cddSBarry Smith EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 7823b2fbd54SBarry Smith 7834a2ae208SSatish Balay #undef __FUNCT__ 7844a2ae208SSatish Balay #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 7858f7157efSSatish Balay static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 7863b2fbd54SBarry Smith { 7873b2fbd54SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 788dfbe8321SBarry Smith PetscErrorCode ierr; 789*9985e31cSBarry Smith PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs,nbs = 1,k,l,cnt; 7908f7157efSSatish Balay PetscInt *tia, *tja; 7913b2fbd54SBarry Smith 7923a40ed3dSBarry Smith PetscFunctionBegin; 7933b2fbd54SBarry Smith *nn = n; 7943a40ed3dSBarry Smith if (!ia) PetscFunctionReturn(0); 7953b2fbd54SBarry Smith if (symmetric) { 7968f7157efSSatish Balay ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 7973b2fbd54SBarry Smith } else { 7988f7157efSSatish Balay tia = a->i; tja = a->j; 7993b2fbd54SBarry Smith } 8003b2fbd54SBarry Smith 801ecc77c7aSBarry Smith if (!blockcompressed && bs > 1) { 802ecc77c7aSBarry Smith (*nn) *= bs; 803ecc77c7aSBarry Smith nbs = bs; 8048f7157efSSatish Balay /* malloc & create the natural set of indices */ 805ecc77c7aSBarry Smith ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 806*9985e31cSBarry Smith if (n) { 807ecc77c7aSBarry Smith (*ia)[0] = 0; 808ecc77c7aSBarry Smith for (j=1; j<bs; j++) { 809ecc77c7aSBarry Smith (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 810ecc77c7aSBarry Smith } 811*9985e31cSBarry Smith } 812ecc77c7aSBarry Smith 813ecc77c7aSBarry Smith for (i=1; i<n; i++) { 814ecc77c7aSBarry Smith (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 815ecc77c7aSBarry Smith for (j=1; j<bs; j++) { 816ecc77c7aSBarry Smith (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 8178f7157efSSatish Balay } 8188f7157efSSatish Balay } 819*9985e31cSBarry Smith if (n) { 820ecc77c7aSBarry Smith (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 821*9985e31cSBarry Smith } 822ecc77c7aSBarry Smith 823*9985e31cSBarry Smith if (ja) { 824*9985e31cSBarry Smith ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 825*9985e31cSBarry Smith cnt = 0; 826*9985e31cSBarry Smith for (i=0; i<n; i++) { 827*9985e31cSBarry Smith for (j=0; j<bs; j++) { 828*9985e31cSBarry Smith for (k=tia[i]; k<tia[i+1]; k++) { 829*9985e31cSBarry Smith for (l=0; l<bs; l++) { 830*9985e31cSBarry Smith (*ja)[cnt++] = bs*tja[k] + l; 831*9985e31cSBarry Smith } 832*9985e31cSBarry Smith } 833*9985e31cSBarry Smith } 834*9985e31cSBarry Smith } 835*9985e31cSBarry Smith } 836*9985e31cSBarry Smith 837ecc77c7aSBarry Smith n *= bs; 838ecc77c7aSBarry Smith nz *= bs*bs; 8398f7157efSSatish Balay if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 8408f7157efSSatish Balay ierr = PetscFree(tia);CHKERRQ(ierr); 8418f7157efSSatish Balay ierr = PetscFree(tja);CHKERRQ(ierr); 8428f7157efSSatish Balay } 8438f7157efSSatish Balay } else { 8448f7157efSSatish Balay *ia = tia; 845ecc77c7aSBarry Smith if (ja) *ja = tja; 8468f7157efSSatish Balay } 8478f7157efSSatish Balay if (oshift == 1) { 848ecc77c7aSBarry Smith for (i=0; i<n+nbs; i++) (*ia)[i]++; 849ecc77c7aSBarry Smith if (ja) for (i=0; i<nz; i++) (*ja)[i]++; 8508f7157efSSatish Balay } 8513a40ed3dSBarry Smith PetscFunctionReturn(0); 8523b2fbd54SBarry Smith } 8533b2fbd54SBarry Smith 8544a2ae208SSatish Balay #undef __FUNCT__ 8554a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 8568f7157efSSatish Balay static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 8573b2fbd54SBarry Smith { 8583b2fbd54SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 8596849ba73SBarry Smith PetscErrorCode ierr; 8608f7157efSSatish Balay PetscInt i,n = a->mbs,nz = a->i[n]; 8613b2fbd54SBarry Smith 8623a40ed3dSBarry Smith PetscFunctionBegin; 8633a40ed3dSBarry Smith if (!ia) PetscFunctionReturn(0); 864ecc77c7aSBarry Smith if (!blockcompressed && A->rmap.bs > 1) { 865ecc77c7aSBarry Smith ierr = PetscFree(*ia);CHKERRQ(ierr); 866*9985e31cSBarry Smith if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 8678f7157efSSatish Balay } else if (symmetric) { 868606d414cSSatish Balay ierr = PetscFree(*ia);CHKERRQ(ierr); 869*9985e31cSBarry Smith if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 8708f7157efSSatish Balay } else if (oshift == 1) { /* blockcompressed */ 8713b2fbd54SBarry Smith for (i=0; i<n+1; i++) a->i[i]--; 872*9985e31cSBarry Smith if (ja) {for (i=0; i<nz; i++) a->j[i]--;} 8733b2fbd54SBarry Smith } 8743a40ed3dSBarry Smith PetscFunctionReturn(0); 8753b2fbd54SBarry Smith } 8763b2fbd54SBarry Smith 8774a2ae208SSatish Balay #undef __FUNCT__ 8784a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_SeqBAIJ" 879dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 8802d61bbb3SSatish Balay { 8812d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 882dfbe8321SBarry Smith PetscErrorCode ierr; 8832d61bbb3SSatish Balay 884433994e6SBarry Smith PetscFunctionBegin; 885aa482453SBarry Smith #if defined(PETSC_USE_LOG) 886899cda47SBarry Smith PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz); 8872d61bbb3SSatish Balay #endif 888e6b907acSBarry Smith ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 889c38d4ed2SBarry Smith if (a->row) { 890c38d4ed2SBarry Smith ierr = ISDestroy(a->row);CHKERRQ(ierr); 891c38d4ed2SBarry Smith } 892c38d4ed2SBarry Smith if (a->col) { 893c38d4ed2SBarry Smith ierr = ISDestroy(a->col);CHKERRQ(ierr); 894c38d4ed2SBarry Smith } 89505b42c5fSBarry Smith ierr = PetscFree(a->diag);CHKERRQ(ierr); 89605b42c5fSBarry Smith ierr = PetscFree(a->idiag);CHKERRQ(ierr); 89705b42c5fSBarry Smith ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 89805b42c5fSBarry Smith ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 89905b42c5fSBarry Smith ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 900e51c0b9cSSatish Balay if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 90105b42c5fSBarry Smith ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 902563b5814SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 90305b42c5fSBarry Smith ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr); 904563b5814SBarry Smith #endif 90505b42c5fSBarry Smith ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 90673e7a558SHong Zhang if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 907c4319e64SHong Zhang 908606d414cSSatish Balay ierr = PetscFree(a);CHKERRQ(ierr); 909901853e0SKris Buschelman 910dbd8c25aSHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 91143516a2dSKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 912901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 913901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 914901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 915901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 916901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 917901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 9182d61bbb3SSatish Balay PetscFunctionReturn(0); 9192d61bbb3SSatish Balay } 9202d61bbb3SSatish Balay 9214a2ae208SSatish Balay #undef __FUNCT__ 9224a2ae208SSatish Balay #define __FUNCT__ "MatSetOption_SeqBAIJ" 9234e0d8c25SBarry Smith PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg) 9242d61bbb3SSatish Balay { 9252d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 92663ba0a88SBarry Smith PetscErrorCode ierr; 9272d61bbb3SSatish Balay 9282d61bbb3SSatish Balay PetscFunctionBegin; 929aa275fccSKris Buschelman switch (op) { 930aa275fccSKris Buschelman case MAT_ROW_ORIENTED: 9314e0d8c25SBarry Smith a->roworiented = flg; 932aa275fccSKris Buschelman break; 933aa275fccSKris Buschelman case MAT_KEEP_ZEROED_ROWS: 9344e0d8c25SBarry Smith a->keepzeroedrows = flg; 935aa275fccSKris Buschelman break; 936512a5fc5SBarry Smith case MAT_NEW_NONZERO_LOCATIONS: 937512a5fc5SBarry Smith a->nonew = (flg ? 0 : 1); 938aa275fccSKris Buschelman break; 939aa275fccSKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 9404e0d8c25SBarry Smith a->nonew = (flg ? -1 : 0); 941aa275fccSKris Buschelman break; 942aa275fccSKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 9434e0d8c25SBarry Smith a->nonew = (flg ? -2 : 0); 944aa275fccSKris Buschelman break; 9454e0d8c25SBarry Smith case MAT_NEW_DIAGONALS: 946aa275fccSKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 947aa275fccSKris Buschelman case MAT_USE_HASH_TABLE: 948290bbb0aSBarry Smith ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 949aa275fccSKris Buschelman break; 95077e54ba9SKris Buschelman case MAT_SYMMETRIC: 95177e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 9529a4540c5SBarry Smith case MAT_HERMITIAN: 9539a4540c5SBarry Smith case MAT_SYMMETRY_ETERNAL: 954290bbb0aSBarry Smith ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 95577e54ba9SKris Buschelman break; 956aa275fccSKris Buschelman default: 957ad86a440SBarry Smith SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 9582d61bbb3SSatish Balay } 9592d61bbb3SSatish Balay PetscFunctionReturn(0); 9602d61bbb3SSatish Balay } 9612d61bbb3SSatish Balay 9624a2ae208SSatish Balay #undef __FUNCT__ 9634a2ae208SSatish Balay #define __FUNCT__ "MatGetRow_SeqBAIJ" 964c1ac3661SBarry Smith PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 9652d61bbb3SSatish Balay { 9662d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 9676849ba73SBarry Smith PetscErrorCode ierr; 968c1ac3661SBarry Smith PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 9693f1db9ecSBarry Smith MatScalar *aa,*aa_i; 97087828ca2SBarry Smith PetscScalar *v_i; 9712d61bbb3SSatish Balay 9722d61bbb3SSatish Balay PetscFunctionBegin; 973899cda47SBarry Smith bs = A->rmap.bs; 9742d61bbb3SSatish Balay ai = a->i; 9752d61bbb3SSatish Balay aj = a->j; 9762d61bbb3SSatish Balay aa = a->a; 9772d61bbb3SSatish Balay bs2 = a->bs2; 9782d61bbb3SSatish Balay 979899cda47SBarry Smith if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 9802d61bbb3SSatish Balay 9812d61bbb3SSatish Balay bn = row/bs; /* Block number */ 9822d61bbb3SSatish Balay bp = row % bs; /* Block Position */ 9832d61bbb3SSatish Balay M = ai[bn+1] - ai[bn]; 9842d61bbb3SSatish Balay *nz = bs*M; 9852d61bbb3SSatish Balay 9862d61bbb3SSatish Balay if (v) { 9872d61bbb3SSatish Balay *v = 0; 9882d61bbb3SSatish Balay if (*nz) { 98987828ca2SBarry Smith ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 9902d61bbb3SSatish Balay for (i=0; i<M; i++) { /* for each block in the block row */ 9912d61bbb3SSatish Balay v_i = *v + i*bs; 9922d61bbb3SSatish Balay aa_i = aa + bs2*(ai[bn] + i); 9932d61bbb3SSatish Balay for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 9942d61bbb3SSatish Balay } 9952d61bbb3SSatish Balay } 9962d61bbb3SSatish Balay } 9972d61bbb3SSatish Balay 9982d61bbb3SSatish Balay if (idx) { 9992d61bbb3SSatish Balay *idx = 0; 10002d61bbb3SSatish Balay if (*nz) { 1001c1ac3661SBarry Smith ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 10022d61bbb3SSatish Balay for (i=0; i<M; i++) { /* for each block in the block row */ 10032d61bbb3SSatish Balay idx_i = *idx + i*bs; 10042d61bbb3SSatish Balay itmp = bs*aj[ai[bn] + i]; 10052d61bbb3SSatish Balay for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 10062d61bbb3SSatish Balay } 10072d61bbb3SSatish Balay } 10082d61bbb3SSatish Balay } 10092d61bbb3SSatish Balay PetscFunctionReturn(0); 10102d61bbb3SSatish Balay } 10112d61bbb3SSatish Balay 10124a2ae208SSatish Balay #undef __FUNCT__ 10134a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1014c1ac3661SBarry Smith PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 10152d61bbb3SSatish Balay { 1016dfbe8321SBarry Smith PetscErrorCode ierr; 1017606d414cSSatish Balay 10182d61bbb3SSatish Balay PetscFunctionBegin; 101905b42c5fSBarry Smith if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 102005b42c5fSBarry Smith if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 10212d61bbb3SSatish Balay PetscFunctionReturn(0); 10222d61bbb3SSatish Balay } 10232d61bbb3SSatish Balay 10244a2ae208SSatish Balay #undef __FUNCT__ 10254a2ae208SSatish Balay #define __FUNCT__ "MatTranspose_SeqBAIJ" 1026dfbe8321SBarry Smith PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B) 10272d61bbb3SSatish Balay { 10282d61bbb3SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 10292d61bbb3SSatish Balay Mat C; 10306849ba73SBarry Smith PetscErrorCode ierr; 1031899cda47SBarry Smith PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1032c1ac3661SBarry Smith PetscInt *rows,*cols,bs2=a->bs2; 103387828ca2SBarry Smith PetscScalar *array; 10342d61bbb3SSatish Balay 10352d61bbb3SSatish Balay PetscFunctionBegin; 103629bbc08cSBarry Smith if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 1037c1ac3661SBarry Smith ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1038c1ac3661SBarry Smith ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 10392d61bbb3SSatish Balay 1040375fe846SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 104187828ca2SBarry Smith ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 104287828ca2SBarry Smith for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 1043375fe846SBarry Smith #else 10443eda8832SBarry Smith array = a->a; 1045375fe846SBarry Smith #endif 1046375fe846SBarry Smith 10472d61bbb3SSatish Balay for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 10487adad957SLisandro Dalcin ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1049899cda47SBarry Smith ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr); 10507adad957SLisandro Dalcin ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1051ab93d7beSBarry Smith ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1052606d414cSSatish Balay ierr = PetscFree(col);CHKERRQ(ierr); 1053c1ac3661SBarry Smith ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 10542d61bbb3SSatish Balay cols = rows + bs; 10552d61bbb3SSatish Balay for (i=0; i<mbs; i++) { 10562d61bbb3SSatish Balay cols[0] = i*bs; 10572d61bbb3SSatish Balay for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 10582d61bbb3SSatish Balay len = ai[i+1] - ai[i]; 10592d61bbb3SSatish Balay for (j=0; j<len; j++) { 10602d61bbb3SSatish Balay rows[0] = (*aj++)*bs; 10612d61bbb3SSatish Balay for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 10622d61bbb3SSatish Balay ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 10632d61bbb3SSatish Balay array += bs2; 10642d61bbb3SSatish Balay } 10652d61bbb3SSatish Balay } 1066606d414cSSatish Balay ierr = PetscFree(rows);CHKERRQ(ierr); 1067375fe846SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 1068375fe846SBarry Smith ierr = PetscFree(array); 1069375fe846SBarry Smith #endif 10702d61bbb3SSatish Balay 10712d61bbb3SSatish Balay ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 10722d61bbb3SSatish Balay ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 10732d61bbb3SSatish Balay 1074c4992f7dSBarry Smith if (B) { 10752d61bbb3SSatish Balay *B = C; 10762d61bbb3SSatish Balay } else { 1077273d9f13SBarry Smith ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 10782d61bbb3SSatish Balay } 10792d61bbb3SSatish Balay PetscFunctionReturn(0); 10802d61bbb3SSatish Balay } 10812d61bbb3SSatish Balay 10824a2ae208SSatish Balay #undef __FUNCT__ 10834a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_Binary" 10846849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 10852593348eSBarry Smith { 1086b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 10876849ba73SBarry Smith PetscErrorCode ierr; 1088899cda47SBarry Smith PetscInt i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2; 1089b24ad042SBarry Smith int fd; 109087828ca2SBarry Smith PetscScalar *aa; 1091ce6f0cecSBarry Smith FILE *file; 10922593348eSBarry Smith 10933a40ed3dSBarry Smith PetscFunctionBegin; 1094b0a32e0cSBarry Smith ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1095899cda47SBarry Smith ierr = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1096552e946dSBarry Smith col_lens[0] = MAT_FILE_COOKIE; 10973b2fbd54SBarry Smith 1098899cda47SBarry Smith col_lens[1] = A->rmap.N; 1099899cda47SBarry Smith col_lens[2] = A->cmap.n; 11007e67e3f9SSatish Balay col_lens[3] = a->nz*bs2; 11012593348eSBarry Smith 11022593348eSBarry Smith /* store lengths of each row and write (including header) to file */ 1103b6490206SBarry Smith count = 0; 1104b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 1105b6490206SBarry Smith for (j=0; j<bs; j++) { 1106b6490206SBarry Smith col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1107b6490206SBarry Smith } 11082593348eSBarry Smith } 1109899cda47SBarry Smith ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1110606d414cSSatish Balay ierr = PetscFree(col_lens);CHKERRQ(ierr); 11112593348eSBarry Smith 11122593348eSBarry Smith /* store column indices (zero start index) */ 1113c1ac3661SBarry Smith ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1114b6490206SBarry Smith count = 0; 1115b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 1116b6490206SBarry Smith for (j=0; j<bs; j++) { 1117b6490206SBarry Smith for (k=a->i[i]; k<a->i[i+1]; k++) { 1118b6490206SBarry Smith for (l=0; l<bs; l++) { 1119b6490206SBarry Smith jj[count++] = bs*a->j[k] + l; 11202593348eSBarry Smith } 11212593348eSBarry Smith } 1122b6490206SBarry Smith } 1123b6490206SBarry Smith } 11246f69ff64SBarry Smith ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1125606d414cSSatish Balay ierr = PetscFree(jj);CHKERRQ(ierr); 11262593348eSBarry Smith 11272593348eSBarry Smith /* store nonzero values */ 112887828ca2SBarry Smith ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1129b6490206SBarry Smith count = 0; 1130b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 1131b6490206SBarry Smith for (j=0; j<bs; j++) { 1132b6490206SBarry Smith for (k=a->i[i]; k<a->i[i+1]; k++) { 1133b6490206SBarry Smith for (l=0; l<bs; l++) { 11347e67e3f9SSatish Balay aa[count++] = a->a[bs2*k + l*bs + j]; 1135b6490206SBarry Smith } 1136b6490206SBarry Smith } 1137b6490206SBarry Smith } 1138b6490206SBarry Smith } 11396f69ff64SBarry Smith ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1140606d414cSSatish Balay ierr = PetscFree(aa);CHKERRQ(ierr); 1141ce6f0cecSBarry Smith 1142b0a32e0cSBarry Smith ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1143ce6f0cecSBarry Smith if (file) { 1144899cda47SBarry Smith fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs); 1145ce6f0cecSBarry Smith } 11463a40ed3dSBarry Smith PetscFunctionReturn(0); 11472593348eSBarry Smith } 11482593348eSBarry Smith 11494a2ae208SSatish Balay #undef __FUNCT__ 11504a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 11516849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 11522593348eSBarry Smith { 1153b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1154dfbe8321SBarry Smith PetscErrorCode ierr; 1155899cda47SBarry Smith PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2; 1156f3ef73ceSBarry Smith PetscViewerFormat format; 11572593348eSBarry Smith 11583a40ed3dSBarry Smith PetscFunctionBegin; 1159b0a32e0cSBarry Smith ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1160456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 116177431f27SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1162fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1163bcd9e38bSBarry Smith Mat aij; 1164ceb03754SKris Buschelman ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1165bcd9e38bSBarry Smith ierr = MatView(aij,viewer);CHKERRQ(ierr); 1166bcd9e38bSBarry Smith ierr = MatDestroy(aij);CHKERRQ(ierr); 116704929863SHong Zhang } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 116804929863SHong Zhang PetscFunctionReturn(0); 1169fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1170b0a32e0cSBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 117144cd7ae7SLois Curfman McInnes for (i=0; i<a->mbs; i++) { 117244cd7ae7SLois Curfman McInnes for (j=0; j<bs; j++) { 117377431f27SBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 117444cd7ae7SLois Curfman McInnes for (k=a->i[i]; k<a->i[i+1]; k++) { 117544cd7ae7SLois Curfman McInnes for (l=0; l<bs; l++) { 1176aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 11770e6d2581SBarry Smith if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1178a83599f4SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 11790e6d2581SBarry Smith PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 11800e6d2581SBarry Smith } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1181a83599f4SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 11820e6d2581SBarry Smith PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 11830e6d2581SBarry Smith } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1184a83599f4SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 11850ef38995SBarry Smith } 118644cd7ae7SLois Curfman McInnes #else 11870ef38995SBarry Smith if (a->a[bs2*k + l*bs + j] != 0.0) { 1188a83599f4SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 11890ef38995SBarry Smith } 119044cd7ae7SLois Curfman McInnes #endif 119144cd7ae7SLois Curfman McInnes } 119244cd7ae7SLois Curfman McInnes } 1193b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 119444cd7ae7SLois Curfman McInnes } 119544cd7ae7SLois Curfman McInnes } 1196b0a32e0cSBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 11970ef38995SBarry Smith } else { 1198b0a32e0cSBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1199b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 1200b6490206SBarry Smith for (j=0; j<bs; j++) { 120177431f27SBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1202b6490206SBarry Smith for (k=a->i[i]; k<a->i[i+1]; k++) { 1203b6490206SBarry Smith for (l=0; l<bs; l++) { 1204aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 12050e6d2581SBarry Smith if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1206a83599f4SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 12070e6d2581SBarry Smith PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 12080e6d2581SBarry Smith } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1209a83599f4SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 12100e6d2581SBarry Smith PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 12110ef38995SBarry Smith } else { 1212a83599f4SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 121388685aaeSLois Curfman McInnes } 121488685aaeSLois Curfman McInnes #else 1215a83599f4SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 121688685aaeSLois Curfman McInnes #endif 12172593348eSBarry Smith } 12182593348eSBarry Smith } 1219b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 12202593348eSBarry Smith } 12212593348eSBarry Smith } 1222b0a32e0cSBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1223b6490206SBarry Smith } 1224b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 12253a40ed3dSBarry Smith PetscFunctionReturn(0); 12262593348eSBarry Smith } 12272593348eSBarry Smith 12284a2ae208SSatish Balay #undef __FUNCT__ 12294a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 12306849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 12313270192aSSatish Balay { 123277ed5343SBarry Smith Mat A = (Mat) Aa; 12333270192aSSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 12346849ba73SBarry Smith PetscErrorCode ierr; 1235899cda47SBarry Smith PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2; 12360e6d2581SBarry Smith PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 12373f1db9ecSBarry Smith MatScalar *aa; 1238b0a32e0cSBarry Smith PetscViewer viewer; 12393270192aSSatish Balay 12403a40ed3dSBarry Smith PetscFunctionBegin; 12413270192aSSatish Balay 1242b65db4caSBarry Smith /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 124377ed5343SBarry Smith ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 124477ed5343SBarry Smith 1245b0a32e0cSBarry Smith ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 124677ed5343SBarry Smith 12473270192aSSatish Balay /* loop over matrix elements drawing boxes */ 1248b0a32e0cSBarry Smith color = PETSC_DRAW_BLUE; 12493270192aSSatish Balay for (i=0,row=0; i<mbs; i++,row+=bs) { 12503270192aSSatish Balay for (j=a->i[i]; j<a->i[i+1]; j++) { 1251899cda47SBarry Smith y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 12523270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 12533270192aSSatish Balay aa = a->a + j*bs2; 12543270192aSSatish Balay for (k=0; k<bs; k++) { 12553270192aSSatish Balay for (l=0; l<bs; l++) { 12560e6d2581SBarry Smith if (PetscRealPart(*aa++) >= 0.) continue; 1257b0a32e0cSBarry Smith ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 12583270192aSSatish Balay } 12593270192aSSatish Balay } 12603270192aSSatish Balay } 12613270192aSSatish Balay } 1262b0a32e0cSBarry Smith color = PETSC_DRAW_CYAN; 12633270192aSSatish Balay for (i=0,row=0; i<mbs; i++,row+=bs) { 12643270192aSSatish Balay for (j=a->i[i]; j<a->i[i+1]; j++) { 1265899cda47SBarry Smith y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 12663270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 12673270192aSSatish Balay aa = a->a + j*bs2; 12683270192aSSatish Balay for (k=0; k<bs; k++) { 12693270192aSSatish Balay for (l=0; l<bs; l++) { 12700e6d2581SBarry Smith if (PetscRealPart(*aa++) != 0.) continue; 1271b0a32e0cSBarry Smith ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 12723270192aSSatish Balay } 12733270192aSSatish Balay } 12743270192aSSatish Balay } 12753270192aSSatish Balay } 12763270192aSSatish Balay 1277b0a32e0cSBarry Smith color = PETSC_DRAW_RED; 12783270192aSSatish Balay for (i=0,row=0; i<mbs; i++,row+=bs) { 12793270192aSSatish Balay for (j=a->i[i]; j<a->i[i+1]; j++) { 1280899cda47SBarry Smith y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 12813270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 12823270192aSSatish Balay aa = a->a + j*bs2; 12833270192aSSatish Balay for (k=0; k<bs; k++) { 12843270192aSSatish Balay for (l=0; l<bs; l++) { 12850e6d2581SBarry Smith if (PetscRealPart(*aa++) <= 0.) continue; 1286b0a32e0cSBarry Smith ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 12873270192aSSatish Balay } 12883270192aSSatish Balay } 12893270192aSSatish Balay } 12903270192aSSatish Balay } 129177ed5343SBarry Smith PetscFunctionReturn(0); 129277ed5343SBarry Smith } 12933270192aSSatish Balay 12944a2ae208SSatish Balay #undef __FUNCT__ 12954a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_Draw" 12966849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 129777ed5343SBarry Smith { 1298dfbe8321SBarry Smith PetscErrorCode ierr; 12990e6d2581SBarry Smith PetscReal xl,yl,xr,yr,w,h; 1300b0a32e0cSBarry Smith PetscDraw draw; 130177ed5343SBarry Smith PetscTruth isnull; 13023270192aSSatish Balay 130377ed5343SBarry Smith PetscFunctionBegin; 130477ed5343SBarry Smith 1305b0a32e0cSBarry Smith ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1306b0a32e0cSBarry Smith ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 130777ed5343SBarry Smith 130877ed5343SBarry Smith ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1309899cda47SBarry Smith xr = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0; 131077ed5343SBarry Smith xr += w; yr += h; xl = -w; yl = -h; 1311b0a32e0cSBarry Smith ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1312b0a32e0cSBarry Smith ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 131377ed5343SBarry Smith ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 13143a40ed3dSBarry Smith PetscFunctionReturn(0); 13153270192aSSatish Balay } 13163270192aSSatish Balay 13174a2ae208SSatish Balay #undef __FUNCT__ 13184a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ" 1319dfbe8321SBarry Smith PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 13202593348eSBarry Smith { 1321dfbe8321SBarry Smith PetscErrorCode ierr; 132232077d6dSBarry Smith PetscTruth iascii,isbinary,isdraw; 13232593348eSBarry Smith 13243a40ed3dSBarry Smith PetscFunctionBegin; 132532077d6dSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1326fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1327fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 132832077d6dSBarry Smith if (iascii){ 13293a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 13300f5bd95cSBarry Smith } else if (isbinary) { 13313a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 13320f5bd95cSBarry Smith } else if (isdraw) { 13333a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 13345cd90555SBarry Smith } else { 1335a5e6ed63SBarry Smith Mat B; 1336ceb03754SKris Buschelman ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1337a5e6ed63SBarry Smith ierr = MatView(B,viewer);CHKERRQ(ierr); 1338a5e6ed63SBarry Smith ierr = MatDestroy(B);CHKERRQ(ierr); 13392593348eSBarry Smith } 13403a40ed3dSBarry Smith PetscFunctionReturn(0); 13412593348eSBarry Smith } 1342b6490206SBarry Smith 1343cd0e1443SSatish Balay 13444a2ae208SSatish Balay #undef __FUNCT__ 13454a2ae208SSatish Balay #define __FUNCT__ "MatGetValues_SeqBAIJ" 1346c1ac3661SBarry Smith PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1347cd0e1443SSatish Balay { 1348cd0e1443SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1349c1ac3661SBarry Smith PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1350c1ac3661SBarry Smith PetscInt *ai = a->i,*ailen = a->ilen; 1351899cda47SBarry Smith PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2; 135297e567efSBarry Smith MatScalar *ap,*aa = a->a; 1353cd0e1443SSatish Balay 13543a40ed3dSBarry Smith PetscFunctionBegin; 13552d61bbb3SSatish Balay for (k=0; k<m; k++) { /* loop over rows */ 1356cd0e1443SSatish Balay row = im[k]; brow = row/bs; 135797e567efSBarry Smith if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1358899cda47SBarry Smith if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 13592d61bbb3SSatish Balay rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 13602c3acbe9SBarry Smith nrow = ailen[brow]; 13612d61bbb3SSatish Balay for (l=0; l<n; l++) { /* loop over columns */ 136297e567efSBarry Smith if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1363899cda47SBarry Smith if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 13642d61bbb3SSatish Balay col = in[l] ; 13652d61bbb3SSatish Balay bcol = col/bs; 13662d61bbb3SSatish Balay cidx = col%bs; 13672d61bbb3SSatish Balay ridx = row%bs; 13682d61bbb3SSatish Balay high = nrow; 13692d61bbb3SSatish Balay low = 0; /* assume unsorted */ 13702d61bbb3SSatish Balay while (high-low > 5) { 1371cd0e1443SSatish Balay t = (low+high)/2; 1372cd0e1443SSatish Balay if (rp[t] > bcol) high = t; 1373cd0e1443SSatish Balay else low = t; 1374cd0e1443SSatish Balay } 1375cd0e1443SSatish Balay for (i=low; i<high; i++) { 1376cd0e1443SSatish Balay if (rp[i] > bcol) break; 1377cd0e1443SSatish Balay if (rp[i] == bcol) { 13782d61bbb3SSatish Balay *v++ = ap[bs2*i+bs*cidx+ridx]; 13792d61bbb3SSatish Balay goto finished; 1380cd0e1443SSatish Balay } 1381cd0e1443SSatish Balay } 138297e567efSBarry Smith *v++ = 0.0; 13832d61bbb3SSatish Balay finished:; 1384cd0e1443SSatish Balay } 1385cd0e1443SSatish Balay } 13863a40ed3dSBarry Smith PetscFunctionReturn(0); 1387cd0e1443SSatish Balay } 1388cd0e1443SSatish Balay 138995d5f7c2SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 13904a2ae208SSatish Balay #undef __FUNCT__ 13914a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1392c1ac3661SBarry Smith PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 139395d5f7c2SBarry Smith { 1394563b5814SBarry Smith Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1395dfbe8321SBarry Smith PetscErrorCode ierr; 1396c1ac3661SBarry Smith PetscInt i,N = m*n*b->bs2; 1397563b5814SBarry Smith MatScalar *vsingle; 139895d5f7c2SBarry Smith 139995d5f7c2SBarry Smith PetscFunctionBegin; 1400563b5814SBarry Smith if (N > b->setvalueslen) { 140105b42c5fSBarry Smith ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 1402b0a32e0cSBarry Smith ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1403563b5814SBarry Smith b->setvalueslen = N; 1404563b5814SBarry Smith } 1405563b5814SBarry Smith vsingle = b->setvaluescopy; 140695d5f7c2SBarry Smith for (i=0; i<N; i++) { 140795d5f7c2SBarry Smith vsingle[i] = v[i]; 140895d5f7c2SBarry Smith } 140995d5f7c2SBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 141095d5f7c2SBarry Smith PetscFunctionReturn(0); 141195d5f7c2SBarry Smith } 141295d5f7c2SBarry Smith #endif 141395d5f7c2SBarry Smith 14142d61bbb3SSatish Balay 14154a2ae208SSatish Balay #undef __FUNCT__ 14164a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1417c1ac3661SBarry Smith PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is) 141892c4ed94SBarry Smith { 141992c4ed94SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1420e2ee6c50SBarry Smith PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1421c1ac3661SBarry Smith PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 14226849ba73SBarry Smith PetscErrorCode ierr; 1423899cda47SBarry Smith PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval; 1424273d9f13SBarry Smith PetscTruth roworiented=a->roworiented; 1425f15d580aSBarry Smith const MatScalar *value = v; 1426f15d580aSBarry Smith MatScalar *ap,*aa = a->a,*bap; 142792c4ed94SBarry Smith 14283a40ed3dSBarry Smith PetscFunctionBegin; 14290e324ae4SSatish Balay if (roworiented) { 14300e324ae4SSatish Balay stepval = (n-1)*bs; 14310e324ae4SSatish Balay } else { 14320e324ae4SSatish Balay stepval = (m-1)*bs; 14330e324ae4SSatish Balay } 143492c4ed94SBarry Smith for (k=0; k<m; k++) { /* loop over added rows */ 143592c4ed94SBarry Smith row = im[k]; 14365ef9f2a5SBarry Smith if (row < 0) continue; 14372515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 143877431f27SBarry Smith if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 143992c4ed94SBarry Smith #endif 144092c4ed94SBarry Smith rp = aj + ai[row]; 144192c4ed94SBarry Smith ap = aa + bs2*ai[row]; 144292c4ed94SBarry Smith rmax = imax[row]; 144392c4ed94SBarry Smith nrow = ailen[row]; 144492c4ed94SBarry Smith low = 0; 1445c71e6ed7SBarry Smith high = nrow; 144692c4ed94SBarry Smith for (l=0; l<n; l++) { /* loop over added columns */ 14475ef9f2a5SBarry Smith if (in[l] < 0) continue; 14482515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 144977431f27SBarry Smith if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 145092c4ed94SBarry Smith #endif 145192c4ed94SBarry Smith col = in[l]; 145292c4ed94SBarry Smith if (roworiented) { 14530e324ae4SSatish Balay value = v + k*(stepval+bs)*bs + l*bs; 14540e324ae4SSatish Balay } else { 14550e324ae4SSatish Balay value = v + l*(stepval+bs)*bs + k*bs; 145692c4ed94SBarry Smith } 14577cd84e04SBarry Smith if (col <= lastcol) low = 0; else high = nrow; 1458e2ee6c50SBarry Smith lastcol = col; 145992c4ed94SBarry Smith while (high-low > 7) { 146092c4ed94SBarry Smith t = (low+high)/2; 146192c4ed94SBarry Smith if (rp[t] > col) high = t; 146292c4ed94SBarry Smith else low = t; 146392c4ed94SBarry Smith } 146492c4ed94SBarry Smith for (i=low; i<high; i++) { 146592c4ed94SBarry Smith if (rp[i] > col) break; 146692c4ed94SBarry Smith if (rp[i] == col) { 14678a84c255SSatish Balay bap = ap + bs2*i; 14680e324ae4SSatish Balay if (roworiented) { 14698a84c255SSatish Balay if (is == ADD_VALUES) { 1470dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 1471dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 14728a84c255SSatish Balay bap[jj] += *value++; 1473dd9472c6SBarry Smith } 1474dd9472c6SBarry Smith } 14750e324ae4SSatish Balay } else { 1476dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 1477dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 14780e324ae4SSatish Balay bap[jj] = *value++; 14798a84c255SSatish Balay } 1480dd9472c6SBarry Smith } 1481dd9472c6SBarry Smith } 14820e324ae4SSatish Balay } else { 14830e324ae4SSatish Balay if (is == ADD_VALUES) { 1484dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 1485dd9472c6SBarry Smith for (jj=0; jj<bs; jj++) { 14860e324ae4SSatish Balay *bap++ += *value++; 1487dd9472c6SBarry Smith } 1488dd9472c6SBarry Smith } 14890e324ae4SSatish Balay } else { 1490dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 1491dd9472c6SBarry Smith for (jj=0; jj<bs; jj++) { 14920e324ae4SSatish Balay *bap++ = *value++; 14930e324ae4SSatish Balay } 14948a84c255SSatish Balay } 1495dd9472c6SBarry Smith } 1496dd9472c6SBarry Smith } 1497f1241b54SBarry Smith goto noinsert2; 149892c4ed94SBarry Smith } 149992c4ed94SBarry Smith } 150089280ab3SLois Curfman McInnes if (nonew == 1) goto noinsert2; 1501085a36d4SBarry Smith if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1502421e10b8SBarry Smith MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1503c03d1d03SSatish Balay N = nrow++ - 1; high++; 150492c4ed94SBarry Smith /* shift up all the later entries in this row */ 150592c4ed94SBarry Smith for (ii=N; ii>=i; ii--) { 150692c4ed94SBarry Smith rp[ii+1] = rp[ii]; 1507549d3d68SSatish Balay ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 150892c4ed94SBarry Smith } 1509549d3d68SSatish Balay if (N >= i) { 1510549d3d68SSatish Balay ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1511549d3d68SSatish Balay } 151292c4ed94SBarry Smith rp[i] = col; 15138a84c255SSatish Balay bap = ap + bs2*i; 15140e324ae4SSatish Balay if (roworiented) { 1515dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 1516dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 15170e324ae4SSatish Balay bap[jj] = *value++; 1518dd9472c6SBarry Smith } 1519dd9472c6SBarry Smith } 15200e324ae4SSatish Balay } else { 1521dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 1522dd9472c6SBarry Smith for (jj=0; jj<bs; jj++) { 15230e324ae4SSatish Balay *bap++ = *value++; 15240e324ae4SSatish Balay } 1525dd9472c6SBarry Smith } 1526dd9472c6SBarry Smith } 1527f1241b54SBarry Smith noinsert2:; 152892c4ed94SBarry Smith low = i; 152992c4ed94SBarry Smith } 153092c4ed94SBarry Smith ailen[row] = nrow; 153192c4ed94SBarry Smith } 15323a40ed3dSBarry Smith PetscFunctionReturn(0); 153392c4ed94SBarry Smith } 153426e093fcSHong Zhang 15354a2ae208SSatish Balay #undef __FUNCT__ 15364a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1537dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1538584200bdSSatish Balay { 1539584200bdSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1540c1ac3661SBarry Smith PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1541899cda47SBarry Smith PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen; 15426849ba73SBarry Smith PetscErrorCode ierr; 1543c1ac3661SBarry Smith PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 15443f1db9ecSBarry Smith MatScalar *aa = a->a,*ap; 15453447b6efSHong Zhang PetscReal ratio=0.6; 1546584200bdSSatish Balay 15473a40ed3dSBarry Smith PetscFunctionBegin; 15483a40ed3dSBarry Smith if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1549584200bdSSatish Balay 155043ee02c3SBarry Smith if (m) rmax = ailen[0]; 1551584200bdSSatish Balay for (i=1; i<mbs; i++) { 1552584200bdSSatish Balay /* move each row back by the amount of empty slots (fshift) before it*/ 1553584200bdSSatish Balay fshift += imax[i-1] - ailen[i-1]; 1554d402145bSBarry Smith rmax = PetscMax(rmax,ailen[i]); 1555584200bdSSatish Balay if (fshift) { 1556a7c10996SSatish Balay ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1557584200bdSSatish Balay N = ailen[i]; 1558584200bdSSatish Balay for (j=0; j<N; j++) { 1559584200bdSSatish Balay ip[j-fshift] = ip[j]; 1560549d3d68SSatish Balay ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1561584200bdSSatish Balay } 1562584200bdSSatish Balay } 1563584200bdSSatish Balay ai[i] = ai[i-1] + ailen[i-1]; 1564584200bdSSatish Balay } 1565584200bdSSatish Balay if (mbs) { 1566584200bdSSatish Balay fshift += imax[mbs-1] - ailen[mbs-1]; 1567584200bdSSatish Balay ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1568584200bdSSatish Balay } 1569584200bdSSatish Balay /* reset ilen and imax for each row */ 1570584200bdSSatish Balay for (i=0; i<mbs; i++) { 1571584200bdSSatish Balay ailen[i] = imax[i] = ai[i+1] - ai[i]; 1572584200bdSSatish Balay } 1573a7c10996SSatish Balay a->nz = ai[mbs]; 1574584200bdSSatish Balay 1575584200bdSSatish Balay /* diagonals may have moved, so kill the diagonal pointers */ 1576b01c7715SBarry Smith a->idiagvalid = PETSC_FALSE; 1577584200bdSSatish Balay if (fshift && a->diag) { 1578606d414cSSatish Balay ierr = PetscFree(a->diag);CHKERRQ(ierr); 157952e6d16bSBarry Smith ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1580584200bdSSatish Balay a->diag = 0; 1581584200bdSSatish Balay } 1582899cda47SBarry Smith ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1583ae15b995SBarry Smith ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1584ae15b995SBarry Smith ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1585e2f3b5e9SSatish Balay a->reallocs = 0; 15860e6d2581SBarry Smith A->info.nz_unneeded = (PetscReal)fshift*bs2; 1587cf4441caSHong Zhang 1588cb5d8e9eSHong Zhang /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 15892f53aa61SHong Zhang if (a->compressedrow.use){ 1590317fbc4cSHong Zhang ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 159173e7a558SHong Zhang } 159288e51ccdSHong Zhang 159388e51ccdSHong Zhang A->same_nonzero = PETSC_TRUE; 15943a40ed3dSBarry Smith PetscFunctionReturn(0); 1595584200bdSSatish Balay } 1596584200bdSSatish Balay 1597bea157c4SSatish Balay /* 1598bea157c4SSatish Balay This function returns an array of flags which indicate the locations of contiguous 1599bea157c4SSatish Balay blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1600bea157c4SSatish Balay then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1601bea157c4SSatish Balay Assume: sizes should be long enough to hold all the values. 1602bea157c4SSatish Balay */ 16034a2ae208SSatish Balay #undef __FUNCT__ 16044a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1605c1ac3661SBarry Smith static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1606d9b7c43dSSatish Balay { 1607c1ac3661SBarry Smith PetscInt i,j,k,row; 1608bea157c4SSatish Balay PetscTruth flg; 16093a40ed3dSBarry Smith 1610433994e6SBarry Smith PetscFunctionBegin; 1611bea157c4SSatish Balay for (i=0,j=0; i<n; j++) { 1612bea157c4SSatish Balay row = idx[i]; 1613bea157c4SSatish Balay if (row%bs!=0) { /* Not the begining of a block */ 1614bea157c4SSatish Balay sizes[j] = 1; 1615bea157c4SSatish Balay i++; 1616e4fda26cSSatish Balay } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1617bea157c4SSatish Balay sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1618bea157c4SSatish Balay i++; 1619bea157c4SSatish Balay } else { /* Begining of the block, so check if the complete block exists */ 1620bea157c4SSatish Balay flg = PETSC_TRUE; 1621bea157c4SSatish Balay for (k=1; k<bs; k++) { 1622bea157c4SSatish Balay if (row+k != idx[i+k]) { /* break in the block */ 1623bea157c4SSatish Balay flg = PETSC_FALSE; 1624bea157c4SSatish Balay break; 1625d9b7c43dSSatish Balay } 1626bea157c4SSatish Balay } 1627abc0a331SBarry Smith if (flg) { /* No break in the bs */ 1628bea157c4SSatish Balay sizes[j] = bs; 1629bea157c4SSatish Balay i+= bs; 1630bea157c4SSatish Balay } else { 1631bea157c4SSatish Balay sizes[j] = 1; 1632bea157c4SSatish Balay i++; 1633bea157c4SSatish Balay } 1634bea157c4SSatish Balay } 1635bea157c4SSatish Balay } 1636bea157c4SSatish Balay *bs_max = j; 16373a40ed3dSBarry Smith PetscFunctionReturn(0); 1638d9b7c43dSSatish Balay } 1639d9b7c43dSSatish Balay 16404a2ae208SSatish Balay #undef __FUNCT__ 16414a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1642f4df32b1SMatthew Knepley PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 1643d9b7c43dSSatish Balay { 1644d9b7c43dSSatish Balay Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1645dfbe8321SBarry Smith PetscErrorCode ierr; 1646f4df32b1SMatthew Knepley PetscInt i,j,k,count,*rows; 1647899cda47SBarry Smith PetscInt bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max; 164887828ca2SBarry Smith PetscScalar zero = 0.0; 16493f1db9ecSBarry Smith MatScalar *aa; 1650d9b7c43dSSatish Balay 16513a40ed3dSBarry Smith PetscFunctionBegin; 1652d9b7c43dSSatish Balay /* Make a copy of the IS and sort it */ 1653bea157c4SSatish Balay /* allocate memory for rows,sizes */ 1654c1ac3661SBarry Smith ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1655bea157c4SSatish Balay sizes = rows + is_n; 1656bea157c4SSatish Balay 1657563b5814SBarry Smith /* copy IS values to rows, and sort them */ 1658bea157c4SSatish Balay for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1659bea157c4SSatish Balay ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1660dffd3267SBarry Smith if (baij->keepzeroedrows) { 1661dffd3267SBarry Smith for (i=0; i<is_n; i++) { sizes[i] = 1; } 1662dffd3267SBarry Smith bs_max = is_n; 166388e51ccdSHong Zhang A->same_nonzero = PETSC_TRUE; 1664dffd3267SBarry Smith } else { 1665bea157c4SSatish Balay ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 166688e51ccdSHong Zhang A->same_nonzero = PETSC_FALSE; 1667dffd3267SBarry Smith } 1668bea157c4SSatish Balay 1669bea157c4SSatish Balay for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1670bea157c4SSatish Balay row = rows[j]; 1671899cda47SBarry Smith if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 1672bea157c4SSatish Balay count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1673b31fbe3bSSatish Balay aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 1674dffd3267SBarry Smith if (sizes[i] == bs && !baij->keepzeroedrows) { 1675f4df32b1SMatthew Knepley if (diag != 0.0) { 1676bea157c4SSatish Balay if (baij->ilen[row/bs] > 0) { 1677bea157c4SSatish Balay baij->ilen[row/bs] = 1; 1678bea157c4SSatish Balay baij->j[baij->i[row/bs]] = row/bs; 1679bea157c4SSatish Balay ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1680a07cd24cSSatish Balay } 1681563b5814SBarry Smith /* Now insert all the diagonal values for this bs */ 1682bea157c4SSatish Balay for (k=0; k<bs; k++) { 1683f4df32b1SMatthew Knepley ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 1684bea157c4SSatish Balay } 1685f4df32b1SMatthew Knepley } else { /* (diag == 0.0) */ 1686bea157c4SSatish Balay baij->ilen[row/bs] = 0; 1687f4df32b1SMatthew Knepley } /* end (diag == 0.0) */ 1688bea157c4SSatish Balay } else { /* (sizes[i] != bs) */ 1689aa482453SBarry Smith #if defined (PETSC_USE_DEBUG) 1690634064b4SBarry Smith if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 1691bea157c4SSatish Balay #endif 1692bea157c4SSatish Balay for (k=0; k<count; k++) { 1693d9b7c43dSSatish Balay aa[0] = zero; 1694d9b7c43dSSatish Balay aa += bs; 1695d9b7c43dSSatish Balay } 1696f4df32b1SMatthew Knepley if (diag != 0.0) { 1697f4df32b1SMatthew Knepley ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 1698d9b7c43dSSatish Balay } 1699d9b7c43dSSatish Balay } 1700bea157c4SSatish Balay } 1701bea157c4SSatish Balay 1702606d414cSSatish Balay ierr = PetscFree(rows);CHKERRQ(ierr); 17039a8dea36SBarry Smith ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 17043a40ed3dSBarry Smith PetscFunctionReturn(0); 1705d9b7c43dSSatish Balay } 17061c351548SSatish Balay 17074a2ae208SSatish Balay #undef __FUNCT__ 17084a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_SeqBAIJ" 1709c1ac3661SBarry Smith PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 17102d61bbb3SSatish Balay { 17112d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1712e2ee6c50SBarry Smith PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 1713c1ac3661SBarry Smith PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1714899cda47SBarry Smith PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 17156849ba73SBarry Smith PetscErrorCode ierr; 1716c1ac3661SBarry Smith PetscInt ridx,cidx,bs2=a->bs2; 1717273d9f13SBarry Smith PetscTruth roworiented=a->roworiented; 17183f1db9ecSBarry Smith MatScalar *ap,value,*aa=a->a,*bap; 17192d61bbb3SSatish Balay 17202d61bbb3SSatish Balay PetscFunctionBegin; 17212d61bbb3SSatish Balay for (k=0; k<m; k++) { /* loop over added rows */ 1722085a36d4SBarry Smith row = im[k]; 1723085a36d4SBarry Smith brow = row/bs; 17245ef9f2a5SBarry Smith if (row < 0) continue; 17252515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 1726899cda47SBarry Smith if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 17272d61bbb3SSatish Balay #endif 17282d61bbb3SSatish Balay rp = aj + ai[brow]; 17292d61bbb3SSatish Balay ap = aa + bs2*ai[brow]; 17302d61bbb3SSatish Balay rmax = imax[brow]; 17312d61bbb3SSatish Balay nrow = ailen[brow]; 17322d61bbb3SSatish Balay low = 0; 1733c71e6ed7SBarry Smith high = nrow; 17342d61bbb3SSatish Balay for (l=0; l<n; l++) { /* loop over added columns */ 17355ef9f2a5SBarry Smith if (in[l] < 0) continue; 17362515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 1737899cda47SBarry Smith if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 17382d61bbb3SSatish Balay #endif 17392d61bbb3SSatish Balay col = in[l]; bcol = col/bs; 17402d61bbb3SSatish Balay ridx = row % bs; cidx = col % bs; 17412d61bbb3SSatish Balay if (roworiented) { 17425ef9f2a5SBarry Smith value = v[l + k*n]; 17432d61bbb3SSatish Balay } else { 17442d61bbb3SSatish Balay value = v[k + l*m]; 17452d61bbb3SSatish Balay } 17467cd84e04SBarry Smith if (col <= lastcol) low = 0; else high = nrow; 1747e2ee6c50SBarry Smith lastcol = col; 17482d61bbb3SSatish Balay while (high-low > 7) { 17492d61bbb3SSatish Balay t = (low+high)/2; 17502d61bbb3SSatish Balay if (rp[t] > bcol) high = t; 17512d61bbb3SSatish Balay else low = t; 17522d61bbb3SSatish Balay } 17532d61bbb3SSatish Balay for (i=low; i<high; i++) { 17542d61bbb3SSatish Balay if (rp[i] > bcol) break; 17552d61bbb3SSatish Balay if (rp[i] == bcol) { 17562d61bbb3SSatish Balay bap = ap + bs2*i + bs*cidx + ridx; 17572d61bbb3SSatish Balay if (is == ADD_VALUES) *bap += value; 17582d61bbb3SSatish Balay else *bap = value; 17592d61bbb3SSatish Balay goto noinsert1; 17602d61bbb3SSatish Balay } 17612d61bbb3SSatish Balay } 17622d61bbb3SSatish Balay if (nonew == 1) goto noinsert1; 1763085a36d4SBarry Smith if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1764421e10b8SBarry Smith MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1765c03d1d03SSatish Balay N = nrow++ - 1; high++; 17662d61bbb3SSatish Balay /* shift up all the later entries in this row */ 17672d61bbb3SSatish Balay for (ii=N; ii>=i; ii--) { 17682d61bbb3SSatish Balay rp[ii+1] = rp[ii]; 1769549d3d68SSatish Balay ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 17702d61bbb3SSatish Balay } 1771549d3d68SSatish Balay if (N>=i) { 1772549d3d68SSatish Balay ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1773549d3d68SSatish Balay } 17742d61bbb3SSatish Balay rp[i] = bcol; 17752d61bbb3SSatish Balay ap[bs2*i + bs*cidx + ridx] = value; 1776085a36d4SBarry Smith a->nz++; 17772d61bbb3SSatish Balay noinsert1:; 17782d61bbb3SSatish Balay low = i; 17792d61bbb3SSatish Balay } 17802d61bbb3SSatish Balay ailen[brow] = nrow; 17812d61bbb3SSatish Balay } 178288e51ccdSHong Zhang A->same_nonzero = PETSC_FALSE; 17832d61bbb3SSatish Balay PetscFunctionReturn(0); 17842d61bbb3SSatish Balay } 17852d61bbb3SSatish Balay 17862d61bbb3SSatish Balay 17874a2ae208SSatish Balay #undef __FUNCT__ 17884a2ae208SSatish Balay #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1789dfbe8321SBarry Smith PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 17902d61bbb3SSatish Balay { 17912d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 17922d61bbb3SSatish Balay Mat outA; 1793dfbe8321SBarry Smith PetscErrorCode ierr; 1794667159a5SBarry Smith PetscTruth row_identity,col_identity; 17952d61bbb3SSatish Balay 17962d61bbb3SSatish Balay PetscFunctionBegin; 1797d3d32019SBarry Smith if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1798667159a5SBarry Smith ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1799667159a5SBarry Smith ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1800667159a5SBarry Smith if (!row_identity || !col_identity) { 1801634064b4SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 1802667159a5SBarry Smith } 18032d61bbb3SSatish Balay 18042d61bbb3SSatish Balay outA = inA; 18052d61bbb3SSatish Balay inA->factor = FACTOR_LU; 18062d61bbb3SSatish Balay 1807c4992f7dSBarry Smith ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1808cf242676SKris Buschelman 1809c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1810c3122656SLisandro Dalcin if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 1811c3122656SLisandro Dalcin a->row = row; 1812c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1813c3122656SLisandro Dalcin if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 1814c3122656SLisandro Dalcin a->col = col; 1815c38d4ed2SBarry Smith 1816c38d4ed2SBarry Smith /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 18174c49b128SBarry Smith ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 181852e6d16bSBarry Smith ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1819c38d4ed2SBarry Smith 1820cf242676SKris Buschelman /* 1821cf242676SKris Buschelman Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1822cf242676SKris Buschelman for ILU(0) factorization with natural ordering 1823cf242676SKris Buschelman */ 1824899cda47SBarry Smith if (inA->rmap.bs < 8) { 1825cf242676SKris Buschelman ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1826cf242676SKris Buschelman } else { 1827c38d4ed2SBarry Smith if (!a->solve_work) { 1828899cda47SBarry Smith ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1829899cda47SBarry Smith ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1830c38d4ed2SBarry Smith } 18312d61bbb3SSatish Balay } 18322d61bbb3SSatish Balay 1833af281ebdSHong Zhang ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1834667159a5SBarry Smith 18352d61bbb3SSatish Balay PetscFunctionReturn(0); 18362d61bbb3SSatish Balay } 1837d9b7c43dSSatish Balay 1838fb2e594dSBarry Smith EXTERN_C_BEGIN 18394a2ae208SSatish Balay #undef __FUNCT__ 18404a2ae208SSatish Balay #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1841be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 184227a8da17SBarry Smith { 184327a8da17SBarry Smith Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1844c1ac3661SBarry Smith PetscInt i,nz,nbs; 184527a8da17SBarry Smith 184627a8da17SBarry Smith PetscFunctionBegin; 184714db4f74SSatish Balay nz = baij->maxnz/baij->bs2; 184814db4f74SSatish Balay nbs = baij->nbs; 184927a8da17SBarry Smith for (i=0; i<nz; i++) { 185027a8da17SBarry Smith baij->j[i] = indices[i]; 185127a8da17SBarry Smith } 185227a8da17SBarry Smith baij->nz = nz; 185314db4f74SSatish Balay for (i=0; i<nbs; i++) { 185427a8da17SBarry Smith baij->ilen[i] = baij->imax[i]; 185527a8da17SBarry Smith } 185627a8da17SBarry Smith 185727a8da17SBarry Smith PetscFunctionReturn(0); 185827a8da17SBarry Smith } 1859fb2e594dSBarry Smith EXTERN_C_END 186027a8da17SBarry Smith 18614a2ae208SSatish Balay #undef __FUNCT__ 18624a2ae208SSatish Balay #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 186327a8da17SBarry Smith /*@ 186427a8da17SBarry Smith MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 186527a8da17SBarry Smith in the matrix. 186627a8da17SBarry Smith 186727a8da17SBarry Smith Input Parameters: 186827a8da17SBarry Smith + mat - the SeqBAIJ matrix 186927a8da17SBarry Smith - indices - the column indices 187027a8da17SBarry Smith 187115091d37SBarry Smith Level: advanced 187215091d37SBarry Smith 187327a8da17SBarry Smith Notes: 187427a8da17SBarry Smith This can be called if you have precomputed the nonzero structure of the 187527a8da17SBarry Smith matrix and want to provide it to the matrix object to improve the performance 187627a8da17SBarry Smith of the MatSetValues() operation. 187727a8da17SBarry Smith 187827a8da17SBarry Smith You MUST have set the correct numbers of nonzeros per row in the call to 1879d1be2dadSMatthew Knepley MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 188027a8da17SBarry Smith 188127a8da17SBarry Smith MUST be called before any calls to MatSetValues(); 188227a8da17SBarry Smith 188327a8da17SBarry Smith @*/ 1884be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 188527a8da17SBarry Smith { 1886c1ac3661SBarry Smith PetscErrorCode ierr,(*f)(Mat,PetscInt *); 188727a8da17SBarry Smith 188827a8da17SBarry Smith PetscFunctionBegin; 18894482741eSBarry Smith PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 18904482741eSBarry Smith PetscValidPointer(indices,2); 1891c134de8dSSatish Balay ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 189227a8da17SBarry Smith if (f) { 189327a8da17SBarry Smith ierr = (*f)(mat,indices);CHKERRQ(ierr); 189427a8da17SBarry Smith } else { 1895634064b4SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 189627a8da17SBarry Smith } 189727a8da17SBarry Smith PetscFunctionReturn(0); 189827a8da17SBarry Smith } 189927a8da17SBarry Smith 19004a2ae208SSatish Balay #undef __FUNCT__ 1901985db425SBarry Smith #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 1902985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 1903273d9f13SBarry Smith { 1904273d9f13SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1905dfbe8321SBarry Smith PetscErrorCode ierr; 1906c1ac3661SBarry Smith PetscInt i,j,n,row,bs,*ai,*aj,mbs; 1907273d9f13SBarry Smith PetscReal atmp; 190887828ca2SBarry Smith PetscScalar *x,zero = 0.0; 1909273d9f13SBarry Smith MatScalar *aa; 1910c1ac3661SBarry Smith PetscInt ncols,brow,krow,kcol; 1911273d9f13SBarry Smith 1912273d9f13SBarry Smith PetscFunctionBegin; 1913273d9f13SBarry Smith if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1914899cda47SBarry Smith bs = A->rmap.bs; 1915273d9f13SBarry Smith aa = a->a; 1916273d9f13SBarry Smith ai = a->i; 1917273d9f13SBarry Smith aj = a->j; 1918273d9f13SBarry Smith mbs = a->mbs; 1919273d9f13SBarry Smith 19202dcb1b2aSMatthew Knepley ierr = VecSet(v,zero);CHKERRQ(ierr); 1921985db425SBarry Smith if (idx) { 1922985db425SBarry Smith for (i=0; i<A->rmap.n;i++) idx[i] = 0; 1923985db425SBarry Smith } 19241ebc52fbSHong Zhang ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1925273d9f13SBarry Smith ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1926899cda47SBarry Smith if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1927273d9f13SBarry Smith for (i=0; i<mbs; i++) { 1928273d9f13SBarry Smith ncols = ai[1] - ai[0]; ai++; 1929273d9f13SBarry Smith brow = bs*i; 1930273d9f13SBarry Smith for (j=0; j<ncols; j++){ 1931273d9f13SBarry Smith for (kcol=0; kcol<bs; kcol++){ 1932273d9f13SBarry Smith for (krow=0; krow<bs; krow++){ 1933273d9f13SBarry Smith atmp = PetscAbsScalar(*aa);aa++; 1934273d9f13SBarry Smith row = brow + krow; /* row index */ 1935a83599f4SBarry Smith /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 1936985db425SBarry Smith if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 1937273d9f13SBarry Smith } 1938273d9f13SBarry Smith } 1939273d9f13SBarry Smith aj++; 1940273d9f13SBarry Smith } 1941273d9f13SBarry Smith } 19421ebc52fbSHong Zhang ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1943273d9f13SBarry Smith PetscFunctionReturn(0); 1944273d9f13SBarry Smith } 1945273d9f13SBarry Smith 19464a2ae208SSatish Balay #undef __FUNCT__ 19473c896bc6SHong Zhang #define __FUNCT__ "MatCopy_SeqBAIJ" 19483c896bc6SHong Zhang PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 19493c896bc6SHong Zhang { 19503c896bc6SHong Zhang PetscErrorCode ierr; 19513c896bc6SHong Zhang 19523c896bc6SHong Zhang PetscFunctionBegin; 19533c896bc6SHong Zhang /* If the two matrices have the same copy implementation, use fast copy. */ 19543c896bc6SHong Zhang if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 19553c896bc6SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 19563c896bc6SHong Zhang Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 19573c896bc6SHong Zhang 1958899cda47SBarry Smith if (a->i[A->rmap.N] != b->i[B->rmap.N]) { 19593c896bc6SHong Zhang SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 19603c896bc6SHong Zhang } 1961899cda47SBarry Smith ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr); 19623c896bc6SHong Zhang } else { 19633c896bc6SHong Zhang ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 19643c896bc6SHong Zhang } 19653c896bc6SHong Zhang PetscFunctionReturn(0); 19663c896bc6SHong Zhang } 19673c896bc6SHong Zhang 19683c896bc6SHong Zhang #undef __FUNCT__ 19694a2ae208SSatish Balay #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1970dfbe8321SBarry Smith PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 1971273d9f13SBarry Smith { 1972dfbe8321SBarry Smith PetscErrorCode ierr; 1973273d9f13SBarry Smith 1974273d9f13SBarry Smith PetscFunctionBegin; 19757edd0491SSatish Balay ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 1976273d9f13SBarry Smith PetscFunctionReturn(0); 1977273d9f13SBarry Smith } 1978273d9f13SBarry Smith 19794a2ae208SSatish Balay #undef __FUNCT__ 19804a2ae208SSatish Balay #define __FUNCT__ "MatGetArray_SeqBAIJ" 1981dfbe8321SBarry Smith PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1982f2a5309cSSatish Balay { 1983f2a5309cSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1984f2a5309cSSatish Balay PetscFunctionBegin; 1985f2a5309cSSatish Balay *array = a->a; 1986f2a5309cSSatish Balay PetscFunctionReturn(0); 1987f2a5309cSSatish Balay } 1988f2a5309cSSatish Balay 19894a2ae208SSatish Balay #undef __FUNCT__ 19904a2ae208SSatish Balay #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1991dfbe8321SBarry Smith PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1992f2a5309cSSatish Balay { 1993f2a5309cSSatish Balay PetscFunctionBegin; 1994f2a5309cSSatish Balay PetscFunctionReturn(0); 1995f2a5309cSSatish Balay } 1996f2a5309cSSatish Balay 199742ee4b1aSHong Zhang #include "petscblaslapack.h" 199842ee4b1aSHong Zhang #undef __FUNCT__ 199942ee4b1aSHong Zhang #define __FUNCT__ "MatAXPY_SeqBAIJ" 2000f4df32b1SMatthew Knepley PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 200142ee4b1aSHong Zhang { 200242ee4b1aSHong Zhang Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2003dfbe8321SBarry Smith PetscErrorCode ierr; 2004899cda47SBarry Smith PetscInt i,bs=Y->rmap.bs,j,bs2; 20054ce68768SBarry Smith PetscBLASInt one=1,bnz = (PetscBLASInt)x->nz; 200642ee4b1aSHong Zhang 200742ee4b1aSHong Zhang PetscFunctionBegin; 200842ee4b1aSHong Zhang if (str == SAME_NONZERO_PATTERN) { 2009f4df32b1SMatthew Knepley PetscScalar alpha = a; 2010f4df32b1SMatthew Knepley BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2011c537a176SHong Zhang } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2012c4319e64SHong Zhang if (y->xtoy && y->XtoY != X) { 2013c4319e64SHong Zhang ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2014c4319e64SHong Zhang ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2015c537a176SHong Zhang } 2016c4319e64SHong Zhang if (!y->xtoy) { /* get xtoy */ 2017c4319e64SHong Zhang ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2018c4319e64SHong Zhang y->XtoY = X; 2019c537a176SHong Zhang } 2020c4319e64SHong Zhang bs2 = bs*bs; 2021c537a176SHong Zhang for (i=0; i<x->nz; i++) { 2022c4319e64SHong Zhang j = 0; 2023c4319e64SHong Zhang while (j < bs2){ 2024f4df32b1SMatthew Knepley y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2025c4319e64SHong Zhang j++; 2026c537a176SHong Zhang } 2027c4319e64SHong Zhang } 20281e2582c4SBarry Smith ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 202942ee4b1aSHong Zhang } else { 2030f4df32b1SMatthew Knepley ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 203142ee4b1aSHong Zhang } 203242ee4b1aSHong Zhang PetscFunctionReturn(0); 203342ee4b1aSHong Zhang } 203442ee4b1aSHong Zhang 203599cafbc1SBarry Smith #undef __FUNCT__ 203699cafbc1SBarry Smith #define __FUNCT__ "MatRealPart_SeqBAIJ" 203799cafbc1SBarry Smith PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 203899cafbc1SBarry Smith { 203999cafbc1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 204099cafbc1SBarry Smith PetscInt i,nz = a->bs2*a->i[a->mbs]; 204199cafbc1SBarry Smith PetscScalar *aa = a->a; 204299cafbc1SBarry Smith 204399cafbc1SBarry Smith PetscFunctionBegin; 204499cafbc1SBarry Smith for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 204599cafbc1SBarry Smith PetscFunctionReturn(0); 204699cafbc1SBarry Smith } 204799cafbc1SBarry Smith 204899cafbc1SBarry Smith #undef __FUNCT__ 204999cafbc1SBarry Smith #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 205099cafbc1SBarry Smith PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 205199cafbc1SBarry Smith { 205299cafbc1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 205399cafbc1SBarry Smith PetscInt i,nz = a->bs2*a->i[a->mbs]; 205499cafbc1SBarry Smith PetscScalar *aa = a->a; 205599cafbc1SBarry Smith 205699cafbc1SBarry Smith PetscFunctionBegin; 205799cafbc1SBarry Smith for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 205899cafbc1SBarry Smith PetscFunctionReturn(0); 205999cafbc1SBarry Smith } 206099cafbc1SBarry Smith 206199cafbc1SBarry Smith 20622593348eSBarry Smith /* -------------------------------------------------------------------*/ 2063cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2064cc2dc46cSBarry Smith MatGetRow_SeqBAIJ, 2065cc2dc46cSBarry Smith MatRestoreRow_SeqBAIJ, 2066cc2dc46cSBarry Smith MatMult_SeqBAIJ_N, 206797304618SKris Buschelman /* 4*/ MatMultAdd_SeqBAIJ_N, 20687c922b88SBarry Smith MatMultTranspose_SeqBAIJ, 20697c922b88SBarry Smith MatMultTransposeAdd_SeqBAIJ, 2070cc2dc46cSBarry Smith MatSolve_SeqBAIJ_N, 2071cc2dc46cSBarry Smith 0, 2072cc2dc46cSBarry Smith 0, 207397304618SKris Buschelman /*10*/ 0, 2074cc2dc46cSBarry Smith MatLUFactor_SeqBAIJ, 2075cc2dc46cSBarry Smith 0, 2076b6490206SBarry Smith 0, 2077f2501298SSatish Balay MatTranspose_SeqBAIJ, 207897304618SKris Buschelman /*15*/ MatGetInfo_SeqBAIJ, 2079cc2dc46cSBarry Smith MatEqual_SeqBAIJ, 2080cc2dc46cSBarry Smith MatGetDiagonal_SeqBAIJ, 2081cc2dc46cSBarry Smith MatDiagonalScale_SeqBAIJ, 2082cc2dc46cSBarry Smith MatNorm_SeqBAIJ, 208397304618SKris Buschelman /*20*/ 0, 2084cc2dc46cSBarry Smith MatAssemblyEnd_SeqBAIJ, 2085cc2dc46cSBarry Smith 0, 2086cc2dc46cSBarry Smith MatSetOption_SeqBAIJ, 2087cc2dc46cSBarry Smith MatZeroEntries_SeqBAIJ, 208897304618SKris Buschelman /*25*/ MatZeroRows_SeqBAIJ, 2089cc2dc46cSBarry Smith MatLUFactorSymbolic_SeqBAIJ, 2090cc2dc46cSBarry Smith MatLUFactorNumeric_SeqBAIJ_N, 2091c05c3958SHong Zhang MatCholeskyFactorSymbolic_SeqBAIJ, 2092c05c3958SHong Zhang MatCholeskyFactorNumeric_SeqBAIJ_N, 209397304618SKris Buschelman /*30*/ MatSetUpPreallocation_SeqBAIJ, 2094cc2dc46cSBarry Smith MatILUFactorSymbolic_SeqBAIJ, 2095c05c3958SHong Zhang MatICCFactorSymbolic_SeqBAIJ, 2096f2a5309cSSatish Balay MatGetArray_SeqBAIJ, 2097f2a5309cSSatish Balay MatRestoreArray_SeqBAIJ, 209897304618SKris Buschelman /*35*/ MatDuplicate_SeqBAIJ, 2099cc2dc46cSBarry Smith 0, 2100cc2dc46cSBarry Smith 0, 2101cc2dc46cSBarry Smith MatILUFactor_SeqBAIJ, 2102cc2dc46cSBarry Smith 0, 210397304618SKris Buschelman /*40*/ MatAXPY_SeqBAIJ, 2104cc2dc46cSBarry Smith MatGetSubMatrices_SeqBAIJ, 2105cc2dc46cSBarry Smith MatIncreaseOverlap_SeqBAIJ, 2106cc2dc46cSBarry Smith MatGetValues_SeqBAIJ, 21073c896bc6SHong Zhang MatCopy_SeqBAIJ, 21088c07d4e3SBarry Smith /*45*/ 0, 2109cc2dc46cSBarry Smith MatScale_SeqBAIJ, 2110cc2dc46cSBarry Smith 0, 2111cc2dc46cSBarry Smith 0, 2112cc2dc46cSBarry Smith 0, 2113521d7252SBarry Smith /*50*/ 0, 21143b2fbd54SBarry Smith MatGetRowIJ_SeqBAIJ, 211592c4ed94SBarry Smith MatRestoreRowIJ_SeqBAIJ, 2116cc2dc46cSBarry Smith 0, 2117cc2dc46cSBarry Smith 0, 211897304618SKris Buschelman /*55*/ 0, 2119cc2dc46cSBarry Smith 0, 2120cc2dc46cSBarry Smith 0, 2121cc2dc46cSBarry Smith 0, 2122d3825aa8SBarry Smith MatSetValuesBlocked_SeqBAIJ, 212397304618SKris Buschelman /*60*/ MatGetSubMatrix_SeqBAIJ, 2124b9b97703SBarry Smith MatDestroy_SeqBAIJ, 2125b9b97703SBarry Smith MatView_SeqBAIJ, 2126357abbc8SBarry Smith 0, 2127273d9f13SBarry Smith 0, 212897304618SKris Buschelman /*65*/ 0, 2129273d9f13SBarry Smith 0, 2130273d9f13SBarry Smith 0, 2131273d9f13SBarry Smith 0, 2132273d9f13SBarry Smith 0, 2133985db425SBarry Smith /*70*/ MatGetRowMaxAbs_SeqBAIJ, 213497304618SKris Buschelman MatConvert_Basic, 2135273d9f13SBarry Smith 0, 213697304618SKris Buschelman 0, 213797304618SKris Buschelman 0, 213897304618SKris Buschelman /*75*/ 0, 213997304618SKris Buschelman 0, 214097304618SKris Buschelman 0, 214197304618SKris Buschelman 0, 214297304618SKris Buschelman 0, 214397304618SKris Buschelman /*80*/ 0, 214497304618SKris Buschelman 0, 214597304618SKris Buschelman 0, 214697304618SKris Buschelman 0, 2147865e5f61SKris Buschelman MatLoad_SeqBAIJ, 2148865e5f61SKris Buschelman /*85*/ 0, 2149b01c7715SBarry Smith 0, 2150b01c7715SBarry Smith 0, 2151b01c7715SBarry Smith 0, 2152865e5f61SKris Buschelman 0, 2153865e5f61SKris Buschelman /*90*/ 0, 2154865e5f61SKris Buschelman 0, 2155865e5f61SKris Buschelman 0, 2156865e5f61SKris Buschelman 0, 2157865e5f61SKris Buschelman 0, 2158865e5f61SKris Buschelman /*95*/ 0, 2159865e5f61SKris Buschelman 0, 2160865e5f61SKris Buschelman 0, 216199cafbc1SBarry Smith 0, 216299cafbc1SBarry Smith 0, 216399cafbc1SBarry Smith /*100*/0, 216499cafbc1SBarry Smith 0, 216599cafbc1SBarry Smith 0, 216699cafbc1SBarry Smith 0, 216799cafbc1SBarry Smith 0, 216899cafbc1SBarry Smith /*105*/0, 216999cafbc1SBarry Smith MatRealPart_SeqBAIJ, 21702af78befSBarry Smith MatImaginaryPart_SeqBAIJ, 21712af78befSBarry Smith 0, 21722af78befSBarry Smith 0, 21732af78befSBarry Smith /*110*/0, 21742af78befSBarry Smith 0, 21752af78befSBarry Smith 0, 21762af78befSBarry Smith 0, 21772af78befSBarry Smith MatMissingDiagonal_SeqBAIJ 21782af78befSBarry Smith /*115*/ 217999cafbc1SBarry Smith }; 21802593348eSBarry Smith 21813e90b805SBarry Smith EXTERN_C_BEGIN 21824a2ae208SSatish Balay #undef __FUNCT__ 21834a2ae208SSatish Balay #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2184be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 21853e90b805SBarry Smith { 21863e90b805SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2187899cda47SBarry Smith PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2188dfbe8321SBarry Smith PetscErrorCode ierr; 21893e90b805SBarry Smith 21903e90b805SBarry Smith PetscFunctionBegin; 21913e90b805SBarry Smith if (aij->nonew != 1) { 2192512a5fc5SBarry Smith SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 21933e90b805SBarry Smith } 21943e90b805SBarry Smith 21953e90b805SBarry Smith /* allocate space for values if not already there */ 21963e90b805SBarry Smith if (!aij->saved_values) { 219787828ca2SBarry Smith ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 21983e90b805SBarry Smith } 21993e90b805SBarry Smith 22003e90b805SBarry Smith /* copy values over */ 220187828ca2SBarry Smith ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 22023e90b805SBarry Smith PetscFunctionReturn(0); 22033e90b805SBarry Smith } 22043e90b805SBarry Smith EXTERN_C_END 22053e90b805SBarry Smith 22063e90b805SBarry Smith EXTERN_C_BEGIN 22074a2ae208SSatish Balay #undef __FUNCT__ 22084a2ae208SSatish Balay #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2209be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 22103e90b805SBarry Smith { 22113e90b805SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 22126849ba73SBarry Smith PetscErrorCode ierr; 2213899cda47SBarry Smith PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 22143e90b805SBarry Smith 22153e90b805SBarry Smith PetscFunctionBegin; 22163e90b805SBarry Smith if (aij->nonew != 1) { 2217512a5fc5SBarry Smith SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 22183e90b805SBarry Smith } 22193e90b805SBarry Smith if (!aij->saved_values) { 2220634064b4SBarry Smith SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 22213e90b805SBarry Smith } 22223e90b805SBarry Smith 22233e90b805SBarry Smith /* copy values over */ 222487828ca2SBarry Smith ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 22253e90b805SBarry Smith PetscFunctionReturn(0); 22263e90b805SBarry Smith } 22273e90b805SBarry Smith EXTERN_C_END 22283e90b805SBarry Smith 2229273d9f13SBarry Smith EXTERN_C_BEGIN 2230f69a0ea3SMatthew Knepley extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2231f69a0ea3SMatthew Knepley extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2232273d9f13SBarry Smith EXTERN_C_END 2233273d9f13SBarry Smith 2234273d9f13SBarry Smith EXTERN_C_BEGIN 22354a2ae208SSatish Balay #undef __FUNCT__ 2236a23d5eceSKris Buschelman #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2237be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2238a23d5eceSKris Buschelman { 2239a23d5eceSKris Buschelman Mat_SeqBAIJ *b; 22406849ba73SBarry Smith PetscErrorCode ierr; 2241a96a251dSBarry Smith PetscInt i,mbs,nbs,bs2,newbs = bs; 2242ab93d7beSBarry Smith PetscTruth flg,skipallocation = PETSC_FALSE; 2243a23d5eceSKris Buschelman 2244a23d5eceSKris Buschelman PetscFunctionBegin; 2245a23d5eceSKris Buschelman 2246ab93d7beSBarry Smith if (nz == MAT_SKIP_ALLOCATION) { 2247ab93d7beSBarry Smith skipallocation = PETSC_TRUE; 2248ab93d7beSBarry Smith nz = 0; 2249ab93d7beSBarry Smith } 22508c07d4e3SBarry Smith 22517adad957SLisandro Dalcin ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 225217667f90SBarry Smith ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr); 22538c07d4e3SBarry Smith ierr = PetscOptionsEnd();CHKERRQ(ierr); 22548c07d4e3SBarry Smith 2255a23d5eceSKris Buschelman if (nnz && newbs != bs) { 2256634064b4SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2257a23d5eceSKris Buschelman } 2258a23d5eceSKris Buschelman bs = newbs; 2259a23d5eceSKris Buschelman 2260899cda47SBarry Smith B->rmap.bs = B->cmap.bs = bs; 22616148ca0dSBarry Smith ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 22626148ca0dSBarry Smith ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2263899cda47SBarry Smith 2264899cda47SBarry Smith B->preallocated = PETSC_TRUE; 2265899cda47SBarry Smith 2266899cda47SBarry Smith mbs = B->rmap.n/bs; 2267899cda47SBarry Smith nbs = B->cmap.n/bs; 2268a23d5eceSKris Buschelman bs2 = bs*bs; 2269a23d5eceSKris Buschelman 2270899cda47SBarry Smith if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) { 2271899cda47SBarry Smith SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs); 2272a23d5eceSKris Buschelman } 2273a23d5eceSKris Buschelman 2274a23d5eceSKris Buschelman if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 227577431f27SBarry Smith if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2276a23d5eceSKris Buschelman if (nnz) { 2277a23d5eceSKris Buschelman for (i=0; i<mbs; i++) { 227877431f27SBarry Smith if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 227977431f27SBarry Smith if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2280a23d5eceSKris Buschelman } 2281a23d5eceSKris Buschelman } 2282a23d5eceSKris Buschelman 2283a23d5eceSKris Buschelman b = (Mat_SeqBAIJ*)B->data; 22847adad957SLisandro Dalcin ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 22858c07d4e3SBarry Smith ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 22868c07d4e3SBarry Smith ierr = PetscOptionsEnd();CHKERRQ(ierr); 22878c07d4e3SBarry Smith 2288a23d5eceSKris Buschelman B->ops->solve = MatSolve_SeqBAIJ_Update; 2289a23d5eceSKris Buschelman B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2290a23d5eceSKris Buschelman if (!flg) { 2291a23d5eceSKris Buschelman switch (bs) { 2292a23d5eceSKris Buschelman case 1: 2293a23d5eceSKris Buschelman B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2294a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_1; 2295a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2296a23d5eceSKris Buschelman break; 2297a23d5eceSKris Buschelman case 2: 2298a23d5eceSKris Buschelman B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2299a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_2; 2300a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2301b01c7715SBarry Smith B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2302a23d5eceSKris Buschelman break; 2303a23d5eceSKris Buschelman case 3: 2304a23d5eceSKris Buschelman B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2305a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_3; 2306a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2307b01c7715SBarry Smith B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2308a23d5eceSKris Buschelman break; 2309a23d5eceSKris Buschelman case 4: 2310a23d5eceSKris Buschelman B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2311a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_4; 2312a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2313b01c7715SBarry Smith B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2314a23d5eceSKris Buschelman break; 2315a23d5eceSKris Buschelman case 5: 2316a23d5eceSKris Buschelman B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2317a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_5; 2318a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2319b01c7715SBarry Smith B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2320a23d5eceSKris Buschelman break; 2321a23d5eceSKris Buschelman case 6: 2322a23d5eceSKris Buschelman B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2323a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_6; 2324a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2325a23d5eceSKris Buschelman break; 2326a23d5eceSKris Buschelman case 7: 2327a23d5eceSKris Buschelman B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2328a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_7; 2329a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2330a23d5eceSKris Buschelman break; 2331a23d5eceSKris Buschelman default: 2332a23d5eceSKris Buschelman B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2333a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_N; 2334a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2335a23d5eceSKris Buschelman break; 2336a23d5eceSKris Buschelman } 2337a23d5eceSKris Buschelman } 2338899cda47SBarry Smith B->rmap.bs = bs; 2339a23d5eceSKris Buschelman b->mbs = mbs; 2340a23d5eceSKris Buschelman b->nbs = nbs; 2341ab93d7beSBarry Smith if (!skipallocation) { 23422ee49352SLisandro Dalcin if (!b->imax) { 2343ab93d7beSBarry Smith ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 23442ee49352SLisandro Dalcin } 2345ab93d7beSBarry Smith /* b->ilen will count nonzeros in each block row so far. */ 2346ab93d7beSBarry Smith for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2347a23d5eceSKris Buschelman if (!nnz) { 2348a23d5eceSKris Buschelman if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2349a23d5eceSKris Buschelman else if (nz <= 0) nz = 1; 2350a23d5eceSKris Buschelman for (i=0; i<mbs; i++) b->imax[i] = nz; 2351a23d5eceSKris Buschelman nz = nz*mbs; 2352a23d5eceSKris Buschelman } else { 2353a23d5eceSKris Buschelman nz = 0; 2354a23d5eceSKris Buschelman for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2355a23d5eceSKris Buschelman } 2356a23d5eceSKris Buschelman 2357a23d5eceSKris Buschelman /* allocate the matrix space */ 23582ee49352SLisandro Dalcin ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2359899cda47SBarry Smith ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr); 23602ee49352SLisandro Dalcin ierr = PetscLogObjectMemory(B,(B->rmap.N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2361a23d5eceSKris Buschelman ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2362c1ac3661SBarry Smith ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2363a23d5eceSKris Buschelman b->singlemalloc = PETSC_TRUE; 2364a23d5eceSKris Buschelman b->i[0] = 0; 2365a23d5eceSKris Buschelman for (i=1; i<mbs+1; i++) { 2366a23d5eceSKris Buschelman b->i[i] = b->i[i-1] + b->imax[i-1]; 2367a23d5eceSKris Buschelman } 2368e6b907acSBarry Smith b->free_a = PETSC_TRUE; 2369e6b907acSBarry Smith b->free_ij = PETSC_TRUE; 2370e811da20SHong Zhang } else { 2371e6b907acSBarry Smith b->free_a = PETSC_FALSE; 2372e6b907acSBarry Smith b->free_ij = PETSC_FALSE; 2373ab93d7beSBarry Smith } 2374a23d5eceSKris Buschelman 2375899cda47SBarry Smith B->rmap.bs = bs; 2376a23d5eceSKris Buschelman b->bs2 = bs2; 2377a23d5eceSKris Buschelman b->mbs = mbs; 2378a23d5eceSKris Buschelman b->nz = 0; 2379a23d5eceSKris Buschelman b->maxnz = nz*bs2; 2380a23d5eceSKris Buschelman B->info.nz_unneeded = (PetscReal)b->maxnz; 2381a23d5eceSKris Buschelman PetscFunctionReturn(0); 2382a23d5eceSKris Buschelman } 2383a23d5eceSKris Buschelman EXTERN_C_END 2384a23d5eceSKris Buschelman 23850bad9183SKris Buschelman /*MC 2386fafad747SKris Buschelman MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 23870bad9183SKris Buschelman block sparse compressed row format. 23880bad9183SKris Buschelman 23890bad9183SKris Buschelman Options Database Keys: 23900bad9183SKris Buschelman . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 23910bad9183SKris Buschelman 23920bad9183SKris Buschelman Level: beginner 23930bad9183SKris Buschelman 2394f0c06035SSatish Balay .seealso: MatCreateSeqBAIJ() 23950bad9183SKris Buschelman M*/ 23960bad9183SKris Buschelman 2397a23d5eceSKris Buschelman EXTERN_C_BEGIN 2398a23d5eceSKris Buschelman #undef __FUNCT__ 23994a2ae208SSatish Balay #define __FUNCT__ "MatCreate_SeqBAIJ" 2400be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 24012593348eSBarry Smith { 2402dfbe8321SBarry Smith PetscErrorCode ierr; 2403c1ac3661SBarry Smith PetscMPIInt size; 2404b6490206SBarry Smith Mat_SeqBAIJ *b; 24053b2fbd54SBarry Smith 24063a40ed3dSBarry Smith PetscFunctionBegin; 24077adad957SLisandro Dalcin ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 240829bbc08cSBarry Smith if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2409b6490206SBarry Smith 241038f2d2fdSLisandro Dalcin ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2411b0a32e0cSBarry Smith B->data = (void*)b; 2412549d3d68SSatish Balay ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 24132593348eSBarry Smith B->factor = 0; 241490f02eecSBarry Smith B->mapping = 0; 24152593348eSBarry Smith b->row = 0; 24162593348eSBarry Smith b->col = 0; 2417e51c0b9cSSatish Balay b->icol = 0; 24182593348eSBarry Smith b->reallocs = 0; 24193e90b805SBarry Smith b->saved_values = 0; 2420cfce73b9SSatish Balay #if defined(PETSC_USE_MAT_SINGLE) 2421563b5814SBarry Smith b->setvalueslen = 0; 2422563b5814SBarry Smith b->setvaluescopy = PETSC_NULL; 2423563b5814SBarry Smith #endif 24242593348eSBarry Smith 2425c4992f7dSBarry Smith b->roworiented = PETSC_TRUE; 24262593348eSBarry Smith b->nonew = 0; 24272593348eSBarry Smith b->diag = 0; 24282593348eSBarry Smith b->solve_work = 0; 2429de6a44a3SBarry Smith b->mult_work = 0; 24302a1b7f2aSHong Zhang B->spptr = 0; 24310e6d2581SBarry Smith B->info.nz_unneeded = (PetscReal)b->maxnz; 2432883fce79SBarry Smith b->keepzeroedrows = PETSC_FALSE; 2433c4319e64SHong Zhang b->xtoy = 0; 2434c4319e64SHong Zhang b->XtoY = 0; 243573e7a558SHong Zhang b->compressedrow.use = PETSC_FALSE; 243626e093fcSHong Zhang b->compressedrow.nrows = 0; 243773e7a558SHong Zhang b->compressedrow.i = PETSC_NULL; 243873e7a558SHong Zhang b->compressedrow.rindex = PETSC_NULL; 243973e7a558SHong Zhang b->compressedrow.checked = PETSC_FALSE; 244088e51ccdSHong Zhang B->same_nonzero = PETSC_FALSE; 24414e220ebcSLois Curfman McInnes 244243516a2dSKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 244343516a2dSKris Buschelman "MatInvertBlockDiagonal_SeqBAIJ", 244443516a2dSKris Buschelman MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2445f1af5d2fSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 24463e90b805SBarry Smith "MatStoreValues_SeqBAIJ", 2447bc4b532fSSatish Balay MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2448f1af5d2fSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 24493e90b805SBarry Smith "MatRetrieveValues_SeqBAIJ", 2450bc4b532fSSatish Balay MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2451f1af5d2fSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 245227a8da17SBarry Smith "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2453bc4b532fSSatish Balay MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2454a6175056SHong Zhang ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2455273d9f13SBarry Smith "MatConvert_SeqBAIJ_SeqAIJ", 2456273d9f13SBarry Smith MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2457a0e1a404SHong Zhang ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2458a0e1a404SHong Zhang "MatConvert_SeqBAIJ_SeqSBAIJ", 2459a0e1a404SHong Zhang MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2460a23d5eceSKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2461a23d5eceSKris Buschelman "MatSeqBAIJSetPreallocation_SeqBAIJ", 2462a23d5eceSKris Buschelman MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 246317667f90SBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 24643a40ed3dSBarry Smith PetscFunctionReturn(0); 24652593348eSBarry Smith } 2466273d9f13SBarry Smith EXTERN_C_END 24672593348eSBarry Smith 24684a2ae208SSatish Balay #undef __FUNCT__ 24694a2ae208SSatish Balay #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2470dfbe8321SBarry Smith PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 24712593348eSBarry Smith { 24722593348eSBarry Smith Mat C; 2473b6490206SBarry Smith Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 24746849ba73SBarry Smith PetscErrorCode ierr; 2475a96a251dSBarry Smith PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2476de6a44a3SBarry Smith 24773a40ed3dSBarry Smith PetscFunctionBegin; 247829bbc08cSBarry Smith if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 24792593348eSBarry Smith 24802593348eSBarry Smith *B = 0; 24817adad957SLisandro Dalcin ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 2482899cda47SBarry Smith ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr); 24837adad957SLisandro Dalcin ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 24841d5dac46SHong Zhang ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2485273d9f13SBarry Smith c = (Mat_SeqBAIJ*)C->data; 248644cd7ae7SLois Curfman McInnes 2487899cda47SBarry Smith C->rmap.N = A->rmap.N; 2488899cda47SBarry Smith C->cmap.N = A->cmap.N; 2489899cda47SBarry Smith C->rmap.bs = A->rmap.bs; 24909df24120SSatish Balay c->bs2 = a->bs2; 2491b6490206SBarry Smith c->mbs = a->mbs; 2492b6490206SBarry Smith c->nbs = a->nbs; 24932593348eSBarry Smith 249433b91e9fSSatish Balay ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 2495b6490206SBarry Smith for (i=0; i<mbs; i++) { 24962593348eSBarry Smith c->imax[i] = a->imax[i]; 24972593348eSBarry Smith c->ilen[i] = a->ilen[i]; 24982593348eSBarry Smith } 24992593348eSBarry Smith 25002593348eSBarry Smith /* allocate the matrix space */ 2501a96a251dSBarry Smith ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 2502c4992f7dSBarry Smith c->singlemalloc = PETSC_TRUE; 2503c1ac3661SBarry Smith ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2504b6490206SBarry Smith if (mbs > 0) { 2505c1ac3661SBarry Smith ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 25062e8a6d31SBarry Smith if (cpvalues == MAT_COPY_VALUES) { 2507549d3d68SSatish Balay ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 25082e8a6d31SBarry Smith } else { 2509549d3d68SSatish Balay ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 25102593348eSBarry Smith } 25112593348eSBarry Smith } 25122593348eSBarry Smith c->roworiented = a->roworiented; 25132593348eSBarry Smith c->nonew = a->nonew; 25142593348eSBarry Smith 25152593348eSBarry Smith if (a->diag) { 2516c1ac3661SBarry Smith ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 251752e6d16bSBarry Smith ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2518b6490206SBarry Smith for (i=0; i<mbs; i++) { 25192593348eSBarry Smith c->diag[i] = a->diag[i]; 25202593348eSBarry Smith } 252198305bb5SBarry Smith } else c->diag = 0; 25222593348eSBarry Smith c->nz = a->nz; 25232593348eSBarry Smith c->maxnz = a->maxnz; 25242593348eSBarry Smith c->solve_work = 0; 25257fc0212eSBarry Smith c->mult_work = 0; 2526e6b907acSBarry Smith c->free_a = PETSC_TRUE; 2527e6b907acSBarry Smith c->free_ij = PETSC_TRUE; 2528273d9f13SBarry Smith C->preallocated = PETSC_TRUE; 2529273d9f13SBarry Smith C->assembled = PETSC_TRUE; 253088e51ccdSHong Zhang 253188e51ccdSHong Zhang c->compressedrow.use = a->compressedrow.use; 253288e51ccdSHong Zhang c->compressedrow.nrows = a->compressedrow.nrows; 253388e51ccdSHong Zhang c->compressedrow.checked = a->compressedrow.checked; 253488e51ccdSHong Zhang if ( a->compressedrow.checked && a->compressedrow.use){ 253588e51ccdSHong Zhang i = a->compressedrow.nrows; 253688e51ccdSHong Zhang ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 253788e51ccdSHong Zhang c->compressedrow.rindex = c->compressedrow.i + i + 1; 253888e51ccdSHong Zhang ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 253988e51ccdSHong Zhang ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 254088e51ccdSHong Zhang } else { 254188e51ccdSHong Zhang c->compressedrow.use = PETSC_FALSE; 254288e51ccdSHong Zhang c->compressedrow.i = PETSC_NULL; 254388e51ccdSHong Zhang c->compressedrow.rindex = PETSC_NULL; 254488e51ccdSHong Zhang } 254588e51ccdSHong Zhang C->same_nonzero = A->same_nonzero; 25462593348eSBarry Smith *B = C; 25477adad957SLisandro Dalcin ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 25483a40ed3dSBarry Smith PetscFunctionReturn(0); 25492593348eSBarry Smith } 25502593348eSBarry Smith 25514a2ae208SSatish Balay #undef __FUNCT__ 25524a2ae208SSatish Balay #define __FUNCT__ "MatLoad_SeqBAIJ" 2553f69a0ea3SMatthew Knepley PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A) 25542593348eSBarry Smith { 2555b6490206SBarry Smith Mat_SeqBAIJ *a; 25562593348eSBarry Smith Mat B; 25576849ba73SBarry Smith PetscErrorCode ierr; 2558b24ad042SBarry Smith PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2559c1ac3661SBarry Smith PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2560c1ac3661SBarry Smith PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2561c1ac3661SBarry Smith PetscInt *masked,nmask,tmp,bs2,ishift; 2562b24ad042SBarry Smith PetscMPIInt size; 2563b24ad042SBarry Smith int fd; 256487828ca2SBarry Smith PetscScalar *aa; 256519bcc07fSBarry Smith MPI_Comm comm = ((PetscObject)viewer)->comm; 25662593348eSBarry Smith 25673a40ed3dSBarry Smith PetscFunctionBegin; 25688c07d4e3SBarry Smith ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 25698c07d4e3SBarry Smith ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 25708c07d4e3SBarry Smith ierr = PetscOptionsEnd();CHKERRQ(ierr); 2571de6a44a3SBarry Smith bs2 = bs*bs; 2572b6490206SBarry Smith 2573d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 257429bbc08cSBarry Smith if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2575b0a32e0cSBarry Smith ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 25760752156aSBarry Smith ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2577552e946dSBarry Smith if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 25782593348eSBarry Smith M = header[1]; N = header[2]; nz = header[3]; 25792593348eSBarry Smith 2580d64ed03dSBarry Smith if (header[3] < 0) { 258129bbc08cSBarry Smith SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2582d64ed03dSBarry Smith } 2583d64ed03dSBarry Smith 258429bbc08cSBarry Smith if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 258535aab85fSBarry Smith 258635aab85fSBarry Smith /* 258735aab85fSBarry Smith This code adds extra rows to make sure the number of rows is 258835aab85fSBarry Smith divisible by the blocksize 258935aab85fSBarry Smith */ 2590b6490206SBarry Smith mbs = M/bs; 259135aab85fSBarry Smith extra_rows = bs - M + bs*(mbs); 259235aab85fSBarry Smith if (extra_rows == bs) extra_rows = 0; 259335aab85fSBarry Smith else mbs++; 259435aab85fSBarry Smith if (extra_rows) { 25951e2582c4SBarry Smith ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 259635aab85fSBarry Smith } 2597b6490206SBarry Smith 25982593348eSBarry Smith /* read in row lengths */ 2599c1ac3661SBarry Smith ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 26000752156aSBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 260135aab85fSBarry Smith for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 26022593348eSBarry Smith 2603b6490206SBarry Smith /* read in column indices */ 2604c1ac3661SBarry Smith ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 26050752156aSBarry Smith ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 260635aab85fSBarry Smith for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2607b6490206SBarry Smith 2608b6490206SBarry Smith /* loop over row lengths determining block row lengths */ 2609c1ac3661SBarry Smith ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2610c1ac3661SBarry Smith ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2611c1ac3661SBarry Smith ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2612c1ac3661SBarry Smith ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 261335aab85fSBarry Smith masked = mask + mbs; 2614b6490206SBarry Smith rowcount = 0; nzcount = 0; 2615b6490206SBarry Smith for (i=0; i<mbs; i++) { 261635aab85fSBarry Smith nmask = 0; 2617b6490206SBarry Smith for (j=0; j<bs; j++) { 2618b6490206SBarry Smith kmax = rowlengths[rowcount]; 2619b6490206SBarry Smith for (k=0; k<kmax; k++) { 262035aab85fSBarry Smith tmp = jj[nzcount++]/bs; 262135aab85fSBarry Smith if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2622b6490206SBarry Smith } 2623b6490206SBarry Smith rowcount++; 2624b6490206SBarry Smith } 262535aab85fSBarry Smith browlengths[i] += nmask; 262635aab85fSBarry Smith /* zero out the mask elements we set */ 262735aab85fSBarry Smith for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2628b6490206SBarry Smith } 2629b6490206SBarry Smith 26302593348eSBarry Smith /* create our matrix */ 2631f69a0ea3SMatthew Knepley ierr = MatCreate(comm,&B); 2632f69a0ea3SMatthew Knepley ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 263378ae41b4SKris Buschelman ierr = MatSetType(B,type);CHKERRQ(ierr); 2634ab93d7beSBarry Smith ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 2635b6490206SBarry Smith a = (Mat_SeqBAIJ*)B->data; 26362593348eSBarry Smith 26372593348eSBarry Smith /* set matrix "i" values */ 2638de6a44a3SBarry Smith a->i[0] = 0; 2639b6490206SBarry Smith for (i=1; i<= mbs; i++) { 2640b6490206SBarry Smith a->i[i] = a->i[i-1] + browlengths[i-1]; 2641b6490206SBarry Smith a->ilen[i-1] = browlengths[i-1]; 26422593348eSBarry Smith } 2643b6490206SBarry Smith a->nz = 0; 2644b6490206SBarry Smith for (i=0; i<mbs; i++) a->nz += browlengths[i]; 26452593348eSBarry Smith 2646b6490206SBarry Smith /* read in nonzero values */ 264787828ca2SBarry Smith ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 26480752156aSBarry Smith ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 264935aab85fSBarry Smith for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2650b6490206SBarry Smith 2651b6490206SBarry Smith /* set "a" and "j" values into matrix */ 2652b6490206SBarry Smith nzcount = 0; jcount = 0; 2653b6490206SBarry Smith for (i=0; i<mbs; i++) { 2654b6490206SBarry Smith nzcountb = nzcount; 265535aab85fSBarry Smith nmask = 0; 2656b6490206SBarry Smith for (j=0; j<bs; j++) { 2657b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 2658b6490206SBarry Smith for (k=0; k<kmax; k++) { 265935aab85fSBarry Smith tmp = jj[nzcount++]/bs; 266035aab85fSBarry Smith if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2661b6490206SBarry Smith } 2662b6490206SBarry Smith } 2663de6a44a3SBarry Smith /* sort the masked values */ 2664433994e6SBarry Smith ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2665de6a44a3SBarry Smith 2666b6490206SBarry Smith /* set "j" values into matrix */ 2667b6490206SBarry Smith maskcount = 1; 266835aab85fSBarry Smith for (j=0; j<nmask; j++) { 266935aab85fSBarry Smith a->j[jcount++] = masked[j]; 2670de6a44a3SBarry Smith mask[masked[j]] = maskcount++; 2671b6490206SBarry Smith } 2672b6490206SBarry Smith /* set "a" values into matrix */ 2673de6a44a3SBarry Smith ishift = bs2*a->i[i]; 2674b6490206SBarry Smith for (j=0; j<bs; j++) { 2675b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 2676b6490206SBarry Smith for (k=0; k<kmax; k++) { 2677de6a44a3SBarry Smith tmp = jj[nzcountb]/bs ; 2678de6a44a3SBarry Smith block = mask[tmp] - 1; 2679de6a44a3SBarry Smith point = jj[nzcountb] - bs*tmp; 2680de6a44a3SBarry Smith idx = ishift + bs2*block + j + bs*point; 2681375fe846SBarry Smith a->a[idx] = (MatScalar)aa[nzcountb++]; 2682b6490206SBarry Smith } 2683b6490206SBarry Smith } 268435aab85fSBarry Smith /* zero out the mask elements we set */ 268535aab85fSBarry Smith for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2686b6490206SBarry Smith } 268729bbc08cSBarry Smith if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2688b6490206SBarry Smith 2689606d414cSSatish Balay ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2690606d414cSSatish Balay ierr = PetscFree(browlengths);CHKERRQ(ierr); 2691606d414cSSatish Balay ierr = PetscFree(aa);CHKERRQ(ierr); 2692606d414cSSatish Balay ierr = PetscFree(jj);CHKERRQ(ierr); 2693606d414cSSatish Balay ierr = PetscFree(mask);CHKERRQ(ierr); 2694b6490206SBarry Smith 269578ae41b4SKris Buschelman ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 269678ae41b4SKris Buschelman ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 26979c01be13SBarry Smith ierr = MatView_Private(B);CHKERRQ(ierr); 269878ae41b4SKris Buschelman 269978ae41b4SKris Buschelman *A = B; 27003a40ed3dSBarry Smith PetscFunctionReturn(0); 27012593348eSBarry Smith } 27022593348eSBarry Smith 27034a2ae208SSatish Balay #undef __FUNCT__ 27044a2ae208SSatish Balay #define __FUNCT__ "MatCreateSeqBAIJ" 2705273d9f13SBarry Smith /*@C 2706273d9f13SBarry Smith MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2707273d9f13SBarry Smith compressed row) format. For good matrix assembly performance the 2708273d9f13SBarry Smith user should preallocate the matrix storage by setting the parameter nz 2709273d9f13SBarry Smith (or the array nnz). By setting these parameters accurately, performance 2710273d9f13SBarry Smith during matrix assembly can be increased by more than a factor of 50. 27112593348eSBarry Smith 2712273d9f13SBarry Smith Collective on MPI_Comm 2713273d9f13SBarry Smith 2714273d9f13SBarry Smith Input Parameters: 2715273d9f13SBarry Smith + comm - MPI communicator, set to PETSC_COMM_SELF 2716273d9f13SBarry Smith . bs - size of block 2717273d9f13SBarry Smith . m - number of rows 2718273d9f13SBarry Smith . n - number of columns 271935d8aa7fSBarry Smith . nz - number of nonzero blocks per block row (same for all rows) 272035d8aa7fSBarry Smith - nnz - array containing the number of nonzero blocks in the various block rows 2721273d9f13SBarry Smith (possibly different for each block row) or PETSC_NULL 2722273d9f13SBarry Smith 2723273d9f13SBarry Smith Output Parameter: 2724273d9f13SBarry Smith . A - the matrix 2725273d9f13SBarry Smith 2726175b88e8SBarry Smith It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2727175b88e8SBarry Smith MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely 2728175b88e8SBarry Smith true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles. 2729175b88e8SBarry Smith [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2730175b88e8SBarry Smith 2731273d9f13SBarry Smith Options Database Keys: 2732273d9f13SBarry Smith . -mat_no_unroll - uses code that does not unroll the loops in the 2733273d9f13SBarry Smith block calculations (much slower) 2734273d9f13SBarry Smith . -mat_block_size - size of the blocks to use 2735273d9f13SBarry Smith 2736273d9f13SBarry Smith Level: intermediate 2737273d9f13SBarry Smith 2738273d9f13SBarry Smith Notes: 2739d1be2dadSMatthew Knepley The number of rows and columns must be divisible by blocksize. 2740d1be2dadSMatthew Knepley 274149a6f317SBarry Smith If the nnz parameter is given then the nz parameter is ignored 274249a6f317SBarry Smith 274335d8aa7fSBarry Smith A nonzero block is any block that as 1 or more nonzeros in it 274435d8aa7fSBarry Smith 2745273d9f13SBarry Smith The block AIJ format is fully compatible with standard Fortran 77 2746273d9f13SBarry Smith storage. That is, the stored row and column indices can begin at 2747273d9f13SBarry Smith either one (as in Fortran) or zero. See the users' manual for details. 2748273d9f13SBarry Smith 2749273d9f13SBarry Smith Specify the preallocated storage with either nz or nnz (not both). 2750273d9f13SBarry Smith Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2751273d9f13SBarry Smith allocation. For additional details, see the users manual chapter on 2752273d9f13SBarry Smith matrices. 2753273d9f13SBarry Smith 2754273d9f13SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2755273d9f13SBarry Smith @*/ 2756be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 2757273d9f13SBarry Smith { 2758dfbe8321SBarry Smith PetscErrorCode ierr; 2759273d9f13SBarry Smith 2760273d9f13SBarry Smith PetscFunctionBegin; 2761f69a0ea3SMatthew Knepley ierr = MatCreate(comm,A);CHKERRQ(ierr); 2762f69a0ea3SMatthew Knepley ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2763273d9f13SBarry Smith ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2764ab93d7beSBarry Smith ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 2765273d9f13SBarry Smith PetscFunctionReturn(0); 2766273d9f13SBarry Smith } 2767273d9f13SBarry Smith 27684a2ae208SSatish Balay #undef __FUNCT__ 27694a2ae208SSatish Balay #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2770273d9f13SBarry Smith /*@C 2771273d9f13SBarry Smith MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2772273d9f13SBarry Smith per row in the matrix. For good matrix assembly performance the 2773273d9f13SBarry Smith user should preallocate the matrix storage by setting the parameter nz 2774273d9f13SBarry Smith (or the array nnz). By setting these parameters accurately, performance 2775273d9f13SBarry Smith during matrix assembly can be increased by more than a factor of 50. 2776273d9f13SBarry Smith 2777273d9f13SBarry Smith Collective on MPI_Comm 2778273d9f13SBarry Smith 2779273d9f13SBarry Smith Input Parameters: 2780273d9f13SBarry Smith + A - the matrix 2781273d9f13SBarry Smith . bs - size of block 2782273d9f13SBarry Smith . nz - number of block nonzeros per block row (same for all rows) 2783273d9f13SBarry Smith - nnz - array containing the number of block nonzeros in the various block rows 2784273d9f13SBarry Smith (possibly different for each block row) or PETSC_NULL 2785273d9f13SBarry Smith 2786273d9f13SBarry Smith Options Database Keys: 2787273d9f13SBarry Smith . -mat_no_unroll - uses code that does not unroll the loops in the 2788273d9f13SBarry Smith block calculations (much slower) 2789273d9f13SBarry Smith . -mat_block_size - size of the blocks to use 2790273d9f13SBarry Smith 2791273d9f13SBarry Smith Level: intermediate 2792273d9f13SBarry Smith 2793273d9f13SBarry Smith Notes: 279449a6f317SBarry Smith If the nnz parameter is given then the nz parameter is ignored 279549a6f317SBarry Smith 2796aa95bbe8SBarry Smith You can call MatGetInfo() to get information on how effective the preallocation was; 2797aa95bbe8SBarry Smith for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2798aa95bbe8SBarry Smith You can also run with the option -info and look for messages with the string 2799aa95bbe8SBarry Smith malloc in them to see if additional memory allocation was needed. 2800aa95bbe8SBarry Smith 2801273d9f13SBarry Smith The block AIJ format is fully compatible with standard Fortran 77 2802273d9f13SBarry Smith storage. That is, the stored row and column indices can begin at 2803273d9f13SBarry Smith either one (as in Fortran) or zero. See the users' manual for details. 2804273d9f13SBarry Smith 2805273d9f13SBarry Smith Specify the preallocated storage with either nz or nnz (not both). 2806273d9f13SBarry Smith Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2807273d9f13SBarry Smith allocation. For additional details, see the users manual chapter on 2808273d9f13SBarry Smith matrices. 2809273d9f13SBarry Smith 2810aa95bbe8SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 2811273d9f13SBarry Smith @*/ 2812be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 2813273d9f13SBarry Smith { 2814c1ac3661SBarry Smith PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 2815273d9f13SBarry Smith 2816273d9f13SBarry Smith PetscFunctionBegin; 2817a23d5eceSKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2818a23d5eceSKris Buschelman if (f) { 2819a23d5eceSKris Buschelman ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2820273d9f13SBarry Smith } 2821273d9f13SBarry Smith PetscFunctionReturn(0); 2822273d9f13SBarry Smith } 2823a1d92eedSBarry Smith 2824c75a6043SHong Zhang #undef __FUNCT__ 2825c75a6043SHong Zhang #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 2826c75a6043SHong Zhang /*@ 2827c75a6043SHong Zhang MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 2828c75a6043SHong Zhang (upper triangular entries in CSR format) provided by the user. 2829c75a6043SHong Zhang 2830c75a6043SHong Zhang Collective on MPI_Comm 2831c75a6043SHong Zhang 2832c75a6043SHong Zhang Input Parameters: 2833c75a6043SHong Zhang + comm - must be an MPI communicator of size 1 2834c75a6043SHong Zhang . bs - size of block 2835c75a6043SHong Zhang . m - number of rows 2836c75a6043SHong Zhang . n - number of columns 2837c75a6043SHong Zhang . i - row indices 2838c75a6043SHong Zhang . j - column indices 2839c75a6043SHong Zhang - a - matrix values 2840c75a6043SHong Zhang 2841c75a6043SHong Zhang Output Parameter: 2842c75a6043SHong Zhang . mat - the matrix 2843c75a6043SHong Zhang 2844c75a6043SHong Zhang Level: intermediate 2845c75a6043SHong Zhang 2846c75a6043SHong Zhang Notes: 2847c75a6043SHong Zhang The i, j, and a arrays are not copied by this routine, the user must free these arrays 2848c75a6043SHong Zhang once the matrix is destroyed 2849c75a6043SHong Zhang 2850c75a6043SHong Zhang You cannot set new nonzero locations into this matrix, that will generate an error. 2851c75a6043SHong Zhang 2852c75a6043SHong Zhang The i and j indices are 0 based 2853c75a6043SHong Zhang 2854c75a6043SHong Zhang .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 2855c75a6043SHong Zhang 2856c75a6043SHong Zhang @*/ 2857c75a6043SHong Zhang PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 2858c75a6043SHong Zhang { 2859c75a6043SHong Zhang PetscErrorCode ierr; 2860c75a6043SHong Zhang PetscInt ii; 2861c75a6043SHong Zhang Mat_SeqBAIJ *baij; 2862c75a6043SHong Zhang 2863c75a6043SHong Zhang PetscFunctionBegin; 2864c75a6043SHong Zhang if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 2865c75a6043SHong Zhang if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2866c75a6043SHong Zhang 2867c75a6043SHong Zhang ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2868c75a6043SHong Zhang ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 2869c75a6043SHong Zhang ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 2870c75a6043SHong Zhang ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 2871c75a6043SHong Zhang baij = (Mat_SeqBAIJ*)(*mat)->data; 2872c75a6043SHong Zhang ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 2873c75a6043SHong Zhang 2874c75a6043SHong Zhang baij->i = i; 2875c75a6043SHong Zhang baij->j = j; 2876c75a6043SHong Zhang baij->a = a; 2877c75a6043SHong Zhang baij->singlemalloc = PETSC_FALSE; 2878c75a6043SHong Zhang baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 2879e6b907acSBarry Smith baij->free_a = PETSC_FALSE; 2880e6b907acSBarry Smith baij->free_ij = PETSC_FALSE; 2881c75a6043SHong Zhang 2882c75a6043SHong Zhang for (ii=0; ii<m; ii++) { 2883c75a6043SHong Zhang baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 2884c75a6043SHong Zhang #if defined(PETSC_USE_DEBUG) 2885c75a6043SHong Zhang if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 2886c75a6043SHong Zhang #endif 2887c75a6043SHong Zhang } 2888c75a6043SHong Zhang #if defined(PETSC_USE_DEBUG) 2889c75a6043SHong Zhang for (ii=0; ii<baij->i[m]; ii++) { 2890c75a6043SHong Zhang if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 2891c75a6043SHong Zhang if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 2892c75a6043SHong Zhang } 2893c75a6043SHong Zhang #endif 2894c75a6043SHong Zhang 2895c75a6043SHong Zhang ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2896c75a6043SHong Zhang ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2897c75a6043SHong Zhang PetscFunctionReturn(0); 2898c75a6043SHong Zhang } 2899